Go Performance Optimization: From Basics to Advanced
Tags: golang, performance, optimization, profiling, advanced

Go Performance Optimization: From Basics to Advanced

Learn how to optimize Go applications for maximum performance. Covering profiling, memory management, concurrency optimization, and advanced techniques.

15 min read
1,876 views

Go Performance Fundamentals

Go is known for its excellent performance, but understanding how to optimize your code is crucial for building high-performance applications. Let's explore the key areas of Go performance optimization.

Measuring Performance

Benchmarking

go
package main

import (
    "testing"
    "strings"
)

// Benchmark string concatenation methods
// BenchmarkStringConcat measures naive += concatenation: every append
// reallocates the string, so building 1000 characters does quadratic work.
func BenchmarkStringConcat(b *testing.B) {
    for n := 0; n < b.N; n++ {
        s := ""
        for k := 0; k < 1000; k++ {
            s += "a"
        }
    }
}

// BenchmarkStringBuilder measures the same 1000-character build using
// strings.Builder, which grows its buffer amortized instead of copying
// the whole string on each append.
func BenchmarkStringBuilder(b *testing.B) {
    for n := 0; n < b.N; n++ {
        var sb strings.Builder
        for k := 0; k < 1000; k++ {
            sb.WriteString("a")
        }
        _ = sb.String()
    }
}

// Run benchmarks
// go test -bench=.

Profiling

go
import (
    "os"
    "runtime/pprof"
)

// main demonstrates CPU profiling: it records a pprof CPU profile to
// cpu.prof for the duration of doWork (defined elsewhere), then view it
// with "go tool pprof cpu.prof".
func main() {
    // CPU profiling
    f, err := os.Create("cpu.prof")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    // Fix: StartCPUProfile returns an error (e.g. profiling already
    // enabled) that the original silently discarded.
    if err := pprof.StartCPUProfile(f); err != nil {
        log.Fatal(err)
    }
    defer pprof.StopCPUProfile()

    // Your code here
    doWork()
}

// Memory profiling
// profileMemory writes a heap profile to mem.prof. It forces a GC first
// so the profile reflects live memory rather than collectible garbage.
func profileMemory() {
    f, err := os.Create("mem.prof")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    runtime.GC() // Get accurate memory stats
    // Fix: WriteHeapProfile's error was silently dropped.
    if err := pprof.WriteHeapProfile(f); err != nil {
        log.Fatal(err)
    }
}

Memory Optimization

1. Avoid Unnecessary Allocations

go
// Bad - creates new slice on each call
// processData returns the positive elements of data, each doubled.
// Kept deliberately suboptimal: result starts with zero capacity, so
// append reallocates and copies as it grows.
func processData(data []int) []int {
    out := []int{} // This allocates!
    for _, n := range data {
        if n > 0 {
            out = append(out, 2*n)
        }
    }
    return out
}

// Good - pre-allocate with known capacity
// processData returns the positive elements of data, each doubled,
// using a single up-front allocation sized to the worst case.
func processData(data []int) []int {
    out := make([]int, 0, len(data)) // one allocation, no regrowth
    for _, n := range data {
        if n <= 0 {
            continue
        }
        out = append(out, 2*n)
    }
    return out
}

// Even better - reuse slice
// processDataReuse writes the doubled positive elements of data into
// result's backing array, avoiding any allocation when result already
// has sufficient capacity. The caller must use the returned slice.
func processDataReuse(data []int, result []int) []int {
    result = result[:0] // truncate, keeping the backing array
    for i := range data {
        if v := data[i]; v > 0 {
            result = append(result, 2*v)
        }
    }
    return result
}

2. String Optimization

go
// Bad - creates many temporary strings
// buildString joins parts with spaces via += concatenation; each +=
// allocates a fresh string, making total work quadratic in the output
// length.
// NOTE(review): this variant also appends a trailing space after the
// last part, so its output differs from the strings.Builder variant
// below (which only puts spaces between parts).
func buildString(parts []string) string {
    result := ""
    for _, part := range parts {
        result += part + " "
    }
    return result
}

// Good - use strings.Builder
// buildString joins parts with single spaces using a Builder, which
// grows one buffer amortized instead of reallocating per concatenation.
func buildString(parts []string) string {
    var sb strings.Builder
    sb.Grow(len(parts) * 10) // rough pre-sizing to limit regrowth
    for i, p := range parts {
        if i != 0 {
            sb.WriteByte(' ')
        }
        sb.WriteString(p)
    }
    return sb.String()
}

3. Avoid Interface When Possible

go
// Bad - interface{} causes boxing
// processAny prints every string element of data; non-string elements
// are skipped. Each element access pays a type-assertion check.
func processAny(data []interface{}) {
    for _, item := range data {
        s, ok := item.(string)
        if !ok {
            continue
        }
        fmt.Println(s)
    }
}

// Good - use generics or specific types
// processStrings prints every element; the concrete []string type needs
// no type assertions and no boxing.
func processStrings(data []string) {
    for i := range data {
        fmt.Println(data[i])
    }
}

Concurrency Optimization

1. Goroutine Pool Pattern

go
// WorkerPool distributes Jobs across a fixed set of goroutines and
// collects Results on a shared channel. Job, Result, and processJob
// are defined elsewhere in the project.
type WorkerPool struct {
    workers    int
    jobQueue   chan Job
    resultChan chan Result
    wg         sync.WaitGroup
}

// NewWorkerPool builds a pool with both channel buffers sized at twice
// the worker count, letting producers and consumers run slightly ahead
// of each other.
func NewWorkerPool(workers int) *WorkerPool {
    return &WorkerPool{
        workers:    workers,
        jobQueue:   make(chan Job, workers*2),
        resultChan: make(chan Result, workers*2),
    }
}

// Start launches the worker goroutines. Call exactly once.
func (wp *WorkerPool) Start() {
    for i := 0; i < wp.workers; i++ {
        wp.wg.Add(1)
        go wp.worker()
    }
}

// worker drains jobQueue until Close closes it, publishing each result.
func (wp *WorkerPool) worker() {
    defer wp.wg.Done()
    for job := range wp.jobQueue {
        result := processJob(job)
        wp.resultChan <- result
    }
}

// Submit enqueues a job, blocking while the queue buffer is full.
// NOTE(review): Submit after Close panics (send on closed channel) —
// callers must coordinate shutdown externally.
func (wp *WorkerPool) Submit(job Job) {
    wp.jobQueue <- job
}

// Close stops accepting jobs, waits for in-flight work to finish, then
// closes the result channel so readers ranging over it terminate.
// NOTE(review): if nothing drains resultChan concurrently, workers
// block once its buffer (workers*2) fills and wg.Wait deadlocks —
// confirm callers consume results while Close runs.
func (wp *WorkerPool) Close() {
    close(wp.jobQueue)
    wp.wg.Wait()
    close(wp.resultChan)
}

2. Channel Optimization

go
// Use buffered channels when appropriate
// processWithBufferedChannel fans processItem (defined elsewhere) out
// to one goroutine per element and prints each result as it arrives.
// The buffer is sized to len(data) so no sender ever blocks.
func processWithBufferedChannel(data []int) {
    results := make(chan int, len(data)) // Buffered

    for _, v := range data {
        go func(x int) {
            results <- processItem(x)
        }(v)
    }

    // Receive exactly one result per input; arrival order is
    // scheduler-dependent.
    for range data {
        fmt.Println(<-results)
    }
}

3. Sync.Pool for Object Reuse

go
// bufferPool recycles byte-slice scratch buffers.
// Fix: the pool stores *[]byte rather than []byte — putting a bare
// slice into an interface{} copies the slice header to the heap on
// every Put, allocating each time (staticcheck SA6002) and defeating
// the point of pooling.
var bufferPool = sync.Pool{
    New: func() interface{} {
        b := make([]byte, 0, 1024)
        return &b
    },
}

// getBuffer returns a zero-length buffer with at least 1 KiB of
// reusable capacity.
func getBuffer() []byte {
    return *bufferPool.Get().(*[]byte)
}

// putBuffer returns buf to the pool; the caller must not use buf
// afterwards.
func putBuffer(buf []byte) {
    buf = buf[:0] // Reset length, keep capacity
    bufferPool.Put(&buf)
}

// processData copies data into a pooled scratch buffer and releases it
// when done.
// NOTE(review): the deferred putBuffer captured buf's original header;
// if append grows past capacity, the grown array is simply not pooled —
// harmless, but the reuse benefit is lost for oversized inputs.
func processData(data []byte) {
    buf := getBuffer()
    defer putBuffer(buf)

    // Use buffer
    buf = append(buf, data...)
    // Process...
}

CPU Optimization

1. Loop Unrolling

go
// Bad - many loop iterations
// sumArray returns the sum of all elements using a plain one-at-a-time
// loop (the baseline for the unrolled variant).
func sumArray(data []int) int {
    total := 0
    for i := range data {
        total += data[i]
    }
    return total
}

// Good - unroll small loops
// sumArrayUnrolled returns the sum of all elements, processing four per
// iteration to reduce loop overhead, then summing the remaining tail.
func sumArrayUnrolled(data []int) int {
    var sum int
    n := len(data)
    i := 0

    // Main body: four elements per iteration while at least four remain.
    for ; i+4 <= n; i += 4 {
        sum += data[i] + data[i+1] + data[i+2] + data[i+3]
    }

    // Tail: fewer than four elements left.
    for ; i < n; i++ {
        sum += data[i]
    }

    return sum
}

2. Avoid Function Calls in Hot Paths

go
// Bad - function call overhead
// processItems routes each item through the isValid and process helpers
// (defined elsewhere). The example's point: in a hot loop each call
// adds overhead unless the compiler inlines it.
// NOTE(review): Go's inliner usually does inline small leaf functions —
// measure with -gcflags=-m before hand-inlining as in the variant below.
func processItems(items []Item) {
    for _, item := range items {
        if isValid(item) { // Function call
            process(item)  // Function call
        }
    }
}

// Good - inline simple operations
// processItems doubles the value of every valid, positive item, in
// place.
// Fix: the original ranged by value ("for _, item := range items"), so
// "item.value *= 2" mutated a per-iteration copy and the slice was
// never updated. Indexing mutates the actual elements.
func processItems(items []Item) {
    for i := range items {
        if items[i].valid && items[i].value > 0 { // Inline
            items[i].value *= 2 // Inline
        }
    }
}

3. Use Appropriate Data Structures

go
// Bad - slice for frequent lookups
// findInSlice reports whether target occurs in data via a linear scan —
// O(n) per lookup, which adds up when called frequently.
func findInSlice(data []string, target string) bool {
    for i := range data {
        if data[i] == target {
            return true
        }
    }
    return false
}

// Good - map for O(1) lookups
// findInMap reports whether target maps to true; absent keys yield the
// zero value false, so no presence check is needed.
func findInMap(data map[string]bool, target string) bool {
    found := data[target]
    return found
}

Advanced Optimization Techniques

1. SIMD with Assembly

go
// addInt32s adds a and b into result (assumption: element-wise, with
// result at least as long as the inputs — TODO confirm against the .s
// implementation). The go:noescape directive promises the compiler the
// slices do not escape through this call, so callers' backing arrays
// can stay on the stack.
//go:noescape
func addInt32s(a, b []int32, result []int32)

// Assembly implementation for SIMD
// This would be in a .s file

2. Memory Layout Optimization

go
// Bad - poor cache locality
// BadStruct interleaves small scalars with pointer-bearing fields.
type BadStruct struct {
    name    string
    id      int
    active  bool
    data    []byte
    count   int
}

// Good - group related fields
// GoodStruct groups the frequently-accessed scalars (id, count, active)
// so they tend to share a cache line.
// NOTE(review): on 64-bit both layouts occupy the same 64 bytes after
// padding — the benefit here is locality of hot fields, not a smaller
// struct; verify with the fieldalignment analyzer before relying on it.
type GoodStruct struct {
    id      int
    count   int
    active  bool
    name    string
    data    []byte
}

3. Compiler Optimizations

go
// Use build tags for optimization
// NOTE(review): build constraints are file-scoped — a //go:build line
// only takes effect at the very top of a file, before the package
// clause. The two expensiveOperation variants below must live in two
// separate files (e.g. op.go and op_debug.go); as a single snippet this
// would not compile (duplicate function names, misplaced constraints).
//go:build !debug
// +build !debug

// expensiveOperation: release build, no logging overhead.
func expensiveOperation() {
    // Optimized version
}

//go:build debug
// +build debug

// expensiveOperation: debug build that traces its progress.
func expensiveOperation() {
    // Debug version with logging
    log.Println("Starting expensive operation")
    // ... operation
}

Profiling Tools

1. pprof Web Interface

go
import _ "net/http/pprof"

// main exposes the pprof debug endpoints on localhost:6060 by running
// the default mux in a background goroutine; the blank import of
// net/http/pprof above registers the handlers on http.DefaultServeMux
// as a side effect. Profiles are then available under /debug/pprof/.
func main() {
    go func() {
        // ListenAndServe blocks and only returns on error; log it
        // rather than crash so the main goroutine keeps running.
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()

    // Your application code
}

2. Runtime Metrics

go
import "runtime"

// printMemStats prints the runtime's current allocation and GC counters
// (live heap, cumulative allocations, OS-reserved memory, GC cycles)
// on a single line.
func printMemStats() {
    var stats runtime.MemStats
    runtime.ReadMemStats(&stats)

    fmt.Printf("Alloc = %d KB", stats.Alloc/1024)
    fmt.Printf("\tTotalAlloc = %d KB", stats.TotalAlloc/1024)
    fmt.Printf("\tSys = %d KB", stats.Sys/1024)
    fmt.Printf("\tNumGC = %d\n", stats.NumGC)
}

3. Trace Analysis

go
import (
    "os"
    "runtime/trace"
)

// main records a runtime execution trace to trace.out for the life of
// the program; inspect it with "go tool trace trace.out".
func main() {
    f, err := os.Create("trace.out")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    // Fix: trace.Start can fail (e.g. tracing already enabled); the
    // original discarded the error.
    if err := trace.Start(f); err != nil {
        log.Fatal(err)
    }
    defer trace.Stop()

    // Your code here
}

Performance Best Practices

1. Measure Before Optimizing

go
// Always benchmark your changes
// BenchmarkOptimized is a skeleton: place one iteration of the code
// under test inside the b.N loop. b.ResetTimer excludes any setup
// placed above it from the timed region (a no-op here, since there is
// no setup yet).
func BenchmarkOptimized(b *testing.B) {
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        // Your optimized code
    }
}

2. Use the Right Tool

  • CPU bound: Optimize algorithms, use SIMD
  • Memory bound: Reduce allocations, improve cache locality
  • I/O bound: Use async operations, connection pooling

3. Profile in Production

go
// Add profiling endpoints
// setupProfiling wires the pprof HTTP handlers onto the default mux.
// NOTE(review): these handlers (Index, Cmdline, Profile, Symbol, Trace)
// come from net/http/pprof, not runtime/pprof — the import must match.
// Importing net/http/pprof already registers them on
// http.DefaultServeMux as a side effect; explicit registration like
// this is only needed when serving through a custom mux.
func setupProfiling() {
    http.HandleFunc("/debug/pprof/", pprof.Index)
    http.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
    http.HandleFunc("/debug/pprof/profile", pprof.Profile)
    http.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
    http.HandleFunc("/debug/pprof/trace", pprof.Trace)
}

Common Performance Pitfalls

  1. Premature optimization - Measure first!
  2. Ignoring memory allocations - Use profiling tools
  3. Not using buffered channels - When you know the capacity
  4. String concatenation in loops - Use strings.Builder
  5. Not reusing objects - Use sync.Pool

Real-World Example: HTTP Server Optimization

go
// optimizedHTTPServer combines the techniques above: a pooled HTTP
// transport, a shared client, and a CPU-sized worker pool draining
// requestChan (defined elsewhere).
// NOTE(review): `client` is declared but never used below — as written
// this is a compile error ("declared and not used"); presumably the
// workers are meant to use it. Verify against the complete example.
func optimizedHTTPServer() {
    // Use connection pooling
    transport := &http.Transport{
        MaxIdleConns:        100,
        MaxIdleConnsPerHost: 10,
        IdleConnTimeout:     90 * time.Second,
    }

    client := &http.Client{Transport: transport}

    // Use worker pool for processing
    pool := NewWorkerPool(runtime.NumCPU())
    pool.Start()
    defer pool.Close()

    // Process requests
    for req := range requestChan {
        pool.Submit(req)
    }
}

Performance optimization in Go is about understanding the trade-offs and measuring the impact of your changes. Use the profiling tools, benchmark your code, and optimize based on real data, not assumptions.

Remember: Premature optimization is the root of all evil - but when you need performance, Go gives you the tools to achieve it! 🚀