Go Performance Fundamentals
Go is known for its excellent performance, but understanding how to optimize your code is crucial for building high-performance applications. Let's explore the key areas of Go performance optimization.
Measuring Performance
Benchmarking
package main
import (
"testing"
"strings"
)
// Benchmark string concatenation methods
//
// BenchmarkStringConcat measures naive += concatenation, which copies
// the whole string on every append (quadratic in total length).
func BenchmarkStringConcat(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var s string
		for n := 0; n < 1000; n++ {
			s += "a"
		}
		_ = s
	}
}
// BenchmarkStringBuilder measures the same workload with
// strings.Builder, which grows one buffer instead of re-copying.
func BenchmarkStringBuilder(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var sb strings.Builder
		for n := 0; n < 1000; n++ {
			sb.WriteString("a")
		}
		_ = sb.String()
	}
}
// Run benchmarks
// go test -bench=.
Profiling
import (
"os"
"runtime/pprof"
)
// main writes a CPU profile (cpu.prof) covering the run of doWork;
// inspect it with `go tool pprof cpu.prof`.
func main() {
	// CPU profiling
	f, err := os.Create("cpu.prof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	// Fixed: StartCPUProfile returns an error (e.g. profiling already
	// enabled); the original ignored it and would silently write an
	// empty profile.
	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()
	// Your code here
	doWork()
}
// Memory profiling
//
// profileMemory writes a heap profile to mem.prof; inspect it with
// `go tool pprof mem.prof`.
func profileMemory() {
	f, err := os.Create("mem.prof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	runtime.GC() // force a collection so the profile reflects live objects
	// Fixed: WriteHeapProfile's error was silently dropped, so a failed
	// write would go unnoticed.
	if err := pprof.WriteHeapProfile(f); err != nil {
		log.Fatal(err)
	}
}
Memory Optimization
1. Avoid Unnecessary Allocations
// Bad - creates new slice on each call
func processData(data []int) []int {
	result := []int{} // This allocates!
	for _, n := range data {
		if n <= 0 {
			continue // keep positives only
		}
		result = append(result, n*2)
	}
	return result
}
// Good - pre-allocate with known capacity
func processData(data []int) []int {
	out := make([]int, 0, len(data)) // one allocation, no growth copies
	for _, n := range data {
		if n > 0 {
			out = append(out, n*2)
		}
	}
	return out
}
// Even better - reuse slice
func processDataReuse(data []int, result []int) []int {
	out := result[:0] // truncate: length 0, capacity kept for reuse
	for _, n := range data {
		if n > 0 {
			out = append(out, n*2)
		}
	}
	return out
}
2. String Optimization
// Bad - creates many temporary strings
func buildString(parts []string) string {
	s := ""
	for _, p := range parts {
		s = s + p + " " // every iteration allocates a fresh string
	}
	return s
}
// Good - use strings.Builder
func buildString(parts []string) string {
	var b strings.Builder
	b.Grow(len(parts) * 10) // rough pre-allocation to avoid regrowth
	for idx, p := range parts {
		if idx != 0 {
			b.WriteByte(' ') // single space between parts, none trailing
		}
		b.WriteString(p)
	}
	return b.String()
}
3. Avoid interface{} (the Empty Interface) When Possible
// Bad - interface{} causes boxing
func processAny(data []interface{}) {
	for _, item := range data {
		// Type assertion overhead on every element.
		s, ok := item.(string)
		if ok {
			fmt.Println(s)
		}
	}
}
// Good - use generics or specific types
func processStrings(data []string) {
	for i := range data {
		fmt.Println(data[i])
	}
}
Concurrency Optimization
1. Goroutine Pool Pattern
// WorkerPool fans a stream of jobs out to a fixed set of goroutines.
type WorkerPool struct {
	workers    int            // number of goroutines started by Start
	jobQueue   chan Job       // incoming jobs; closed by Close to stop workers
	resultChan chan Result    // processed results; closed after all workers exit
	wg         sync.WaitGroup // tracks live workers so Close can wait for drain
}
// NewWorkerPool builds a pool for the given worker count, buffering the
// job and result channels to twice that count.
func NewWorkerPool(workers int) *WorkerPool {
	depth := workers * 2
	wp := &WorkerPool{workers: workers}
	wp.jobQueue = make(chan Job, depth)
	wp.resultChan = make(chan Result, depth)
	return wp
}
// Start launches the configured number of worker goroutines.
func (wp *WorkerPool) Start() {
	wp.wg.Add(wp.workers) // register all workers up front
	for n := 0; n < wp.workers; n++ {
		go wp.worker()
	}
}
// worker drains jobQueue until it is closed, pushing each result onto
// resultChan, then signals completion via the WaitGroup.
func (wp *WorkerPool) worker() {
	defer wp.wg.Done() // lets Close know this worker has exited
	for job := range wp.jobQueue { // loop ends when jobQueue is closed
		// NOTE(review): processJob is not visible in this file — assumed
		// to be the per-job processing step; confirm its definition.
		result := processJob(job)
		wp.resultChan <- result
	}
}
// Submit enqueues one job; it blocks when the buffered queue is full.
func (wp *WorkerPool) Submit(job Job) {
	wp.jobQueue <- job
}
// Close shuts the pool down in order: closing jobQueue ends each
// worker's range loop, Wait blocks until every worker has exited, and
// only then is resultChan closed — safe because no worker can still
// be sending on it.
func (wp *WorkerPool) Close() {
	close(wp.jobQueue)
	wp.wg.Wait()
	close(wp.resultChan)
}
2. Channel Optimization
// Use buffered channels when appropriate
func processWithBufferedChannel(data []int) {
	// Buffer sized to the input so no producer goroutine ever blocks.
	results := make(chan int, len(data))
	for _, v := range data {
		go func(x int) {
			results <- processItem(x)
		}(v)
	}
	// Collect exactly one result per input; arrival order is arbitrary.
	for range data {
		fmt.Println(<-results)
	}
}
3. Sync.Pool for Object Reuse
// bufferPool recycles byte slices to cut allocation churn.
// NOTE(review): staticcheck SA6002 — putting a []byte (a non-pointer
// value) into sync.Pool boxes the slice header into an interface on
// every Put, which itself allocates; pooling *[]byte (or *bytes.Buffer)
// avoids that. Changing it would ripple through getBuffer/putBuffer.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return make([]byte, 0, 1024) // fresh buffers start with 1 KiB capacity
	},
}
// getBuffer fetches a pooled buffer (length 0; capacity 1024 when the
// pool had to allocate a fresh one via New).
func getBuffer() []byte {
	return bufferPool.Get().([]byte)
}
// putBuffer returns a buffer to the pool, truncating it first so the
// next user starts empty while keeping any grown capacity.
func putBuffer(buf []byte) {
	buf = buf[:0] // Reset length
	bufferPool.Put(buf) // NOTE(review): SA6002 — this Put boxes the slice header (allocates)
}
// processData copies data into a pooled scratch buffer for processing.
func processData(data []byte) {
	buf := getBuffer()
	// Fixed: `defer putBuffer(buf)` evaluates buf immediately, so when
	// the append below re-grows the buffer, the OLD (small) slice went
	// back to the pool and the grown backing array was lost. Deferring
	// a closure returns the final buf instead.
	defer func() { putBuffer(buf) }()
	// Use buffer
	buf = append(buf, data...)
	// Process...
}
CPU Optimization
1. Loop Unrolling
// Bad - many loop iterations
func sumArray(data []int) int {
	total := 0
	for i := range data {
		total += data[i]
	}
	return total
}
// Good - unroll small loops (benchmark to confirm: the gain depends on
// the workload and hardware)
func sumArrayUnrolled(data []int) int {
	total := 0
	n := len(data)
	i := 0
	// Main body: four elements per iteration.
	for ; i+4 <= n; i += 4 {
		total += data[i] + data[i+1] + data[i+2] + data[i+3]
	}
	// Tail: fewer than four elements remain.
	for ; i < n; i++ {
		total += data[i]
	}
	return total
}
2. Avoid Function Calls in Hot Paths
// Bad - function call overhead
// NOTE(review): isValid and process are not visible in this file;
// whether their call overhead matters depends on inlining — check with
// `go build -gcflags=-m`.
func processItems(items []Item) {
	for _, item := range items {
		if isValid(item) { // Function call
			process(item) // Function call
		}
	}
}
// Good - inline simple operations
// Fixed: the original ranged by value (`for _, item := range items`),
// so `item.value *= 2` mutated a per-iteration copy and the doubling
// was silently lost. Indexing mutates the slice elements in place.
func processItems(items []Item) {
	for i := range items {
		if items[i].valid && items[i].value > 0 { // Inline
			items[i].value *= 2 // Inline
		}
	}
}
3. Use Appropriate Data Structures
// Bad - slice for frequent lookups: O(n) scan per call
func findInSlice(data []string, target string) bool {
	found := false
	for i := 0; i < len(data) && !found; i++ {
		found = data[i] == target
	}
	return found
}
// Good - map for O(1) lookups
func findInMap(data map[string]bool, target string) bool {
	// A missing key yields the zero value (false), same as "not found".
	present := data[target]
	return present
}
Advanced Optimization Techniques
1. SIMD with Assembly
// addInt32s adds a and b element-wise, writing the sums into result.
// The body lives in a companion .s file (SIMD assembly); this is only
// the Go declaration. The //go:noescape directive must stay on the
// line directly above the declaration.
//
//go:noescape
func addInt32s(a, b []int32, result []int32)
// Assembly implementation for SIMD
// This would be in a .s file
2. Memory Layout Optimization
// Bad - poor cache locality
// NOTE(review): the padding cost of this ordering is arch-dependent —
// verify with unsafe.Sizeof or the fieldalignment analyzer before
// assuming it is larger than the "good" layout below.
type BadStruct struct {
	name   string
	id     int
	active bool
	data   []byte
	count  int
}
// Good - group related fields
// Fields are grouped by alignment (machine words first, then the bool,
// then pointer-bearing fields). NOTE(review): for these particular
// fields the saving may be zero on amd64 — confirm with unsafe.Sizeof;
// reordering pays off when it actually removes padding.
type GoodStruct struct {
	id     int
	count  int
	active bool
	name   string
	data   []byte
}
3. Compiler Optimizations
// Use build tags for optimization
// NOTE(review): build constraints are file-scoped and must precede the
// package clause — this variant and the debug one below belong in two
// separate files (e.g. op_release.go / op_debug.go) to compile.
//go:build !debug
// +build !debug
func expensiveOperation() {
	// Optimized version
}
//go:build debug
// +build debug
// Debug build variant of expensiveOperation: logs before the work.
// NOTE(review): must live in its own file (build tags are per-file);
// as shown, it would collide with the release variant above.
func expensiveOperation() {
	// Debug version with logging
	log.Println("Starting expensive operation")
	// ... operation
}
Profiling Tools
1. pprof Web Interface
import _ "net/http/pprof"
// main starts the net/http/pprof debug server on localhost:6060 in the
// background; profiles become available under /debug/pprof/.
func main() {
	go func() {
		// ListenAndServe blocks; it returns only on error, which is logged.
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()
	// Your application code
	// NOTE(review): as written main returns immediately, tearing the
	// server down — real code must block here (server, select{}, etc.).
}
2. Runtime Metrics
import "runtime"
// printMemStats dumps a one-line, tab-separated summary of the
// runtime's current memory statistics to stdout.
func printMemStats() {
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	fmt.Printf("Alloc = %d KB\tTotalAlloc = %d KB\tSys = %d KB\tNumGC = %d\n",
		stats.Alloc/1024, stats.TotalAlloc/1024, stats.Sys/1024, stats.NumGC)
}
3. Trace Analysis
import (
"os"
"runtime/trace"
)
// main records a runtime execution trace for the whole program to
// trace.out; inspect it with `go tool trace trace.out`.
func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	// Fixed: trace.Start returns an error (e.g. tracing already
	// enabled); the original dropped it and would silently produce
	// no trace.
	if err := trace.Start(f); err != nil {
		log.Fatal(err)
	}
	defer trace.Stop()
	// Your code here
}
Performance Best Practices
1. Measure Before Optimizing
// Always benchmark your changes
// BenchmarkOptimized is a template: do setup above ResetTimer so only
// the loop body is measured by the benchmark framework.
func BenchmarkOptimized(b *testing.B) {
	b.ResetTimer() // exclude any setup above from the timing
	for i := 0; i < b.N; i++ {
		// Your optimized code
	}
}
2. Use the Right Tool
- CPU bound: Optimize algorithms, use SIMD
- Memory bound: Reduce allocations, improve cache locality
- I/O bound: Use async operations, connection pooling
3. Profile in Production
// Add profiling endpoints
// setupProfiling wires the pprof HTTP handlers onto the default mux.
// NOTE(review): Index/Cmdline/Profile/Symbol/Trace are exported by
// net/http/pprof (not runtime/pprof) — confirm the import; importing
// net/http/pprof for side effects registers these routes automatically.
func setupProfiling() {
	http.HandleFunc("/debug/pprof/", pprof.Index)
	http.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
	http.HandleFunc("/debug/pprof/profile", pprof.Profile)
	http.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
	http.HandleFunc("/debug/pprof/trace", pprof.Trace)
}
Common Performance Pitfalls
- Premature optimization - Measure first!
- Ignoring memory allocations - Use profiling tools
- Not using buffered channels when the capacity is known - unbuffered sends block until a receiver is ready
- String concatenation in loops - Use strings.Builder
- Not reusing objects - Use sync.Pool
Real-World Example: HTTP Server Optimization
// optimizedHTTPServer demonstrates outbound connection pooling plus a
// CPU-sized worker pool for request processing.
func optimizedHTTPServer() {
	// Use connection pooling so outbound requests reuse TCP connections.
	transport := &http.Transport{
		MaxIdleConns:        100,
		MaxIdleConnsPerHost: 10,
		IdleConnTimeout:     90 * time.Second,
	}
	client := &http.Client{Transport: transport}
	// Fixed: `client` was declared and never used, which is a compile
	// error in Go. A real handler would issue requests through it.
	_ = client
	// Use worker pool for processing, sized to the machine's CPU count.
	pool := NewWorkerPool(runtime.NumCPU())
	pool.Start()
	defer pool.Close()
	// Process requests
	// NOTE(review): requestChan is not visible in this file — assumed to
	// be the incoming request stream; confirm where it is defined.
	for req := range requestChan {
		pool.Submit(req)
	}
}
Performance optimization in Go is about understanding the trade-offs and measuring the impact of your changes. Use the profiling tools, benchmark your code, and optimize based on real data, not assumptions.
Remember: Premature optimization is the root of all evil - but when you need performance, Go gives you the tools to achieve it! 🚀