System Design Part 3: Golang Concurrency & Performance

A Complete Guide for Senior Engineers


13. How Would You Design a High-Throughput Worker Pool?

The Challenge

Process millions of tasks per hour with:
  • Bounded concurrency (don't exhaust resources)
  • Graceful shutdown (finish in-progress work)
  • Backpressure (don't accept more than we can handle)
  • Observability (know what's happening)

Basic Worker Pool

go
package workerpool

import (
	"context"
	"errors"
	"sync"
	"time"
)

// ErrQueueFull is returned by Submit when the bounded queue is at
// capacity (backpressure signal to the caller).
// (BUG FIX: the original referenced ErrQueueFull and the time package
// without defining/importing them.)
var ErrQueueFull = errors.New("workerpool: task queue is full")

// Task is a single unit of work executed by the pool.
type Task interface {
	Process(ctx context.Context) error
}

// WorkerPool runs tasks on a fixed number of goroutines, with a bounded
// queue that provides backpressure to submitters.
type WorkerPool struct {
	workers   int
	taskQueue chan Task
	wg        sync.WaitGroup
	ctx       context.Context
	cancel    context.CancelFunc
}

// NewWorkerPool builds a pool with the given worker count and queue
// capacity. Call Start to launch the workers.
func NewWorkerPool(workers, queueSize int) *WorkerPool {
	ctx, cancel := context.WithCancel(context.Background())
	return &WorkerPool{
		workers:   workers,
		taskQueue: make(chan Task, queueSize),
		ctx:       ctx,
		cancel:    cancel,
	}
}

// Start launches the worker goroutines.
func (p *WorkerPool) Start() {
	for i := 0; i < p.workers; i++ {
		p.wg.Add(1)
		go p.worker(i)
	}
}

// worker drains the queue until it is closed or the pool context is
// cancelled.
func (p *WorkerPool) worker(id int) {
	defer p.wg.Done()
	for {
		select {
		case task, ok := <-p.taskQueue:
			if !ok {
				return // Channel closed
			}
			// Errors are the task's responsibility in this basic pool;
			// the production version below records them in metrics.
			_ = task.Process(p.ctx)
		case <-p.ctx.Done():
			return
		}
	}
}

// Submit enqueues a task without blocking. It returns ErrQueueFull when
// the queue is at capacity and context.Canceled after shutdown begins.
func (p *WorkerPool) Submit(task Task) error {
	select {
	case p.taskQueue <- task:
		return nil
	case <-p.ctx.Done():
		return context.Canceled
	default:
		return ErrQueueFull // Backpressure
	}
}

// Shutdown stops accepting tasks and waits up to timeout for in-flight
// work, then force-cancels. NOTE: callers must stop invoking Submit
// before calling Shutdown — a Submit racing with close(p.taskQueue)
// would panic on send-to-closed-channel. The production version guards
// this with an atomic shutdown flag.
func (p *WorkerPool) Shutdown(timeout time.Duration) {
	// Stop accepting new tasks
	close(p.taskQueue)
	// Wait for workers with timeout
	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()
	select {
	case <-done:
		// All workers finished
	case <-time.After(timeout):
		// Force shutdown
		p.cancel()
	}
}

Production-Grade Worker Pool

go
package workerpool

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Sentinel errors returned by Submit/SubmitWait.
// (BUG FIX: the original referenced these and the fmt/errors packages
// without defining/importing them.)
var (
	ErrQueueFull    = errors.New("workerpool: task queue is full")
	ErrPoolShutdown = errors.New("workerpool: pool is shut down")
)

// Config controls pool sizing, retry policy, and shutdown behavior.
type Config struct {
	Workers         int
	QueueSize       int
	MaxRetries      int
	RetryDelay      time.Duration
	ShutdownTimeout time.Duration
}

// Metrics holds the pool's Prometheus instruments.
type Metrics struct {
	tasksProcessed prometheus.Counter
	tasksFailed    prometheus.Counter
	tasksRetried   prometheus.Counter
	queueDepth     prometheus.Gauge
	processingTime prometheus.Histogram
	activeWorkers  prometheus.Gauge
}

// WorkerPool is the production-grade pool: bounded queue, retries with
// exponential backoff, panic recovery, metrics, graceful shutdown.
type WorkerPool struct {
	config     Config
	taskQueue  chan *TaskWrapper
	wg         sync.WaitGroup
	ctx        context.Context
	cancel     context.CancelFunc
	metrics    *Metrics
	isShutdown atomic.Bool
}

// TaskWrapper carries a task plus its retry bookkeeping.
type TaskWrapper struct {
	Task       Task
	Retries    int
	CreatedAt  time.Time
	RetryAfter time.Time
}

// NewProductionWorkerPool wires up the pool and its metrics. The
// metrics are created but not registered; register them with a
// prometheus.Registerer at the call site.
func NewProductionWorkerPool(config Config) *WorkerPool {
	ctx, cancel := context.WithCancel(context.Background())
	metrics := &Metrics{
		tasksProcessed: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_processed_total",
		}),
		tasksFailed: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_failed_total",
		}),
		tasksRetried: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_retried_total",
		}),
		queueDepth: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "worker_pool_queue_depth",
		}),
		processingTime: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "worker_pool_processing_seconds",
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
		}),
		activeWorkers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "worker_pool_active_workers",
		}),
	}
	return &WorkerPool{
		config:    config,
		taskQueue: make(chan *TaskWrapper, config.QueueSize),
		ctx:       ctx,
		cancel:    cancel,
		metrics:   metrics,
	}
}

// Start launches the workers and the metrics updater.
func (p *WorkerPool) Start() {
	// Start workers
	for i := 0; i < p.config.Workers; i++ {
		p.wg.Add(1)
		go p.worker(i)
	}
	// Start metrics updater
	go p.updateMetrics()
}

// worker drains the queue until it is closed or the context cancelled.
func (p *WorkerPool) worker(id int) {
	defer p.wg.Done()
	for {
		select {
		case wrapper, ok := <-p.taskQueue:
			if !ok {
				return
			}
			p.metrics.activeWorkers.Inc()
			p.processTask(wrapper)
			p.metrics.activeWorkers.Dec()
		case <-p.ctx.Done():
			return
		}
	}
}

// processTask runs one task, recording latency and retrying failed
// tasks with exponential backoff up to MaxRetries.
func (p *WorkerPool) processTask(wrapper *TaskWrapper) {
	start := time.Now()

	// Wait for retry delay if this is a retry.
	// NOTE: sleeping here occupies a worker for the backoff duration; a
	// dedicated retry timer/queue avoids that at the cost of complexity.
	if wrapper.Retries > 0 && time.Now().Before(wrapper.RetryAfter) {
		time.Sleep(time.Until(wrapper.RetryAfter))
	}

	// Process with panic recovery
	err := p.safeProcess(wrapper.Task)

	duration := time.Since(start)
	p.metrics.processingTime.Observe(duration.Seconds())

	if err == nil {
		p.metrics.tasksProcessed.Inc()
		return
	}
	if wrapper.Retries >= p.config.MaxRetries {
		p.metrics.tasksFailed.Inc()
		return
	}

	// Retry with exponential backoff
	wrapper.Retries++
	wrapper.RetryAfter = time.Now().Add(p.config.RetryDelay * time.Duration(1<<wrapper.Retries))
	p.metrics.tasksRetried.Inc()

	// BUG FIX: the original requeued unconditionally; once Shutdown has
	// closed taskQueue, that send panics. Check the shutdown flag first.
	// (A narrow race between the check and the send remains; a fully
	// safe design routes retries through a separate channel that only
	// the workers close.)
	if p.isShutdown.Load() {
		p.metrics.tasksFailed.Inc()
		return
	}
	select {
	case p.taskQueue <- wrapper:
	default:
		// Queue full, drop with logging
		p.metrics.tasksFailed.Inc()
	}
}

// safeProcess converts a panic inside the task into a returned error so
// one bad task cannot kill a worker goroutine.
func (p *WorkerPool) safeProcess(task Task) (err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic recovered: %v", r)
		}
	}()
	return task.Process(p.ctx)
}

// Submit enqueues without blocking; ErrQueueFull signals backpressure.
func (p *WorkerPool) Submit(task Task) error {
	if p.isShutdown.Load() {
		return ErrPoolShutdown
	}
	wrapper := &TaskWrapper{
		Task:      task,
		Retries:   0,
		CreatedAt: time.Now(),
	}
	select {
	case p.taskQueue <- wrapper:
		return nil
	default:
		return ErrQueueFull
	}
}

// SubmitWait blocks until the task is enqueued or ctx is cancelled.
func (p *WorkerPool) SubmitWait(ctx context.Context, task Task) error {
	if p.isShutdown.Load() {
		return ErrPoolShutdown
	}
	wrapper := &TaskWrapper{
		Task:      task,
		Retries:   0,
		CreatedAt: time.Now(),
	}
	select {
	case p.taskQueue <- wrapper:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Shutdown stops intake, waits up to ShutdownTimeout for in-flight
// work, then force-cancels the pool context.
func (p *WorkerPool) Shutdown() {
	p.isShutdown.Store(true)
	close(p.taskQueue)

	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// Graceful shutdown
	case <-time.After(p.config.ShutdownTimeout):
		p.cancel() // Force shutdown
	}
}

// updateMetrics samples the queue depth once per second.
func (p *WorkerPool) updateMetrics() {
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			p.metrics.queueDepth.Set(float64(len(p.taskQueue)))
		case <-p.ctx.Done():
			return
		}
	}
}

Dynamic Worker Pool (Auto-scaling)

go
type DynamicWorkerPool struct { minWorkers int maxWorkers int currentWorkers atomic.Int32 taskQueue chan *TaskWrapper workerChan chan struct{} // Signal to add/remove workers ctx context.Context cancel context.CancelFunc } func (p *DynamicWorkerPool) autoScale() { ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() for { select { case <-ticker.C: queueDepth := len(p.taskQueue) current := int(p.currentWorkers.Load()) // Scale up if queue is filling if queueDepth > current*10 && current < p.maxWorkers { toAdd := min(p.maxWorkers-current, queueDepth/10) for i := 0; i < toAdd; i++ { p.addWorker() } } // Scale down if queue is empty if queueDepth == 0 && current > p.minWorkers { toRemove := min(current-p.minWorkers, (current-p.minWorkers)/2+1) for i := 0; i < toRemove; i++ { p.removeWorker() } } case <-p.ctx.Done(): return } } } func (p *DynamicWorkerPool) addWorker() { p.currentWorkers.Add(1) go p.worker() } func (p *DynamicWorkerPool) removeWorker() { select { case p.workerChan <- struct{}{}: p.currentWorkers.Add(-1) default: // No idle workers to remove } } func (p *DynamicWorkerPool) worker() { for { select { case task, ok := <-p.taskQueue: if !ok { return } task.Task.Process(p.ctx) case <-p.workerChan: // Signal to stop this worker return case <-p.ctx.Done(): return } } }

14. How Do You Prevent Goroutine Leaks?

Common Leak Patterns

go
// LEAK 1: Unbounded goroutines
func processRequests(requests <-chan Request) {
	for req := range requests {
		go handle(req) // No limit! Can spawn millions
	}
}

// FIX: Use worker pool
// A buffered channel acts as a counting semaphore: each goroutine must
// acquire a slot before it starts and releases it when it finishes.
func processRequestsFixed(requests <-chan Request) {
	sem := make(chan struct{}, 100) // Max 100 concurrent
	for req := range requests {
		sem <- struct{}{} // Acquire
		go func(r Request) {
			defer func() { <-sem }() // Release
			handle(r)
		}(req)
	}
}

// LEAK 2: Blocked channel send
func leakyProducer(ch chan<- int) {
	for i := 0; i < 1000; i++ {
		go func(n int) {
			ch <- n // Blocks forever if no receiver!
		}(i)
	}
}

// FIX: Use select with context
// Each sender now has a second exit path, so cancellation reclaims
// every blocked goroutine.
func fixedProducer(ctx context.Context, ch chan<- int) {
	for i := 0; i < 1000; i++ {
		go func(n int) {
			select {
			case ch <- n:
			case <-ctx.Done():
				return // Exit if context cancelled
			}
		}(i)
	}
}

// LEAK 3: Forgotten goroutines in HTTP handlers
func badHandler(w http.ResponseWriter, r *http.Request) {
	go expensiveOperation() // Fire and forget - what if it hangs?
	w.Write([]byte("OK"))
}

// FIX: Track goroutines
var activeGoroutines sync.WaitGroup

func goodHandler(w http.ResponseWriter, r *http.Request) {
	activeGoroutines.Add(1)
	go func() {
		defer activeGoroutines.Done()
		// Bound the background work so a hang cannot leak the goroutine.
		// Note: deliberately detached from r.Context() so the work can
		// outlive the request.
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		expensiveOperationWithContext(ctx)
	}()
	w.Write([]byte("OK"))
}

// On shutdown
func shutdown() {
	// Wait for all background goroutines
	activeGoroutines.Wait()
}

Detecting Goroutine Leaks

go
// In tests
// NOTE(review): comparing runtime.NumGoroutine before/after is
// inherently approximate — the runtime and the test framework may start
// goroutines of their own. Prefer goleak (below) for reliable checks.
func TestNoGoroutineLeaks(t *testing.T) {
	before := runtime.NumGoroutine()

	// Run your code
	doSomething()

	// Give time for cleanup
	time.Sleep(100 * time.Millisecond)

	after := runtime.NumGoroutine()
	if after > before {
		t.Errorf("Goroutine leak: before=%d, after=%d", before, after)
		// Print goroutine stacks for debugging
		buf := make([]byte, 1024*1024)
		n := runtime.Stack(buf, true)
		t.Logf("Goroutines:\n%s", buf[:n])
	}
}

// Using goleak in tests
import "go.uber.org/goleak"

// TestMain makes every test in the package fail if it leaves
// goroutines behind.
func TestMain(m *testing.M) {
	goleak.VerifyTestMain(m)
}

// Or per-test
func TestSomething(t *testing.T) {
	defer goleak.VerifyNone(t)
	// your test code
}

Best Practices

go
// 1. Always use context for cancellation func fetchData(ctx context.Context) ([]byte, error) { req, _ := http.NewRequestWithContext(ctx, "GET", url, nil) return http.DefaultClient.Do(req) } // 2. Use errgroup for managing multiple goroutines func fetchAll(ctx context.Context, urls []string) ([][]byte, error) { g, ctx := errgroup.WithContext(ctx) results := make([][]byte, len(urls)) for i, url := range urls { i, url := i, url // Capture g.Go(func() error { data, err := fetchData(ctx) if err != nil { return err } results[i] = data return nil }) } if err := g.Wait(); err != nil { return nil, err } return results, nil } // 3. Timeout for all blocking operations func safeRead(ch <-chan int) (int, bool) { select { case v, ok := <-ch: return v, ok case <-time.After(5 * time.Second): return 0, false } } // 4. Graceful shutdown with done channel type Service struct { done chan struct{} wg sync.WaitGroup } func (s *Service) Start() { s.done = make(chan struct{}) s.wg.Add(1) go func() { defer s.wg.Done() for { select { case <-s.done: return default: s.doWork() } } }() } func (s *Service) Stop() { close(s.done) s.wg.Wait() }

15. How Do You Debug a Deadlock in Production?

Identifying Deadlocks

go
// Enable deadlock detection (development only)
import "github.com/sasha-s/go-deadlock"

// Replace sync.Mutex with deadlock.Mutex
var mu deadlock.Mutex

// Production: Use pprof
import _ "net/http/pprof"

func main() {
	go func() {
		// NOTE(review): the ListenAndServe error is dropped here; log it
		// in real code so a failed bind on :6060 is visible.
		http.ListenAndServe("localhost:6060", nil)
	}()
	// ...
}

// Then: go tool pprof http://localhost:6060/debug/pprof/goroutine
// Or:   curl http://localhost:6060/debug/pprof/goroutine?debug=2

Common Deadlock Patterns

go
// DEADLOCK 1: Lock ordering
// (BUG FIX: transferFixed below orders locks by ID, but the original
// Account struct had no ID field — added here.)
type Account struct {
	ID      int // stable identity used to establish a global lock order
	mu      sync.Mutex
	balance int
}

func transfer(from, to *Account, amount int) {
	from.mu.Lock() // Thread 1: locks A
	defer from.mu.Unlock()
	to.mu.Lock() // Thread 1: waits for B
	defer to.mu.Unlock()
	// Meanwhile Thread 2: locks B, waits for A = DEADLOCK!
	from.balance -= amount
	to.balance += amount
}

// FIX: Consistent lock ordering
func transferFixed(from, to *Account, amount int) {
	// Always lock lower ID first
	first, second := from, to
	if from.ID > to.ID {
		first, second = to, from
	}
	first.mu.Lock()
	defer first.mu.Unlock()
	second.mu.Lock()
	defer second.mu.Unlock()
	from.balance -= amount
	to.balance += amount
}

// DEADLOCK 2: Nested locks
type Cache struct {
	mu   sync.Mutex
	data map[string]string
}

func (c *Cache) Get(key string) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.data[key]
}

func (c *Cache) GetOrCompute(key string, compute func() string) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	if v, ok := c.data[key]; ok {
		return v
	}
	// compute() might call c.Get() = DEADLOCK!
	v := compute()
	c.data[key] = v
	return v
}

// FIX: Don't hold lock during computation
func (c *Cache) GetOrComputeFixed(key string, compute func() string) string {
	c.mu.Lock()
	if v, ok := c.data[key]; ok {
		c.mu.Unlock()
		return v
	}
	c.mu.Unlock()

	// Compute without holding lock
	v := compute()

	c.mu.Lock()
	// Double-check (another goroutine might have computed)
	if existing, ok := c.data[key]; ok {
		c.mu.Unlock()
		return existing
	}
	c.data[key] = v
	c.mu.Unlock()
	return v
}

// DEADLOCK 3: Channel + Mutex
type Queue struct {
	mu    sync.Mutex
	items chan int
}

func (q *Queue) Push(item int) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.items <- item // Blocks if channel full!
}

func (q *Queue) Pop() int {
	q.mu.Lock()
	defer q.mu.Unlock()
	return <-q.items // Blocks if channel empty!
}

// If Push blocks waiting for Pop, but Pop needs the lock = DEADLOCK!

// FIX: Don't hold lock during channel operations
func (q *Queue) PushFixed(item int) {
	select {
	case q.items <- item:
	default:
		// Handle full queue
	}
}

Production Debugging Steps

1. DETECT:
   - Service stops responding
   - Goroutine count keeps increasing
   - CPU usage drops (goroutines waiting, not working)

2. COLLECT DATA:
   curl http://service:6060/debug/pprof/goroutine?debug=2 > goroutines.txt

3. ANALYZE: Look for patterns like:
   - Multiple goroutines waiting on the same mutex
   - Goroutines blocked on channel operations
   - Circular wait patterns

4. COMMON SIGNS:
   goroutine 1 [semacquire, 5 minutes]:
     sync.runtime_SemacquireMutex(...)  sync/mutex.go:72
     main.(*Account).transfer(...)      main.go:25

5. FIX:
   - Add lock ordering
   - Use timeouts
   - Use context cancellation
   - Reduce lock scope

16. How Do You Design Cancellation Using Context Properly?

Context Best Practices

go
// Rule 1: Pass context as first parameter
func DoSomething(ctx context.Context, arg string) error {
	// ...
}

// Rule 2: Never store context in struct
// A stored context outlives the request it belongs to and breaks
// per-call cancellation.
type BadService struct {
	ctx context.Context // DON'T DO THIS
}

type GoodService struct {
	// No context field
}

func (s *GoodService) DoWork(ctx context.Context) error {
	// Context passed per-call
}

// Rule 3: Use context for cancellation, not values
// BAD: Using context to pass data
ctx = context.WithValue(ctx, "user_id", "123")

// GOOD: Use context for request-scoped values only
// (an unexported key type prevents key collisions between packages)
type contextKey string

const userIDKey contextKey = "user_id"

ctx = context.WithValue(ctx, userIDKey, "123")

// Rule 4: Always check context.Done() in long operations
func processItems(ctx context.Context, items []Item) error {
	for _, item := range items {
		// Non-blocking poll: bail out promptly once cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		process(item)
	}
	return nil
}

// Rule 5: Propagate context to all sub-operations
func handleRequest(ctx context.Context, req *Request) error {
	// Create child context with timeout
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	// Pass to all operations
	data, err := fetchData(ctx, req.ID)
	if err != nil {
		return err
	}
	result, err := processData(ctx, data)
	if err != nil {
		return err
	}
	return storeResult(ctx, result)
}

Proper Cancellation Patterns

go
// Pattern 1: Timeout for external calls func fetchWithTimeout(ctx context.Context, url string) ([]byte, error) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() req, _ := http.NewRequestWithContext(ctx, "GET", url, nil) resp, err := http.DefaultClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() return io.ReadAll(resp.Body) } // Pattern 2: Cancel on first error (errgroup) func fetchAll(ctx context.Context, urls []string) error { g, ctx := errgroup.WithContext(ctx) for _, url := range urls { url := url g.Go(func() error { _, err := fetchWithTimeout(ctx, url) return err // First error cancels all others }) } return g.Wait() } // Pattern 3: Manual cancellation func longRunningTask(ctx context.Context) error { ctx, cancel := context.WithCancel(ctx) defer cancel() resultCh := make(chan Result, 1) errCh := make(chan error, 1) go func() { result, err := doExpensiveWork() if err != nil { errCh <- err } else { resultCh <- result } }() select { case result := <-resultCh: return processResult(result) case err := <-errCh: return err case <-ctx.Done(): // Context cancelled - clean up cancel() // Cancel any child operations return ctx.Err() } } // Pattern 4: Graceful shutdown with context type Server struct { httpServer *http.Server } func (s *Server) Start(ctx context.Context) error { // Start HTTP server go func() { if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed { log.Printf("HTTP server error: %v", err) } }() // Wait for shutdown signal <-ctx.Done() // Graceful shutdown with timeout shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() return s.httpServer.Shutdown(shutdownCtx) } // Pattern 5: Context with deadline for SLAs func handleWithSLA(w http.ResponseWriter, r *http.Request) { // Must respond within 100ms for SLA ctx, cancel := context.WithDeadline(r.Context(), time.Now().Add(100*time.Millisecond)) defer cancel() result, err := process(ctx) if err == 
context.DeadlineExceeded { // SLA breach - return partial result or error http.Error(w, "Request timeout", http.StatusGatewayTimeout) return } json.NewEncoder(w).Encode(result) }

17. When to Use sync.Mutex vs sync.RWMutex?

Decision Guide

┌─────────────────────────────────────────────────────────────────────────────────┐ │ MUTEX vs RWMUTEX DECISION │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ USE sync.Mutex WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Read:Write ratio is close to 1:1 │ │ │ │ • Operations are very fast (< 100ns) │ │ │ │ • Critical section is small │ │ │ │ • You want simpler code │ │ │ │ │ │ │ │ WHY: RWMutex has more overhead due to additional bookkeeping │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ USE sync.RWMutex WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Read:Write ratio is high (10:1 or more) │ │ │ │ • Read operations are slow (> 1μs) │ │ │ │ • Many concurrent readers │ │ │ │ • Writes are infrequent │ │ │ │ │ │ │ │ WHY: Allows multiple readers, improving throughput │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ NEITHER - USE ATOMIC WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Single value (counter, flag) │ │ │ │ • Simple operations (increment, swap) │ │ │ │ • Maximum performance needed │ │ │ │ │ │ │ │ WHY: No locking overhead │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ NEITHER - USE CHANNELS WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Communicating between goroutines │ │ │ │ • Signaling events │ │ │ │ • Pipeline processing │ │ │ │ │ │ │ │ WHY: "Don't communicate by sharing memory; share memory by │ │ │ │ communicating" - Go proverb │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Benchmarks

go
// Benchmark to decide
// Run with: go test -bench=. -cpu=8
func BenchmarkMutex(b *testing.B) {
	var mu sync.Mutex
	data := make(map[string]int)
	data["key"] = 0
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			mu.Lock()
			_ = data["key"] // Read
			mu.Unlock()
		}
	})
}

func BenchmarkRWMutex(b *testing.B) {
	var mu sync.RWMutex
	data := make(map[string]int)
	data["key"] = 0
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			mu.RLock()
			_ = data["key"] // Read
			mu.RUnlock()
		}
	})
}

// Illustrative numbers — rerun on your own hardware/Go version.
// Results (8 cores, read-heavy workload):
// BenchmarkMutex-8      50000000    28.5 ns/op
// BenchmarkRWMutex-8   200000000     8.2 ns/op

// Results (8 cores, write-heavy workload):
// BenchmarkMutex-8      50000000    30.1 ns/op
// BenchmarkRWMutex-8    30000000    45.8 ns/op  <- SLOWER!

Practical Examples

go
// Example 1: Cache (read-heavy) - USE RWMutex
// Many concurrent readers share RLock; only Set takes the write lock.
type Cache struct {
	mu   sync.RWMutex
	data map[string]interface{}
}

func (c *Cache) Get(key string) (interface{}, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	v, ok := c.data[key]
	return v, ok
}

func (c *Cache) Set(key string, value interface{}) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.data[key] = value
}

// Example 2: Counter (write-heavy) - USE Atomic
// Uses the typed atomic.Int64 (Go 1.19+): same API surface, but it
// guarantees 64-bit alignment on 32-bit platforms, which a bare int64
// field with atomic.AddInt64 does not.
type Counter struct {
	value atomic.Int64
}

func (c *Counter) Increment() {
	c.value.Add(1)
}

func (c *Counter) Get() int64 {
	return c.value.Load()
}

// Example 3: Request limiter (mixed) - USE Mutex
// Fixed-window limiter: at most 100 requests per minute.
type RateLimiter struct {
	mu       sync.Mutex
	requests int
	window   time.Time
}

func (r *RateLimiter) Allow() bool {
	r.mu.Lock()
	defer r.mu.Unlock()
	now := time.Now()
	// Start a new window once the current one is older than a minute.
	if now.Sub(r.window) > time.Minute {
		r.requests = 0
		r.window = now
	}
	if r.requests >= 100 {
		return false
	}
	r.requests++
	return true
}

18. How Do You Reduce GC Pressure in Go?

GC Pressure Sources

┌─────────────────────────────────────────────────────────────────────────────────┐ │ GC PRESSURE SOURCES │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ 1. ALLOCATIONS IN HOT PATHS │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ for i := 0; i < 1000000; i++ { │ │ │ │ data := make([]byte, 1024) // 1M allocations! │ │ │ │ process(data) │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 2. STRING CONCATENATION │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ var s string │ │ │ │ for _, item := range items { │ │ │ │ s += item // New allocation each iteration! │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 3. INTERFACE BOXING │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ func process(v interface{}) { ... } │ │ │ │ process(42) // Allocates to box the int │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 4. CLOSURES CAPTURING VARIABLES │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ for _, item := range items { │ │ │ │ go func() { │ │ │ │ process(item) // Captures item, escapes to heap │ │ │ │ }() │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Optimization Techniques

go
// 1. OBJECT POOLING var bufferPool = sync.Pool{ New: func() interface{} { return make([]byte, 1024) }, } func processWithPool() { buf := bufferPool.Get().([]byte) defer bufferPool.Put(buf) // Use buf... // Reset before returning to pool for i := range buf { buf[i] = 0 } } // 2. PRE-ALLOCATE SLICES // BAD func collectBad(items []Item) []Result { var results []Result for _, item := range items { results = append(results, process(item)) // Multiple reallocations } return results } // GOOD func collectGood(items []Item) []Result { results := make([]Result, 0, len(items)) // Pre-allocate capacity for _, item := range items { results = append(results, process(item)) } return results } // 3. STRINGS.BUILDER FOR CONCATENATION // BAD func concatBad(items []string) string { var result string for _, item := range items { result += item // O(n²) allocations } return result } // GOOD func concatGood(items []string) string { var builder strings.Builder builder.Grow(estimatedSize(items)) // Pre-allocate for _, item := range items { builder.WriteString(item) } return builder.String() } // 4. AVOID INTERFACE{} IN HOT PATHS // BAD func sumBad(values []interface{}) int64 { var sum int64 for _, v := range values { sum += v.(int64) // Boxing/unboxing } return sum } // GOOD func sumGood(values []int64) int64 { var sum int64 for _, v := range values { sum += v } return sum } // 5. STRUCT EMBEDDING INSTEAD OF POINTERS // More allocations (2 objects) type BadStruct struct { Data *InnerData } // Single allocation type GoodStruct struct { Data InnerData // Embedded, not pointer } // 6. ARRAY INSTEAD OF SLICE FOR FIXED SIZE type BadRequest struct { Headers []string // Slice = pointer + len + cap } type GoodRequest struct { Headers [8]string // Fixed array, no allocation } // 7. 
ESCAPE ANALYSIS AWARENESS // BAD - pointer escapes func createBad() *Data { d := Data{} // Allocated on heap return &d } // GOOD - value, no escape func createGood() Data { return Data{} // Stack allocated } // Usage data := createGood() // Stack allocation

Monitoring GC

go
// Print GC stats func printGCStats() { var m runtime.MemStats runtime.ReadMemStats(&m) fmt.Printf("Alloc = %v MiB", m.Alloc/1024/1024) fmt.Printf("TotalAlloc = %v MiB", m.TotalAlloc/1024/1024) fmt.Printf("Sys = %v MiB", m.Sys/1024/1024) fmt.Printf("NumGC = %v", m.NumGC) fmt.Printf("PauseTotal = %v ms", m.PauseTotalNs/1000000) } // Set GOGC for tuning // GOGC=100 (default): GC when heap doubles // GOGC=200: GC when heap triples (less frequent, more memory) // GOGC=50: GC when heap grows 50% (more frequent, less memory) // Runtime control debug.SetGCPercent(200) // Same as GOGC=200

19. How Do You Optimize Memory Footprint for High-QPS APIs?

Memory Optimization Strategies

go
// 1. REQUEST BODY POOLING
var requestPool = sync.Pool{
	New: func() interface{} {
		return &Request{
			Headers: make(map[string]string, 10),
			Body:    make([]byte, 0, 4096),
		}
	},
}

func handleRequest(w http.ResponseWriter, r *http.Request) {
	req := requestPool.Get().(*Request)
	defer func() {
		// Clear per-request state before the object goes back in the pool.
		req.Reset()
		requestPool.Put(req)
	}()
	// Use req...
}

// 2. RESPONSE WRITER POOLING
var bufPool = sync.Pool{
	New: func() interface{} {
		return bytes.NewBuffer(make([]byte, 0, 4096))
	},
}

func writeJSON(w http.ResponseWriter, data interface{}) {
	buf := bufPool.Get().(*bytes.Buffer)
	defer func() {
		buf.Reset()
		bufPool.Put(buf)
	}()
	// NOTE(review): Encode and Write errors are dropped here; check them
	// in production code.
	json.NewEncoder(buf).Encode(data)
	w.Write(buf.Bytes())
}

// 3. EFFICIENT JSON HANDLING
// Use json.RawMessage for pass-through
type APIResponse struct {
	Status string          `json:"status"`
	Data   json.RawMessage `json:"data"` // No parsing, just copy
}

// Use streaming for large responses
// Emits a JSON array element-by-element with constant memory. Encoder
// appends a newline after each element; that is still valid JSON
// whitespace.
func streamLargeResponse(w http.ResponseWriter, items <-chan Item) {
	enc := json.NewEncoder(w)
	w.Write([]byte("["))
	first := true
	for item := range items {
		if !first {
			w.Write([]byte(","))
		}
		enc.Encode(item)
		first = false
	}
	w.Write([]byte("]"))
}

// 4. EFFICIENT STRUCT LAYOUT
// BAD - 24 bytes due to padding
type BadStruct struct {
	A bool  // 1 byte + 7 padding
	B int64 // 8 bytes
	C bool  // 1 byte + 7 padding
}

// GOOD - 16 bytes
type GoodStruct struct {
	B int64 // 8 bytes
	A bool  // 1 byte
	C bool  // 1 byte + 6 padding
}

// 5. INTERN COMMON STRINGS
// Repeated identical strings (header names, enum values) get stored
// once; the common read path takes only the cheap RLock.
var stringInterner = struct {
	sync.RWMutex
	m map[string]string
}{m: make(map[string]string)}

func internString(s string) string {
	stringInterner.RLock()
	if interned, ok := stringInterner.m[s]; ok {
		stringInterner.RUnlock()
		return interned
	}
	stringInterner.RUnlock()

	stringInterner.Lock()
	defer stringInterner.Unlock()
	// Double-check under the write lock: another goroutine may have
	// interned s between our RUnlock and Lock.
	if interned, ok := stringInterner.m[s]; ok {
		return interned
	}
	stringInterner.m[s] = s
	return s
}

// 6. USE FASTHTTP FOR EXTREME PERFORMANCE
import "github.com/valyala/fasthttp"

func fastHandler(ctx *fasthttp.RequestCtx) {
	// fasthttp reuses memory aggressively
	// Request/response bodies are pooled
	ctx.SetBodyString("OK")
}

Profiling Memory

go
// Run with memory profiling
// go test -bench=. -memprofile=mem.out
// go tool pprof mem.out

// In code
import "runtime/pprof"

// startMemProfile writes a one-shot heap snapshot to mem.pprof.
// NOTE(review): the Create/WriteHeapProfile errors are dropped here —
// check them in real code; calling runtime.GC() just before the
// snapshot gives an up-to-date profile.
func startMemProfile() {
	f, _ := os.Create("mem.pprof")
	pprof.WriteHeapProfile(f)
	f.Close()
}

// HTTP endpoint for live profiling
import _ "net/http/pprof"

func main() {
	go http.ListenAndServe(":6060", nil)
	// Then: go tool pprof http://localhost:6060/debug/pprof/heap
}

20. How Do You Design Backpressure in Go Services?

Backpressure Strategies

┌─────────────────────────────────────────────────────────────────────────────────┐ │ BACKPRESSURE STRATEGIES │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ 1. BOUNDED QUEUES │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ taskQueue := make(chan Task, 1000) // Bounded! │ │ │ │ │ │ │ │ select { │ │ │ │ case taskQueue <- task: │ │ │ │ // Accepted │ │ │ │ default: │ │ │ │ // Queue full - reject or drop │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 2. SEMAPHORE (Limited Concurrency) │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ sem := make(chan struct{}, 100) // Max 100 concurrent │ │ │ │ │ │ │ │ sem <- struct{}{} // Acquire (blocks if full) │ │ │ │ defer func() { <-sem }() // Release │ │ │ │ process() │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 3. RATE LIMITING │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ limiter := rate.NewLimiter(100, 10) // 100/sec, burst 10 │ │ │ │ │ │ │ │ if !limiter.Allow() { │ │ │ │ return ErrTooManyRequests │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 4. TIMEOUT │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ ctx, cancel := context.WithTimeout(ctx, 5*time.Second) │ │ │ │ defer cancel() │ │ │ │ │ │ │ │ select { │ │ │ │ case result := <-process(ctx): │ │ │ │ return result │ │ │ │ case <-ctx.Done(): │ │ │ │ return ErrTimeout │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Complete Backpressure Implementation

go
// BackpressureService layers four defenses in front of task execution,
// checked cheapest-first: circuit breaker -> rate limiter -> bounded
// queue -> concurrency semaphore.
type BackpressureService struct {
	// Concurrency limiter
	semaphore chan struct{}
	// Rate limiter
	rateLimiter *rate.Limiter
	// Queue with bounded size
	taskQueue chan Task
	// Circuit breaker
	circuitBreaker *CircuitBreaker
	// Metrics (accessed atomically)
	rejectedCounter int64
	acceptedCounter int64
}

func NewBackpressureService(maxConcurrency, queueSize int, rps float64) *BackpressureService {
	return &BackpressureService{
		semaphore:      make(chan struct{}, maxConcurrency),
		rateLimiter:    rate.NewLimiter(rate.Limit(rps), int(rps/10)),
		taskQueue:      make(chan Task, queueSize),
		circuitBreaker: NewCircuitBreaker(5, time.Minute),
	}
}

// Submit applies admission control and rejects fast when any layer
// refuses; it never blocks the caller.
func (s *BackpressureService) Submit(ctx context.Context, task Task) error {
	// Layer 1: Check circuit breaker
	if s.circuitBreaker.IsOpen() {
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrCircuitOpen
	}
	// Layer 2: Rate limiting
	if !s.rateLimiter.Allow() {
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrRateLimited
	}
	// Layer 3: Queue depth check (non-blocking send = backpressure)
	select {
	case s.taskQueue <- task:
		atomic.AddInt64(&s.acceptedCounter, 1)
		return nil
	default:
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrQueueFull
	}
}

// Process drains the queue, bounding in-flight work with the semaphore
// and feeding outcomes back into the circuit breaker.
func (s *BackpressureService) Process(ctx context.Context) {
	for {
		select {
		case task := <-s.taskQueue:
			// Layer 4: Concurrency limiting
			select {
			case s.semaphore <- struct{}{}:
				go func(t Task) {
					defer func() { <-s.semaphore }()
					err := s.processWithTimeout(ctx, t)
					if err != nil {
						s.circuitBreaker.RecordFailure()
					} else {
						s.circuitBreaker.RecordSuccess()
					}
				}(task)
			case <-ctx.Done():
				return
			}
		case <-ctx.Done():
			return
		}
	}
}

func (s *BackpressureService) processWithTimeout(ctx context.Context, task Task) error {
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	// Buffered (capacity 1) so the worker goroutine can always deliver
	// its result and exit, even after we stop listening on timeout —
	// no goroutine leak.
	done := make(chan error, 1)
	go func() {
		done <- task.Execute(ctx)
	}()

	select {
	case err := <-done:
		return err
	case <-ctx.Done():
		return ctx.Err()
	}
}

// HTTP middleware for backpressure
// Rejects with 503 + Retry-After when a concurrency slot cannot be
// acquired within 100ms.
func BackpressureMiddleware(maxConcurrent int, timeout time.Duration) func(http.Handler) http.Handler {
	sem := make(chan struct{}, maxConcurrent)
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Try to acquire semaphore with timeout
			select {
			case sem <- struct{}{}:
				defer func() { <-sem }()
				// Add timeout to request context
				ctx, cancel := context.WithTimeout(r.Context(), timeout)
				defer cancel()
				next.ServeHTTP(w, r.WithContext(ctx))
			case <-time.After(100 * time.Millisecond):
				// Can't acquire slot quickly - reject
				w.Header().Set("Retry-After", "1")
				http.Error(w, "Server busy", http.StatusServiceUnavailable)
			}
		})
	}
}

Adaptive Backpressure

go
// AdaptiveBackpressure raises its concurrency limit while calls keep
// succeeding and lowers it when the failure rate climbs — an
// AIMD-style (additive increase / larger decrease) controller.
type AdaptiveBackpressure struct {
	maxConcurrency int64
	currentLimit   int64
	successCount   int64 // accessed atomically
	failureCount   int64 // accessed atomically
	adjustInterval time.Duration
}

func (a *AdaptiveBackpressure) Start() {
	// NOTE(review): this ticker and goroutine can never be stopped —
	// wire in a context or done channel before production use (see the
	// goroutine-leak section above).
	ticker := time.NewTicker(a.adjustInterval)
	go func() {
		for range ticker.C {
			a.adjust()
		}
	}()
}

// adjust consumes one interval's worth of outcome counters and moves
// the limit accordingly.
func (a *AdaptiveBackpressure) adjust() {
	// Swap-and-reset yields a per-interval window of counts.
	successes := atomic.SwapInt64(&a.successCount, 0)
	failures := atomic.SwapInt64(&a.failureCount, 0)

	if failures == 0 && successes > 0 {
		// All successful - increase limit
		current := atomic.LoadInt64(&a.currentLimit)
		newLimit := min(current+10, a.maxConcurrency) // min/max builtins need Go 1.21+
		atomic.StoreInt64(&a.currentLimit, newLimit)
	} else if failures > successes/10 {
		// >10% failure rate - decrease limit (floor of 10)
		current := atomic.LoadInt64(&a.currentLimit)
		newLimit := max(current-20, 10)
		atomic.StoreInt64(&a.currentLimit, newLimit)
	}
}

func (a *AdaptiveBackpressure) Allow() bool {
	// Check current active against adaptive limit
	// ... implementation
	return true
}

This completes Part 3 covering Go concurrency and performance patterns. The document continues with more topics in subsequent parts.
All Blogs
Tags: golang, concurrency, performance, system-design, backend, goroutines, profiling, interview-prep