System Design Part 3: Golang Concurrency & Performance

A Complete Guide for Senior Engineers


13. How Would You Design a High-Throughput Worker Pool?

The Challenge

Process millions of tasks per hour with:
  • Bounded concurrency (don't exhaust resources)
  • Graceful shutdown (finish in-progress work)
  • Backpressure (don't accept more than we can handle)
  • Observability (know what's happening)

Basic Worker Pool

go
package workerpool

import (
	"context"
	"errors"
	"sync"
	"time"
)

// ErrQueueFull is returned by Submit when the bounded queue is at
// capacity (backpressure signal to the caller).
// (BUG FIX: the original referenced ErrQueueFull and the time package
// without defining/importing them.)
var ErrQueueFull = errors.New("workerpool: task queue is full")

// Task is a single unit of work executed by the pool.
type Task interface {
	Process(ctx context.Context) error
}

// WorkerPool runs tasks on a fixed number of goroutines, with a bounded
// queue that provides backpressure to submitters.
type WorkerPool struct {
	workers   int
	taskQueue chan Task
	wg        sync.WaitGroup
	ctx       context.Context
	cancel    context.CancelFunc
}

// NewWorkerPool builds a pool with the given worker count and queue
// capacity. Call Start to launch the workers.
func NewWorkerPool(workers, queueSize int) *WorkerPool {
	ctx, cancel := context.WithCancel(context.Background())
	return &WorkerPool{
		workers:   workers,
		taskQueue: make(chan Task, queueSize),
		ctx:       ctx,
		cancel:    cancel,
	}
}

// Start launches the worker goroutines.
func (p *WorkerPool) Start() {
	for i := 0; i < p.workers; i++ {
		p.wg.Add(1)
		go p.worker(i)
	}
}

// worker drains the queue until it is closed or the pool context is
// cancelled.
func (p *WorkerPool) worker(id int) {
	defer p.wg.Done()
	for {
		select {
		case task, ok := <-p.taskQueue:
			if !ok {
				return // Channel closed
			}
			// Errors are the task's responsibility in this basic pool;
			// the production version below records them in metrics.
			_ = task.Process(p.ctx)
		case <-p.ctx.Done():
			return
		}
	}
}

// Submit enqueues a task without blocking. It returns ErrQueueFull when
// the queue is at capacity and context.Canceled after shutdown begins.
func (p *WorkerPool) Submit(task Task) error {
	select {
	case p.taskQueue <- task:
		return nil
	case <-p.ctx.Done():
		return context.Canceled
	default:
		return ErrQueueFull // Backpressure
	}
}

// Shutdown stops accepting tasks and waits up to timeout for in-flight
// work, then force-cancels. NOTE: callers must stop invoking Submit
// before calling Shutdown — a Submit racing with close(p.taskQueue)
// would panic on send-to-closed-channel. The production version guards
// this with an atomic shutdown flag.
func (p *WorkerPool) Shutdown(timeout time.Duration) {
	// Stop accepting new tasks
	close(p.taskQueue)
	// Wait for workers with timeout
	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()
	select {
	case <-done:
		// All workers finished
	case <-time.After(timeout):
		// Force shutdown
		p.cancel()
	}
}

Production-Grade Worker Pool

go
package workerpool

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Sentinel errors returned by Submit/SubmitWait.
// (BUG FIX: the original referenced these and the fmt/errors packages
// without defining/importing them.)
var (
	ErrQueueFull    = errors.New("workerpool: task queue is full")
	ErrPoolShutdown = errors.New("workerpool: pool is shut down")
)

// Config controls pool sizing, retry policy, and shutdown behavior.
type Config struct {
	Workers         int
	QueueSize       int
	MaxRetries      int
	RetryDelay      time.Duration
	ShutdownTimeout time.Duration
}

// Metrics holds the pool's Prometheus instruments.
type Metrics struct {
	tasksProcessed prometheus.Counter
	tasksFailed    prometheus.Counter
	tasksRetried   prometheus.Counter
	queueDepth     prometheus.Gauge
	processingTime prometheus.Histogram
	activeWorkers  prometheus.Gauge
}

// WorkerPool is the production-grade pool: bounded queue, retries with
// exponential backoff, panic recovery, metrics, graceful shutdown.
type WorkerPool struct {
	config     Config
	taskQueue  chan *TaskWrapper
	wg         sync.WaitGroup
	ctx        context.Context
	cancel     context.CancelFunc
	metrics    *Metrics
	isShutdown atomic.Bool
}

// TaskWrapper carries a task plus its retry bookkeeping.
type TaskWrapper struct {
	Task       Task
	Retries    int
	CreatedAt  time.Time
	RetryAfter time.Time
}

// NewProductionWorkerPool wires up the pool and its metrics. The
// metrics are created but not registered; register them with a
// prometheus.Registerer at the call site.
func NewProductionWorkerPool(config Config) *WorkerPool {
	ctx, cancel := context.WithCancel(context.Background())
	metrics := &Metrics{
		tasksProcessed: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_processed_total",
		}),
		tasksFailed: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_failed_total",
		}),
		tasksRetried: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "worker_pool_tasks_retried_total",
		}),
		queueDepth: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "worker_pool_queue_depth",
		}),
		processingTime: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "worker_pool_processing_seconds",
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
		}),
		activeWorkers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "worker_pool_active_workers",
		}),
	}
	return &WorkerPool{
		config:    config,
		taskQueue: make(chan *TaskWrapper, config.QueueSize),
		ctx:       ctx,
		cancel:    cancel,
		metrics:   metrics,
	}
}

// Start launches the workers and the metrics updater.
func (p *WorkerPool) Start() {
	// Start workers
	for i := 0; i < p.config.Workers; i++ {
		p.wg.Add(1)
		go p.worker(i)
	}
	// Start metrics updater
	go p.updateMetrics()
}

// worker drains the queue until it is closed or the context cancelled.
func (p *WorkerPool) worker(id int) {
	defer p.wg.Done()
	for {
		select {
		case wrapper, ok := <-p.taskQueue:
			if !ok {
				return
			}
			p.metrics.activeWorkers.Inc()
			p.processTask(wrapper)
			p.metrics.activeWorkers.Dec()
		case <-p.ctx.Done():
			return
		}
	}
}

// processTask runs one task, recording latency and retrying failed
// tasks with exponential backoff up to MaxRetries.
func (p *WorkerPool) processTask(wrapper *TaskWrapper) {
	start := time.Now()

	// Wait for retry delay if this is a retry.
	// NOTE: sleeping here occupies a worker for the backoff duration; a
	// dedicated retry timer/queue avoids that at the cost of complexity.
	if wrapper.Retries > 0 && time.Now().Before(wrapper.RetryAfter) {
		time.Sleep(time.Until(wrapper.RetryAfter))
	}

	// Process with panic recovery
	err := p.safeProcess(wrapper.Task)

	duration := time.Since(start)
	p.metrics.processingTime.Observe(duration.Seconds())

	if err == nil {
		p.metrics.tasksProcessed.Inc()
		return
	}
	if wrapper.Retries >= p.config.MaxRetries {
		p.metrics.tasksFailed.Inc()
		return
	}

	// Retry with exponential backoff
	wrapper.Retries++
	wrapper.RetryAfter = time.Now().Add(p.config.RetryDelay * time.Duration(1<<wrapper.Retries))
	p.metrics.tasksRetried.Inc()

	// BUG FIX: the original requeued unconditionally; once Shutdown has
	// closed taskQueue, that send panics. Check the shutdown flag first.
	// (A narrow race between the check and the send remains; a fully
	// safe design routes retries through a separate channel that only
	// the workers close.)
	if p.isShutdown.Load() {
		p.metrics.tasksFailed.Inc()
		return
	}
	select {
	case p.taskQueue <- wrapper:
	default:
		// Queue full, drop with logging
		p.metrics.tasksFailed.Inc()
	}
}

// safeProcess converts a panic inside the task into a returned error so
// one bad task cannot kill a worker goroutine.
func (p *WorkerPool) safeProcess(task Task) (err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic recovered: %v", r)
		}
	}()
	return task.Process(p.ctx)
}

// Submit enqueues without blocking; ErrQueueFull signals backpressure.
func (p *WorkerPool) Submit(task Task) error {
	if p.isShutdown.Load() {
		return ErrPoolShutdown
	}
	wrapper := &TaskWrapper{
		Task:      task,
		Retries:   0,
		CreatedAt: time.Now(),
	}
	select {
	case p.taskQueue <- wrapper:
		return nil
	default:
		return ErrQueueFull
	}
}

// SubmitWait blocks until the task is enqueued or ctx is cancelled.
func (p *WorkerPool) SubmitWait(ctx context.Context, task Task) error {
	if p.isShutdown.Load() {
		return ErrPoolShutdown
	}
	wrapper := &TaskWrapper{
		Task:      task,
		Retries:   0,
		CreatedAt: time.Now(),
	}
	select {
	case p.taskQueue <- wrapper:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Shutdown stops intake, waits up to ShutdownTimeout for in-flight
// work, then force-cancels the pool context.
func (p *WorkerPool) Shutdown() {
	p.isShutdown.Store(true)
	close(p.taskQueue)

	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// Graceful shutdown
	case <-time.After(p.config.ShutdownTimeout):
		p.cancel() // Force shutdown
	}
}

// updateMetrics samples the queue depth once per second.
func (p *WorkerPool) updateMetrics() {
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			p.metrics.queueDepth.Set(float64(len(p.taskQueue)))
		case <-p.ctx.Done():
			return
		}
	}
}

Dynamic Worker Pool (Auto-scaling)

go
type DynamicWorkerPool struct { minWorkers int maxWorkers int currentWorkers atomic.Int32 taskQueue chan *TaskWrapper workerChan chan struct{} // Signal to add/remove workers ctx context.Context cancel context.CancelFunc } func (p *DynamicWorkerPool) autoScale() { ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() for { select { case <-ticker.C: queueDepth := len(p.taskQueue) current := int(p.currentWorkers.Load()) // Scale up if queue is filling if queueDepth > current*10 && current < p.maxWorkers { toAdd := min(p.maxWorkers-current, queueDepth/10) for i := 0; i < toAdd; i++ { p.addWorker() } } // Scale down if queue is empty if queueDepth == 0 && current > p.minWorkers { toRemove := min(current-p.minWorkers, (current-p.minWorkers)/2+1) for i := 0; i < toRemove; i++ { p.removeWorker() } } case <-p.ctx.Done(): return } } } func (p *DynamicWorkerPool) addWorker() { p.currentWorkers.Add(1) go p.worker() } func (p *DynamicWorkerPool) removeWorker() { select { case p.workerChan <- struct{}{}: p.currentWorkers.Add(-1) default: // No idle workers to remove } } func (p *DynamicWorkerPool) worker() { for { select { case task, ok := <-p.taskQueue: if !ok { return } task.Task.Process(p.ctx) case <-p.workerChan: // Signal to stop this worker return case <-p.ctx.Done(): return } } }

14. How Do You Prevent Goroutine Leaks?

Common Leak Patterns

go
// LEAK 1: Unbounded goroutines
func processRequests(requests <-chan Request) {
	for req := range requests {
		go handle(req) // No limit! Can spawn millions
	}
}

// FIX: Use worker pool
// A buffered channel acts as a counting semaphore: each goroutine must
// acquire a slot before it starts and releases it when it finishes.
func processRequestsFixed(requests <-chan Request) {
	sem := make(chan struct{}, 100) // Max 100 concurrent
	for req := range requests {
		sem <- struct{}{} // Acquire
		go func(r Request) {
			defer func() { <-sem }() // Release
			handle(r)
		}(req)
	}
}

// LEAK 2: Blocked channel send
func leakyProducer(ch chan<- int) {
	for i := 0; i < 1000; i++ {
		go func(n int) {
			ch <- n // Blocks forever if no receiver!
		}(i)
	}
}

// FIX: Use select with context
// Each sender now has a second exit path, so cancellation reclaims
// every blocked goroutine.
func fixedProducer(ctx context.Context, ch chan<- int) {
	for i := 0; i < 1000; i++ {
		go func(n int) {
			select {
			case ch <- n:
			case <-ctx.Done():
				return // Exit if context cancelled
			}
		}(i)
	}
}

// LEAK 3: Forgotten goroutines in HTTP handlers
func badHandler(w http.ResponseWriter, r *http.Request) {
	go expensiveOperation() // Fire and forget - what if it hangs?
	w.Write([]byte("OK"))
}

// FIX: Track goroutines
var activeGoroutines sync.WaitGroup

func goodHandler(w http.ResponseWriter, r *http.Request) {
	activeGoroutines.Add(1)
	go func() {
		defer activeGoroutines.Done()
		// Bound the background work so a hang cannot leak the goroutine.
		// Note: deliberately detached from r.Context() so the work can
		// outlive the request.
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		expensiveOperationWithContext(ctx)
	}()
	w.Write([]byte("OK"))
}

// On shutdown
func shutdown() {
	// Wait for all background goroutines
	activeGoroutines.Wait()
}

Detecting Goroutine Leaks

go
// In tests
// NOTE(review): comparing runtime.NumGoroutine before/after is
// inherently approximate — the runtime and the test framework may start
// goroutines of their own. Prefer goleak (below) for reliable checks.
func TestNoGoroutineLeaks(t *testing.T) {
	before := runtime.NumGoroutine()

	// Run your code
	doSomething()

	// Give time for cleanup
	time.Sleep(100 * time.Millisecond)

	after := runtime.NumGoroutine()
	if after > before {
		t.Errorf("Goroutine leak: before=%d, after=%d", before, after)
		// Print goroutine stacks for debugging
		buf := make([]byte, 1024*1024)
		n := runtime.Stack(buf, true)
		t.Logf("Goroutines:\n%s", buf[:n])
	}
}

// Using goleak in tests
import "go.uber.org/goleak"

// TestMain makes every test in the package fail if it leaves
// goroutines behind.
func TestMain(m *testing.M) {
	goleak.VerifyTestMain(m)
}

// Or per-test
func TestSomething(t *testing.T) {
	defer goleak.VerifyNone(t)
	// your test code
}

Best Practices

go
// 1. Always use context for cancellation func fetchData(ctx context.Context) ([]byte, error) { req, _ := http.NewRequestWithContext(ctx, "GET", url, nil) return http.DefaultClient.Do(req) } // 2. Use errgroup for managing multiple goroutines func fetchAll(ctx context.Context, urls []string) ([][]byte, error) { g, ctx := errgroup.WithContext(ctx) results := make([][]byte, len(urls)) for i, url := range urls { i, url := i, url // Capture g.Go(func() error { data, err := fetchData(ctx) if err != nil { return err } results[i] = data return nil }) } if err := g.Wait(); err != nil { return nil, err } return results, nil } // 3. Timeout for all blocking operations func safeRead(ch <-chan int) (int, bool) { select { case v, ok := <-ch: return v, ok case <-time.After(5 * time.Second): return 0, false } } // 4. Graceful shutdown with done channel type Service struct { done chan struct{} wg sync.WaitGroup } func (s *Service) Start() { s.done = make(chan struct{}) s.wg.Add(1) go func() { defer s.wg.Done() for { select { case <-s.done: return default: s.doWork() } } }() } func (s *Service) Stop() { close(s.done) s.wg.Wait() }

15. How Do You Debug a Deadlock in Production?

Identifying Deadlocks

go
// Enable deadlock detection (development only)
import "github.com/sasha-s/go-deadlock"

// Replace sync.Mutex with deadlock.Mutex
var mu deadlock.Mutex

// Production: Use pprof
import _ "net/http/pprof"

func main() {
	go func() {
		// NOTE(review): the ListenAndServe error is dropped here; log it
		// in real code so a failed bind on :6060 is visible.
		http.ListenAndServe("localhost:6060", nil)
	}()
	// ...
}

// Then: go tool pprof http://localhost:6060/debug/pprof/goroutine
// Or:   curl http://localhost:6060/debug/pprof/goroutine?debug=2

Common Deadlock Patterns

go
// DEADLOCK 1: Lock ordering
// (BUG FIX: transferFixed below orders locks by ID, but the original
// Account struct had no ID field — added here.)
type Account struct {
	ID      int // stable identity used to establish a global lock order
	mu      sync.Mutex
	balance int
}

func transfer(from, to *Account, amount int) {
	from.mu.Lock() // Thread 1: locks A
	defer from.mu.Unlock()
	to.mu.Lock() // Thread 1: waits for B
	defer to.mu.Unlock()
	// Meanwhile Thread 2: locks B, waits for A = DEADLOCK!
	from.balance -= amount
	to.balance += amount
}

// FIX: Consistent lock ordering
func transferFixed(from, to *Account, amount int) {
	// Always lock lower ID first
	first, second := from, to
	if from.ID > to.ID {
		first, second = to, from
	}
	first.mu.Lock()
	defer first.mu.Unlock()
	second.mu.Lock()
	defer second.mu.Unlock()
	from.balance -= amount
	to.balance += amount
}

// DEADLOCK 2: Nested locks
type Cache struct {
	mu   sync.Mutex
	data map[string]string
}

func (c *Cache) Get(key string) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.data[key]
}

func (c *Cache) GetOrCompute(key string, compute func() string) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	if v, ok := c.data[key]; ok {
		return v
	}
	// compute() might call c.Get() = DEADLOCK!
	v := compute()
	c.data[key] = v
	return v
}

// FIX: Don't hold lock during computation
func (c *Cache) GetOrComputeFixed(key string, compute func() string) string {
	c.mu.Lock()
	if v, ok := c.data[key]; ok {
		c.mu.Unlock()
		return v
	}
	c.mu.Unlock()

	// Compute without holding lock
	v := compute()

	c.mu.Lock()
	// Double-check (another goroutine might have computed)
	if existing, ok := c.data[key]; ok {
		c.mu.Unlock()
		return existing
	}
	c.data[key] = v
	c.mu.Unlock()
	return v
}

// DEADLOCK 3: Channel + Mutex
type Queue struct {
	mu    sync.Mutex
	items chan int
}

func (q *Queue) Push(item int) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.items <- item // Blocks if channel full!
}

func (q *Queue) Pop() int {
	q.mu.Lock()
	defer q.mu.Unlock()
	return <-q.items // Blocks if channel empty!
}

// If Push blocks waiting for Pop, but Pop needs the lock = DEADLOCK!

// FIX: Don't hold lock during channel operations
func (q *Queue) PushFixed(item int) {
	select {
	case q.items <- item:
	default:
		// Handle full queue
	}
}

Production Debugging Steps

1. DETECT:
   - Service stops responding
   - Goroutine count keeps increasing
   - CPU usage drops (goroutines waiting, not working)

2. COLLECT DATA:
   curl http://service:6060/debug/pprof/goroutine?debug=2 > goroutines.txt

3. ANALYZE: Look for patterns like:
   - Multiple goroutines waiting on the same mutex
   - Goroutines blocked on channel operations
   - Circular wait patterns

4. COMMON SIGNS:
   goroutine 1 [semacquire, 5 minutes]:
     sync.runtime_SemacquireMutex(...)  sync/mutex.go:72
     main.(*Account).transfer(...)      main.go:25

5. FIX:
   - Add lock ordering
   - Use timeouts
   - Use context cancellation
   - Reduce lock scope

16. How Do You Design Cancellation Using Context Properly?

Context Best Practices

go
// Rule 1: Pass context as first parameter
func DoSomething(ctx context.Context, arg string) error {
	// ...
}

// Rule 2: Never store context in struct
// A stored context outlives the request it belongs to and breaks
// per-call cancellation.
type BadService struct {
	ctx context.Context // DON'T DO THIS
}

type GoodService struct {
	// No context field
}

func (s *GoodService) DoWork(ctx context.Context) error {
	// Context passed per-call
}

// Rule 3: Use context for cancellation, not values
// BAD: Using context to pass data
ctx = context.WithValue(ctx, "user_id", "123")

// GOOD: Use context for request-scoped values only
// (an unexported key type prevents key collisions between packages)
type contextKey string

const userIDKey contextKey = "user_id"

ctx = context.WithValue(ctx, userIDKey, "123")

// Rule 4: Always check context.Done() in long operations
func processItems(ctx context.Context, items []Item) error {
	for _, item := range items {
		// Non-blocking poll: bail out promptly once cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		process(item)
	}
	return nil
}

// Rule 5: Propagate context to all sub-operations
func handleRequest(ctx context.Context, req *Request) error {
	// Create child context with timeout
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	// Pass to all operations
	data, err := fetchData(ctx, req.ID)
	if err != nil {
		return err
	}
	result, err := processData(ctx, data)
	if err != nil {
		return err
	}
	return storeResult(ctx, result)
}

Proper Cancellation Patterns

go
// Pattern 1: Timeout for external calls func fetchWithTimeout(ctx context.Context, url string) ([]byte, error) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() req, _ := http.NewRequestWithContext(ctx, "GET", url, nil) resp, err := http.DefaultClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() return io.ReadAll(resp.Body) } // Pattern 2: Cancel on first error (errgroup) func fetchAll(ctx context.Context, urls []string) error { g, ctx := errgroup.WithContext(ctx) for _, url := range urls { url := url g.Go(func() error { _, err := fetchWithTimeout(ctx, url) return err // First error cancels all others }) } return g.Wait() } // Pattern 3: Manual cancellation func longRunningTask(ctx context.Context) error { ctx, cancel := context.WithCancel(ctx) defer cancel() resultCh := make(chan Result, 1) errCh := make(chan error, 1) go func() { result, err := doExpensiveWork() if err != nil { errCh <- err } else { resultCh <- result } }() select { case result := <-resultCh: return processResult(result) case err := <-errCh: return err case <-ctx.Done(): // Context cancelled - clean up cancel() // Cancel any child operations return ctx.Err() } } // Pattern 4: Graceful shutdown with context type Server struct { httpServer *http.Server } func (s *Server) Start(ctx context.Context) error { // Start HTTP server go func() { if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed { log.Printf("HTTP server error: %v", err) } }() // Wait for shutdown signal <-ctx.Done() // Graceful shutdown with timeout shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() return s.httpServer.Shutdown(shutdownCtx) } // Pattern 5: Context with deadline for SLAs func handleWithSLA(w http.ResponseWriter, r *http.Request) { // Must respond within 100ms for SLA ctx, cancel := context.WithDeadline(r.Context(), time.Now().Add(100*time.Millisecond)) defer cancel() result, err := process(ctx) if err == 
context.DeadlineExceeded { // SLA breach - return partial result or error http.Error(w, "Request timeout", http.StatusGatewayTimeout) return } json.NewEncoder(w).Encode(result) }

17. When to Use sync.Mutex vs sync.RWMutex?

Decision Guide

┌─────────────────────────────────────────────────────────────────────────────────┐ │ MUTEX vs RWMUTEX DECISION │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ USE sync.Mutex WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Read:Write ratio is close to 1:1 │ │ │ │ • Operations are very fast (< 100ns) │ │ │ │ • Critical section is small │ │ │ │ • You want simpler code │ │ │ │ │ │ │ │ WHY: RWMutex has more overhead due to additional bookkeeping │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ USE sync.RWMutex WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Read:Write ratio is high (10:1 or more) │ │ │ │ • Read operations are slow (> 1μs) │ │ │ │ • Many concurrent readers │ │ │ │ • Writes are infrequent │ │ │ │ │ │ │ │ WHY: Allows multiple readers, improving throughput │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ NEITHER - USE ATOMIC WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Single value (counter, flag) │ │ │ │ • Simple operations (increment, swap) │ │ │ │ • Maximum performance needed │ │ │ │ │ │ │ │ WHY: No locking overhead │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ NEITHER - USE CHANNELS WHEN: │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ • Communicating between goroutines │ │ │ │ • Signaling events │ │ │ │ • Pipeline processing │ │ │ │ │ │ │ │ WHY: "Don't communicate by sharing memory; share memory by │ │ │ │ communicating" - Go proverb │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Benchmarks

go
// Benchmark to decide
// Run with: go test -bench=. -cpu=8
func BenchmarkMutex(b *testing.B) {
	var mu sync.Mutex
	data := make(map[string]int)
	data["key"] = 0
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			mu.Lock()
			_ = data["key"] // Read
			mu.Unlock()
		}
	})
}

func BenchmarkRWMutex(b *testing.B) {
	var mu sync.RWMutex
	data := make(map[string]int)
	data["key"] = 0
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			mu.RLock()
			_ = data["key"] // Read
			mu.RUnlock()
		}
	})
}

// Illustrative numbers — rerun on your own hardware/Go version.
// Results (8 cores, read-heavy workload):
// BenchmarkMutex-8      50000000    28.5 ns/op
// BenchmarkRWMutex-8   200000000     8.2 ns/op

// Results (8 cores, write-heavy workload):
// BenchmarkMutex-8      50000000    30.1 ns/op
// BenchmarkRWMutex-8    30000000    45.8 ns/op  <- SLOWER!

Practical Examples

go
// Example 1: Cache (read-heavy) - USE RWMutex
// Many concurrent readers share RLock; only Set takes the write lock.
type Cache struct {
	mu   sync.RWMutex
	data map[string]interface{}
}

func (c *Cache) Get(key string) (interface{}, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	v, ok := c.data[key]
	return v, ok
}

func (c *Cache) Set(key string, value interface{}) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.data[key] = value
}

// Example 2: Counter (write-heavy) - USE Atomic
// Uses the typed atomic.Int64 (Go 1.19+): same API surface, but it
// guarantees 64-bit alignment on 32-bit platforms, which a bare int64
// field with atomic.AddInt64 does not.
type Counter struct {
	value atomic.Int64
}

func (c *Counter) Increment() {
	c.value.Add(1)
}

func (c *Counter) Get() int64 {
	return c.value.Load()
}

// Example 3: Request limiter (mixed) - USE Mutex
// Fixed-window limiter: at most 100 requests per minute.
type RateLimiter struct {
	mu       sync.Mutex
	requests int
	window   time.Time
}

func (r *RateLimiter) Allow() bool {
	r.mu.Lock()
	defer r.mu.Unlock()
	now := time.Now()
	// Start a new window once the current one is older than a minute.
	if now.Sub(r.window) > time.Minute {
		r.requests = 0
		r.window = now
	}
	if r.requests >= 100 {
		return false
	}
	r.requests++
	return true
}

18. How Do You Reduce GC Pressure in Go?

GC Pressure Sources

┌─────────────────────────────────────────────────────────────────────────────────┐ │ GC PRESSURE SOURCES │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ 1. ALLOCATIONS IN HOT PATHS │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ for i := 0; i < 1000000; i++ { │ │ │ │ data := make([]byte, 1024) // 1M allocations! │ │ │ │ process(data) │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 2. STRING CONCATENATION │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ var s string │ │ │ │ for _, item := range items { │ │ │ │ s += item // New allocation each iteration! │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 3. INTERFACE BOXING │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ func process(v interface{}) { ... } │ │ │ │ process(42) // Allocates to box the int │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 4. CLOSURES CAPTURING VARIABLES │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ for _, item := range items { │ │ │ │ go func() { │ │ │ │ process(item) // Captures item, escapes to heap │ │ │ │ }() │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Optimization Techniques

go
// 1. OBJECT POOLING var bufferPool = sync.Pool{ New: func() interface{} { return make([]byte, 1024) }, } func processWithPool() { buf := bufferPool.Get().([]byte) defer bufferPool.Put(buf) // Use buf... // Reset before returning to pool for i := range buf { buf[i] = 0 } } // 2. PRE-ALLOCATE SLICES // BAD func collectBad(items []Item) []Result { var results []Result for _, item := range items { results = append(results, process(item)) // Multiple reallocations } return results } // GOOD func collectGood(items []Item) []Result { results := make([]Result, 0, len(items)) // Pre-allocate capacity for _, item := range items { results = append(results, process(item)) } return results } // 3. STRINGS.BUILDER FOR CONCATENATION // BAD func concatBad(items []string) string { var result string for _, item := range items { result += item // O(n²) allocations } return result } // GOOD func concatGood(items []string) string { var builder strings.Builder builder.Grow(estimatedSize(items)) // Pre-allocate for _, item := range items { builder.WriteString(item) } return builder.String() } // 4. AVOID INTERFACE{} IN HOT PATHS // BAD func sumBad(values []interface{}) int64 { var sum int64 for _, v := range values { sum += v.(int64) // Boxing/unboxing } return sum } // GOOD func sumGood(values []int64) int64 { var sum int64 for _, v := range values { sum += v } return sum } // 5. STRUCT EMBEDDING INSTEAD OF POINTERS // More allocations (2 objects) type BadStruct struct { Data *InnerData } // Single allocation type GoodStruct struct { Data InnerData // Embedded, not pointer } // 6. ARRAY INSTEAD OF SLICE FOR FIXED SIZE type BadRequest struct { Headers []string // Slice = pointer + len + cap } type GoodRequest struct { Headers [8]string // Fixed array, no allocation } // 7. 
ESCAPE ANALYSIS AWARENESS // BAD - pointer escapes func createBad() *Data { d := Data{} // Allocated on heap return &d } // GOOD - value, no escape func createGood() Data { return Data{} // Stack allocated } // Usage data := createGood() // Stack allocation

Monitoring GC

go
// Print GC stats func printGCStats() { var m runtime.MemStats runtime.ReadMemStats(&m) fmt.Printf("Alloc = %v MiB", m.Alloc/1024/1024) fmt.Printf("TotalAlloc = %v MiB", m.TotalAlloc/1024/1024) fmt.Printf("Sys = %v MiB", m.Sys/1024/1024) fmt.Printf("NumGC = %v", m.NumGC) fmt.Printf("PauseTotal = %v ms", m.PauseTotalNs/1000000) } // Set GOGC for tuning // GOGC=100 (default): GC when heap doubles // GOGC=200: GC when heap triples (less frequent, more memory) // GOGC=50: GC when heap grows 50% (more frequent, less memory) // Runtime control debug.SetGCPercent(200) // Same as GOGC=200

19. How Do You Optimize Memory Footprint for High-QPS APIs?

Memory Optimization Strategies

go
// 1. REQUEST BODY POOLING
var requestPool = sync.Pool{
	New: func() interface{} {
		return &Request{
			Headers: make(map[string]string, 10),
			Body:    make([]byte, 0, 4096),
		}
	},
}

func handleRequest(w http.ResponseWriter, r *http.Request) {
	req := requestPool.Get().(*Request)
	defer func() {
		// Clear per-request state before the object goes back in the pool.
		req.Reset()
		requestPool.Put(req)
	}()
	// Use req...
}

// 2. RESPONSE WRITER POOLING
var bufPool = sync.Pool{
	New: func() interface{} {
		return bytes.NewBuffer(make([]byte, 0, 4096))
	},
}

func writeJSON(w http.ResponseWriter, data interface{}) {
	buf := bufPool.Get().(*bytes.Buffer)
	defer func() {
		buf.Reset()
		bufPool.Put(buf)
	}()
	// NOTE(review): Encode and Write errors are dropped here; check them
	// in production code.
	json.NewEncoder(buf).Encode(data)
	w.Write(buf.Bytes())
}

// 3. EFFICIENT JSON HANDLING
// Use json.RawMessage for pass-through
type APIResponse struct {
	Status string          `json:"status"`
	Data   json.RawMessage `json:"data"` // No parsing, just copy
}

// Use streaming for large responses
// Emits a JSON array element-by-element with constant memory. Encoder
// appends a newline after each element; that is still valid JSON
// whitespace.
func streamLargeResponse(w http.ResponseWriter, items <-chan Item) {
	enc := json.NewEncoder(w)
	w.Write([]byte("["))
	first := true
	for item := range items {
		if !first {
			w.Write([]byte(","))
		}
		enc.Encode(item)
		first = false
	}
	w.Write([]byte("]"))
}

// 4. EFFICIENT STRUCT LAYOUT
// BAD - 24 bytes due to padding
type BadStruct struct {
	A bool  // 1 byte + 7 padding
	B int64 // 8 bytes
	C bool  // 1 byte + 7 padding
}

// GOOD - 16 bytes
type GoodStruct struct {
	B int64 // 8 bytes
	A bool  // 1 byte
	C bool  // 1 byte + 6 padding
}

// 5. INTERN COMMON STRINGS
// Repeated identical strings (header names, enum values) get stored
// once; the common read path takes only the cheap RLock.
var stringInterner = struct {
	sync.RWMutex
	m map[string]string
}{m: make(map[string]string)}

func internString(s string) string {
	stringInterner.RLock()
	if interned, ok := stringInterner.m[s]; ok {
		stringInterner.RUnlock()
		return interned
	}
	stringInterner.RUnlock()

	stringInterner.Lock()
	defer stringInterner.Unlock()
	// Double-check under the write lock: another goroutine may have
	// interned s between our RUnlock and Lock.
	if interned, ok := stringInterner.m[s]; ok {
		return interned
	}
	stringInterner.m[s] = s
	return s
}

// 6. USE FASTHTTP FOR EXTREME PERFORMANCE
import "github.com/valyala/fasthttp"

func fastHandler(ctx *fasthttp.RequestCtx) {
	// fasthttp reuses memory aggressively
	// Request/response bodies are pooled
	ctx.SetBodyString("OK")
}

Profiling Memory

go
// Run with memory profiling
// go test -bench=. -memprofile=mem.out
// go tool pprof mem.out

// In code
import "runtime/pprof"

// startMemProfile writes a one-shot heap snapshot to mem.pprof.
// NOTE(review): the Create/WriteHeapProfile errors are dropped here —
// check them in real code; calling runtime.GC() just before the
// snapshot gives an up-to-date profile.
func startMemProfile() {
	f, _ := os.Create("mem.pprof")
	pprof.WriteHeapProfile(f)
	f.Close()
}

// HTTP endpoint for live profiling
import _ "net/http/pprof"

func main() {
	go http.ListenAndServe(":6060", nil)
	// Then: go tool pprof http://localhost:6060/debug/pprof/heap
}

20. How Do You Design Backpressure in Go Services?

Backpressure Strategies

┌─────────────────────────────────────────────────────────────────────────────────┐ │ BACKPRESSURE STRATEGIES │ ├─────────────────────────────────────────────────────────────────────────────────┤ │ │ │ 1. BOUNDED QUEUES │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ taskQueue := make(chan Task, 1000) // Bounded! │ │ │ │ │ │ │ │ select { │ │ │ │ case taskQueue <- task: │ │ │ │ // Accepted │ │ │ │ default: │ │ │ │ // Queue full - reject or drop │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 2. SEMAPHORE (Limited Concurrency) │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ sem := make(chan struct{}, 100) // Max 100 concurrent │ │ │ │ │ │ │ │ sem <- struct{}{} // Acquire (blocks if full) │ │ │ │ defer func() { <-sem }() // Release │ │ │ │ process() │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 3. RATE LIMITING │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ limiter := rate.NewLimiter(100, 10) // 100/sec, burst 10 │ │ │ │ │ │ │ │ if !limiter.Allow() { │ │ │ │ return ErrTooManyRequests │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ │ 4. TIMEOUT │ │ ┌─────────────────────────────────────────────────────────────────────────┐ │ │ │ ctx, cancel := context.WithTimeout(ctx, 5*time.Second) │ │ │ │ defer cancel() │ │ │ │ │ │ │ │ select { │ │ │ │ case result := <-process(ctx): │ │ │ │ return result │ │ │ │ case <-ctx.Done(): │ │ │ │ return ErrTimeout │ │ │ │ } │ │ │ └─────────────────────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘

Complete Backpressure Implementation

go
// BackpressureService layers four defenses in front of task execution,
// checked cheapest-first: circuit breaker -> rate limiter -> bounded
// queue -> concurrency semaphore.
type BackpressureService struct {
	// Concurrency limiter
	semaphore chan struct{}
	// Rate limiter
	rateLimiter *rate.Limiter
	// Queue with bounded size
	taskQueue chan Task
	// Circuit breaker
	circuitBreaker *CircuitBreaker
	// Metrics (accessed atomically)
	rejectedCounter int64
	acceptedCounter int64
}

func NewBackpressureService(maxConcurrency, queueSize int, rps float64) *BackpressureService {
	return &BackpressureService{
		semaphore:      make(chan struct{}, maxConcurrency),
		rateLimiter:    rate.NewLimiter(rate.Limit(rps), int(rps/10)),
		taskQueue:      make(chan Task, queueSize),
		circuitBreaker: NewCircuitBreaker(5, time.Minute),
	}
}

// Submit applies admission control and rejects fast when any layer
// refuses; it never blocks the caller.
func (s *BackpressureService) Submit(ctx context.Context, task Task) error {
	// Layer 1: Check circuit breaker
	if s.circuitBreaker.IsOpen() {
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrCircuitOpen
	}
	// Layer 2: Rate limiting
	if !s.rateLimiter.Allow() {
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrRateLimited
	}
	// Layer 3: Queue depth check (non-blocking send = backpressure)
	select {
	case s.taskQueue <- task:
		atomic.AddInt64(&s.acceptedCounter, 1)
		return nil
	default:
		atomic.AddInt64(&s.rejectedCounter, 1)
		return ErrQueueFull
	}
}

// Process drains the queue, bounding in-flight work with the semaphore
// and feeding outcomes back into the circuit breaker.
func (s *BackpressureService) Process(ctx context.Context) {
	for {
		select {
		case task := <-s.taskQueue:
			// Layer 4: Concurrency limiting
			select {
			case s.semaphore <- struct{}{}:
				go func(t Task) {
					defer func() { <-s.semaphore }()
					err := s.processWithTimeout(ctx, t)
					if err != nil {
						s.circuitBreaker.RecordFailure()
					} else {
						s.circuitBreaker.RecordSuccess()
					}
				}(task)
			case <-ctx.Done():
				return
			}
		case <-ctx.Done():
			return
		}
	}
}

func (s *BackpressureService) processWithTimeout(ctx context.Context, task Task) error {
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	// Buffered (capacity 1) so the worker goroutine can always deliver
	// its result and exit, even after we stop listening on timeout —
	// no goroutine leak.
	done := make(chan error, 1)
	go func() {
		done <- task.Execute(ctx)
	}()

	select {
	case err := <-done:
		return err
	case <-ctx.Done():
		return ctx.Err()
	}
}

// HTTP middleware for backpressure
// Rejects with 503 + Retry-After when a concurrency slot cannot be
// acquired within 100ms.
func BackpressureMiddleware(maxConcurrent int, timeout time.Duration) func(http.Handler) http.Handler {
	sem := make(chan struct{}, maxConcurrent)
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Try to acquire semaphore with timeout
			select {
			case sem <- struct{}{}:
				defer func() { <-sem }()
				// Add timeout to request context
				ctx, cancel := context.WithTimeout(r.Context(), timeout)
				defer cancel()
				next.ServeHTTP(w, r.WithContext(ctx))
			case <-time.After(100 * time.Millisecond):
				// Can't acquire slot quickly - reject
				w.Header().Set("Retry-After", "1")
				http.Error(w, "Server busy", http.StatusServiceUnavailable)
			}
		})
	}
}

Adaptive Backpressure

go
// AdaptiveBackpressure raises its concurrency limit while calls keep
// succeeding and lowers it when the failure rate climbs — an
// AIMD-style (additive increase / larger decrease) controller.
type AdaptiveBackpressure struct {
	maxConcurrency int64
	currentLimit   int64
	successCount   int64 // accessed atomically
	failureCount   int64 // accessed atomically
	adjustInterval time.Duration
}

func (a *AdaptiveBackpressure) Start() {
	// NOTE(review): this ticker and goroutine can never be stopped —
	// wire in a context or done channel before production use (see the
	// goroutine-leak section above).
	ticker := time.NewTicker(a.adjustInterval)
	go func() {
		for range ticker.C {
			a.adjust()
		}
	}()
}

// adjust consumes one interval's worth of outcome counters and moves
// the limit accordingly.
func (a *AdaptiveBackpressure) adjust() {
	// Swap-and-reset yields a per-interval window of counts.
	successes := atomic.SwapInt64(&a.successCount, 0)
	failures := atomic.SwapInt64(&a.failureCount, 0)

	if failures == 0 && successes > 0 {
		// All successful - increase limit
		current := atomic.LoadInt64(&a.currentLimit)
		newLimit := min(current+10, a.maxConcurrency) // min/max builtins need Go 1.21+
		atomic.StoreInt64(&a.currentLimit, newLimit)
	} else if failures > successes/10 {
		// >10% failure rate - decrease limit (floor of 10)
		current := atomic.LoadInt64(&a.currentLimit)
		newLimit := max(current-20, 10)
		atomic.StoreInt64(&a.currentLimit, newLimit)
	}
}

func (a *AdaptiveBackpressure) Allow() bool {
	// Check current active against adaptive limit
	// ... implementation
	return true
}

This completes Part 3 covering Go concurrency and performance patterns. The document continues with more topics in subsequent parts.
All Blogs
Tags: golang, concurrency, performance, system-design, backend, goroutines, profiling, interview-prep