Part 27: Load Balancing Strategies - Distributing Traffic Effectively
"Load balancing is the art of distributing work so that no single component bears more than it can handle, while ensuring the collective serves the whole efficiently."
Welcome to Part 27 of our distributed systems course! After mastering rate limiting, we now explore load balancing - the critical technique for distributing traffic across service instances.
Why Load Balancing?
Load balancing provides:
- Scalability: Distribute work across multiple instances
- Availability: Route around failed instances
- Performance: Direct traffic to the most capable servers
- Flexibility: Enable zero-downtime deployments
Basic Load Balancer Interface
```go
package loadbalancer

import (
	"context"
	"errors"
	"sync"
	"sync/atomic"
	"time"
)

var (
	ErrNoBackends = errors.New("no backends available")
)

// Backend represents a server instance
type Backend struct {
	ID       string
	Address  string
	Weight   int
	Healthy  bool
	Metadata map[string]string

	// Stats
	ActiveConns int64
	TotalConns  int64
	AvgLatency  time.Duration
	LastUsed    time.Time

	mu sync.RWMutex
}

// LoadBalancer selects backends for requests
type LoadBalancer interface {
	// Select chooses a backend for the request
	Select(ctx context.Context) (*Backend, error)

	// Release signals that a request to a backend is complete
	Release(backend *Backend, latency time.Duration, err error)

	// AddBackend adds a new backend
	AddBackend(backend *Backend)

	// RemoveBackend removes a backend
	RemoveBackend(id string)

	// GetBackends returns all backends
	GetBackends() []*Backend

	// MarkHealthy marks a backend as healthy/unhealthy
	MarkHealthy(id string, healthy bool)
}

// BaseLoadBalancer provides common functionality
type BaseLoadBalancer struct {
	backends []*Backend
	mu       sync.RWMutex
}

func (b *BaseLoadBalancer) AddBackend(backend *Backend) {
	b.mu.Lock()
	defer b.mu.Unlock()

	// Replace if a backend with the same ID already exists
	for i, existing := range b.backends {
		if existing.ID == backend.ID {
			b.backends[i] = backend
			return
		}
	}
	b.backends = append(b.backends, backend)
}

func (b *BaseLoadBalancer) RemoveBackend(id string) {
	b.mu.Lock()
	defer b.mu.Unlock()

	for i, backend := range b.backends {
		if backend.ID == id {
			b.backends = append(b.backends[:i], b.backends[i+1:]...)
			return
		}
	}
}

func (b *BaseLoadBalancer) GetBackends() []*Backend {
	b.mu.RLock()
	defer b.mu.RUnlock()

	result := make([]*Backend, len(b.backends))
	copy(result, b.backends)
	return result
}

func (b *BaseLoadBalancer) MarkHealthy(id string, healthy bool) {
	b.mu.Lock()
	defer b.mu.Unlock()

	for _, backend := range b.backends {
		if backend.ID == id {
			backend.mu.Lock()
			backend.Healthy = healthy
			backend.mu.Unlock()
			return
		}
	}
}

// getHealthyBackends must be called with b.mu held.
func (b *BaseLoadBalancer) getHealthyBackends() []*Backend {
	var healthy []*Backend
	for _, backend := range b.backends {
		backend.mu.RLock()
		if backend.Healthy {
			healthy = append(healthy, backend)
		}
		backend.mu.RUnlock()
	}
	return healthy
}
```
Round Robin
Simple, equal distribution:
```go
// RoundRobinBalancer distributes requests evenly
type RoundRobinBalancer struct {
	BaseLoadBalancer
	counter uint64
}

// NewRoundRobin creates a round-robin balancer
func NewRoundRobin() *RoundRobinBalancer {
	return &RoundRobinBalancer{}
}

func (rr *RoundRobinBalancer) Select(ctx context.Context) (*Backend, error) {
	rr.mu.RLock()
	healthy := rr.getHealthyBackends()
	rr.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	// Atomic increment and select
	idx := atomic.AddUint64(&rr.counter, 1) % uint64(len(healthy))
	backend := healthy[idx]

	// Update stats
	backend.mu.Lock()
	backend.ActiveConns++
	backend.TotalConns++
	backend.LastUsed = time.Now()
	backend.mu.Unlock()

	return backend, nil
}

func (rr *RoundRobinBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()

	backend.ActiveConns--

	// Update average latency (exponential moving average,
	// 30% weight on the newest sample)
	if backend.AvgLatency == 0 {
		backend.AvgLatency = latency
	} else {
		backend.AvgLatency = (backend.AvgLatency*7 + latency*3) / 10
	}
}
```
Weighted Round Robin
Respect capacity differences:
```go
// WeightedRoundRobinBalancer uses weights for distribution
type WeightedRoundRobinBalancer struct {
	BaseLoadBalancer

	currentWeight int
	maxWeight     int
	gcd           int
	currentIndex  int
	mu            sync.Mutex
}

// NewWeightedRoundRobin creates a weighted round-robin balancer
func NewWeightedRoundRobin() *WeightedRoundRobinBalancer {
	return &WeightedRoundRobinBalancer{
		currentIndex: -1,
	}
}

func (wrr *WeightedRoundRobinBalancer) AddBackend(backend *Backend) {
	wrr.BaseLoadBalancer.AddBackend(backend)
	wrr.recalculate()
}

func (wrr *WeightedRoundRobinBalancer) recalculate() {
	wrr.mu.Lock()
	defer wrr.mu.Unlock()

	backends := wrr.GetBackends()
	if len(backends) == 0 {
		return
	}

	// Calculate max weight and GCD. Weights must be > 0,
	// or the selection loop below never terminates.
	wrr.maxWeight = 0
	wrr.gcd = backends[0].Weight
	for _, b := range backends {
		if b.Weight > wrr.maxWeight {
			wrr.maxWeight = b.Weight
		}
		wrr.gcd = gcd(wrr.gcd, b.Weight)
	}
}

func gcd(a, b int) int {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

func (wrr *WeightedRoundRobinBalancer) Select(ctx context.Context) (*Backend, error) {
	wrr.mu.Lock()
	defer wrr.mu.Unlock()

	wrr.BaseLoadBalancer.mu.RLock()
	healthy := wrr.getHealthyBackends()
	wrr.BaseLoadBalancer.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	// Weighted round-robin selection
	for {
		wrr.currentIndex = (wrr.currentIndex + 1) % len(healthy)
		if wrr.currentIndex == 0 {
			wrr.currentWeight -= wrr.gcd
			if wrr.currentWeight <= 0 {
				wrr.currentWeight = wrr.maxWeight
			}
		}

		backend := healthy[wrr.currentIndex]
		if backend.Weight >= wrr.currentWeight {
			backend.mu.Lock()
			backend.ActiveConns++
			backend.TotalConns++
			backend.LastUsed = time.Now()
			backend.mu.Unlock()
			return backend, nil
		}
	}
}

func (wrr *WeightedRoundRobinBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()
	backend.ActiveConns--
}
```
Least Connections
Route to least busy server:
```go
// LeastConnectionsBalancer routes to the backend with the fewest connections
type LeastConnectionsBalancer struct {
	BaseLoadBalancer
}

// NewLeastConnections creates a least-connections balancer
func NewLeastConnections() *LeastConnectionsBalancer {
	return &LeastConnectionsBalancer{}
}

func (lc *LeastConnectionsBalancer) Select(ctx context.Context) (*Backend, error) {
	lc.mu.RLock()
	healthy := lc.getHealthyBackends()
	lc.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	// Find the backend with the fewest active connections
	var selected *Backend
	minConns := int64(^uint64(0) >> 1) // max int64

	for _, backend := range healthy {
		backend.mu.RLock()
		conns := backend.ActiveConns
		backend.mu.RUnlock()

		if conns < minConns {
			minConns = conns
			selected = backend
		}
	}

	if selected == nil {
		return nil, ErrNoBackends
	}

	selected.mu.Lock()
	selected.ActiveConns++
	selected.TotalConns++
	selected.LastUsed = time.Now()
	selected.mu.Unlock()

	return selected, nil
}

func (lc *LeastConnectionsBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()
	backend.ActiveConns--
}

// WeightedLeastConnectionsBalancer considers both weight and connections
type WeightedLeastConnectionsBalancer struct {
	BaseLoadBalancer
}

func NewWeightedLeastConnections() *WeightedLeastConnectionsBalancer {
	return &WeightedLeastConnectionsBalancer{}
}

func (wlc *WeightedLeastConnectionsBalancer) Select(ctx context.Context) (*Backend, error) {
	wlc.mu.RLock()
	healthy := wlc.getHealthyBackends()
	wlc.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	var selected *Backend
	minScore := float64(^uint64(0) >> 1)

	for _, backend := range healthy {
		backend.mu.RLock()
		conns := backend.ActiveConns
		weight := backend.Weight
		backend.mu.RUnlock()

		// Score = connections / weight (lower is better)
		score := float64(conns) / float64(weight)
		if score < minScore {
			minScore = score
			selected = backend
		}
	}

	if selected == nil {
		return nil, ErrNoBackends
	}

	selected.mu.Lock()
	selected.ActiveConns++
	selected.TotalConns++
	selected.LastUsed = time.Now()
	selected.mu.Unlock()

	return selected, nil
}

func (wlc *WeightedLeastConnectionsBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()
	backend.ActiveConns--
}
```
Least Response Time
Route based on latency:
```go
// LeastResponseTimeBalancer routes to the fastest backend
type LeastResponseTimeBalancer struct {
	BaseLoadBalancer
}

// NewLeastResponseTime creates a least-response-time balancer
func NewLeastResponseTime() *LeastResponseTimeBalancer {
	return &LeastResponseTimeBalancer{}
}

func (lrt *LeastResponseTimeBalancer) Select(ctx context.Context) (*Backend, error) {
	lrt.mu.RLock()
	healthy := lrt.getHealthyBackends()
	lrt.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	var selected *Backend
	minTime := time.Duration(^uint64(0) >> 1) // max duration

	for _, backend := range healthy {
		backend.mu.RLock()
		avgLatency := backend.AvgLatency
		backend.mu.RUnlock()

		// New backends get priority (no latency recorded yet)
		if avgLatency == 0 {
			selected = backend
			break
		}

		if avgLatency < minTime {
			minTime = avgLatency
			selected = backend
		}
	}

	if selected == nil {
		return nil, ErrNoBackends
	}

	selected.mu.Lock()
	selected.ActiveConns++
	selected.TotalConns++
	selected.LastUsed = time.Now()
	selected.mu.Unlock()

	return selected, nil
}

func (lrt *LeastResponseTimeBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()

	backend.ActiveConns--

	// Update exponential moving average
	if backend.AvgLatency == 0 {
		backend.AvgLatency = latency
	} else {
		// Heavier weight on recent measurements
		backend.AvgLatency = (backend.AvgLatency*6 + latency*4) / 10
	}
}
```
Consistent Hashing
For session affinity and caching:
```go
import (
	"fmt"
	"hash/crc32"
	"sort"
)

// ConsistentHashBalancer uses consistent hashing
type ConsistentHashBalancer struct {
	BaseLoadBalancer

	ring     []uint32
	ringMap  map[uint32]*Backend
	replicas int
	mu       sync.RWMutex
}

// NewConsistentHash creates a consistent hash balancer
func NewConsistentHash(replicas int) *ConsistentHashBalancer {
	return &ConsistentHashBalancer{
		ringMap:  make(map[uint32]*Backend),
		replicas: replicas,
	}
}

func (ch *ConsistentHashBalancer) AddBackend(backend *Backend) {
	ch.BaseLoadBalancer.AddBackend(backend)
	ch.rebuildRing()
}

func (ch *ConsistentHashBalancer) RemoveBackend(id string) {
	ch.BaseLoadBalancer.RemoveBackend(id)
	ch.rebuildRing()
}

func (ch *ConsistentHashBalancer) rebuildRing() {
	ch.mu.Lock()
	defer ch.mu.Unlock()

	ch.ring = nil
	ch.ringMap = make(map[uint32]*Backend)

	backends := ch.GetBackends()
	for _, backend := range backends {
		// Each backend gets `replicas` virtual nodes on the ring
		for i := 0; i < ch.replicas; i++ {
			key := fmt.Sprintf("%s-%d", backend.ID, i)
			hash := ch.hash(key)
			ch.ring = append(ch.ring, hash)
			ch.ringMap[hash] = backend
		}
	}

	sort.Slice(ch.ring, func(i, j int) bool {
		return ch.ring[i] < ch.ring[j]
	})
}

func (ch *ConsistentHashBalancer) hash(key string) uint32 {
	return crc32.ChecksumIEEE([]byte(key))
}

// SelectWithKey selects a backend based on a key
func (ch *ConsistentHashBalancer) SelectWithKey(ctx context.Context, key string) (*Backend, error) {
	ch.mu.RLock()
	defer ch.mu.RUnlock()

	if len(ch.ring) == 0 {
		return nil, ErrNoBackends
	}

	hash := ch.hash(key)

	// Binary search for the first ring hash >= key hash
	idx := sort.Search(len(ch.ring), func(i int) bool {
		return ch.ring[i] >= hash
	})
	if idx >= len(ch.ring) {
		idx = 0
	}

	backend := ch.ringMap[ch.ring[idx]]

	// Skip unhealthy backends by walking the ring
	backend.mu.RLock()
	healthy := backend.Healthy
	backend.mu.RUnlock()

	if !healthy {
		for i := 1; i < len(ch.ring); i++ {
			nextIdx := (idx + i) % len(ch.ring)
			nextBackend := ch.ringMap[ch.ring[nextIdx]]

			nextBackend.mu.RLock()
			nextHealthy := nextBackend.Healthy
			nextBackend.mu.RUnlock()

			if nextHealthy {
				backend = nextBackend
				break
			}
		}
	}

	backend.mu.Lock()
	backend.ActiveConns++
	backend.TotalConns++
	backend.LastUsed = time.Now()
	backend.mu.Unlock()

	return backend, nil
}

func (ch *ConsistentHashBalancer) Select(ctx context.Context) (*Backend, error) {
	// Without an explicit key, fall back to a random one
	return ch.SelectWithKey(ctx, fmt.Sprintf("%d", time.Now().UnixNano()))
}

func (ch *ConsistentHashBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()
	backend.ActiveConns--
}
```
Power of Two Choices
Probabilistic load balancing:
```go
import (
	"math/rand"
)

// P2CBalancer implements power of two choices
type P2CBalancer struct {
	BaseLoadBalancer

	rand *rand.Rand
	mu   sync.Mutex
}

// NewP2C creates a P2C balancer
func NewP2C() *P2CBalancer {
	return &P2CBalancer{
		rand: rand.New(rand.NewSource(time.Now().UnixNano())),
	}
}

func (p *P2CBalancer) Select(ctx context.Context) (*Backend, error) {
	p.BaseLoadBalancer.mu.RLock()
	healthy := p.getHealthyBackends()
	p.BaseLoadBalancer.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	if len(healthy) == 1 {
		backend := healthy[0]
		backend.mu.Lock()
		backend.ActiveConns++
		backend.TotalConns++
		backend.LastUsed = time.Now()
		backend.mu.Unlock()
		return backend, nil
	}

	// Pick two random backends
	p.mu.Lock()
	idx1 := p.rand.Intn(len(healthy))
	idx2 := p.rand.Intn(len(healthy))
	p.mu.Unlock()

	// Ensure they're different
	if idx2 == idx1 {
		idx2 = (idx1 + 1) % len(healthy)
	}

	backend1 := healthy[idx1]
	backend2 := healthy[idx2]

	// Choose the one with fewer connections
	backend1.mu.RLock()
	conns1 := backend1.ActiveConns
	backend1.mu.RUnlock()

	backend2.mu.RLock()
	conns2 := backend2.ActiveConns
	backend2.mu.RUnlock()

	var selected *Backend
	if conns1 <= conns2 {
		selected = backend1
	} else {
		selected = backend2
	}

	selected.mu.Lock()
	selected.ActiveConns++
	selected.TotalConns++
	selected.LastUsed = time.Now()
	selected.mu.Unlock()

	return selected, nil
}

func (p *P2CBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	defer backend.mu.Unlock()
	backend.ActiveConns--
}
```
Adaptive Load Balancing
Adjust based on real-time metrics:
```go
// AdaptiveBalancer adjusts selection based on performance
type AdaptiveBalancer struct {
	BaseLoadBalancer

	// Scoring weights
	latencyWeight   float64
	errorRateWeight float64
	loadWeight      float64

	// Backend scores
	scores map[string]float64
	errors map[string]*RollingCounter
	mu     sync.RWMutex
}

// RollingCounter counts events in a time window
type RollingCounter struct {
	counts []int64
	idx    int
	size   int
	mu     sync.Mutex
}

func NewRollingCounter(size int) *RollingCounter {
	return &RollingCounter{
		counts: make([]int64, size),
		size:   size,
	}
}

func (rc *RollingCounter) Inc() {
	rc.mu.Lock()
	defer rc.mu.Unlock()
	rc.counts[rc.idx]++
}

func (rc *RollingCounter) Advance() {
	rc.mu.Lock()
	defer rc.mu.Unlock()
	rc.idx = (rc.idx + 1) % rc.size
	rc.counts[rc.idx] = 0
}

func (rc *RollingCounter) Sum() int64 {
	rc.mu.Lock()
	defer rc.mu.Unlock()
	var sum int64
	for _, c := range rc.counts {
		sum += c
	}
	return sum
}

// NewAdaptiveBalancer creates an adaptive balancer
func NewAdaptiveBalancer() *AdaptiveBalancer {
	ab := &AdaptiveBalancer{
		latencyWeight:   0.4,
		errorRateWeight: 0.4,
		loadWeight:      0.2,
		scores:          make(map[string]float64),
		errors:          make(map[string]*RollingCounter),
	}

	// Start score calculation goroutine
	// (runs for the lifetime of the process)
	go ab.calculateScores()

	return ab
}

func (ab *AdaptiveBalancer) AddBackend(backend *Backend) {
	ab.BaseLoadBalancer.AddBackend(backend)

	ab.mu.Lock()
	ab.scores[backend.ID] = 1.0                   // default score
	ab.errors[backend.ID] = NewRollingCounter(60) // one minute of per-second buckets
	ab.mu.Unlock()
}

func (ab *AdaptiveBalancer) Select(ctx context.Context) (*Backend, error) {
	ab.BaseLoadBalancer.mu.RLock()
	healthy := ab.getHealthyBackends()
	ab.BaseLoadBalancer.mu.RUnlock()

	if len(healthy) == 0 {
		return nil, ErrNoBackends
	}

	ab.mu.RLock()
	defer ab.mu.RUnlock()

	// Select based on scores (higher is better)
	var selected *Backend
	maxScore := -1.0
	for _, backend := range healthy {
		score := ab.scores[backend.ID]
		if score > maxScore {
			maxScore = score
			selected = backend
		}
	}
	if selected == nil {
		selected = healthy[0]
	}

	selected.mu.Lock()
	selected.ActiveConns++
	selected.TotalConns++
	selected.LastUsed = time.Now()
	selected.mu.Unlock()

	return selected, nil
}

func (ab *AdaptiveBalancer) Release(backend *Backend, latency time.Duration, err error) {
	backend.mu.Lock()
	backend.ActiveConns--

	// Update latency
	if backend.AvgLatency == 0 {
		backend.AvgLatency = latency
	} else {
		backend.AvgLatency = (backend.AvgLatency*8 + latency*2) / 10
	}
	backend.mu.Unlock()

	// Track errors
	if err != nil {
		ab.mu.RLock()
		counter := ab.errors[backend.ID]
		ab.mu.RUnlock()
		if counter != nil {
			counter.Inc()
		}
	}
}

func (ab *AdaptiveBalancer) calculateScores() {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for range ticker.C {
		ab.updateScores()
	}
}

func (ab *AdaptiveBalancer) updateScores() {
	ab.BaseLoadBalancer.mu.RLock()
	backends := ab.BaseLoadBalancer.backends
	ab.BaseLoadBalancer.mu.RUnlock()

	if len(backends) == 0 {
		return
	}

	// Calculate max values for normalization
	var maxLatency time.Duration
	var maxConns int64
	var maxErrors int64

	for _, b := range backends {
		b.mu.RLock()
		if b.AvgLatency > maxLatency {
			maxLatency = b.AvgLatency
		}
		if b.ActiveConns > maxConns {
			maxConns = b.ActiveConns
		}
		b.mu.RUnlock()

		ab.mu.RLock()
		if counter := ab.errors[b.ID]; counter != nil {
			errs := counter.Sum()
			if errs > maxErrors {
				maxErrors = errs
			}
		}
		ab.mu.RUnlock()
	}

	// Calculate scores
	ab.mu.Lock()
	defer ab.mu.Unlock()

	for _, b := range backends {
		b.mu.RLock()
		latency := b.AvgLatency
		conns := b.ActiveConns
		b.mu.RUnlock()

		errors := int64(0)
		if counter := ab.errors[b.ID]; counter != nil {
			errors = counter.Sum()
			counter.Advance() // move to the next bucket
		}

		// Normalize to 0-1 (lower raw values are better, so invert)
		latencyScore := 1.0
		if maxLatency > 0 {
			latencyScore = 1.0 - float64(latency)/float64(maxLatency)
		}
		errorScore := 1.0
		if maxErrors > 0 {
			errorScore = 1.0 - float64(errors)/float64(maxErrors)
		}
		loadScore := 1.0
		if maxConns > 0 {
			loadScore = 1.0 - float64(conns)/float64(maxConns)
		}

		// Combined score
		score := ab.latencyWeight*latencyScore +
			ab.errorRateWeight*errorScore +
			ab.loadWeight*loadScore
		ab.scores[b.ID] = score
	}
}
```
Health Checking
Monitor backend health:
```go
import (
	"fmt"
	"net"
	"net/http"
)

// HealthChecker monitors backend health
type HealthChecker struct {
	balancer LoadBalancer
	interval time.Duration
	timeout  time.Duration
	checker  func(backend *Backend) error
	stopCh   chan struct{}
}

// NewHealthChecker creates a health checker
func NewHealthChecker(
	balancer LoadBalancer,
	interval, timeout time.Duration,
	checker func(*Backend) error,
) *HealthChecker {
	return &HealthChecker{
		balancer: balancer,
		interval: interval,
		timeout:  timeout,
		checker:  checker,
		stopCh:   make(chan struct{}),
	}
}

// Start begins health checking
func (hc *HealthChecker) Start() {
	ticker := time.NewTicker(hc.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			hc.checkAll()
		case <-hc.stopCh:
			return
		}
	}
}

func (hc *HealthChecker) checkAll() {
	backends := hc.balancer.GetBackends()

	var wg sync.WaitGroup
	for _, backend := range backends {
		wg.Add(1)
		go func(b *Backend) {
			defer wg.Done()

			ctx, cancel := context.WithTimeout(context.Background(), hc.timeout)
			defer cancel()

			// Run the check, bounded by the timeout
			done := make(chan error, 1)
			go func() {
				done <- hc.checker(b)
			}()

			select {
			case err := <-done:
				hc.balancer.MarkHealthy(b.ID, err == nil)
			case <-ctx.Done():
				hc.balancer.MarkHealthy(b.ID, false)
			}
		}(backend)
	}
	wg.Wait()
}

// Stop stops health checking
func (hc *HealthChecker) Stop() {
	close(hc.stopCh)
}

// HTTPHealthChecker checks HTTP endpoints
func HTTPHealthChecker(client *http.Client, path string) func(*Backend) error {
	return func(backend *Backend) error {
		url := fmt.Sprintf("http://%s%s", backend.Address, path)
		resp, err := client.Get(url)
		if err != nil {
			return err
		}
		defer resp.Body.Close()

		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
			return nil
		}
		return fmt.Errorf("unhealthy: status %d", resp.StatusCode)
	}
}

// TCPHealthChecker checks TCP connectivity
func TCPHealthChecker(timeout time.Duration) func(*Backend) error {
	return func(backend *Backend) error {
		conn, err := net.DialTimeout("tcp", backend.Address, timeout)
		if err != nil {
			return err
		}
		conn.Close()
		return nil
	}
}
```
Load Balancer with Retry
Handle failed requests:
```go
import "fmt"

// RetryingBalancer retries on failure
type RetryingBalancer struct {
	inner      LoadBalancer
	maxRetries int
	retryDelay time.Duration
}

// NewRetryingBalancer creates a retrying balancer
func NewRetryingBalancer(inner LoadBalancer, maxRetries int, retryDelay time.Duration) *RetryingBalancer {
	return &RetryingBalancer{
		inner:      inner,
		maxRetries: maxRetries,
		retryDelay: retryDelay,
	}
}

// ExecuteWithRetry executes a function, retrying on different backends
func (rb *RetryingBalancer) ExecuteWithRetry(
	ctx context.Context,
	fn func(*Backend) error,
) error {
	var lastErr error
	tried := make(map[string]bool)

	for attempt := 0; attempt <= rb.maxRetries; attempt++ {
		backend, err := rb.selectUntriedBackend(ctx, tried)
		if err != nil {
			if lastErr != nil {
				return fmt.Errorf("no backends available after failures: %w", lastErr)
			}
			return err
		}
		tried[backend.ID] = true

		start := time.Now()
		err = fn(backend)
		latency := time.Since(start)

		rb.inner.Release(backend, latency, err)
		if err == nil {
			return nil
		}
		lastErr = err

		// Wait before retrying
		if attempt < rb.maxRetries {
			select {
			case <-time.After(rb.retryDelay):
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}

	return fmt.Errorf("all retries failed: %w", lastErr)
}

// selectUntriedBackend asks the inner balancer for a backend we have not
// tried yet. The loop is bounded so it terminates once every backend has
// been tried.
func (rb *RetryingBalancer) selectUntriedBackend(ctx context.Context, tried map[string]bool) (*Backend, error) {
	attempts := len(rb.inner.GetBackends())
	for i := 0; i < attempts; i++ {
		backend, err := rb.inner.Select(ctx)
		if err != nil {
			return nil, err
		}
		if !tried[backend.ID] {
			return backend, nil
		}
		// Undo the selection bookkeeping and ask again
		rb.inner.Release(backend, 0, nil)
	}
	return nil, ErrNoBackends
}
```
Complete Example
```go
package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"time"
)

// For brevity this example assumes the balancer types above live in the
// same package; in a real project you would import them.
func main() {
	// Create load balancer
	balancer := NewAdaptiveBalancer()

	// Add backends
	backends := []*Backend{
		{ID: "server1", Address: "localhost:8081", Weight: 10, Healthy: true},
		{ID: "server2", Address: "localhost:8082", Weight: 10, Healthy: true},
		{ID: "server3", Address: "localhost:8083", Weight: 5, Healthy: true},
	}
	for _, b := range backends {
		balancer.AddBackend(b)
	}

	// Create health checker
	httpClient := &http.Client{Timeout: 2 * time.Second}
	healthChecker := NewHealthChecker(
		balancer,
		10*time.Second,
		5*time.Second,
		HTTPHealthChecker(httpClient, "/health"),
	)
	go healthChecker.Start()

	// Create retrying balancer
	retryingBalancer := NewRetryingBalancer(balancer, 3, 100*time.Millisecond)

	// HTTP client for the actual requests
	client := &http.Client{Timeout: 10 * time.Second}

	// Simulate requests
	for i := 0; i < 100; i++ {
		go func(reqNum int) {
			ctx := context.Background()
			err := retryingBalancer.ExecuteWithRetry(ctx, func(backend *Backend) error {
				url := fmt.Sprintf("http://%s/api/data", backend.Address)
				resp, err := client.Get(url)
				if err != nil {
					return err
				}
				defer resp.Body.Close()

				if resp.StatusCode >= 500 {
					return fmt.Errorf("server error: %d", resp.StatusCode)
				}
				return nil
			})
			if err != nil {
				log.Printf("Request %d failed: %v", reqNum, err)
			} else {
				log.Printf("Request %d succeeded", reqNum)
			}
		}(i)
	}

	// Wait and print stats
	time.Sleep(10 * time.Second)

	fmt.Println("\nBackend Stats:")
	for _, b := range balancer.GetBackends() {
		b.mu.RLock()
		fmt.Printf("%s: total=%d, active=%d, avgLatency=%v, healthy=%v\n",
			b.ID, b.TotalConns, b.ActiveConns, b.AvgLatency, b.Healthy)
		b.mu.RUnlock()
	}
}
```
Best Practices
1. Choose the right algorithm
   - Round-robin for homogeneous backends
   - Weighted for heterogeneous capacity
   - Least connections for variable request duration
   - Consistent hashing for session affinity

2. Implement health checks
   - Active checks detect failures quickly
   - Passive checks based on request results
   - Combine both for reliability

3. Handle failures gracefully
   - Retry on different backends
   - Circuit breaker integration
   - Graceful degradation

4. Monitor continuously
   - Track backend health
   - Measure latency distribution
   - Alert on imbalances
What's Next?
In Part 28, we'll explore Service Mesh Architecture - how modern infrastructure handles service-to-service communication at scale.
"Good load balancing is invisible - users never know how many servers are working together to serve their requests."