Part 28: Service Mesh Architecture - Modern Service Communication

"A service mesh is the nervous system of your microservices - it handles all the communication complexity so your services can focus on business logic."
Welcome to Part 28 of our distributed systems course! After mastering load balancing, we now explore service mesh architecture - the modern approach to managing service-to-service communication.

What is a Service Mesh?

A service mesh is a dedicated infrastructure layer that handles communication between services. It provides:
  • Traffic management: Load balancing, routing, retries
  • Security: mTLS, authentication, authorization
  • Observability: Metrics, tracing, logging
  • Resilience: Circuit breakers, timeouts, rate limiting

Core Concepts

The Sidecar Proxy Pattern

go
package servicemesh import ( "context" "crypto/tls" "crypto/x509" "fmt" "net/http" "sync" "time" ) // Sidecar represents a sidecar proxy for a service type Sidecar struct { serviceName string servicePort int proxyPort int // Components loadBalancer LoadBalancer circuitBreaker *CircuitBreaker rateLimiter *RateLimiter tracer *Tracer metricsClient *MetricsClient // TLS tlsConfig *tls.Config certManager *CertificateManager // Service discovery discovery ServiceDiscovery // Configuration config *SidecarConfig server *http.Server mu sync.RWMutex } // SidecarConfig holds sidecar configuration type SidecarConfig struct { ServiceName string ServicePort int ProxyPort int // Traffic management RetryAttempts int RetryDelay time.Duration Timeout time.Duration // Circuit breaker CBThreshold int CBTimeout time.Duration // Rate limiting RateLimit int RateLimitWindow time.Duration // mTLS EnableMTLS bool CertPath string KeyPath string CAPath string } // NewSidecar creates a new sidecar proxy func NewSidecar(cfg *SidecarConfig) (*Sidecar, error) { s := &Sidecar{ serviceName: cfg.ServiceName, servicePort: cfg.ServicePort, proxyPort: cfg.ProxyPort, config: cfg, } // Initialize components s.loadBalancer = NewRoundRobinBalancer() s.circuitBreaker = NewCircuitBreaker(cfg.CBThreshold, cfg.CBTimeout) s.rateLimiter = NewRateLimiter(cfg.RateLimit, cfg.RateLimitWindow) s.tracer = NewTracer(cfg.ServiceName) s.metricsClient = NewMetricsClient() // Initialize mTLS if enabled if cfg.EnableMTLS { tlsConfig, err := s.setupMTLS(cfg) if err != nil { return nil, fmt.Errorf("mTLS setup failed: %w", err) } s.tlsConfig = tlsConfig } return s, nil } func (s *Sidecar) setupMTLS(cfg *SidecarConfig) (*tls.Config, error) { // Load certificate cert, err := tls.LoadX509KeyPair(cfg.CertPath, cfg.KeyPath) if err != nil { return nil, err } // Load CA certificate caCert, err := os.ReadFile(cfg.CAPath) if err != nil { return nil, err } caCertPool := x509.NewCertPool() caCertPool.AppendCertsFromPEM(caCert) return 
&tls.Config{ Certificates: []tls.Certificate{cert}, ClientCAs: caCertPool, ClientAuth: tls.RequireAndVerifyClientCert, MinVersion: tls.VersionTLS12, }, nil } // Start starts the sidecar proxy func (s *Sidecar) Start() error { mux := http.NewServeMux() // Inbound traffic handler mux.HandleFunc("/", s.handleInbound) // Outbound traffic handler mux.HandleFunc("/proxy/", s.handleOutbound) // Health and metrics endpoints mux.HandleFunc("/health", s.handleHealth) mux.HandleFunc("/metrics", s.handleMetrics) s.server = &http.Server{ Addr: fmt.Sprintf(":%d", s.proxyPort), Handler: mux, TLSConfig: s.tlsConfig, } if s.config.EnableMTLS { return s.server.ListenAndServeTLS("", "") } return s.server.ListenAndServe() } // handleInbound handles incoming requests to the service func (s *Sidecar) handleInbound(w http.ResponseWriter, r *http.Request) { ctx := r.Context() // Start tracing span := s.tracer.StartSpan(ctx, "inbound") defer span.End() // Record metrics start := time.Now() defer func() { s.metricsClient.RecordLatency("inbound", time.Since(start)) }() // Rate limiting if !s.rateLimiter.Allow() { s.metricsClient.IncrementCounter("rate_limited") http.Error(w, "Rate limit exceeded", http.StatusTooManyRequests) return } // Forward to local service targetURL := fmt.Sprintf("http://localhost:%d%s", s.servicePort, r.URL.Path) proxyReq, err := http.NewRequestWithContext(ctx, r.Method, targetURL, r.Body) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } // Copy headers for key, values := range r.Header { for _, value := range values { proxyReq.Header.Add(key, value) } } // Add tracing headers s.tracer.InjectHeaders(span, proxyReq.Header) resp, err := http.DefaultClient.Do(proxyReq) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) return } defer resp.Body.Close() // Copy response for key, values := range resp.Header { for _, value := range values { w.Header().Add(key, value) } } w.WriteHeader(resp.StatusCode) io.Copy(w, resp.Body) } 
// handleOutbound handles outgoing requests from the service func (s *Sidecar) handleOutbound(w http.ResponseWriter, r *http.Request) { ctx := r.Context() // Extract target service from path // /proxy/{service}/{path} parts := strings.SplitN(strings.TrimPrefix(r.URL.Path, "/proxy/"), "/", 2) if len(parts) < 1 { http.Error(w, "Invalid proxy path", http.StatusBadRequest) return } targetService := parts[0] targetPath := "/" if len(parts) > 1 { targetPath = "/" + parts[1] } // Start tracing span := s.tracer.StartSpan(ctx, fmt.Sprintf("outbound:%s", targetService)) defer span.End() // Record metrics start := time.Now() defer func() { s.metricsClient.RecordLatency(fmt.Sprintf("outbound:%s", targetService), time.Since(start)) }() // Circuit breaker check if !s.circuitBreaker.Allow(targetService) { s.metricsClient.IncrementCounter(fmt.Sprintf("circuit_open:%s", targetService)) http.Error(w, "Service unavailable", http.StatusServiceUnavailable) return } // Service discovery endpoints, err := s.discovery.GetEndpoints(ctx, targetService) if err != nil { http.Error(w, "Service not found", http.StatusServiceUnavailable) return } // Load balance and retry var lastErr error for attempt := 0; attempt <= s.config.RetryAttempts; attempt++ { endpoint := s.loadBalancer.Select(endpoints) targetURL := fmt.Sprintf("http://%s%s", endpoint, targetPath) proxyReq, err := http.NewRequestWithContext(ctx, r.Method, targetURL, r.Body) if err != nil { lastErr = err continue } // Copy and add headers for key, values := range r.Header { for _, value := range values { proxyReq.Header.Add(key, value) } } s.tracer.InjectHeaders(span, proxyReq.Header) // Make request with timeout client := &http.Client{Timeout: s.config.Timeout} resp, err := client.Do(proxyReq) if err != nil { lastErr = err s.circuitBreaker.RecordFailure(targetService) time.Sleep(s.config.RetryDelay) continue } // Success s.circuitBreaker.RecordSuccess(targetService) // Copy response defer resp.Body.Close() for key, values := range 
resp.Header { for _, value := range values { w.Header().Add(key, value) } } w.WriteHeader(resp.StatusCode) io.Copy(w, resp.Body) return } http.Error(w, fmt.Sprintf("All retries failed: %v", lastErr), http.StatusBadGateway) }

Service Discovery Integration

go
// ServiceDiscovery provides service endpoint resolution
type ServiceDiscovery interface {
	// GetEndpoints returns "host:port" addresses for the named service.
	GetEndpoints(ctx context.Context, service string) ([]string, error)
	// Watch invokes callback with the new address list whenever the named
	// service's endpoints change. It blocks until the watch channel closes.
	Watch(ctx context.Context, service string, callback func([]string))
}

// KubernetesDiscovery discovers services from Kubernetes
// NOTE(review): relies on k8s.io/client-go and k8s.io/apimachinery types
// (kubernetes, rest, metav1, v1); their imports are not shown in this snippet.
type KubernetesDiscovery struct {
	clientset *kubernetes.Clientset
	namespace string
	// cache maps service name -> []string of "IP:port" addresses. Entries are
	// written on first lookup and overwritten by Watch events, but never
	// expire — without an active Watch, stale endpoints live forever.
	cache sync.Map
}

// NewKubernetesDiscovery creates a Kubernetes service discovery
// scoped to one namespace. It uses in-cluster configuration, so it only
// works when running inside a pod with a service account.
func NewKubernetesDiscovery(namespace string) (*KubernetesDiscovery, error) {
	config, err := rest.InClusterConfig()
	if err != nil {
		return nil, err
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return nil, err
	}
	return &KubernetesDiscovery{
		clientset: clientset,
		namespace: namespace,
	}, nil
}

// GetEndpoints returns cached addresses when present; otherwise it queries
// the Kubernetes Endpoints API, flattens every subset's address × port
// combination into "IP:port" strings, caches the result, and returns it.
func (kd *KubernetesDiscovery) GetEndpoints(ctx context.Context, service string) ([]string, error) {
	// Check cache first
	if cached, ok := kd.cache.Load(service); ok {
		return cached.([]string), nil
	}

	// Query Kubernetes
	endpoints, err := kd.clientset.CoreV1().Endpoints(kd.namespace).Get(ctx, service, metav1.GetOptions{})
	if err != nil {
		return nil, err
	}

	var addrs []string
	for _, subset := range endpoints.Subsets {
		for _, addr := range subset.Addresses {
			for _, port := range subset.Ports {
				addrs = append(addrs, fmt.Sprintf("%s:%d", addr.IP, port.Port))
			}
		}
	}

	// Update cache
	kd.cache.Store(service, addrs)
	return addrs, nil
}

// Watch streams endpoint changes for one service, refreshing the cache and
// invoking callback on every event. It returns when the watch channel
// closes or the watch cannot be established.
func (kd *KubernetesDiscovery) Watch(ctx context.Context, service string, callback func([]string)) {
	watcher, err := kd.clientset.CoreV1().Endpoints(kd.namespace).Watch(ctx, metav1.ListOptions{
		FieldSelector: fmt.Sprintf("metadata.name=%s", service),
	})
	if err != nil {
		// NOTE(review): the watch error is silently swallowed here; callers
		// cannot distinguish "watch ended" from "watch never started".
		return
	}
	defer watcher.Stop()

	for event := range watcher.ResultChan() {
		if endpoints, ok := event.Object.(*v1.Endpoints); ok {
			var addrs []string
			for _, subset := range endpoints.Subsets {
				for _, addr := range subset.Addresses {
					for _, port := range subset.Ports {
						addrs = append(addrs, fmt.Sprintf("%s:%d", addr.IP, port.Port))
					}
				}
			}
			kd.cache.Store(service, addrs)
			callback(addrs)
		}
	}
}

Traffic Management

Canary Deployments

go
// TrafficRouter splits traffic across service subsets — weighted canary
// splits plus header-pinned routing (e.g. beta users always hit the canary).
type TrafficRouter struct {
	rules []RoutingRule
	mu    sync.RWMutex
}

// RoutingRule defines how traffic for one service is routed.
type RoutingRule struct {
	Service   string
	Subset    string            // e.g., "v1", "v2", "canary"
	Weight    int               // relative weight (a percentage when weights sum to 100)
	Headers   map[string]string // header-based routing; takes precedence over weights
	Endpoints []string
}

// NewTrafficRouter creates an empty traffic router.
func NewTrafficRouter() *TrafficRouter {
	return &TrafficRouter{rules: make([]RoutingRule, 0)}
}

// AddRule adds a routing rule. Safe for concurrent use.
func (tr *TrafficRouter) AddRule(rule RoutingRule) {
	tr.mu.Lock()
	defer tr.mu.Unlock()
	tr.rules = append(tr.rules, rule)
}

// Route selects endpoints for service. A rule whose Headers all match the
// request wins outright; otherwise a weighted random choice is made among
// the service's weight-based rules. Returns nil when no rule matches.
func (tr *TrafficRouter) Route(service string, headers http.Header) []string {
	tr.mu.RLock()
	defer tr.mu.RUnlock()

	// Header-based rules take precedence.
	for _, rule := range tr.rules {
		if rule.Service != service || len(rule.Headers) == 0 {
			continue
		}
		match := true
		for key, want := range rule.Headers {
			if headers.Get(key) != want {
				match = false
				break
			}
		}
		if match {
			return rule.Endpoints
		}
	}

	// Collect this service's weight-based rules.
	var weighted []RoutingRule
	totalWeight := 0
	for _, rule := range tr.rules {
		if rule.Service == service && len(rule.Headers) == 0 {
			weighted = append(weighted, rule)
			totalWeight += rule.Weight
		}
	}
	if len(weighted) == 0 {
		return nil
	}
	// Guard: rand.Intn panics on a non-positive argument, which the original
	// hit whenever all matching rules had weight 0.
	if totalWeight <= 0 {
		return weighted[0].Endpoints
	}

	// Weighted random pick. (The original named this local "rand", shadowing
	// the math/rand package.)
	pick := rand.Intn(totalWeight)
	cumulative := 0
	for _, rule := range weighted {
		cumulative += rule.Weight
		if pick < cumulative {
			return rule.Endpoints
		}
	}
	return weighted[0].Endpoints
}

// configureCanary shows a 90/10 stable/canary split for payment-service,
// with beta testers pinned to the canary via the X-Beta-User header.
func configureCanary(router *TrafficRouter) {
	// 90% to stable version
	router.AddRule(RoutingRule{
		Service:   "payment-service",
		Subset:    "stable",
		Weight:    90,
		Endpoints: []string{"payment-v1:8080"},
	})
	// 10% to canary
	router.AddRule(RoutingRule{
		Service:   "payment-service",
		Subset:    "canary",
		Weight:    10,
		Endpoints: []string{"payment-v2:8080"},
	})
	// Beta testers always get canary
	router.AddRule(RoutingRule{
		Service:   "payment-service",
		Subset:    "canary",
		Headers:   map[string]string{"X-Beta-User": "true"},
		Endpoints: []string{"payment-v2:8080"},
	})
}

Request Mirroring

go
// RequestMirror mirrors requests to secondary endpoints type RequestMirror struct { primaryEndpoint string mirrorEndpoints []string mirrorPercentage int // 0-100 } // NewRequestMirror creates a request mirror func NewRequestMirror(primary string, mirrors []string, percentage int) *RequestMirror { return &RequestMirror{ primaryEndpoint: primary, mirrorEndpoints: mirrors, mirrorPercentage: percentage, } } // Forward sends request to primary and optionally mirrors func (rm *RequestMirror) Forward(ctx context.Context, req *http.Request) (*http.Response, error) { // Send to primary primaryReq := req.Clone(ctx) primaryReq.URL.Host = rm.primaryEndpoint // Maybe mirror (async, don't wait for response) if rand.Intn(100) < rm.mirrorPercentage { for _, mirror := range rm.mirrorEndpoints { go func(endpoint string) { mirrorCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() mirrorReq := req.Clone(mirrorCtx) mirrorReq.URL.Host = endpoint mirrorReq.Header.Set("X-Mirror-Request", "true") // Fire and forget resp, err := http.DefaultClient.Do(mirrorReq) if err == nil { resp.Body.Close() } }(mirror) } } return http.DefaultClient.Do(primaryReq) }

Security: mTLS and Authorization

go
// CertificateManager issues short-lived service certificates signed by a
// mesh-local CA — the basis of mTLS workload identity.
type CertificateManager struct {
	ca           *x509.Certificate
	caKey        *ecdsa.PrivateKey
	certificates sync.Map // service name -> *tls.Certificate
}

// NewCertificateManager loads the CA certificate and its ECDSA private key
// from PEM files on disk.
func NewCertificateManager(caPath, caKeyPath string) (*CertificateManager, error) {
	caCertPEM, err := os.ReadFile(caPath)
	if err != nil {
		return nil, err
	}
	caKeyPEM, err := os.ReadFile(caKeyPath)
	if err != nil {
		return nil, err
	}

	// pem.Decode returns nil on malformed input; the original dereferenced
	// the block without checking and would panic on a bad file.
	block, _ := pem.Decode(caCertPEM)
	if block == nil {
		return nil, fmt.Errorf("no PEM block found in CA certificate %s", caPath)
	}
	ca, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return nil, err
	}

	keyBlock, _ := pem.Decode(caKeyPEM)
	if keyBlock == nil {
		return nil, fmt.Errorf("no PEM block found in CA key %s", caKeyPath)
	}
	caKey, err := x509.ParseECPrivateKey(keyBlock.Bytes)
	if err != nil {
		return nil, err
	}

	return &CertificateManager{ca: ca, caKey: caKey}, nil
}

// GenerateCertificate issues a 24-hour client+server certificate for
// serviceName, signed by the mesh CA, caches it, and returns it.
func (cm *CertificateManager) GenerateCertificate(serviceName string) (*tls.Certificate, error) {
	privateKey, err := ecdsa.GenerateKey(elliptic.P256(), crypto_rand.Reader)
	if err != nil {
		return nil, err
	}

	template := &x509.Certificate{
		// NOTE(review): UnixNano is unique enough for a demo but not
		// collision-proof; a production CA should use random serials.
		SerialNumber: big.NewInt(time.Now().UnixNano()),
		Subject: pkix.Name{
			CommonName:   serviceName,
			Organization: []string{"ServiceMesh"},
		},
		NotBefore: time.Now(),
		NotAfter:  time.Now().Add(24 * time.Hour),
		// ECDSA keys only sign; KeyEncipherment applies to RSA key transport
		// and is incorrect on EC certificates (RFC 5280 §4.2.1.3).
		KeyUsage:              x509.KeyUsageDigitalSignature,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth},
		BasicConstraintsValid: true,
		DNSNames:              []string{serviceName, fmt.Sprintf("%s.default.svc.cluster.local", serviceName)},
	}

	certDER, err := x509.CreateCertificate(crypto_rand.Reader, template, cm.ca, &privateKey.PublicKey, cm.caKey)
	if err != nil {
		return nil, err
	}

	cert := &tls.Certificate{
		Certificate: [][]byte{certDER},
		PrivateKey:  privateKey,
	}
	cm.certificates.Store(serviceName, cert)
	return cert, nil
}

// AuthorizationPolicy is a named set of allow rules scoped to a namespace
// ("*" matches every namespace).
type AuthorizationPolicy struct {
	Name      string
	Namespace string
	Rules     []AuthRule
}

// AuthRule allows requests whose caller matches From, whose operation
// matches To, and whose conditions match When. Empty sections match all.
type AuthRule struct {
	From []Source
	To   []Operation
	When []Condition
}

// Source identifies a caller. Within one Source, both Principals and
// Namespaces must match (empty list = match anything).
type Source struct {
	Principals []string // Service identities
	Namespaces []string
}

// Operation identifies a request's method/path/port.
type Operation struct {
	Methods []string
	Paths   []string
	Ports   []int
}

// Condition matches a request header against allowed values.
type Condition struct {
	Key    string
	Values []string
}

// AuthorizationEnforcer enforces authorization policies (default deny).
type AuthorizationEnforcer struct {
	policies []AuthorizationPolicy
	mu       sync.RWMutex
}

// NewAuthorizationEnforcer creates an enforcer with no policies, so every
// request is denied until a policy is added.
func NewAuthorizationEnforcer() *AuthorizationEnforcer {
	return &AuthorizationEnforcer{
		policies: make([]AuthorizationPolicy, 0),
	}
}

// AddPolicy adds an authorization policy. Safe for concurrent use.
func (ae *AuthorizationEnforcer) AddPolicy(policy AuthorizationPolicy) {
	ae.mu.Lock()
	defer ae.mu.Unlock()
	ae.policies = append(ae.policies, policy)
}

// Authorize reports whether the request matches any rule of any applicable
// policy. No match means deny.
func (ae *AuthorizationEnforcer) Authorize(req *AuthorizationRequest) (bool, error) {
	ae.mu.RLock()
	defer ae.mu.RUnlock()

	for _, policy := range ae.policies {
		if policy.Namespace != req.Namespace && policy.Namespace != "*" {
			continue
		}
		for _, rule := range policy.Rules {
			if ae.matchesSource(rule.From, req) &&
				ae.matchesOperation(rule.To, req) &&
				ae.matchesConditions(rule.When, req) {
				return true, nil
			}
		}
	}
	return false, nil
}

// AuthorizationRequest captures the attributes of one service-to-service call.
type AuthorizationRequest struct {
	SourceService   string
	SourceNamespace string
	TargetService   string
	Namespace       string
	Method          string
	Path            string
	Port            int
	Headers         http.Header
}

// matchesSource reports whether the caller matches any source. Within a
// single Source, Principals and Namespaces must BOTH match (an empty list
// matches anything). The original OR-ed the two lists, letting any caller
// in an allowed namespace bypass a principal restriction — a policy hole.
func (ae *AuthorizationEnforcer) matchesSource(sources []Source, req *AuthorizationRequest) bool {
	if len(sources) == 0 {
		return true
	}
	for _, source := range sources {
		principalOK := len(source.Principals) == 0
		for _, principal := range source.Principals {
			if principal == "*" || principal == req.SourceService {
				principalOK = true
				break
			}
		}
		namespaceOK := len(source.Namespaces) == 0
		for _, ns := range source.Namespaces {
			if ns == "*" || ns == req.SourceNamespace {
				namespaceOK = true
				break
			}
		}
		if principalOK && namespaceOK {
			return true
		}
	}
	return false
}

// matchesOperation reports whether any operation matches the request's
// method (exact or "*") and path (prefix or "*"). Empty lists match all.
func (ae *AuthorizationEnforcer) matchesOperation(ops []Operation, req *AuthorizationRequest) bool {
	if len(ops) == 0 {
		return true
	}
	for _, op := range ops {
		methodMatch := len(op.Methods) == 0
		for _, m := range op.Methods {
			if m == "*" || m == req.Method {
				methodMatch = true
				break
			}
		}
		pathMatch := len(op.Paths) == 0
		for _, p := range op.Paths {
			if p == "*" || strings.HasPrefix(req.Path, p) {
				pathMatch = true
				break
			}
		}
		if methodMatch && pathMatch {
			return true
		}
	}
	return false
}

// matchesConditions reports whether every condition's header has one of its
// allowed values. Empty condition list matches all.
func (ae *AuthorizationEnforcer) matchesConditions(conditions []Condition, req *AuthorizationRequest) bool {
	if len(conditions) == 0 {
		return true
	}
	for _, cond := range conditions {
		headerValue := req.Headers.Get(cond.Key)
		match := false
		for _, v := range cond.Values {
			if v == headerValue {
				match = true
				break
			}
		}
		if !match {
			return false
		}
	}
	return true
}

Observability

Distributed Tracing

go
// ctxKey is an unexported context-key type so trace values cannot collide
// with plain string keys set by other packages (string keys in
// context.WithValue are flagged by go vet / staticcheck SA1029).
type ctxKey string

const (
	ctxKeyTraceID ctxKey = "trace_id"
	ctxKeySpanID  ctxKey = "span_id"
)

// Tracer provides lightweight distributed tracing: it creates spans, keeps
// them in an in-memory map, and propagates trace context via HTTP headers.
type Tracer struct {
	serviceName string
	spans       sync.Map // span ID -> *Span
}

// Span represents one timed operation within a trace.
type Span struct {
	TraceID   string
	SpanID    string
	ParentID  string
	Name      string
	Service   string
	StartTime time.Time
	EndTime   time.Time
	Tags      map[string]string
	Logs      []SpanLog
	mu        sync.Mutex
}

// SpanLog is a timestamped message attached to a span.
type SpanLog struct {
	Timestamp time.Time
	Message   string
}

// NewTracer creates a tracer for the named service.
func NewTracer(serviceName string) *Tracer {
	return &Tracer{serviceName: serviceName}
}

// StartSpan starts a new span. If ctx carries trace context (see
// ExtractContext), the span joins that trace as a child; otherwise a fresh
// trace ID is generated.
func (t *Tracer) StartSpan(ctx context.Context, name string) *Span {
	var traceID, parentID string
	// Two-value assertions: only this package sets these keys, but a checked
	// assertion cannot panic even if something unexpected is stored.
	if existing, ok := ctx.Value(ctxKeyTraceID).(string); ok {
		traceID = existing
		if parent, ok := ctx.Value(ctxKeySpanID).(string); ok {
			parentID = parent
		}
	} else {
		traceID = generateID()
	}

	span := &Span{
		TraceID:   traceID,
		SpanID:    generateID(),
		ParentID:  parentID,
		Name:      name,
		Service:   t.serviceName,
		StartTime: time.Now(),
		Tags:      make(map[string]string),
	}
	t.spans.Store(span.SpanID, span)
	return span
}

// ExtractContext extracts trace context from incoming request headers.
func (t *Tracer) ExtractContext(headers http.Header) context.Context {
	ctx := context.Background()
	if traceID := headers.Get("X-Trace-ID"); traceID != "" {
		ctx = context.WithValue(ctx, ctxKeyTraceID, traceID)
	}
	if spanID := headers.Get("X-Span-ID"); spanID != "" {
		ctx = context.WithValue(ctx, ctxKeySpanID, spanID)
	}
	return ctx
}

// InjectHeaders writes the span's trace context into outgoing headers.
func (t *Tracer) InjectHeaders(span *Span, headers http.Header) {
	headers.Set("X-Trace-ID", span.TraceID)
	headers.Set("X-Span-ID", span.SpanID)
	if span.ParentID != "" {
		headers.Set("X-Parent-Span-ID", span.ParentID)
	}
}

// End records the span's finish time.
func (s *Span) End() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.EndTime = time.Now()
}

// SetTag attaches a key/value tag to the span.
func (s *Span) SetTag(key, value string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.Tags[key] = value
}

// Log appends a timestamped message to the span.
func (s *Span) Log(message string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.Logs = append(s.Logs, SpanLog{
		Timestamp: time.Now(),
		Message:   message,
	})
}

// generateID returns 16 random bytes hex-encoded (32 characters).
func generateID() string {
	b := make([]byte, 16)
	// crypto/rand.Read never fails on supported platforms, but check anyway
	// so a broken entropy source cannot silently yield all-zero IDs.
	if _, err := crypto_rand.Read(b); err != nil {
		panic(err)
	}
	return hex.EncodeToString(b)
}

Metrics Collection

go
// MetricsClient collects service mesh metrics type MetricsClient struct { counters sync.Map histograms sync.Map gauges sync.Map } // NewMetricsClient creates a metrics client func NewMetricsClient() *MetricsClient { return &MetricsClient{} } // IncrementCounter increments a counter func (m *MetricsClient) IncrementCounter(name string) { actual, _ := m.counters.LoadOrStore(name, new(int64)) atomic.AddInt64(actual.(*int64), 1) } // RecordLatency records a latency observation func (m *MetricsClient) RecordLatency(name string, duration time.Duration) { actual, _ := m.histograms.LoadOrStore(name, &LatencyHistogram{}) actual.(*LatencyHistogram).Observe(duration) } // SetGauge sets a gauge value func (m *MetricsClient) SetGauge(name string, value float64) { m.gauges.Store(name, value) } // LatencyHistogram tracks latency distribution type LatencyHistogram struct { sum int64 count int64 buckets [10]int64 // p50, p75, p90, p95, p99, etc. mu sync.Mutex } func (h *LatencyHistogram) Observe(d time.Duration) { ms := d.Milliseconds() atomic.AddInt64(&h.sum, ms) atomic.AddInt64(&h.count, 1) // Update buckets h.mu.Lock() defer h.mu.Unlock() thresholds := []int64{1, 5, 10, 25, 50, 100, 250, 500, 1000, 5000} for i, threshold := range thresholds { if ms <= threshold { h.buckets[i]++ break } } } // GetMetrics returns all metrics func (m *MetricsClient) GetMetrics() map[string]interface{} { metrics := make(map[string]interface{}) m.counters.Range(func(key, value interface{}) bool { metrics[fmt.Sprintf("counter_%s", key)] = atomic.LoadInt64(value.(*int64)) return true }) m.histograms.Range(func(key, value interface{}) bool { h := value.(*LatencyHistogram) count := atomic.LoadInt64(&h.count) if count > 0 { sum := atomic.LoadInt64(&h.sum) metrics[fmt.Sprintf("latency_%s_avg_ms", key)] = float64(sum) / float64(count) } return true }) m.gauges.Range(func(key, value interface{}) bool { metrics[fmt.Sprintf("gauge_%s", key)] = value return true }) return metrics }

Control Plane

go
// ControlPlane manages service mesh configuration type ControlPlane struct { // Configuration routingRules sync.Map rateLimitRules sync.Map authPolicies sync.Map // Service registry services sync.Map // Connected proxies proxies sync.Map // Metrics aggregation metricsStore *MetricsStore } // NewControlPlane creates a control plane func NewControlPlane() *ControlPlane { return &ControlPlane{ metricsStore: NewMetricsStore(), } } // RegisterProxy registers a sidecar proxy func (cp *ControlPlane) RegisterProxy(proxyID string, info ProxyInfo) { cp.proxies.Store(proxyID, info) } // PushConfig pushes configuration to a proxy func (cp *ControlPlane) PushConfig(proxyID string) (*ProxyConfig, error) { config := &ProxyConfig{ RoutingRules: cp.getRoutingRules(), RateLimitRules: cp.getRateLimitRules(), AuthPolicies: cp.getAuthPolicies(), Services: cp.getServices(), } return config, nil } type ProxyInfo struct { ID string Service string Address string Version string LastSeen time.Time } type ProxyConfig struct { RoutingRules []RoutingRule RateLimitRules []RateLimitRule AuthPolicies []AuthorizationPolicy Services []ServiceInfo } type ServiceInfo struct { Name string Endpoints []string Port int Protocol string } func (cp *ControlPlane) getRoutingRules() []RoutingRule { var rules []RoutingRule cp.routingRules.Range(func(key, value interface{}) bool { rules = append(rules, value.(RoutingRule)) return true }) return rules } func (cp *ControlPlane) getRateLimitRules() []RateLimitRule { var rules []RateLimitRule cp.rateLimitRules.Range(func(key, value interface{}) bool { rules = append(rules, value.(RateLimitRule)) return true }) return rules } func (cp *ControlPlane) getAuthPolicies() []AuthorizationPolicy { var policies []AuthorizationPolicy cp.authPolicies.Range(func(key, value interface{}) bool { policies = append(policies, value.(AuthorizationPolicy)) return true }) return policies } func (cp *ControlPlane) getServices() []ServiceInfo { var services []ServiceInfo 
cp.services.Range(func(key, value interface{}) bool { services = append(services, value.(ServiceInfo)) return true }) return services } // MetricsStore aggregates metrics from all proxies type MetricsStore struct { metrics sync.Map } func NewMetricsStore() *MetricsStore { return &MetricsStore{} } func (ms *MetricsStore) Store(proxyID string, metrics map[string]interface{}) { ms.metrics.Store(proxyID, metrics) } func (ms *MetricsStore) GetAggregated() map[string]interface{} { aggregated := make(map[string]interface{}) ms.metrics.Range(func(key, value interface{}) bool { proxyMetrics := value.(map[string]interface{}) for name, val := range proxyMetrics { if existing, ok := aggregated[name]; ok { // Sum numeric values switch v := val.(type) { case int64: aggregated[name] = existing.(int64) + v case float64: aggregated[name] = existing.(float64) + v } } else { aggregated[name] = val } } return true }) return aggregated }

Best Practices

  1. Start simple - Begin with basic traffic management, add features gradually
  2. Monitor everything - Service meshes provide rich observability; use it
  3. Test resilience - Inject failures to verify circuit breakers and retries work
  4. Secure by default - Enable mTLS for all service-to-service communication
  5. Manage configuration centrally - Use the control plane for consistent policy enforcement

What's Next?

In Part 29, we'll explore Distributed Tracing Deep Dive - implementing comprehensive tracing across your distributed system.

"A service mesh doesn't add complexity - it manages the complexity that already exists in your distributed system."
All Blogs
Tags: service-mesh, microservices, observability, security