// Package monitoring provides in-process HTTP request metrics collection
// for Echo-based servers.
package monitoring

import (
	"errors"
	"net/http"
	"sort"
	"sync"
	"time"

	"github.com/labstack/echo/v4"
)

// HTTPStatsCollector collects HTTP request metrics (counts, latencies,
// error rates) keyed by "METHOD /route" endpoint. All methods are safe
// for concurrent use.
type HTTPStatsCollector struct {
	mu           sync.RWMutex
	requests     map[string]int64         // endpoint -> request count
	totalLatency map[string]time.Duration // endpoint -> summed latency
	errors       map[string]int64         // endpoint -> count of status >= 400
	byStatus     map[int]int64            // status code -> count
	latencies    []latencySample          // recent samples, for P95
	startTime    time.Time                // collector creation / last full Reset
	lastReset    time.Time                // last periodic counter reset
}

// latencySample is one recorded request latency, tagged with its endpoint.
type latencySample struct {
	endpoint  string
	latency   time.Duration
	timestamp time.Time
}

const (
	maxLatencySamples = 1000
	maxEndpoints      = 200           // cap on unique endpoints tracked
	statsResetPeriod  = 1 * time.Hour // periodic reset prevents unbounded growth
)

// NewHTTPStatsCollector creates a new, empty HTTP stats collector.
func NewHTTPStatsCollector() *HTTPStatsCollector {
	now := time.Now()
	return &HTTPStatsCollector{
		requests:     make(map[string]int64),
		totalLatency: make(map[string]time.Duration),
		errors:       make(map[string]int64),
		byStatus:     make(map[int]int64),
		latencies:    make([]latencySample, 0, maxLatencySamples),
		startTime:    now,
		lastReset:    now,
	}
}

// Record records a single HTTP request for the given endpoint.
// A status >= 400 counts as an error. Once maxEndpoints distinct
// endpoints are tracked, further new endpoints collapse into "OTHER".
func (c *HTTPStatsCollector) Record(endpoint string, latency time.Duration, status int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Periodically reset to prevent unbounded memory growth.
	if time.Since(c.lastReset) > statsResetPeriod {
		c.resetLocked()
	}

	// New endpoint past the cap? Fold it into a catch-all bucket.
	if _, exists := c.requests[endpoint]; !exists && len(c.requests) >= maxEndpoints {
		endpoint = "OTHER"
	}

	c.requests[endpoint]++
	c.totalLatency[endpoint] += latency
	c.byStatus[status]++
	if status >= 400 {
		c.errors[endpoint]++
	}

	// Store a latency sample for percentile calculation.
	c.latencies = append(c.latencies, latencySample{
		endpoint:  endpoint,
		latency:   latency,
		timestamp: time.Now(),
	})
	// Keep only the most recent samples.
	if len(c.latencies) > maxLatencySamples {
		c.latencies = c.latencies[len(c.latencies)-maxLatencySamples:]
	}
}

// resetLocked resets per-period stats. Caller must hold c.mu.
// startTime is preserved so full uptime remains available.
func (c *HTTPStatsCollector) resetLocked() {
	c.requests = make(map[string]int64)
	c.totalLatency = make(map[string]time.Duration)
	c.errors = make(map[string]int64)
	c.byStatus = make(map[int]int64)
	c.latencies = make([]latencySample, 0, maxLatencySamples)
	c.lastReset = time.Now()
}

// GetStats returns a snapshot of the current HTTP statistics.
//
// Latencies are reported in fractional milliseconds (avoiding the
// truncation that Duration.Milliseconds() would introduce for
// sub-millisecond requests). RequestsPerMinute is computed over the
// window since the last reset, since that is the window the counters
// actually cover.
func (c *HTTPStatsCollector) GetStats() HTTPStats {
	c.mu.RLock()
	defer c.mu.RUnlock()

	stats := HTTPStats{
		ByEndpoint:   make(map[string]EndpointStats),
		ByStatusCode: make(map[int]int64),
	}

	var totalRequests int64
	var totalErrors int64
	var totalLatency time.Duration

	for endpoint, count := range c.requests {
		errCount := c.errors[endpoint]
		var avgLatencyMs, errRate float64
		if count > 0 { // entries always have count >= 1, but guard the divisions anyway
			avgLatencyMs = float64(c.totalLatency[endpoint]) / float64(time.Millisecond) / float64(count)
			errRate = float64(errCount) / float64(count)
		}

		stats.ByEndpoint[endpoint] = EndpointStats{
			Count:        count,
			AvgLatencyMs: avgLatencyMs,
			ErrorRate:    errRate,
			P95LatencyMs: c.calculateP95(endpoint),
		}

		totalRequests += count
		totalErrors += errCount
		totalLatency += c.totalLatency[endpoint]
	}

	// Copy status code counts so callers can't mutate internal state.
	for status, count := range c.byStatus {
		stats.ByStatusCode[status] = count
	}

	stats.RequestsTotal = totalRequests
	if totalRequests > 0 {
		stats.AvgLatencyMs = float64(totalLatency) / float64(time.Millisecond) / float64(totalRequests)
		stats.ErrorRate = float64(totalErrors) / float64(totalRequests)
	}

	// Rate over the current collection window (counters are cleared every
	// statsResetPeriod, so dividing by full uptime would skew the rate low).
	window := time.Since(c.lastReset).Minutes()
	if window > 0 {
		stats.RequestsPerMinute = float64(totalRequests) / window
	}

	return stats
}

// calculateP95 calculates the 95th percentile latency (in fractional
// milliseconds) for an endpoint from the retained samples.
// Must be called with c.mu held (read lock suffices: it sorts a local copy).
func (c *HTTPStatsCollector) calculateP95(endpoint string) float64 {
	var endpointLatencies []time.Duration
	for _, sample := range c.latencies {
		if sample.endpoint == endpoint {
			endpointLatencies = append(endpointLatencies, sample.latency)
		}
	}
	if len(endpointLatencies) == 0 {
		return 0
	}

	sort.Slice(endpointLatencies, func(i, j int) bool {
		return endpointLatencies[i] < endpointLatencies[j]
	})

	// Index of the sample at (or just past) the 95th percentile.
	p95Index := int(float64(len(endpointLatencies)) * 0.95)
	if p95Index >= len(endpointLatencies) {
		p95Index = len(endpointLatencies) - 1
	}
	return float64(endpointLatencies[p95Index]) / float64(time.Millisecond)
}

// Reset clears all collected stats and restarts the uptime clock.
func (c *HTTPStatsCollector) Reset() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.resetLocked() // also refreshes lastReset, keeping the rate window honest
	c.startTime = time.Now()
}

// MetricsMiddleware returns an Echo middleware that records per-request
// metrics (endpoint, latency, status) into the given collector.
func MetricsMiddleware(collector *HTTPStatsCollector) echo.MiddlewareFunc {
	return func(next echo.HandlerFunc) echo.HandlerFunc {
		return func(c echo.Context) error {
			start := time.Now()

			err := next(c)

			latency := time.Since(start)

			// Use the route pattern so path parameters don't explode the
			// endpoint cardinality; fall back to the raw path.
			endpoint := c.Path()
			if endpoint == "" {
				endpoint = c.Request().URL.Path
			}
			endpoint = c.Request().Method + " " + endpoint

			// When the handler returned an error, Echo's error handler has
			// not run yet, so c.Response().Status still holds the default
			// (200) — derive the real status from the error instead.
			status := c.Response().Status
			if err != nil {
				var he *echo.HTTPError
				if errors.As(err, &he) {
					status = he.Code
				} else if !c.Response().Committed {
					status = http.StatusInternalServerError
				}
			}

			collector.Record(endpoint, latency, status)

			return err
		}
	}
}