Add real-time log monitoring and system stats dashboard

Implements a comprehensive monitoring system for the admin interface:

Backend:
- New monitoring package with Redis ring buffer for log storage
- Zerolog MultiWriter to capture logs to Redis
- System stats collection (CPU, memory, disk, goroutines, GC)
- HTTP metrics middleware (request counts, latency, error rates)
- Asynq queue stats for worker process
- WebSocket endpoint for real-time log streaming
- Admin auth middleware now accepts token in query params (for WebSocket)

Frontend:
- New monitoring page with tabs (Overview, Logs, API Stats, Worker Stats)
- Real-time log viewer with level filtering and search
- System stats cards showing CPU, memory, goroutines, uptime
- HTTP endpoint statistics table
- Asynq queue depth visualization
- Enable/disable monitoring toggle in settings

Memory safeguards:
- Max 200 unique endpoints tracked
- Hourly stats reset to prevent unbounded growth
- Max 1000 log entries in ring buffer
- Max 1000 latency samples for P95 calculation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-09 10:26:40 -06:00
parent 12eac24632
commit eb127fda20
31 changed files with 2880 additions and 213 deletions
@@ -0,0 +1,128 @@
+package monitoring
+
+import "time"
+
+// LogEntry is the JSON-serializable record for a single log line captured from zerolog.
+type LogEntry struct {
+	ID        string         `json:"id"`
+	Timestamp time.Time      `json:"timestamp"`
+	Level     string         `json:"level"`   // Severity name: debug, info, warn, error, fatal
+	Message   string         `json:"message"`
+	Caller    string         `json:"caller"`  // Source location formatted as file:line
+	Process   string         `json:"process"` // Originating process: "api" or "worker"
+	Fields    map[string]any `json:"fields"`  // Additional structured fields; a nil map marshals as JSON null
+}
+
+// SystemStats is one timestamped snapshot of system and Go runtime statistics for a single process.
+type SystemStats struct {
+	Timestamp time.Time    `json:"timestamp"`
+	Process   string       `json:"process"`
+	CPU       CPUStats     `json:"cpu"`
+	Memory    MemoryStats  `json:"memory"`
+	Disk      DiskStats    `json:"disk"`
+	Runtime   RuntimeStats `json:"runtime"`
+	HTTP      *HTTPStats   `json:"http,omitempty"`  // API only; a nil pointer is omitted from the JSON output
+	Asynq     *AsynqStats  `json:"asynq,omitempty"` // Worker only; a nil pointer is omitted from the JSON output
+}
+
+// CPUStats contains CPU usage, the CPU count, and three load averages (1/5/15, presumably minutes; confirm against the collector).
+type CPUStats struct {
+	UsagePercent float64 `json:"usage_percent"`
+	NumCPU       int     `json:"num_cpu"`
+	LoadAvg1     float64 `json:"load_avg_1"`
+	LoadAvg5     float64 `json:"load_avg_5"`
+	LoadAvg15    float64 `json:"load_avg_15"`
+}
+
+// MemoryStats groups system-wide memory usage with Go heap figures in a single JSON object.
+type MemoryStats struct {
+	// System memory: used and total byte counts plus a usage percentage
+	UsedBytes    uint64  `json:"used_bytes"`
+	TotalBytes   uint64  `json:"total_bytes"`
+	UsagePercent float64 `json:"usage_percent"`
+	// Go heap: names match runtime.MemStats fields (presumably copied from there; TODO confirm)
+	HeapAlloc uint64 `json:"heap_alloc"`
+	HeapSys   uint64 `json:"heap_sys"`
+	HeapInuse uint64 `json:"heap_inuse"`
+}
+
+// DiskStats contains used, total and free byte counts plus a usage percentage; the measured volume is chosen by the collector.
+type DiskStats struct {
+	UsedBytes    uint64  `json:"used_bytes"`
+	TotalBytes   uint64  `json:"total_bytes"`
+	FreeBytes    uint64  `json:"free_bytes"`
+	UsagePercent float64 `json:"usage_percent"`
+}
+
+// RuntimeStats contains Go runtime figures; the JSON keys carry the units (last_gc_pause_ns, uptime_seconds).
+type RuntimeStats struct {
+	Goroutines  int    `json:"goroutines"`
+	NumGC       uint32 `json:"num_gc"`
+	LastGCPause uint64 `json:"last_gc_pause_ns"`
+	Uptime      int64  `json:"uptime_seconds"`
+}
+
+// HTTPStats contains aggregate HTTP request metrics plus per-endpoint and per-status-code breakdowns (API server only).
+type HTTPStats struct {
+	RequestsTotal     int64                    `json:"requests_total"`
+	RequestsPerMinute float64                  `json:"requests_per_minute"`
+	AvgLatencyMs      float64                  `json:"avg_latency_ms"`
+	ErrorRate         float64                  `json:"error_rate"`
+	ByEndpoint        map[string]EndpointStats `json:"by_endpoint"`
+	ByStatusCode      map[int]int64            `json:"by_status_code"` // Integer keys are encoded as JSON strings
+}
+
+// EndpointStats contains the request count, average and P95 latency in milliseconds, and error rate for one endpoint.
+type EndpointStats struct {
+	Count        int64   `json:"count"`
+	AvgLatencyMs float64 `json:"avg_latency_ms"`
+	P95LatencyMs float64 `json:"p95_latency_ms"`
+	ErrorRate    float64 `json:"error_rate"`
+}
+
+// AsynqStats contains Asynq job queue metrics as a map of QueueStats (keys presumably queue names; Worker only).
+type AsynqStats struct {
+	Queues map[string]QueueStats `json:"queues"`
+}
+
+// QueueStats contains one integer counter per task category for a single Asynq queue.
+type QueueStats struct {
+	Pending   int `json:"pending"`
+	Active    int `json:"active"`
+	Scheduled int `json:"scheduled"`
+	Retry     int `json:"retry"`
+	Archived  int `json:"archived"`
+	Completed int `json:"completed"`
+	Failed    int `json:"failed"`
+}
+
+// LogFilters holds log query parameters bound through `form` tags; read the limit via GetLimit to get a bounded value.
+type LogFilters struct {
+	Level   string `form:"level"`
+	Process string `form:"process"`
+	Search  string `form:"search"`
+	Limit   int    `form:"limit,default=100"`
+}
+
+// GetLimit returns Limit clamped for querying: 100 when Limit is zero or negative, and never more than 1000.
+func (f *LogFilters) GetLimit() int {
+	switch limit := f.Limit; {
+	case limit <= 0:
+		return 100
+	case limit > 1000:
+		return 1000
+	}
+	return f.Limit
+}
+
+// WebSocket message types: the values carried in WSMessage.Type to identify the payload kind.
+const (
+	WSMessageTypeLog   = "log"
+	WSMessageTypeStats = "stats"
+)
+
+// WSMessage is the JSON envelope for messages sent over WebSocket: Type names the payload kind and Data carries it.
+type WSMessage struct {
+	Type string `json:"type"` // WSMessageTypeLog ("log") or WSMessageTypeStats ("stats")
+	Data any    `json:"data"` // Payload; presumably a LogEntry or SystemStats matching Type (confirm against the sender)
+}