Add real-time log monitoring and system stats dashboard

Implements a comprehensive monitoring system for the admin interface:

Backend:
- New monitoring package with Redis ring buffer for log storage
- Zerolog MultiWriter to capture logs to Redis
- System stats collection (CPU, memory, disk, goroutines, GC)
- HTTP metrics middleware (request counts, latency, error rates)
- Asynq queue stats for worker process
- WebSocket endpoint for real-time log streaming
- Admin auth middleware now accepts token in query params (for WebSocket)

Frontend:
- New monitoring page with tabs (Overview, Logs, API Stats, Worker Stats)
- Real-time log viewer with level filtering and search
- System stats cards showing CPU, memory, goroutines, uptime
- HTTP endpoint statistics table
- Asynq queue depth visualization
- Enable/disable monitoring toggle in settings

Memory safeguards:
- Max 200 unique endpoints tracked
- Hourly stats reset to prevent unbounded growth
- Max 1000 log entries in ring buffer
- Max 1000 latency samples for P95 calculation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Trey t
2025-12-09 10:26:40 -06:00
parent 12eac24632
commit eb127fda20
31 changed files with 2880 additions and 213 deletions

View File

@@ -0,0 +1,194 @@
package monitoring
import (
"io"
"time"
"github.com/hibiken/asynq"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"github.com/treytartt/casera-api/internal/models"
)
const (
// DefaultStatsInterval is the default interval for collecting/publishing stats
DefaultStatsInterval = 5 * time.Second
// SettingsSyncInterval is how often to check the database for enable_monitoring setting
SettingsSyncInterval = 30 * time.Second
)
// Service orchestrates all monitoring components
type Service struct {
process string
logBuffer *LogBuffer
statsStore *StatsStore
collector *Collector
httpCollector *HTTPStatsCollector
handler *Handler
logWriter *RedisLogWriter
db *gorm.DB
settingsStopCh chan struct{}
}
// Config holds configuration for the monitoring service
type Config struct {
Process string // "api" or "worker"
RedisClient *redis.Client // Redis client for log buffer
StatsInterval time.Duration // Interval for stats collection (default 5s)
DB *gorm.DB // Database for checking enable_monitoring setting (optional)
}
// NewService creates a new monitoring service
func NewService(cfg Config) *Service {
if cfg.StatsInterval == 0 {
cfg.StatsInterval = DefaultStatsInterval
}
// Create components
logBuffer := NewLogBuffer(cfg.RedisClient)
statsStore := NewStatsStore(cfg.RedisClient)
collector := NewCollector(cfg.Process, statsStore)
handler := NewHandler(logBuffer, statsStore)
logWriter := NewRedisLogWriter(logBuffer, cfg.Process)
// For API server, create HTTP stats collector
var httpCollector *HTTPStatsCollector
if cfg.Process == "api" {
httpCollector = NewHTTPStatsCollector()
collector.SetHTTPCollector(httpCollector)
}
svc := &Service{
process: cfg.Process,
logBuffer: logBuffer,
statsStore: statsStore,
collector: collector,
httpCollector: httpCollector,
handler: handler,
logWriter: logWriter,
db: cfg.DB,
settingsStopCh: make(chan struct{}),
}
// Check initial setting from database
if cfg.DB != nil {
svc.syncSettingsFromDB()
}
return svc
}
// SetAsynqInspector sets the Asynq inspector for worker stats
func (s *Service) SetAsynqInspector(inspector *asynq.Inspector) {
s.collector.SetAsynqInspector(inspector)
}
// Start begins collecting and publishing stats
func (s *Service) Start() {
log.Info().
Str("process", s.process).
Dur("interval", DefaultStatsInterval).
Bool("enabled", s.logWriter.IsEnabled()).
Msg("Starting monitoring service")
s.collector.StartPublishing(DefaultStatsInterval)
// Start settings sync if database is available
if s.db != nil {
go s.startSettingsSync()
}
}
// Stop stops the monitoring service
func (s *Service) Stop() {
// Stop settings sync
close(s.settingsStopCh)
s.collector.Stop()
log.Info().Str("process", s.process).Msg("Monitoring service stopped")
}
// syncSettingsFromDB checks the database for the enable_monitoring setting
func (s *Service) syncSettingsFromDB() {
if s.db == nil {
return
}
var settings models.SubscriptionSettings
err := s.db.First(&settings, 1).Error
if err != nil {
if err == gorm.ErrRecordNotFound {
// No settings record, default to enabled
s.logWriter.SetEnabled(true)
}
// On other errors, keep current state
return
}
wasEnabled := s.logWriter.IsEnabled()
s.logWriter.SetEnabled(settings.EnableMonitoring)
if wasEnabled != settings.EnableMonitoring {
log.Info().
Str("process", s.process).
Bool("enabled", settings.EnableMonitoring).
Msg("Monitoring log capture setting changed")
}
}
// startSettingsSync periodically checks the database for settings changes
func (s *Service) startSettingsSync() {
ticker := time.NewTicker(SettingsSyncInterval)
defer ticker.Stop()
for {
select {
case <-s.settingsStopCh:
return
case <-ticker.C:
s.syncSettingsFromDB()
}
}
}
// SetEnabled manually enables or disables log capture
func (s *Service) SetEnabled(enabled bool) {
s.logWriter.SetEnabled(enabled)
}
// IsEnabled returns whether log capture is enabled
func (s *Service) IsEnabled() bool {
return s.logWriter.IsEnabled()
}
// SetDB sets the database connection for settings sync
// This can be called after NewService if DB wasn't available during initialization
func (s *Service) SetDB(db *gorm.DB) {
s.db = db
s.syncSettingsFromDB()
}
// LogWriter returns an io.Writer for zerolog that captures logs to Redis
func (s *Service) LogWriter() io.Writer {
return s.logWriter
}
// Handler returns the HTTP handler for monitoring endpoints
func (s *Service) Handler() *Handler {
return s.handler
}
// HTTPCollector returns the HTTP stats collector (nil for worker)
func (s *Service) HTTPCollector() *HTTPStatsCollector {
return s.httpCollector
}
// MetricsMiddleware returns the Gin middleware for HTTP metrics (API server only)
func (s *Service) MetricsMiddleware() interface{} {
if s.httpCollector == nil {
return nil
}
return MetricsMiddleware(s.httpCollector)
}