Add real-time log monitoring and system stats dashboard
Implements a comprehensive monitoring system for the admin interface: Backend: - New monitoring package with Redis ring buffer for log storage - Zerolog MultiWriter to capture logs to Redis - System stats collection (CPU, memory, disk, goroutines, GC) - HTTP metrics middleware (request counts, latency, error rates) - Asynq queue stats for worker process - WebSocket endpoint for real-time log streaming - Admin auth middleware now accepts token in query params (for WebSocket) Frontend: - New monitoring page with tabs (Overview, Logs, API Stats, Worker Stats) - Real-time log viewer with level filtering and search - System stats cards showing CPU, memory, goroutines, uptime - HTTP endpoint statistics table - Asynq queue depth visualization - Enable/disable monitoring toggle in settings Memory safeguards: - Max 200 unique endpoints tracked - Hourly stats reset to prevent unbounded growth - Max 1000 log entries in ring buffer - Max 1000 latency samples for P95 calculation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
194
internal/monitoring/service.go
Normal file
194
internal/monitoring/service.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package monitoring
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/treytartt/casera-api/internal/models"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultStatsInterval is the default interval for collecting/publishing stats
|
||||
DefaultStatsInterval = 5 * time.Second
|
||||
// SettingsSyncInterval is how often to check the database for enable_monitoring setting
|
||||
SettingsSyncInterval = 30 * time.Second
|
||||
)
|
||||
|
||||
// Service orchestrates all monitoring components
|
||||
type Service struct {
|
||||
process string
|
||||
logBuffer *LogBuffer
|
||||
statsStore *StatsStore
|
||||
collector *Collector
|
||||
httpCollector *HTTPStatsCollector
|
||||
handler *Handler
|
||||
logWriter *RedisLogWriter
|
||||
db *gorm.DB
|
||||
settingsStopCh chan struct{}
|
||||
}
|
||||
|
||||
// Config holds configuration for the monitoring service
|
||||
type Config struct {
|
||||
Process string // "api" or "worker"
|
||||
RedisClient *redis.Client // Redis client for log buffer
|
||||
StatsInterval time.Duration // Interval for stats collection (default 5s)
|
||||
DB *gorm.DB // Database for checking enable_monitoring setting (optional)
|
||||
}
|
||||
|
||||
// NewService creates a new monitoring service
|
||||
func NewService(cfg Config) *Service {
|
||||
if cfg.StatsInterval == 0 {
|
||||
cfg.StatsInterval = DefaultStatsInterval
|
||||
}
|
||||
|
||||
// Create components
|
||||
logBuffer := NewLogBuffer(cfg.RedisClient)
|
||||
statsStore := NewStatsStore(cfg.RedisClient)
|
||||
collector := NewCollector(cfg.Process, statsStore)
|
||||
handler := NewHandler(logBuffer, statsStore)
|
||||
logWriter := NewRedisLogWriter(logBuffer, cfg.Process)
|
||||
|
||||
// For API server, create HTTP stats collector
|
||||
var httpCollector *HTTPStatsCollector
|
||||
if cfg.Process == "api" {
|
||||
httpCollector = NewHTTPStatsCollector()
|
||||
collector.SetHTTPCollector(httpCollector)
|
||||
}
|
||||
|
||||
svc := &Service{
|
||||
process: cfg.Process,
|
||||
logBuffer: logBuffer,
|
||||
statsStore: statsStore,
|
||||
collector: collector,
|
||||
httpCollector: httpCollector,
|
||||
handler: handler,
|
||||
logWriter: logWriter,
|
||||
db: cfg.DB,
|
||||
settingsStopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
// Check initial setting from database
|
||||
if cfg.DB != nil {
|
||||
svc.syncSettingsFromDB()
|
||||
}
|
||||
|
||||
return svc
|
||||
}
|
||||
|
||||
// SetAsynqInspector sets the Asynq inspector for worker stats
|
||||
func (s *Service) SetAsynqInspector(inspector *asynq.Inspector) {
|
||||
s.collector.SetAsynqInspector(inspector)
|
||||
}
|
||||
|
||||
// Start begins collecting and publishing stats
|
||||
func (s *Service) Start() {
|
||||
log.Info().
|
||||
Str("process", s.process).
|
||||
Dur("interval", DefaultStatsInterval).
|
||||
Bool("enabled", s.logWriter.IsEnabled()).
|
||||
Msg("Starting monitoring service")
|
||||
|
||||
s.collector.StartPublishing(DefaultStatsInterval)
|
||||
|
||||
// Start settings sync if database is available
|
||||
if s.db != nil {
|
||||
go s.startSettingsSync()
|
||||
}
|
||||
}
|
||||
|
||||
// Stop stops the monitoring service
|
||||
func (s *Service) Stop() {
|
||||
// Stop settings sync
|
||||
close(s.settingsStopCh)
|
||||
|
||||
s.collector.Stop()
|
||||
log.Info().Str("process", s.process).Msg("Monitoring service stopped")
|
||||
}
|
||||
|
||||
// syncSettingsFromDB checks the database for the enable_monitoring setting
|
||||
func (s *Service) syncSettingsFromDB() {
|
||||
if s.db == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var settings models.SubscriptionSettings
|
||||
err := s.db.First(&settings, 1).Error
|
||||
if err != nil {
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
// No settings record, default to enabled
|
||||
s.logWriter.SetEnabled(true)
|
||||
}
|
||||
// On other errors, keep current state
|
||||
return
|
||||
}
|
||||
|
||||
wasEnabled := s.logWriter.IsEnabled()
|
||||
s.logWriter.SetEnabled(settings.EnableMonitoring)
|
||||
|
||||
if wasEnabled != settings.EnableMonitoring {
|
||||
log.Info().
|
||||
Str("process", s.process).
|
||||
Bool("enabled", settings.EnableMonitoring).
|
||||
Msg("Monitoring log capture setting changed")
|
||||
}
|
||||
}
|
||||
|
||||
// startSettingsSync periodically checks the database for settings changes
|
||||
func (s *Service) startSettingsSync() {
|
||||
ticker := time.NewTicker(SettingsSyncInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.settingsStopCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.syncSettingsFromDB()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SetEnabled manually enables or disables log capture
|
||||
func (s *Service) SetEnabled(enabled bool) {
|
||||
s.logWriter.SetEnabled(enabled)
|
||||
}
|
||||
|
||||
// IsEnabled returns whether log capture is enabled
|
||||
func (s *Service) IsEnabled() bool {
|
||||
return s.logWriter.IsEnabled()
|
||||
}
|
||||
|
||||
// SetDB sets the database connection for settings sync
|
||||
// This can be called after NewService if DB wasn't available during initialization
|
||||
func (s *Service) SetDB(db *gorm.DB) {
|
||||
s.db = db
|
||||
s.syncSettingsFromDB()
|
||||
}
|
||||
|
||||
// LogWriter returns an io.Writer for zerolog that captures logs to Redis
|
||||
func (s *Service) LogWriter() io.Writer {
|
||||
return s.logWriter
|
||||
}
|
||||
|
||||
// Handler returns the HTTP handler for monitoring endpoints
|
||||
func (s *Service) Handler() *Handler {
|
||||
return s.handler
|
||||
}
|
||||
|
||||
// HTTPCollector returns the HTTP stats collector (nil for worker)
|
||||
func (s *Service) HTTPCollector() *HTTPStatsCollector {
|
||||
return s.httpCollector
|
||||
}
|
||||
|
||||
// MetricsMiddleware returns the Gin middleware for HTTP metrics (API server only)
|
||||
func (s *Service) MetricsMiddleware() interface{} {
|
||||
if s.httpCollector == nil {
|
||||
return nil
|
||||
}
|
||||
return MetricsMiddleware(s.httpCollector)
|
||||
}
|
||||
Reference in New Issue
Block a user