Security: - Replace all binding: tags with validate: + c.Validate() in admin handlers - Add rate limiting to auth endpoints (login, register, password reset) - Add security headers (HSTS, XSS protection, nosniff, frame options) - Wire Google Pub/Sub token verification into webhook handler - Replace ParseUnverified with proper OIDC/JWKS key verification - Verify inner Apple JWS signatures in webhook handler - Add io.LimitReader (1MB) to all webhook body reads - Add ownership verification to file deletion - Move hardcoded admin credentials to env vars - Add uniqueIndex to User.Email - Hide ConfirmationCode from JSON serialization - Mask confirmation codes in admin responses - Use http.DetectContentType for upload validation - Fix path traversal in storage service - Replace os.Getenv with Viper in stripe service - Sanitize Redis URLs before logging - Separate DEBUG_FIXED_CODES from DEBUG flag - Reject weak SECRET_KEY in production - Add host check on /_next/* proxy routes - Use explicit localhost CORS origins in debug mode - Replace err.Error() with generic messages in all admin error responses Critical fixes: - Rewrite FCM to HTTP v1 API with OAuth 2.0 service account auth - Fix user_customuser -> auth_user table names in raw SQL - Fix dashboard verified query to use UserProfile model - Add escapeLikeWildcards() to prevent SQL wildcard injection Bug fixes: - Add bounds checks for days/expiring_soon query params (1-3650) - Add receipt_data/transaction_id empty-check to RestoreSubscription - Change Active bool -> *bool in device handler - Check all unchecked GORM/FindByIDWithProfile errors - Add validation for notification hour fields (0-23) - Add max=10000 validation on task description updates Transactions & data integrity: - Wrap registration flow in transaction - Wrap QuickComplete in transaction - Move image creation inside completion transaction - Wrap SetSpecialties in transaction - Wrap GetOrCreateToken in transaction - Wrap completion+image deletion in 
transaction Performance: - Batch completion summaries (2 queries vs 2N) - Reuse single http.Client in IAP validation - Cache dashboard counts (30s TTL) - Batch COUNT queries in admin user list - Add Limit(500) to document queries - Add reminder_stage+due_date filters to reminder queries - Parse AllowedTypes once at init - In-memory user cache in auth middleware (30s TTL) - Timezone change detection cache - Optimize P95 with per-endpoint sorted buffers - Replace crypto/md5 with hash/fnv for ETags Code quality: - Add sync.Once to all monitoring Stop()/Close() methods - Replace 8 fmt.Printf with zerolog in auth service - Log previously discarded errors - Standardize delete response shapes - Route hardcoded English through i18n - Remove FileURL from DocumentResponse (keep MediaURL only) - Thread user timezone through kanban board responses - Initialize empty slices to prevent null JSON - Extract shared field map for task Update/UpdateTx - Delete unused SoftDeleteModel, min(), formatCron, legacy handlers Worker & jobs: - Wire Asynq email infrastructure into worker - Register HandleReminderLogCleanup with daily 3AM cron - Use per-user timezone in HandleSmartReminder - Replace direct DB queries with repository calls - Delete legacy reminder handlers (~200 lines) - Delete unused task type constants Dependencies: - Replace archived jung-kurt/gofpdf with go-pdf/fpdf - Replace unmaintained gomail.v2 with wneessen/go-mail - Add TODO for Echo jwt v3 transitive dep removal Test infrastructure: - Fix MakeRequest/SeedLookupData error handling - Replace os.Exit(0) with t.Skip() in scope/consistency tests - Add 11 new FCM v1 tests Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
206 lines
5.2 KiB
Go
206 lines
5.2 KiB
Go
package monitoring
|
|
|
|
import (
	"errors"
	"io"
	"sync"
	"time"

	"github.com/hibiken/asynq"
	"github.com/labstack/echo/v4"
	"github.com/redis/go-redis/v9"
	"github.com/rs/zerolog/log"
	"gorm.io/gorm"

	"github.com/treytartt/honeydue-api/internal/models"
)
|
|
|
|
// DefaultStatsInterval is the stats collection/publishing period used when
// no interval is configured.
const DefaultStatsInterval = 5 * time.Second

// SettingsSyncInterval is the period between database checks of the
// enable_monitoring setting.
const SettingsSyncInterval = 30 * time.Second
|
|
|
|
// Service orchestrates all monitoring components
|
|
type Service struct {
|
|
process string
|
|
logBuffer *LogBuffer
|
|
statsStore *StatsStore
|
|
collector *Collector
|
|
httpCollector *HTTPStatsCollector
|
|
handler *Handler
|
|
logWriter *RedisLogWriter
|
|
db *gorm.DB
|
|
settingsStopCh chan struct{}
|
|
stopOnce sync.Once
|
|
statsInterval time.Duration
|
|
}
|
|
|
|
// Config holds configuration for the monitoring service
|
|
type Config struct {
|
|
Process string // "api" or "worker"
|
|
RedisClient *redis.Client // Redis client for log buffer
|
|
StatsInterval time.Duration // Interval for stats collection (default 5s)
|
|
DB *gorm.DB // Database for checking enable_monitoring setting (optional)
|
|
}
|
|
|
|
// NewService creates a new monitoring service
|
|
func NewService(cfg Config) *Service {
|
|
if cfg.StatsInterval == 0 {
|
|
cfg.StatsInterval = DefaultStatsInterval
|
|
}
|
|
|
|
// Create components
|
|
logBuffer := NewLogBuffer(cfg.RedisClient)
|
|
statsStore := NewStatsStore(cfg.RedisClient)
|
|
collector := NewCollector(cfg.Process, statsStore)
|
|
handler := NewHandler(logBuffer, statsStore)
|
|
logWriter := NewRedisLogWriter(logBuffer, cfg.Process)
|
|
|
|
// For API server, create HTTP stats collector
|
|
var httpCollector *HTTPStatsCollector
|
|
if cfg.Process == "api" {
|
|
httpCollector = NewHTTPStatsCollector()
|
|
collector.SetHTTPCollector(httpCollector)
|
|
}
|
|
|
|
svc := &Service{
|
|
process: cfg.Process,
|
|
logBuffer: logBuffer,
|
|
statsStore: statsStore,
|
|
collector: collector,
|
|
httpCollector: httpCollector,
|
|
handler: handler,
|
|
logWriter: logWriter,
|
|
db: cfg.DB,
|
|
settingsStopCh: make(chan struct{}),
|
|
statsInterval: cfg.StatsInterval,
|
|
}
|
|
|
|
// Check initial setting from database
|
|
if cfg.DB != nil {
|
|
svc.syncSettingsFromDB()
|
|
}
|
|
|
|
return svc
|
|
}
|
|
|
|
// SetAsynqInspector sets the Asynq inspector for worker stats
|
|
func (s *Service) SetAsynqInspector(inspector *asynq.Inspector) {
|
|
s.collector.SetAsynqInspector(inspector)
|
|
}
|
|
|
|
// Start begins collecting and publishing stats
|
|
func (s *Service) Start() {
|
|
log.Info().
|
|
Str("process", s.process).
|
|
Dur("interval", s.statsInterval).
|
|
Bool("enabled", s.logWriter.IsEnabled()).
|
|
Msg("Starting monitoring service")
|
|
|
|
s.collector.StartPublishing(s.statsInterval)
|
|
|
|
// Start settings sync if database is available
|
|
if s.db != nil {
|
|
go s.startSettingsSync()
|
|
}
|
|
}
|
|
|
|
// Stop stops the monitoring service. It is safe to call multiple times.
|
|
func (s *Service) Stop() {
|
|
s.stopOnce.Do(func() {
|
|
// Stop settings sync
|
|
close(s.settingsStopCh)
|
|
|
|
s.collector.Stop()
|
|
|
|
// Flush and close the log writer's background goroutine
|
|
s.logWriter.Close()
|
|
|
|
log.Info().Str("process", s.process).Msg("Monitoring service stopped")
|
|
})
|
|
}
|
|
|
|
// syncSettingsFromDB checks the database for the enable_monitoring setting
|
|
func (s *Service) syncSettingsFromDB() {
|
|
if s.db == nil {
|
|
return
|
|
}
|
|
|
|
var settings models.SubscriptionSettings
|
|
err := s.db.First(&settings, 1).Error
|
|
if err != nil {
|
|
if err == gorm.ErrRecordNotFound {
|
|
// No settings record, default to enabled
|
|
s.logWriter.SetEnabled(true)
|
|
}
|
|
// On other errors, keep current state
|
|
return
|
|
}
|
|
|
|
wasEnabled := s.logWriter.IsEnabled()
|
|
s.logWriter.SetEnabled(settings.EnableMonitoring)
|
|
|
|
if wasEnabled != settings.EnableMonitoring {
|
|
log.Info().
|
|
Str("process", s.process).
|
|
Bool("enabled", settings.EnableMonitoring).
|
|
Msg("Monitoring log capture setting changed")
|
|
}
|
|
}
|
|
|
|
// startSettingsSync periodically checks the database for settings changes
|
|
func (s *Service) startSettingsSync() {
|
|
ticker := time.NewTicker(SettingsSyncInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-s.settingsStopCh:
|
|
return
|
|
case <-ticker.C:
|
|
s.syncSettingsFromDB()
|
|
}
|
|
}
|
|
}
|
|
|
|
// SetEnabled manually enables or disables log capture
|
|
func (s *Service) SetEnabled(enabled bool) {
|
|
s.logWriter.SetEnabled(enabled)
|
|
}
|
|
|
|
// IsEnabled returns whether log capture is enabled
|
|
func (s *Service) IsEnabled() bool {
|
|
return s.logWriter.IsEnabled()
|
|
}
|
|
|
|
// SetDB sets the database connection for settings sync
|
|
// This can be called after NewService if DB wasn't available during initialization
|
|
func (s *Service) SetDB(db *gorm.DB) {
|
|
s.db = db
|
|
s.syncSettingsFromDB()
|
|
}
|
|
|
|
// LogWriter returns an io.Writer for zerolog that captures logs to Redis
|
|
func (s *Service) LogWriter() io.Writer {
|
|
return s.logWriter
|
|
}
|
|
|
|
// Handler returns the HTTP handler for monitoring endpoints
|
|
func (s *Service) Handler() *Handler {
|
|
return s.handler
|
|
}
|
|
|
|
// HTTPCollector returns the HTTP stats collector (nil for worker)
|
|
func (s *Service) HTTPCollector() *HTTPStatsCollector {
|
|
return s.httpCollector
|
|
}
|
|
|
|
// MetricsMiddleware returns the Echo middleware for HTTP metrics (API server only)
|
|
func (s *Service) MetricsMiddleware() echo.MiddlewareFunc {
|
|
if s.httpCollector == nil {
|
|
return nil
|
|
}
|
|
return MetricsMiddleware(s.httpCollector)
|
|
}
|