Fix 113 hardening issues across the entire Go backend

Security:
- Replace all binding: tags with validate: + c.Validate() in admin handlers
- Add rate limiting to auth endpoints (login, register, password reset)
- Add security headers (HSTS, XSS protection, nosniff, frame options)
- Wire Google Pub/Sub token verification into webhook handler
- Replace ParseUnverified with proper OIDC/JWKS key verification
- Verify inner Apple JWS signatures in webhook handler
- Add io.LimitReader (1MB) to all webhook body reads (see the first sketch after this list)
- Add ownership verification to file deletion
- Move hardcoded admin credentials to env vars
- Add uniqueIndex to User.Email
- Hide ConfirmationCode from JSON serialization
- Mask confirmation codes in admin responses
- Use http.DetectContentType for upload validation (see the second sketch after this list)
- Fix path traversal in storage service
- Replace os.Getenv with Viper in stripe service
- Sanitize Redis URLs before logging
- Separate DEBUG_FIXED_CODES from DEBUG flag
- Reject weak SECRET_KEY in production
- Add host check on /_next/* proxy routes
- Use explicit localhost CORS origins in debug mode
- Replace err.Error() with generic messages in all admin error responses
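
A minimal sketch of the io.LimitReader cap on webhook bodies referenced above; the package name, helper name, and Echo handler shape are illustrative, not the actual code:

package webhooks

import (
    "io"

    "github.com/labstack/echo/v4"
)

const maxWebhookBody = 1 << 20 // 1MB

// readWebhookBody caps how much of the request body is read, so a
// malicious or misbehaving caller cannot stream an unbounded payload
// into memory before signature verification even starts.
func readWebhookBody(c echo.Context) ([]byte, error) {
    return io.ReadAll(io.LimitReader(c.Request().Body, maxWebhookBody))
}

Note that io.LimitReader truncates silently at the cap; if oversized bodies should be rejected outright instead, http.MaxBytesReader is the usual alternative.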
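
And a sketch of the content-sniffing upload validation; the allowed type set here is an assumption:

package uploads

import (
    "fmt"
    "net/http"
)

// validateUpload inspects the first 512 bytes of the file itself via
// http.DetectContentType, so the client-supplied Content-Type header is
// never trusted.
func validateUpload(data []byte) error {
    switch ct := http.DetectContentType(data); ct {
    case "image/jpeg", "image/png", "image/webp":
        return nil
    default:
        return fmt.Errorf("unsupported content type %q", ct)
    }
}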

Critical fixes:
- Rewrite FCM to HTTP v1 API with OAuth 2.0 service account auth
- Fix user_customuser -> auth_user table names in raw SQL
- Fix dashboard verified query to use UserProfile model
- Add escapeLikeWildcards() to prevent SQL wildcard injection (sketch below)
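
The commit does not show the body of escapeLikeWildcards(); a plausible minimal version escapes the two LIKE metacharacters plus the escape character itself before user input reaches a pattern:

package repository

import "strings"

// escapeLikeWildcards neutralizes % and _ so user input cannot act as a
// wildcard inside a LIKE pattern.
func escapeLikeWildcards(s string) string {
    return strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(s)
}

// Illustrative usage with a parameterized query:
//   db.Where("title LIKE ?", "%"+escapeLikeWildcards(q)+"%")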

Bug fixes:
- Add bounds checks for days/expiring_soon query params (1-3650)
- Add receipt_data/transaction_id empty-check to RestoreSubscription
- Change Active bool -> *bool in device handler (see the sketch after this list)
- Check all unchecked GORM/FindByIDWithProfile errors
- Add validation for notification hour fields (0-23)
- Add max=10000 validation on task description updates
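
The bool -> *bool change matters because of how encoding/json handles absent fields; the request and model names below are assumptions:

// With a plain bool, {"active": false} and an omitted field both decode
// to false, so a client could never explicitly deactivate a device. A
// pointer keeps the tri-state: nil = not sent, true/false = explicit.
type Device struct {
    Active bool
}

type UpdateDeviceRequest struct {
    Active *bool `json:"active"`
}

func applyUpdate(req UpdateDeviceRequest, device *Device) {
    if req.Active != nil {
        device.Active = *req.Active
    }
}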

Transactions & data integrity:
- Wrap registration flow in transaction (pattern sketched after this list)
- Wrap QuickComplete in transaction
- Move image creation inside completion transaction
- Wrap SetSpecialties in transaction
- Wrap GetOrCreateToken in transaction
- Wrap completion+image deletion in transaction
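
All of these follow the same GORM shape; the registration flow is sketched below with simplified models (the field sets are assumptions):

import "gorm.io/gorm"

type User struct {
    ID    uint
    Email string
}

type UserProfile struct {
    ID     uint
    UserID uint
}

func register(db *gorm.DB, email string) error {
    // Everything inside the closure commits together; any returned error
    // rolls the whole flow back, so a user row can never persist without
    // its profile row.
    return db.Transaction(func(tx *gorm.DB) error {
        user := User{Email: email}
        if err := tx.Create(&user).Error; err != nil {
            return err
        }
        return tx.Create(&UserProfile{UserID: user.ID}).Error
    })
}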

Performance:
- Batch completion summaries (2 queries vs 2N)
- Reuse single http.Client in IAP validation
- Cache dashboard counts (30s TTL)
- Batch COUNT queries in admin user list
- Add Limit(500) to document queries
- Add reminder_stage+due_date filters to reminder queries
- Parse AllowedTypes once at init
- In-memory user cache in auth middleware (30s TTL)
- Timezone change detection cache
- Optimize P95 with per-endpoint sorted buffers (diff shown below)
- Replace crypto/md5 with hash/fnv for ETags (see the sketch after this list)
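
The md5 -> fnv swap trades cryptographic strength, which an ETag does not need, for speed; a sketch, with the helper name assumed:

import (
    "fmt"
    "hash/fnv"
)

// etagFor derives a cheap validator from the response body. FNV-1a is
// not collision-resistant, but an ETag only needs fast change detection,
// not cryptographic integrity.
func etagFor(body []byte) string {
    h := fnv.New64a()
    h.Write(body) // hash.Hash writes never return an error
    return fmt.Sprintf(`"%x"`, h.Sum64())
}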

Code quality:
- Add sync.Once to all monitoring Stop()/Close() methods (diffs shown below)
- Replace 8 fmt.Printf calls with zerolog in auth service
- Log previously discarded errors
- Standardize delete response shapes
- Route hardcoded English through i18n
- Remove FileURL from DocumentResponse (keep MediaURL only)
- Thread user timezone through kanban board responses
- Initialize empty slices to prevent null JSON (see the sketch after this list)
- Extract shared field map for task Update/UpdateTx
- Delete unused SoftDeleteModel, min(), formatCron, legacy handlers
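
The null-JSON fix comes down to encoding/json's nil-vs-empty distinction, which is easy to verify:

import (
    "encoding/json"
    "fmt"
)

func main() {
    var nilTasks []int           // nil slice
    emptyTasks := make([]int, 0) // empty, non-nil slice
    a, _ := json.Marshal(nilTasks)
    b, _ := json.Marshal(emptyTasks)
    fmt.Println(string(a), string(b)) // prints: null []
}

Clients that expect an array break on null, so response slices are initialized with make(..., 0) rather than left nil.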

Worker & jobs:
- Wire Asynq email infrastructure into worker
- Register HandleReminderLogCleanup with daily 3AM cron
- Use per-user timezone in HandleSmartReminder (see the sketch after this list)
- Replace direct DB queries with repository calls
- Delete legacy reminder handlers (~200 lines)
- Delete unused task type constants
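
A sketch of the per-user timezone logic in HandleSmartReminder; the field names and the UTC fallback are assumptions:

import "time"

type User struct {
    Timezone         string // IANA name, e.g. "America/Chicago"
    NotificationHour int    // 0-23, validated per the bug fixes above
}

func reminderDueNow(u User, now time.Time) bool {
    loc, err := time.LoadLocation(u.Timezone)
    if err != nil {
        loc = time.UTC // fall back rather than fail the whole job
    }
    // Compare against the user's clock, not the server's: 09:00 in
    // America/Chicago is not 09:00 UTC.
    return now.In(loc).Hour() == u.NotificationHour
}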

Dependencies:
- Replace archived jung-kurt/gofpdf with go-pdf/fpdf (see the sketch after this list)
- Replace unmaintained gomail.v2 with wneessen/go-mail
- Add TODO for Echo jwt v3 transitive dep removal
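
go-pdf/fpdf is an API-compatible fork of the archived library, so the migration is essentially an import-path swap; the document content below is illustrative:

import "github.com/go-pdf/fpdf" // was: github.com/jung-kurt/gofpdf

func buildSamplePDF(path string) error {
    pdf := fpdf.New("P", "mm", "A4", "")
    pdf.AddPage()
    pdf.SetFont("Helvetica", "", 12)
    pdf.Cell(40, 10, "hello")
    return pdf.OutputFileAndClose(path)
}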

Test infrastructure:
- Fix MakeRequest/SeedLookupData error handling
- Replace os.Exit(0) with t.Skip() in scope/consistency tests (see the sketch after this list)
- Add 11 new FCM v1 tests
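
On the os.Exit(0) -> t.Skip() change: os.Exit aborts the entire test binary, silently dropping every remaining test and bypassing deferred cleanup, while t.Skip marks only that test as skipped. A sketch with an assumed gate condition:

import (
    "os"
    "testing"
)

func TestScopeConsistency(t *testing.T) {
    if os.Getenv("RUN_SCOPE_TESTS") == "" { // gate is illustrative
        t.Skip("scope consistency tests disabled in this environment")
    }
    // ...
}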

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Trey t
Date:   2026-03-18 23:14:13 -05:00
Parent: 3b86d0aae1
Commit: 42a5533a56
95 changed files with 2892 additions and 1783 deletions

View File

@@ -2,6 +2,7 @@ package monitoring
 import (
     "runtime"
+    "sync"
     "time"

     "github.com/hibiken/asynq"
@@ -20,6 +21,7 @@ type Collector struct {
     httpCollector *HTTPStatsCollector // nil for worker
     asynqClient   *asynq.Inspector    // nil for api
     stopChan      chan struct{}
+    stopOnce      sync.Once
 }

 // NewCollector creates a new stats collector
@@ -193,7 +195,9 @@ func (c *Collector) publishStats() {
     }
 }

-// Stop stops the stats publishing
+// Stop stops the stats publishing. It is safe to call multiple times.
 func (c *Collector) Stop() {
-    close(c.stopChan)
+    c.stopOnce.Do(func() {
+        close(c.stopChan)
+    })
 }

View File

@@ -150,7 +150,10 @@ func (h *Handler) WebSocket(c echo.Context) error {
     defer statsTicker.Stop()

     // Send initial stats
-    h.sendStats(conn, &wsMu)
+    if err := h.sendStats(conn, &wsMu); err != nil {
+        cancel()
+        return nil
+    }

     for {
         select {
@@ -173,11 +176,16 @@ func (h *Handler) WebSocket(c echo.Context) error {
             if err != nil {
                 log.Debug().Err(err).Msg("WebSocket write error")
                 cancel()
+                return nil
             }
         case <-statsTicker.C:
             // Send periodic stats update
-            h.sendStats(conn, &wsMu)
+            if err := h.sendStats(conn, &wsMu); err != nil {
+                cancel()
+                return nil
+            }
         case <-ctx.Done():
             return nil
@@ -185,9 +193,11 @@ func (h *Handler) WebSocket(c echo.Context) error {
         }
     }
 }

-func (h *Handler) sendStats(conn *websocket.Conn, mu *sync.Mutex) {
+func (h *Handler) sendStats(conn *websocket.Conn, mu *sync.Mutex) error {
     allStats, err := h.statsStore.GetAllStats()
     if err != nil {
         log.Error().Err(err).Msg("failed to send stats")
+        return err
     }

     wsMsg := WSMessage{
@@ -196,6 +206,12 @@ func (h *Handler) sendStats(conn *websocket.Conn, mu *sync.Mutex) {
     }

     mu.Lock()
-    conn.WriteJSON(wsMsg)
+    err = conn.WriteJSON(wsMsg)
     mu.Unlock()
+
+    if err != nil {
+        log.Debug().Err(err).Msg("WebSocket write error sending stats")
+    }
+    return err
 }

View File

@@ -10,39 +10,33 @@ import (
 // HTTPStatsCollector collects HTTP request metrics
 type HTTPStatsCollector struct {
-    mu           sync.RWMutex
-    requests     map[string]int64         // endpoint -> count
-    totalLatency map[string]time.Duration // endpoint -> total latency
-    errors       map[string]int64         // endpoint -> error count
-    byStatus     map[int]int64            // status code -> count
-    latencies    []latencySample          // recent latency samples for P95
-    startTime    time.Time
-    lastReset    time.Time
-}
-
-type latencySample struct {
-    endpoint  string
-    latency   time.Duration
-    timestamp time.Time
+    mu                sync.RWMutex
+    requests          map[string]int64           // endpoint -> count
+    totalLatency      map[string]time.Duration   // endpoint -> total latency
+    errors            map[string]int64           // endpoint -> error count
+    byStatus          map[int]int64              // status code -> count
+    endpointLatencies map[string][]time.Duration // per-endpoint sorted latency buffers for P95
+    startTime         time.Time
+    lastReset         time.Time
 }

 const (
-    maxLatencySamples = 1000
-    maxEndpoints      = 200           // Cap unique endpoints tracked
-    statsResetPeriod  = 1 * time.Hour // Reset stats periodically to prevent unbounded growth
+    maxLatencySamplesPerEndpoint = 200           // Max latency samples kept per endpoint
+    maxEndpoints                 = 200           // Cap unique endpoints tracked
+    statsResetPeriod             = 1 * time.Hour // Reset stats periodically to prevent unbounded growth
 )

 // NewHTTPStatsCollector creates a new HTTP stats collector
 func NewHTTPStatsCollector() *HTTPStatsCollector {
     now := time.Now()
     return &HTTPStatsCollector{
-        requests:     make(map[string]int64),
-        totalLatency: make(map[string]time.Duration),
-        errors:       make(map[string]int64),
-        byStatus:     make(map[int]int64),
-        latencies:    make([]latencySample, 0, maxLatencySamples),
-        startTime:    now,
-        lastReset:    now,
+        requests:          make(map[string]int64),
+        totalLatency:      make(map[string]time.Duration),
+        errors:            make(map[string]int64),
+        byStatus:          make(map[int]int64),
+        endpointLatencies: make(map[string][]time.Duration),
+        startTime:         now,
+        lastReset:         now,
     }
 }
@@ -70,17 +64,22 @@ func (c *HTTPStatsCollector) Record(endpoint string, latency time.Duration, stat
         c.errors[endpoint]++
     }

-    // Store latency sample
-    c.latencies = append(c.latencies, latencySample{
-        endpoint:  endpoint,
-        latency:   latency,
-        timestamp: time.Now(),
-    })
+    // Insert latency into per-endpoint sorted buffer using binary search
+    buf := c.endpointLatencies[endpoint]
+    idx := sort.Search(len(buf), func(i int) bool {
+        return buf[i] >= latency
+    })
+    buf = append(buf, 0)
+    copy(buf[idx+1:], buf[idx:])
+    buf[idx] = latency

-    // Keep only recent samples
-    if len(c.latencies) > maxLatencySamples {
-        c.latencies = c.latencies[len(c.latencies)-maxLatencySamples:]
+    // Trim to max samples per endpoint by removing the median element
+    // to preserve distribution tails (important for P95 accuracy)
+    if len(buf) > maxLatencySamplesPerEndpoint {
+        mid := len(buf) / 2
+        buf = append(buf[:mid], buf[mid+1:]...)
     }
+    c.endpointLatencies[endpoint] = buf
 }

 // resetLocked resets stats while holding the lock
@@ -89,7 +88,7 @@ func (c *HTTPStatsCollector) resetLocked() {
     c.totalLatency = make(map[string]time.Duration)
     c.errors = make(map[string]int64)
     c.byStatus = make(map[int]int64)
-    c.latencies = make([]latencySample, 0, maxLatencySamples)
+    c.endpointLatencies = make(map[string][]time.Duration)
     c.lastReset = time.Now()
     // Keep startTime for uptime calculation
 }
@@ -147,33 +146,23 @@ func (c *HTTPStatsCollector) GetStats() HTTPStats {
     return stats
 }

-// calculateP95 calculates the 95th percentile latency for an endpoint
-// Must be called with read lock held
+// calculateP95 calculates the 95th percentile latency for an endpoint.
+// The per-endpoint buffer is maintained in sorted order during insertion,
+// so this is an O(1) index lookup.
+// Must be called with read lock held.
 func (c *HTTPStatsCollector) calculateP95(endpoint string) float64 {
-    var endpointLatencies []time.Duration
-    for _, sample := range c.latencies {
-        if sample.endpoint == endpoint {
-            endpointLatencies = append(endpointLatencies, sample.latency)
-        }
-    }
-
-    if len(endpointLatencies) == 0 {
+    buf := c.endpointLatencies[endpoint]
+    if len(buf) == 0 {
         return 0
     }

-    // Sort latencies
-    sort.Slice(endpointLatencies, func(i, j int) bool {
-        return endpointLatencies[i] < endpointLatencies[j]
-    })
-
-    // Calculate P95 index
-    p95Index := int(float64(len(endpointLatencies)) * 0.95)
-    if p95Index >= len(endpointLatencies) {
-        p95Index = len(endpointLatencies) - 1
+    // Buffer is already sorted; direct index lookup
+    p95Index := int(float64(len(buf)) * 0.95)
+    if p95Index >= len(buf) {
+        p95Index = len(buf) - 1
     }

-    return float64(endpointLatencies[p95Index].Milliseconds())
+    return float64(buf[p95Index].Milliseconds())
 }

 // Reset clears all collected stats
@@ -185,7 +174,7 @@ func (c *HTTPStatsCollector) Reset() {
     c.totalLatency = make(map[string]time.Duration)
     c.errors = make(map[string]int64)
     c.byStatus = make(map[int]int64)
-    c.latencies = make([]latencySample, 0, maxLatencySamples)
+    c.endpointLatencies = make(map[string][]time.Duration)
     c.startTime = time.Now()
 }

View File

@@ -2,6 +2,7 @@ package monitoring
 import (
     "io"
+    "sync"
     "time"

     "github.com/hibiken/asynq"
@@ -31,6 +32,8 @@ type Service struct {
     logWriter      *RedisLogWriter
     db             *gorm.DB
     settingsStopCh chan struct{}
+    stopOnce       sync.Once
+    statsInterval  time.Duration
 }

 // Config holds configuration for the monitoring service
@@ -71,6 +74,7 @@ func NewService(cfg Config) *Service {
         logWriter:      logWriter,
         db:             cfg.DB,
         settingsStopCh: make(chan struct{}),
+        statsInterval:  cfg.StatsInterval,
     }

     // Check initial setting from database
@@ -90,11 +94,11 @@ func (s *Service) SetAsynqInspector(inspector *asynq.Inspector) {
 func (s *Service) Start() {
     log.Info().
         Str("process", s.process).
-        Dur("interval", DefaultStatsInterval).
+        Dur("interval", s.statsInterval).
         Bool("enabled", s.logWriter.IsEnabled()).
         Msg("Starting monitoring service")

-    s.collector.StartPublishing(DefaultStatsInterval)
+    s.collector.StartPublishing(s.statsInterval)

     // Start settings sync if database is available
     if s.db != nil {
@@ -102,17 +106,19 @@
     }
 }

-// Stop stops the monitoring service
+// Stop stops the monitoring service. It is safe to call multiple times.
 func (s *Service) Stop() {
-    // Stop settings sync
-    close(s.settingsStopCh)
-
-    s.collector.Stop()
-
-    // Flush and close the log writer's background goroutine
-    s.logWriter.Close()
-
-    log.Info().Str("process", s.process).Msg("Monitoring service stopped")
+    s.stopOnce.Do(func() {
+        // Stop settings sync
+        close(s.settingsStopCh)
+
+        s.collector.Stop()
+
+        // Flush and close the log writer's background goroutine
+        s.logWriter.Close()
+
+        log.Info().Str("process", s.process).Msg("Monitoring service stopped")
+    })
 }

 // syncSettingsFromDB checks the database for the enable_monitoring setting
// syncSettingsFromDB checks the database for the enable_monitoring setting

View File

@@ -2,6 +2,7 @@ package monitoring
 import (
     "encoding/json"
+    "sync"
     "sync/atomic"
     "time"
@@ -18,11 +19,12 @@
 // It uses a single background goroutine with a buffered channel instead of
 // spawning a new goroutine per log line, preventing unbounded goroutine growth.
 type RedisLogWriter struct {
-    buffer  *LogBuffer
-    process string
-    enabled atomic.Bool
-    ch      chan LogEntry
-    done    chan struct{}
+    buffer    *LogBuffer
+    process   string
+    enabled   atomic.Bool
+    ch        chan LogEntry
+    done      chan struct{}
+    closeOnce sync.Once
 }

 // NewRedisLogWriter creates a new writer that captures logs to Redis.
@@ -53,9 +55,12 @@ func (w *RedisLogWriter) drainLoop() {
 // Close shuts down the background goroutine. It should be called during
 // graceful shutdown to ensure all buffered entries are flushed.
+// It is safe to call multiple times.
 func (w *RedisLogWriter) Close() {
-    close(w.ch)
-    <-w.done // Wait for drain to finish
+    w.closeOnce.Do(func() {
+        close(w.ch)
+        <-w.done // Wait for drain to finish
+    })
 }

 // SetEnabled enables or disables log capture to Redis