Production hardening: security, resilience, observability, and compliance

Password complexity: custom validator requiring uppercase, lowercase, digit (min 8 chars)
Token expiry: 90-day token lifetime with refresh endpoint (60-90 day renewal window)
Health check: /api/health/ now pings Postgres + Redis, returns 503 on failure
Audit logging: async audit_log table for auth events (login, register, delete, etc.)
Circuit breaker: APNs/FCM push sends wrapped with 5-failure threshold, 30s recovery
FK indexes: 27 missing foreign key indexes across all tables (migration 017)
CSP header: default-src 'none'; frame-ancestors 'none'
Gzip compression: level 5 with media endpoint skipper
Prometheus metrics: /metrics endpoint using existing monitoring service
External timeouts: 15s push, 30s SMTP, context timeouts on all external calls

Migrations: 016 (token created_at), 017 (FK indexes), 018 (audit_log)
Tests: circuit breaker (15), audit service (8), token refresh (7), health (4),
       middleware expiry (5), validator (new)
This commit is contained in:
Trey T
2026-03-26 14:05:28 -05:00
parent 4abc57535e
commit b679f28e55
30 changed files with 2077 additions and 47 deletions

View File

@@ -0,0 +1,167 @@
package push
import (
"errors"
"sync"
"time"
)
// Circuit breaker states
const (
stateClosed = iota // Normal operation, requests pass through
stateOpen // Too many failures, requests are rejected
stateHalfOpen // Testing recovery, one request allowed through
)
// Default circuit breaker settings
const (
defaultFailureThreshold = 5 // Open after this many consecutive failures
defaultRecoveryTimeout = 30 * time.Second // Try again after this duration
)
// ErrCircuitOpen is returned when the circuit breaker is open and rejecting requests.
var ErrCircuitOpen = errors.New("circuit breaker is open")
// CircuitBreaker implements a simple circuit breaker pattern for external service calls.
// It is thread-safe and requires no external dependencies.
//
// States:
// - Closed: normal operation, all requests pass through. Consecutive failures are counted.
// - Open: after reaching the failure threshold, all requests are immediately rejected
// with ErrCircuitOpen until the recovery timeout elapses.
// - Half-Open: after the recovery timeout, one request is allowed through. If it
// succeeds the breaker resets to Closed; if it fails it returns to Open.
type CircuitBreaker struct {
mu sync.Mutex
state int
failureCount int
failureThreshold int
recoveryTimeout time.Duration
lastFailureTime time.Time
name string // For logging
}
// CircuitBreakerOption configures a CircuitBreaker.
type CircuitBreakerOption func(*CircuitBreaker)
// WithFailureThreshold sets the number of consecutive failures before opening the circuit.
func WithFailureThreshold(n int) CircuitBreakerOption {
return func(cb *CircuitBreaker) {
if n > 0 {
cb.failureThreshold = n
}
}
}
// WithRecoveryTimeout sets how long the circuit stays open before trying half-open.
func WithRecoveryTimeout(d time.Duration) CircuitBreakerOption {
return func(cb *CircuitBreaker) {
if d > 0 {
cb.recoveryTimeout = d
}
}
}
// NewCircuitBreaker creates a new CircuitBreaker with the given name and options.
// The name is used for logging and identification.
func NewCircuitBreaker(name string, opts ...CircuitBreakerOption) *CircuitBreaker {
cb := &CircuitBreaker{
state: stateClosed,
failureThreshold: defaultFailureThreshold,
recoveryTimeout: defaultRecoveryTimeout,
name: name,
}
for _, opt := range opts {
opt(cb)
}
return cb
}
// Allow checks whether a request should be allowed through.
// It returns true if the request can proceed, false if the circuit is open.
// When transitioning from open to half-open, it returns true for the probe request.
func (cb *CircuitBreaker) Allow() bool {
cb.mu.Lock()
defer cb.mu.Unlock()
switch cb.state {
case stateClosed:
return true
case stateOpen:
// Check if recovery timeout has elapsed
if time.Since(cb.lastFailureTime) >= cb.recoveryTimeout {
cb.state = stateHalfOpen
return true
}
return false
case stateHalfOpen:
// Only one request at a time in half-open state.
// The first caller that got here via Allow() is already in flight;
// reject subsequent callers until that probe resolves.
return false
default:
return true
}
}
// RecordSuccess records a successful request. If the breaker is half-open, it resets to closed.
func (cb *CircuitBreaker) RecordSuccess() {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.failureCount = 0
cb.state = stateClosed
}
// RecordFailure records a failed request. If the failure threshold is reached, the
// breaker transitions to the open state.
func (cb *CircuitBreaker) RecordFailure() {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.failureCount++
cb.lastFailureTime = time.Now()
if cb.failureCount >= cb.failureThreshold {
cb.state = stateOpen
}
}
// State returns the current state of the circuit breaker as a human-readable string.
func (cb *CircuitBreaker) State() string {
cb.mu.Lock()
defer cb.mu.Unlock()
switch cb.state {
case stateClosed:
return "closed"
case stateOpen:
return "open"
case stateHalfOpen:
return "half-open"
default:
return "unknown"
}
}
// Name returns the circuit breaker's name.
func (cb *CircuitBreaker) Name() string {
return cb.name
}
// Reset resets the circuit breaker to the closed state with zero failures.
func (cb *CircuitBreaker) Reset() {
cb.mu.Lock()
defer cb.mu.Unlock()
cb.state = stateClosed
cb.failureCount = 0
cb.lastFailureTime = time.Time{}
}
// Counts returns the current failure count (useful for testing and monitoring).
func (cb *CircuitBreaker) Counts() int {
cb.mu.Lock()
defer cb.mu.Unlock()
return cb.failureCount
}