88fb1751c7
Stack of optimizations against the same Hetzner→Neon transatlantic link. The trace revealed that every visible ms was network/proxy overhead — DB execution itself is sub-millisecond per query (verified via EXPLAIN ANALYZE: index scans on every hot path).

Connection layer:
- DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer transaction-mode keeps backend Postgres connections warm so we no longer pay the ~110ms Postgres-startup RTT on cold queries.
- GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s, MaxIdleTime added (default 0 = never close idle).
- Eager pool warm-up at boot via parallel pings — first user request no longer pays the ~440ms TCP+TLS+startup handshake.
- Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will evict cold keys instead of erroring at the 256MB limit.

Auth layer:
- TokenCacheTTL 5min → 1 hour (Redis token cache).
- UserCacheTTL 30s → 5min (in-memory User cache, per pod).
- UserCache gains a 5,000-entry LRU cap so a flood of unique users can't blow up pod RSS. ~5MB worst-case per pod.
- Token + user lookup collapsed from 2 GORM Preload queries into a single INNER JOIN. Saves 1 RTT per cold-cache request.
- Auth middleware's m.db.* calls now use db.WithContext(ctx) so the SQL spans nest under the parent HTTP request in Jaeger.

Service layer:
- TaskService.ListTasks: replaced two-step FindResidenceIDsByUser → GetKanbanDataForMultipleResidences with a single GetKanbanDataForUser that uses a Postgres subquery for residence-access. One round-trip instead of two.
- New CacheService residence-IDs cache: "residence_ids_user:<id>" with 5-min TTL. Wired into Task/Residence/Contractor/Document services for the four hot read paths that need this list.
- Cache invalidation on every relevant mutation: CreateResidence, DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence invalidates every member of the residence, not just the owner.
What this stacks up to (Hetzner→Neon, before US migration):

Path                                  Before     After (target)
Cache-warm authed read                ~800ms     ~100-200ms
Cache-cold authed read (1st in 1hr)   ~2500ms    ~500-700ms
First request after deploy            ~2500ms    ~700-900ms

The endgame US-region migration on top of this gets us to ~30-50ms warm-cache, but we're shippable at ~150ms warm right now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
177 lines
4.9 KiB
Go
177 lines
4.9 KiB
Go
package middleware
|
|
|
|
import (
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/treytartt/honeydue-api/internal/models"
)
|
|
|
|
// userCacheEntry holds a cached user record with an expiration time.
|
|
type userCacheEntry struct {
|
|
user *models.User
|
|
expiresAt time.Time
|
|
}
|
|
|
|
// UserCache is an in-memory, TTL-bounded cache of User records keyed by user
// ID. Entries live in a sync.Map so the cache can be shared between
// goroutines. Stale entries are dropped lazily (on Get, and by the sweeps that
// Set triggers), and maxSize bounds how many entries are kept.
type UserCache struct {
	// Entry storage and sizing.
	store   sync.Map      // user ID (uint) -> *userCacheEntry
	size    atomic.Int64  // approximate entry count; sync.Map has no Len()
	maxSize int64         // entry cap; <=0 disables the cap
	ttl     time.Duration // lifetime given to each entry when it is stored

	// Periodic sweep bookkeeping.
	gcMu    sync.Mutex    // guards lastGC
	lastGC  time.Time     // when the most recent periodic sweep was claimed
	gcEvery time.Duration // minimum spacing between periodic sweeps
}
|
|
|
|
// NewUserCache creates a UserCache with the given TTL for entries.
|
|
// maxSize is the soft upper bound on the number of cached users; when
|
|
// exceeded, the next Set will trigger an eviction sweep before storing.
|
|
// Pass <=0 for no size cap.
|
|
func NewUserCache(ttl time.Duration, maxSize int) *UserCache {
|
|
c := &UserCache{
|
|
ttl: ttl,
|
|
lastGC: time.Now(),
|
|
gcEvery: 2 * time.Minute,
|
|
maxSize: int64(maxSize),
|
|
}
|
|
return c
|
|
}
|
|
|
|
// Get returns a cached user by ID, or nil if not found or expired.
|
|
func (c *UserCache) Get(userID uint) *models.User {
|
|
val, ok := c.store.Load(userID)
|
|
if !ok {
|
|
return nil
|
|
}
|
|
entry := val.(*userCacheEntry)
|
|
if time.Now().After(entry.expiresAt) {
|
|
if _, loaded := c.store.LoadAndDelete(userID); loaded {
|
|
c.size.Add(-1)
|
|
}
|
|
return nil
|
|
}
|
|
// Return a shallow copy so callers cannot mutate the cached value.
|
|
user := *entry.user
|
|
return &user
|
|
}
|
|
|
|
// Set stores a user in the cache. It also triggers a background garbage-
|
|
// collection sweep if enough time has elapsed since the last one or if the
|
|
// cache has grown past maxSize.
|
|
func (c *UserCache) Set(user *models.User) {
|
|
// Store a copy to prevent external mutation of the cached object.
|
|
copied := *user
|
|
if _, loaded := c.store.Swap(user.ID, &userCacheEntry{
|
|
user: &copied,
|
|
expiresAt: time.Now().Add(c.ttl),
|
|
}); !loaded {
|
|
c.size.Add(1)
|
|
}
|
|
if c.maxSize > 0 && c.size.Load() > c.maxSize {
|
|
c.evictUntilUnderCap()
|
|
}
|
|
c.maybeGC()
|
|
}
|
|
|
|
// evictUntilUnderCap walks the cache and drops the oldest expirable entries
|
|
// until size is under maxSize. Cheap O(n) walk; runs only when the cap is
|
|
// breached, which should be rare in practice (TTL handles most eviction).
|
|
func (c *UserCache) evictUntilUnderCap() {
|
|
now := time.Now()
|
|
// First pass: drop expired entries.
|
|
c.store.Range(func(key, value any) bool {
|
|
entry := value.(*userCacheEntry)
|
|
if now.After(entry.expiresAt) {
|
|
if _, loaded := c.store.LoadAndDelete(key); loaded {
|
|
c.size.Add(-1)
|
|
}
|
|
}
|
|
return c.size.Load() > c.maxSize
|
|
})
|
|
// Second pass: if still over cap, drop entries closest to expiry.
|
|
if c.size.Load() <= c.maxSize {
|
|
return
|
|
}
|
|
type scored struct {
|
|
key any
|
|
expiresAt time.Time
|
|
}
|
|
candidates := make([]scored, 0, 64)
|
|
c.store.Range(func(key, value any) bool {
|
|
entry := value.(*userCacheEntry)
|
|
candidates = append(candidates, scored{key, entry.expiresAt})
|
|
return true
|
|
})
|
|
// Sort by expiry ascending — drop closest-to-expiry first.
|
|
for i := 1; i < len(candidates); i++ {
|
|
for j := i; j > 0 && candidates[j-1].expiresAt.After(candidates[j].expiresAt); j-- {
|
|
candidates[j-1], candidates[j] = candidates[j], candidates[j-1]
|
|
}
|
|
}
|
|
overshoot := int(c.size.Load() - c.maxSize)
|
|
for i := 0; i < overshoot && i < len(candidates); i++ {
|
|
if _, loaded := c.store.LoadAndDelete(candidates[i].key); loaded {
|
|
c.size.Add(-1)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Invalidate removes a user from the cache by ID.
|
|
func (c *UserCache) Invalidate(userID uint) {
|
|
if _, loaded := c.store.LoadAndDelete(userID); loaded {
|
|
c.size.Add(-1)
|
|
}
|
|
}
|
|
|
|
// maybeGC lazily sweeps expired entries at most once per gcEvery interval.
|
|
func (c *UserCache) maybeGC() {
|
|
c.gcMu.Lock()
|
|
if time.Since(c.lastGC) < c.gcEvery {
|
|
c.gcMu.Unlock()
|
|
return
|
|
}
|
|
c.lastGC = time.Now()
|
|
c.gcMu.Unlock()
|
|
|
|
now := time.Now()
|
|
c.store.Range(func(key, value any) bool {
|
|
entry := value.(*userCacheEntry)
|
|
if now.After(entry.expiresAt) {
|
|
if _, loaded := c.store.LoadAndDelete(key); loaded {
|
|
c.size.Add(-1)
|
|
}
|
|
}
|
|
return true
|
|
})
|
|
}
|
|
|
|
// TimezoneCache remembers the most recently seen timezone for each user ID so
// the timezone middleware can skip the database write when nothing changed.
type TimezoneCache struct {
	store sync.Map // user ID (uint) -> timezone name (string)
}

// NewTimezoneCache returns an empty, ready-to-use TimezoneCache.
func NewTimezoneCache() *TimezoneCache {
	return new(TimezoneCache)
}

// GetAndCompare reports whether tz equals the timezone already cached for
// userID. On a mismatch, or when nothing is cached yet, it records tz and
// returns false to signal that the caller should persist the new value.
func (tc *TimezoneCache) GetAndCompare(userID uint, tz string) (unchanged bool) {
	// A missing key yields a nil value, which fails the string assertion and
	// falls through to the store below.
	prev, _ := tc.store.Load(userID)
	if known, isString := prev.(string); isString && known == tz {
		return true
	}
	tc.store.Store(userID, tz)
	return false
}
|