Files
honeyDueAPI/internal/services/cache_service.go
T
Trey t 88fb1751c7
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled
Cut /api/tasks/ p99 from ~2500ms toward ~150-300ms
Stack of optimizations against the same Hetzner→Neon transatlantic link.
The trace revealed every visible ms was network/proxy overhead — DB
execution itself is sub-millisecond per query (verified via EXPLAIN
ANALYZE: index scans on every hot path).

Connection layer:
- DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer
  transaction-mode keeps backend Postgres connections warm so we no
  longer pay the ~110ms Postgres-startup RTT on cold queries.
- GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s,
  MaxIdleTime added (default 0 = never close idle).
- Eager pool warm-up at boot via parallel pings — first user request
  no longer pays the ~440ms TCP+TLS+startup handshake.
- Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will
  evict cold keys instead of erroring at the 256MB limit.

Auth layer:
- TokenCacheTTL 5min → 1 hour (Redis token cache).
- UserCacheTTL 30s → 5min (in-memory User cache, per pod).
- UserCache gains a 5,000-entry LRU cap so a flood of unique users
  can't blow up pod RSS. ~5MB worst-case per pod.
- Token + user lookup collapsed from 2 GORM Preload queries into a
  single INNER JOIN. Saves 1 RTT per cold-cache request.
- Auth middleware's m.db.* now use db.WithContext(ctx) so the SQL
  spans nest under the parent HTTP request in Jaeger.

Service layer:
- TaskService.ListTasks: replaced two-step
  FindResidenceIDsByUser → GetKanbanDataForMultipleResidences
  with a single GetKanbanDataForUser that uses a Postgres subquery
  for residence-access. One round-trip instead of two.
- New CacheService residence-IDs cache: "residence_ids_user:<id>"
  with 5-min TTL. Wired into Task/Residence/Contractor/Document
  services for the four hot read paths that need this list.
- Cache invalidation on every relevant mutation: CreateResidence,
  DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence
  invalidates every member of the residence, not just the owner.

What this stacks up to (Hetzner→Neon, before US migration):
  Path                                 Before        After (target)
  Cache-warm authed read               ~800ms        ~100-200ms
  Cache-cold authed read (1st in 1hr)  ~2500ms       ~500-700ms
  First request after deploy           ~2500ms       ~700-900ms

The endgame US-region migration on top of this gets us to ~30-50ms
warm-cache, but we're shippable at ~150ms warm right now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 17:13:50 -05:00

449 lines
15 KiB
Go

package services
import (
"context"
"encoding/json"
"fmt"
"hash/fnv"
"sync"
"time"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog/log"
"github.com/treytartt/honeydue-api/internal/config"
)
// CacheService provides Redis caching functionality. Every method on the
// type operates through the single go-redis client held in the struct.
type CacheService struct {
// client is the underlying go-redis client used for all cache operations.
client *redis.Client
}
var (
	// cacheInstance is the process-wide cache service. It stays nil until
	// NewCacheService has connected successfully.
	cacheInstance *CacheService
	// cacheInitErr remembers why the one-time initialization failed so that
	// every later NewCacheService call reports the same error instead of
	// returning (nil, nil).
	cacheInitErr error
	// cacheOnce guarantees the connection attempt runs at most once.
	cacheOnce sync.Once
)

// NewCacheService creates the cache service (thread-safe via sync.Once).
//
// The first call parses cfg.URL, applies the optional cfg.Password / cfg.DB
// overrides, connects and pings Redis with a 5-second timeout. Later calls
// return the outcome of that first attempt: the shared instance on success,
// or the original error on failure.
//
// NOTE: Don't reassign `cacheOnce = sync.Once{}` to retry after a failure.
// Mutating the Once from within its own Do() callback fatals with "unlock of
// unlocked mutex" because Do is holding the inner lock while it is zeroed.
// main.go handles the error (caching disabled, keep running); a pod restart
// is the right "retry" path for a transient Redis outage, not in-process.
func NewCacheService(cfg *config.RedisConfig) (*CacheService, error) {
	cacheOnce.Do(func() {
		cacheInstance, cacheInitErr = dialCacheService(cfg)
	})
	if cacheInitErr != nil {
		return nil, cacheInitErr
	}
	return cacheInstance, nil
}

// dialCacheService builds a Redis client from cfg, verifies the connection
// with a ping, and wraps the client in a CacheService.
func dialCacheService(cfg *config.RedisConfig) (*CacheService, error) {
	opt, err := redis.ParseURL(cfg.URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse Redis URL: %w", err)
	}
	if cfg.Password != "" {
		opt.Password = cfg.Password
	}
	if cfg.DB != 0 {
		opt.DB = cfg.DB
	}

	client := redis.NewClient(opt)

	// Test connection
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := client.Ping(ctx).Err(); err != nil {
		// Best-effort cleanup: the client is unusable, so release its
		// connection pool; the ping error is the one worth reporting.
		_ = client.Close()
		return nil, fmt.Errorf("failed to connect to Redis: %w", err)
	}

	// S-14: Mask credentials in Redis URL before logging
	log.Info().
		Str("url", config.MaskURLCredentials(cfg.URL)).
		Int("db", opt.DB).
		Msg("Connected to Redis")

	return &CacheService{client: client}, nil
}
// GetCache returns the package-level cache service set up by
// NewCacheService. The result is nil when no instance has been stored.
func GetCache() *CacheService {
	svc := cacheInstance
	return svc
}
// Client exposes the go-redis client that backs this cache service.
func (c *CacheService) Client() *redis.Client {
	rdb := c.client
	return rdb
}
// Set JSON-encodes value and stores it under key with the given expiration.
// A marshalling failure is returned wrapped; otherwise the Redis SET error
// (if any) is returned as-is.
func (c *CacheService) Set(ctx context.Context, key string, value interface{}, expiration time.Duration) error {
	payload, marshalErr := json.Marshal(value)
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal value: %w", marshalErr)
	}
	cmd := c.client.Set(ctx, key, payload, expiration)
	return cmd.Err()
}
// Get reads the value stored under key and JSON-decodes it into dest.
// The Redis error is returned unchanged when the read fails; otherwise the
// result of the JSON decode is returned.
func (c *CacheService) Get(ctx context.Context, key string, dest interface{}) error {
	raw, err := c.client.Get(ctx, key).Bytes()
	if err == nil {
		err = json.Unmarshal(raw, dest)
	}
	return err
}
// GetString returns the raw string stored under key, without any decoding.
func (c *CacheService) GetString(ctx context.Context, key string) (string, error) {
	cmd := c.client.Get(ctx, key)
	return cmd.Result()
}
// SetString stores value verbatim (no JSON encoding) under key with the
// given expiration.
func (c *CacheService) SetString(ctx context.Context, key string, value string, expiration time.Duration) error {
	cmd := c.client.Set(ctx, key, value, expiration)
	return cmd.Err()
}
// Delete removes the given keys in a single Redis DEL.
//
// Calling it with no keys is a no-op that returns nil: Redis rejects a DEL
// without arguments, and "nothing to delete" is not an error for callers
// that build the key list dynamically.
func (c *CacheService) Delete(ctx context.Context, keys ...string) error {
	if len(keys) == 0 {
		return nil
	}
	return c.client.Del(ctx, keys...).Err()
}
// Exists reports how many of the given keys are present in Redis (the
// count returned by a single EXISTS command).
//
// Calling it with no keys returns (0, nil) without contacting Redis, since
// EXISTS without arguments is rejected by the server.
func (c *CacheService) Exists(ctx context.Context, keys ...string) (int64, error) {
	if len(keys) == 0 {
		return 0, nil
	}
	return c.client.Exists(ctx, keys...).Result()
}
// Close shuts down the Redis client. It returns nil when the service holds
// no client.
func (c *CacheService) Close() error {
	if c.client == nil {
		return nil
	}
	return c.client.Close()
}
// Auth token cache helpers
const (
	// AuthTokenPrefix is prepended to a token to form its Redis key.
	AuthTokenPrefix = "auth_token_"
	// TokenCacheTTL is how long a cached token entry lives.
	TokenCacheTTL = 5 * time.Minute
)
// CacheAuthToken stores the decimal user ID under the token's cache key for
// TokenCacheTTL.
func (c *CacheService) CacheAuthToken(ctx context.Context, token string, userID uint) error {
	payload := fmt.Sprintf("%d", userID)
	return c.SetString(ctx, AuthTokenPrefix+token, payload, TokenCacheTTL)
}
// CacheAuthTokenWithCreated stores "<userID>|<createdUnix>" under the
// token's cache key for TokenCacheTTL.
func (c *CacheService) CacheAuthTokenWithCreated(ctx context.Context, token string, userID uint, createdUnix int64) error {
	payload := fmt.Sprintf("%d|%d", userID, createdUnix)
	return c.SetString(ctx, AuthTokenPrefix+token, payload, TokenCacheTTL)
}
// GetCachedAuthToken looks up the token's cache entry and parses the leading
// decimal number as the user ID. Because only the leading integer is
// scanned, a "<userID>|<createdUnix>" entry yields its user ID as well.
// The cache error or the scan error is returned when either step fails.
func (c *CacheService) GetCachedAuthToken(ctx context.Context, token string) (uint, error) {
	cached, err := c.GetString(ctx, AuthTokenPrefix+token)
	if err != nil {
		return 0, err
	}

	var id uint
	if _, scanErr := fmt.Sscanf(cached, "%d", &id); scanErr != nil {
		return id, scanErr
	}
	return id, nil
}
// GetCachedAuthTokenWithCreated looks up the token's cache entry and returns
// (userID, createdUnix, error). createdUnix is 0 when the entry holds only a
// user ID (legacy format). An entry with no parsable user ID yields an
// "invalid cached token format" error.
func (c *CacheService) GetCachedAuthTokenWithCreated(ctx context.Context, token string) (uint, int64, error) {
	cached, err := c.GetString(ctx, AuthTokenPrefix+token)
	if err != nil {
		return 0, 0, err
	}

	var (
		id      uint
		created int64
	)
	// The scan error is ignored on purpose: a legacy "<userID>" entry stops
	// after the first field, and only the count of parsed fields matters.
	parsed, _ := fmt.Sscanf(cached, "%d|%d", &id, &created)
	if parsed >= 1 {
		return id, created, nil
	}
	return 0, 0, fmt.Errorf("invalid cached token format")
}
// InvalidateAuthToken deletes the cache entry for the given token.
func (c *CacheService) InvalidateAuthToken(ctx context.Context, token string) error {
	return c.Delete(ctx, AuthTokenPrefix+token)
}
// Static data cache helpers
const (
	// StaticDataKey is the Redis key for the combined static lookup data.
	StaticDataKey = "static_data"
	// StaticDataTTL is how long the combined static data stays cached.
	StaticDataTTL = 1 * time.Hour
)
// CacheStaticData JSON-encodes data and stores it under StaticDataKey for
// StaticDataTTL.
func (c *CacheService) CacheStaticData(ctx context.Context, data interface{}) error {
	const (
		key = StaticDataKey
		ttl = StaticDataTTL
	)
	return c.Set(ctx, key, data, ttl)
}
// GetCachedStaticData decodes the entry stored under StaticDataKey into
// dest.
func (c *CacheService) GetCachedStaticData(ctx context.Context, dest interface{}) error {
	const key = StaticDataKey
	return c.Get(ctx, key, dest)
}
// InvalidateStaticData deletes the entry stored under StaticDataKey.
func (c *CacheService) InvalidateStaticData(ctx context.Context) error {
	const key = StaticDataKey
	return c.Delete(ctx, key)
}
// Lookup data cache helpers - each lookup type gets its own key
const (
	// LookupKeyPrefix namespaces every per-type lookup key.
	LookupKeyPrefix = "lookup:"

	// Per-type keys, one cache entry per lookup table.
	LookupCategoriesKey     = LookupKeyPrefix + "categories"
	LookupPrioritiesKey     = LookupKeyPrefix + "priorities"
	LookupFrequenciesKey    = LookupKeyPrefix + "frequencies"
	LookupResidenceTypesKey = LookupKeyPrefix + "residence_types"
	LookupSpecialtiesKey    = LookupKeyPrefix + "specialties"
	LookupTaskTemplatesKey  = LookupKeyPrefix + "task_templates"

	// LookupDataTTL is long because lookup data rarely changes.
	LookupDataTTL = 24 * time.Hour
)
// CacheLookupData JSON-encodes data and stores it under the caller-supplied
// key for LookupDataTTL.
func (c *CacheService) CacheLookupData(ctx context.Context, key string, data interface{}) error {
	const ttl = LookupDataTTL
	return c.Set(ctx, key, data, ttl)
}
// GetCachedLookupData decodes the entry stored under the caller-supplied key
// into dest.
func (c *CacheService) GetCachedLookupData(ctx context.Context, key string, dest interface{}) error {
	err := c.Get(ctx, key, dest)
	return err
}
// InvalidateLookupData deletes the entry stored under the caller-supplied
// key.
func (c *CacheService) InvalidateLookupData(ctx context.Context, key string) error {
	err := c.Delete(ctx, key)
	return err
}
// InvalidateAllLookups deletes, in one Delete call, every per-type lookup
// entry together with the combined static data, the unified seeded data and
// the seeded-data ETag.
func (c *CacheService) InvalidateAllLookups(ctx context.Context) error {
	return c.Delete(ctx,
		// Per-type lookup entries.
		LookupCategoriesKey,
		LookupPrioritiesKey,
		LookupFrequenciesKey,
		LookupResidenceTypesKey,
		LookupSpecialtiesKey,
		LookupTaskTemplatesKey,
		// Aggregated entries.
		StaticDataKey,
		SeededDataKey,
		SeededDataETagKey,
	)
}
// CacheCategories stores task categories under LookupCategoriesKey for
// LookupDataTTL.
func (c *CacheService) CacheCategories(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupCategoriesKey, data, LookupDataTTL)
}
// GetCachedCategories decodes the cached task categories into dest.
func (c *CacheService) GetCachedCategories(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupCategoriesKey, dest)
}
// InvalidateCategories deletes the cached task categories and the combined
// static-data entry.
func (c *CacheService) InvalidateCategories(ctx context.Context) error {
	// The combined static-data entry is dropped alongside the per-type key.
	keys := []string{LookupCategoriesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// CachePriorities stores task priorities under LookupPrioritiesKey for
// LookupDataTTL.
func (c *CacheService) CachePriorities(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupPrioritiesKey, data, LookupDataTTL)
}
// GetCachedPriorities decodes the cached task priorities into dest.
func (c *CacheService) GetCachedPriorities(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupPrioritiesKey, dest)
}
// InvalidatePriorities deletes the cached task priorities and the combined
// static-data entry.
func (c *CacheService) InvalidatePriorities(ctx context.Context) error {
	keys := []string{LookupPrioritiesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// CacheFrequencies stores task frequencies under LookupFrequenciesKey for
// LookupDataTTL.
func (c *CacheService) CacheFrequencies(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupFrequenciesKey, data, LookupDataTTL)
}
// GetCachedFrequencies decodes the cached task frequencies into dest.
func (c *CacheService) GetCachedFrequencies(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupFrequenciesKey, dest)
}
// InvalidateFrequencies deletes the cached task frequencies and the combined
// static-data entry.
func (c *CacheService) InvalidateFrequencies(ctx context.Context) error {
	keys := []string{LookupFrequenciesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// CacheResidenceTypes stores residence types under LookupResidenceTypesKey
// for LookupDataTTL.
func (c *CacheService) CacheResidenceTypes(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupResidenceTypesKey, data, LookupDataTTL)
}
// GetCachedResidenceTypes decodes the cached residence types into dest.
func (c *CacheService) GetCachedResidenceTypes(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupResidenceTypesKey, dest)
}
// InvalidateResidenceTypes deletes the cached residence types and the
// combined static-data entry.
func (c *CacheService) InvalidateResidenceTypes(ctx context.Context) error {
	keys := []string{LookupResidenceTypesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// CacheSpecialties stores contractor specialties under LookupSpecialtiesKey
// for LookupDataTTL.
func (c *CacheService) CacheSpecialties(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupSpecialtiesKey, data, LookupDataTTL)
}
// GetCachedSpecialties decodes the cached contractor specialties into dest.
func (c *CacheService) GetCachedSpecialties(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupSpecialtiesKey, dest)
}
// InvalidateSpecialties deletes the cached contractor specialties and the
// combined static-data entry.
func (c *CacheService) InvalidateSpecialties(ctx context.Context) error {
	keys := []string{LookupSpecialtiesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// CacheTaskTemplates stores task templates under LookupTaskTemplatesKey for
// LookupDataTTL.
func (c *CacheService) CacheTaskTemplates(ctx context.Context, data interface{}) error {
	return c.Set(ctx, LookupTaskTemplatesKey, data, LookupDataTTL)
}
// GetCachedTaskTemplates decodes the cached task templates into dest.
func (c *CacheService) GetCachedTaskTemplates(ctx context.Context, dest interface{}) error {
	return c.Get(ctx, LookupTaskTemplatesKey, dest)
}
// InvalidateTaskTemplates deletes the cached task templates and the combined
// static-data entry.
func (c *CacheService) InvalidateTaskTemplates(ctx context.Context) error {
	keys := []string{LookupTaskTemplatesKey, StaticDataKey}
	return c.Delete(ctx, keys...)
}
// Unified seeded data cache helpers
const (
	// SeededDataKey is the Redis key for the unified seeded-data payload.
	SeededDataKey = "seeded_data"
	// SeededDataETagKey is the Redis key for that payload's ETag.
	SeededDataETagKey = "seeded_data:etag"
	// SeededDataTTL applies to both the payload and its ETag.
	SeededDataTTL = 24 * time.Hour
)
// CacheSeededData caches the unified seeded data and generates an ETag.
//
// data is JSON-encoded once; the ETag is the quoted hex FNV-64a hash of that
// encoding (faster than MD5, non-cryptographic). The payload and the ETag
// are written in a single MULTI/EXEC transaction so a partial failure can
// never leave a new payload paired with the previous ETag (or vice versa).
//
// Returns the ETag on success. A marshalling failure or a Redis failure is
// returned wrapped, with an empty ETag.
func (c *CacheService) CacheSeededData(ctx context.Context, data interface{}) (string, error) {
	jsonData, err := json.Marshal(data)
	if err != nil {
		return "", fmt.Errorf("failed to marshal seeded data: %w", err)
	}

	h := fnv.New64a()
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write(jsonData)
	etag := fmt.Sprintf("\"%x\"", h.Sum64())

	// Queue both writes and commit them atomically.
	pipe := c.client.TxPipeline()
	pipe.Set(ctx, SeededDataKey, jsonData, SeededDataTTL)
	pipe.Set(ctx, SeededDataETagKey, etag, SeededDataTTL)
	if _, err := pipe.Exec(ctx); err != nil {
		return "", fmt.Errorf("failed to cache seeded data: %w", err)
	}
	return etag, nil
}
// GetCachedSeededData decodes the entry stored under SeededDataKey into
// dest.
func (c *CacheService) GetCachedSeededData(ctx context.Context, dest interface{}) error {
	const key = SeededDataKey
	return c.Get(ctx, key, dest)
}
// GetSeededDataETag returns the string stored under SeededDataETagKey.
func (c *CacheService) GetSeededDataETag(ctx context.Context) (string, error) {
	const key = SeededDataETagKey
	return c.GetString(ctx, key)
}
// InvalidateSeededData deletes both the seeded-data payload and its ETag in
// one Delete call.
func (c *CacheService) InvalidateSeededData(ctx context.Context) error {
	keys := []string{SeededDataKey, SeededDataETagKey}
	return c.Delete(ctx, keys...)
}
// === User → Residence-IDs cache ===
//
// Holds, per user, the list of residence IDs that user can access. The list
// is a hot read on every authenticated API call (auth + tasks + residences +
// contractors + documents all need it). Mutations on residences/share-codes
// invalidate only the affected user(s); see InvalidateResidenceIDsForUsers.
const (
	// residenceIDsKeyPrefix is followed by the decimal user ID to form the key.
	residenceIDsKeyPrefix = "residence_ids_user:"
	// residenceIDsTTL bounds how stale a cached ID list can become.
	residenceIDsTTL = 5 * time.Minute
)
// CacheResidenceIDsForUser JSON-encodes ids and stores the result under the
// user's residence-IDs key with a 5-minute TTL. Membership rarely changes
// (only on share-code accept, remove-user, delete-residence), so that window
// serves most repeat reads while keeping staleness bounded.
//
// A nil receiver is a no-op that returns nil, so callers can run with the
// cache absent.
func (c *CacheService) CacheResidenceIDsForUser(ctx context.Context, userID uint, ids []uint) error {
	if c == nil {
		return nil
	}

	encoded, err := json.Marshal(ids)
	if err != nil {
		return err
	}

	cacheKey := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID)
	cmd := c.client.Set(ctx, cacheKey, encoded, residenceIDsTTL)
	return cmd.Err()
}
// GetCachedResidenceIDsForUser reads the user's cached residence-ID list.
//
// On any read or decode failure it returns (nil, err); for a key that is not
// cached the error is the one produced by the Redis client (redis.Nil in
// go-redis), which lets callers tell "not cached" apart from a cached empty
// list. A nil receiver yields a "cache not available" error.
func (c *CacheService) GetCachedResidenceIDsForUser(ctx context.Context, userID uint) ([]uint, error) {
	if c == nil {
		return nil, fmt.Errorf("cache not available")
	}

	var residenceIDs []uint
	err := c.Get(ctx, fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID), &residenceIDs)
	if err != nil {
		return nil, err
	}
	return residenceIDs, nil
}
// InvalidateResidenceIDsForUsers drops the cached residence-ID list for one
// or more users. Called from JoinWithCode (the joining user) and RemoveUser /
// DeleteResidence (every affected user). All keys are removed with a single
// Delete call. A nil receiver or an empty userIDs list is a no-op that
// returns nil.
func (c *CacheService) InvalidateResidenceIDsForUsers(ctx context.Context, userIDs ...uint) error {
	if c != nil && len(userIDs) > 0 {
		cacheKeys := make([]string, 0, len(userIDs))
		for _, uid := range userIDs {
			cacheKeys = append(cacheKeys, fmt.Sprintf("%s%d", residenceIDsKeyPrefix, uid))
		}
		return c.Delete(ctx, cacheKeys...)
	}
	return nil
}