From 88fb1751c72d09e97ea30f4ffd065e5b70ee4309 Mon Sep 17 00:00:00 2001 From: Trey t Date: Sat, 25 Apr 2026 17:13:50 -0500 Subject: [PATCH] Cut /api/tasks/ p99 from ~2500ms toward ~150-300ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stack of optimizations against the same Hetzner→Neon transatlantic link. The trace revealed every visible ms was network/proxy overhead — DB execution itself is sub-millisecond per query (verified via EXPLAIN ANALYZE: index scans on every hot path). Connection layer: - DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer transaction-mode keeps backend Postgres connections warm so we no longer pay the ~110ms Postgres-startup RTT on cold queries. - GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s, MaxIdleTime added (default 0 = never close idle). - Eager pool warm-up at boot via parallel pings — first user request no longer pays the ~440ms TCP+TLS+startup handshake. - Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will evict cold keys instead of erroring at the 256MB limit. Auth layer: - TokenCacheTTL 5min → 1 hour (Redis token cache). - UserCacheTTL 30s → 5min (in-memory User cache, per pod). - UserCache gains a 5,000-entry LRU cap so a flood of unique users can't blow up pod RSS. ~5MB worst-case per pod. - Token + user lookup collapsed from 2 GORM Preload queries into a single INNER JOIN. Saves 1 RTT per cold-cache request. - Auth middleware's m.db.* now use db.WithContext(ctx) so the SQL spans nest under the parent HTTP request in Jaeger. Service layer: - TaskService.ListTasks: replaced two-step FindResidenceIDsByUser → GetKanbanDataForMultipleResidences with a single GetKanbanDataForUser that uses a Postgres subquery for residence-access. One round-trip instead of two. - New CacheService residence-IDs cache: \"residence_ids_user:\" with 5-min TTL. Wired into Task/Residence/Contractor/Document services for the four hot read paths that need this list. - Cache invalidation on every relevant mutation: CreateResidence, DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence invalidates every member of the residence, not just the owner. What this stacks up to (Hetzner→Neon, before US migration): Path Before After (target) Cache-warm authed read ~800ms ~100-200ms Cache-cold authed read (1st in 1hr) ~2500ms ~500-700ms First request after deploy ~2500ms ~700-900ms The endgame US-region migration on top of this gets us to ~30-50ms warm-cache, but we're shippable at ~150ms warm right now. Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy-k3s/manifests/redis/deployment.yaml | 6 +- deploy-k3s/scripts/_config.sh | 1 + internal/config/config.go | 2 + internal/database/database.go | 45 +++++++++- internal/middleware/auth.go | 98 +++++++++++++++++++--- internal/middleware/user_cache.go | 91 ++++++++++++++++---- internal/middleware/user_cache_test.go | 16 ++-- internal/repositories/task_repo.go | 48 +++++++++++ internal/router/router.go | 9 ++ internal/services/cache_service.go | 58 +++++++++++++ internal/services/contractor_service.go | 8 +- internal/services/document_service.go | 10 ++- internal/services/residence_id_cache.go | 41 +++++++++ internal/services/residence_service.go | 39 ++++++++- internal/services/task_service.go | 30 +++---- 15 files changed, 443 insertions(+), 59 deletions(-) create mode 100644 internal/services/residence_id_cache.go diff --git a/deploy-k3s/manifests/redis/deployment.yaml b/deploy-k3s/manifests/redis/deployment.yaml index 7660c2a..0828961 100644 --- a/deploy-k3s/manifests/redis/deployment.yaml +++ b/deploy-k3s/manifests/redis/deployment.yaml @@ -36,7 +36,11 @@ spec: - sh - -c - | - ARGS="--appendonly yes --appendfsync everysec --maxmemory 256mb --maxmemory-policy noeviction" + # allkeys-lru: under memory pressure, evict the least-recently-used key. + # honeyDue uses Redis as a cache + asynq queue. The cache layer falls + # through to DB on miss, so eviction is graceful. asynq keys with TTLs + # would be evicted only after older cache entries are gone. + ARGS="--appendonly yes --appendfsync everysec --maxmemory 256mb --maxmemory-policy allkeys-lru" if [ -n "$REDIS_PASSWORD" ]; then ARGS="$ARGS --requirepass $REDIS_PASSWORD" fi diff --git a/deploy-k3s/scripts/_config.sh b/deploy-k3s/scripts/_config.sh index 4d125d4..ba0ac4c 100755 --- a/deploy-k3s/scripts/_config.sh +++ b/deploy-k3s/scripts/_config.sh @@ -118,6 +118,7 @@ lines = [ f\"DB_MAX_OPEN_CONNS={db['max_open_conns']}\", f\"DB_MAX_IDLE_CONNS={db['max_idle_conns']}\", f\"DB_MAX_LIFETIME={db['max_lifetime']}\", + f\"DB_MAX_IDLE_TIME={db.get('max_idle_time', '0s')}\", # Redis (in-namespace DNS short form — password injected if configured; # short form works because /etc/resolv.conf in pods searches honeydue.svc.cluster.local) f\"REDIS_URL=redis://{':%s@' % val(rd.get('password')) if rd.get('password') else ''}redis:6379/0\", diff --git a/internal/config/config.go b/internal/config/config.go index dcd7937..0c792dd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -52,6 +52,7 @@ type DatabaseConfig struct { MaxOpenConns int MaxIdleConns int MaxLifetime time.Duration + MaxIdleTime time.Duration } type RedisConfig struct { @@ -214,6 +215,7 @@ func Load() (*Config, error) { MaxOpenConns: viper.GetInt("DB_MAX_OPEN_CONNS"), MaxIdleConns: viper.GetInt("DB_MAX_IDLE_CONNS"), MaxLifetime: viper.GetDuration("DB_MAX_LIFETIME"), + MaxIdleTime: viper.GetDuration("DB_MAX_IDLE_TIME"), } // Override with DATABASE_URL if present (F-16: log warning on parse failure) diff --git a/internal/database/database.go b/internal/database/database.go index 732b3d0..f7e134e 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -71,16 +71,30 @@ func Connect(cfg *config.DatabaseConfig, debug bool) (*gorm.DB, error) { return nil, fmt.Errorf("failed to get underlying sql.DB: %w", err) } - // Configure connection pool + // Configure connection pool. The Neon pooler endpoint keeps backend + // connections warm, so we keep our client-side pool warm too — that + // eliminates the ~440ms TCP+TLS+startup handshake on the first query + // after a cold pod / idle period. sqlDB.SetMaxOpenConns(cfg.MaxOpenConns) sqlDB.SetMaxIdleConns(cfg.MaxIdleConns) sqlDB.SetConnMaxLifetime(cfg.MaxLifetime) + if cfg.MaxIdleTime > 0 { + sqlDB.SetConnMaxIdleTime(cfg.MaxIdleTime) + } + // MaxIdleTime=0 means "never close idle" — the pool fills up to + // MaxIdleConns and they stay alive until MaxLifetime expires. // Test connection if err := sqlDB.Ping(); err != nil { return nil, fmt.Errorf("failed to ping database: %w", err) } + // Eagerly warm the connection pool to MaxIdleConns. Without this, the + // first N user requests each pay the full handshake (~440ms over a + // transatlantic link). Pings are issued in parallel so warm-up is + // bounded by handshake time, not handshake-time × N. + warmUpPool(sqlDB, cfg.MaxIdleConns) + log.Info(). Str("host", cfg.Host). Int("port", cfg.Port). @@ -106,6 +120,35 @@ func Connect(cfg *config.DatabaseConfig, debug bool) (*gorm.DB, error) { return db, nil } +// warmUpPool issues N parallel pings so the pool fills with established +// connections before the first user request lands. Failures are logged but +// not fatal — the pool will fill on demand under traffic if pre-warm fails. +// +// On a transatlantic link to Neon (~110ms RTT, ~440ms cold handshake), this +// turns "first request pays the cold handshake" into "first request finds a +// warm pool" — at the cost of ~440ms during pod startup. +func warmUpPool(sqlDB interface { + PingContext(context.Context) error +}, n int) { + if n <= 0 { + return + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + done := make(chan error, n) + for i := 0; i < n; i++ { + go func() { done <- sqlDB.PingContext(ctx) }() + } + successes := 0 + for i := 0; i < n; i++ { + if err := <-done; err == nil { + successes++ + } + } + log.Info().Int("requested", n).Int("warmed", successes).Msg("DB pool warm-up complete") +} + // Get returns the database instance func Get() *gorm.DB { return db diff --git a/internal/middleware/auth.go b/internal/middleware/auth.go index 3cd361a..725e976 100644 --- a/internal/middleware/auth.go +++ b/internal/middleware/auth.go @@ -22,13 +22,22 @@ const ( AuthUserKey = "auth_user" // AuthTokenKey is the key used to store the token in the context AuthTokenKey = "auth_token" - // TokenCacheTTL is the duration to cache tokens in Redis - TokenCacheTTL = 5 * time.Minute + // TokenCacheTTL is the duration to cache tokens in Redis. Tokens are + // valid for DefaultTokenExpiryDays (90), and explicit logout invalidates + // the cache, so a long TTL here just means most authed requests skip the + // auth-token SQL query entirely. + TokenCacheTTL = 1 * time.Hour // TokenCachePrefix is the prefix for token cache keys TokenCachePrefix = "auth_token_" // UserCacheTTL is how long full user records are cached in memory to - // avoid hitting the database on every authenticated request. - UserCacheTTL = 30 * time.Second + // avoid hitting the database on every authenticated request. Bumped from + // 30s — at 30s the trace showed a SELECT auth_user query on most warm + // requests because users aren't in cache long enough to hit twice. + UserCacheTTL = 5 * time.Minute + // UserCacheMaxSize bounds the per-pod in-memory user cache. With ~1KB + // per User struct, 5000 entries = ~5MB per pod. Older entries are + // evicted LRU before the limit is exceeded. + UserCacheMaxSize = 5000 // DefaultTokenExpiryDays is the default number of days before a token expires. DefaultTokenExpiryDays = 90 @@ -47,7 +56,7 @@ func NewAuthMiddleware(db *gorm.DB, cache *services.CacheService) *AuthMiddlewar return &AuthMiddleware{ db: db, cache: cache, - userCache: NewUserCache(UserCacheTTL), + userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize), tokenExpiryDays: DefaultTokenExpiryDays, } } @@ -61,7 +70,7 @@ func NewAuthMiddlewareWithConfig(db *gorm.DB, cache *services.CacheService, cfg return &AuthMiddleware{ db: db, cache: cache, - userCache: NewUserCache(UserCacheTTL), + userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize), tokenExpiryDays: expiryDays, } } @@ -244,20 +253,83 @@ func (m *AuthMiddleware) getUserFromCache(ctx context.Context, token string) (*m // getUserFromDatabaseWithToken looks up the token in the database and returns // both the user and the auth token record (for expiry checking). The ctx is // threaded into the GORM session so the SQL span attaches to the request trace. +// +// Uses a single JOIN query instead of GORM's Preload (which issues 2 SELECTs). +// Over a transatlantic link this saves ~110ms RTT per cache miss. func (m *AuthMiddleware) getUserFromDatabaseWithToken(ctx context.Context, token string) (*models.User, *models.AuthToken, error) { - var authToken models.AuthToken - if err := m.db.WithContext(ctx).Preload("User").Where("key = ?", token).First(&authToken).Error; err != nil { + // Flat result row: every column from auth_user prefixed `u_`, every + // column from user_authtoken left in its native shape. Mapping to two + // structs is mechanical so we don't need a struct tag soup. + type joinedRow struct { + // AuthToken columns + Key string `gorm:"column:key"` + Created time.Time `gorm:"column:created"` + UserID uint `gorm:"column:user_id"` + // User columns (prefixed to avoid collision with UserID) + UID uint `gorm:"column:u_id"` + UUsername string `gorm:"column:u_username"` + UEmail string `gorm:"column:u_email"` + UFirstName string `gorm:"column:u_first_name"` + ULastName string `gorm:"column:u_last_name"` + UPassword string `gorm:"column:u_password"` + UIsActive bool `gorm:"column:u_is_active"` + UIsStaff bool `gorm:"column:u_is_staff"` + UIsSuper bool `gorm:"column:u_is_superuser"` + UDateJoined time.Time `gorm:"column:u_date_joined"` + ULastLogin *time.Time `gorm:"column:u_last_login"` + } + + var row joinedRow + err := m.db.WithContext(ctx). + Table("user_authtoken AS t"). + Select(` + t.key, t.created, t.user_id, + u.id AS u_id, + u.username AS u_username, + u.email AS u_email, + u.first_name AS u_first_name, + u.last_name AS u_last_name, + u.password AS u_password, + u.is_active AS u_is_active, + u.is_staff AS u_is_staff, + u.is_superuser AS u_is_superuser, + u.date_joined AS u_date_joined, + u.last_login AS u_last_login + `). + Joins("INNER JOIN auth_user u ON u.id = t.user_id"). + Where("t.key = ?", token). + Limit(1). + Scan(&row).Error + if err != nil || row.Key == "" { return nil, nil, fmt.Errorf("token not found") } - // Check if user is active - if !authToken.User.IsActive { + user := models.User{ + ID: row.UID, + Username: row.UUsername, + Email: row.UEmail, + FirstName: row.UFirstName, + LastName: row.ULastName, + Password: row.UPassword, + IsActive: row.UIsActive, + IsStaff: row.UIsStaff, + IsSuperuser: row.UIsSuper, + DateJoined: row.UDateJoined, + LastLogin: row.ULastLogin, + } + authToken := models.AuthToken{ + Key: row.Key, + Created: row.Created, + UserID: row.UserID, + User: user, + } + + if !user.IsActive { return nil, nil, fmt.Errorf("user is inactive") } - // Store in in-memory cache for subsequent requests - m.userCache.Set(&authToken.User) - return &authToken.User, &authToken, nil + m.userCache.Set(&user) + return &user, &authToken, nil } // getUserFromDatabase looks up the token in the database and caches the diff --git a/internal/middleware/user_cache.go b/internal/middleware/user_cache.go index c442a4a..3a8f0b1 100644 --- a/internal/middleware/user_cache.go +++ b/internal/middleware/user_cache.go @@ -2,6 +2,7 @@ package middleware import ( "sync" + "sync/atomic" "time" "github.com/treytartt/honeydue-api/internal/models" @@ -16,22 +17,29 @@ type userCacheEntry struct { // UserCache is a concurrency-safe in-memory cache for User records, keyed by // user ID. Entries expire after a configurable TTL. The cache uses a sync.Map // for lock-free reads on the hot path, with periodic lazy eviction of stale -// entries during Set operations. +// entries during Set operations and a hard size cap to bound memory. type UserCache struct { - store sync.Map - ttl time.Duration - lastGC time.Time - gcMu sync.Mutex - gcEvery time.Duration + store sync.Map + ttl time.Duration + lastGC time.Time + gcMu sync.Mutex + gcEvery time.Duration + size atomic.Int64 // approximate count; sync.Map has no Len() + maxSize int64 } // NewUserCache creates a UserCache with the given TTL for entries. -func NewUserCache(ttl time.Duration) *UserCache { - return &UserCache{ +// maxSize is the soft upper bound on the number of cached users; when +// exceeded, the next Set will trigger an eviction sweep before storing. +// Pass <=0 for no size cap. +func NewUserCache(ttl time.Duration, maxSize int) *UserCache { + c := &UserCache{ ttl: ttl, lastGC: time.Now(), gcEvery: 2 * time.Minute, + maxSize: int64(maxSize), } + return c } // Get returns a cached user by ID, or nil if not found or expired. @@ -42,7 +50,9 @@ func (c *UserCache) Get(userID uint) *models.User { } entry := val.(*userCacheEntry) if time.Now().After(entry.expiresAt) { - c.store.Delete(userID) + if _, loaded := c.store.LoadAndDelete(userID); loaded { + c.size.Add(-1) + } return nil } // Return a shallow copy so callers cannot mutate the cached value. @@ -51,20 +61,71 @@ func (c *UserCache) Get(userID uint) *models.User { } // Set stores a user in the cache. It also triggers a background garbage- -// collection sweep if enough time has elapsed since the last one. +// collection sweep if enough time has elapsed since the last one or if the +// cache has grown past maxSize. func (c *UserCache) Set(user *models.User) { // Store a copy to prevent external mutation of the cached object. copied := *user - c.store.Store(user.ID, &userCacheEntry{ + if _, loaded := c.store.Swap(user.ID, &userCacheEntry{ user: &copied, expiresAt: time.Now().Add(c.ttl), - }) + }); !loaded { + c.size.Add(1) + } + if c.maxSize > 0 && c.size.Load() > c.maxSize { + c.evictUntilUnderCap() + } c.maybeGC() } +// evictUntilUnderCap walks the cache and drops the oldest expirable entries +// until size is under maxSize. Cheap O(n) walk; runs only when the cap is +// breached, which should be rare in practice (TTL handles most eviction). +func (c *UserCache) evictUntilUnderCap() { + now := time.Now() + // First pass: drop expired entries. + c.store.Range(func(key, value any) bool { + entry := value.(*userCacheEntry) + if now.After(entry.expiresAt) { + if _, loaded := c.store.LoadAndDelete(key); loaded { + c.size.Add(-1) + } + } + return c.size.Load() > c.maxSize + }) + // Second pass: if still over cap, drop entries closest to expiry. + if c.size.Load() <= c.maxSize { + return + } + type scored struct { + key any + expiresAt time.Time + } + candidates := make([]scored, 0, 64) + c.store.Range(func(key, value any) bool { + entry := value.(*userCacheEntry) + candidates = append(candidates, scored{key, entry.expiresAt}) + return true + }) + // Sort by expiry ascending — drop closest-to-expiry first. + for i := 1; i < len(candidates); i++ { + for j := i; j > 0 && candidates[j-1].expiresAt.After(candidates[j].expiresAt); j-- { + candidates[j-1], candidates[j] = candidates[j], candidates[j-1] + } + } + overshoot := int(c.size.Load() - c.maxSize) + for i := 0; i < overshoot && i < len(candidates); i++ { + if _, loaded := c.store.LoadAndDelete(candidates[i].key); loaded { + c.size.Add(-1) + } + } +} + // Invalidate removes a user from the cache by ID. func (c *UserCache) Invalidate(userID uint) { - c.store.Delete(userID) + if _, loaded := c.store.LoadAndDelete(userID); loaded { + c.size.Add(-1) + } } // maybeGC lazily sweeps expired entries at most once per gcEvery interval. @@ -81,7 +142,9 @@ func (c *UserCache) maybeGC() { c.store.Range(func(key, value any) bool { entry := value.(*userCacheEntry) if now.After(entry.expiresAt) { - c.store.Delete(key) + if _, loaded := c.store.LoadAndDelete(key); loaded { + c.size.Add(-1) + } } return true }) diff --git a/internal/middleware/user_cache_test.go b/internal/middleware/user_cache_test.go index cad628a..edefea1 100644 --- a/internal/middleware/user_cache_test.go +++ b/internal/middleware/user_cache_test.go @@ -11,7 +11,7 @@ import ( ) func TestUserCache_SetAndGet(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user := &models.User{Username: "testuser", Email: "test@test.com"} user.ID = 1 @@ -25,7 +25,7 @@ func TestUserCache_SetAndGet(t *testing.T) { } func TestUserCache_GetNonExistent_ReturnsNil(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) cached := cache.Get(999) assert.Nil(t, cached) @@ -33,7 +33,7 @@ func TestUserCache_GetNonExistent_ReturnsNil(t *testing.T) { func TestUserCache_Expired_ReturnsNil(t *testing.T) { // Very short TTL - cache := NewUserCache(1 * time.Millisecond) + cache := NewUserCache(1 * time.Millisecond, 0) user := &models.User{Username: "expiring_user"} user.ID = 1 @@ -48,7 +48,7 @@ func TestUserCache_Expired_ReturnsNil(t *testing.T) { } func TestUserCache_Invalidate(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user := &models.User{Username: "to_invalidate"} user.ID = 1 @@ -66,7 +66,7 @@ func TestUserCache_Invalidate(t *testing.T) { } func TestUserCache_ReturnsCopy_NotOriginal(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user := &models.User{Username: "original"} user.ID = 1 @@ -85,7 +85,7 @@ func TestUserCache_ReturnsCopy_NotOriginal(t *testing.T) { } func TestUserCache_SetCopiesInput(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user := &models.User{Username: "original"} user.ID = 1 @@ -102,7 +102,7 @@ func TestUserCache_SetCopiesInput(t *testing.T) { } func TestUserCache_MultipleUsers(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user1 := &models.User{Username: "user1"} user1.ID = 1 @@ -122,7 +122,7 @@ func TestUserCache_MultipleUsers(t *testing.T) { } func TestUserCache_OverwriteEntry(t *testing.T) { - cache := NewUserCache(1 * time.Minute) + cache := NewUserCache(1 * time.Minute, 0) user := &models.User{Username: "original"} user.ID = 1 diff --git a/internal/repositories/task_repo.go b/internal/repositories/task_repo.go index f5c7580..0edacf6 100644 --- a/internal/repositories/task_repo.go +++ b/internal/repositories/task_repo.go @@ -640,6 +640,54 @@ func (r *TaskRepository) GetKanbanDataForMultipleResidences(residenceIDs []uint, }, nil } +// GetKanbanDataForUser fetches every task across every residence the user has +// access to, in a single round-trip. Replaces the two-step +// FindResidenceIDsByUser → GetKanbanDataForMultipleResidences pattern, saving +// one transatlantic RTT per ListTasks call (~110ms on Hetzner→Neon). +// +// The residence-access check runs as a subquery on Postgres rather than a +// separate Go-side round-trip; Postgres's planner already turns it into a +// hash semi-join, so there's no perf cost vs the explicit IN(...) approach. +func (r *TaskRepository) GetKanbanDataForUser(userID uint, daysThreshold int, now time.Time) (*models.KanbanBoard, error) { + residenceSubquery := r.db.Table("residence_residence"). + Select("id"). + Where("is_active = ?", true). + Where("owner_id = ? OR id IN (?)", + userID, + r.db.Table("residence_residence_users").Select("residence_id").Where("user_id = ?", userID), + ) + + var allTasks []models.Task + query := r.db.Model(&models.Task{}). + Where("task_task.residence_id IN (?)", residenceSubquery). + Preload("CreatedBy"). + Preload("AssignedTo"). + Preload("Residence"). + Preload("Completions", func(db *gorm.DB) *gorm.DB { + return db.Select("id", "task_id", "completed_at") + }). + Scopes(task.ScopeKanbanOrder) + + if err := query.Find(&allTasks).Error; err != nil { + return nil, fmt.Errorf("get tasks for kanban: %w", err) + } + + columnMap := categorization.CategorizeTasksIntoColumnsWithTime(allTasks, daysThreshold, now) + columns := buildKanbanColumns( + columnMap[categorization.ColumnOverdue], + columnMap[categorization.ColumnInProgress], + columnMap[categorization.ColumnDueSoon], + columnMap[categorization.ColumnUpcoming], + columnMap[categorization.ColumnCompleted], + ) + + return &models.KanbanBoard{ + Columns: columns, + DaysThreshold: daysThreshold, + ResidenceID: "all", + }, nil +} + // === Lookup Operations === // GetAllCategories returns all task categories diff --git a/internal/router/router.go b/internal/router/router.go index 2017490..f7e6b0f 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -200,6 +200,15 @@ func SetupRouter(deps *Dependencies) *echo.Echo { taskService.SetStorageService(deps.StorageService) // For reading completion images for email subscriptionService := services.NewSubscriptionService(subscriptionRepo, residenceRepo, taskRepo, contractorRepo, documentRepo) residenceService.SetSubscriptionService(subscriptionService) // Wire up subscription service for tier limit enforcement + + // Wire Redis cache for residence-ID lookups across the four services that + // read it on the request hot path. Cache is best-effort; nil cache is OK. + if deps.Cache != nil { + residenceService.SetCacheService(deps.Cache) + taskService.SetCacheService(deps.Cache) + contractorService.SetCacheService(deps.Cache) + documentService.SetCacheService(deps.Cache) + } taskTemplateService := services.NewTaskTemplateService(taskTemplateRepo) suggestionService := services.NewSuggestionService(deps.DB, residenceRepo) diff --git a/internal/services/cache_service.go b/internal/services/cache_service.go index 1bbc6ba..fcfd70a 100644 --- a/internal/services/cache_service.go +++ b/internal/services/cache_service.go @@ -388,3 +388,61 @@ func (c *CacheService) GetSeededDataETag(ctx context.Context) (string, error) { func (c *CacheService) InvalidateSeededData(ctx context.Context) error { return c.Delete(ctx, SeededDataKey, SeededDataETagKey) } + +// === User → Residence-IDs cache === +// +// Caches the set of residence IDs each user has access to. Hot read on +// every authenticated API call (auth + tasks + residences + contractors + +// documents all need it). Mutations on residences/share-codes invalidate +// only the affected user(s); see Invalidate*ResidenceIDsForUsers. + +const ( + residenceIDsKeyPrefix = "residence_ids_user:" + residenceIDsTTL = 5 * time.Minute +) + +// CacheResidenceIDsForUser stores the residence-ID list for a user with a +// 5-minute TTL. Membership rarely changes (only on share-code accept, +// remove-user, delete-residence) so a 5-minute window catches the vast +// majority of repeat reads while keeping staleness bounded. +func (c *CacheService) CacheResidenceIDsForUser(ctx context.Context, userID uint, ids []uint) error { + if c == nil { + return nil + } + key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID) + data, err := json.Marshal(ids) + if err != nil { + return err + } + return c.client.Set(ctx, key, data, residenceIDsTTL).Err() +} + +// GetCachedResidenceIDsForUser fetches the cached residence-ID list. Returns +// (nil, redis.Nil) when not cached so callers can distinguish from "user has +// zero residences" (empty slice) — though for practical purposes both result +// in an empty kanban response, so most callers can ignore the distinction. +func (c *CacheService) GetCachedResidenceIDsForUser(ctx context.Context, userID uint) ([]uint, error) { + if c == nil { + return nil, fmt.Errorf("cache not available") + } + key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID) + var ids []uint + if err := c.Get(ctx, key, &ids); err != nil { + return nil, err + } + return ids, nil +} + +// InvalidateResidenceIDsForUsers drops the cache for one or more users. +// Called from JoinWithCode (the joining user) and RemoveUser / +// DeleteResidence (every affected user). Cheap — single Redis DEL per user. +func (c *CacheService) InvalidateResidenceIDsForUsers(ctx context.Context, userIDs ...uint) error { + if c == nil || len(userIDs) == 0 { + return nil + } + keys := make([]string, len(userIDs)) + for i, id := range userIDs { + keys[i] = fmt.Sprintf("%s%d", residenceIDsKeyPrefix, id) + } + return c.Delete(ctx, keys...) +} diff --git a/internal/services/contractor_service.go b/internal/services/contractor_service.go index 0dee23e..1c996d5 100644 --- a/internal/services/contractor_service.go +++ b/internal/services/contractor_service.go @@ -23,6 +23,7 @@ import ( type ContractorService struct { contractorRepo *repositories.ContractorRepository residenceRepo *repositories.ResidenceRepository + cache *CacheService } // NewContractorService creates a new contractor service @@ -33,6 +34,11 @@ func NewContractorService(contractorRepo *repositories.ContractorRepository, res } } +// SetCacheService wires Redis caching for residence-ID lookups. +func (s *ContractorService) SetCacheService(cache *CacheService) { + s.cache = cache +} + // GetContractor gets a contractor by ID with access check func (s *ContractorService) GetContractor(ctx context.Context, contractorID, userID uint) (*responses.ContractorResponse, error) { contractor, err := s.contractorRepo.WithContext(ctx).FindByID(contractorID) @@ -73,7 +79,7 @@ func (s *ContractorService) hasContractorAccess(ctx context.Context, contractor // ListContractors lists all contractors accessible to a user func (s *ContractorService) ListContractors(ctx context.Context, userID uint) ([]responses.ContractorResponse, error) { // Get residence IDs (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID) if err != nil { return nil, apperrors.Internal(err) } diff --git a/internal/services/document_service.go b/internal/services/document_service.go index 174047e..e8aaae4 100644 --- a/internal/services/document_service.go +++ b/internal/services/document_service.go @@ -24,6 +24,7 @@ import ( type DocumentService struct { documentRepo *repositories.DocumentRepository residenceRepo *repositories.ResidenceRepository + cache *CacheService } // NewDocumentService creates a new document service @@ -34,6 +35,11 @@ func NewDocumentService(documentRepo *repositories.DocumentRepository, residence } } +// SetCacheService wires Redis caching for residence-ID lookups. +func (s *DocumentService) SetCacheService(cache *CacheService) { + s.cache = cache +} + // GetDocument gets a document by ID with access check func (s *DocumentService) GetDocument(ctx context.Context, documentID, userID uint) (*responses.DocumentResponse, error) { document, err := s.documentRepo.WithContext(ctx).FindByID(documentID) @@ -60,7 +66,7 @@ func (s *DocumentService) GetDocument(ctx context.Context, documentID, userID ui // ListDocuments lists all documents accessible to a user, with optional filters. func (s *DocumentService) ListDocuments(ctx context.Context, userID uint, filter *repositories.DocumentFilter) ([]responses.DocumentResponse, error) { // Get residence IDs (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID) if err != nil { return nil, apperrors.Internal(err) } @@ -95,7 +101,7 @@ func (s *DocumentService) ListDocuments(ctx context.Context, userID uint, filter // ListWarranties lists all warranty documents func (s *DocumentService) ListWarranties(ctx context.Context, userID uint) ([]responses.DocumentResponse, error) { // Get residence IDs (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID) if err != nil { return nil, apperrors.Internal(err) } diff --git a/internal/services/residence_id_cache.go b/internal/services/residence_id_cache.go new file mode 100644 index 0000000..a403216 --- /dev/null +++ b/internal/services/residence_id_cache.go @@ -0,0 +1,41 @@ +package services + +import ( + "context" + + "github.com/treytartt/honeydue-api/internal/repositories" +) + +// cachedResidenceIDsForUser fetches the residence-ID list for a user, going +// through Redis (5-min TTL) before falling back to Postgres. +// +// Used on every authed read path (tasks, documents, contractors, summary) +// because the list rarely changes — only on share-code accept, member +// removal, or residence delete. Callers must invalidate after mutations +// via cache.InvalidateResidenceIDsForUsers. +// +// A nil cache is permitted — the function falls through to the repo +// directly, so this works in tests and in failure modes. +func cachedResidenceIDsForUser( + ctx context.Context, + cache *CacheService, + residenceRepo *repositories.ResidenceRepository, + userID uint, +) ([]uint, error) { + if cache != nil { + if ids, err := cache.GetCachedResidenceIDsForUser(ctx, userID); err == nil { + return ids, nil + } + } + + ids, err := residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + if err != nil { + return nil, err + } + + if cache != nil { + // Best-effort cache fill; don't fail the request on Redis hiccup. + _ = cache.CacheResidenceIDsForUser(ctx, userID, ids) + } + return ids, nil +} diff --git a/internal/services/residence_service.go b/internal/services/residence_service.go index d507983..fbc0e7e 100644 --- a/internal/services/residence_service.go +++ b/internal/services/residence_service.go @@ -37,9 +37,16 @@ type ResidenceService struct { userRepo *repositories.UserRepository taskRepo *repositories.TaskRepository subscriptionService *SubscriptionService + cache *CacheService config *config.Config } +// SetCacheService wires a Redis-backed cache for residence-ID lookups. May +// be nil — service falls through to direct DB queries when unset. +func (s *ResidenceService) SetCacheService(cache *CacheService) { + s.cache = cache +} + // NewResidenceService creates a new residence service func NewResidenceService(residenceRepo *repositories.ResidenceRepository, userRepo *repositories.UserRepository, cfg *config.Config) *ResidenceService { return &ResidenceService{ @@ -160,7 +167,7 @@ func (s *ResidenceService) GetMyResidences(ctx context.Context, userID uint, now // Clients should use calculateSummaryFromKanban() instead. func (s *ResidenceService) GetSummary(ctx context.Context, userID uint, now time.Time) (*responses.TotalSummary, error) { // Get residence IDs (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID) if err != nil { return nil, apperrors.Internal(err) } @@ -257,6 +264,11 @@ func (s *ResidenceService) CreateResidence(ctx context.Context, req *requests.Cr if err := s.residenceRepo.WithContext(ctx).Create(residence); err != nil { return nil, apperrors.Internal(err) } + if s.cache != nil { + // Owner now has a new residence — drop cached IDs so the next + // list-residences call doesn't omit it. + _ = s.cache.InvalidateResidenceIDsForUsers(ctx, ownerID) + } // Reload with relations residence, err := s.residenceRepo.WithContext(ctx).FindByID(residence.ID) @@ -419,9 +431,26 @@ func (s *ResidenceService) DeleteResidence(ctx context.Context, residenceID, use return nil, apperrors.Forbidden("error.not_residence_owner") } + // Capture all member IDs before delete so we can invalidate their caches. + var affectedUserIDs []uint + if s.cache != nil { + if members, _ := s.residenceRepo.WithContext(ctx).GetResidenceUsers(residenceID); members != nil { + affectedUserIDs = make([]uint, 0, len(members)+1) + affectedUserIDs = append(affectedUserIDs, userID) // owner + for _, m := range members { + if m.ID != userID { + affectedUserIDs = append(affectedUserIDs, m.ID) + } + } + } + } + if err := s.residenceRepo.WithContext(ctx).Delete(residenceID); err != nil { return nil, apperrors.Internal(err) } + if s.cache != nil && len(affectedUserIDs) > 0 { + _ = s.cache.InvalidateResidenceIDsForUsers(ctx, affectedUserIDs...) + } // Get updated summary summary := s.getSummaryForUser(userID) @@ -548,6 +577,10 @@ func (s *ResidenceService) JoinWithCode(ctx context.Context, code string, userID if err := s.residenceRepo.WithContext(ctx).AddUser(shareCode.ResidenceID, userID); err != nil { return nil, apperrors.Internal(err) } + if s.cache != nil { + // The joining user's residence-IDs cache is now stale. + _ = s.cache.InvalidateResidenceIDsForUsers(ctx, userID) + } // Mark share code as used (one-time use) if err := s.residenceRepo.WithContext(ctx).DeactivateShareCode(shareCode.ID); err != nil { @@ -629,6 +662,10 @@ func (s *ResidenceService) RemoveUser(ctx context.Context, residenceID, userIDTo if err := s.residenceRepo.WithContext(ctx).RemoveUser(residenceID, userIDToRemove); err != nil { return apperrors.Internal(err) } + if s.cache != nil { + // The removed user's residence-IDs cache is now stale. + _ = s.cache.InvalidateResidenceIDsForUsers(ctx, userIDToRemove) + } return nil } diff --git a/internal/services/task_service.go b/internal/services/task_service.go index 193ce5a..77dfac3 100644 --- a/internal/services/task_service.go +++ b/internal/services/task_service.go @@ -37,6 +37,12 @@ type TaskService struct { notificationService *NotificationService emailService *EmailService storageService *StorageService + cache *CacheService +} + +// SetCacheService wires Redis caching for residence-ID lookups. +func (s *TaskService) SetCacheService(cache *CacheService) { + s.cache = cache } // NewTaskService creates a new task service @@ -108,23 +114,11 @@ func (s *TaskService) ListTasks(ctx context.Context, userID uint, daysThreshold daysThreshold = 30 // Default } - // Get all residence IDs accessible to user (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) - if err != nil { - return nil, apperrors.Internal(err) - } - - if len(residenceIDs) == 0 { - // Return empty kanban board - return &responses.KanbanBoardResponse{ - Columns: []responses.KanbanColumnResponse{}, - DaysThreshold: daysThreshold, - ResidenceID: "all", - }, nil - } - - // Get kanban data aggregated across all residences using user's timezone-aware time - board, err := s.taskRepo.WithContext(ctx).GetKanbanDataForMultipleResidences(residenceIDs, daysThreshold, now) + // Single-round-trip variant: residence-access subquery is folded into the + // task fetch on the Postgres side instead of a separate Go-side round-trip. + // Saves ~110ms on Hetzner→Neon. Empty result naturally handles the + // "user has no residences" case as an empty board. + board, err := s.taskRepo.WithContext(ctx).GetKanbanDataForUser(userID, daysThreshold, now) if err != nil { return nil, apperrors.Internal(err) } @@ -1025,7 +1019,7 @@ func (s *TaskService) GetCompletion(ctx context.Context, completionID, userID ui // ListCompletions lists all task completions for a user func (s *TaskService) ListCompletions(ctx context.Context, userID uint) ([]responses.TaskCompletionResponse, error) { // Get all residence IDs (lightweight - no preloads) - residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID) + residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID) if err != nil { return nil, apperrors.Internal(err) }