Cut /api/tasks/ p99 from ~2500ms toward ~150-300ms
Stack of optimizations against the same Hetzner→Neon transatlantic link. The trace revealed every visible ms was network/proxy overhead — DB execution itself is sub-millisecond per query (verified via EXPLAIN ANALYZE: index scans on every hot path).

Connection layer:
- DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer transaction-mode keeps backend Postgres connections warm so we no longer pay the ~110ms Postgres-startup RTT on cold queries.
- GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s, MaxIdleTime added (default 0 = never close idle).
- Eager pool warm-up at boot via parallel pings — first user request no longer pays the ~440ms TCP+TLS+startup handshake.
- Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will evict cold keys instead of erroring at the 256MB limit.

Auth layer:
- TokenCacheTTL 5min → 1 hour (Redis token cache).
- UserCacheTTL 30s → 5min (in-memory User cache, per pod).
- UserCache gains a 5,000-entry LRU cap so a flood of unique users can't blow up pod RSS. ~5MB worst-case per pod.
- Token + user lookup collapsed from 2 GORM Preload queries into a single INNER JOIN. Saves 1 RTT per cold-cache request.
- Auth middleware's m.db.* calls now use db.WithContext(ctx) so the SQL spans nest under the parent HTTP request in Jaeger.

Service layer:
- TaskService.ListTasks: replaced two-step FindResidenceIDsByUser → GetKanbanDataForMultipleResidences with a single GetKanbanDataForUser that uses a Postgres subquery for residence-access. One round-trip instead of two.
- New CacheService residence-IDs cache: "residence_ids_user:<id>" with 5-min TTL. Wired into Task/Residence/Contractor/Document services for the four hot read paths that need this list.
- Cache invalidation on every relevant mutation: CreateResidence, DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence invalidates every member of the residence, not just the owner.
What this stacks up to (Hetzner→Neon, before US migration):

  Path                                   Before     After (target)
  Cache-warm authed read                 ~800ms     ~100-200ms
  Cache-cold authed read (1st in 1hr)    ~2500ms    ~500-700ms
  First request after deploy             ~2500ms    ~700-900ms

The endgame US-region migration on top of this gets us to ~30-50ms warm-cache, but we're shippable at ~150ms warm right now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -388,3 +388,61 @@ func (c *CacheService) GetSeededDataETag(ctx context.Context) (string, error) {
|
||||
func (c *CacheService) InvalidateSeededData(ctx context.Context) error {
|
||||
return c.Delete(ctx, SeededDataKey, SeededDataETagKey)
|
||||
}
|
||||
|
||||
// === User → Residence-IDs cache ===
|
||||
//
|
||||
// Caches the set of residence IDs each user has access to. Hot read on
|
||||
// every authenticated API call (auth + tasks + residences + contractors +
|
||||
// documents all need it). Mutations on residences/share-codes invalidate
|
||||
// only the affected user(s); see InvalidateResidenceIDsForUsers.
|
||||
|
||||
// residenceIDsKeyPrefix is the leading part of every per-user residence-ID
// cache key; the decimal user ID is appended to it.
const residenceIDsKeyPrefix = "residence_ids_user:"

// residenceIDsTTL is the expiry applied when a residence-ID list is stored.
const residenceIDsTTL = 5 * time.Minute
|
||||
|
||||
// CacheResidenceIDsForUser stores the residence-ID list for a user with a
|
||||
// 5-minute TTL. Membership rarely changes (only on share-code accept,
|
||||
// remove-user, delete-residence) so a 5-minute window catches the vast
|
||||
// majority of repeat reads while keeping staleness bounded.
|
||||
func (c *CacheService) CacheResidenceIDsForUser(ctx context.Context, userID uint, ids []uint) error {
|
||||
if c == nil {
|
||||
return nil
|
||||
}
|
||||
key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID)
|
||||
data, err := json.Marshal(ids)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.client.Set(ctx, key, data, residenceIDsTTL).Err()
|
||||
}
|
||||
|
||||
// GetCachedResidenceIDsForUser fetches the cached residence-ID list. Returns
|
||||
// (nil, redis.Nil) when not cached so callers can distinguish from "user has
|
||||
// zero residences" (empty slice) — though for practical purposes both result
|
||||
// in an empty kanban response, so most callers can ignore the distinction.
|
||||
func (c *CacheService) GetCachedResidenceIDsForUser(ctx context.Context, userID uint) ([]uint, error) {
|
||||
if c == nil {
|
||||
return nil, fmt.Errorf("cache not available")
|
||||
}
|
||||
key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID)
|
||||
var ids []uint
|
||||
if err := c.Get(ctx, key, &ids); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// InvalidateResidenceIDsForUsers drops the cache for one or more users.
|
||||
// Called from JoinWithCode (the joining user) and RemoveUser /
|
||||
// DeleteResidence (every affected user). Cheap — single Redis DEL per user.
|
||||
func (c *CacheService) InvalidateResidenceIDsForUsers(ctx context.Context, userIDs ...uint) error {
|
||||
if c == nil || len(userIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
keys := make([]string, len(userIDs))
|
||||
for i, id := range userIDs {
|
||||
keys[i] = fmt.Sprintf("%s%d", residenceIDsKeyPrefix, id)
|
||||
}
|
||||
return c.Delete(ctx, keys...)
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
type ContractorService struct {
|
||||
contractorRepo *repositories.ContractorRepository
|
||||
residenceRepo *repositories.ResidenceRepository
|
||||
cache *CacheService
|
||||
}
|
||||
|
||||
// NewContractorService creates a new contractor service
|
||||
@@ -33,6 +34,11 @@ func NewContractorService(contractorRepo *repositories.ContractorRepository, res
|
||||
}
|
||||
}
|
||||
|
||||
// SetCacheService wires Redis caching for residence-ID lookups.
|
||||
func (s *ContractorService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// GetContractor gets a contractor by ID with access check
|
||||
func (s *ContractorService) GetContractor(ctx context.Context, contractorID, userID uint) (*responses.ContractorResponse, error) {
|
||||
contractor, err := s.contractorRepo.WithContext(ctx).FindByID(contractorID)
|
||||
@@ -73,7 +79,7 @@ func (s *ContractorService) hasContractorAccess(ctx context.Context, contractor
|
||||
// ListContractors lists all contractors accessible to a user
|
||||
func (s *ContractorService) ListContractors(ctx context.Context, userID uint) ([]responses.ContractorResponse, error) {
|
||||
// Get residence IDs (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
type DocumentService struct {
|
||||
documentRepo *repositories.DocumentRepository
|
||||
residenceRepo *repositories.ResidenceRepository
|
||||
cache *CacheService
|
||||
}
|
||||
|
||||
// NewDocumentService creates a new document service
|
||||
@@ -34,6 +35,11 @@ func NewDocumentService(documentRepo *repositories.DocumentRepository, residence
|
||||
}
|
||||
}
|
||||
|
||||
// SetCacheService wires Redis caching for residence-ID lookups.
|
||||
func (s *DocumentService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// GetDocument gets a document by ID with access check
|
||||
func (s *DocumentService) GetDocument(ctx context.Context, documentID, userID uint) (*responses.DocumentResponse, error) {
|
||||
document, err := s.documentRepo.WithContext(ctx).FindByID(documentID)
|
||||
@@ -60,7 +66,7 @@ func (s *DocumentService) GetDocument(ctx context.Context, documentID, userID ui
|
||||
// ListDocuments lists all documents accessible to a user, with optional filters.
|
||||
func (s *DocumentService) ListDocuments(ctx context.Context, userID uint, filter *repositories.DocumentFilter) ([]responses.DocumentResponse, error) {
|
||||
// Get residence IDs (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
@@ -95,7 +101,7 @@ func (s *DocumentService) ListDocuments(ctx context.Context, userID uint, filter
|
||||
// ListWarranties lists all warranty documents
|
||||
func (s *DocumentService) ListWarranties(ctx context.Context, userID uint) ([]responses.DocumentResponse, error) {
|
||||
// Get residence IDs (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/treytartt/honeydue-api/internal/repositories"
|
||||
)
|
||||
|
||||
// cachedResidenceIDsForUser fetches the residence-ID list for a user, going
|
||||
// through Redis (5-min TTL) before falling back to Postgres.
|
||||
//
|
||||
// Used on every authed read path (tasks, documents, contractors, summary)
|
||||
// because the list rarely changes — only on share-code accept, member
|
||||
// removal, or residence delete. Callers must invalidate after mutations
|
||||
// via cache.InvalidateResidenceIDsForUsers.
|
||||
//
|
||||
// A nil cache is permitted — the function falls through to the repo
|
||||
// directly, so this works in tests and in failure modes.
|
||||
func cachedResidenceIDsForUser(
|
||||
ctx context.Context,
|
||||
cache *CacheService,
|
||||
residenceRepo *repositories.ResidenceRepository,
|
||||
userID uint,
|
||||
) ([]uint, error) {
|
||||
if cache != nil {
|
||||
if ids, err := cache.GetCachedResidenceIDsForUser(ctx, userID); err == nil {
|
||||
return ids, nil
|
||||
}
|
||||
}
|
||||
|
||||
ids, err := residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if cache != nil {
|
||||
// Best-effort cache fill; don't fail the request on Redis hiccup.
|
||||
_ = cache.CacheResidenceIDsForUser(ctx, userID, ids)
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
@@ -37,9 +37,16 @@ type ResidenceService struct {
|
||||
userRepo *repositories.UserRepository
|
||||
taskRepo *repositories.TaskRepository
|
||||
subscriptionService *SubscriptionService
|
||||
cache *CacheService
|
||||
config *config.Config
|
||||
}
|
||||
|
||||
// SetCacheService wires a Redis-backed cache for residence-ID lookups. May
|
||||
// be nil — service falls through to direct DB queries when unset.
|
||||
func (s *ResidenceService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// NewResidenceService creates a new residence service
|
||||
func NewResidenceService(residenceRepo *repositories.ResidenceRepository, userRepo *repositories.UserRepository, cfg *config.Config) *ResidenceService {
|
||||
return &ResidenceService{
|
||||
@@ -160,7 +167,7 @@ func (s *ResidenceService) GetMyResidences(ctx context.Context, userID uint, now
|
||||
// Clients should use calculateSummaryFromKanban() instead.
|
||||
func (s *ResidenceService) GetSummary(ctx context.Context, userID uint, now time.Time) (*responses.TotalSummary, error) {
|
||||
// Get residence IDs (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
@@ -257,6 +264,11 @@ func (s *ResidenceService) CreateResidence(ctx context.Context, req *requests.Cr
|
||||
if err := s.residenceRepo.WithContext(ctx).Create(residence); err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
if s.cache != nil {
|
||||
// Owner now has a new residence — drop cached IDs so the next
|
||||
// list-residences call doesn't omit it.
|
||||
_ = s.cache.InvalidateResidenceIDsForUsers(ctx, ownerID)
|
||||
}
|
||||
|
||||
// Reload with relations
|
||||
residence, err := s.residenceRepo.WithContext(ctx).FindByID(residence.ID)
|
||||
@@ -419,9 +431,26 @@ func (s *ResidenceService) DeleteResidence(ctx context.Context, residenceID, use
|
||||
return nil, apperrors.Forbidden("error.not_residence_owner")
|
||||
}
|
||||
|
||||
// Capture all member IDs before delete so we can invalidate their caches.
|
||||
var affectedUserIDs []uint
|
||||
if s.cache != nil {
|
||||
if members, _ := s.residenceRepo.WithContext(ctx).GetResidenceUsers(residenceID); members != nil {
|
||||
affectedUserIDs = make([]uint, 0, len(members)+1)
|
||||
affectedUserIDs = append(affectedUserIDs, userID) // owner
|
||||
for _, m := range members {
|
||||
if m.ID != userID {
|
||||
affectedUserIDs = append(affectedUserIDs, m.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.residenceRepo.WithContext(ctx).Delete(residenceID); err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
if s.cache != nil && len(affectedUserIDs) > 0 {
|
||||
_ = s.cache.InvalidateResidenceIDsForUsers(ctx, affectedUserIDs...)
|
||||
}
|
||||
|
||||
// Get updated summary
|
||||
summary := s.getSummaryForUser(userID)
|
||||
@@ -548,6 +577,10 @@ func (s *ResidenceService) JoinWithCode(ctx context.Context, code string, userID
|
||||
if err := s.residenceRepo.WithContext(ctx).AddUser(shareCode.ResidenceID, userID); err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
if s.cache != nil {
|
||||
// The joining user's residence-IDs cache is now stale.
|
||||
_ = s.cache.InvalidateResidenceIDsForUsers(ctx, userID)
|
||||
}
|
||||
|
||||
// Mark share code as used (one-time use)
|
||||
if err := s.residenceRepo.WithContext(ctx).DeactivateShareCode(shareCode.ID); err != nil {
|
||||
@@ -629,6 +662,10 @@ func (s *ResidenceService) RemoveUser(ctx context.Context, residenceID, userIDTo
|
||||
if err := s.residenceRepo.WithContext(ctx).RemoveUser(residenceID, userIDToRemove); err != nil {
|
||||
return apperrors.Internal(err)
|
||||
}
|
||||
if s.cache != nil {
|
||||
// The removed user's residence-IDs cache is now stale.
|
||||
_ = s.cache.InvalidateResidenceIDsForUsers(ctx, userIDToRemove)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -37,6 +37,12 @@ type TaskService struct {
|
||||
notificationService *NotificationService
|
||||
emailService *EmailService
|
||||
storageService *StorageService
|
||||
cache *CacheService
|
||||
}
|
||||
|
||||
// SetCacheService wires Redis caching for residence-ID lookups.
|
||||
func (s *TaskService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// NewTaskService creates a new task service
|
||||
@@ -108,23 +114,11 @@ func (s *TaskService) ListTasks(ctx context.Context, userID uint, daysThreshold
|
||||
daysThreshold = 30 // Default
|
||||
}
|
||||
|
||||
// Get all residence IDs accessible to user (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
if len(residenceIDs) == 0 {
|
||||
// Return empty kanban board
|
||||
return &responses.KanbanBoardResponse{
|
||||
Columns: []responses.KanbanColumnResponse{},
|
||||
DaysThreshold: daysThreshold,
|
||||
ResidenceID: "all",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Get kanban data aggregated across all residences using user's timezone-aware time
|
||||
board, err := s.taskRepo.WithContext(ctx).GetKanbanDataForMultipleResidences(residenceIDs, daysThreshold, now)
|
||||
// Single-round-trip variant: residence-access subquery is folded into the
|
||||
// task fetch on the Postgres side instead of a separate Go-side round-trip.
|
||||
// Saves ~110ms on Hetzner→Neon. Empty result naturally handles the
|
||||
// "user has no residences" case as an empty board.
|
||||
board, err := s.taskRepo.WithContext(ctx).GetKanbanDataForUser(userID, daysThreshold, now)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
@@ -1025,7 +1019,7 @@ func (s *TaskService) GetCompletion(ctx context.Context, completionID, userID ui
|
||||
// ListCompletions lists all task completions for a user
|
||||
func (s *TaskService) ListCompletions(ctx context.Context, userID uint) ([]responses.TaskCompletionResponse, error) {
|
||||
// Get all residence IDs (lightweight - no preloads)
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByUser(userID)
|
||||
residenceIDs, err := cachedResidenceIDsForUser(ctx, s.cache, s.residenceRepo, userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user