b67f7f9e6b
Trace data revealed subscription_subscriptionsettings was consuming
1,983s of cumulative DB time per day (180× more than the next-largest
table) for a 32-byte singleton row of admin-toggleable global flags.
Root cause was a 30-second poll loop in monitoring.Service per pod
plus uncached reads on every authed status check / CreateResidence /
Stripe webhook. Fix is layered:
1. Redis cache for SubscriptionSettings — same shape as the
residence-IDs cache. 30-min TTL, explicit invalidation on admin
write. New CacheService.{Cache,GetCached,Invalidate}SubscriptionSettings
plus a cachedSubscriptionSettings helper in services/.
2. SubscriptionService, StripeService, and both admin handlers
(settings + limitations) now read through the cache. Admin write
handlers invalidate so toggles propagate cluster-wide within ms
instead of waiting for the TTL.
3. monitoring.Service.syncSettingsFromDB also reads from Redis first
(raw redis.Client to avoid a services→monitoring import cycle).
Polling interval bumped 30s → 5min. Combined with Redis-shared
cache, cluster-wide DB hits from this poll go from ~480/hour to
~2/hour — a 240× reduction.
4. StripeService.CreateCheckoutSession now takes ctx so the cached
settings span (and the Stripe webhook trace) stay attached to the
request. Handler call site updated.
5. Admin handlers' direct h.db.First calls switched to
db.WithContext(ctx) so the previously orphaned SQL spans now nest
under the admin request span in Jaeger.
Net DB query rate for subscription_subscriptionsettings should drop
from 0.101/sec to ~0/sec with occasional invalidation-driven refills,
and the table's cumulative DB time from 1,983s/day to ~10s/day.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
498 lines
16 KiB
Go
498 lines
16 KiB
Go
package services
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"hash/fnv"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/redis/go-redis/v9"
|
|
"github.com/rs/zerolog/log"
|
|
|
|
"github.com/treytartt/honeydue-api/internal/config"
|
|
)
|
|
|
|
// CacheService provides Redis caching functionality
|
|
type CacheService struct {
|
|
client *redis.Client
|
|
}
|
|
|
|
var (
|
|
cacheInstance *CacheService
|
|
cacheOnce sync.Once
|
|
)
|
|
|
|
// NewCacheService creates a new cache service (thread-safe via sync.Once)
|
|
func NewCacheService(cfg *config.RedisConfig) (*CacheService, error) {
|
|
var initErr error
|
|
|
|
cacheOnce.Do(func() {
|
|
opt, err := redis.ParseURL(cfg.URL)
|
|
if err != nil {
|
|
initErr = fmt.Errorf("failed to parse Redis URL: %w", err)
|
|
return
|
|
}
|
|
|
|
if cfg.Password != "" {
|
|
opt.Password = cfg.Password
|
|
}
|
|
if cfg.DB != 0 {
|
|
opt.DB = cfg.DB
|
|
}
|
|
|
|
client := redis.NewClient(opt)
|
|
|
|
// Test connection
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
if err := client.Ping(ctx).Err(); err != nil {
|
|
initErr = fmt.Errorf("failed to connect to Redis: %w", err)
|
|
// NOTE: Don't reassign `cacheOnce = sync.Once{}` here. Mutating the
|
|
// Once from within its own Do() callback fatals with "unlock of
|
|
// unlocked mutex" because Do is holding the inner lock while we
|
|
// zero it. main.go handles the error (caching disabled, keep running);
|
|
// a pod restart is the right "retry" path for a transient Redis
|
|
// outage, not in-process.
|
|
return
|
|
}
|
|
|
|
// S-14: Mask credentials in Redis URL before logging
|
|
log.Info().
|
|
Str("url", config.MaskURLCredentials(cfg.URL)).
|
|
Int("db", opt.DB).
|
|
Msg("Connected to Redis")
|
|
|
|
cacheInstance = &CacheService{client: client}
|
|
})
|
|
|
|
if initErr != nil {
|
|
return nil, initErr
|
|
}
|
|
|
|
return cacheInstance, nil
|
|
}
|
|
|
|
// GetCache returns the cache service instance
|
|
func GetCache() *CacheService {
|
|
return cacheInstance
|
|
}
|
|
|
|
// Client returns the underlying Redis client
|
|
func (c *CacheService) Client() *redis.Client {
|
|
return c.client
|
|
}
|
|
|
|
// Set stores a value with expiration
|
|
func (c *CacheService) Set(ctx context.Context, key string, value interface{}, expiration time.Duration) error {
|
|
data, err := json.Marshal(value)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal value: %w", err)
|
|
}
|
|
|
|
return c.client.Set(ctx, key, data, expiration).Err()
|
|
}
|
|
|
|
// Get retrieves a value by key
|
|
func (c *CacheService) Get(ctx context.Context, key string, dest interface{}) error {
|
|
data, err := c.client.Get(ctx, key).Bytes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return json.Unmarshal(data, dest)
|
|
}
|
|
|
|
// GetString retrieves a string value by key
|
|
func (c *CacheService) GetString(ctx context.Context, key string) (string, error) {
|
|
return c.client.Get(ctx, key).Result()
|
|
}
|
|
|
|
// SetString stores a string value with expiration
|
|
func (c *CacheService) SetString(ctx context.Context, key string, value string, expiration time.Duration) error {
|
|
return c.client.Set(ctx, key, value, expiration).Err()
|
|
}
|
|
|
|
// Delete removes a key
|
|
func (c *CacheService) Delete(ctx context.Context, keys ...string) error {
|
|
return c.client.Del(ctx, keys...).Err()
|
|
}
|
|
|
|
// Exists checks if a key exists
|
|
func (c *CacheService) Exists(ctx context.Context, keys ...string) (int64, error) {
|
|
return c.client.Exists(ctx, keys...).Result()
|
|
}
|
|
|
|
// Close closes the Redis connection
|
|
func (c *CacheService) Close() error {
|
|
if c.client != nil {
|
|
return c.client.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Auth token cache helpers.
const (
	// AuthTokenPrefix is prepended to a token to build its Redis key.
	AuthTokenPrefix = "auth_token_"
	// TokenCacheTTL is the expiration applied to cached auth-token entries.
	TokenCacheTTL time.Duration = 5 * time.Minute
)
|
|
|
|
// CacheAuthToken caches a user ID for a token
|
|
func (c *CacheService) CacheAuthToken(ctx context.Context, token string, userID uint) error {
|
|
key := AuthTokenPrefix + token
|
|
return c.SetString(ctx, key, fmt.Sprintf("%d", userID), TokenCacheTTL)
|
|
}
|
|
|
|
// CacheAuthTokenWithCreated caches a user ID and token creation time for a token
|
|
func (c *CacheService) CacheAuthTokenWithCreated(ctx context.Context, token string, userID uint, createdUnix int64) error {
|
|
key := AuthTokenPrefix + token
|
|
return c.SetString(ctx, key, fmt.Sprintf("%d|%d", userID, createdUnix), TokenCacheTTL)
|
|
}
|
|
|
|
// GetCachedAuthToken gets a cached user ID for a token
|
|
func (c *CacheService) GetCachedAuthToken(ctx context.Context, token string) (uint, error) {
|
|
key := AuthTokenPrefix + token
|
|
val, err := c.GetString(ctx, key)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
var userID uint
|
|
_, err = fmt.Sscanf(val, "%d", &userID)
|
|
return userID, err
|
|
}
|
|
|
|
// GetCachedAuthTokenWithCreated gets a cached user ID and token creation time.
|
|
// Returns userID, createdUnix, error. createdUnix is 0 if not stored (legacy format).
|
|
func (c *CacheService) GetCachedAuthTokenWithCreated(ctx context.Context, token string) (uint, int64, error) {
|
|
key := AuthTokenPrefix + token
|
|
val, err := c.GetString(ctx, key)
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
|
|
var userID uint
|
|
var createdUnix int64
|
|
n, _ := fmt.Sscanf(val, "%d|%d", &userID, &createdUnix)
|
|
if n < 1 {
|
|
return 0, 0, fmt.Errorf("invalid cached token format")
|
|
}
|
|
return userID, createdUnix, nil
|
|
}
|
|
|
|
// InvalidateAuthToken removes a cached token
|
|
func (c *CacheService) InvalidateAuthToken(ctx context.Context, token string) error {
|
|
key := AuthTokenPrefix + token
|
|
return c.Delete(ctx, key)
|
|
}
|
|
|
|
// Static data cache helpers.
const (
	// StaticDataKey is the Redis key holding the combined static lookup data.
	StaticDataKey = "static_data"
	// StaticDataTTL is the expiration applied to the static data entry.
	StaticDataTTL time.Duration = 1 * time.Hour
)
|
|
|
|
// CacheStaticData caches static lookup data
|
|
func (c *CacheService) CacheStaticData(ctx context.Context, data interface{}) error {
|
|
return c.Set(ctx, StaticDataKey, data, StaticDataTTL)
|
|
}
|
|
|
|
// GetCachedStaticData retrieves cached static data
|
|
func (c *CacheService) GetCachedStaticData(ctx context.Context, dest interface{}) error {
|
|
return c.Get(ctx, StaticDataKey, dest)
|
|
}
|
|
|
|
// InvalidateStaticData removes cached static data
|
|
func (c *CacheService) InvalidateStaticData(ctx context.Context) error {
|
|
return c.Delete(ctx, StaticDataKey)
|
|
}
|
|
|
|
// Lookup data cache helpers. Each lookup type is cached under its own key,
// all sharing LookupKeyPrefix.
const (
	// LookupKeyPrefix is the common prefix of every per-lookup cache key.
	LookupKeyPrefix = "lookup:"

	LookupCategoriesKey     = LookupKeyPrefix + "categories"
	LookupPrioritiesKey     = LookupKeyPrefix + "priorities"
	LookupFrequenciesKey    = LookupKeyPrefix + "frequencies"
	LookupResidenceTypesKey = LookupKeyPrefix + "residence_types"
	LookupSpecialtiesKey    = LookupKeyPrefix + "specialties"
	LookupTaskTemplatesKey  = LookupKeyPrefix + "task_templates"

	// LookupDataTTL is long because lookup data rarely changes.
	LookupDataTTL time.Duration = 24 * time.Hour
)
|
|
|
|
// CacheLookupData caches data for a specific lookup type
|
|
func (c *CacheService) CacheLookupData(ctx context.Context, key string, data interface{}) error {
|
|
return c.Set(ctx, key, data, LookupDataTTL)
|
|
}
|
|
|
|
// GetCachedLookupData retrieves cached lookup data for a specific key
|
|
func (c *CacheService) GetCachedLookupData(ctx context.Context, key string, dest interface{}) error {
|
|
return c.Get(ctx, key, dest)
|
|
}
|
|
|
|
// InvalidateLookupData removes cached data for a specific lookup type
|
|
func (c *CacheService) InvalidateLookupData(ctx context.Context, key string) error {
|
|
return c.Delete(ctx, key)
|
|
}
|
|
|
|
// InvalidateAllLookups removes all cached lookup data
|
|
func (c *CacheService) InvalidateAllLookups(ctx context.Context) error {
|
|
keys := []string{
|
|
LookupCategoriesKey,
|
|
LookupPrioritiesKey,
|
|
LookupFrequenciesKey,
|
|
LookupResidenceTypesKey,
|
|
LookupSpecialtiesKey,
|
|
LookupTaskTemplatesKey,
|
|
StaticDataKey, // Also invalidate the combined static data
|
|
SeededDataKey, // Invalidate unified seeded data
|
|
SeededDataETagKey, // Invalidate seeded data ETag
|
|
}
|
|
return c.Delete(ctx, keys...)
|
|
}
|
|
|
|
// CacheCategories caches task categories
|
|
func (c *CacheService) CacheCategories(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupCategoriesKey, data)
|
|
}
|
|
|
|
// GetCachedCategories retrieves cached task categories
|
|
func (c *CacheService) GetCachedCategories(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupCategoriesKey, dest)
|
|
}
|
|
|
|
// InvalidateCategories removes cached task categories
|
|
func (c *CacheService) InvalidateCategories(ctx context.Context) error {
|
|
// Invalidate both specific key and combined static data
|
|
return c.Delete(ctx, LookupCategoriesKey, StaticDataKey)
|
|
}
|
|
|
|
// CachePriorities caches task priorities
|
|
func (c *CacheService) CachePriorities(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupPrioritiesKey, data)
|
|
}
|
|
|
|
// GetCachedPriorities retrieves cached task priorities
|
|
func (c *CacheService) GetCachedPriorities(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupPrioritiesKey, dest)
|
|
}
|
|
|
|
// InvalidatePriorities removes cached task priorities
|
|
func (c *CacheService) InvalidatePriorities(ctx context.Context) error {
|
|
return c.Delete(ctx, LookupPrioritiesKey, StaticDataKey)
|
|
}
|
|
|
|
// CacheFrequencies caches task frequencies
|
|
func (c *CacheService) CacheFrequencies(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupFrequenciesKey, data)
|
|
}
|
|
|
|
// GetCachedFrequencies retrieves cached task frequencies
|
|
func (c *CacheService) GetCachedFrequencies(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupFrequenciesKey, dest)
|
|
}
|
|
|
|
// InvalidateFrequencies removes cached task frequencies
|
|
func (c *CacheService) InvalidateFrequencies(ctx context.Context) error {
|
|
return c.Delete(ctx, LookupFrequenciesKey, StaticDataKey)
|
|
}
|
|
|
|
// CacheResidenceTypes caches residence types
|
|
func (c *CacheService) CacheResidenceTypes(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupResidenceTypesKey, data)
|
|
}
|
|
|
|
// GetCachedResidenceTypes retrieves cached residence types
|
|
func (c *CacheService) GetCachedResidenceTypes(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupResidenceTypesKey, dest)
|
|
}
|
|
|
|
// InvalidateResidenceTypes removes cached residence types
|
|
func (c *CacheService) InvalidateResidenceTypes(ctx context.Context) error {
|
|
return c.Delete(ctx, LookupResidenceTypesKey, StaticDataKey)
|
|
}
|
|
|
|
// CacheSpecialties caches contractor specialties
|
|
func (c *CacheService) CacheSpecialties(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupSpecialtiesKey, data)
|
|
}
|
|
|
|
// GetCachedSpecialties retrieves cached contractor specialties
|
|
func (c *CacheService) GetCachedSpecialties(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupSpecialtiesKey, dest)
|
|
}
|
|
|
|
// InvalidateSpecialties removes cached contractor specialties
|
|
func (c *CacheService) InvalidateSpecialties(ctx context.Context) error {
|
|
return c.Delete(ctx, LookupSpecialtiesKey, StaticDataKey)
|
|
}
|
|
|
|
// CacheTaskTemplates caches task templates
|
|
func (c *CacheService) CacheTaskTemplates(ctx context.Context, data interface{}) error {
|
|
return c.CacheLookupData(ctx, LookupTaskTemplatesKey, data)
|
|
}
|
|
|
|
// GetCachedTaskTemplates retrieves cached task templates
|
|
func (c *CacheService) GetCachedTaskTemplates(ctx context.Context, dest interface{}) error {
|
|
return c.GetCachedLookupData(ctx, LookupTaskTemplatesKey, dest)
|
|
}
|
|
|
|
// InvalidateTaskTemplates removes cached task templates
|
|
func (c *CacheService) InvalidateTaskTemplates(ctx context.Context) error {
|
|
return c.Delete(ctx, LookupTaskTemplatesKey, StaticDataKey)
|
|
}
|
|
|
|
// Unified seeded data cache helpers.
const (
	// SeededDataKey is the Redis key holding the unified seeded data JSON.
	SeededDataKey = "seeded_data"
	// SeededDataETagKey is the Redis key holding the ETag of that JSON.
	SeededDataETagKey = "seeded_data:etag"
	// SeededDataTTL is the expiration applied to both seeded-data entries.
	SeededDataTTL time.Duration = 24 * time.Hour
)
|
|
|
|
// CacheSeededData caches the unified seeded data and generates an ETag
|
|
func (c *CacheService) CacheSeededData(ctx context.Context, data interface{}) (string, error) {
|
|
jsonData, err := json.Marshal(data)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to marshal seeded data: %w", err)
|
|
}
|
|
|
|
// Generate FNV-64a ETag from the JSON data (faster than MD5, non-cryptographic)
|
|
h := fnv.New64a()
|
|
h.Write(jsonData)
|
|
etag := fmt.Sprintf("\"%x\"", h.Sum64())
|
|
|
|
// Store both the data and the ETag
|
|
if err := c.client.Set(ctx, SeededDataKey, jsonData, SeededDataTTL).Err(); err != nil {
|
|
return "", fmt.Errorf("failed to cache seeded data: %w", err)
|
|
}
|
|
|
|
if err := c.client.Set(ctx, SeededDataETagKey, etag, SeededDataTTL).Err(); err != nil {
|
|
return "", fmt.Errorf("failed to cache seeded data etag: %w", err)
|
|
}
|
|
|
|
return etag, nil
|
|
}
|
|
|
|
// GetCachedSeededData retrieves cached unified seeded data
|
|
func (c *CacheService) GetCachedSeededData(ctx context.Context, dest interface{}) error {
|
|
return c.Get(ctx, SeededDataKey, dest)
|
|
}
|
|
|
|
// GetSeededDataETag retrieves the cached ETag for seeded data
|
|
func (c *CacheService) GetSeededDataETag(ctx context.Context) (string, error) {
|
|
return c.GetString(ctx, SeededDataETagKey)
|
|
}
|
|
|
|
// InvalidateSeededData removes cached seeded data and its ETag
|
|
func (c *CacheService) InvalidateSeededData(ctx context.Context) error {
|
|
return c.Delete(ctx, SeededDataKey, SeededDataETagKey)
|
|
}
|
|
|
|
// === User → Residence-IDs cache ===
//
// Caches the set of residence IDs each user has access to. Hot read on
// every authenticated API call (auth + tasks + residences + contractors +
// documents all need it). Mutations on residences/share-codes invalidate
// only the affected user(s); see InvalidateResidenceIDsForUsers.

const (
	// residenceIDsKeyPrefix is followed by the decimal user ID to form the key.
	residenceIDsKeyPrefix = "residence_ids_user:"
	// residenceIDsTTL bounds how stale a cached residence-ID list can get.
	residenceIDsTTL time.Duration = 5 * time.Minute
)
|
|
|
|
// CacheResidenceIDsForUser stores the residence-ID list for a user with a
|
|
// 5-minute TTL. Membership rarely changes (only on share-code accept,
|
|
// remove-user, delete-residence) so a 5-minute window catches the vast
|
|
// majority of repeat reads while keeping staleness bounded.
|
|
func (c *CacheService) CacheResidenceIDsForUser(ctx context.Context, userID uint, ids []uint) error {
|
|
if c == nil {
|
|
return nil
|
|
}
|
|
key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID)
|
|
data, err := json.Marshal(ids)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return c.client.Set(ctx, key, data, residenceIDsTTL).Err()
|
|
}
|
|
|
|
// GetCachedResidenceIDsForUser fetches the cached residence-ID list. Returns
|
|
// (nil, redis.Nil) when not cached so callers can distinguish from "user has
|
|
// zero residences" (empty slice) — though for practical purposes both result
|
|
// in an empty kanban response, so most callers can ignore the distinction.
|
|
func (c *CacheService) GetCachedResidenceIDsForUser(ctx context.Context, userID uint) ([]uint, error) {
|
|
if c == nil {
|
|
return nil, fmt.Errorf("cache not available")
|
|
}
|
|
key := fmt.Sprintf("%s%d", residenceIDsKeyPrefix, userID)
|
|
var ids []uint
|
|
if err := c.Get(ctx, key, &ids); err != nil {
|
|
return nil, err
|
|
}
|
|
return ids, nil
|
|
}
|
|
|
|
// InvalidateResidenceIDsForUsers drops the cache for one or more users.
|
|
// Called from JoinWithCode (the joining user) and RemoveUser /
|
|
// DeleteResidence (every affected user). Cheap — single Redis DEL per user.
|
|
func (c *CacheService) InvalidateResidenceIDsForUsers(ctx context.Context, userIDs ...uint) error {
|
|
if c == nil || len(userIDs) == 0 {
|
|
return nil
|
|
}
|
|
keys := make([]string, len(userIDs))
|
|
for i, id := range userIDs {
|
|
keys[i] = fmt.Sprintf("%s%d", residenceIDsKeyPrefix, id)
|
|
}
|
|
return c.Delete(ctx, keys...)
|
|
}
|
|
|
|
// === SubscriptionSettings cache ===
//
// SubscriptionSettings is a 32-byte singleton row of admin-toggleable global
// flags (EnableLimitations, EnableMonitoring, TrialEnabled, TrialDurationDays).
// It is read on every authed status check, on every CreateResidence, and
// periodically by the monitoring poll loop. Entries are cached with a long
// TTL; admin writes invalidate explicitly.
//
// The 30-minute TTL is belt-and-suspenders against an update that somehow
// bypasses the invalidation path (e.g., a manual SQL UPDATE). The flag value
// converging within 30 min is fine for any real use case.

const (
	// subscriptionSettingsKey is the single Redis key for the singleton row.
	subscriptionSettingsKey = "subscription_settings:1"
	// subscriptionSettingsTTL is the safety-net expiration described above.
	subscriptionSettingsTTL time.Duration = 30 * time.Minute
)
|
|
|
|
// CacheSubscriptionSettings stores the singleton settings row. Caller passes
|
|
// any encodable value — typically *models.SubscriptionSettings. Best-effort.
|
|
func (c *CacheService) CacheSubscriptionSettings(ctx context.Context, settings interface{}) error {
|
|
if c == nil {
|
|
return nil
|
|
}
|
|
data, err := json.Marshal(settings)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return c.client.Set(ctx, subscriptionSettingsKey, data, subscriptionSettingsTTL).Err()
|
|
}
|
|
|
|
// GetCachedSubscriptionSettings unmarshals into the supplied destination.
|
|
// Returns redis.Nil on cache miss so callers can distinguish from genuine errors.
|
|
func (c *CacheService) GetCachedSubscriptionSettings(ctx context.Context, dest interface{}) error {
|
|
if c == nil {
|
|
return fmt.Errorf("cache not available")
|
|
}
|
|
return c.Get(ctx, subscriptionSettingsKey, dest)
|
|
}
|
|
|
|
// InvalidateSubscriptionSettings drops the singleton-settings cache. Called
|
|
// from admin handlers that update the row so the new values are visible
|
|
// immediately to all pods (instead of waiting for the 30-min TTL).
|
|
func (c *CacheService) InvalidateSubscriptionSettings(ctx context.Context) error {
|
|
if c == nil {
|
|
return nil
|
|
}
|
|
return c.Delete(ctx, subscriptionSettingsKey)
|
|
}
|