Cache SubscriptionSettings + cut monitoring poll noise
Trace data revealed subscription_subscriptionsettings was consuming
1,983s of cumulative DB time per day (180× more than the next-largest
table) for a 32-byte singleton row of admin-toggleable global flags.
Root cause was a 30-second poll loop in monitoring.Service per pod
plus uncached reads on every authed status check / CreateResidence /
Stripe webhook. Fix is layered:
1. Redis cache for SubscriptionSettings — same shape as the
residence-IDs cache. 30-min TTL, explicit invalidation on admin
write. New CacheService.{Cache,GetCached,Invalidate}SubscriptionSettings
plus a cachedSubscriptionSettings helper in services/.
2. SubscriptionService, StripeService, and both admin handlers
(settings + limitations) now read through the cache. Admin write
handlers invalidate so toggles propagate cluster-wide within ms
instead of waiting for the TTL.
3. monitoring.Service.syncSettingsFromDB also reads from Redis first
(raw redis.Client to avoid a services→monitoring import cycle).
Polling interval bumped 30s → 5min. Combined with Redis-shared
cache, cluster-wide DB hits from this poll go from ~480/hour to
~2/hour — a 240× reduction.
4. StripeService.CreateCheckoutSession now takes ctx so the cached
settings span (and the Stripe webhook trace) stay attached to the
request. Handler call site updated.
5. Admin handlers' direct h.db.First calls switched to
db.WithContext(ctx) so the resulting SQL spans, which were previously
orphaned, now nest under the admin request span in Jaeger.
Net DB query rate for subscription_subscriptionsettings should drop
from 0.101/sec to ~0/sec with occasional invalidation-driven refills,
and the table's cumulative DB time from 1,983s/day to ~10s/day.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -446,3 +446,52 @@ func (c *CacheService) InvalidateResidenceIDsForUsers(ctx context.Context, userI
|
||||
}
|
||||
return c.Delete(ctx, keys...)
|
||||
}
|
||||
|
||||
// === SubscriptionSettings cache ===
|
||||
//
|
||||
// SubscriptionSettings is a 32-byte singleton row of admin-toggleable global
|
||||
// flags (EnableLimitations, EnableMonitoring, TrialEnabled, TrialDurationDays).
|
||||
// Read on every authed status check, every CreateResidence, and on every
// monitoring settings poll (every 5 minutes per pod; previously every 30s).
// Cached in Redis under a 30-minute TTL; admin writes invalidate explicitly.
|
||||
//
|
||||
// 30-minute TTL is belt-and-suspenders against an admin update that somehow
|
||||
// bypasses the invalidation path (e.g., a manual SQL UPDATE). The flag value
|
||||
// converging within 30 min is fine for any real use case.
|
||||
|
||||
const (
	// subscriptionSettingsKey is the single Redis key under which the
	// singleton SubscriptionSettings row is stored.
	subscriptionSettingsKey = "subscription_settings:1"

	// subscriptionSettingsTTL bounds how long a cached copy may live when no
	// explicit invalidation happens.
	subscriptionSettingsTTL = 30 * time.Minute
)
|
||||
|
||||
// CacheSubscriptionSettings stores the singleton settings row. Caller passes
|
||||
// any encodable value — typically *models.SubscriptionSettings. Best-effort.
|
||||
func (c *CacheService) CacheSubscriptionSettings(ctx context.Context, settings interface{}) error {
|
||||
if c == nil {
|
||||
return nil
|
||||
}
|
||||
data, err := json.Marshal(settings)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.client.Set(ctx, subscriptionSettingsKey, data, subscriptionSettingsTTL).Err()
|
||||
}
|
||||
|
||||
// GetCachedSubscriptionSettings unmarshals into the supplied destination.
|
||||
// Returns redis.Nil on cache miss so callers can distinguish from genuine errors.
|
||||
func (c *CacheService) GetCachedSubscriptionSettings(ctx context.Context, dest interface{}) error {
|
||||
if c == nil {
|
||||
return fmt.Errorf("cache not available")
|
||||
}
|
||||
return c.Get(ctx, subscriptionSettingsKey, dest)
|
||||
}
|
||||
|
||||
// InvalidateSubscriptionSettings drops the singleton-settings cache. Called
|
||||
// from admin handlers that update the row so the new values are visible
|
||||
// immediately to all pods (instead of waiting for the 30-min TTL).
|
||||
func (c *CacheService) InvalidateSubscriptionSettings(ctx context.Context) error {
|
||||
if c == nil {
|
||||
return nil
|
||||
}
|
||||
return c.Delete(ctx, subscriptionSettingsKey)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
@@ -24,6 +25,12 @@ type StripeService struct {
|
||||
subscriptionRepo *repositories.SubscriptionRepository
|
||||
userRepo *repositories.UserRepository
|
||||
webhookSecret string
|
||||
cache *CacheService
|
||||
}
|
||||
|
||||
// SetCacheService wires Redis caching for SubscriptionSettings reads.
|
||||
func (s *StripeService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// NewStripeService creates a new Stripe service. It initializes the global
|
||||
@@ -58,7 +65,7 @@ func NewStripeService(
|
||||
// CreateCheckoutSession creates a Stripe Checkout Session for a web subscription purchase.
|
||||
// It ensures the user has a Stripe customer record and configures the session with a trial
|
||||
// period if the user has not used their trial yet.
|
||||
func (s *StripeService) CreateCheckoutSession(userID uint, priceID string, successURL string, cancelURL string) (string, error) {
|
||||
func (s *StripeService) CreateCheckoutSession(ctx context.Context, userID uint, priceID string, successURL string, cancelURL string) (string, error) {
|
||||
// Get or create the user's subscription record
|
||||
sub, err := s.subscriptionRepo.GetOrCreate(userID)
|
||||
if err != nil {
|
||||
@@ -94,7 +101,7 @@ func (s *StripeService) CreateCheckoutSession(userID uint, priceID string, succe
|
||||
|
||||
// Offer a trial period if the user has not used their trial yet
|
||||
if !sub.TrialUsed {
|
||||
trialDays, err := s.getTrialDays()
|
||||
trialDays, err := s.getTrialDays(ctx)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Msg("Failed to get trial duration from settings, skipping trial")
|
||||
} else if trialDays > 0 {
|
||||
@@ -444,9 +451,11 @@ func (s *StripeService) findSubscriptionByStripeID(stripeSubID string) (*models.
|
||||
return sub, nil
|
||||
}
|
||||
|
||||
// getTrialDays reads the trial duration from SubscriptionSettings.
|
||||
func (s *StripeService) getTrialDays() (int, error) {
|
||||
settings, err := s.subscriptionRepo.GetSettings()
|
||||
// getTrialDays reads the trial duration from SubscriptionSettings via the
|
||||
// shared cache. ctx threads through so the SQL span (on cache miss) attaches
|
||||
// to the parent webhook trace.
|
||||
func (s *StripeService) getTrialDays(ctx context.Context) (int, error) {
|
||||
settings, err := cachedSubscriptionSettings(ctx, s.cache, s.subscriptionRepo)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -50,6 +50,12 @@ type SubscriptionService struct {
|
||||
documentRepo *repositories.DocumentRepository
|
||||
appleClient *AppleIAPClient
|
||||
googleClient *GoogleIAPClient
|
||||
cache *CacheService
|
||||
}
|
||||
|
||||
// SetCacheService wires Redis caching for SubscriptionSettings reads.
|
||||
func (s *SubscriptionService) SetCacheService(cache *CacheService) {
|
||||
s.cache = cache
|
||||
}
|
||||
|
||||
// NewSubscriptionService creates a new subscription service
|
||||
@@ -113,7 +119,7 @@ func (s *SubscriptionService) GetSubscriptionStatus(ctx context.Context, userID
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
settings, err := s.subscriptionRepo.WithContext(ctx).GetSettings()
|
||||
settings, err := cachedSubscriptionSettings(ctx, s.cache, s.subscriptionRepo)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
@@ -243,7 +249,7 @@ func (s *SubscriptionService) getUserUsage(ctx context.Context, userID uint) (*U
|
||||
|
||||
// CheckLimit checks if a user has exceeded a specific limit
|
||||
func (s *SubscriptionService) CheckLimit(ctx context.Context, userID uint, limitType string) error {
|
||||
settings, err := s.subscriptionRepo.WithContext(ctx).GetSettings()
|
||||
settings, err := cachedSubscriptionSettings(ctx, s.cache, s.subscriptionRepo)
|
||||
if err != nil {
|
||||
return apperrors.Internal(err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/treytartt/honeydue-api/internal/models"
|
||||
"github.com/treytartt/honeydue-api/internal/repositories"
|
||||
)
|
||||
|
||||
// cachedSubscriptionSettings fetches the singleton settings row, going
|
||||
// through Redis (30-min TTL) before falling back to Postgres.
|
||||
//
|
||||
// Hot read — touched on every CheckLimit, every GetSubscriptionStatus,
|
||||
// and every Stripe webhook. The row is admin-toggleable but writes are
|
||||
// rare; the cache cuts the per-request cost from ~250ms (transatlantic
|
||||
// Postgres roundtrip) to ~1ms (cluster-internal Redis).
|
||||
//
|
||||
// On a nil cache (tests, Redis-down), falls through to the repo directly
|
||||
// so the caller never sees a hard failure from caching.
|
||||
//
|
||||
// Admin writes invalidate via cache.InvalidateSubscriptionSettings.
|
||||
func cachedSubscriptionSettings(
|
||||
ctx context.Context,
|
||||
cache *CacheService,
|
||||
subRepo *repositories.SubscriptionRepository,
|
||||
) (*models.SubscriptionSettings, error) {
|
||||
if cache != nil {
|
||||
var settings models.SubscriptionSettings
|
||||
if err := cache.GetCachedSubscriptionSettings(ctx, &settings); err == nil {
|
||||
return &settings, nil
|
||||
}
|
||||
}
|
||||
|
||||
settings, err := subRepo.WithContext(ctx).GetSettings()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if cache != nil {
|
||||
_ = cache.CacheSubscriptionSettings(ctx, settings)
|
||||
}
|
||||
return settings, nil
|
||||
}
|
||||
Reference in New Issue
Block a user