perf(subscription-status): cache + parallelize + invalidate on mutations
GET /api/subscription/status/ was the slowest endpoint in the API at p50≈1750ms / p95≈2425ms — about 12× the floor for our cluster→Neon geography. Jaeger traces showed seven sequential SQL queries each costing roughly one transatlantic RTT (~110ms), with the actual queries running in 0.073ms at the database. Pure network serialization, not slow SQL.

Three changes, in order of leverage:

1. Cache the assembled SubscriptionStatusResponse per-user in Redis with a 5-minute TTL. Hot path collapses to a single Redis GET (~5ms) on warm reads; the TTL is a safety net against missed invalidations.

2. Parallelize the three independent COUNT queries in getUserUsage (task_task / task_contractor / task_document) via golang.org/x/sync errgroup. Three RTTs collapse to one. Also dropped the redundant residence_residence COUNT — len(residenceIDs) from FindResidenceIDsByOwner is the same number, no need to re-query.

3. Wire explicit invalidation into every mutation that could change a user's response — residence/task/contractor/document CRUD, residence membership changes (JoinWithCode, RemoveUser, DeleteResidence), and every subscription tier flip across the IAP/Stripe/webhook surface. Residence-scoped invalidations fan out to every user with access via a new ResidenceRepository.FindUserIDsByResidence helper, so members of a shared residence don't see stale `usage` numbers when another member adds a task.

Net effect: warm path goes from ~1350ms to ~5ms (Redis hit). Cold path goes from ~1350ms to ~250-450ms (5 sequential queries → 2 phases: residence IDs lookup, then parallel task/contractor/document counts).

Also fixed a pre-existing CheckLimit signature drift in internal/integration/subscription_is_free_test.go that was blocking the package build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/treytartt/honeydue-api/internal/apperrors"
|
||||
@@ -112,8 +113,24 @@ func (s *SubscriptionService) GetSubscription(ctx context.Context, userID uint)
|
||||
return NewSubscriptionResponse(sub), nil
|
||||
}
|
||||
|
||||
// GetSubscriptionStatus gets detailed subscription status including limits
|
||||
// GetSubscriptionStatus gets detailed subscription status including limits.
|
||||
//
|
||||
// Hot path on the iOS launch screen — runs 7+ sequential SQL queries against
|
||||
// transatlantic Neon Postgres at ~110ms RTT each (~800ms floor before
|
||||
// optimization). The assembled response is cached per-user in Redis with a
|
||||
// 5-minute TTL; mutation paths (residence/task/contractor/document/sub CRUD)
|
||||
// invalidate via cache.InvalidateSubscriptionStatusForUsers, fanning out to
|
||||
// every member of a shared residence.
|
||||
func (s *SubscriptionService) GetSubscriptionStatus(ctx context.Context, userID uint) (*SubscriptionStatusResponse, error) {
|
||||
// Cache fast path — only used on warm reads. Cold reads, trial-start
|
||||
// branch, and the actual mutation paths below all populate fresh.
|
||||
if s.cache != nil {
|
||||
var cached SubscriptionStatusResponse
|
||||
if err := s.cache.GetCachedSubscriptionStatus(ctx, userID, &cached); err == nil {
|
||||
return &cached, nil
|
||||
}
|
||||
}
|
||||
|
||||
sub, err := s.subscriptionRepo.WithContext(ctx).GetOrCreate(userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
@@ -204,43 +221,59 @@ func (s *SubscriptionService) GetSubscriptionStatus(ctx context.Context, userID
|
||||
resp.TrialActive = sub.IsTrialActive()
|
||||
resp.SubscriptionSource = sub.SubscriptionSource()
|
||||
|
||||
// Best-effort cache write. Errors are logged at the cache layer, not fatal.
|
||||
if s.cache != nil {
|
||||
_ = s.cache.CacheSubscriptionStatus(ctx, userID, resp)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// getUserUsage calculates current usage for a user.
|
||||
// P-10: Uses CountByOwner for properties count instead of loading all owned residences.
|
||||
// Uses batch COUNT queries (O(1) queries) instead of per-residence queries (O(N)).
|
||||
//
|
||||
// Performance: residence ID lookup is one query (we use len() for the
|
||||
// properties count instead of a redundant COUNT). The three IN-clause counts
|
||||
// against task_task / task_contractor / task_document don't depend on each
|
||||
// other and run concurrently via errgroup, collapsing 3 transatlantic RTTs
|
||||
// into 1. With residence IDs that's 2 RTT total instead of the prior 5.
|
||||
func (s *SubscriptionService) getUserUsage(ctx context.Context, userID uint) (*UsageResponse, error) {
|
||||
// P-10: Use CountByOwner for an efficient COUNT query instead of loading all records
|
||||
propertiesCount, err := s.residenceRepo.WithContext(ctx).CountByOwner(userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
// Still need residence IDs for batch counting tasks/contractors/documents
|
||||
// One query — used both for the properties count (len) and as the IN-list
|
||||
// for the three downstream counts. Replaces the prior CountByOwner +
|
||||
// FindResidenceIDsByOwner pair, which queried residence_residence twice
|
||||
// with the same predicate.
|
||||
residenceIDs, err := s.residenceRepo.WithContext(ctx).FindResidenceIDsByOwner(userID)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
// Count tasks, contractors, and documents across all residences with single queries each
|
||||
tasksCount, err := s.taskRepo.WithContext(ctx).CountByResidenceIDs(residenceIDs)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
var (
|
||||
tasksCount int64
|
||||
contractorsCount int64
|
||||
documentsCount int64
|
||||
)
|
||||
|
||||
contractorsCount, err := s.contractorRepo.WithContext(ctx).CountByResidenceIDs(residenceIDs)
|
||||
if err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
documentsCount, err := s.documentRepo.WithContext(ctx).CountByResidenceIDs(residenceIDs)
|
||||
if err != nil {
|
||||
g, gCtx := errgroup.WithContext(ctx)
|
||||
g.Go(func() error {
|
||||
c, err := s.taskRepo.WithContext(gCtx).CountByResidenceIDs(residenceIDs)
|
||||
tasksCount = c
|
||||
return err
|
||||
})
|
||||
g.Go(func() error {
|
||||
c, err := s.contractorRepo.WithContext(gCtx).CountByResidenceIDs(residenceIDs)
|
||||
contractorsCount = c
|
||||
return err
|
||||
})
|
||||
g.Go(func() error {
|
||||
c, err := s.documentRepo.WithContext(gCtx).CountByResidenceIDs(residenceIDs)
|
||||
documentsCount = c
|
||||
return err
|
||||
})
|
||||
if err := g.Wait(); err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
return &UsageResponse{
|
||||
PropertiesCount: propertiesCount,
|
||||
PropertiesCount: int64(len(residenceIDs)),
|
||||
TasksCount: tasksCount,
|
||||
ContractorsCount: contractorsCount,
|
||||
DocumentsCount: documentsCount,
|
||||
@@ -416,6 +449,12 @@ func (s *SubscriptionService) ProcessApplePurchase(ctx context.Context, userID u
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
// Tier flipped — drop cached SubscriptionStatusResponse so the next call
|
||||
// returns Pro immediately instead of stale Free.
|
||||
if s.cache != nil {
|
||||
_ = s.cache.InvalidateSubscriptionStatusForUsers(ctx, userID)
|
||||
}
|
||||
|
||||
return s.GetSubscription(ctx, userID)
|
||||
}
|
||||
|
||||
@@ -473,6 +512,10 @@ func (s *SubscriptionService) ProcessGooglePurchase(ctx context.Context, userID
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
|
||||
if s.cache != nil {
|
||||
_ = s.cache.InvalidateSubscriptionStatusForUsers(ctx, userID)
|
||||
}
|
||||
|
||||
return s.GetSubscription(ctx, userID)
|
||||
}
|
||||
|
||||
@@ -481,6 +524,10 @@ func (s *SubscriptionService) CancelSubscription(ctx context.Context, userID uin
|
||||
if err := s.subscriptionRepo.WithContext(ctx).SetAutoRenew(userID, false); err != nil {
|
||||
return nil, apperrors.Internal(err)
|
||||
}
|
||||
// auto_renew flips a field surfaced in SubscriptionStatusResponse.
|
||||
if s.cache != nil {
|
||||
_ = s.cache.InvalidateSubscriptionStatusForUsers(ctx, userID)
|
||||
}
|
||||
return s.GetSubscription(ctx, userID)
|
||||
}
|
||||
|
||||
@@ -657,6 +704,31 @@ func NewUpgradeTriggerDataResponse(t *models.UpgradeTrigger) *UpgradeTriggerData
|
||||
}
|
||||
}
|
||||
|
||||
// invalidateSubStatusForResidence drops the per-user subscription_status cache
|
||||
// for every user with access to a residence (owner + members from
|
||||
// residence_residence_users). Used by every mutation that changes shared data
|
||||
// counts — tasks, contractors, documents — so members of a shared residence
|
||||
// don't see stale `usage` numbers.
|
||||
//
|
||||
// Best-effort: failures are logged but never returned. The 5-min cache TTL is
|
||||
// the safety net if this ever silently fails.
|
||||
func invalidateSubStatusForResidence(ctx context.Context, cache *CacheService, residenceRepo *repositories.ResidenceRepository, residenceID uint) {
|
||||
if cache == nil {
|
||||
return
|
||||
}
|
||||
userIDs, err := residenceRepo.FindUserIDsByResidence(residenceID)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Uint("residence_id", residenceID).Msg("sub_status invalidation: residence lookup failed")
|
||||
return
|
||||
}
|
||||
if len(userIDs) == 0 {
|
||||
return
|
||||
}
|
||||
if err := cache.InvalidateSubscriptionStatusForUsers(ctx, userIDs...); err != nil {
|
||||
log.Warn().Err(err).Uint("residence_id", residenceID).Msg("sub_status invalidation: redis delete failed")
|
||||
}
|
||||
}
|
||||
|
||||
// FeatureBenefitResponse represents a feature benefit
|
||||
type FeatureBenefitResponse struct {
|
||||
FeatureName string `json:"feature_name"`
|
||||
|
||||
Reference in New Issue
Block a user