diff --git a/internal/middleware/kratos_auth.go b/internal/middleware/kratos_auth.go index f7ae066..53d3ca1 100644 --- a/internal/middleware/kratos_auth.go +++ b/internal/middleware/kratos_auth.go @@ -33,7 +33,14 @@ const ( // kratosSessionCacheTTL is how long a validated session is cached in // Redis, so most authed requests skip the Kratos /whoami round trip. - kratosSessionCacheTTL = 5 * time.Minute + // + // PRODUCTION CAVEAT (2026-06-03): until Kratos is deployed in-cluster, + // the Whoami fallback ALWAYS fails (no Kratos Service). That means every + // cache miss results in a 401 and a forced re-login. We mitigate by (a) using a long + // TTL and (b) refreshing the TTL on every cache hit (see resolve()). + // This is a short-term workaround — restore the TTL to a few minutes once Kratos + // is live and the runbook §11 #7 prerequisites are done. + kratosSessionCacheTTL = 24 * time.Hour kratosSessionPrefix = "kratos_sess:" ) @@ -123,6 +130,12 @@ func (m *KratosAuth) resolve(c echo.Context) (*models.User, bool, string, error) if m.cache != nil { if v, err := m.cache.GetString(ctx, cacheKey); err == nil && v != "" { if user, verified, ok := m.userFromCacheValue(ctx, v); ok { + // Sliding-window refresh: extend the TTL on every successful + // hit so active users don't get bounced when their original + // cache entry would otherwise have expired. Best-effort — + // failure to refresh just means the entry expires on the + // original schedule. + _ = m.cache.SetString(ctx, cacheKey, v, kratosSessionCacheTTL) return user, verified, cred, nil } }