64c656bde1
Production is running with no Kratos deployed in-cluster (the deploy script's kratos-secrets prerequisite isn't satisfied yet — see runbook §11 #7). That means Whoami calls ALWAYS fail, so any time a user's Redis session cache expires they get a 401, which the iOS app treats as session invalid → forced re-login → can't re-authenticate because the same Whoami is the only way back in. Two-part mitigation: 1. Bump kratosSessionCacheTTL from 5 minutes to 24 hours. Active users stay logged in indefinitely; idle users get bounced after a day. 2. Refresh the cache TTL on every successful cache hit (sliding window) so usage-driven expiry is no longer a cliff at the original TTL. When Kratos actually comes up: - revert the TTL constant to a sensible value (1-15 min) - the sliding-window refresh is fine to keep; it's good UX regardless Caveat: this papers over the missing Kratos. New sign-ins still cannot complete because the api needs Kratos to populate the cache the first time. Real fix is to deploy Kratos. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
285 lines
8.8 KiB
Go
285 lines
8.8 KiB
Go
package middleware
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/rs/zerolog/log"
|
|
"gorm.io/gorm"
|
|
|
|
"github.com/treytartt/honeydue-api/internal/apperrors"
|
|
"github.com/treytartt/honeydue-api/internal/kratos"
|
|
"github.com/treytartt/honeydue-api/internal/models"
|
|
"github.com/treytartt/honeydue-api/internal/services"
|
|
)
|
|
|
|
const (
|
|
// AuthUserKey stores the authenticated *models.User in the echo context.
|
|
AuthUserKey = "auth_user"
|
|
// AuthTokenKey stores the raw session credential in the echo context.
|
|
AuthTokenKey = "auth_token"
|
|
// authVerifiedKey stores the Kratos email-verified flag in the context.
|
|
authVerifiedKey = "auth_email_verified"
|
|
|
|
// UserCacheTTL / UserCacheMaxSize bound the in-memory local-user cache.
|
|
UserCacheTTL = 5 * time.Minute
|
|
UserCacheMaxSize = 5000
|
|
|
|
// kratosSessionCacheTTL is how long a validated session is cached in
|
|
// Redis, so most authed requests skip the Kratos /whoami round trip.
|
|
//
|
|
// PRODUCTION CAVEAT (2026-06-03): until Kratos is deployed in-cluster,
|
|
// the Whoami fallback ALWAYS fails (no kratos Service). That means every
|
|
// cache miss = 401 = forced re-login. We mitigate by (a) using a long
|
|
// TTL and (b) refreshing the TTL on every cache hit (see resolve()).
|
|
// This is a short-term workaround — restore to a few minutes once Kratos
|
|
// is live and the runbook §11 #7 prerequisites are done.
|
|
kratosSessionCacheTTL = 24 * time.Hour
|
|
kratosSessionPrefix = "kratos_sess:"
|
|
)
|
|
|
|
// KratosAuth authenticates requests against an Ory Kratos session. It
|
|
// replaces the hand-rolled token auth: the session is validated via Kratos
|
|
// /sessions/whoami (Redis-cached), and the matching local auth_user row is
|
|
// lazily provisioned on first sight of a Kratos identity.
|
|
type KratosAuth struct {
|
|
kratos *kratos.Client
|
|
cache *services.CacheService
|
|
db *gorm.DB
|
|
userCache *UserCache
|
|
}
|
|
|
|
// NewKratosAuth builds the Kratos auth middleware.
|
|
func NewKratosAuth(k *kratos.Client, cache *services.CacheService, db *gorm.DB) *KratosAuth {
|
|
return &KratosAuth{
|
|
kratos: k,
|
|
cache: cache,
|
|
db: db,
|
|
userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize),
|
|
}
|
|
}
|
|
|
|
// Authenticate validates the Kratos session and requires it.
|
|
func (m *KratosAuth) Authenticate() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
user, verified, cred, err := m.resolve(c)
|
|
if err != nil {
|
|
log.Debug().Err(err).Msg("Kratos authentication failed")
|
|
return apperrors.Unauthorized("error.not_authenticated")
|
|
}
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, cred)
|
|
c.Set(authVerifiedKey, verified)
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// OptionalAuthenticate authenticates if a session is present, else continues
|
|
// unauthenticated.
|
|
func (m *KratosAuth) OptionalAuthenticate() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if user, verified, cred, err := m.resolve(c); err == nil {
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, cred)
|
|
c.Set(authVerifiedKey, verified)
|
|
}
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// RequireVerified rejects users whose Kratos email address is not verified.
|
|
// Apply after Authenticate.
|
|
func (m *KratosAuth) RequireVerified() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
if GetAuthUser(c) == nil {
|
|
return apperrors.Unauthorized("error.not_authenticated")
|
|
}
|
|
if verified, _ := c.Get(authVerifiedKey).(bool); !verified {
|
|
return apperrors.Forbidden("error.email_not_verified")
|
|
}
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// resolve validates the request's session and returns the local user.
|
|
func (m *KratosAuth) resolve(c echo.Context) (*models.User, bool, string, error) {
|
|
token, cookie := extractSession(c)
|
|
if token == "" && cookie == "" {
|
|
return nil, false, "", errors.New("no session credential")
|
|
}
|
|
cred := token
|
|
if cred == "" {
|
|
cred = cookie
|
|
}
|
|
ctx := c.Request().Context()
|
|
|
|
// Redis cache: kratos_sess:<hash(cred)> -> "<userID>|<0|1>"
|
|
cacheKey := kratosSessionPrefix + hashCredential(cred)
|
|
if m.cache != nil {
|
|
if v, err := m.cache.GetString(ctx, cacheKey); err == nil && v != "" {
|
|
if user, verified, ok := m.userFromCacheValue(ctx, v); ok {
|
|
// Sliding-window refresh: extend the TTL on every successful
|
|
// hit so active users don't get bounced when their original
|
|
// cache entry would have otherwise expired. Best-effort —
|
|
// failure to refresh just means the entry expires on the
|
|
// original schedule.
|
|
_ = m.cache.SetString(ctx, cacheKey, v, kratosSessionCacheTTL)
|
|
return user, verified, cred, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
sess, err := m.kratos.Whoami(ctx, token, cookie)
|
|
if err != nil {
|
|
return nil, false, "", err
|
|
}
|
|
user, err := m.provision(ctx, sess)
|
|
if err != nil {
|
|
return nil, false, "", err
|
|
}
|
|
if m.cache != nil {
|
|
_ = m.cache.SetString(ctx, cacheKey,
|
|
fmt.Sprintf("%d|%s", user.ID, boolDigit(sess.EmailVerified())), kratosSessionCacheTTL)
|
|
}
|
|
return user, sess.EmailVerified(), cred, nil
|
|
}
|
|
|
|
// provision finds the local auth_user row for a Kratos identity, creating it
|
|
// (and a UserProfile) on first sight. Concurrent first requests are handled
|
|
// by re-reading after a unique-constraint conflict.
|
|
func (m *KratosAuth) provision(ctx context.Context, sess *kratos.Session) (*models.User, error) {
|
|
var user models.User
|
|
err := m.db.WithContext(ctx).Where("kratos_id = ?", sess.Identity.ID).First(&user).Error
|
|
if err == nil {
|
|
return &user, nil
|
|
}
|
|
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
|
return nil, err
|
|
}
|
|
|
|
user = models.User{
|
|
KratosID: sess.Identity.ID,
|
|
Email: sess.Identity.Traits.Email,
|
|
Username: sess.Identity.Traits.Email,
|
|
FirstName: sess.Identity.Traits.Name.First,
|
|
LastName: sess.Identity.Traits.Name.Last,
|
|
IsActive: true,
|
|
DateJoined: time.Now().UTC(),
|
|
}
|
|
txErr := m.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
|
|
if err := tx.Create(&user).Error; err != nil {
|
|
return err
|
|
}
|
|
return tx.Create(&models.UserProfile{
|
|
UserID: user.ID,
|
|
Verified: sess.EmailVerified(),
|
|
}).Error
|
|
})
|
|
if txErr != nil {
|
|
// Likely a concurrent provision of the same identity — re-read.
|
|
if e := m.db.WithContext(ctx).Where("kratos_id = ?", sess.Identity.ID).First(&user).Error; e == nil {
|
|
return &user, nil
|
|
}
|
|
return nil, txErr
|
|
}
|
|
log.Info().Str("kratos_id", sess.Identity.ID).Uint("user_id", user.ID).
|
|
Msg("provisioned local user from Kratos identity")
|
|
return &user, nil
|
|
}
|
|
|
|
// userFromCacheValue resolves a cached "<userID>|<0|1>" value to a user.
|
|
func (m *KratosAuth) userFromCacheValue(ctx context.Context, v string) (*models.User, bool, bool) {
|
|
parts := strings.SplitN(v, "|", 2)
|
|
if len(parts) != 2 {
|
|
return nil, false, false
|
|
}
|
|
var id uint
|
|
if _, err := fmt.Sscanf(parts[0], "%d", &id); err != nil || id == 0 {
|
|
return nil, false, false
|
|
}
|
|
verified := parts[1] == "1"
|
|
if cached := m.userCache.Get(id); cached != nil {
|
|
return cached, verified, true
|
|
}
|
|
var user models.User
|
|
if err := m.db.WithContext(ctx).First(&user, id).Error; err != nil {
|
|
return nil, false, false
|
|
}
|
|
m.userCache.Set(&user)
|
|
return &user, verified, true
|
|
}
|
|
|
|
// extractSession pulls the session credential from the request: the
|
|
// X-Session-Token header or Authorization bearer (mobile clients), or the
|
|
// ory_kratos_session cookie (web).
|
|
func extractSession(c echo.Context) (token, cookie string) {
|
|
if t := c.Request().Header.Get("X-Session-Token"); t != "" {
|
|
token = t
|
|
} else if ah := c.Request().Header.Get("Authorization"); ah != "" {
|
|
parts := strings.SplitN(ah, " ", 2)
|
|
if len(parts) == 2 && (parts[0] == "Bearer" || parts[0] == "Token") {
|
|
token = parts[1]
|
|
}
|
|
}
|
|
if token == "" {
|
|
if ck := c.Request().Header.Get("Cookie"); strings.Contains(ck, "ory_kratos_session") {
|
|
cookie = ck
|
|
}
|
|
}
|
|
return token, cookie
|
|
}
|
|
|
|
// hashCredential returns the lowercase hex SHA-256 digest of cred. The
// digest, not the credential itself, is used to build the session cache key.
func hashCredential(cred string) string {
	hasher := sha256.New()
	hasher.Write([]byte(cred)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
|
|
|
// boolDigit encodes b as "1" for true and "0" for false, the form used for
// the verified flag in cached session entries.
func boolDigit(b bool) string {
	digit := "0"
	if b {
		digit = "1"
	}
	return digit
}
|
|
|
|
// truncateToken shortens a credential for log lines: it keeps at most the
// first 8 bytes of tok and appends "...". A credential of 8 bytes or fewer
// is kept whole before the "..." suffix.
func truncateToken(tok string) string {
	const visible = 8

	prefix := tok
	if len(prefix) > visible {
		prefix = prefix[:visible]
	}
	return prefix + "..."
}
|
|
|
|
// GetAuthUser retrieves the authenticated user from the echo context.
|
|
func GetAuthUser(c echo.Context) *models.User {
|
|
user, _ := c.Get(AuthUserKey).(*models.User)
|
|
return user
|
|
}
|
|
|
|
// GetAuthToken retrieves the session credential from the echo context.
|
|
func GetAuthToken(c echo.Context) string {
|
|
tok, _ := c.Get(AuthTokenKey).(string)
|
|
return tok
|
|
}
|
|
|
|
// MustGetAuthUser retrieves the authenticated user or returns a 401 error.
|
|
func MustGetAuthUser(c echo.Context) (*models.User, error) {
|
|
user := GetAuthUser(c)
|
|
if user == nil {
|
|
return nil, apperrors.Unauthorized("error.not_authenticated")
|
|
}
|
|
return user, nil
|
|
}
|