Files
honeyDueAPI/internal/middleware/auth.go
T
Trey t 88fb1751c7
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled
Cut /api/tasks/ p99 from ~2500ms toward ~150-300ms
Stack of optimizations against the same Hetzner→Neon transatlantic link.
The trace revealed every visible ms was network/proxy overhead — DB
execution itself is sub-millisecond per query (verified via EXPLAIN
ANALYZE: index scans on every hot path).

Connection layer:
- DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer
  transaction-mode keeps backend Postgres connections warm so we no
  longer pay the ~110ms Postgres-startup RTT on cold queries.
- GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s,
  MaxIdleTime added (default 0 = never close idle).
- Eager pool warm-up at boot via parallel pings — first user request
  no longer pays the ~440ms TCP+TLS+startup handshake.
- Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will
  evict cold keys instead of erroring at the 256MB limit.

Auth layer:
- TokenCacheTTL 5min → 1 hour (Redis token cache).
- UserCacheTTL 30s → 5min (in-memory User cache, per pod).
- UserCache gains a 5,000-entry LRU cap so a flood of unique users
  can't blow up pod RSS. ~5MB worst-case per pod.
- Token + user lookup collapsed from 2 GORM Preload queries into a
  single INNER JOIN. Saves 1 RTT per cold-cache request.
- Auth middleware's m.db.* now use db.WithContext(ctx) so the SQL
  spans nest under the parent HTTP request in Jaeger.

Service layer:
- TaskService.ListTasks: replaced two-step
  FindResidenceIDsByUser → GetKanbanDataForMultipleResidences
  with a single GetKanbanDataForUser that uses a Postgres subquery
  for residence-access. One round-trip instead of two.
- New CacheService residence-IDs cache: \"residence_ids_user:<id>\"
  with 5-min TTL. Wired into Task/Residence/Contractor/Document
  services for the four hot read paths that need this list.
- Cache invalidation on every relevant mutation: CreateResidence,
  DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence
  invalidates every member of the residence, not just the owner.

What this stacks up to (Hetzner→Neon, before US migration):
  Path                                 Before        After (target)
  Cache-warm authed read               ~800ms        ~100-200ms
  Cache-cold authed read (1st in 1hr)  ~2500ms       ~500-700ms
  First request after deploy           ~2500ms       ~700-900ms

The endgame US-region migration on top of this gets us to ~30-50ms
warm-cache, but we're shippable at ~150ms warm right now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 17:13:50 -05:00

411 lines
13 KiB
Go

package middleware
import (
"context"
"fmt"
"strings"
"time"
"github.com/labstack/echo/v4"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"github.com/treytartt/honeydue-api/internal/apperrors"
"github.com/treytartt/honeydue-api/internal/config"
"github.com/treytartt/honeydue-api/internal/models"
"github.com/treytartt/honeydue-api/internal/services"
)
const (
// AuthUserKey is the key used to store the authenticated user in the context
AuthUserKey = "auth_user"
// AuthTokenKey is the key used to store the token in the context
AuthTokenKey = "auth_token"
// TokenCacheTTL is the duration to cache tokens in Redis. Tokens are
// valid for DefaultTokenExpiryDays (90), and explicit logout invalidates
// the cache, so a long TTL here just means most authed requests skip the
// auth-token SQL query entirely.
TokenCacheTTL = 1 * time.Hour
// TokenCachePrefix is the prefix for token cache keys
TokenCachePrefix = "auth_token_"
// UserCacheTTL is how long full user records are cached in memory to
// avoid hitting the database on every authenticated request. Bumped from
// 30s — at 30s the trace showed a SELECT auth_user query on most warm
// requests because users aren't in cache long enough to hit twice.
UserCacheTTL = 5 * time.Minute
// UserCacheMaxSize bounds the per-pod in-memory user cache. With ~1KB
// per User struct, 5000 entries = ~5MB per pod. Older entries are
// evicted LRU before the limit is exceeded.
UserCacheMaxSize = 5000
// DefaultTokenExpiryDays is the default number of days before a token expires.
DefaultTokenExpiryDays = 90
)
// AuthMiddleware provides token authentication middleware
type AuthMiddleware struct {
db *gorm.DB
cache *services.CacheService
userCache *UserCache
tokenExpiryDays int
}
// NewAuthMiddleware creates a new auth middleware instance
func NewAuthMiddleware(db *gorm.DB, cache *services.CacheService) *AuthMiddleware {
return &AuthMiddleware{
db: db,
cache: cache,
userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize),
tokenExpiryDays: DefaultTokenExpiryDays,
}
}
// NewAuthMiddlewareWithConfig creates a new auth middleware instance with configuration
func NewAuthMiddlewareWithConfig(db *gorm.DB, cache *services.CacheService, cfg *config.Config) *AuthMiddleware {
expiryDays := DefaultTokenExpiryDays
if cfg != nil && cfg.Security.TokenExpiryDays > 0 {
expiryDays = cfg.Security.TokenExpiryDays
}
return &AuthMiddleware{
db: db,
cache: cache,
userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize),
tokenExpiryDays: expiryDays,
}
}
// TokenExpiryDuration returns the token expiry duration.
func (m *AuthMiddleware) TokenExpiryDuration() time.Duration {
return time.Duration(m.tokenExpiryDays) * 24 * time.Hour
}
// isTokenExpired checks if a token's created timestamp indicates expiry.
func (m *AuthMiddleware) isTokenExpired(created time.Time) bool {
if created.IsZero() {
return false // Legacy tokens without created time are not expired
}
return time.Since(created) > m.TokenExpiryDuration()
}
// TokenAuth returns an Echo middleware that validates token authentication
func (m *AuthMiddleware) TokenAuth() echo.MiddlewareFunc {
return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
// Extract token from Authorization header
token, err := extractToken(c)
if err != nil {
return apperrors.Unauthorized("error.not_authenticated")
}
// Try to get user from cache first (includes expiry check)
user, err := m.getUserFromCache(c.Request().Context(), token)
if err == nil && user != nil {
// Cache hit - set user in context and continue
c.Set(AuthUserKey, user)
c.Set(AuthTokenKey, token)
return next(c)
}
// Check if the cache indicated token expiry
if err != nil && err.Error() == "token expired" {
return apperrors.Unauthorized("error.token_expired")
}
// Cache miss - look up token in database
user, authToken, err := m.getUserFromDatabaseWithToken(c.Request().Context(), token)
if err != nil {
log.Debug().Err(err).Str("token", truncateToken(token)).Msg("Token authentication failed")
return apperrors.Unauthorized("error.invalid_token")
}
// Check token expiry
if m.isTokenExpired(authToken.Created) {
log.Debug().Str("token", truncateToken(token)).Time("created", authToken.Created).Msg("Token expired")
return apperrors.Unauthorized("error.token_expired")
}
// Cache the user ID and token creation time for future requests
if cacheErr := m.cacheTokenInfo(c.Request().Context(), token, user.ID, authToken.Created); cacheErr != nil {
log.Warn().Err(cacheErr).Msg("Failed to cache token info")
}
// Set user in context
c.Set(AuthUserKey, user)
c.Set(AuthTokenKey, token)
return next(c)
}
}
}
// OptionalTokenAuth returns middleware that authenticates if token is present but doesn't require it
func (m *AuthMiddleware) OptionalTokenAuth() echo.MiddlewareFunc {
return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
token, err := extractToken(c)
if err != nil {
// No token or invalid format - continue without user
return next(c)
}
// Try cache first
user, err := m.getUserFromCache(c.Request().Context(), token)
if err == nil && user != nil {
c.Set(AuthUserKey, user)
c.Set(AuthTokenKey, token)
return next(c)
}
// Try database
user, authToken, err := m.getUserFromDatabaseWithToken(c.Request().Context(), token)
if err == nil && !m.isTokenExpired(authToken.Created) {
m.cacheTokenInfo(c.Request().Context(), token, user.ID, authToken.Created)
c.Set(AuthUserKey, user)
c.Set(AuthTokenKey, token)
}
return next(c)
}
}
}
// extractToken extracts the token from the Authorization header
func extractToken(c echo.Context) (string, error) {
authHeader := c.Request().Header.Get("Authorization")
if authHeader == "" {
return "", fmt.Errorf("authorization header required")
}
// Support both "Token xxx" (Django style) and "Bearer xxx" formats
parts := strings.SplitN(authHeader, " ", 2)
if len(parts) != 2 {
return "", fmt.Errorf("invalid authorization header format")
}
scheme := parts[0]
token := parts[1]
if scheme != "Token" && scheme != "Bearer" {
return "", fmt.Errorf("invalid authorization scheme: %s", scheme)
}
if token == "" {
return "", fmt.Errorf("token is empty")
}
return token, nil
}
// getUserFromCache tries to get user from Redis cache, then from the
// in-memory user cache, before falling back to the database.
// Returns a "token expired" error if the cached creation time indicates expiry.
func (m *AuthMiddleware) getUserFromCache(ctx context.Context, token string) (*models.User, error) {
if m.cache == nil {
return nil, fmt.Errorf("cache not available")
}
userID, createdUnix, err := m.cache.GetCachedAuthTokenWithCreated(ctx, token)
if err != nil {
if err == redis.Nil {
return nil, fmt.Errorf("token not in cache")
}
return nil, err
}
// Check token expiry from cached creation time
if createdUnix > 0 {
created := time.Unix(createdUnix, 0)
if m.isTokenExpired(created) {
m.cache.InvalidateAuthToken(ctx, token)
return nil, fmt.Errorf("token expired")
}
}
// Try in-memory user cache first to avoid a DB round-trip
if cached := m.userCache.Get(userID); cached != nil {
if !cached.IsActive {
m.cache.InvalidateAuthToken(ctx, token)
m.userCache.Invalidate(userID)
return nil, fmt.Errorf("user is inactive")
}
return cached, nil
}
// In-memory cache miss — fetch from database
var user models.User
if err := m.db.WithContext(ctx).First(&user, userID).Error; err != nil {
// User was deleted - invalidate caches
m.cache.InvalidateAuthToken(ctx, token)
return nil, err
}
// Check if user is active
if !user.IsActive {
m.cache.InvalidateAuthToken(ctx, token)
return nil, fmt.Errorf("user is inactive")
}
// Store in in-memory cache for subsequent requests
m.userCache.Set(&user)
return &user, nil
}
// getUserFromDatabaseWithToken looks up the token in the database and returns
// both the user and the auth token record (for expiry checking). The ctx is
// threaded into the GORM session so the SQL span attaches to the request trace.
//
// Uses a single JOIN query instead of GORM's Preload (which issues 2 SELECTs).
// Over a transatlantic link this saves ~110ms RTT per cache miss.
func (m *AuthMiddleware) getUserFromDatabaseWithToken(ctx context.Context, token string) (*models.User, *models.AuthToken, error) {
// Flat result row: every column from auth_user prefixed `u_`, every
// column from user_authtoken left in its native shape. Mapping to two
// structs is mechanical so we don't need a struct tag soup.
type joinedRow struct {
// AuthToken columns
Key string `gorm:"column:key"`
Created time.Time `gorm:"column:created"`
UserID uint `gorm:"column:user_id"`
// User columns (prefixed to avoid collision with UserID)
UID uint `gorm:"column:u_id"`
UUsername string `gorm:"column:u_username"`
UEmail string `gorm:"column:u_email"`
UFirstName string `gorm:"column:u_first_name"`
ULastName string `gorm:"column:u_last_name"`
UPassword string `gorm:"column:u_password"`
UIsActive bool `gorm:"column:u_is_active"`
UIsStaff bool `gorm:"column:u_is_staff"`
UIsSuper bool `gorm:"column:u_is_superuser"`
UDateJoined time.Time `gorm:"column:u_date_joined"`
ULastLogin *time.Time `gorm:"column:u_last_login"`
}
var row joinedRow
err := m.db.WithContext(ctx).
Table("user_authtoken AS t").
Select(`
t.key, t.created, t.user_id,
u.id AS u_id,
u.username AS u_username,
u.email AS u_email,
u.first_name AS u_first_name,
u.last_name AS u_last_name,
u.password AS u_password,
u.is_active AS u_is_active,
u.is_staff AS u_is_staff,
u.is_superuser AS u_is_superuser,
u.date_joined AS u_date_joined,
u.last_login AS u_last_login
`).
Joins("INNER JOIN auth_user u ON u.id = t.user_id").
Where("t.key = ?", token).
Limit(1).
Scan(&row).Error
if err != nil || row.Key == "" {
return nil, nil, fmt.Errorf("token not found")
}
user := models.User{
ID: row.UID,
Username: row.UUsername,
Email: row.UEmail,
FirstName: row.UFirstName,
LastName: row.ULastName,
Password: row.UPassword,
IsActive: row.UIsActive,
IsStaff: row.UIsStaff,
IsSuperuser: row.UIsSuper,
DateJoined: row.UDateJoined,
LastLogin: row.ULastLogin,
}
authToken := models.AuthToken{
Key: row.Key,
Created: row.Created,
UserID: row.UserID,
User: user,
}
if !user.IsActive {
return nil, nil, fmt.Errorf("user is inactive")
}
m.userCache.Set(&user)
return &user, &authToken, nil
}
// getUserFromDatabase looks up the token in the database and caches the
// resulting user record in memory.
// Deprecated: Use getUserFromDatabaseWithToken for new code paths that need expiry checking.
func (m *AuthMiddleware) getUserFromDatabase(ctx context.Context, token string) (*models.User, error) {
user, _, err := m.getUserFromDatabaseWithToken(ctx, token)
return user, err
}
// cacheTokenInfo caches the user ID and token creation time for a token
func (m *AuthMiddleware) cacheTokenInfo(ctx context.Context, token string, userID uint, created time.Time) error {
if m.cache == nil {
return nil
}
return m.cache.CacheAuthTokenWithCreated(ctx, token, userID, created.Unix())
}
// cacheUserID caches the user ID for a token
func (m *AuthMiddleware) cacheUserID(ctx context.Context, token string, userID uint) error {
if m.cache == nil {
return nil
}
return m.cache.CacheAuthToken(ctx, token, userID)
}
// InvalidateToken removes a token from the cache
func (m *AuthMiddleware) InvalidateToken(ctx context.Context, token string) error {
if m.cache == nil {
return nil
}
return m.cache.InvalidateAuthToken(ctx, token)
}
// GetAuthUser retrieves the authenticated user from the Echo context.
// Returns nil if the context value is missing or not of the expected type.
func GetAuthUser(c echo.Context) *models.User {
val := c.Get(AuthUserKey)
if val == nil {
return nil
}
user, ok := val.(*models.User)
if !ok {
return nil
}
return user
}
// GetAuthToken retrieves the auth token from the Echo context
func GetAuthToken(c echo.Context) string {
token := c.Get(AuthTokenKey)
if token == nil {
return ""
}
tokenStr, ok := token.(string)
if !ok {
return ""
}
return tokenStr
}
// MustGetAuthUser retrieves the authenticated user or returns error with 401
func MustGetAuthUser(c echo.Context) (*models.User, error) {
user := GetAuthUser(c)
if user == nil {
return nil, apperrors.Unauthorized("error.not_authenticated")
}
return user, nil
}
// truncateToken safely truncates a token string for logging.
// Returns at most the first 8 characters followed by "...".
func truncateToken(token string) string {
if len(token) > 8 {
return token[:8] + "..."
}
return token + "..."
}