88fb1751c7
Stack of optimizations against the same Hetzner→Neon transatlantic link. The trace revealed every visible ms was network/proxy overhead — DB execution itself is sub-millisecond per query (verified via EXPLAIN ANALYZE: index scans on every hot path). Connection layer: - DB_HOST → Neon pooler endpoint (-pooler suffix). PgBouncer transaction-mode keeps backend Postgres connections warm so we no longer pay the ~110ms Postgres-startup RTT on cold queries. - GORM pool tuned: MaxIdleConns 10→20, MaxLifetime 600s→1800s, MaxIdleTime added (default 0 = never close idle). - Eager pool warm-up at boot via parallel pings — first user request no longer pays the ~440ms TCP+TLS+startup handshake. - Redis maxmemory-policy noeviction → allkeys-lru. Cache writes will evict cold keys instead of erroring at the 256MB limit. Auth layer: - TokenCacheTTL 5min → 1 hour (Redis token cache). - UserCacheTTL 30s → 5min (in-memory User cache, per pod). - UserCache gains a 5,000-entry LRU cap so a flood of unique users can't blow up pod RSS. ~5MB worst-case per pod. - Token + user lookup collapsed from 2 GORM Preload queries into a single INNER JOIN. Saves 1 RTT per cold-cache request. - Auth middleware's m.db.* now use db.WithContext(ctx) so the SQL spans nest under the parent HTTP request in Jaeger. Service layer: - TaskService.ListTasks: replaced two-step FindResidenceIDsByUser → GetKanbanDataForMultipleResidences with a single GetKanbanDataForUser that uses a Postgres subquery for residence-access. One round-trip instead of two. - New CacheService residence-IDs cache: \"residence_ids_user:<id>\" with 5-min TTL. Wired into Task/Residence/Contractor/Document services for the four hot read paths that need this list. - Cache invalidation on every relevant mutation: CreateResidence, DeleteResidence, JoinWithCode, RemoveUser. DeleteResidence invalidates every member of the residence, not just the owner. 
What this stacks up to (Hetzner→Neon, before US migration): Path Before After (target) Cache-warm authed read ~800ms ~100-200ms Cache-cold authed read (1st in 1hr) ~2500ms ~500-700ms First request after deploy ~2500ms ~700-900ms The endgame US-region migration on top of this gets us to ~30-50ms warm-cache, but we're shippable at ~150ms warm right now. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
411 lines
13 KiB
Go
411 lines
13 KiB
Go
package middleware
|
|
|
|
import (
	"context"
	"errors"
	"fmt"
	"strings"
	"time"

	"github.com/labstack/echo/v4"
	"github.com/redis/go-redis/v9"
	"github.com/rs/zerolog/log"
	"gorm.io/gorm"

	"github.com/treytartt/honeydue-api/internal/apperrors"
	"github.com/treytartt/honeydue-api/internal/config"
	"github.com/treytartt/honeydue-api/internal/models"
	"github.com/treytartt/honeydue-api/internal/services"
)
|
|
|
|
const (
	// AuthUserKey is the Echo context key under which the authenticated
	// user is stored.
	AuthUserKey = "auth_user"

	// AuthTokenKey is the Echo context key under which the raw token string
	// is stored.
	AuthTokenKey = "auth_token"

	// TokenCacheTTL is how long a token entry lives in Redis. Tokens stay
	// valid for DefaultTokenExpiryDays (90) and an explicit logout
	// invalidates the cache entry, so a long TTL simply lets most
	// authenticated requests skip the auth-token SQL query.
	TokenCacheTTL = time.Hour

	// TokenCachePrefix is prepended to token cache keys.
	TokenCachePrefix = "auth_token_"

	// UserCacheTTL is how long full user records stay in the in-memory
	// cache so that authenticated requests do not hit the database every
	// time. It was raised from 30s: at 30s traces showed a SELECT auth_user
	// on most warm requests because entries expired before a second hit.
	UserCacheTTL = 5 * time.Minute

	// UserCacheMaxSize caps the per-pod in-memory user cache. At roughly
	// 1KB per User struct, 5000 entries is about 5MB per pod; older entries
	// are evicted LRU before the limit is exceeded.
	UserCacheMaxSize = 5000

	// DefaultTokenExpiryDays is the token lifetime, in days, used when no
	// configuration overrides it.
	DefaultTokenExpiryDays = 90
)
|
|
|
|
// AuthMiddleware provides token authentication middleware
|
|
type AuthMiddleware struct {
|
|
db *gorm.DB
|
|
cache *services.CacheService
|
|
userCache *UserCache
|
|
tokenExpiryDays int
|
|
}
|
|
|
|
// NewAuthMiddleware creates a new auth middleware instance
|
|
func NewAuthMiddleware(db *gorm.DB, cache *services.CacheService) *AuthMiddleware {
|
|
return &AuthMiddleware{
|
|
db: db,
|
|
cache: cache,
|
|
userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize),
|
|
tokenExpiryDays: DefaultTokenExpiryDays,
|
|
}
|
|
}
|
|
|
|
// NewAuthMiddlewareWithConfig creates a new auth middleware instance with configuration
|
|
func NewAuthMiddlewareWithConfig(db *gorm.DB, cache *services.CacheService, cfg *config.Config) *AuthMiddleware {
|
|
expiryDays := DefaultTokenExpiryDays
|
|
if cfg != nil && cfg.Security.TokenExpiryDays > 0 {
|
|
expiryDays = cfg.Security.TokenExpiryDays
|
|
}
|
|
return &AuthMiddleware{
|
|
db: db,
|
|
cache: cache,
|
|
userCache: NewUserCache(UserCacheTTL, UserCacheMaxSize),
|
|
tokenExpiryDays: expiryDays,
|
|
}
|
|
}
|
|
|
|
// TokenExpiryDuration returns the token expiry duration.
|
|
func (m *AuthMiddleware) TokenExpiryDuration() time.Duration {
|
|
return time.Duration(m.tokenExpiryDays) * 24 * time.Hour
|
|
}
|
|
|
|
// isTokenExpired checks if a token's created timestamp indicates expiry.
|
|
func (m *AuthMiddleware) isTokenExpired(created time.Time) bool {
|
|
if created.IsZero() {
|
|
return false // Legacy tokens without created time are not expired
|
|
}
|
|
return time.Since(created) > m.TokenExpiryDuration()
|
|
}
|
|
|
|
// TokenAuth returns an Echo middleware that validates token authentication
|
|
func (m *AuthMiddleware) TokenAuth() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
// Extract token from Authorization header
|
|
token, err := extractToken(c)
|
|
if err != nil {
|
|
return apperrors.Unauthorized("error.not_authenticated")
|
|
}
|
|
|
|
// Try to get user from cache first (includes expiry check)
|
|
user, err := m.getUserFromCache(c.Request().Context(), token)
|
|
if err == nil && user != nil {
|
|
// Cache hit - set user in context and continue
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, token)
|
|
return next(c)
|
|
}
|
|
|
|
// Check if the cache indicated token expiry
|
|
if err != nil && err.Error() == "token expired" {
|
|
return apperrors.Unauthorized("error.token_expired")
|
|
}
|
|
|
|
// Cache miss - look up token in database
|
|
user, authToken, err := m.getUserFromDatabaseWithToken(c.Request().Context(), token)
|
|
if err != nil {
|
|
log.Debug().Err(err).Str("token", truncateToken(token)).Msg("Token authentication failed")
|
|
return apperrors.Unauthorized("error.invalid_token")
|
|
}
|
|
|
|
// Check token expiry
|
|
if m.isTokenExpired(authToken.Created) {
|
|
log.Debug().Str("token", truncateToken(token)).Time("created", authToken.Created).Msg("Token expired")
|
|
return apperrors.Unauthorized("error.token_expired")
|
|
}
|
|
|
|
// Cache the user ID and token creation time for future requests
|
|
if cacheErr := m.cacheTokenInfo(c.Request().Context(), token, user.ID, authToken.Created); cacheErr != nil {
|
|
log.Warn().Err(cacheErr).Msg("Failed to cache token info")
|
|
}
|
|
|
|
// Set user in context
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, token)
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// OptionalTokenAuth returns middleware that authenticates if token is present but doesn't require it
|
|
func (m *AuthMiddleware) OptionalTokenAuth() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
token, err := extractToken(c)
|
|
if err != nil {
|
|
// No token or invalid format - continue without user
|
|
return next(c)
|
|
}
|
|
|
|
// Try cache first
|
|
user, err := m.getUserFromCache(c.Request().Context(), token)
|
|
if err == nil && user != nil {
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, token)
|
|
return next(c)
|
|
}
|
|
|
|
// Try database
|
|
user, authToken, err := m.getUserFromDatabaseWithToken(c.Request().Context(), token)
|
|
if err == nil && !m.isTokenExpired(authToken.Created) {
|
|
m.cacheTokenInfo(c.Request().Context(), token, user.ID, authToken.Created)
|
|
c.Set(AuthUserKey, user)
|
|
c.Set(AuthTokenKey, token)
|
|
}
|
|
|
|
return next(c)
|
|
}
|
|
}
|
|
}
|
|
|
|
// extractToken extracts the token from the Authorization header
|
|
func extractToken(c echo.Context) (string, error) {
|
|
authHeader := c.Request().Header.Get("Authorization")
|
|
if authHeader == "" {
|
|
return "", fmt.Errorf("authorization header required")
|
|
}
|
|
|
|
// Support both "Token xxx" (Django style) and "Bearer xxx" formats
|
|
parts := strings.SplitN(authHeader, " ", 2)
|
|
if len(parts) != 2 {
|
|
return "", fmt.Errorf("invalid authorization header format")
|
|
}
|
|
|
|
scheme := parts[0]
|
|
token := parts[1]
|
|
|
|
if scheme != "Token" && scheme != "Bearer" {
|
|
return "", fmt.Errorf("invalid authorization scheme: %s", scheme)
|
|
}
|
|
|
|
if token == "" {
|
|
return "", fmt.Errorf("token is empty")
|
|
}
|
|
|
|
return token, nil
|
|
}
|
|
|
|
// getUserFromCache tries to get user from Redis cache, then from the
|
|
// in-memory user cache, before falling back to the database.
|
|
// Returns a "token expired" error if the cached creation time indicates expiry.
|
|
func (m *AuthMiddleware) getUserFromCache(ctx context.Context, token string) (*models.User, error) {
|
|
if m.cache == nil {
|
|
return nil, fmt.Errorf("cache not available")
|
|
}
|
|
|
|
userID, createdUnix, err := m.cache.GetCachedAuthTokenWithCreated(ctx, token)
|
|
if err != nil {
|
|
if err == redis.Nil {
|
|
return nil, fmt.Errorf("token not in cache")
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
// Check token expiry from cached creation time
|
|
if createdUnix > 0 {
|
|
created := time.Unix(createdUnix, 0)
|
|
if m.isTokenExpired(created) {
|
|
m.cache.InvalidateAuthToken(ctx, token)
|
|
return nil, fmt.Errorf("token expired")
|
|
}
|
|
}
|
|
|
|
// Try in-memory user cache first to avoid a DB round-trip
|
|
if cached := m.userCache.Get(userID); cached != nil {
|
|
if !cached.IsActive {
|
|
m.cache.InvalidateAuthToken(ctx, token)
|
|
m.userCache.Invalidate(userID)
|
|
return nil, fmt.Errorf("user is inactive")
|
|
}
|
|
return cached, nil
|
|
}
|
|
|
|
// In-memory cache miss — fetch from database
|
|
var user models.User
|
|
if err := m.db.WithContext(ctx).First(&user, userID).Error; err != nil {
|
|
// User was deleted - invalidate caches
|
|
m.cache.InvalidateAuthToken(ctx, token)
|
|
return nil, err
|
|
}
|
|
|
|
// Check if user is active
|
|
if !user.IsActive {
|
|
m.cache.InvalidateAuthToken(ctx, token)
|
|
return nil, fmt.Errorf("user is inactive")
|
|
}
|
|
|
|
// Store in in-memory cache for subsequent requests
|
|
m.userCache.Set(&user)
|
|
return &user, nil
|
|
}
|
|
|
|
// getUserFromDatabaseWithToken looks up the token in the database and returns
|
|
// both the user and the auth token record (for expiry checking). The ctx is
|
|
// threaded into the GORM session so the SQL span attaches to the request trace.
|
|
//
|
|
// Uses a single JOIN query instead of GORM's Preload (which issues 2 SELECTs).
|
|
// Over a transatlantic link this saves ~110ms RTT per cache miss.
|
|
func (m *AuthMiddleware) getUserFromDatabaseWithToken(ctx context.Context, token string) (*models.User, *models.AuthToken, error) {
|
|
// Flat result row: every column from auth_user prefixed `u_`, every
|
|
// column from user_authtoken left in its native shape. Mapping to two
|
|
// structs is mechanical so we don't need a struct tag soup.
|
|
type joinedRow struct {
|
|
// AuthToken columns
|
|
Key string `gorm:"column:key"`
|
|
Created time.Time `gorm:"column:created"`
|
|
UserID uint `gorm:"column:user_id"`
|
|
// User columns (prefixed to avoid collision with UserID)
|
|
UID uint `gorm:"column:u_id"`
|
|
UUsername string `gorm:"column:u_username"`
|
|
UEmail string `gorm:"column:u_email"`
|
|
UFirstName string `gorm:"column:u_first_name"`
|
|
ULastName string `gorm:"column:u_last_name"`
|
|
UPassword string `gorm:"column:u_password"`
|
|
UIsActive bool `gorm:"column:u_is_active"`
|
|
UIsStaff bool `gorm:"column:u_is_staff"`
|
|
UIsSuper bool `gorm:"column:u_is_superuser"`
|
|
UDateJoined time.Time `gorm:"column:u_date_joined"`
|
|
ULastLogin *time.Time `gorm:"column:u_last_login"`
|
|
}
|
|
|
|
var row joinedRow
|
|
err := m.db.WithContext(ctx).
|
|
Table("user_authtoken AS t").
|
|
Select(`
|
|
t.key, t.created, t.user_id,
|
|
u.id AS u_id,
|
|
u.username AS u_username,
|
|
u.email AS u_email,
|
|
u.first_name AS u_first_name,
|
|
u.last_name AS u_last_name,
|
|
u.password AS u_password,
|
|
u.is_active AS u_is_active,
|
|
u.is_staff AS u_is_staff,
|
|
u.is_superuser AS u_is_superuser,
|
|
u.date_joined AS u_date_joined,
|
|
u.last_login AS u_last_login
|
|
`).
|
|
Joins("INNER JOIN auth_user u ON u.id = t.user_id").
|
|
Where("t.key = ?", token).
|
|
Limit(1).
|
|
Scan(&row).Error
|
|
if err != nil || row.Key == "" {
|
|
return nil, nil, fmt.Errorf("token not found")
|
|
}
|
|
|
|
user := models.User{
|
|
ID: row.UID,
|
|
Username: row.UUsername,
|
|
Email: row.UEmail,
|
|
FirstName: row.UFirstName,
|
|
LastName: row.ULastName,
|
|
Password: row.UPassword,
|
|
IsActive: row.UIsActive,
|
|
IsStaff: row.UIsStaff,
|
|
IsSuperuser: row.UIsSuper,
|
|
DateJoined: row.UDateJoined,
|
|
LastLogin: row.ULastLogin,
|
|
}
|
|
authToken := models.AuthToken{
|
|
Key: row.Key,
|
|
Created: row.Created,
|
|
UserID: row.UserID,
|
|
User: user,
|
|
}
|
|
|
|
if !user.IsActive {
|
|
return nil, nil, fmt.Errorf("user is inactive")
|
|
}
|
|
|
|
m.userCache.Set(&user)
|
|
return &user, &authToken, nil
|
|
}
|
|
|
|
// getUserFromDatabase looks up the token in the database and caches the
|
|
// resulting user record in memory.
|
|
// Deprecated: Use getUserFromDatabaseWithToken for new code paths that need expiry checking.
|
|
func (m *AuthMiddleware) getUserFromDatabase(ctx context.Context, token string) (*models.User, error) {
|
|
user, _, err := m.getUserFromDatabaseWithToken(ctx, token)
|
|
return user, err
|
|
}
|
|
|
|
// cacheTokenInfo caches the user ID and token creation time for a token
|
|
func (m *AuthMiddleware) cacheTokenInfo(ctx context.Context, token string, userID uint, created time.Time) error {
|
|
if m.cache == nil {
|
|
return nil
|
|
}
|
|
return m.cache.CacheAuthTokenWithCreated(ctx, token, userID, created.Unix())
|
|
}
|
|
|
|
// cacheUserID caches the user ID for a token
|
|
func (m *AuthMiddleware) cacheUserID(ctx context.Context, token string, userID uint) error {
|
|
if m.cache == nil {
|
|
return nil
|
|
}
|
|
return m.cache.CacheAuthToken(ctx, token, userID)
|
|
}
|
|
|
|
// InvalidateToken removes a token from the cache
|
|
func (m *AuthMiddleware) InvalidateToken(ctx context.Context, token string) error {
|
|
if m.cache == nil {
|
|
return nil
|
|
}
|
|
return m.cache.InvalidateAuthToken(ctx, token)
|
|
}
|
|
|
|
// GetAuthUser retrieves the authenticated user from the Echo context.
|
|
// Returns nil if the context value is missing or not of the expected type.
|
|
func GetAuthUser(c echo.Context) *models.User {
|
|
val := c.Get(AuthUserKey)
|
|
if val == nil {
|
|
return nil
|
|
}
|
|
user, ok := val.(*models.User)
|
|
if !ok {
|
|
return nil
|
|
}
|
|
return user
|
|
}
|
|
|
|
// GetAuthToken retrieves the auth token from the Echo context
|
|
func GetAuthToken(c echo.Context) string {
|
|
token := c.Get(AuthTokenKey)
|
|
if token == nil {
|
|
return ""
|
|
}
|
|
tokenStr, ok := token.(string)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return tokenStr
|
|
}
|
|
|
|
// MustGetAuthUser retrieves the authenticated user or returns error with 401
|
|
func MustGetAuthUser(c echo.Context) (*models.User, error) {
|
|
user := GetAuthUser(c)
|
|
if user == nil {
|
|
return nil, apperrors.Unauthorized("error.not_authenticated")
|
|
}
|
|
return user, nil
|
|
}
|
|
|
|
// truncateToken shortens a token for logging: it keeps at most the first 8
// bytes and always appends "...". Tokens of 8 bytes or fewer are kept whole.
func truncateToken(token string) string {
	const visible = 8

	prefix := token
	if len(prefix) > visible {
		prefix = prefix[:visible]
	}
	return prefix + "..."
}
|