Revert "Fix migration deadlock under Neon pooler"

This reverts commit 30966c6f5e.
This commit is contained in:
Trey t
2026-04-26 22:22:07 -05:00
parent 4049b704c3
commit d96f317d20
+9 -83
View File
@@ -3,7 +3,6 @@ package database
import ( import (
"context" "context"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
@@ -121,48 +120,6 @@ func Connect(cfg *config.DatabaseConfig, debug bool) (*gorm.DB, error) {
return db, nil return db, nil
} }
// openMigrationDB opens a parallel *gorm.DB pointed at the *direct* (non-pooler)
// Neon compute endpoint, used exclusively by MigrateWithLock so that
// pg_advisory_lock (session-scoped) survives across statements.
//
// The direct host is derived by stripping the "-pooler" segment from the
// configured DB_HOST. Example:
// pooler → ep-floral-truth-amttbc5a-pooler.c-5.us-east-1.aws.neon.tech
// direct → ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech
//
// If DB_HOST already lacks the suffix (e.g., a fallback config or a non-Neon
// PG instance), it's used as-is.
func openMigrationDB() (*gorm.DB, error) {
host := viper.GetString("DB_HOST")
port := viper.GetInt("DB_PORT")
user := viper.GetString("POSTGRES_USER")
pass := viper.GetString("POSTGRES_PASSWORD")
name := viper.GetString("POSTGRES_DB")
sslmode := viper.GetString("DB_SSLMODE")
directHost := host
if idx := strings.Index(host, "-pooler."); idx > 0 {
directHost = host[:idx] + host[idx+len("-pooler"):]
}
dsn := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s",
directHost, port, user, pass, name, sslmode)
migrationDB, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
Logger: logger.Default.LogMode(logger.Silent),
})
if err != nil {
return nil, err
}
if sqlDB, _ := migrationDB.DB(); sqlDB != nil {
// Single connection is enough — we hold the advisory lock on it.
sqlDB.SetMaxOpenConns(1)
sqlDB.SetMaxIdleConns(1)
}
log.Info().Str("direct_host", directHost).Msg("Opened direct DB connection for migration")
return migrationDB, nil
}
// warmUpPool issues N parallel pings so the pool fills with established // warmUpPool issues N parallel pings so the pool fills with established
// connections before the first user request lands. Failures are logged but // connections before the first user request lands. Failures are logged but
// not fatal — the pool will fill on demand under traffic if pre-warm fails. // not fatal — the pool will fill on demand under traffic if pre-warm fails.
@@ -243,37 +200,18 @@ func MigrateWithLock() error {
return Migrate() return Migrate()
} }
sqlDB, err := db.DB()
if err != nil {
return fmt.Errorf("get underlying sql.DB: %w", err)
}
// Give ourselves up to 5 min to acquire the lock — long enough for a // Give ourselves up to 5 min to acquire the lock — long enough for a
// slow migration on a peer replica, short enough to fail fast if Postgres // slow migration on a peer replica, short enough to fail fast if Postgres
// is hung. // is hung.
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel() defer cancel()
// CRITICAL: pg_advisory_lock is *session-scoped*. The runtime DB pool conn, err := sqlDB.Conn(ctx)
// connects to Neon's pooler endpoint (PgBouncer in transaction mode),
// which releases the underlying Postgres session after each transaction —
// which would silently release our lock the moment we acquired it.
//
// So for migrations only, we open a parallel *gorm.DB pointed at the
// *direct* compute endpoint (DB_HOST with the "-pooler" segment stripped).
// AutoMigrate runs against this direct connection, holding the lock the
// whole time. Closed on return.
migrationDB, err := openMigrationDB()
if err != nil {
return fmt.Errorf("open direct migration connection: %w", err)
}
defer func() {
if sqlDB, _ := migrationDB.DB(); sqlDB != nil {
_ = sqlDB.Close()
}
}()
migSQLDB, err := migrationDB.DB()
if err != nil {
return fmt.Errorf("get underlying sql.DB: %w", err)
}
conn, err := migSQLDB.Conn(ctx)
if err != nil { if err != nil {
return fmt.Errorf("acquire dedicated migration connection: %w", err) return fmt.Errorf("acquire dedicated migration connection: %w", err)
} }
@@ -296,27 +234,15 @@ func MigrateWithLock() error {
} }
}() }()
// Run migrations against the *direct* migrationDB while holding the return Migrate()
// session-scoped lock on `conn`. Migrate() defaults to the global
// `db` (the pooler-backed pool) but the package-level `db` swap during
// migration is risky; instead, just call AutoMigrate on migrationDB.
return migrate(migrationDB)
} }
// Migrate runs database migrations against the global pool. This is fine // Migrate runs database migrations for all models
// when the global pool is connected via a session (no PgBouncer) — e.g.,
// in tests with SQLite. In production, MigrateWithLock should be used,
// which targets a direct Postgres connection.
func Migrate() error { func Migrate() error {
return migrate(db)
}
// migrate runs AutoMigrate on the supplied gorm.DB.
func migrate(target *gorm.DB) error {
log.Info().Msg("Running database migrations...") log.Info().Msg("Running database migrations...")
// Migrate all models in order (respecting foreign key constraints) // Migrate all models in order (respecting foreign key constraints)
err := target.AutoMigrate( err := db.AutoMigrate(
// Lookup tables first (no foreign keys) // Lookup tables first (no foreign keys)
&models.ResidenceType{}, &models.ResidenceType{},
&models.TaskCategory{}, &models.TaskCategory{},