From d96f317d2088d61b0c9b16acbb8315f111b61f7a Mon Sep 17 00:00:00 2001 From: Trey t Date: Sun, 26 Apr 2026 22:22:07 -0500 Subject: [PATCH] Revert "Fix migration deadlock under Neon pooler" This reverts commit 30966c6f5e525c80c4833ac9292b0a482abd1e50. --- internal/database/database.go | 92 ++++------------------------------- 1 file changed, 9 insertions(+), 83 deletions(-) diff --git a/internal/database/database.go b/internal/database/database.go index f1519b6..f7e134e 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -3,7 +3,6 @@ package database import ( "context" "fmt" - "strings" "time" "github.com/rs/zerolog/log" @@ -121,48 +120,6 @@ func Connect(cfg *config.DatabaseConfig, debug bool) (*gorm.DB, error) { return db, nil } -// openMigrationDB opens a parallel *gorm.DB pointed at the *direct* (non-pooler) -// Neon compute endpoint, used exclusively by MigrateWithLock so that -// pg_advisory_lock (session-scoped) survives across statements. -// -// The direct host is derived by stripping the "-pooler" segment from the -// configured DB_HOST. Example: -// pooler → ep-floral-truth-amttbc5a-pooler.c-5.us-east-1.aws.neon.tech -// direct → ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech -// -// If DB_HOST already lacks the suffix (e.g., a fallback config or a non-Neon -// PG instance), it's used as-is. -func openMigrationDB() (*gorm.DB, error) { - host := viper.GetString("DB_HOST") - port := viper.GetInt("DB_PORT") - user := viper.GetString("POSTGRES_USER") - pass := viper.GetString("POSTGRES_PASSWORD") - name := viper.GetString("POSTGRES_DB") - sslmode := viper.GetString("DB_SSLMODE") - - directHost := host - if idx := strings.Index(host, "-pooler."); idx > 0 { - directHost = host[:idx] + host[idx+len("-pooler"):] - } - - dsn := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s", - directHost, port, user, pass, name, sslmode) - - migrationDB, err := gorm.Open(postgres.Open(dsn), &gorm.Config{ - Logger: logger.Default.LogMode(logger.Silent), - }) - if err != nil { - return nil, err - } - if sqlDB, _ := migrationDB.DB(); sqlDB != nil { - // Single connection is enough — we hold the advisory lock on it. - sqlDB.SetMaxOpenConns(1) - sqlDB.SetMaxIdleConns(1) - } - log.Info().Str("direct_host", directHost).Msg("Opened direct DB connection for migration") - return migrationDB, nil -} - // warmUpPool issues N parallel pings so the pool fills with established // connections before the first user request lands. Failures are logged but // not fatal — the pool will fill on demand under traffic if pre-warm fails. @@ -243,37 +200,18 @@ func MigrateWithLock() error { return Migrate() } + sqlDB, err := db.DB() + if err != nil { + return fmt.Errorf("get underlying sql.DB: %w", err) + } + // Give ourselves up to 5 min to acquire the lock — long enough for a // slow migration on a peer replica, short enough to fail fast if Postgres // is hung. ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() - // CRITICAL: pg_advisory_lock is *session-scoped*. The runtime DB pool - // connects to Neon's pooler endpoint (PgBouncer in transaction mode), - // which releases the underlying Postgres session after each transaction — - // which would silently release our lock the moment we acquired it. - // - // So for migrations only, we open a parallel *gorm.DB pointed at the - // *direct* compute endpoint (DB_HOST with the "-pooler" segment stripped). - // AutoMigrate runs against this direct connection, holding the lock the - // whole time. Closed on return. - migrationDB, err := openMigrationDB() - if err != nil { - return fmt.Errorf("open direct migration connection: %w", err) - } - defer func() { - if sqlDB, _ := migrationDB.DB(); sqlDB != nil { - _ = sqlDB.Close() - } - }() - - migSQLDB, err := migrationDB.DB() - if err != nil { - return fmt.Errorf("get underlying sql.DB: %w", err) - } - - conn, err := migSQLDB.Conn(ctx) + conn, err := sqlDB.Conn(ctx) if err != nil { return fmt.Errorf("acquire dedicated migration connection: %w", err) } @@ -296,27 +234,15 @@ func MigrateWithLock() error { } }() - // Run migrations against the *direct* migrationDB while holding the - // session-scoped lock on `conn`. Migrate() defaults to the global - // `db` (the pooler-backed pool) but the package-level `db` swap during - // migration is risky; instead, just call AutoMigrate on migrationDB. - return migrate(migrationDB) + return Migrate() } -// Migrate runs database migrations against the global pool. This is fine -// when the global pool is connected via a session (no PgBouncer) — e.g., -// in tests with SQLite. In production, MigrateWithLock should be used, -// which targets a direct Postgres connection. +// Migrate runs database migrations for all models func Migrate() error { - return migrate(db) -} - -// migrate runs AutoMigrate on the supplied gorm.DB. -func migrate(target *gorm.DB) error { log.Info().Msg("Running database migrations...") // Migrate all models in order (respecting foreign key constraints) - err := target.AutoMigrate( + err := db.AutoMigrate( // Lookup tables first (no foreign keys) &models.ResidenceType{}, &models.TaskCategory{},