diff --git a/deploy-k3s/manifests/api/deployment.yaml b/deploy-k3s/manifests/api/deployment.yaml index a98f67c..d742ef6 100644 --- a/deploy-k3s/manifests/api/deployment.yaml +++ b/deploy-k3s/manifests/api/deployment.yaml @@ -122,11 +122,14 @@ spec: path: /api/health/ port: 8000 # MigrateWithLock in cmd/api/main.go runs pg_advisory_lock on - # every startup. On a cold boot with 3 replicas, the first does - # AutoMigrate (~90s) and the others wait on the lock, so real - # startup runs 90–240s. 48 × 5s = 240s grace absorbs it without - # healthcheck killing a still-starting replica. - failureThreshold: 48 + # every startup against Neon's *direct* (non-pooler) endpoint, + # because session-scoped locks don't survive PgBouncer's + # transaction mode. AutoMigrate over a transatlantic direct + # link issues many DDLs serially at ~110ms RTT each, ≈ 4–6 min + # on the first pod; later pods acquire the same lock and then + # find the migration is a no-op. 120 × 5s = 600s (10 min) of + # grace keeps the healthcheck from killing a still-migrating pod. + failureThreshold: 120 periodSeconds: 5 readinessProbe: httpGet: