a94744061e
The migration-pooler fix (commit 30966c6) routes AutoMigrate through
Neon's direct compute endpoint to keep the session-scoped advisory lock
alive. That swap means each DDL pays a fresh transatlantic RTT instead
of riding warm pooler connections, so AutoMigrate's runtime climbs from
~90s to 4-6 min on the first pod of a cold boot. With the previous 240s
grace the startup probe was killing pods mid-migration.
This commit raises the startup probe to failureThreshold 120 × periodSeconds
5s = 600s of grace. Subsequent pods inherit the schema and finish their
migrate no-op in seconds, so this only matters for the single first-pod
migration window after a deploy.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
158 lines
5.1 KiB
YAML
158 lines
5.1 KiB
YAML
# Deployment for the honeydue `api` workload.
# Three replicas are rolled one surge pod at a time with no loss of serving
# capacity (maxUnavailable: 0, maxSurge: 1).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api
  namespace: honeydue
  labels:
    app.kubernetes.io/name: api
    app.kubernetes.io/part-of: honeydue
spec:
  replicas: 3
  # The startup probe below allows up to 600s per container, which equals the
  # Deployment default progressDeadlineSeconds (600s). Scheduling and image
  # pull happen before probing starts, so a slow-but-healthy first pod could
  # otherwise trip ProgressDeadlineExceeded. 900s leaves headroom.
  progressDeadlineSeconds: 900
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 0
      maxSurge: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: api
  template:
    metadata:
      labels:
        app.kubernetes.io/name: api
        app.kubernetes.io/part-of: honeydue
    spec:
      serviceAccountName: api
      imagePullSecrets:
        - name: ghcr-credentials
      # Pod-level hardening: non-root uid/gid 1000, default seccomp profile.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: api
          image: IMAGE_PLACEHOLDER  # Replaced by 03-deploy.sh
          ports:
            - containerPort: 8000
              protocol: TCP
          # Root filesystem is read-only; the only writable path declared
          # here is the `tmp` emptyDir mounted at /tmp (see volumeMounts).
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: ["ALL"]
          envFrom:
            - configMapRef:
                name: honeydue-config
          # Secret-backed settings. Entries marked `optional: true` do not
          # block container start when the key is absent from the Secret.
          env:
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: POSTGRES_PASSWORD
            - name: SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: SECRET_KEY
            - name: EMAIL_HOST_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: EMAIL_HOST_PASSWORD
            - name: FCM_SERVER_KEY
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: FCM_SERVER_KEY
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: REDIS_PASSWORD
                  optional: true
            # B2 (Backblaze) credentials. With both set, StorageConfig.IsS3()
            # returns true and uploads stream to B2 via minio-go. With either
            # missing, code falls back to local filesystem — and since
            # readOnlyRootFilesystem is true on this container, that fallback
            # silently fails. So both must be wired or uploads break.
            - name: B2_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: B2_KEY_ID
            - name: B2_APP_KEY
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: B2_APP_KEY
            # Observability — push traces (and any future OTLP metrics) to
            # obs.88oakapps.com. Token gates ingest at nginx; URL is the
            # same one vmagent uses for metric remote-write. Both come from
            # honeydue-secrets so they aren't world-readable in ConfigMap.
            - name: OBS_TRACES_URL
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: OBS_TRACES_URL
                  optional: true
            - name: OBS_INGEST_TOKEN
              valueFrom:
                secretKeyRef:
                  name: honeydue-secrets
                  key: OBS_INGEST_TOKEN
                  optional: true
          volumeMounts:
            - name: apns-key
              mountPath: /secrets/apns
              readOnly: true
            - name: tmp
              mountPath: /tmp
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: "1"
              memory: 512Mi
          # All three probes hit the same endpoint. Readiness and liveness
          # checks only begin once the startup probe has succeeded.
          startupProbe:
            httpGet:
              path: /api/health/
              port: 8000
            # MigrateWithLock in cmd/api/main.go runs pg_advisory_lock on
            # every startup against Neon's *direct* (non-pooler) endpoint,
            # because session-scoped locks don't survive PgBouncer
            # transaction-mode. AutoMigrate over a transatlantic direct
            # link runs many DDLs serially × ~110ms RTT each ≈ 4–6 min on
            # the first pod; subsequent pods see no-op migrate after
            # acquiring the same lock. 120 × 5s = 600s grace absorbs it
            # without the healthcheck killing a still-migrating replica.
            failureThreshold: 120
            periodSeconds: 5
          readinessProbe:
            httpGet:
              path: /api/health/
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
          livenessProbe:
            httpGet:
              path: /api/health/
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
      volumes:
        # Projects only the apns_auth_key.p8 key of the Secret; the container
        # mounts it read-only at /secrets/apns.
        - name: apns-key
          secret:
            secretName: honeydue-apns-key
            items:
              - key: apns_auth_key.p8
                path: apns_auth_key.p8
        # Scratch space backing /tmp, capped at 64Mi.
        - name: tmp
          emptyDir:
            sizeLimit: 64Mi