Harden prod deploy: versioned secrets, healthchecks, migration lock, dry-run
Swarm stack
- Resource limits on all services, stop_grace_period 60s on api/worker/admin
- Dozzle bound to manager loopback only (ssh -L required for access)
- Worker health server on :6060, admin /api/health endpoint
- Redis 200M LRU cap, B2/S3 env vars wired through to api service

Deploy script
- DRY_RUN=1 prints plan + exits
- Auto-rollback on failed healthcheck, docker logout at end
- Versioned-secret pruning keeps last SECRET_KEEP_VERSIONS (default 3)
- PUSH_LATEST_TAG default flipped to false
- B2 all-or-none validation before deploy

Code
- cmd/api takes pg_advisory_lock on a dedicated connection before AutoMigrate, serialising boot-time migrations across replicas
- cmd/worker exposes an HTTP /health endpoint with graceful shutdown

Docs
- deploy/DEPLOYING.md: step-by-step walkthrough for a real deploy
- deploy/shit_deploy_cant_do.md: manual prerequisites + recurring ops
- deploy/README.md updated with storage toggle, worker-replica caveat, multi-arch recipe, connection-pool tuning, renumbered sections

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ version: "3.8"
|
||||
services:
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes --appendfsync everysec
|
||||
command: redis-server --appendonly yes --appendfsync everysec --maxmemory 200mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
@@ -18,6 +18,13 @@ services:
|
||||
delay: 5s
|
||||
placement:
|
||||
max_replicas_per_node: 1
|
||||
resources:
|
||||
limits:
|
||||
cpus: "0.50"
|
||||
memory: 256M
|
||||
reservations:
|
||||
cpus: "0.10"
|
||||
memory: 64M
|
||||
networks:
|
||||
- honeydue-network
|
||||
|
||||
@@ -67,6 +74,17 @@ services:
|
||||
STORAGE_MAX_FILE_SIZE: "${STORAGE_MAX_FILE_SIZE}"
|
||||
STORAGE_ALLOWED_TYPES: "${STORAGE_ALLOWED_TYPES}"
|
||||
|
||||
# S3-compatible object storage (Backblaze B2, MinIO). When all B2_* vars
|
||||
# are set, uploads/media are stored in the bucket and the local volume
|
||||
# mount becomes a no-op fallback. Required for multi-replica prod —
|
||||
# without it uploads only exist on one node.
|
||||
B2_ENDPOINT: "${B2_ENDPOINT}"
|
||||
B2_KEY_ID: "${B2_KEY_ID}"
|
||||
B2_APP_KEY: "${B2_APP_KEY}"
|
||||
B2_BUCKET_NAME: "${B2_BUCKET_NAME}"
|
||||
B2_USE_SSL: "${B2_USE_SSL}"
|
||||
B2_REGION: "${B2_REGION}"
|
||||
|
||||
FEATURE_PUSH_ENABLED: "${FEATURE_PUSH_ENABLED}"
|
||||
FEATURE_EMAIL_ENABLED: "${FEATURE_EMAIL_ENABLED}"
|
||||
FEATURE_WEBHOOKS_ENABLED: "${FEATURE_WEBHOOKS_ENABLED}"
|
||||
@@ -86,6 +104,7 @@ services:
|
||||
APPLE_IAP_SANDBOX: "${APPLE_IAP_SANDBOX}"
|
||||
GOOGLE_IAP_SERVICE_ACCOUNT_PATH: "${GOOGLE_IAP_SERVICE_ACCOUNT_PATH}"
|
||||
GOOGLE_IAP_PACKAGE_NAME: "${GOOGLE_IAP_PACKAGE_NAME}"
|
||||
stop_grace_period: 60s
|
||||
command:
|
||||
- /bin/sh
|
||||
- -lc
|
||||
@@ -128,6 +147,13 @@ services:
|
||||
parallelism: 1
|
||||
delay: 5s
|
||||
order: stop-first
|
||||
resources:
|
||||
limits:
|
||||
cpus: "1.00"
|
||||
memory: 512M
|
||||
reservations:
|
||||
cpus: "0.25"
|
||||
memory: 128M
|
||||
networks:
|
||||
- honeydue-network
|
||||
|
||||
@@ -142,10 +168,12 @@ services:
|
||||
PORT: "3000"
|
||||
HOSTNAME: "0.0.0.0"
|
||||
NEXT_PUBLIC_API_URL: "${NEXT_PUBLIC_API_URL}"
|
||||
stop_grace_period: 60s
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/admin/"]
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/api/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 20s
|
||||
retries: 3
|
||||
deploy:
|
||||
replicas: ${ADMIN_REPLICAS}
|
||||
@@ -160,6 +188,13 @@ services:
|
||||
parallelism: 1
|
||||
delay: 5s
|
||||
order: stop-first
|
||||
resources:
|
||||
limits:
|
||||
cpus: "0.50"
|
||||
memory: 384M
|
||||
reservations:
|
||||
cpus: "0.10"
|
||||
memory: 128M
|
||||
networks:
|
||||
- honeydue-network
|
||||
|
||||
@@ -201,6 +236,7 @@ services:
|
||||
FEATURE_ONBOARDING_EMAILS_ENABLED: "${FEATURE_ONBOARDING_EMAILS_ENABLED}"
|
||||
FEATURE_PDF_REPORTS_ENABLED: "${FEATURE_PDF_REPORTS_ENABLED}"
|
||||
FEATURE_WORKER_ENABLED: "${FEATURE_WORKER_ENABLED}"
|
||||
stop_grace_period: 60s
|
||||
command:
|
||||
- /bin/sh
|
||||
- -lc
|
||||
@@ -222,6 +258,12 @@ services:
|
||||
target: fcm_server_key
|
||||
- source: ${APNS_AUTH_KEY_SECRET}
|
||||
target: apns_auth_key
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:6060/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
start_period: 15s
|
||||
retries: 3
|
||||
deploy:
|
||||
replicas: ${WORKER_REPLICAS}
|
||||
restart_policy:
|
||||
@@ -235,16 +277,28 @@ services:
|
||||
parallelism: 1
|
||||
delay: 5s
|
||||
order: stop-first
|
||||
resources:
|
||||
limits:
|
||||
cpus: "1.00"
|
||||
memory: 512M
|
||||
reservations:
|
||||
cpus: "0.25"
|
||||
memory: 128M
|
||||
networks:
|
||||
- honeydue-network
|
||||
|
||||
dozzle:
|
||||
# NOTE: Dozzle exposes the full Docker log stream with no built-in auth.
|
||||
# Bound to manager loopback only — access via SSH tunnel:
|
||||
# ssh -L ${DOZZLE_PORT}:127.0.0.1:${DOZZLE_PORT} <manager>
|
||||
# Then browse http://localhost:${DOZZLE_PORT}
|
||||
image: amir20/dozzle:latest
|
||||
ports:
|
||||
- target: 8080
|
||||
published: ${DOZZLE_PORT}
|
||||
protocol: tcp
|
||||
mode: ingress
|
||||
mode: host
|
||||
host_ip: 127.0.0.1
|
||||
environment:
|
||||
DOZZLE_NO_ANALYTICS: "true"
|
||||
volumes:
|
||||
@@ -257,6 +311,13 @@ services:
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
resources:
|
||||
limits:
|
||||
cpus: "0.25"
|
||||
memory: 128M
|
||||
reservations:
|
||||
cpus: "0.05"
|
||||
memory: 32M
|
||||
networks:
|
||||
- honeydue-network
|
||||
|
||||
|
||||
Reference in New Issue
Block a user