Migrate prod deploy from Swarm to K3s; add full deployment book

Infrastructure:
- Stack now runs on K3s v1.34.6 HA (3 Hetzner CX33 nodes as managers)
- Traefik DaemonSet + hostNetwork replaces Caddy + ingress mesh
- All manifests in deploy-k3s/manifests/; Swarm config (deploy/) kept temporarily for reference

Bug fixes surfaced during migration:
- Dockerfile: golang:1.24-alpine -> 1.25-alpine (go.mod requires 1.25)
- cache_service.go: remove sync.Once reassignment from inside Do() callback (was causing 'unlock of unlocked mutex' fatal after Redis Ping failure)
- router.go: relax CSP from 'default-src none' to 'default-src self' + allowlist fonts.googleapis.com so the marketing landing page CSS actually loads in browsers
- deploy/scripts/deploy_prod.sh: use docker buildx with --platform linux/amd64 so arm64 (Apple Silicon) dev machines produce images runnable on x86_64 Hetzner nodes; fix array expansion under set -u
- deploy/swarm-stack.prod.yml: fix secret source references to use top-level aliases (the '${X_SECRET}' form never actually resolved); dozzle ports: long-form host_ip is rejected by Swarm, switched to short-form (bound to 0.0.0.0 with UFW-based loopback restriction); worker replicas 2 -> 1 (Asynq scheduler singleton)
- deploy-k3s/manifests/admin/deployment.yaml: probe path '/admin/' -> '/' (Next.js serves at root; /admin/ returned 404 and killed pods); startupProbe failureThreshold 12 -> 24
- deploy-k3s/manifests/pod-disruption-budgets.yaml: worker minAvailable 1 -> 0 (singleton)
- deploy-k3s/manifests/api/deployment.yaml: startupProbe failureThreshold 12 -> 48 (MigrateWithLock serializes across 3 replicas on first-boot; real startup takes up to 240s)
- .gitignore: tighten 'api' -> '/api' (was matching deploy-k3s/manifests/api/ and admin/src/app/api/*, hiding legitimate files)

New files:
- deploy-k3s/manifests/traefik-helmchartconfig.yaml: DaemonSet + hostNetwork override for k3s-bundled Traefik
- deploy-k3s/manifests/ingress/ingress-simple.yaml: plain Ingress without TLS (CF Flexible SSL) and without middleware
- deploy-k3s/MIGRATION_NOTES.md: operator-facing migration log

Documentation:
- docs/deployment/ — full deployment book, 26 files, ~42k words:
  - Part I Overview, infrastructure, orchestrator choice (Ch 0-2)
  - Part II Networking, firewall, Cloudflare (Ch 3-4, 13)
  - Part III Security, Traefik ingress (Ch 5-6)
  - Part IV Services, DB, storage, secrets, registry (Ch 7-11)
  - Part V Data flow, deploy process, observability, failures, runbook (Ch 12, 14-17)
  - Part VI Cost, Swarm postmortem, roadmap (Ch 18-20)
  - Appendices: glossary, kubectl cheat sheet, file locations, consolidated citations
- README.md: Production Deployment section replaced with pointer to the book; Go version bumped to 1.25

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 07:20:21 -05:00
parent 4ec4bbbfe8
commit 6f303dbbaa
46 changed files with 9785 additions and 93 deletions
@@ -1,6 +1,59 @@
 version: "3.8"

 services:
+  # Edge reverse proxy — the only service publishing :80/:443 publicly.
+  # Routes by Host header to the internal `api` and `admin` services over the
+  # overlay network. Three replicas, at most one per node; the ingress routing
+  # mesh accepts :80/:443 on every node and forwards to a caddy replica.
+  caddy:
+    image: caddy:2-alpine
+    ports:
+      - target: 80
+        published: 80
+        protocol: tcp
+        mode: ingress
+      - target: 443
+        published: 443
+        protocol: tcp
+        mode: ingress
+    configs:
+      - source: caddyfile
+        target: /etc/caddy/Caddyfile
+        mode: 0444  # octal: read-only for owner, group and others
+    volumes:
+      - caddy_data:/data
+      - caddy_config:/config
+    healthcheck:
+      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+    deploy:
+      replicas: 3
+      restart_policy:
+        condition: any
+        delay: 5s
+      update_config:
+        parallelism: 1
+        delay: 10s
+        order: stop-first  # start-first stalls: max_replicas_per_node: 1 blocks the extra task
+      rollback_config:
+        parallelism: 1
+        delay: 5s
+        order: stop-first
+      placement:
+        max_replicas_per_node: 1  # with 3 replicas this spreads one task per node
+      resources:
+        limits:
+          cpus: "0.25"
+          memory: 128M
+        reservations:
+          cpus: "0.05"
+          memory: 32M
+    networks:
+      - honeydue-network
+
  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes --appendfsync everysec --maxmemory 200mb --maxmemory-policy allkeys-lru
@@ -30,11 +83,8 @@ services:

  api:
    image: ${API_IMAGE}
-    ports:
-      - target: 8000
-        published: ${API_PORT}
-        protocol: tcp
-        mode: ingress
+    # No `ports:` block — Caddy edge service proxies to api:8000 over the
+    # overlay network. Port 8000 is never publicly exposed.
    environment:
      PORT: "8000"
      DEBUG: "${DEBUG}"
@@ -104,6 +154,10 @@ services:
      APPLE_IAP_SANDBOX: "${APPLE_IAP_SANDBOX}"
      GOOGLE_IAP_SERVICE_ACCOUNT_PATH: "${GOOGLE_IAP_SERVICE_ACCOUNT_PATH}"
      GOOGLE_IAP_PACKAGE_NAME: "${GOOGLE_IAP_PACKAGE_NAME}"
+
+      # Seeded on first migration (idempotent — skipped if admin_users row exists)
+      ADMIN_EMAIL: "${ADMIN_EMAIL}"
+      ADMIN_PASSWORD: "${ADMIN_PASSWORD}"
    stop_grace_period: 60s
    command:
      - /bin/sh
@@ -116,15 +170,15 @@ services:
        export FCM_SERVER_KEY="$$(cat /run/secrets/fcm_server_key)"
        exec /app/api
    secrets:
-      - source: ${POSTGRES_PASSWORD_SECRET}
+      - source: postgres_password
        target: postgres_password
-      - source: ${SECRET_KEY_SECRET}
+      - source: secret_key
        target: secret_key
-      - source: ${EMAIL_HOST_PASSWORD_SECRET}
+      - source: email_host_password
        target: email_host_password
-      - source: ${FCM_SERVER_KEY_SECRET}
+      - source: fcm_server_key
        target: fcm_server_key
-      - source: ${APNS_AUTH_KEY_SECRET}
+      - source: apns_auth_key
        target: apns_auth_key
    volumes:
      - uploads:/app/uploads
@@ -132,10 +186,18 @@ services:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:8000/api/health/"]
      interval: 30s
      timeout: 10s
-      start_period: 15s
+      # Single-replica AutoMigrate on a fresh DB takes ~90s; subsequent
+      # replicas are ~2s (idempotent). 180s gives honest headroom for the
+      # first replica to finish, without masking cascade failures.
+      start_period: 180s
      retries: 3
    deploy:
      replicas: ${API_REPLICAS}
+      # DNS round-robin instead of VIP. VIP's kernel IPVS state can go stale
+      # during replica churn (rolling updates, task restarts), causing
+      # intermittent i/o timeouts from clients on the overlay network (Caddy).
+      # dnsrr resolves to live task IPs directly and bypasses IPVS.
+      endpoint_mode: dnsrr
      restart_policy:
        condition: any
        delay: 5s
@@ -159,11 +221,8 @@ services:

  admin:
    image: ${ADMIN_IMAGE}
-    ports:
-      - target: 3000
-        published: ${ADMIN_PORT}
-        protocol: tcp
-        mode: ingress
+    # No `ports:` block — reached via Caddy on admin.myhoneydue.com using
+    # Swarm's embedded DNS and default VIP endpoint_mode.
    environment:
      PORT: "3000"
      HOSTNAME: "0.0.0.0"
@@ -248,15 +307,15 @@ services:
        export FCM_SERVER_KEY="$$(cat /run/secrets/fcm_server_key)"
        exec /app/worker
    secrets:
-      - source: ${POSTGRES_PASSWORD_SECRET}
+      - source: postgres_password
        target: postgres_password
-      - source: ${SECRET_KEY_SECRET}
+      - source: secret_key
        target: secret_key
-      - source: ${EMAIL_HOST_PASSWORD_SECRET}
+      - source: email_host_password
        target: email_host_password
-      - source: ${FCM_SERVER_KEY_SECRET}
+      - source: fcm_server_key
        target: fcm_server_key
-      - source: ${APNS_AUTH_KEY_SECRET}
+      - source: apns_auth_key
        target: apns_auth_key
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:6060/health"]
@@ -293,12 +352,11 @@ services:
    #   ssh -L ${DOZZLE_PORT}:127.0.0.1:${DOZZLE_PORT} <manager>
    # Then browse http://localhost:${DOZZLE_PORT}
    image: amir20/dozzle:latest
+    # Swarm's long-form port spec rejects `host_ip`, so we use the short form
+    # (127.0.0.1:<port>:8080). The port still ends up bound to 0.0.0.0; the
+    # loopback-only restriction is enforced by UFW. Use the SSH tunnel above.
    ports:
-      - target: 8080
-        published: ${DOZZLE_PORT}
-        protocol: tcp
-        mode: host
-        host_ip: 127.0.0.1
+      - "127.0.0.1:${DOZZLE_PORT}:8080"
    environment:
      DOZZLE_NO_ANALYTICS: "true"
    volumes:
@@ -324,6 +382,8 @@ services:
 volumes:
  redis_data:
  uploads:
+  caddy_data:  # mounted at /data in the caddy service
+  caddy_config:  # mounted at /config in the caddy service

 networks:
  honeydue-network:
@@ -331,6 +391,11 @@ networks:
    driver_opts:
      encrypted: "true"

+configs:
+  caddyfile:
+    external: true  # must already exist in the Swarm; this stack does not create it
+    name: ${CADDYFILE_CONFIG}  # config name comes from the CADDYFILE_CONFIG variable
+
 secrets:
  postgres_password:
    external: true