#!/usr/bin/env bash
#
# Production deploy script — preflight section.
#
# Resolves the deploy/repo directories, defines the logging and validation
# helpers, parses command-line arguments, and verifies that every config
# file, secret file and required variable exists and is free of placeholder
# text before anything is built or deployed.
#
# Behaviour is tuned through environment variables; see print_usage.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEPLOY_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
REPO_DIR="$(cd "${DEPLOY_DIR}/.." && pwd)"

STACK_TEMPLATE="${DEPLOY_DIR}/swarm-stack.prod.yml"
CLUSTER_ENV="${DEPLOY_DIR}/cluster.env"
REGISTRY_ENV="${DEPLOY_DIR}/registry.env"
PROD_ENV="${DEPLOY_DIR}/prod.env"

SECRET_POSTGRES="${DEPLOY_DIR}/secrets/postgres_password.txt"
SECRET_APP_KEY="${DEPLOY_DIR}/secrets/secret_key.txt"
SECRET_EMAIL_PASS="${DEPLOY_DIR}/secrets/email_host_password.txt"
SECRET_FCM_KEY="${DEPLOY_DIR}/secrets/fcm_server_key.txt"
SECRET_APNS_KEY="${DEPLOY_DIR}/secrets/apns_auth_key.p8"

SKIP_BUILD="${SKIP_BUILD:-0}"
SKIP_HEALTHCHECK="${SKIP_HEALTHCHECK:-0}"
DRY_RUN="${DRY_RUN:-0}"
SECRET_KEEP_VERSIONS="${SECRET_KEEP_VERSIONS:-3}"

# Print an informational message to stdout, prefixed with [deploy].
log() {
  printf '[deploy] %s\n' "$*"
}

# Print a warning to stderr, prefixed with [deploy][warn].
warn() {
  printf '[deploy][warn] %s\n' "$*" >&2
}

# Print an error to stderr and terminate the script with status 1.
die() {
  printf '[deploy][error] %s\n' "$*" >&2
  exit 1
}

# Abort unless the command named by $1 is available on PATH.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Missing required command: $1"
}

# Decide whether a value is still unfilled.
# Arguments: $1 - the value to inspect
# Returns:   0 if the value is empty or contains a known placeholder marker
#            (matched case-insensitively, anywhere in the value); 1 otherwise.
contains_placeholder() {
  local value="$1"
  [[ -z "${value}" ]] && return 0

  local lowered
  lowered="$(printf '%s' "${value}" | tr '[:upper:]' '[:lower:]')"
  case "${lowered}" in
    *changeme*|*replace_me*|*example.com*|*your-*|*todo*|*fill_me*|*paste_here*)
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}

# Make sure a required file exists.
# Arguments: $1 - path of the required file
# If the file is missing but "<path>.example" exists, the example is copied
# into place and the script stops so the operator can fill it in. If neither
# exists the script stops with a "missing file" error.
ensure_file_from_example() {
  local path="$1"
  local example="${path}.example"

  if [[ -f "${path}" ]]; then
    return 0
  fi

  if [[ -f "${example}" ]]; then
    cp "${example}" "${path}"
    # The operator is about to put credentials into this file; do not leave
    # it readable by other users.
    chmod 600 "${path}"
    die "Created ${path} from template. Fill it in and rerun."
  fi

  die "Missing required file: ${path}"
}

# Abort unless the variable NAMED by $1 is set, non-empty and not placeholder
# text.
# Arguments: $1 - variable name (looked up indirectly)
require_var() {
  local name="$1"
  local value="${!name:-}"

  [[ -n "${value}" ]] || die "Missing required value: ${name}"

  if contains_placeholder "${value}"; then
    # Report the variable name only. The placeholder match is a substring
    # test, so the value may be a real credential (e.g. REGISTRY_TOKEN) and
    # must not be echoed to the terminal or CI logs.
    die "Value still uses placeholder text: ${name}"
  fi
}

# Abort unless a secret file exists, is non-empty and is not placeholder text.
# Arguments: $1 - path to the secret file
#            $2 - human-readable label used in the error message
require_secret_file() {
  local path="$1"
  local label="$2"

  ensure_file_from_example "${path}"

  # Strip carriage returns and trailing whitespace before judging emptiness.
  local contents
  contents="$(tr -d '\r' < "${path}" | sed 's/[[:space:]]*$//')"
  [[ -n "${contents}" ]] || die "Secret file is empty: ${path}"

  if contains_placeholder "${contents}"; then
    die "Secret file still has placeholder text (${label}): ${path}"
  fi
}

# Print the usage/help text to stdout.
print_usage() {
  cat <<'EOF'
Usage:
  ./.deploy_prod

Optional environment flags:
  DRY_RUN=1                   Print the deployment plan and exit without changes.
  SKIP_BUILD=1                Deploy existing image tags without rebuilding/pushing.
  SKIP_HEALTHCHECK=1          Skip final HTTP health check.
  DEPLOY_TAG=<tag>            Override image tag (default: git short sha).
  PUSH_LATEST_TAG=true|false  Also tag/push :latest (default: false — SHA only).
  SECRET_KEEP_VERSIONS=<n>    How many versions of each Swarm secret to retain
                              (default: 3). Older unused versions are pruned.
EOF
}

# The script takes no positional arguments; only -h/--help is recognised.
# Both arms exit, so the loop never needs to shift.
while (($# > 0)); do
  case "$1" in
    -h|--help)
      print_usage
      exit 0
      ;;
    *)
      die "Unknown argument: $1"
      ;;
  esac
done

for required_cmd in docker ssh scp git awk sed grep mktemp date curl; do
  require_cmd "${required_cmd}"
done

ensure_file_from_example "${CLUSTER_ENV}"
ensure_file_from_example "${REGISTRY_ENV}"
ensure_file_from_example "${PROD_ENV}"

require_secret_file "${SECRET_POSTGRES}" "Postgres password"
require_secret_file "${SECRET_APP_KEY}" "SECRET_KEY"
require_secret_file "${SECRET_EMAIL_PASS}" "SMTP password"
require_secret_file "${SECRET_FCM_KEY}" "FCM server key"
require_secret_file "${SECRET_APNS_KEY}" "APNS private key"

# Export everything the env files define (set -a) so later steps and child
# processes see it.
set -a
# shellcheck disable=SC1090
source "${CLUSTER_ENV}"
# shellcheck disable=SC1090
source "${REGISTRY_ENV}"
# shellcheck disable=SC1090
source "${PROD_ENV}"
set +a

DEPLOY_MANAGER_SSH_PORT="${DEPLOY_MANAGER_SSH_PORT:-22}"
DEPLOY_STACK_NAME="${DEPLOY_STACK_NAME:-honeydue}"
DEPLOY_REMOTE_DIR="${DEPLOY_REMOTE_DIR:-/opt/honeydue/deploy}"
DEPLOY_WAIT_SECONDS="${DEPLOY_WAIT_SECONDS:-420}"
DEPLOY_TAG="${DEPLOY_TAG:-$(git -C "${REPO_DIR}" rev-parse --short HEAD)}"
PUSH_LATEST_TAG="${PUSH_LATEST_TAG:-false}"

required_vars=(
  DEPLOY_MANAGER_HOST
  DEPLOY_MANAGER_USER
  DEPLOY_STACK_NAME
  DEPLOY_REMOTE_DIR
  REGISTRY
  REGISTRY_NAMESPACE
  REGISTRY_USERNAME
  REGISTRY_TOKEN
  ALLOWED_HOSTS
  CORS_ALLOWED_ORIGINS
  BASE_URL
  NEXT_PUBLIC_API_URL
  DB_HOST
  DB_PORT
  POSTGRES_USER
  POSTGRES_DB
  DB_SSLMODE
  REDIS_URL
  EMAIL_HOST
  EMAIL_PORT
  EMAIL_HOST_USER
  DEFAULT_FROM_EMAIL
  APNS_AUTH_KEY_ID
  APNS_TEAM_ID
  APNS_TOPIC
)
for required_var in "${required_vars[@]}"; do
  require_var "${required_var}"
done

# Storage backend validation: B2 is all-or-none. If any var is filled with
# a real value, require all four core vars.
Empty means "use local volume". b2_any_set=0 b2_all_set=1 for b2_var in B2_ENDPOINT B2_KEY_ID B2_APP_KEY B2_BUCKET_NAME; do val="${!b2_var:-}" if [[ -n "${val}" ]] && ! contains_placeholder "${val}"; then b2_any_set=1 else b2_all_set=0 fi done if (( b2_any_set == 1 && b2_all_set == 0 )); then die "Partial B2 configuration detected. Set all four of B2_ENDPOINT, B2_KEY_ID, B2_APP_KEY, B2_BUCKET_NAME, or leave all four empty to use the local volume." fi if (( b2_all_set == 1 )); then log "Storage backend: S3 (${B2_ENDPOINT} / bucket=${B2_BUCKET_NAME})" else warn "Storage backend: LOCAL VOLUME. This is not safe for multi-replica prod — uploads will only exist on one node. Set B2_* in prod.env to use object storage." fi if [[ ! "$(tr -d '\r\n' < "${SECRET_APNS_KEY}")" =~ BEGIN[[:space:]]+PRIVATE[[:space:]]+KEY ]]; then die "APNS key file does not look like a private key: ${SECRET_APNS_KEY}" fi app_secret_len="$(tr -d '\r\n' < "${SECRET_APP_KEY}" | wc -c | tr -d ' ')" if (( app_secret_len < 32 )); then die "deploy/secrets/secret_key.txt must be at least 32 characters." fi REGISTRY_PREFIX="${REGISTRY%/}/${REGISTRY_NAMESPACE#/}" API_IMAGE="${REGISTRY_PREFIX}/honeydue-api:${DEPLOY_TAG}" WORKER_IMAGE="${REGISTRY_PREFIX}/honeydue-worker:${DEPLOY_TAG}" ADMIN_IMAGE="${REGISTRY_PREFIX}/honeydue-admin:${DEPLOY_TAG}" SSH_KEY_PATH="${DEPLOY_SSH_KEY_PATH:-}" if [[ -n "${SSH_KEY_PATH}" ]]; then SSH_KEY_PATH="${SSH_KEY_PATH/#\~/${HOME}}" fi SSH_TARGET="${DEPLOY_MANAGER_USER}@${DEPLOY_MANAGER_HOST}" SSH_OPTS=(-p "${DEPLOY_MANAGER_SSH_PORT}") SCP_OPTS=(-P "${DEPLOY_MANAGER_SSH_PORT}") if [[ -n "${SSH_KEY_PATH}" ]]; then SSH_OPTS+=(-i "${SSH_KEY_PATH}") SCP_OPTS+=(-i "${SSH_KEY_PATH}") fi if [[ "${DRY_RUN}" == "1" ]]; then cat < ${DEPLOY_STACK_NAME}_secret_key_ ${DEPLOY_STACK_NAME}_email_host_password_ ${DEPLOY_STACK_NAME}_fcm_server_key_ ${DEPLOY_STACK_NAME}_apns_auth_key_ No changes made. Re-run without DRY_RUN=1 to deploy. 
================================================= EOF exit 0 fi log "Validating SSH access to ${SSH_TARGET}" if ! ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "echo ok" >/dev/null 2>&1; then die "SSH connection failed to ${SSH_TARGET}" fi remote_swarm_state="$(ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "docker info --format '{{.Swarm.LocalNodeState}} {{.Swarm.ControlAvailable}}'" 2>/dev/null || true)" if [[ -z "${remote_swarm_state}" ]]; then die "Could not read Docker Swarm state on manager. Is Docker installed/running?" fi if [[ "${remote_swarm_state}" != "active true" ]]; then die "Remote node must be a Swarm manager. Got: ${remote_swarm_state}" fi if [[ "${SKIP_BUILD}" != "1" ]]; then log "Logging in to ${REGISTRY}" printf '%s' "${REGISTRY_TOKEN}" | docker login "${REGISTRY}" -u "${REGISTRY_USERNAME}" --password-stdin >/dev/null log "Building API image ${API_IMAGE}" docker build --target api -t "${API_IMAGE}" "${REPO_DIR}" log "Building Worker image ${WORKER_IMAGE}" docker build --target worker -t "${WORKER_IMAGE}" "${REPO_DIR}" log "Building Admin image ${ADMIN_IMAGE}" docker build --target admin -t "${ADMIN_IMAGE}" "${REPO_DIR}" log "Pushing deploy images" docker push "${API_IMAGE}" docker push "${WORKER_IMAGE}" docker push "${ADMIN_IMAGE}" if [[ "${PUSH_LATEST_TAG}" == "true" ]]; then log "Updating :latest tags" docker tag "${API_IMAGE}" "${REGISTRY_PREFIX}/honeydue-api:latest" docker tag "${WORKER_IMAGE}" "${REGISTRY_PREFIX}/honeydue-worker:latest" docker tag "${ADMIN_IMAGE}" "${REGISTRY_PREFIX}/honeydue-admin:latest" docker push "${REGISTRY_PREFIX}/honeydue-api:latest" docker push "${REGISTRY_PREFIX}/honeydue-worker:latest" docker push "${REGISTRY_PREFIX}/honeydue-admin:latest" fi else warn "SKIP_BUILD=1 set. 
Using prebuilt images for tag: ${DEPLOY_TAG}" fi DEPLOY_ID_RAW="${DEPLOY_TAG}-$(date +%Y%m%d%H%M%S)" DEPLOY_ID="$(printf '%s' "${DEPLOY_ID_RAW}" | tr -c 'a-zA-Z0-9_-' '_')" POSTGRES_PASSWORD_SECRET="${DEPLOY_STACK_NAME}_postgres_password_${DEPLOY_ID}" SECRET_KEY_SECRET="${DEPLOY_STACK_NAME}_secret_key_${DEPLOY_ID}" EMAIL_HOST_PASSWORD_SECRET="${DEPLOY_STACK_NAME}_email_host_password_${DEPLOY_ID}" FCM_SERVER_KEY_SECRET="${DEPLOY_STACK_NAME}_fcm_server_key_${DEPLOY_ID}" APNS_AUTH_KEY_SECRET="${DEPLOY_STACK_NAME}_apns_auth_key_${DEPLOY_ID}" TMP_DIR="$(mktemp -d)" cleanup() { rm -rf "${TMP_DIR}" } trap cleanup EXIT cp "${STACK_TEMPLATE}" "${TMP_DIR}/swarm-stack.prod.yml" cp "${PROD_ENV}" "${TMP_DIR}/prod.env" cp "${REGISTRY_ENV}" "${TMP_DIR}/registry.env" mkdir -p "${TMP_DIR}/secrets" cp "${SECRET_POSTGRES}" "${TMP_DIR}/secrets/postgres_password.txt" cp "${SECRET_APP_KEY}" "${TMP_DIR}/secrets/secret_key.txt" cp "${SECRET_EMAIL_PASS}" "${TMP_DIR}/secrets/email_host_password.txt" cp "${SECRET_FCM_KEY}" "${TMP_DIR}/secrets/fcm_server_key.txt" cp "${SECRET_APNS_KEY}" "${TMP_DIR}/secrets/apns_auth_key.p8" cat > "${TMP_DIR}/runtime.env" </dev/null 2>&1; then echo "[remote] secret exists: ${name}" else docker secret create "${name}" "${src}" >/dev/null echo "[remote] created secret: ${name}" fi } printf '%s' "${REGISTRY_TOKEN}" | docker login "${REGISTRY}" -u "${REGISTRY_USERNAME}" --password-stdin >/dev/null rm -f "${REMOTE_DIR}/registry.env" create_secret "${POSTGRES_PASSWORD_SECRET}" "${REMOTE_DIR}/secrets/postgres_password.txt" create_secret "${SECRET_KEY_SECRET}" "${REMOTE_DIR}/secrets/secret_key.txt" create_secret "${EMAIL_HOST_PASSWORD_SECRET}" "${REMOTE_DIR}/secrets/email_host_password.txt" create_secret "${FCM_SERVER_KEY_SECRET}" "${REMOTE_DIR}/secrets/fcm_server_key.txt" create_secret "${APNS_AUTH_KEY_SECRET}" "${REMOTE_DIR}/secrets/apns_auth_key.p8" rm -f "${REMOTE_DIR}/secrets/postgres_password.txt" rm -f "${REMOTE_DIR}/secrets/secret_key.txt" rm -f 
"${REMOTE_DIR}/secrets/email_host_password.txt" rm -f "${REMOTE_DIR}/secrets/fcm_server_key.txt" rm -f "${REMOTE_DIR}/secrets/apns_auth_key.p8" set -a # shellcheck disable=SC1090 source "${REMOTE_DIR}/prod.env" # shellcheck disable=SC1090 source "${REMOTE_DIR}/runtime.env" set +a docker stack deploy --with-registry-auth -c "${REMOTE_DIR}/swarm-stack.prod.yml" "${STACK_NAME}" EOF log "Waiting for stack convergence (${DEPLOY_WAIT_SECONDS}s max)" start_epoch="$(date +%s)" while true; do services="$(ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "docker stack services '${DEPLOY_STACK_NAME}' --format '{{.Name}} {{.Replicas}}'" 2>/dev/null || true)" if [[ -n "${services}" ]]; then all_ready=1 while IFS=' ' read -r svc replicas; do [[ -z "${svc}" ]] && continue current="${replicas%%/*}" desired="${replicas##*/}" if [[ "${desired}" == "0" ]]; then continue fi if [[ "${current}" != "${desired}" ]]; then all_ready=0 fi done <<< "${services}" if [[ "${all_ready}" -eq 1 ]]; then break fi fi now_epoch="$(date +%s)" elapsed=$((now_epoch - start_epoch)) if (( elapsed >= DEPLOY_WAIT_SECONDS )); then die "Timed out waiting for stack to converge. Check: ssh ${SSH_TARGET} docker stack services ${DEPLOY_STACK_NAME}" fi sleep 10 done log "Pruning old secret versions (keeping last ${SECRET_KEEP_VERSIONS})" ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "bash -s -- '${DEPLOY_STACK_NAME}' '${SECRET_KEEP_VERSIONS}'" <<'EOF' || warn "Secret pruning reported errors (non-fatal)" set -euo pipefail STACK_NAME="$1" KEEP="$2" prune_prefix() { local prefix="$1" # List matching secrets with creation time, sorted newest-first. 
local all all="$(docker secret ls --format '{{.CreatedAt}}|{{.Name}}' 2>/dev/null \ | grep "|${prefix}_" \ | sort -r \ || true)" if [[ -z "${all}" ]]; then return 0 fi local total total="$(printf '%s\n' "${all}" | wc -l | tr -d ' ')" if (( total <= KEEP )); then echo "[cleanup] ${prefix}: ${total} version(s) — nothing to prune" return 0 fi local to_remove to_remove="$(printf '%s\n' "${all}" | tail -n +$((KEEP + 1)) | awk -F'|' '{print $2}')" while IFS= read -r name; do [[ -z "${name}" ]] && continue if docker secret rm "${name}" >/dev/null 2>&1; then echo "[cleanup] removed: ${name}" else echo "[cleanup] in-use (kept): ${name}" fi done <<< "${to_remove}" } for base in postgres_password secret_key email_host_password fcm_server_key apns_auth_key; do prune_prefix "${STACK_NAME}_${base}" done EOF rollback_stack() { warn "Rolling back stack ${DEPLOY_STACK_NAME} on ${SSH_TARGET}" ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "bash -s -- '${DEPLOY_STACK_NAME}'" <<'EOF' || true set +e STACK="$1" for svc in $(docker stack services "${STACK}" --format '{{.Name}}'); do echo "[rollback] ${svc}" docker service rollback "${svc}" || echo "[rollback] ${svc}: nothing to roll back" done EOF } if [[ "${SKIP_HEALTHCHECK}" != "1" && -n "${DEPLOY_HEALTHCHECK_URL:-}" ]]; then log "Running health check: ${DEPLOY_HEALTHCHECK_URL}" if ! curl -fsS --max-time 20 "${DEPLOY_HEALTHCHECK_URL}" >/dev/null; then warn "Health check FAILED for ${DEPLOY_HEALTHCHECK_URL}" rollback_stack die "Deploy rolled back due to failed health check." fi fi # Best-effort registry logout — the token should not linger in # ~/.docker/config.json after deploy completes. Failures are non-fatal. log "Logging out of registry (local + remote)" docker logout "${REGISTRY}" >/dev/null 2>&1 || true ssh "${SSH_OPTS[@]}" "${SSH_TARGET}" "docker logout '${REGISTRY}' >/dev/null 2>&1 || true" log "Deploy completed successfully." log "Stack: ${DEPLOY_STACK_NAME}" log "Images:" log " ${API_IMAGE}" log " ${WORKER_IMAGE}" log " ${ADMIN_IMAGE}"