Compare commits
60 Commits
15359401fa
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 225fb1306b | |||
| b54493f785 | |||
| 3b2ea9959a | |||
| cf054959bd | |||
| 12de5a230a | |||
| 25897e913e | |||
| 81e454d86d | |||
| 7b87f2e392 | |||
| 6de90acef7 | |||
| 64c656bde1 | |||
| d74cfeee62 | |||
| 52bf1ff3c7 | |||
| e448ec66dc | |||
| 3d3ba84df0 | |||
| 81578f6e27 | |||
| b66151ddd9 | |||
| c845771946 | |||
| 93fddc3769 | |||
| c77ff07ce9 | |||
| 2004f9c5b2 | |||
| 139a990ebc | |||
| 7cc5448a7c | |||
| 5d8559b495 | |||
| 191c9b08e0 | |||
| 4efc87559a | |||
| 1347ffadf5 | |||
| 14026251b7 | |||
| b7f83293b8 | |||
| 29c9014a33 | |||
| 9bee436e86 | |||
| 0798ae8d74 | |||
| ce4d49caef | |||
| cb1dc383b4 | |||
| 8fce568532 | |||
| 289a23f7e6 | |||
| 8d9ca2e6ed | |||
| 0f7450ada9 | |||
| 12b2f9d43b | |||
| d96f317d20 | |||
| 4049b704c3 | |||
| a94744061e | |||
| 30966c6f5e | |||
| b67f7f9e6b | |||
| c9ac273dbd | |||
| 88fb1751c7 | |||
| 9410da7497 | |||
| d9b5f85c3d | |||
| e881d37de0 | |||
| 65a9aae4e5 | |||
| 3f5bf21e09 | |||
| bc3da007db | |||
| 77cfcc0b27 | |||
| d3708e6c72 | |||
| 372d4d2d37 | |||
| df78d9ccd8 | |||
| 1cd6cafa9d | |||
| 57cef36379 | |||
| 9ea058347f | |||
| 7e77e3bbab | |||
| ace03d2340 |
+11
-1
@@ -28,12 +28,22 @@ EMAIL_HOST_USER=your-email@gmail.com
|
|||||||
EMAIL_HOST_PASSWORD=your-app-password
|
EMAIL_HOST_PASSWORD=your-app-password
|
||||||
DEFAULT_FROM_EMAIL=honeyDue <noreply@honeyDue.treytartt.com>
|
DEFAULT_FROM_EMAIL=honeyDue <noreply@honeyDue.treytartt.com>
|
||||||
|
|
||||||
|
# Sign in with Apple
|
||||||
|
# APPLE_CLIENT_ID must equal the iOS bundle ID of the build hitting this
|
||||||
|
# backend. The Apple identity-token `aud` claim is checked against it
|
||||||
|
# (see internal/services/apple_auth.go::verifyAudience). With DEBUG=false
|
||||||
|
# an empty value rejects every Apple token.
|
||||||
|
# Release builds: com.myhoneydue.honeyDue
|
||||||
|
# Debug builds: com.myhoneydue.honeyDue.dev
|
||||||
|
APPLE_CLIENT_ID=com.myhoneydue.honeyDue.dev
|
||||||
|
APPLE_TEAM_ID=X86BR9WTLD
|
||||||
|
|
||||||
# APNs Settings (iOS Push Notifications)
|
# APNs Settings (iOS Push Notifications)
|
||||||
# Direct APNs integration - no external push server needed
|
# Direct APNs integration - no external push server needed
|
||||||
APNS_AUTH_KEY_PATH=/path/to/AuthKey_XXXXXX.p8
|
APNS_AUTH_KEY_PATH=/path/to/AuthKey_XXXXXX.p8
|
||||||
APNS_AUTH_KEY_ID=XXXXXXXXXX
|
APNS_AUTH_KEY_ID=XXXXXXXXXX
|
||||||
APNS_TEAM_ID=XXXXXXXXXX
|
APNS_TEAM_ID=XXXXXXXXXX
|
||||||
APNS_TOPIC=com.tt.honeyDue
|
APNS_TOPIC=com.myhoneydue.honeyDue.dev
|
||||||
APNS_PRODUCTION=false # Set to true for production APNs, false for sandbox
|
APNS_PRODUCTION=false # Set to true for production APNs, false for sandbox
|
||||||
|
|
||||||
# FCM Settings (Android Push Notifications)
|
# FCM Settings (Android Push Notifications)
|
||||||
|
|||||||
@@ -8,6 +8,9 @@ bin/
|
|||||||
/api
|
/api
|
||||||
/worker
|
/worker
|
||||||
/admin
|
/admin
|
||||||
|
/admin-reset
|
||||||
|
/notif-diag
|
||||||
|
/send-test-push
|
||||||
!admin/
|
!admin/
|
||||||
*.exe
|
*.exe
|
||||||
*.exe~
|
*.exe~
|
||||||
@@ -42,3 +45,4 @@ push_certs/
|
|||||||
|
|
||||||
# Vendor (if not using go modules)
|
# Vendor (if not using go modules)
|
||||||
# vendor/
|
# vendor/
|
||||||
|
/migrate
|
||||||
|
|||||||
+19
-3
@@ -1,5 +1,5 @@
|
|||||||
# Admin panel build stage
|
# Admin panel build stage
|
||||||
FROM node:20-alpine AS admin-builder
|
FROM node:20-alpine@sha256:fb4cd12c85ee03686f6af5362a0b0d56d50c58a04632e6c0fb8363f609372293 AS admin-builder
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@@ -49,6 +49,19 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -ldflags="-w -s" -o /
|
|||||||
# Build the worker binary
|
# Build the worker binary
|
||||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -ldflags="-w -s" -o /app/worker ./cmd/worker
|
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -ldflags="-w -s" -o /app/worker ./cmd/worker
|
||||||
|
|
||||||
|
# Install goose CLI for production migrations. Pinned to a specific version
|
||||||
|
# so an upstream behavioural change can't break a deploy unannounced.
|
||||||
|
# Bumping is a deliberate, reviewable diff. We `go build` rather than
|
||||||
|
# `go install` so the output path is predictable across host platforms —
|
||||||
|
# `go install` with cross-compile env vars drops the binary in
|
||||||
|
# /go/bin/<goos>_<goarch>/, which is awkward to COPY from.
|
||||||
|
RUN cd /tmp && \
|
||||||
|
git clone --depth=1 --branch=v3.22.1 https://github.com/pressly/goose.git goose-src && \
|
||||||
|
cd goose-src && \
|
||||||
|
CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} \
|
||||||
|
go build -ldflags="-w -s" -o /app/goose ./cmd/goose && \
|
||||||
|
cd / && rm -rf /tmp/goose-src
|
||||||
|
|
||||||
# Base runtime stage for Go services
|
# Base runtime stage for Go services
|
||||||
FROM alpine:3.19 AS go-base
|
FROM alpine:3.19 AS go-base
|
||||||
|
|
||||||
@@ -64,6 +77,9 @@ WORKDIR /app
|
|||||||
# Copy all binaries from builder
|
# Copy all binaries from builder
|
||||||
COPY --from=builder /app/api /app/api
|
COPY --from=builder /app/api /app/api
|
||||||
COPY --from=builder /app/worker /app/worker
|
COPY --from=builder /app/worker /app/worker
|
||||||
|
# goose is the migration runner — same image is reused as the migrate Job
|
||||||
|
# entrypoint via `command: ["/usr/local/bin/goose", ...]`.
|
||||||
|
COPY --from=builder /app/goose /usr/local/bin/goose
|
||||||
|
|
||||||
# Copy templates directory
|
# Copy templates directory
|
||||||
COPY --from=builder /app/templates /app/templates
|
COPY --from=builder /app/templates /app/templates
|
||||||
@@ -93,7 +109,7 @@ FROM go-base AS worker
|
|||||||
CMD ["/app/worker"]
|
CMD ["/app/worker"]
|
||||||
|
|
||||||
# Admin panel runtime stage
|
# Admin panel runtime stage
|
||||||
FROM node:20-alpine AS admin
|
FROM node:20-alpine@sha256:fb4cd12c85ee03686f6af5362a0b0d56d50c58a04632e6c0fb8363f609372293 AS admin
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@@ -115,7 +131,7 @@ ENV HOSTNAME="0.0.0.0"
|
|||||||
CMD ["node", "server.js"]
|
CMD ["node", "server.js"]
|
||||||
|
|
||||||
# Default production stage (for Dokku - runs API + Admin)
|
# Default production stage (for Dokku - runs API + Admin)
|
||||||
FROM node:20-alpine AS production
|
FROM node:20-alpine@sha256:fb4cd12c85ee03686f6af5362a0b0d56d50c58a04632e6c0fb8363f609372293 AS production
|
||||||
|
|
||||||
# Install runtime dependencies
|
# Install runtime dependencies
|
||||||
RUN apk add --no-cache ca-certificates tzdata curl
|
RUN apk add --no-cache ca-certificates tzdata curl
|
||||||
|
|||||||
@@ -89,15 +89,36 @@ docker-build-prod:
|
|||||||
docker build --target worker -t $${REGISTRY:-ghcr.io/treytartt}/honeydue-worker:$${TAG:-latest} .
|
docker build --target worker -t $${REGISTRY:-ghcr.io/treytartt}/honeydue-worker:$${TAG:-latest} .
|
||||||
docker build --target admin -t $${REGISTRY:-ghcr.io/treytartt}/honeydue-admin:$${TAG:-latest} .
|
docker build --target admin -t $${REGISTRY:-ghcr.io/treytartt}/honeydue-admin:$${TAG:-latest} .
|
||||||
|
|
||||||
# Database migrations
|
# Database migrations (goose)
|
||||||
|
#
|
||||||
|
# DATABASE_URL must point at the *direct* (non-pooler) Neon endpoint —
|
||||||
|
# goose's session-scoped advisory lock won't survive PgBouncer transaction
|
||||||
|
# mode. Example:
|
||||||
|
# export DATABASE_URL='host=ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech \
|
||||||
|
# user=neondb_owner password=... dbname=honeyDue sslmode=require'
|
||||||
|
#
|
||||||
|
# Bootstrap (one-time, when adopting goose against an existing DB):
|
||||||
|
# make migrate-status # creates goose_db_version
|
||||||
|
# psql ... -c "INSERT INTO goose_db_version (version_id, is_applied, tstamp) VALUES (1, true, NOW());"
|
||||||
|
#
|
||||||
|
# Day-to-day:
|
||||||
|
# make migrate-status # show what's pending
|
||||||
|
# make migrate-up # apply pending migrations
|
||||||
|
# make migrate-down # roll back the latest migration
|
||||||
|
# make migrate-new name=add_some_column # scaffold a new SQL migration
|
||||||
|
|
||||||
migrate-up:
|
migrate-up:
|
||||||
migrate -path migrations -database "$(DATABASE_URL)" up
|
goose -dir migrations postgres "$(DATABASE_URL)" up
|
||||||
|
|
||||||
migrate-down:
|
migrate-down:
|
||||||
migrate -path migrations -database "$(DATABASE_URL)" down
|
goose -dir migrations postgres "$(DATABASE_URL)" down
|
||||||
|
|
||||||
migrate-create:
|
migrate-status:
|
||||||
migrate create -ext sql -dir migrations -seq $(name)
|
goose -dir migrations postgres "$(DATABASE_URL)" status
|
||||||
|
|
||||||
|
migrate-new:
|
||||||
|
@if [ -z "$(name)" ]; then echo "usage: make migrate-new name=<short_name>"; exit 1; fi
|
||||||
|
goose -dir migrations create $(name) sql
|
||||||
|
|
||||||
# Encrypt existing uploads at rest (run after setting STORAGE_ENCRYPTION_KEY)
|
# Encrypt existing uploads at rest (run after setting STORAGE_ENCRYPTION_KEY)
|
||||||
migrate-encrypt:
|
migrate-encrypt:
|
||||||
|
|||||||
@@ -184,6 +184,15 @@ needed for local dev. For the complete production env var reference
|
|||||||
|
|
||||||
Leave all four `B2_*` empty in dev to fall back to a local `/app/uploads` volume.
|
Leave all four `B2_*` empty in dev to fall back to a local `/app/uploads` volume.
|
||||||
|
|
||||||
|
**Upload architecture (since `b7f8329`)**: Image and document uploads go
|
||||||
|
**directly from the client to B2** via a presigned POST policy issued by
|
||||||
|
`POST /api/uploads/presign`. Bytes never traverse the api server. B2
|
||||||
|
enforces a 10 MB per-object cap at the protocol level. The worker reaps
|
||||||
|
orphaned upload sessions hourly via the `maintenance:upload_cleanup`
|
||||||
|
cron. See [`docs/deployment/09-storage.md`](./docs/deployment/09-storage.md)
|
||||||
|
for the full flow, and [`docs/deployment/14-deployment-process.md`](./docs/deployment/14-deployment-process.md#one-time-b2-bucket-lifecycle-manual)
|
||||||
|
for the one-time bucket lifecycle setup.
|
||||||
|
|
||||||
### Worker schedules (UTC hours)
|
### Worker schedules (UTC hours)
|
||||||
|
|
||||||
| Variable | Description | Default |
|
| Variable | Description | Default |
|
||||||
@@ -349,7 +358,11 @@ All protected endpoints require an `Authorization: Token <token>` header.
|
|||||||
|
|
||||||
Production runs on a **3-node K3s HA cluster** on Hetzner Cloud, fronted
|
Production runs on a **3-node K3s HA cluster** on Hetzner Cloud, fronted
|
||||||
by Cloudflare, with Neon Postgres, Backblaze B2, and a self-hosted Gitea
|
by Cloudflare, with Neon Postgres, Backblaze B2, and a self-hosted Gitea
|
||||||
container registry. See the full deployment book for every detail:
|
container registry. Live observability (VictoriaMetrics + Jaeger +
|
||||||
|
Grafana) runs on a separate Linode VPS at
|
||||||
|
[`grafana.88oakapps.com`](https://grafana.88oakapps.com) and is fed by a
|
||||||
|
`vmagent` sidecar in-cluster. See the full deployment book for every
|
||||||
|
detail:
|
||||||
|
|
||||||
**→ [docs/deployment/](./docs/deployment/README.md) — The Deployment Book**
|
**→ [docs/deployment/](./docs/deployment/README.md) — The Deployment Book**
|
||||||
|
|
||||||
@@ -371,7 +384,10 @@ Quick links:
|
|||||||
|
|
||||||
- **Runbook** — [docs/deployment/17-runbook.md](./docs/deployment/17-runbook.md) — 22 common ops procedures
|
- **Runbook** — [docs/deployment/17-runbook.md](./docs/deployment/17-runbook.md) — 22 common ops procedures
|
||||||
- **kubectl cheat sheet** — [docs/deployment/appendices/b-commands.md](./docs/deployment/appendices/b-commands.md)
|
- **kubectl cheat sheet** — [docs/deployment/appendices/b-commands.md](./docs/deployment/appendices/b-commands.md)
|
||||||
- **Deploy process** — [docs/deployment/14-deployment-process.md](./docs/deployment/14-deployment-process.md) — build → push → rollout
|
- **Deploy process** — [docs/deployment/14-deployment-process.md](./docs/deployment/14-deployment-process.md) — `bash deploy-k3s/scripts/03-deploy.sh` builds → pushes → rolls out
|
||||||
|
- **Observability** — [docs/deployment/15-observability.md](./docs/deployment/15-observability.md) — VictoriaMetrics + Jaeger + Grafana on `obs.88oakapps.com`
|
||||||
|
- **Observability plan** — [docs/observability-plan.md](./docs/observability-plan.md) — design doc and rollout phases
|
||||||
|
- **Database / pool tuning** — [docs/deployment/08-database.md](./docs/deployment/08-database.md) — Neon pooler endpoint, GORM pool, warm-up, RTT budget
|
||||||
- **Failure modes** — [docs/deployment/16-failure-modes.md](./docs/deployment/16-failure-modes.md) — what happens when X dies
|
- **Failure modes** — [docs/deployment/16-failure-modes.md](./docs/deployment/16-failure-modes.md) — what happens when X dies
|
||||||
- **Swarm postmortem** — [docs/deployment/19-postmortem-swarm.md](./docs/deployment/19-postmortem-swarm.md) — why we migrated
|
- **Swarm postmortem** — [docs/deployment/19-postmortem-swarm.md](./docs/deployment/19-postmortem-swarm.md) — why we migrated
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,257 @@
|
|||||||
|
// admin-reset is a one-off CLI for resetting an admin_users row's password.
|
||||||
|
//
|
||||||
|
// It reads DB connection settings from environment variables (the same names
|
||||||
|
// the API uses), looks up the admin user by email, prompts for a new password
|
||||||
|
// twice (no echo), bcrypts it, and updates the row. Safe to keep in the repo
|
||||||
|
// — running it requires DB credentials.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// # load env (host, user, db, sslmode) and password from secrets file
|
||||||
|
// set -a && source deploy/prod.env && set +a
|
||||||
|
// go run ./cmd/admin-reset
|
||||||
|
//
|
||||||
|
// # or with a non-default secrets path / different admin
|
||||||
|
// go run ./cmd/admin-reset --password-file path/to/postgres_password.txt
|
||||||
|
// go run ./cmd/admin-reset --email someone@example.com
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rs/zerolog"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
"golang.org/x/crypto/bcrypt"
|
||||||
|
"golang.org/x/term"
|
||||||
|
"gorm.io/driver/postgres"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
"gorm.io/gorm/logger"
|
||||||
|
|
||||||
|
"github.com/treytartt/honeydue-api/internal/models"
|
||||||
|
)
|
||||||
|
|
||||||
|
const minPasswordLen = 12
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
email := flag.String("email", "admin@myhoneydue.com", "Admin email to reset")
|
||||||
|
passwordFile := flag.String("password-file", "deploy/secrets/postgres_password.txt",
|
||||||
|
"Path to file containing POSTGRES_PASSWORD (used if env var is empty)")
|
||||||
|
list := flag.Bool("list", false, "List all rows in admin_users and exit (no changes)")
|
||||||
|
verify := flag.Bool("verify", false, "Prompt for a password and check it against the stored hash; no changes")
|
||||||
|
newEmail := flag.String("new-email", "", "If set: rename the matched admin's email to this value and exit (no password change)")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})
|
||||||
|
|
||||||
|
dsn, host, err := buildDSN(*passwordFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to build database DSN")
|
||||||
|
}
|
||||||
|
|
||||||
|
db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
|
||||||
|
Logger: logger.Default.LogMode(logger.Silent),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to connect to database")
|
||||||
|
}
|
||||||
|
|
||||||
|
if *list {
|
||||||
|
var admins []models.AdminUser
|
||||||
|
if err := db.Order("id").Find(&admins).Error; err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to list admin users")
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "DB host: %s\n%d admin user(s):\n\n", host, len(admins))
|
||||||
|
fmt.Fprintf(os.Stderr, "%-4s %-40s %-12s %-6s %s\n", "ID", "EMAIL", "ROLE", "ACTIVE", "LAST_LOGIN")
|
||||||
|
for _, a := range admins {
|
||||||
|
last := "-"
|
||||||
|
if a.LastLogin != nil {
|
||||||
|
last = a.LastLogin.Format(time.RFC3339)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "%-4d %-40s %-12s %-6t %s\n", a.ID, a.Email, a.Role, a.IsActive, last)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mirror the live API's case-insensitive lookup so --verify reflects what
|
||||||
|
// /api/admin/auth/login actually does. The reset path uses the same query
|
||||||
|
// for consistency.
|
||||||
|
var admin models.AdminUser
|
||||||
|
if err := db.Where("LOWER(email) = LOWER(?)", *email).First(&admin).Error; err != nil {
|
||||||
|
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||||
|
log.Fatal().Str("email", *email).Msg("admin user not found (try --list to see existing rows)")
|
||||||
|
}
|
||||||
|
log.Fatal().Err(err).Msg("failed to look up admin user")
|
||||||
|
}
|
||||||
|
|
||||||
|
if *newEmail != "" {
|
||||||
|
target := strings.TrimSpace(*newEmail)
|
||||||
|
if target == "" || !strings.Contains(target, "@") {
|
||||||
|
log.Fatal().Str("new_email", *newEmail).Msg("--new-email must be a valid email address")
|
||||||
|
}
|
||||||
|
if strings.EqualFold(target, admin.Email) {
|
||||||
|
fmt.Fprintf(os.Stderr, "No change — current email already matches %q\n", target)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Catch the unique-index conflict early with a clear message instead of a Postgres error.
|
||||||
|
var collisionCount int64
|
||||||
|
if err := db.Model(&models.AdminUser{}).
|
||||||
|
Where("LOWER(email) = LOWER(?) AND id <> ?", target, admin.ID).
|
||||||
|
Count(&collisionCount).Error; err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to check for email collision")
|
||||||
|
}
|
||||||
|
if collisionCount > 0 {
|
||||||
|
log.Fatal().Str("new_email", target).Msg("another admin row already uses this email — aborting")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "Renaming admin email: %s → %s (id=%d)\n", admin.Email, target, admin.ID)
|
||||||
|
fmt.Fprintf(os.Stderr, "DB host: %s\n\n", host)
|
||||||
|
res := db.Model(&models.AdminUser{}).
|
||||||
|
Where("id = ?", admin.ID).
|
||||||
|
Updates(map[string]any{
|
||||||
|
"email": target,
|
||||||
|
"updated_at": time.Now().UTC(),
|
||||||
|
})
|
||||||
|
if res.Error != nil {
|
||||||
|
log.Fatal().Err(res.Error).Msg("failed to rename admin email")
|
||||||
|
}
|
||||||
|
if res.RowsAffected != 1 {
|
||||||
|
log.Fatal().Int64("rows", res.RowsAffected).Msg("expected exactly 1 row updated")
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "OK — email is now %s\n", target)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *verify {
|
||||||
|
fmt.Fprintf(os.Stderr, "Verifying password for: %s (id=%d, role=%s, active=%t)\n",
|
||||||
|
admin.Email, admin.ID, admin.Role, admin.IsActive)
|
||||||
|
fmt.Fprintf(os.Stderr, "DB host: %s\n\n", host)
|
||||||
|
|
||||||
|
pw, err := readPassword("Password: ")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to read password")
|
||||||
|
}
|
||||||
|
if admin.CheckPassword(pw) {
|
||||||
|
fmt.Fprintln(os.Stderr, "PASS — bcrypt hash matches the supplied password")
|
||||||
|
if !admin.IsActive {
|
||||||
|
fmt.Fprintln(os.Stderr, "WARNING: is_active = false — login will still be rejected with \"Account is disabled\"")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fmt.Fprintln(os.Stderr, "FAIL — bcrypt hash does NOT match the supplied password")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "Resetting password for: %s (id=%d, role=%s, active=%t)\n",
|
||||||
|
admin.Email, admin.ID, admin.Role, admin.IsActive)
|
||||||
|
fmt.Fprintf(os.Stderr, "DB host: %s\n\n", host)
|
||||||
|
|
||||||
|
pw1, err := readPassword("New password: ")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to read password")
|
||||||
|
}
|
||||||
|
if len(pw1) < minPasswordLen {
|
||||||
|
log.Fatal().Int("min", minPasswordLen).Msg("password too short")
|
||||||
|
}
|
||||||
|
|
||||||
|
pw2, err := readPassword("Confirm password: ")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to read password")
|
||||||
|
}
|
||||||
|
if pw1 != pw2 {
|
||||||
|
log.Fatal().Msg("passwords do not match")
|
||||||
|
}
|
||||||
|
|
||||||
|
hash, err := bcrypt.GenerateFromPassword([]byte(pw1), bcrypt.DefaultCost)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to hash password")
|
||||||
|
}
|
||||||
|
|
||||||
|
res := db.Model(&models.AdminUser{}).
|
||||||
|
Where("id = ?", admin.ID).
|
||||||
|
Updates(map[string]any{
|
||||||
|
"password": string(hash),
|
||||||
|
"updated_at": time.Now().UTC(),
|
||||||
|
})
|
||||||
|
if res.Error != nil {
|
||||||
|
log.Fatal().Err(res.Error).Msg("failed to update admin user")
|
||||||
|
}
|
||||||
|
if res.RowsAffected != 1 {
|
||||||
|
log.Fatal().Int64("rows", res.RowsAffected).Msg("expected exactly 1 row updated")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(os.Stderr, "\nOK — password reset for %s\n", admin.Email)
|
||||||
|
}
|
||||||
|
|
||||||
|
// quoteDSNValue renders v for use as a value in a keyword/value connection
// string. Values free of whitespace, single quotes and backslashes are
// returned unchanged; anything else (including the empty string) is wrapped
// in single quotes with embedded backslashes and quotes backslash-escaped.
func quoteDSNValue(v string) string {
	if v != "" && !strings.ContainsAny(v, " \t\r\n'\\") {
		return v
	}
	escaped := strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace(v)
	return "'" + escaped + "'"
}

// buildDSN assembles a keyword/value Postgres connection string from the
// environment.
//
// It reads DB_HOST, POSTGRES_USER, POSTGRES_DB, DB_SSLMODE (default
// "require"), DB_PORT (default 5432) and POSTGRES_PASSWORD. When
// POSTGRES_PASSWORD is empty and passwordFile is non-empty, the password is
// read from that file with trailing CR/LF stripped.
//
// It returns the DSN and the raw host (for display). An error is returned
// when DB_PORT is not an integer, the password file cannot be read, or any of
// host, user, database name or password is empty.
func buildDSN(passwordFile string) (dsn, host string, err error) {
	host = os.Getenv("DB_HOST")
	user := os.Getenv("POSTGRES_USER")
	dbname := os.Getenv("POSTGRES_DB")
	sslmode := os.Getenv("DB_SSLMODE")
	if sslmode == "" {
		sslmode = "require"
	}

	port := 5432
	if s := os.Getenv("DB_PORT"); s != "" {
		p, perr := strconv.Atoi(s)
		if perr != nil {
			return "", "", fmt.Errorf("invalid DB_PORT %q: %w", s, perr)
		}
		port = p
	}

	password := os.Getenv("POSTGRES_PASSWORD")
	if password == "" && passwordFile != "" {
		b, rerr := os.ReadFile(passwordFile)
		if rerr != nil {
			return "", "", fmt.Errorf("POSTGRES_PASSWORD not set and could not read %s: %w", passwordFile, rerr)
		}
		password = strings.TrimRight(string(b), "\r\n")
	}

	// Collect every missing setting so the operator sees them all at once.
	var missing []string
	if host == "" {
		missing = append(missing, "DB_HOST")
	}
	if user == "" {
		missing = append(missing, "POSTGRES_USER")
	}
	if dbname == "" {
		missing = append(missing, "POSTGRES_DB")
	}
	if password == "" {
		missing = append(missing, "POSTGRES_PASSWORD")
	}
	if len(missing) > 0 {
		return "", "", fmt.Errorf("missing required env vars: %s", strings.Join(missing, ", "))
	}

	// Quote each string value so a password (or any other setting) containing
	// spaces, quotes or backslashes cannot split or corrupt the DSN.
	dsn = fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s",
		quoteDSNValue(host), port, quoteDSNValue(user), quoteDSNValue(password),
		quoteDSNValue(dbname), quoteDSNValue(sslmode))
	return dsn, host, nil
}
|
||||||
|
|
||||||
|
func readPassword(prompt string) (string, error) {
|
||||||
|
fmt.Fprint(os.Stderr, prompt)
|
||||||
|
if term.IsTerminal(int(os.Stdin.Fd())) {
|
||||||
|
b, err := term.ReadPassword(int(os.Stdin.Fd()))
|
||||||
|
fmt.Fprintln(os.Stderr)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strings.TrimRight(string(b), "\r\n"), nil
|
||||||
|
}
|
||||||
|
s, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strings.TrimRight(s, "\r\n"), nil
|
||||||
|
}
|
||||||
+74
-5
@@ -9,6 +9,7 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hibiken/asynq"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
@@ -19,6 +20,8 @@ import (
|
|||||||
"github.com/treytartt/honeydue-api/internal/push"
|
"github.com/treytartt/honeydue-api/internal/push"
|
||||||
"github.com/treytartt/honeydue-api/internal/router"
|
"github.com/treytartt/honeydue-api/internal/router"
|
||||||
"github.com/treytartt/honeydue-api/internal/services"
|
"github.com/treytartt/honeydue-api/internal/services"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/tracing"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/worker"
|
||||||
"github.com/treytartt/honeydue-api/pkg/utils"
|
"github.com/treytartt/honeydue-api/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -50,6 +53,29 @@ func main() {
|
|||||||
Str("redis_url", config.MaskURLCredentials(cfg.Redis.URL)).
|
Str("redis_url", config.MaskURLCredentials(cfg.Redis.URL)).
|
||||||
Msg("Starting HoneyDue API server")
|
Msg("Starting HoneyDue API server")
|
||||||
|
|
||||||
|
// Initialize OpenTelemetry tracing — exports to obs.88oakapps.com
|
||||||
|
// (Jaeger via OTLP/HTTP) when OBS_TRACES_URL is set; otherwise installs
|
||||||
|
// a no-op tracer so call sites can use otel.Tracer() unconditionally.
|
||||||
|
// config.SecretValue (not os.Getenv) so file-mounted secrets resolve
|
||||||
|
// after audit F8 removed these from the process environment.
|
||||||
|
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
|
||||||
|
ServiceName: "honeydue-api",
|
||||||
|
Environment: deploymentEnvironment(cfg.Server.Debug),
|
||||||
|
EndpointURL: config.SecretValue("OBS_TRACES_URL"),
|
||||||
|
BearerToken: config.SecretValue("OBS_INGEST_TOKEN"),
|
||||||
|
SampleRatio: tracing.SampleRatioFromEnv(),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("tracing init failed — continuing without traces")
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := tracingShutdown(shutdownCtx); err != nil {
|
||||||
|
log.Warn().Err(err).Msg("tracing shutdown error")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
// Connect to database (retry with backoff)
|
// Connect to database (retry with backoff)
|
||||||
var db *gorm.DB
|
var db *gorm.DB
|
||||||
var dbErr error
|
var dbErr error
|
||||||
@@ -65,11 +91,14 @@ func main() {
|
|||||||
log.Error().Err(dbErr).Msg("Failed to connect to database - API will start but database operations will fail")
|
log.Error().Err(dbErr).Msg("Failed to connect to database - API will start but database operations will fail")
|
||||||
} else {
|
} else {
|
||||||
defer database.Close()
|
defer database.Close()
|
||||||
// Run database migrations only if connected.
|
// Migrations are managed out-of-band by goose (see
|
||||||
// MigrateWithLock serialises parallel replica starts via a Postgres
|
// cmd/migrate and deploy-k3s/manifests/migrate/job.yaml) so the api
|
||||||
// advisory lock so concurrent AutoMigrate calls don't race on DDL.
|
// no longer runs AutoMigrate at startup. Instead we verify the
|
||||||
if err := database.MigrateWithLock(); err != nil {
|
// schema is at the expected version and refuse to start if not —
|
||||||
log.Error().Err(err).Msg("Failed to run database migrations")
|
// this catches the "operator forgot to run migrate" footgun loudly,
|
||||||
|
// at boot, instead of with mysterious runtime errors.
|
||||||
|
if err := database.RequireSchemaApplied(); err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Schema precondition failed — run `kubectl -n honeydue create job --from=cronjob/honeydue-migrate` (or `make migrate-up` locally) and retry")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,6 +196,28 @@ func main() {
|
|||||||
Msg("Push notification client initialized")
|
Msg("Push notification client initialized")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize Asynq enqueuer (api-side). Used by services that move
|
||||||
|
// long-running work off the request path (currently: task-completion
|
||||||
|
// notification fan-out). Same Redis as cmd/worker — file-mounted password
|
||||||
|
// applied separately because cfg.Redis.URL does not embed it (audit HIGH-1).
|
||||||
|
var taskEnqueuer *worker.TaskClient
|
||||||
|
if redisOpt, parseErr := asynq.ParseRedisURI(cfg.Redis.URL); parseErr != nil {
|
||||||
|
log.Warn().Err(parseErr).Msg("Failed to parse Redis URL for Asynq enqueuer — completion notifications will run inline")
|
||||||
|
} else if clientOpt, ok := redisOpt.(asynq.RedisClientOpt); ok {
|
||||||
|
if cfg.Redis.Password != "" {
|
||||||
|
clientOpt.Password = cfg.Redis.Password
|
||||||
|
}
|
||||||
|
taskEnqueuer = worker.NewTaskClient(clientOpt)
|
||||||
|
defer func() {
|
||||||
|
if cerr := taskEnqueuer.Close(); cerr != nil {
|
||||||
|
log.Warn().Err(cerr).Msg("Failed to close Asynq enqueuer on shutdown")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
log.Info().Msg("Asynq enqueuer initialized")
|
||||||
|
} else {
|
||||||
|
log.Warn().Msg("Redis opt is not RedisClientOpt — Asynq enqueuer skipped; completion notifications will run inline")
|
||||||
|
}
|
||||||
|
|
||||||
// Setup router with dependencies (includes admin panel at /admin)
|
// Setup router with dependencies (includes admin panel at /admin)
|
||||||
deps := &router.Dependencies{
|
deps := &router.Dependencies{
|
||||||
DB: db,
|
DB: db,
|
||||||
@@ -178,6 +229,12 @@ func main() {
|
|||||||
StorageService: storageService,
|
StorageService: storageService,
|
||||||
MonitoringService: monitoringService,
|
MonitoringService: monitoringService,
|
||||||
}
|
}
|
||||||
|
// Only assign the enqueuer when we actually constructed one. Assigning a
|
||||||
|
// nil *worker.TaskClient directly would create a typed-nil interface that
|
||||||
|
// fails the `if deps.TaskEnqueuer != nil` check in router.SetupRouter.
|
||||||
|
if taskEnqueuer != nil {
|
||||||
|
deps.TaskEnqueuer = taskEnqueuer
|
||||||
|
}
|
||||||
e := router.SetupRouter(deps)
|
e := router.SetupRouter(deps)
|
||||||
|
|
||||||
// Create HTTP server
|
// Create HTTP server
|
||||||
@@ -217,3 +274,15 @@ func main() {
|
|||||||
|
|
||||||
log.Info().Msg("Server exited")
|
log.Info().Msg("Server exited")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// deploymentEnvironment chooses the environment label that spans are tagged
// with. A non-empty DEPLOYMENT_ENVIRONMENT variable always wins; otherwise the
// Debug flag selects "dev" (true) or "prod" (false).
func deploymentEnvironment(debug bool) string {
	override := os.Getenv("DEPLOYMENT_ENVIRONMENT")
	switch {
	case override != "":
		return override
	case debug:
		return "dev"
	default:
		return "prod"
	}
}
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// shouldInitEmail reports whether both the email host and the email user are
// non-empty.
func shouldInitEmail(host, user string) bool {
	if host == "" || user == "" {
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// shouldInitStorage reports whether an upload directory has been supplied,
// i.e. uploadDir is not the empty string.
func shouldInitStorage(uploadDir string) bool {
	return len(uploadDir) > 0
}
|
||||||
|
|
||||||
|
// shouldInitEncryption reports whether an encryption key has been supplied,
// i.e. encryptionKey is not the empty string.
func shouldInitEncryption(encryptionKey string) bool {
	if encryptionKey == "" {
		return false
	}
	return true
}
|
||||||
|
|
||||||
|
// connectWithRetry calls connect up to maxRetries times and stops at the
// first success.
//
// Between failed attempts it pauses with a linearly growing delay (1ms after
// the first failure, 2ms after the second, and so on). No pause is taken
// after the final attempt, so the last error is returned immediately.
//
// It returns nil on success, or the error from the last attempt when every
// attempt fails. If maxRetries is not positive, connect is never called and
// nil is returned.
func connectWithRetry(connect func() error, maxRetries int) error {
	var err error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		if err = connect(); err == nil {
			return nil
		}
		// Only wait when another attempt will actually follow.
		if attempt < maxRetries {
			time.Sleep(time.Duration(attempt) * time.Millisecond) // use ms in tests
		}
	}
	return err
}
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// --- shouldInitEmail ---
|
||||||
|
|
||||||
|
func TestShouldInitEmail_BothSet_True(t *testing.T) {
|
||||||
|
if !shouldInitEmail("smtp.example.com", "user@example.com") {
|
||||||
|
t.Error("expected true when both set")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldInitEmail_MissingHost_False(t *testing.T) {
|
||||||
|
if shouldInitEmail("", "user@example.com") {
|
||||||
|
t.Error("expected false when host empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldInitEmail_MissingUser_False(t *testing.T) {
|
||||||
|
if shouldInitEmail("smtp.example.com", "") {
|
||||||
|
t.Error("expected false when user empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldInitEmail_BothEmpty_False(t *testing.T) {
|
||||||
|
if shouldInitEmail("", "") {
|
||||||
|
t.Error("expected false when both empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- shouldInitStorage ---
|
||||||
|
|
||||||
|
func TestShouldInitStorage_Set_True(t *testing.T) {
|
||||||
|
if !shouldInitStorage("/uploads") {
|
||||||
|
t.Error("expected true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldInitStorage_Empty_False(t *testing.T) {
|
||||||
|
if shouldInitStorage("") {
|
||||||
|
t.Error("expected false")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- shouldInitEncryption ---
|
||||||
|
|
||||||
|
func TestShouldInitEncryption_Set_True(t *testing.T) {
|
||||||
|
if !shouldInitEncryption("secret-key-123") {
|
||||||
|
t.Error("expected true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldInitEncryption_Empty_False(t *testing.T) {
|
||||||
|
if shouldInitEncryption("") {
|
||||||
|
t.Error("expected false")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- connectWithRetry ---
|
||||||
|
|
||||||
|
func TestConnectWithRetry_SucceedsFirst_NoRetry(t *testing.T) {
|
||||||
|
calls := 0
|
||||||
|
err := connectWithRetry(func() error {
|
||||||
|
calls++
|
||||||
|
return nil
|
||||||
|
}, 3)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if calls != 1 {
|
||||||
|
t.Errorf("calls = %d, want 1", calls)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConnectWithRetry_SucceedsSecond_OneRetry(t *testing.T) {
|
||||||
|
calls := 0
|
||||||
|
err := connectWithRetry(func() error {
|
||||||
|
calls++
|
||||||
|
if calls == 1 {
|
||||||
|
return errors.New("fail")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}, 3)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if calls != 2 {
|
||||||
|
t.Errorf("calls = %d, want 2", calls)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConnectWithRetry_AllFail_ReturnsError(t *testing.T) {
|
||||||
|
calls := 0
|
||||||
|
err := connectWithRetry(func() error {
|
||||||
|
calls++
|
||||||
|
return errors.New("fail")
|
||||||
|
}, 3)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error")
|
||||||
|
}
|
||||||
|
if calls != 3 {
|
||||||
|
t.Errorf("calls = %d, want 3", calls)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,333 @@
|
|||||||
|
// notif-diag is a CLI for inspecting and (optionally) cleaning up stuck
|
||||||
|
// notification rows. Default mode is read-only — runs SELECTs and prints a
|
||||||
|
// summary. With --mark-failed-as-sent, marks pending rows that already have a
|
||||||
|
// recorded error as sent (cosmetic — no retry, no resend).
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// set -a && source deploy/prod.env && set +a
|
||||||
|
// go run ./cmd/notif-diag # diagnose
|
||||||
|
// go run ./cmd/notif-diag --mark-failed-as-sent --yes # clean up errored backlog
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rs/zerolog"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
"gorm.io/driver/postgres"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
"gorm.io/gorm/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
passwordFile := stringFlag("password-file", "deploy/secrets/postgres_password.txt",
|
||||||
|
"Path to file containing POSTGRES_PASSWORD (used if env var is empty)")
|
||||||
|
markFailed := boolFlag("mark-failed-as-sent",
|
||||||
|
"Mark every pending row with a non-empty error_message as sent. Cosmetic only — does not retry the push.")
|
||||||
|
yes := boolFlag("yes", "Skip the interactive confirmation prompt for destructive actions.")
|
||||||
|
|
||||||
|
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})
|
||||||
|
|
||||||
|
dsn, host, err := buildDSN(*passwordFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to build database DSN")
|
||||||
|
}
|
||||||
|
|
||||||
|
db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
|
||||||
|
Logger: logger.Default.LogMode(logger.Silent),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to connect to database")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("DB host: %s\n", host)
|
||||||
|
fmt.Println(strings.Repeat("=", 80))
|
||||||
|
|
||||||
|
overallTotals(db)
|
||||||
|
pendingByType(db)
|
||||||
|
recentPending(db)
|
||||||
|
deviceCounts(db)
|
||||||
|
|
||||||
|
if *markFailed {
|
||||||
|
markFailedAsSent(db, *yes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// markFailedAsSent updates pending rows whose error_message is non-empty,
|
||||||
|
// flipping them to sent=true with sent_at=updated_at. This is purely cosmetic:
|
||||||
|
// it removes them from the "pending" count so dashboards and the diag tool
|
||||||
|
// don't keep flagging an old, unfixable backlog. It does NOT re-send anything.
|
||||||
|
func markFailedAsSent(db *gorm.DB, skipPrompt bool) {
|
||||||
|
var candidate int64
|
||||||
|
if err := db.Raw(`
|
||||||
|
SELECT COUNT(*) FROM notifications_notification
|
||||||
|
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
||||||
|
`).Scan(&candidate).Error; err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to count cleanup candidates")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("\n# Cleanup candidate count: %d\n", candidate)
|
||||||
|
if candidate == 0 {
|
||||||
|
fmt.Println(" (nothing to clean up)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Println(" These rows have a recorded send error and will never be retried.")
|
||||||
|
fmt.Println(" Marking them sent=true is cosmetic — it just prevents them from")
|
||||||
|
fmt.Println(" showing up as pending in admin dashboards going forward.")
|
||||||
|
|
||||||
|
if !skipPrompt {
|
||||||
|
fmt.Printf("\nProceed? Type 'yes' to update %d rows: ", candidate)
|
||||||
|
s, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("failed to read confirmation")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(s) != "yes" {
|
||||||
|
fmt.Println("Aborted.")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res := db.Exec(`
|
||||||
|
UPDATE notifications_notification
|
||||||
|
SET sent = true, sent_at = COALESCE(updated_at, NOW())
|
||||||
|
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
||||||
|
`)
|
||||||
|
if res.Error != nil {
|
||||||
|
log.Fatal().Err(res.Error).Msg("failed to update rows")
|
||||||
|
}
|
||||||
|
fmt.Printf("OK — updated %d rows.\n", res.RowsAffected)
|
||||||
|
}
|
||||||
|
|
||||||
|
// overallTotals shows the high-level sent/pending/read split.
|
||||||
|
func overallTotals(db *gorm.DB) {
|
||||||
|
type row struct {
|
||||||
|
Total int64
|
||||||
|
Sent int64
|
||||||
|
Pending int64
|
||||||
|
Read int64
|
||||||
|
Errored int64
|
||||||
|
}
|
||||||
|
var r row
|
||||||
|
db.Raw(`
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total,
|
||||||
|
COUNT(*) FILTER (WHERE sent = true) AS sent,
|
||||||
|
COUNT(*) FILTER (WHERE sent = false) AS pending,
|
||||||
|
COUNT(*) FILTER (WHERE read = true) AS read,
|
||||||
|
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS errored
|
||||||
|
FROM notifications_notification
|
||||||
|
`).Scan(&r)
|
||||||
|
|
||||||
|
fmt.Println("\n# Overall notification counts")
|
||||||
|
fmt.Printf(" total: %d\n", r.Total)
|
||||||
|
fmt.Printf(" sent: %d\n", r.Sent)
|
||||||
|
fmt.Printf(" pending: %d\n", r.Pending)
|
||||||
|
fmt.Printf(" read: %d\n", r.Read)
|
||||||
|
fmt.Printf(" errored: %d (rows with non-empty error_message)\n", r.Errored)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pendingByType breaks the pending rows down by type and age.
|
||||||
|
func pendingByType(db *gorm.DB) {
|
||||||
|
type row struct {
|
||||||
|
NotificationType string
|
||||||
|
PendingCount int64
|
||||||
|
Oldest *time.Time
|
||||||
|
Newest *time.Time
|
||||||
|
WithErrors int64
|
||||||
|
Last24h int64
|
||||||
|
Last7d int64
|
||||||
|
}
|
||||||
|
var rows []row
|
||||||
|
db.Raw(`
|
||||||
|
SELECT
|
||||||
|
notification_type,
|
||||||
|
COUNT(*) AS pending_count,
|
||||||
|
MIN(created_at) AS oldest,
|
||||||
|
MAX(created_at) AS newest,
|
||||||
|
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS with_errors,
|
||||||
|
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '24 hours') AS last_24h,
|
||||||
|
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '7 days') AS last_7d
|
||||||
|
FROM notifications_notification
|
||||||
|
WHERE sent = false
|
||||||
|
GROUP BY notification_type
|
||||||
|
ORDER BY MAX(created_at) DESC NULLS LAST
|
||||||
|
`).Scan(&rows)
|
||||||
|
|
||||||
|
fmt.Println("\n# Pending rows by type")
|
||||||
|
if len(rows) == 0 {
|
||||||
|
fmt.Println(" (no pending notifications)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Printf(" %-22s %7s %7s %7s %7s %-19s %-19s\n",
|
||||||
|
"TYPE", "PENDING", "ERRORED", "LAST24H", "LAST7D", "OLDEST", "NEWEST")
|
||||||
|
for _, r := range rows {
|
||||||
|
fmt.Printf(" %-22s %7d %7d %7d %7d %-19s %-19s\n",
|
||||||
|
r.NotificationType, r.PendingCount, r.WithErrors, r.Last24h, r.Last7d,
|
||||||
|
fmtTime(r.Oldest), fmtTime(r.Newest))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// recentPending shows the 5 most recent pending rows with full detail.
|
||||||
|
func recentPending(db *gorm.DB) {
|
||||||
|
type row struct {
|
||||||
|
ID uint
|
||||||
|
UserID uint
|
||||||
|
NotificationType string
|
||||||
|
Title string
|
||||||
|
Body string
|
||||||
|
ErrorMessage string
|
||||||
|
CreatedAt time.Time
|
||||||
|
}
|
||||||
|
var rows []row
|
||||||
|
db.Raw(`
|
||||||
|
SELECT id, user_id, notification_type, title, body, COALESCE(error_message, '') AS error_message, created_at
|
||||||
|
FROM notifications_notification
|
||||||
|
WHERE sent = false
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT 5
|
||||||
|
`).Scan(&rows)
|
||||||
|
|
||||||
|
fmt.Println("\n# 5 most recent pending notifications")
|
||||||
|
if len(rows) == 0 {
|
||||||
|
fmt.Println(" (none)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, r := range rows {
|
||||||
|
errPart := ""
|
||||||
|
if r.ErrorMessage != "" {
|
||||||
|
errPart = fmt.Sprintf("\n error: %s", r.ErrorMessage)
|
||||||
|
}
|
||||||
|
fmt.Printf(" [%d] user=%d %s %s%s\n title: %s\n body: %s\n",
|
||||||
|
r.ID, r.UserID, r.CreatedAt.Format("2006-01-02 15:04:05"), r.NotificationType, errPart,
|
||||||
|
truncate(r.Title, 100), truncate(r.Body, 100))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceCounts shows how many push devices are registered (active vs inactive).
|
||||||
|
func deviceCounts(db *gorm.DB) {
|
||||||
|
type row struct {
|
||||||
|
Total int64
|
||||||
|
Active int64
|
||||||
|
WithUser int64
|
||||||
|
DistinctUsers int64
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("\n# Registered push devices")
|
||||||
|
for _, t := range []struct {
|
||||||
|
label string
|
||||||
|
table string
|
||||||
|
}{
|
||||||
|
{"APNs (iOS)", "push_notifications_apnsdevice"},
|
||||||
|
{"GCM (Android)", "push_notifications_gcmdevice"},
|
||||||
|
} {
|
||||||
|
var r row
|
||||||
|
err := db.Raw(fmt.Sprintf(`
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total,
|
||||||
|
COUNT(*) FILTER (WHERE active = true) AS active,
|
||||||
|
COUNT(*) FILTER (WHERE user_id IS NOT NULL) AS with_user,
|
||||||
|
COUNT(DISTINCT user_id) AS distinct_users
|
||||||
|
FROM %s
|
||||||
|
`, t.table)).Scan(&r).Error
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf(" %-15s ERROR: %v\n", t.label, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Printf(" %-15s total=%-5d active=%-5d with_user=%-5d distinct_users=%d\n",
|
||||||
|
t.label, r.Total, r.Active, r.WithUser, r.DistinctUsers)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDSN assembles a keyword/value Postgres connection string from the
// environment and returns it together with the host (for display).
//
// Inputs:
//   - DB_HOST, POSTGRES_USER, POSTGRES_DB: required.
//   - DB_PORT: optional, defaults to 5432; must parse as an integer.
//   - DB_SSLMODE: optional, defaults to "require".
//   - POSTGRES_PASSWORD: required; when empty and passwordFile is non-empty,
//     the password is read from that file with trailing CR/LF removed.
//
// Values containing whitespace, single quotes or backslashes are quoted and
// escaped, so such a password cannot corrupt the connection string.
//
// It returns an error when DB_PORT is invalid, the password file cannot be
// read, or any required value is missing.
func buildDSN(passwordFile string) (dsn, host string, err error) {
	host = os.Getenv("DB_HOST")
	user := os.Getenv("POSTGRES_USER")
	dbname := os.Getenv("POSTGRES_DB")
	sslmode := os.Getenv("DB_SSLMODE")
	if sslmode == "" {
		sslmode = "require"
	}

	port := 5432
	if s := os.Getenv("DB_PORT"); s != "" {
		p, perr := strconv.Atoi(s)
		if perr != nil {
			return "", "", fmt.Errorf("invalid DB_PORT %q: %w", s, perr)
		}
		port = p
	}

	password := os.Getenv("POSTGRES_PASSWORD")
	if password == "" && passwordFile != "" {
		b, rerr := os.ReadFile(passwordFile)
		if rerr != nil {
			return "", "", fmt.Errorf("POSTGRES_PASSWORD not set and could not read %s: %w", passwordFile, rerr)
		}
		password = strings.TrimRight(string(b), "\r\n")
	}

	var missing []string
	if host == "" {
		missing = append(missing, "DB_HOST")
	}
	if user == "" {
		missing = append(missing, "POSTGRES_USER")
	}
	if dbname == "" {
		missing = append(missing, "POSTGRES_DB")
	}
	if password == "" {
		missing = append(missing, "POSTGRES_PASSWORD")
	}
	if len(missing) > 0 {
		return "", "", fmt.Errorf("missing required env vars: %s", strings.Join(missing, ", "))
	}

	dsn = fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s",
		quoteDSNValue(host), port, quoteDSNValue(user), quoteDSNValue(password),
		quoteDSNValue(dbname), quoteDSNValue(sslmode))
	return dsn, host, nil
}

// quoteDSNValue renders v for a keyword/value connection string. Plain values
// pass through unchanged; empty values and values containing whitespace, a
// single quote or a backslash are wrapped in single quotes with backslashes
// and quotes backslash-escaped.
func quoteDSNValue(v string) string {
	if v != "" && !strings.ContainsAny(v, " \t\n\v\f\r'\\") {
		return v
	}
	return "'" + strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace(v) + "'"
}
|
||||||
|
|
||||||
|
// stringFlag is a tiny stand-in for flag.String to keep imports lean — using it
// also dodges flag-package quirks when this file is rebuilt with go run.
//
// It scans os.Args for the flag and returns a pointer to its value, or to def
// when the flag is absent. Two spellings are accepted:
//
//	--name=value
//	--name value
//
// In the second form the next argument is only taken as the value when it
// does not itself start with "--", so a following flag is never swallowed.
// When the flag appears more than once the last occurrence wins. The _usage
// text is accepted for call-site documentation only and is not used.
func stringFlag(name, def, _usage string) *string {
	v := def
	bare := "--" + name
	prefix := bare + "="
	args := os.Args[1:]
	for i := 0; i < len(args); i++ {
		a := args[i]
		switch {
		case strings.HasPrefix(a, prefix):
			v = strings.TrimPrefix(a, prefix)
		case a == bare && i+1 < len(args) && !strings.HasPrefix(args[i+1], "--"):
			// Space-separated form: consume the following argument as the value.
			i++
			v = args[i]
		}
	}
	return &v
}
|
||||||
|
|
||||||
|
// boolFlag returns a pointer to true when the exact argument --name appears
// anywhere in os.Args (the flag takes no value), and to false otherwise. The
// _usage text is accepted for call-site documentation only and is not used.
func boolFlag(name, _usage string) *bool {
	found := false
	flagArg := "--" + name
	for _, arg := range os.Args[1:] {
		if arg == flagArg {
			found = true
			break
		}
	}
	return &found
}
|
||||||
|
|
||||||
|
// fmtTime renders t as "YYYY-MM-DD HH:MM:SS", or "-" when t is nil.
func fmtTime(t *time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if t != nil {
		return t.Format(layout)
	}
	return "-"
}
|
||||||
|
|
||||||
|
// truncate shortens s to at most n bytes and appends "…" when anything was
// cut off; strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte n is a
// continuation byte the cut moves back to the start of that rune, so the
// result stays valid UTF-8. A negative n is treated as 0.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	// n < len(s) here, so s[n] is in range. Continuation bytes have the bit
	// pattern 10xxxxxx; step back until the cut sits on a rune boundary.
	for n > 0 && s[n]&0xC0 == 0x80 {
		n--
	}
	return s[:n] + "…"
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
// send-test-push enqueues a one-shot Asynq push notification task. The worker
|
||||||
|
// picks it up and routes it through internal/push/Client.SendToAll, which now
|
||||||
|
// hits APNs production. Verifies end-to-end that push delivery is working
|
||||||
|
// without waiting for the next cron tick.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// # Port-forward Redis from the cluster first:
|
||||||
|
// kubectl --kubeconfig ~/.kube/honeydue-k3s.yaml -n honeydue port-forward svc/redis 6379:6379
|
||||||
|
//
|
||||||
|
// # Then in another shell:
|
||||||
|
// go run ./cmd/send-test-push --user-id 6 --title "Test" --message "Hello from notif-diag"
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/hibiken/asynq"
|
||||||
|
|
||||||
|
"github.com/treytartt/honeydue-api/internal/worker/jobs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
userID := flag.Uint("user-id", 0, "Target auth_user.id (required)")
|
||||||
|
title := flag.String("title", "Test push", "Notification title")
|
||||||
|
message := flag.String("message", "Hello from send-test-push", "Notification body")
|
||||||
|
redisAddr := flag.String("redis", "localhost:6379", "Redis host:port (use kubectl port-forward to reach the in-cluster redis)")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if *userID == 0 {
|
||||||
|
fmt.Fprintln(os.Stderr, "--user-id is required")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
task, err := jobs.NewSendPushTask(*userID, *title, *message, map[string]string{
|
||||||
|
"type": "test",
|
||||||
|
"user_id": strconv.FormatUint(uint64(*userID), 10),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "build task: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
client := asynq.NewClient(asynq.RedisClientOpt{Addr: *redisAddr})
|
||||||
|
defer func() { _ = client.Close() }()
|
||||||
|
|
||||||
|
info, err := client.Enqueue(task, asynq.Queue("default"), asynq.MaxRetry(3))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "enqueue: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Enqueued task: id=%s queue=%s type=%s\n", info.ID, info.Queue, info.Type)
|
||||||
|
fmt.Printf("Tail worker logs to see the result:\n")
|
||||||
|
fmt.Printf(" kubectl --kubeconfig=~/.kube/honeydue-k3s.yaml -n honeydue logs deploy/worker --tail=20 -f\n")
|
||||||
|
}
|
||||||
@@ -11,13 +11,19 @@ import (
|
|||||||
"github.com/hibiken/asynq"
|
"github.com/hibiken/asynq"
|
||||||
"github.com/redis/go-redis/v9"
|
"github.com/redis/go-redis/v9"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
"go.opentelemetry.io/otel/codes"
|
||||||
|
"go.opentelemetry.io/otel/trace"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/config"
|
"github.com/treytartt/honeydue-api/internal/config"
|
||||||
"github.com/treytartt/honeydue-api/internal/database"
|
"github.com/treytartt/honeydue-api/internal/database"
|
||||||
"github.com/treytartt/honeydue-api/internal/monitoring"
|
"github.com/treytartt/honeydue-api/internal/monitoring"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/prom"
|
||||||
"github.com/treytartt/honeydue-api/internal/push"
|
"github.com/treytartt/honeydue-api/internal/push"
|
||||||
"github.com/treytartt/honeydue-api/internal/repositories"
|
"github.com/treytartt/honeydue-api/internal/repositories"
|
||||||
"github.com/treytartt/honeydue-api/internal/services"
|
"github.com/treytartt/honeydue-api/internal/services"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/tracing"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/worker"
|
||||||
"github.com/treytartt/honeydue-api/internal/worker/jobs"
|
"github.com/treytartt/honeydue-api/internal/worker/jobs"
|
||||||
"github.com/treytartt/honeydue-api/pkg/utils"
|
"github.com/treytartt/honeydue-api/pkg/utils"
|
||||||
)
|
)
|
||||||
@@ -40,6 +46,29 @@ func main() {
|
|||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize OpenTelemetry tracing for the worker process. Same OTLP
|
||||||
|
// destination as the api; service.name distinguishes them in Jaeger.
|
||||||
|
// config.SecretValue (not os.Getenv) so file-mounted secrets resolve
|
||||||
|
// after audit F8 removed these from the process environment.
|
||||||
|
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
|
||||||
|
ServiceName: "honeydue-worker",
|
||||||
|
Environment: workerDeploymentEnv(cfg.Server.Debug),
|
||||||
|
EndpointURL: config.SecretValue("OBS_TRACES_URL"),
|
||||||
|
BearerToken: config.SecretValue("OBS_INGEST_TOKEN"),
|
||||||
|
SampleRatio: tracing.SampleRatioFromEnv(),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("worker tracing init failed — continuing without traces")
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := tracingShutdown(shutdownCtx); err != nil {
|
||||||
|
log.Warn().Err(err).Msg("worker tracing shutdown error")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
asynqTracer := tracing.Tracer("honeydue/worker/asynq")
|
||||||
|
|
||||||
// Initialize database
|
// Initialize database
|
||||||
db, err := database.Connect(&cfg.Database, cfg.Server.Debug)
|
db, err := database.Connect(&cfg.Database, cfg.Server.Debug)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -80,6 +109,17 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal().Err(err).Msg("Failed to parse Redis URL")
|
log.Fatal().Err(err).Msg("Failed to parse Redis URL")
|
||||||
}
|
}
|
||||||
|
// Audit HIGH-1: the Redis password is a file-mounted secret (REDIS_PASSWORD),
|
||||||
|
// not embedded in REDIS_URL — REDIS_URL travels in the honeydue-config
|
||||||
|
// ConfigMap. Apply the password onto the parsed opt so the Asynq server,
|
||||||
|
// inspector and monitoring client (all derived from redisOpt below)
|
||||||
|
// authenticate against a requirepass-protected Redis.
|
||||||
|
if cfg.Redis.Password != "" {
|
||||||
|
if clientOpt, ok := redisOpt.(asynq.RedisClientOpt); ok {
|
||||||
|
clientOpt.Password = cfg.Redis.Password
|
||||||
|
redisOpt = clientOpt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize monitoring service (if Redis is available)
|
// Initialize monitoring service (if Redis is available)
|
||||||
var monitoringService *monitoring.Service
|
var monitoringService *monitoring.Service
|
||||||
@@ -141,14 +181,62 @@ func main() {
|
|||||||
// Create job handler
|
// Create job handler
|
||||||
jobHandler := jobs.NewHandler(db, pushClient, emailService, notificationService, cfg)
|
jobHandler := jobs.NewHandler(db, pushClient, emailService, notificationService, cfg)
|
||||||
|
|
||||||
|
// Wire upload service for the pending_uploads cleanup cron AND share the
|
||||||
|
// underlying storage service with the TaskService below so the worker can
|
||||||
|
// load completion images for email embedding. Storage may be local-disk
|
||||||
|
// (no S3 backend), in which case the upload service stays nil and the
|
||||||
|
// cleanup handler no-ops. Cache is optional — the cleanup path doesn't
|
||||||
|
// rate-limit and works fine with a nil cache.
|
||||||
|
var sharedStorageService *services.StorageService
|
||||||
|
if storageService, sErr := services.NewStorageService(&cfg.Storage); sErr == nil {
|
||||||
|
sharedStorageService = storageService
|
||||||
|
if s3 := storageService.S3Backend(); s3 != nil {
|
||||||
|
pendingUploadRepo := repositories.NewPendingUploadRepository(db)
|
||||||
|
uploadService := services.NewUploadService(pendingUploadRepo, s3, &cfg.Storage, nil)
|
||||||
|
jobHandler.SetUploadService(uploadService)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Warn().Err(sErr).Msg("Failed to initialize storage service for upload cleanup; cleanup cron will no-op")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wire a TaskService for the task-completed notification handler. The
|
||||||
|
// worker re-creates this (vs. importing the api's wired instance) because
|
||||||
|
// each binary owns its own dependency graph. The handler is fully nil-safe
|
||||||
|
// — if any of the wired services are absent, the corresponding side of
|
||||||
|
// notification delivery (push or email) is skipped.
|
||||||
|
taskRepo := repositories.NewTaskRepository(db)
|
||||||
|
residenceRepo := repositories.NewResidenceRepository(db)
|
||||||
|
workerTaskService := services.NewTaskService(taskRepo, residenceRepo)
|
||||||
|
if notificationService != nil {
|
||||||
|
workerTaskService.SetNotificationService(notificationService)
|
||||||
|
}
|
||||||
|
if emailService != nil {
|
||||||
|
workerTaskService.SetEmailService(emailService)
|
||||||
|
}
|
||||||
|
if sharedStorageService != nil {
|
||||||
|
workerTaskService.SetStorageService(sharedStorageService)
|
||||||
|
}
|
||||||
|
jobHandler.SetTaskService(workerTaskService)
|
||||||
|
|
||||||
// Create Asynq mux and register handlers
|
// Create Asynq mux and register handlers
|
||||||
mux := asynq.NewServeMux()
|
mux := asynq.NewServeMux()
|
||||||
|
|
||||||
|
// Tracing + metrics middleware: every job runs inside a span and emits
|
||||||
|
// asynq_job_duration_seconds{task_type,result}.
|
||||||
|
mux.Use(asynqTracingMiddleware(asynqTracer))
|
||||||
|
|
||||||
mux.HandleFunc(jobs.TypeSmartReminder, jobHandler.HandleSmartReminder)
|
mux.HandleFunc(jobs.TypeSmartReminder, jobHandler.HandleSmartReminder)
|
||||||
mux.HandleFunc(jobs.TypeDailyDigest, jobHandler.HandleDailyDigest)
|
mux.HandleFunc(jobs.TypeDailyDigest, jobHandler.HandleDailyDigest)
|
||||||
mux.HandleFunc(jobs.TypeSendEmail, jobHandler.HandleSendEmail)
|
mux.HandleFunc(jobs.TypeSendEmail, jobHandler.HandleSendEmail)
|
||||||
mux.HandleFunc(jobs.TypeSendPush, jobHandler.HandleSendPush)
|
mux.HandleFunc(jobs.TypeSendPush, jobHandler.HandleSendPush)
|
||||||
mux.HandleFunc(jobs.TypeOnboardingEmails, jobHandler.HandleOnboardingEmails)
|
mux.HandleFunc(jobs.TypeOnboardingEmails, jobHandler.HandleOnboardingEmails)
|
||||||
mux.HandleFunc(jobs.TypeReminderLogCleanup, jobHandler.HandleReminderLogCleanup)
|
mux.HandleFunc(jobs.TypeReminderLogCleanup, jobHandler.HandleReminderLogCleanup)
|
||||||
|
mux.HandleFunc(jobs.TypeUploadCleanup, jobHandler.HandleUploadCleanup)
|
||||||
|
mux.HandleFunc(jobs.TypeNotificationCleanup, jobHandler.HandleNotificationCleanup)
|
||||||
|
mux.HandleFunc(jobs.TypeWebhookLogCleanup, jobHandler.HandleWebhookLogCleanup)
|
||||||
|
mux.HandleFunc(jobs.TypeAuditLogCleanup, jobHandler.HandleAuditLogCleanup)
|
||||||
|
mux.HandleFunc(worker.TypeTaskCompletedNotification, jobHandler.HandleTaskCompletedNotification)
|
||||||
|
mux.HandleFunc(worker.TypeDataExport, jobHandler.HandleDataExport)
|
||||||
|
|
||||||
// Register email job handlers (welcome, verification, password reset, password changed)
|
// Register email job handlers (welcome, verification, password reset, password changed)
|
||||||
if emailService != nil {
|
if emailService != nil {
|
||||||
@@ -188,6 +276,32 @@ func main() {
|
|||||||
}
|
}
|
||||||
log.Info().Str("cron", "0 3 * * *").Msg("Registered reminder log cleanup job (runs daily at 3:00 AM UTC)")
|
log.Info().Str("cron", "0 3 * * *").Msg("Registered reminder log cleanup job (runs daily at 3:00 AM UTC)")
|
||||||
|
|
||||||
|
// Schedule pending_uploads cleanup (hourly at :30 to avoid colliding with
|
||||||
|
// the top-of-hour reminder + digest crons). Reaps unclaimed expired
|
||||||
|
// upload sessions; the B2 bucket lifecycle (7 days on uploads/ prefix)
|
||||||
|
// is the backstop if this worker is offline for an extended period.
|
||||||
|
if _, err := scheduler.Register("30 * * * *", asynq.NewTask(jobs.TypeUploadCleanup, nil)); err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Failed to register upload cleanup job")
|
||||||
|
}
|
||||||
|
log.Info().Str("cron", "30 * * * *").Msg("Registered pending_uploads cleanup job (runs hourly)")
|
||||||
|
|
||||||
|
// Data-retention cleanups (BE-2). Staggered off the 3:00 reminder cleanup to
|
||||||
|
// avoid piling DELETEs onto the same Neon connection window.
|
||||||
|
if _, err := scheduler.Register("0 2 * * *", asynq.NewTask(jobs.TypeNotificationCleanup, nil)); err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Failed to register notification cleanup job")
|
||||||
|
}
|
||||||
|
log.Info().Str("cron", "0 2 * * *").Msg("Registered notification cleanup job (daily 02:00 UTC, 90d retention)")
|
||||||
|
|
||||||
|
if _, err := scheduler.Register("30 2 * * 0", asynq.NewTask(jobs.TypeWebhookLogCleanup, nil)); err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Failed to register webhook log cleanup job")
|
||||||
|
}
|
||||||
|
log.Info().Str("cron", "30 2 * * 0").Msg("Registered webhook log cleanup job (weekly Sun 02:30 UTC, 180d retention)")
|
||||||
|
|
||||||
|
if _, err := scheduler.Register("30 3 * * 0", asynq.NewTask(jobs.TypeAuditLogCleanup, nil)); err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Failed to register audit log cleanup job")
|
||||||
|
}
|
||||||
|
log.Info().Str("cron", "30 3 * * 0").Msg("Registered audit log cleanup job (weekly Sun 03:30 UTC, 365d retention)")
|
||||||
|
|
||||||
// Handle graceful shutdown
|
// Handle graceful shutdown
|
||||||
quit := make(chan os.Signal, 1)
|
quit := make(chan os.Signal, 1)
|
||||||
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
|
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
|
||||||
@@ -199,6 +313,12 @@ func main() {
|
|||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
_, _ = w.Write([]byte(`{"status":"ok"}`))
|
_, _ = w.Write([]byte(`{"status":"ok"}`))
|
||||||
})
|
})
|
||||||
|
// Expose Prometheus metrics so vmagent can scrape the worker. The
|
||||||
|
// apns_send_*, fcm_send_*, asynq_job_* and cache_ops_* series have been
|
||||||
|
// recorded on this process all along — they were just never exposed, which
|
||||||
|
// is why those dashboard panels read empty. Same :6060 as health; in-cluster
|
||||||
|
// only (not externally published).
|
||||||
|
healthMux.Handle("/metrics", prom.HTTPHandler())
|
||||||
healthSrv := &http.Server{
|
healthSrv := &http.Server{
|
||||||
Addr: workerHealthAddr,
|
Addr: workerHealthAddr,
|
||||||
Handler: healthMux,
|
Handler: healthMux,
|
||||||
@@ -238,3 +358,44 @@ func main() {
|
|||||||
|
|
||||||
log.Info().Msg("Worker stopped")
|
log.Info().Msg("Worker stopped")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// asynqTracingMiddleware returns an asynq.MiddlewareFunc that opens a span
|
||||||
|
// per task execution and records asynq_job_duration_seconds. Span attrs
|
||||||
|
// include task type, queue, retry count, and the result outcome.
|
||||||
|
func asynqTracingMiddleware(tracer trace.Tracer) asynq.MiddlewareFunc {
|
||||||
|
return func(next asynq.Handler) asynq.Handler {
|
||||||
|
return asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error {
|
||||||
|
ctx, span := tracer.Start(ctx, "asynq.handle:"+t.Type(),
|
||||||
|
trace.WithAttributes(
|
||||||
|
attribute.String("asynq.task_type", t.Type()),
|
||||||
|
attribute.Int("asynq.payload_bytes", len(t.Payload())),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
defer span.End()
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
err := next.ProcessTask(ctx, t)
|
||||||
|
dur := time.Since(start)
|
||||||
|
result := "ok"
|
||||||
|
if err != nil {
|
||||||
|
result = "error"
|
||||||
|
span.SetStatus(codes.Error, err.Error())
|
||||||
|
span.RecordError(err)
|
||||||
|
}
|
||||||
|
span.SetAttributes(attribute.String("asynq.result", result))
|
||||||
|
prom.ObserveAsynqJob(t.Type(), result, dur)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// workerDeploymentEnv mirrors deploymentEnvironment in cmd/api/main.go: a
// non-empty DEPLOYMENT_ENVIRONMENT variable is returned as-is, otherwise the
// debug flag selects "dev" (true) or "prod" (false).
func workerDeploymentEnv(debug bool) string {
	label := "prod"
	if debug {
		label = "dev"
	}
	if explicit := os.Getenv("DEPLOYMENT_ENVIRONMENT"); explicit != "" {
		label = explicit
	}
	return label
}
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ email:
|
|||||||
push:
|
push:
|
||||||
apns_key_id: ""
|
apns_key_id: ""
|
||||||
apns_team_id: ""
|
apns_team_id: ""
|
||||||
apns_topic: com.tt.honeyDue
|
apns_topic: com.myhoneydue.honeyDue.dev
|
||||||
apns_production: false
|
apns_production: false
|
||||||
apns_use_sandbox: true # Sandbox for dev
|
apns_use_sandbox: true # Sandbox for dev
|
||||||
|
|
||||||
@@ -85,8 +85,9 @@ tls:
|
|||||||
# If mode=cloudflare, create secrets/cloudflare-origin.crt and .key
|
# If mode=cloudflare, create secrets/cloudflare-origin.crt and .key
|
||||||
|
|
||||||
# --- Apple Auth / IAP (optional) ---
|
# --- Apple Auth / IAP (optional) ---
|
||||||
|
# client_id MUST equal the iOS Debug bundle ID for the dev backend.
|
||||||
apple_auth:
|
apple_auth:
|
||||||
client_id: ""
|
client_id: "com.myhoneydue.honeyDue.dev"
|
||||||
team_id: ""
|
team_id: ""
|
||||||
iap_key_id: ""
|
iap_key_id: ""
|
||||||
iap_issuer_id: ""
|
iap_issuer_id: ""
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ ADMIN_PW="$(openssl rand -base64 16)"
|
|||||||
|
|
||||||
EMAIL_USER="treytartt@fastmail.com"
|
EMAIL_USER="treytartt@fastmail.com"
|
||||||
APNS_KEY_ID="9R5Q7ZX874"
|
APNS_KEY_ID="9R5Q7ZX874"
|
||||||
APNS_TEAM_ID="V3PF3M6B6U"
|
APNS_TEAM_ID="X86BR9WTLD"
|
||||||
|
|
||||||
log ""
|
log ""
|
||||||
log "Pre-filled from existing dev server:"
|
log "Pre-filled from existing dev server:"
|
||||||
@@ -147,7 +147,7 @@ email:
|
|||||||
push:
|
push:
|
||||||
apns_key_id: "${APNS_KEY_ID}"
|
apns_key_id: "${APNS_KEY_ID}"
|
||||||
apns_team_id: "${APNS_TEAM_ID}"
|
apns_team_id: "${APNS_TEAM_ID}"
|
||||||
apns_topic: com.tt.honeyDue
|
apns_topic: com.myhoneydue.honeyDue.dev
|
||||||
apns_production: false
|
apns_production: false
|
||||||
apns_use_sandbox: true
|
apns_use_sandbox: true
|
||||||
|
|
||||||
@@ -189,7 +189,7 @@ tls:
|
|||||||
|
|
||||||
# --- Apple Auth / IAP ---
|
# --- Apple Auth / IAP ---
|
||||||
apple_auth:
|
apple_auth:
|
||||||
client_id: "com.tt.honeyDue"
|
client_id: "com.myhoneydue.honeyDue.dev"
|
||||||
team_id: "${APNS_TEAM_ID}"
|
team_id: "${APNS_TEAM_ID}"
|
||||||
iap_key_id: ""
|
iap_key_id: ""
|
||||||
iap_issuer_id: ""
|
iap_issuer_id: ""
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ config.yaml
|
|||||||
|
|
||||||
# Generated files
|
# Generated files
|
||||||
kubeconfig
|
kubeconfig
|
||||||
|
kubeconfig.*
|
||||||
cluster-config.yaml
|
cluster-config.yaml
|
||||||
prod.env
|
prod.env
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,966 @@
|
|||||||
|
# honeyDue k3s Cluster — Operations Runbook
|
||||||
|
|
||||||
|
Living document for the honeyDue production cluster. Add entries when you hit
|
||||||
|
something non-obvious so future-you (or your replacement) doesn't have to
|
||||||
|
rediscover it.
|
||||||
|
|
||||||
|
Last full revision: **2026-06-03** (Hetzner → OVH BHS cutover; this cluster has
|
||||||
|
been the sole production cluster from that date forward). For pre-OVH history, see
|
||||||
|
`MIGRATION_NOTES.md` (Swarm → k3s migration on Hetzner, 2026-04-24).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Topology and inventory
|
||||||
|
|
||||||
|
### Hosting
|
||||||
|
|
||||||
|
| | |
|
||||||
|
|---|---|
|
||||||
|
| Provider | OVHcloud (us.ovhcloud.com) |
|
||||||
|
| Datacenter | BHS — Beauharnois, Quebec, Canada |
|
||||||
|
| Plan | VPS-1 × 3 (~$6.46/mo each, ~$19/mo total) |
|
||||||
|
| Node spec | 4 vCPU (Intel Haswell, shared), 7.6 GB RAM, 75 GB NVMe |
|
||||||
|
| Public bandwidth | 400 Mbps per node, unlimited traffic |
|
||||||
|
| Private network | **None.** Nodes have public IPv4 + IPv6 only; inter-node traffic crosses the public internet (encrypted by flannel WireGuard backend — see §3) |
|
||||||
|
|
||||||
|
### Nodes
|
||||||
|
|
||||||
|
| SSH alias | Kubernetes node name | Public IPv4 | Public IPv6 | Roles |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `ovhcloud1` | `vps-1624d691` | `51.81.83.33` | `2604:2dc0:101:200::5a9a` | control-plane, etcd, redis-pinned |
|
||||||
|
| `ovhcloud2` | `vps-c0f51be2` | `51.81.87.86` | `2604:2dc0:101:200::30d4` | control-plane, etcd |
|
||||||
|
| `ovhcloud3` | `vps-dbca24c7` | `51.81.85.248` | `2604:2dc0:101:200::450f` | control-plane, etcd |
|
||||||
|
|
||||||
|
The cluster is **all-control-plane** (workloads schedule on the same nodes that
|
||||||
|
run etcd and the API server). `vps-1624d691` carries the
|
||||||
|
`honeydue/redis=true` label so the Redis Deployment's `nodeSelector` binds
|
||||||
|
there; the Redis PVC (`local-path`, host-pinned) lives on that node's disk.
|
||||||
|
|
||||||
|
### SSH access
|
||||||
|
|
||||||
|
`~/.ssh/config` entries (operator workstation):
|
||||||
|
|
||||||
|
```
|
||||||
|
Host ovhcloud1
|
||||||
|
HostName 51.81.83.33
|
||||||
|
Port 22
|
||||||
|
User ubuntu
|
||||||
|
IdentityFile ~/.ssh/ovhcloud
|
||||||
|
IdentitiesOnly yes
|
||||||
|
Host ovhcloud2
|
||||||
|
HostName 51.81.87.86
|
||||||
|
Port 22
|
||||||
|
User ubuntu
|
||||||
|
IdentityFile ~/.ssh/ovhcloud
|
||||||
|
IdentitiesOnly yes
|
||||||
|
Host ovhcloud3
|
||||||
|
HostName 51.81.85.248
|
||||||
|
Port 22
|
||||||
|
User ubuntu
|
||||||
|
IdentityFile ~/.ssh/ovhcloud
|
||||||
|
IdentitiesOnly yes
|
||||||
|
```
|
||||||
|
|
||||||
|
`ubuntu` has passwordless sudo (`/etc/sudoers.d/90-cloud-init-users` from OVH's
|
||||||
|
cloud-init).
|
||||||
|
|
||||||
|
### kubectl access
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export KUBECONFIG=/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/deploy-k3s/kubeconfig
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
The `deploy-k3s/kubeconfig` file (mode 0600, gitignored) is the OVH cluster's
|
||||||
|
admin kubeconfig with `server: https://51.81.83.33:6443`. A stale Hetzner copy
|
||||||
|
lives next to it as `kubeconfig.hetzner.bak` for historical reference; the
|
||||||
|
Hetzner cluster is powered off and that file's API server is unreachable.
|
||||||
|
|
||||||
|
To refresh from the cluster (if the local copy is lost or rotated):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh ovhcloud1 'sudo cat /etc/rancher/k3s/k3s.yaml' \
|
||||||
|
| sed 's|server: https://127.0.0.1:6443|server: https://51.81.83.33:6443|' \
|
||||||
|
> deploy-k3s/kubeconfig
|
||||||
|
chmod 600 deploy-k3s/kubeconfig
|
||||||
|
```
|
||||||
|
|
||||||
|
The k3s API at `:6443` is open to the public internet (token-protected).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Software
|
||||||
|
|
||||||
|
### Kernel-level
|
||||||
|
|
||||||
|
| | |
|
||||||
|
|---|---|
|
||||||
|
| OS | Ubuntu 26.04 LTS (set by OVH's VPS-1 image) |
|
||||||
|
| Kernel | `7.0.0-14-generic` |
|
||||||
|
| Init | systemd |
|
||||||
|
| Container runtime | containerd 2.2.2 (bundled with k3s) |
|
||||||
|
| Firewall | `ufw` (per-node, configured at install — see §3) |
|
||||||
|
| Other host packages | `fail2ban` (SSH brute-force protection, default jail), `unattended-upgrades` (security updates), `open-iscsi` (k3s prereq for some storage backends), `curl` |
|
||||||
|
|
||||||
|
### Kubernetes
|
||||||
|
|
||||||
|
| | |
|
||||||
|
|---|---|
|
||||||
|
| Distribution | k3s |
|
||||||
|
| Version | **`v1.34.6+k3s1`** (pinned in `config.yaml:cluster.k3s_version`) |
|
||||||
|
| Control plane | 3-node HA, embedded etcd (no external Postgres backing store) |
|
||||||
|
| CNI / networking | flannel with **WireGuard-native backend** (`--flannel-backend=wireguard-native`). Encrypts pod-to-pod and etcd peer traffic because nodes only have public IPs (no private network). ~3-5% CPU overhead under load. |
|
||||||
|
| Service LB | klipper-lb (default k3s `servicelb`). The `svclb-traefik` DaemonSet binds host ports `:80` and `:443` on each node and forwards to the Traefik Service. **Not** the DaemonSet-w/-hostNetwork Traefik pattern used on the old Hetzner cluster — see §10 *Differences from MIGRATION_NOTES*. |
|
||||||
|
| Ingress controller | Traefik (k3s default), single-replica Deployment, exposed via klipper-lb |
|
||||||
|
| DNS | CoreDNS (k3s default) |
|
||||||
|
| Secrets encryption | Enabled (`--secrets-encryption`); etcd values are AES-CBC encrypted at rest |
|
||||||
|
| kubeconfig perms | `0600` (`--write-kubeconfig-mode=0600`) |
|
||||||
|
| Cloud controller | Disabled (`--disable-cloud-controller`) — no provider integration on OVH |
|
||||||
|
| Misc | `--node-ip` / `--node-external-ip` / `--advertise-address` all set to each node's public IPv4. TLS SANs cover all 3 IPs so any IP can serve the API. |
|
||||||
|
|
||||||
|
### Application stack (in cluster, `honeydue` namespace)
|
||||||
|
|
||||||
|
| Deployment | Replicas | Image (digest-pinned) | Notes |
|
||||||
|
|---|---:|---|---|
|
||||||
|
| `api` | 3 | `gitea.treytartt.com/admin/honeydue-api@sha256:34fde6...` | Go REST API on `:8000`, exposes `/metrics` |
|
||||||
|
| `web` | 3 | `gitea.treytartt.com/admin/honeydue-web@sha256:8c62cf...` | Next.js, server-side proxy to api |
|
||||||
|
| `admin` | 1 | `gitea.treytartt.com/admin/honeydue-admin@sha256:b81263...` | Next.js admin panel, gated behind Traefik basic-auth |
|
||||||
|
| `worker` | 1 | `gitea.treytartt.com/admin/honeydue-worker@sha256:fe1f5e...` | Asynq scheduler + Redis-backed jobs (singleton — must not run as >1 replica or every cron fires N×) |
|
||||||
|
| `redis` | 1 | `redis:7-alpine@sha256:6ab0b6...` | Pinned to `vps-1624d691` via `honeydue/redis=true`. PVC `redis-data` (local-path, 5 Gi). Password-auth required. |
|
||||||
|
| `vmagent` | 1 | `victoriametrics/vmagent@sha256:...` (default tag) | Scrapes api `/metrics` + kube-state-metrics; remote-writes to obs.88oakapps.com |
|
||||||
|
| `kube-state-metrics` | 1 | `kube-state-metrics@sha256:...` | In `kube-system`, scraped by vmagent for `kube_*` cluster-state metrics |
|
||||||
|
| `alloy-logs` (DaemonSet) | 3 (1/node) | `grafana/alloy@sha256:...` | Tails `/var/log/pods/*` and ships to Loki at obs.88oakapps.com |
|
||||||
|
|
||||||
|
The Asynq scheduler inside `worker` registers these cron jobs:
|
||||||
|
|
||||||
|
| Cron | Job | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `0 * * * *` | Smart reminder check (per-user hour) | Default user hour: 14:00 UTC |
|
||||||
|
| `0 * * * *` | Daily digest check (per-user hour) | Default user hour: 03:00 UTC |
|
||||||
|
| `0 10 * * *` | Onboarding emails | 10:00 UTC |
|
||||||
|
| `0 3 * * *` | Reminder log cleanup | 03:00 UTC |
|
||||||
|
| `30 * * * *` | Pending uploads cleanup | xx:30 every hour |
|
||||||
|
|
||||||
|
### External dependencies
|
||||||
|
|
||||||
|
| Service | Endpoint | Purpose | Failure mode |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Neon Postgres | `ep-floral-truth-amttbc5a-pooler.c-5.us-east-1.aws.neon.tech:5432` | App data. Pooler endpoint (transaction-mode PgBouncer in front of Neon compute) so connections stay warm. | api / worker pods crash-loop with `dial tcp: connection refused`. Health endpoint returns `postgres: error`. |
|
||||||
|
| Backblaze B2 (S3-compatible) | `s3.us-east-005.backblazeb2.com` (bucket `honeyDueProd`) | User uploads (photos, PDFs, completion attachments) | Upload routes return 5xx; reads of cached/static files still work. |
|
||||||
|
| Cloudflare | `myhoneydue.com` zone | DNS + TLS termination + edge cache + DDoS | Traffic stops reaching origin. Direct `https://51.81.x.x` still works for diagnostics. |
|
||||||
|
| obs.88oakapps.com | Operator-run Grafana + VictoriaMetrics + Loki | Metrics & logs | vmagent + alloy-logs back off and retry. No app-side impact. |
|
||||||
|
| Apple APNs | `api.push.apple.com:443` (production) | iOS push notifications | Push fails; circuit breaker opens; failure logged. App functionality unaffected. |
|
||||||
|
| Fastmail SMTP | `smtp.fastmail.com:587` | Transactional emails (verification, recovery, digests) | Email send fails in the worker; logged; user reset/digest flow degrades. |
|
||||||
|
| Gitea registry | `gitea.treytartt.com` | Container image registry | Deploys can't pull. Existing pods keep running on cached images. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Network and firewall
|
||||||
|
|
||||||
|
### Per-node `ufw` configuration
|
||||||
|
|
||||||
|
Applied during install (same on all 3 nodes):
|
||||||
|
|
||||||
|
```
|
||||||
|
default deny incoming
|
||||||
|
default allow outgoing
|
||||||
|
allow 22/tcp (SSH, world)
|
||||||
|
allow 80/tcp (HTTP via Cloudflare, world — see GAP-1)
|
||||||
|
allow 443/tcp (HTTPS, same — GAP-1)
|
||||||
|
allow 6443/tcp (k3s API, world, token-protected)
|
||||||
|
allow 2379:2380/tcp from <other 2 OVH IPs> (etcd client + peer)
|
||||||
|
allow 10250/tcp from <other 2 OVH IPs> (kubelet)
|
||||||
|
allow 51820/udp from <other 2 OVH IPs> (WireGuard tunnel)
|
||||||
|
allow 8472/udp from <other 2 OVH IPs> (VXLAN, defense-in-depth fallback)
|
||||||
|
```
|
||||||
|
|
||||||
|
To inspect: `ssh ovhcloudN sudo ufw status numbered`.
|
||||||
|
|
||||||
|
### Cluster networking
|
||||||
|
|
||||||
|
- **Pod CIDR**: `10.42.0.0/16` (default k3s)
|
||||||
|
- **Service CIDR**: `10.43.0.0/16` (default k3s)
|
||||||
|
- **Flannel backend**: WireGuard-native. Each node hosts a `flannel-wg` interface on UDP 51820 and tunnels pod traffic to peers. Verify: `ssh ovhcloudN ip -d link show flannel-wg`.
|
||||||
|
|
||||||
|
### Traefik ingress flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Cloudflare → node:80/443 (public)
|
||||||
|
→ klipper-lb svclb-traefik DaemonSet pod (hostPort:80/443)
|
||||||
|
→ Traefik Service (ClusterIP 10.43.245.127:80/443)
|
||||||
|
→ Traefik Deployment pod (single replica)
|
||||||
|
→ matches Ingress host rule (api.myhoneydue.com etc.)
|
||||||
|
→ routes to backend Service (api / web / admin)
|
||||||
|
→ backend Pod
|
||||||
|
```
|
||||||
|
|
||||||
|
The default Traefik installation also lives in `kube-system` and is managed by k3s's
|
||||||
|
HelmChart. **No HelmChartConfig override is applied on OVH** (unlike Hetzner
|
||||||
|
— see §10).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. DNS configuration (Cloudflare)
|
||||||
|
|
||||||
|
The `myhoneydue.com` zone in Cloudflare has these public records. **All
|
||||||
|
hostnames are proxied (orange cloud)** — required by the `cloudflare-only`
|
||||||
|
Traefik middleware which 403s any non-CF source IP.
|
||||||
|
|
||||||
|
| Host | Type | Values | Proxy |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `api.myhoneydue.com` | A × 3 | `51.81.83.33`, `51.81.87.86`, `51.81.85.248` | Proxied |
|
||||||
|
| `app.myhoneydue.com` | A × 3 | (same trio) | Proxied |
|
||||||
|
| `admin.myhoneydue.com` | A × 3 | (same trio) | Proxied |
|
||||||
|
| `myhoneydue.com` (apex `@`) | A × 3 | (same trio) | Proxied |
|
||||||
|
|
||||||
|
Cloudflare round-robins among the 3 origins, klipper-lb on whichever node CF
|
||||||
|
hits forwards to Traefik, and Traefik routes by Host header. Per-request,
|
||||||
|
effectively load-balanced across the 3 nodes for ingress, with no central LB.
|
||||||
|
|
||||||
|
**SSL/TLS mode**: Flexible (CF terminates TLS at the edge; origin is plain
|
||||||
|
HTTP on `:80`). Upgrading to Full (strict) is on the deferred list — would
|
||||||
|
need an origin certificate provisioned to `cloudflare-origin-cert` secret and
|
||||||
|
Traefik configured for TLS termination.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Filesystem layout (`deploy-k3s/`)
|
||||||
|
|
||||||
|
```
|
||||||
|
deploy-k3s/
|
||||||
|
├── config.yaml # Single config source (gitignored; contains tokens)
|
||||||
|
├── config.yaml.example # Template
|
||||||
|
├── kubeconfig # OVH admin kubeconfig (gitignored, 0600)
|
||||||
|
├── kubeconfig.hetzner.bak # Old Hetzner kubeconfig (unreachable, kept for history)
|
||||||
|
├── kubeconfig.tunnel # Optional: localhost-pointing copy for SSH-tunnel use
|
||||||
|
├── secrets/
|
||||||
|
│ ├── README.md
|
||||||
|
│ ├── postgres_password.txt # Neon DB password
|
||||||
|
│ ├── secret_key.txt # 32+ char app-token signing secret
|
||||||
|
│ ├── email_host_password.txt # Fastmail SMTP app password
|
||||||
|
│ ├── fcm_server_key.txt # FCM server key (currently unused — Android push disabled)
|
||||||
|
│ ├── apns_auth_key.p8 # APNs auth key (binary)
|
||||||
|
│ ├── cloudflare-origin.crt # Origin certificate (currently unused — CF Flexible)
|
||||||
|
│ └── cloudflare-origin.key
|
||||||
|
│ (all gitignored except README.md)
|
||||||
|
├── manifests/
|
||||||
|
│ ├── namespace.yaml
|
||||||
|
│ ├── network-policies.yaml # default-deny + per-app egress/ingress (13 NetPols total)
|
||||||
|
│ ├── rbac.yaml # api/worker/admin/web/redis ServiceAccounts (NOT applied by 03-deploy.sh; manual once)
|
||||||
|
│ ├── pod-disruption-budgets.yaml # api-pdb, web-pdb, worker-pdb (NOT applied by 03-deploy.sh; manual once)
|
||||||
|
│ ├── traefik-helmchartconfig.yaml # Hetzner-only DaemonSet+hostNetwork override (do NOT apply on OVH; we use default klipper-lb)
|
||||||
|
│ ├── kyverno-verify-images.yaml # Operator-gated policy (do NOT apply blindly — see file comment)
|
||||||
|
│ ├── api/{deployment,service,hpa}.yaml
|
||||||
|
│ ├── worker/deployment.yaml
|
||||||
|
│ ├── admin/{deployment,service}.yaml
|
||||||
|
│ ├── web/{deployment,service}.yaml
|
||||||
|
│ ├── redis/{deployment,service,pvc}.yaml
|
||||||
|
│ ├── ingress/{middleware,ingress-simple}.yaml
|
||||||
|
│ ├── migrate/job.yaml # goose migration Job (image-subbed at deploy time)
|
||||||
|
│ ├── observability/{kube-state-metrics,vmagent,alloy-logs}.yaml
|
||||||
|
│ └── kratos/ # Ory Kratos identity service (NOT yet deployed; gated on operator OIDC setup)
|
||||||
|
└── scripts/
|
||||||
|
├── _config.sh # Sourced by all scripts: cfg(), generate_env(), generate_cluster_config()
|
||||||
|
├── 01-provision-cluster.sh # Hetzner-Cloud-specific (uses hetzner-k3s CLI) — DO NOT RUN ON OVH
|
||||||
|
├── 02-setup-secrets.sh # Creates honeydue-secrets etc. from secrets/ + config.yaml; kubeconfig-driven
|
||||||
|
├── 03-deploy.sh # Build + push + apply manifests + roll deployments; kubeconfig-driven
|
||||||
|
├── 04-verify.sh # Post-deploy health + security checks; kubeconfig-driven
|
||||||
|
└── rollback.sh # `kubectl rollout undo` across all deployments
|
||||||
|
```
|
||||||
|
|
||||||
|
The `deploy/prod.env` file (sibling to `deploy-k3s/`, gitignored) holds
|
||||||
|
observability + admin credentials that `02-setup-secrets.sh` and `03-deploy.sh` read but never
|
||||||
|
display:
|
||||||
|
|
||||||
|
```
|
||||||
|
OBS_INGEST_URL (https://obs.88oakapps.com/api/v1/write)
|
||||||
|
OBS_TRACES_URL (https://obs.88oakapps.com/v1/traces)
|
||||||
|
OBS_INGEST_TOKEN (bearer token for VM + Loki + traces — all use same token)
|
||||||
|
GRAFANA_URL (https://grafana.88oakapps.com)
|
||||||
|
GRAFANA_ADMIN_USER (admin)
|
||||||
|
GRAFANA_ADMIN_PASSWORD
|
||||||
|
ADMIN_EMAIL / ADMIN_PASSWORD (in-app admin login)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Install from clean boxes — the truthful procedure
|
||||||
|
|
||||||
|
This is what we ran on 2026-06-03 to stand up the live cluster, exactly. If
|
||||||
|
you ever rebuild from zero this is the canonical sequence. Total wall-clock:
|
||||||
|
~12 min for cluster bootstrap; ~10 min for workloads.
|
||||||
|
|
||||||
|
### 6.1 Prerequisites
|
||||||
|
|
||||||
|
- 3 fresh Ubuntu VPS instances (any provider with public IPv4, ≥4 GB RAM,
|
||||||
|
≥40 GB disk)
|
||||||
|
- `~/.ssh/config` entries (`ovhcloud1/2/3`) pointing at them, with
|
||||||
|
passwordless sudo
|
||||||
|
- Local `kubectl` and `curl`
|
||||||
|
- The repo's `deploy-k3s/secrets/` populated (or the ability to copy live
|
||||||
|
secrets from another running cluster — see §7.2)
|
||||||
|
- `deploy/prod.env` populated with obs token + Grafana creds
|
||||||
|
|
||||||
|
### 6.2 Per-node OS hardening + firewall (all 3 in parallel)
|
||||||
|
|
||||||
|
For each `ovhcloudN`, over SSH:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
|
sudo apt-get update -qq
|
||||||
|
sudo apt-get install -y -qq fail2ban unattended-upgrades open-iscsi curl ufw
|
||||||
|
sudo systemctl enable --now iscsid fail2ban
|
||||||
|
sudo dpkg-reconfigure -f noninteractive -plow unattended-upgrades
|
||||||
|
|
||||||
|
sudo ufw --force reset
|
||||||
|
sudo ufw default deny incoming
|
||||||
|
sudo ufw default allow outgoing
|
||||||
|
sudo ufw allow 22/tcp
|
||||||
|
sudo ufw allow 80/tcp
|
||||||
|
sudo ufw allow 443/tcp
|
||||||
|
sudo ufw allow 6443/tcp
|
||||||
|
SELF=$(hostname -I | awk '{print $1}')
|
||||||
|
for peer in 51.81.83.33 51.81.87.86 51.81.85.248; do
|
||||||
|
[ "$peer" = "$SELF" ] && continue
|
||||||
|
sudo ufw allow from "$peer" to any port 2379:2380 proto tcp
|
||||||
|
sudo ufw allow from "$peer" to any port 10250 proto tcp
|
||||||
|
sudo ufw allow from "$peer" to any port 51820 proto udp
|
||||||
|
sudo ufw allow from "$peer" to any port 8472 proto udp
|
||||||
|
done
|
||||||
|
sudo ufw --force enable
|
||||||
|
```
|
||||||
|
|
||||||
|
**Watch ordering:** `allow 22/tcp` MUST precede `ufw enable`. Existing SSH
|
||||||
|
sessions survive (`ufw` only affects new connections), but a misordered script
|
||||||
|
locks you out of fresh logins.
|
||||||
|
|
||||||
|
### 6.3 Install k3s on `ovhcloud1` (the init node)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 'curl -sfL https://get.k3s.io | \
|
||||||
|
INSTALL_K3S_VERSION=v1.34.6+k3s1 \
|
||||||
|
sh -s - server \
|
||||||
|
--cluster-init \
|
||||||
|
--node-ip=51.81.83.33 \
|
||||||
|
--node-external-ip=51.81.83.33 \
|
||||||
|
--advertise-address=51.81.83.33 \
|
||||||
|
--flannel-backend=wireguard-native \
|
||||||
|
--flannel-external-ip \
|
||||||
|
--secrets-encryption \
|
||||||
|
--write-kubeconfig-mode=0600 \
|
||||||
|
--tls-san=51.81.83.33 \
|
||||||
|
--tls-san=51.81.87.86 \
|
||||||
|
--tls-san=51.81.85.248 \
|
||||||
|
--disable-cloud-controller'
|
||||||
|
```
|
||||||
|
|
||||||
|
Wait for `sudo k3s kubectl get nodes` to show this node Ready (~2-5 s).
|
||||||
|
Read the cluster token:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 'sudo cat /var/lib/rancher/k3s/server/node-token'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 Join `ovhcloud2`, then `ovhcloud3` (sequential)
|
||||||
|
|
||||||
|
Joining etcd one node at a time avoids split-brain on slow networks.
|
||||||
|
Replace `<TOKEN>` with the value from 6.3.
|
||||||
|
|
||||||
|
For `ovhcloud2`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud2 'curl -sfL https://get.k3s.io | \
|
||||||
|
INSTALL_K3S_VERSION=v1.34.6+k3s1 \
|
||||||
|
K3S_TOKEN=<TOKEN> \
|
||||||
|
sh -s - server \
|
||||||
|
--server=https://51.81.83.33:6443 \
|
||||||
|
--node-ip=51.81.87.86 \
|
||||||
|
--node-external-ip=51.81.87.86 \
|
||||||
|
--advertise-address=51.81.87.86 \
|
||||||
|
--flannel-backend=wireguard-native \
|
||||||
|
--flannel-external-ip \
|
||||||
|
--secrets-encryption \
|
||||||
|
--write-kubeconfig-mode=0600 \
|
||||||
|
--tls-san=51.81.83.33 --tls-san=51.81.87.86 --tls-san=51.81.85.248 \
|
||||||
|
--disable-cloud-controller'
|
||||||
|
```
|
||||||
|
|
||||||
|
Then identical for `ovhcloud3` with `--node-ip=51.81.85.248`, `--node-external-ip=51.81.85.248`, and
|
||||||
|
`--advertise-address=51.81.85.248`. After each, wait for `kubectl get nodes`
|
||||||
|
to show the new node Ready before proceeding.
|
||||||
|
|
||||||
|
### 6.5 Pull kubeconfig to the operator workstation
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 'sudo cat /etc/rancher/k3s/k3s.yaml' \
|
||||||
|
| sed 's|server: https://127.0.0.1:6443|server: https://51.81.83.33:6443|' \
|
||||||
|
> deploy-k3s/kubeconfig
|
||||||
|
chmod 600 deploy-k3s/kubeconfig
|
||||||
|
export KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig
|
||||||
|
kubectl get nodes -o wide # All 3 Ready, INTERNAL-IP = public IP
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.6 Label the redis node
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl label node vps-1624d691 honeydue/redis=true --overwrite
|
||||||
|
```
|
||||||
|
|
||||||
|
(Use whichever k8s node name corresponds to `ovhcloud1`. The Redis
|
||||||
|
Deployment's `nodeSelector` binds to this label.)
|
||||||
|
|
||||||
|
### 6.7 Bootstrap manifests NOT applied by `03-deploy.sh`
|
||||||
|
|
||||||
|
These must be applied manually on a fresh cluster, **before** running
|
||||||
|
`03-deploy.sh`, or workloads will fail to schedule:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl apply -f deploy-k3s/manifests/rbac.yaml
|
||||||
|
kubectl apply -f deploy-k3s/manifests/pod-disruption-budgets.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
`rbac.yaml` creates the 5 ServiceAccounts (`api`, `worker`, `admin`, `web`,
|
||||||
|
`redis`) referenced by the Deployment manifests. Without these, ReplicaSets
|
||||||
|
hang on `FailedCreate: error looking up service account` and pods never
|
||||||
|
start. Symptom on first deploy: `kubectl get deploy` shows `0 up-to-date`
|
||||||
|
across the board with no pod activity — see §9 *Gotchas*.
|
||||||
|
|
||||||
|
**Do NOT apply** `traefik-helmchartconfig.yaml` (Hetzner-only — see §10) or
|
||||||
|
`kyverno-verify-images.yaml` (gated on operator Kyverno install).
|
||||||
|
|
||||||
|
### 6.8 Seed secrets
|
||||||
|
|
||||||
|
Two paths; pick whichever fits your situation:
|
||||||
|
|
||||||
|
**Path A — clean install from local files** (the original design):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig ./deploy-k3s/scripts/02-setup-secrets.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Requires `deploy-k3s/secrets/` to contain real `postgres_password.txt`,
|
||||||
|
`secret_key.txt`, `email_host_password.txt`, `fcm_server_key.txt`,
|
||||||
|
`apns_auth_key.p8`, `cloudflare-origin.crt`, `cloudflare-origin.key`. The
|
||||||
|
script reads `config.yaml` for `registry.*`, `redis.password`,
|
||||||
|
`admin.basic_auth_*`, and `storage.b2_*`.
|
||||||
|
|
||||||
|
**Path B — clone live secrets from another running cluster** (what we
|
||||||
|
actually did during the migration; useful if `secrets/` is empty or you want
|
||||||
|
exact-byte equivalence):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
HETZNER=$(pwd)/deploy-k3s/kubeconfig.hetzner.bak # or any kubeconfig with the secrets
|
||||||
|
OVH=$(pwd)/deploy-k3s/kubeconfig
|
||||||
|
kubectl --kubeconfig=$OVH apply -f deploy-k3s/manifests/namespace.yaml
|
||||||
|
for S in honeydue-secrets honeydue-apns-key gitea-credentials cloudflare-origin-cert admin-basic-auth; do
|
||||||
|
kubectl --kubeconfig=$HETZNER -n honeydue get secret $S -o json \
|
||||||
|
| python3 -c "
|
||||||
|
import json, sys
|
||||||
|
d = json.load(sys.stdin)
|
||||||
|
m = d['metadata']
|
||||||
|
for k in ('uid','resourceVersion','creationTimestamp','generation','managedFields','ownerReferences','selfLink'):
|
||||||
|
    m.pop(k, None)
|
||||||
|
m.pop('annotations', None)
|
||||||
|
print(json.dumps(d))" \
|
||||||
|
| kubectl --kubeconfig=$OVH apply -f -
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
After either path, verify:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl -n honeydue get secrets
|
||||||
|
# Expect: admin-basic-auth, cloudflare-origin-cert, gitea-credentials,
|
||||||
|
# honeydue-apns-key, honeydue-secrets
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.9 Deploy workloads
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig \
|
||||||
|
./deploy-k3s/scripts/03-deploy.sh --skip-build --tag latest
|
||||||
|
```
|
||||||
|
|
||||||
|
- `--skip-build` skips Docker build + push, deploys whatever's already in the
|
||||||
|
registry at the named tag. Use this when migrating between clusters to
|
||||||
|
guarantee both run identical bits.
|
||||||
|
- Without flags it builds the api / worker / admin / web images from the
|
||||||
|
local repo HEAD and pushes to `gitea.treytartt.com` first.
|
||||||
|
- The script applies (in order): namespace, network-policies (13 of them),
|
||||||
|
redis, ingress, then runs the goose migration Job (blocking on success),
|
||||||
|
then api / worker / admin / web Deployments, then observability
|
||||||
|
(kube-state-metrics, vmagent, alloy-logs).
|
||||||
|
- It does NOT apply: `rbac.yaml`, `pod-disruption-budgets.yaml`,
|
||||||
|
`traefik-helmchartconfig.yaml`, `kyverno-verify-images.yaml`. The first
|
||||||
|
two must be applied manually (see §6.7); the latter two are Hetzner-only
|
||||||
|
or operator-gated.
|
||||||
|
- It does NOT apply: anything under `kratos/` (skipped until
|
||||||
|
`kratos-secrets` exists, which requires real OIDC client IDs).
|
||||||
|
|
||||||
|
### 6.10 Verify
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig ./deploy-k3s/scripts/04-verify.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Expect: all deployments `READY=desired`, 13 NetworkPolicies, 7 ServiceAccounts
|
||||||
|
(api, worker, admin, web, redis, vmagent, alloy-logs), 3 PDBs, cloudflare-only
|
||||||
|
middleware present, in-cluster `/api/health/` returns 200.
|
||||||
|
|
||||||
|
External smoke test (DNS-aware, but the api `/health/` route is exempt from
|
||||||
|
the cloudflare-only middleware so direct-IP works for diagnostics):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
for IP in 51.81.83.33 51.81.87.86 51.81.85.248; do
|
||||||
|
curl -s -o /dev/null -w "$IP -> %{http_code}\n" \
|
||||||
|
-H 'Host: api.myhoneydue.com' http://$IP/api/health/
|
||||||
|
done
|
||||||
|
# All three should return 200.
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.11 DNS cutover (if migrating)
|
||||||
|
|
||||||
|
In the Cloudflare dashboard for `myhoneydue.com`, set the 4 hostnames in §4 to
|
||||||
|
the OVH IPs and keep proxied. Effective propagation ~30 s to 5 min through
|
||||||
|
the Cloudflare proxy.
|
||||||
|
|
||||||
|
If you have a previous cluster, **scale its worker to 0 before flipping** to
|
||||||
|
avoid scheduled-job double-fires:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=<previous> kubectl -n honeydue scale deploy/worker --replicas=0
|
||||||
|
# (cut DNS)
|
||||||
|
KUBECONFIG=<new> kubectl -n honeydue scale deploy/worker --replicas=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Run those last two lines back-to-back. Worker work is mostly scheduled
|
||||||
|
(hourly+), so a brief gap is harmless; overlap would cause duplicate emails.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Day-to-day operations
|
||||||
|
|
||||||
|
### Common kubectl one-liners
|
||||||
|
|
||||||
|
```sh
|
||||||
|
export KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig
|
||||||
|
|
||||||
|
# Cluster state
|
||||||
|
kubectl get nodes -o wide
|
||||||
|
kubectl -n honeydue get pods
|
||||||
|
kubectl -n honeydue get deploy
|
||||||
|
kubectl top nodes
|
||||||
|
kubectl -n honeydue top pods
|
||||||
|
|
||||||
|
# Tail logs
|
||||||
|
kubectl -n honeydue logs deploy/api -f --tail=50
|
||||||
|
kubectl -n honeydue logs -l app.kubernetes.io/name=api -f --tail=20
|
||||||
|
stern -n honeydue api # if stern is installed (multi-pod)
|
||||||
|
|
||||||
|
# Restart a deployment (no image change, picks up ConfigMap changes)
|
||||||
|
kubectl -n honeydue rollout restart deploy/api
|
||||||
|
|
||||||
|
# Rollback one revision
|
||||||
|
kubectl -n honeydue rollout undo deploy/api
|
||||||
|
|
||||||
|
# Scale (worker MUST stay at 0 or 1)
|
||||||
|
kubectl -n honeydue scale deploy/api --replicas=4
|
||||||
|
|
||||||
|
# Get into a pod
|
||||||
|
kubectl -n honeydue exec -it deploy/api -- sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Redeploy after code changes
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=$(pwd)/deploy-k3s/kubeconfig ./deploy-k3s/scripts/03-deploy.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Builds images from local HEAD, tags with the git short SHA, pushes to Gitea,
|
||||||
|
runs `goose up` (idempotent), rolls api/worker/admin/web. Total: ~3-5 min
|
||||||
|
when images change.
|
||||||
|
|
||||||
|
To deploy without rebuilding (pin to a specific tag):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./deploy-k3s/scripts/03-deploy.sh --skip-build --tag <tag-or-:latest>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Migrations
|
||||||
|
|
||||||
|
Goose migrations live in `migrations/`. New file pattern:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
make migrate-new name=add_foo_column # generates migrations/YYYYMMDDHHMMSS_add_foo_column.sql
|
||||||
|
# Edit the file with -- +goose Up / -- +goose Down sections
|
||||||
|
```
|
||||||
|
|
||||||
|
`03-deploy.sh` runs a one-shot Job (`manifests/migrate/job.yaml`) that
|
||||||
|
executes `goose up` against Neon (direct compute endpoint, not pooler — see
|
||||||
|
file comment). The Job blocks api/worker rollout and aborts the deploy on
|
||||||
|
failure. No app pod runs `AutoMigrate`; api/worker startup verifies
|
||||||
|
`goose_db_version` is current and refuses to boot on mismatch.
|
||||||
|
|
||||||
|
### Grafana
|
||||||
|
|
||||||
|
URL: https://grafana.88oakapps.com (creds in `deploy/prod.env`)
|
||||||
|
|
||||||
|
Three dashboards in the `honeyDue` folder:
|
||||||
|
|
||||||
|
| UID | Title | Use |
|
||||||
|
|---|---|---|
|
||||||
|
| `honeydue-eli5-overview` | honeyDue — Overview (ELI5) | Single-screen at-a-glance health: pods up, crashes, errors, RPS, latency, Postgres, memory, top endpoints, push failures, worker activity, recent error logs. Created 2026-06-03. |
|
||||||
|
| `honeydue-red` | honeyDue API — RED | Rate/Errors/Duration cuts (legacy) |
|
||||||
|
| `honeydue-logs` | honeyDue — Production Logs | Live log explorer |
|
||||||
|
|
||||||
|
For the ELI5 dashboard's queries, **api-side metrics use `service="api"`,
|
||||||
|
NOT `namespace="honeydue"`.** vmagent's scrape config drops the namespace
|
||||||
|
label from api metrics — only `service`, `pod`, `node`, `job`, plus the
|
||||||
|
metric's own labels (route, method, status, etc.) survive. Queries that
|
||||||
|
filter on `namespace="honeydue"` for api metrics silently match nothing.
|
||||||
|
|
||||||
|
### kubectl tunnel (if 6443 is firewalled to your IP)
|
||||||
|
|
||||||
|
Currently `6443` is open WAN-side (matching the previous Hetzner posture).
|
||||||
|
If you tighten that to operator-IPs-only and your IP changes, use an SSH
|
||||||
|
tunnel:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh -fN -o ExitOnForwardFailure=yes -o ServerAliveInterval=30 \
|
||||||
|
-i ~/.ssh/ovhcloud \
|
||||||
|
-L 127.0.0.1:6443:127.0.0.1:6443 \
|
||||||
|
ubuntu@51.81.83.33
|
||||||
|
|
||||||
|
cp deploy-k3s/kubeconfig deploy-k3s/kubeconfig.tunnel
|
||||||
|
sed -i.bak 's|https://51.81.83.33:6443|https://127.0.0.1:6443|' deploy-k3s/kubeconfig.tunnel
|
||||||
|
export KUBECONFIG="$(pwd)/deploy-k3s/kubeconfig.tunnel"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Disaster recovery
|
||||||
|
|
||||||
|
### "I lost the kubeconfig"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 'sudo cat /etc/rancher/k3s/k3s.yaml' \
|
||||||
|
| sed 's|server: https://127.0.0.1:6443|server: https://51.81.83.33:6443|' \
|
||||||
|
> deploy-k3s/kubeconfig
|
||||||
|
chmod 600 deploy-k3s/kubeconfig
|
||||||
|
```
|
||||||
|
|
||||||
|
If `ovhcloud1` is down but `ovhcloud2` or `3` is up, swap host and IP — the
|
||||||
|
TLS SAN covers all three.
|
||||||
|
|
||||||
|
### "A node is unresponsive"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl drain vps-XXX --ignore-daemonsets --delete-emptydir-data
|
||||||
|
# Reboot via OVH manager or:
|
||||||
|
ssh ovhcloudN sudo reboot
|
||||||
|
# Wait for Ready, then:
|
||||||
|
kubectl uncordon vps-XXX
|
||||||
|
```
|
||||||
|
|
||||||
|
The cluster tolerates 1 node down (etcd quorum 2/3). With 2 down, etcd
|
||||||
|
loses quorum and the API server stops accepting writes.
|
||||||
|
|
||||||
|
### "etcd quorum lost (2+ nodes dead)"
|
||||||
|
|
||||||
|
Bring nodes back online if possible. If not:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 'sudo k3s server --cluster-reset --cluster-reset-restore-path=/var/lib/rancher/k3s/server/db/snapshots/<latest>'
|
||||||
|
```
|
||||||
|
|
||||||
|
k3s takes automatic etcd snapshots every 12h, keeping 5. List with:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ssh ovhcloud1 sudo ls -la /var/lib/rancher/k3s/server/db/snapshots/
|
||||||
|
```
|
||||||
|
|
||||||
|
This is destructive — workload state since the snapshot is lost, but Neon
|
||||||
|
(actual app data) is unaffected.
|
||||||
|
|
||||||
|
### "I have to rebuild the whole cluster from scratch"
|
||||||
|
|
||||||
|
Provision 3 fresh boxes, then exactly the sequence in §6. End-to-end is
|
||||||
|
~30 min. The dependencies that make this possible:
|
||||||
|
|
||||||
|
| Stays put through rebuild | Where |
|
||||||
|
|---|---|
|
||||||
|
| Application data | Neon Postgres (managed) |
|
||||||
|
| User uploads | Backblaze B2 (managed) |
|
||||||
|
| Container images | `gitea.treytartt.com` (self-hosted, but not on the OVH cluster) |
|
||||||
|
| Operator secrets | `deploy-k3s/secrets/` + `config.yaml` + `deploy/prod.env` on the operator workstation (gitignored) |
|
||||||
|
| DNS | Cloudflare control panel |
|
||||||
|
|
||||||
|
If `gitea.treytartt.com` is on the same OVH cluster, you have a circular
|
||||||
|
dependency — rebuilding requires images you can't pull until the cluster is
|
||||||
|
up. Currently Gitea is NOT in the honeyDue cluster (separate Hetzner-era
|
||||||
|
host), so this isn't a problem today, but worth flagging if that ever
|
||||||
|
changes.
|
||||||
|
|
||||||
|
### "Cutover back to Hetzner / failover to a backup cluster"
|
||||||
|
|
||||||
|
There is **no warm standby today.** Bringing up a second cluster is the
|
||||||
|
same §6 procedure on different hardware, then a Cloudflare DNS swap. The
|
||||||
|
worker-swap dance is critical:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
KUBECONFIG=<current> kubectl -n honeydue scale deploy/worker --replicas=0
|
||||||
|
# (Update Cloudflare DNS to new cluster's IPs — proxied)
|
||||||
|
KUBECONFIG=<new> kubectl -n honeydue scale deploy/worker --replicas=1
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Known gotchas
|
||||||
|
|
||||||
|
### 9.1 First-deploy "0 up-to-date" across all Deployments
|
||||||
|
|
||||||
|
**Symptoms:** `kubectl get deploy` shows `READY 0/N, UP-TO-DATE 0` for
|
||||||
|
api/worker/admin/web/redis. `kubectl get events` shows
|
||||||
|
`FailedCreate: error looking up service account honeydue/<name>: serviceaccount "..." not found`.
|
||||||
|
|
||||||
|
**Cause:** `rbac.yaml` (ServiceAccounts) is NOT applied by `03-deploy.sh`. On
|
||||||
|
a fresh cluster the SAs don't exist; the ReplicaSet controller can't create
|
||||||
|
pods.
|
||||||
|
|
||||||
|
**Fix:**
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl apply -f deploy-k3s/manifests/rbac.yaml
|
||||||
|
kubectl -n honeydue rollout restart deploy/api deploy/worker deploy/admin deploy/web deploy/redis
|
||||||
|
```
|
||||||
|
|
||||||
|
This was hit during the 2026-06-03 OVH bootstrap. Permanently fix by adding
|
||||||
|
`kubectl apply -f rbac.yaml` to `03-deploy.sh` between the namespace and
|
||||||
|
network-policies apply, but until that lands, follow §6.7 on every fresh
|
||||||
|
cluster.
|
||||||
|
|
||||||
|
### 9.2 vmagent SD broken on fresh deploy ("0 pods up" in Grafana)
|
||||||
|
|
||||||
|
**Symptoms:**
|
||||||
|
- Grafana panels using `kube_*` metrics or `up{job=...}` show 0
|
||||||
|
- vmagent logs: `dial tcp 10.43.0.1:443: connect: connection refused` every ~30 s
|
||||||
|
- Direct test from a pod also refused
|
||||||
|
|
||||||
|
**Cause:** k3s's NetworkPolicy controller evaluates egress rules *after*
|
||||||
|
kube-proxy's DNAT (not before, contrary to spec). Pod-to-`kubernetes`-Service
|
||||||
|
(`10.43.0.1:443`) gets DNAT'd to `<node_ip>:6443`, *then* the policy check
|
||||||
|
runs. Without an explicit egress rule for `:6443`, the packet is rejected.
|
||||||
|
|
||||||
|
The `allow-egress-from-vmagent` NetPol in `network-policies.yaml` includes
|
||||||
|
both rules:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- to:
|
||||||
|
- ipBlock: { cidr: 10.43.0.0/16 }
|
||||||
|
ports:
|
||||||
|
- { port: 443, protocol: TCP }
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except: [10.42.0.0/16]
|
||||||
|
ports:
|
||||||
|
- { port: 6443, protocol: TCP }
|
||||||
|
```
|
||||||
|
|
||||||
|
**If this happens:** confirm `network-policies.yaml` was applied:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl -n honeydue get netpol allow-egress-from-vmagent -o yaml | grep -A 5 6443
|
||||||
|
```
|
||||||
|
|
||||||
|
Counter-evidence that confirms diagnosis: `kube-state-metrics` in
|
||||||
|
`kube-system` works fine (no NetPols in that namespace).
|
||||||
|
|
||||||
|
### 9.3 vmagent appears healthy but no data in Grafana
|
||||||
|
|
||||||
|
vmagent's `/-/healthy` returns 200 as long as the process is alive and
|
||||||
|
remote-write is TCP-functional. It doesn't check that scrapes are actually
|
||||||
|
*succeeding*. The liveness probe in `vmagent.yaml` queries `/api/v1/targets`
|
||||||
|
and fails the pod if no target is `up`. After ~3 failures (~3 min), kubelet
|
||||||
|
recycles it.
|
||||||
|
|
||||||
|
If vmagent runs for weeks but Grafana is empty, the probe was disabled or
|
||||||
|
the exec command broke.
|
||||||
|
|
||||||
|
### 9.4 vmagent bearer token destroyed by direct `kubectl apply`
|
||||||
|
|
||||||
|
The committed `vmagent.yaml` has `bearer_token: TOKEN_PLACEHOLDER`. The real
|
||||||
|
token is `sed`-substituted at deploy time by `03-deploy.sh`. Applying the
|
||||||
|
file directly:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kubectl apply -f deploy-k3s/manifests/observability/vmagent.yaml # WRONG
|
||||||
|
```
|
||||||
|
|
||||||
|
overwrites the Secret with the literal `TOKEN_PLACEHOLDER`, and remote-writes then fail with
|
||||||
|
401. To restore without a full redeploy:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
OBS_TOKEN_B64=$(kubectl -n honeydue get secret honeydue-secrets \
|
||||||
|
-o jsonpath='{.data.OBS_INGEST_TOKEN}')
|
||||||
|
kubectl -n honeydue patch secret vmagent-remote-write --type=json \
|
||||||
|
-p="[{\"op\":\"replace\",\"path\":\"/data/bearer_token\",\"value\":\"${OBS_TOKEN_B64}\"}]"
|
||||||
|
kubectl -n honeydue rollout restart deploy/vmagent
|
||||||
|
```
|
||||||
|
|
||||||
|
Or just re-run `./deploy-k3s/scripts/03-deploy.sh` — the sed handles it.
|
||||||
|
|
||||||
|
### 9.5 Dashboard queries: api metrics need `service="api"` not `namespace="honeydue"`
|
||||||
|
|
||||||
|
vmagent's scrape config (`vmagent-config` ConfigMap) explicitly chooses which
|
||||||
|
Kubernetes pod-metadata labels to copy onto each scraped series. **Namespace
|
||||||
|
isn't one of them.** Labels you can use on api-side metrics:
|
||||||
|
|
||||||
|
- `service` (literal `"api"`)
|
||||||
|
- `job` (literal `"api"`)
|
||||||
|
- `pod` (the api pod name)
|
||||||
|
- `node` (the k8s node name)
|
||||||
|
- `cluster` (vmagent external_label, currently `"honeydue-k3s"`)
|
||||||
|
- `environment` (vmagent external_label, currently `"prod"`)
|
||||||
|
- Plus each metric's own labels (`method`, `route`, `status` for HTTP; etc.)
|
||||||
|
|
||||||
|
`kube_*` metrics from kube-state-metrics DO carry `namespace` natively
|
||||||
|
(KSM publishes it as a label, vmagent passes it through). Loki streams have
|
||||||
|
`namespace` because alloy-logs explicitly relabels it. So the rule is:
|
||||||
|
|
||||||
|
| Metric prefix | Use |
|
||||||
|
|---|---|
|
||||||
|
| `kube_*` | `namespace="honeydue"` |
|
||||||
|
| `http_*`, `gorm_*`, `go_*`, `process_*` (api) | `service="api"` |
|
||||||
|
| Loki logs `{...}` | `namespace="honeydue"` |
|
||||||
|
|
||||||
|
### 9.6 Cluster-label collision when two clusters run together
|
||||||
|
|
||||||
|
Both Hetzner and OVH vmagents push as `cluster=honeydue-k3s, environment=prod`
|
||||||
|
(same external_labels). During the migration overlap this made dashboards
|
||||||
|
sum both clusters' data. The simplest narrowing during overlap is by node
|
||||||
|
name pattern (`node=~"vps-.*"` for OVH, `node=~"ubuntu-.*"` for Hetzner). If
|
||||||
|
you ever bring up a backup cluster long-term, change one cluster's
|
||||||
|
`external_labels.cluster` to something distinct (e.g. `honeydue-ovh`
|
||||||
|
vs. `honeydue-backup`).
|
||||||
|
|
||||||
|
### 9.7 Worker double-firing scheduled jobs
|
||||||
|
|
||||||
|
If two `worker` Deployments run concurrently (e.g. two clusters both pointing
|
||||||
|
at the same Neon DB), Asynq schedulers each fire crons independently — users
|
||||||
|
get duplicate emails. Workaround: scale all-but-one worker to 0. This is the
|
||||||
|
exact mechanic used during cutovers (§6.11).
|
||||||
|
|
||||||
|
### 9.8 Node kubeconfig mode
|
||||||
|
|
||||||
|
`/etc/rancher/k3s/k3s.yaml` on each node is mode `0600` because we install
|
||||||
|
with `--write-kubeconfig-mode=0600`. Tightening from k3s default (0644) was
|
||||||
|
intentional. Don't change without coordinating — any tooling on the node
|
||||||
|
that expects to read it (none today) will break.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Differences from MIGRATION_NOTES.md (Hetzner-era)
|
||||||
|
|
||||||
|
`MIGRATION_NOTES.md` documents the Swarm → k3s migration on Hetzner
|
||||||
|
(2026-04-24). Most of it still applies, with these OVH-specific deltas:
|
||||||
|
|
||||||
|
| What MIGRATION_NOTES says | What OVH actually has |
|
||||||
|
|---|---|
|
||||||
|
| `hetzner-k3s` provisioner | Manual k3s install (§6) |
|
||||||
|
| Hetzner Load Balancer (not used) → Cloudflare round-robin | Same — Cloudflare round-robin (§4) |
|
||||||
|
| Traefik as DaemonSet + hostNetwork via HelmChartConfig | Traefik default Deployment + klipper-lb svclb DaemonSet. The `traefik-helmchartconfig.yaml` file is **NOT applied** on OVH. |
|
||||||
|
| `servicelb` disabled (`--disable=servicelb`) | `servicelb` enabled (we didn't pass `--disable=servicelb`). This is what makes klipper-lb work. |
|
||||||
|
| sysctl `net.ipv4.ip_unprivileged_port_start=0` for hostNetwork Traefik | Not needed — klipper-lb proxies the port binding instead |
|
||||||
|
| UFW rules between 3 Hetzner IPs | UFW rules between 3 OVH IPs (51.81.83.33, 51.81.87.86, 51.81.85.248) |
|
||||||
|
| Kubeconfig at `~/.kube/honeydue-k3s.yaml` | Kubeconfig at `deploy-k3s/kubeconfig` |
|
||||||
|
| TLS at origin: not configured (CF Flexible) | Same — CF Flexible. `cloudflare-origin-cert` Secret exists (carried over) but Ingress doesn't reference it. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. Outstanding follow-ups (deferred, not blocking)
|
||||||
|
|
||||||
|
1. **No warm standby / rollback cluster.** OVH is solo production. An OVH
|
||||||
|
outage is a real outage; recovery time = §6 procedure (~30 min). User
|
||||||
|
plans to bring a second cluster up as a target.
|
||||||
|
2. **UFW allows 80/443 from world.** Hetzner had a network-layer Cloudflare-IP
|
||||||
|
allowlist on these ports. OVH currently relies on the L7
|
||||||
|
`cloudflare-only` Traefik middleware, which protects admin but NOT api /
|
||||||
|
web / apex (those routes have to be reachable from anywhere, but they're
|
||||||
|
then trivially DDoSable by bypassing Cloudflare). Fix: add ufw allow rules
|
||||||
|
restricting `80/tcp` and `443/tcp` to Cloudflare's published IP ranges
|
||||||
|
(~22 IPv4 prefixes from https://www.cloudflare.com/ips-v4/).
|
||||||
|
3. **Cloudflare TLS Flexible → Full(strict).** Origin certs exist as Secret
|
||||||
|
but Ingress doesn't terminate TLS. Upgrading to Full(strict) requires
|
||||||
|
Traefik configured with the cert + an HTTPS entrypoint + Ingress
|
||||||
|
`tls:` block.
|
||||||
|
4. **`rbac.yaml` + `pod-disruption-budgets.yaml` should be in `03-deploy.sh`.**
|
||||||
|
They're currently bootstrap-only. Adding them is idempotent and prevents
|
||||||
|
the §9.1 footgun.
|
||||||
|
5. **Push notification metrics are log-derived, not counters.** Successes
|
||||||
|
aren't logged or counted. Proper Prometheus instrumentation (~15 lines in
|
||||||
|
`internal/push/client.go`) would give a real success/failure ratio.
|
||||||
|
6. **Worker has no `/metrics` endpoint.** `cmd/worker/main.go` serves `:6060`
|
||||||
|
for healthz only. Adding Asynq's `metrics.NewPrometheusExporter()` + a
|
||||||
|
ServiceMonitor + uncommenting the `worker` job stanza in
|
||||||
|
`vmagent-config` ConfigMap would give real queue depth and job latency.
|
||||||
|
7. **Ory Kratos.** Manifests exist (`manifests/kratos/`) but the deploy
|
||||||
|
is gated on operator-side prerequisites (Neon `kratos` database,
|
||||||
|
`auth.myhoneydue.com` DNS, real Apple+Google OIDC clients, Kratos image
|
||||||
|
tag pinned). Until `kratos-secrets` exists, `03-deploy.sh` silently
|
||||||
|
skips the Kratos apply.
|
||||||
|
8. **Hetzner cluster fully retired?** `config.yaml` `nodes:` block describes
|
||||||
|
OVH; the backup kubeconfig is at `kubeconfig.hetzner.bak`. Boxes themselves
|
||||||
|
are operator-managed.
|
||||||
|
|
||||||
|
### 11.1 Dashboard observability gaps (raised 2026-06-03 during dashboard build)
|
||||||
|
|
||||||
|
Surfaced while building the `honeydue-eli5-overview` Grafana dashboard. Each
|
||||||
|
needs code or infra changes to expose; none blocks today's operations.
|
||||||
|
|
||||||
|
9. **node-exporter not deployed.** No node-level metrics today
|
||||||
|
(`node_filesystem_avail_bytes`, `node_memory_*`, `node_load1`, etc.).
|
||||||
|
The dashboard's pod-level memory/CPU panels are app-process only — a
|
||||||
|
node running out of disk would silently fail the cluster before any
|
||||||
|
dashboard signal showed it. Highest-priority Tier-3 item. Fix: deploy
|
||||||
|
`node-exporter` as a DaemonSet (~50 lines of YAML), add a scrape stanza
|
||||||
|
to `vmagent-config`, add a `Node disk free` stat panel.
|
||||||
|
10. **Traefik metrics not enabled.** Traefik can expose `/metrics` with
|
||||||
|
`traefik_entrypoint_requests_total` + `traefik_service_request_duration_seconds`,
|
||||||
|
giving edge-level visibility into requests that never reached api
|
||||||
|
pods (404s, redirects, middleware blocks). Enable via a
|
||||||
|
HelmChartConfig override that sets `metrics.prometheus.entryPoint=metrics`
|
||||||
|
+ adds a `:9100` entryPoint + a scrape stanza. Skipped today to avoid
|
||||||
|
Traefik restart risk; safe additive change when ready.
|
||||||
|
11. **Push notification success/failure counters** (already #5). Add
|
||||||
|
`prometheus.NewCounterVec` in `internal/push/client.go` with labels
|
||||||
|
`platform={ios,android}, outcome={success,failed,breaker_open,disabled}`.
|
||||||
|
Increments at every Send/SendActionable branch. Replaces the
|
||||||
|
log-derived "Push failures" stat on the dashboard with a real success
|
||||||
|
rate.
|
||||||
|
12. **Worker queue / job metrics** (already #6). Asynq has a built-in
|
||||||
|
Prometheus exporter (`asynq/x/metrics`). Wire it into the worker's
|
||||||
|
`:6060` health server (a single `healthMux.Handle` line) and
|
||||||
|
uncomment the worker scrape stanza in `vmagent-config`. Surfaces
|
||||||
|
queue depth, retry count, processing time per task type.
|
||||||
|
13. **Cache hit / miss rate.** `internal/services/cache_service.go` has
|
||||||
|
no counters. Add a Counter with labels `{operation=get|set, result=hit|miss}`
|
||||||
|
around the cache wrapper. ~10 lines. Useful once real traffic flows
|
||||||
|
to verify the ETag and Redis caches are paying their keep.
|
||||||
|
14. **APNs send-latency histogram.** Wrap `internal/push/apns.go::Send`
|
||||||
|
in a `prometheus.NewHistogramVec` keyed on outcome. Tells you when
|
||||||
|
Apple's gateway is slow (which correlates with their incident page).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 12. Audit trail
|
||||||
|
|
||||||
|
| Date | Change |
|
||||||
|
|---|---|
|
||||||
|
| 2026-04-24 | Initial k3s cluster on Hetzner (Swarm → k3s migration) — see MIGRATION_NOTES.md |
|
||||||
|
| 2026-04-25 | `config.yaml` reconstructed from live ConfigMap (original file lost) |
|
||||||
|
| 2026-05-15 | Audit fixes: Redis auth required, admin basic auth, secrets-encryption flag |
|
||||||
|
| 2026-05-16 | `02-setup-secrets.sh` started carrying B2 credentials (was a manifest/script drift) |
|
||||||
|
| 2026-06-02 | Kratos scaffolding committed (not deployed) |
|
||||||
|
| 2026-06-03 | **Hetzner → OVH BHS cutover.** New 3-node cluster on 51.81.83.33, .87.86, .85.248. DNS cut on Cloudflare. Hetzner kubeconfig moved to `.bak`. Grafana `honeydue-eli5-overview` dashboard created. Hetzner cluster powered off later same day. |
|
||||||
|
| 2026-06-03 | Dashboard build-out: extended `honeydue-eli5-overview` to 22 panels covering Tier-1 (HTTP status, CPU per pod, goroutines, top slow) and Tier-2 (GC, network I/O, pod uptime, top 5xx) signals. Surfaced Tier-3 instrumentation gaps in §11.1. |
|
||||||
+896
-676
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ load_balancer_ip: ""
|
|||||||
domains:
|
domains:
|
||||||
api: api.myhoneydue.com
|
api: api.myhoneydue.com
|
||||||
admin: admin.myhoneydue.com
|
admin: admin.myhoneydue.com
|
||||||
|
app: app.myhoneydue.com # web client host — added to CORS_ALLOWED_ORIGINS
|
||||||
base: myhoneydue.com
|
base: myhoneydue.com
|
||||||
|
|
||||||
# --- Container Registry (GHCR) ---
|
# --- Container Registry (GHCR) ---
|
||||||
@@ -62,7 +63,7 @@ email:
|
|||||||
push:
|
push:
|
||||||
apns_key_id: ""
|
apns_key_id: ""
|
||||||
apns_team_id: ""
|
apns_team_id: ""
|
||||||
apns_topic: com.tt.honeyDue
|
apns_topic: com.myhoneydue.honeyDue
|
||||||
apns_production: true
|
apns_production: true
|
||||||
apns_use_sandbox: false
|
apns_use_sandbox: false
|
||||||
|
|
||||||
@@ -72,8 +73,13 @@ storage:
|
|||||||
b2_app_key: ""
|
b2_app_key: ""
|
||||||
b2_bucket: ""
|
b2_bucket: ""
|
||||||
b2_endpoint: "" # e.g. s3.us-west-004.backblazeb2.com
|
b2_endpoint: "" # e.g. s3.us-west-004.backblazeb2.com
|
||||||
|
b2_region: "" # e.g. us-east-005
|
||||||
|
b2_use_ssl: true
|
||||||
max_file_size: 10485760
|
max_file_size: 10485760
|
||||||
allowed_types: "image/jpeg,image/png,image/gif,image/webp,application/pdf"
|
allowed_types: "image/jpeg,image/png,image/gif,image/webp,application/pdf"
|
||||||
|
upload_dir: /app/uploads # filesystem path inside the api container
|
||||||
|
base_url: /uploads # public URL prefix served by the api
|
||||||
|
static_dir: /app/static # static asset path inside the api container
|
||||||
|
|
||||||
# --- Worker Schedules (UTC hours) ---
|
# --- Worker Schedules (UTC hours) ---
|
||||||
worker:
|
worker:
|
||||||
@@ -100,8 +106,10 @@ admin:
|
|||||||
basic_auth_password: "" # HTTP basic auth password for admin panel
|
basic_auth_password: "" # HTTP basic auth password for admin panel
|
||||||
|
|
||||||
# --- Apple Auth / IAP (optional, leave empty if unused) ---
|
# --- Apple Auth / IAP (optional, leave empty if unused) ---
|
||||||
|
# client_id MUST equal the iOS Release bundle ID — Apple identity tokens
|
||||||
|
# are rejected if the `aud` claim doesn't match.
|
||||||
apple_auth:
|
apple_auth:
|
||||||
client_id: ""
|
client_id: "com.myhoneydue.honeyDue"
|
||||||
team_id: ""
|
team_id: ""
|
||||||
iap_key_id: ""
|
iap_key_id: ""
|
||||||
iap_issuer_id: ""
|
iap_issuer_id: ""
|
||||||
|
|||||||
@@ -23,8 +23,11 @@ spec:
|
|||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: admin
|
serviceAccountName: admin
|
||||||
|
# Explicit pod-level opt-out (audit F11) — defense-in-depth on top of
|
||||||
|
# the ServiceAccount-level setting in rbac.yaml.
|
||||||
|
automountServiceAccountToken: false
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: ghcr-credentials
|
- name: gitea-credentials
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 1001
|
runAsUser: 1001
|
||||||
@@ -35,6 +38,7 @@ spec:
|
|||||||
containers:
|
containers:
|
||||||
- name: admin
|
- name: admin
|
||||||
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit; images are SHA/digest-pinned
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 3000
|
- containerPort: 3000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
@@ -82,7 +86,7 @@ spec:
|
|||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /admin/
|
path: /
|
||||||
port: 3000
|
port: 3000
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
|
|||||||
@@ -23,8 +23,11 @@ spec:
|
|||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: api
|
serviceAccountName: api
|
||||||
|
# Explicit pod-level opt-out (audit F11) — defense-in-depth on top of
|
||||||
|
# the ServiceAccount-level setting in rbac.yaml.
|
||||||
|
automountServiceAccountToken: false
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: ghcr-credentials
|
- name: gitea-credentials
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 1000
|
runAsUser: 1000
|
||||||
@@ -35,6 +38,7 @@ spec:
|
|||||||
containers:
|
containers:
|
||||||
- name: api
|
- name: api
|
||||||
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit; images are SHA/digest-pinned
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8000
|
- containerPort: 8000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
@@ -46,34 +50,16 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: honeydue-config
|
name: honeydue-config
|
||||||
env:
|
# Audit CODE-F8: secrets are NOT injected as environment variables.
|
||||||
- name: POSTGRES_PASSWORD
|
# Env vars are readable for the life of the pod via /proc/<pid>/environ
|
||||||
valueFrom:
|
# and leak into crash dumps / child processes. honeydue-secrets is
|
||||||
secretKeyRef:
|
# mounted read-only at /etc/honeydue/secrets (mode 0400) and the Go
|
||||||
name: honeydue-secrets
|
# config layer (config.loadFileSecrets) reads each key from its file.
|
||||||
key: POSTGRES_PASSWORD
|
# Non-secret config still arrives via the configMapRef above.
|
||||||
- name: SECRET_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: SECRET_KEY
|
|
||||||
- name: EMAIL_HOST_PASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: EMAIL_HOST_PASSWORD
|
|
||||||
- name: FCM_SERVER_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: FCM_SERVER_KEY
|
|
||||||
- name: REDIS_PASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: REDIS_PASSWORD
|
|
||||||
optional: true
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
- name: app-secrets
|
||||||
|
mountPath: /etc/honeydue/secrets
|
||||||
|
readOnly: true
|
||||||
- name: apns-key
|
- name: apns-key
|
||||||
mountPath: /secrets/apns
|
mountPath: /secrets/apns
|
||||||
readOnly: true
|
readOnly: true
|
||||||
@@ -90,11 +76,12 @@ spec:
|
|||||||
httpGet:
|
httpGet:
|
||||||
path: /api/health/
|
path: /api/health/
|
||||||
port: 8000
|
port: 8000
|
||||||
# MigrateWithLock in cmd/api/main.go runs pg_advisory_lock on
|
# Schema migrations run separately in the honeydue-migrate Job
|
||||||
# every startup. On a cold boot with 3 replicas, the first does
|
# *before* this Deployment rolls — the api itself does not migrate
|
||||||
# AutoMigrate (~90s) and the others wait on the lock, so real
|
# (it only verifies goose_db_version at boot). Cold start still
|
||||||
# startup runs 90–240s. 48 × 5s = 240s grace absorbs it without
|
# pays the DB pool warm-up + Redis connect + APNs/FCM client init
|
||||||
# healthcheck killing a still-starting replica.
|
# before /api/health/ goes green. 48 × 5s = 240s grace keeps the
|
||||||
|
# probe from killing a still-starting replica.
|
||||||
failureThreshold: 48
|
failureThreshold: 48
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
@@ -112,6 +99,12 @@ spec:
|
|||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 10
|
timeoutSeconds: 10
|
||||||
volumes:
|
volumes:
|
||||||
|
# Audit CODE-F8: the whole honeydue-secrets Secret, projected as files.
|
||||||
|
# defaultMode 0400 → readable only by the container's runAsUser (1000).
|
||||||
|
- name: app-secrets
|
||||||
|
secret:
|
||||||
|
secretName: honeydue-secrets
|
||||||
|
defaultMode: 0400
|
||||||
- name: apns-key
|
- name: apns-key
|
||||||
secret:
|
secret:
|
||||||
secretName: honeydue-apns-key
|
secretName: honeydue-apns-key
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
# B2 bucket lifecycle — `uploads/` prefix
|
||||||
|
|
||||||
|
The `pending_uploads` cleanup worker (cron `30 * * * *`, see
|
||||||
|
`internal/worker/jobs/handler.go::HandleUploadCleanup`) reaps unclaimed
|
||||||
|
upload sessions every hour, deleting both the row and the corresponding B2
|
||||||
|
object. This bucket-level lifecycle rule is a **backstop** — it catches B2
|
||||||
|
objects that survive the row deletion (e.g. worker crashed mid-loop, B2
|
||||||
|
delete errored, manual DB tampering).
|
||||||
|
|
||||||
|
## Rule
|
||||||
|
|
||||||
|
Apply via the Backblaze web console: **Bucket → `honeyDueProd` → Lifecycle Settings → Custom**
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"fileNamePrefix": "uploads/",
|
||||||
|
"daysFromUploadingToHiding": 7,
|
||||||
|
"daysFromHidingToDeleting": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Effect: any object under the `uploads/` prefix is hidden 7 days after
|
||||||
|
upload, then permanently deleted 1 day after that. Total maximum lifetime
|
||||||
|
of an orphaned object: 8 days.
|
||||||
|
|
||||||
|
This rule does NOT affect:
|
||||||
|
|
||||||
|
- `images/`, `documents/`, `completions/` — legacy multipart-uploaded
|
||||||
|
objects, which are managed by the existing `task_completion_image` /
|
||||||
|
`document_image` / `document.file_url` references.
|
||||||
|
|
||||||
|
## Why a backstop, not the primary mechanism
|
||||||
|
|
||||||
|
The application worker is the primary mechanism because:
|
||||||
|
|
||||||
|
1. It can delete the **DB row** alongside the B2 object — lifecycle alone
|
||||||
|
would leave dangling `pending_uploads` rows.
|
||||||
|
2. It runs hourly vs. lifecycle's once-per-day evaluation — much tighter
|
||||||
|
recovery window for the common case.
|
||||||
|
3. It produces logs / metrics for orphan rate observability.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
After applying:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
b2 bucket get-info honeyDueProd | jq '.lifecycleRules'
|
||||||
|
```
|
||||||
|
|
||||||
|
Should show the rule above. If you don't have the B2 CLI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -u "$B2_KEY_ID:$B2_APP_KEY" https://api.backblazeb2.com/b2api/v3/b2_authorize_account
|
||||||
|
# Then use the returned authorization_token + apiUrl to call b2_get_bucket
|
||||||
|
```
|
||||||
@@ -1,11 +1,10 @@
|
|||||||
# Simple hostname-based Ingress — no TLS (Cloudflare Flexible handles edge
|
# Hostname-based Ingress with TLS terminated at Traefik using the
|
||||||
# TLS, CF→origin is plain HTTP on 80). Upgrade to Full (strict) by
|
# Cloudflare Origin CA cert (secret/cloudflare-origin-cert). CF→origin
|
||||||
# adding back a `tls:` block with a Cloudflare Origin CA cert stored in
|
# encryption enables CF SSL mode "Full (strict)".
|
||||||
# secret/cloudflare-origin-cert.
|
|
||||||
#
|
#
|
||||||
# Middleware chain (security headers, rate limit, CF-only allowlist, admin
|
# Middleware chain (security headers, rate limit, CF-only allowlist, admin
|
||||||
# basic auth) is defined in `middleware.yaml` but NOT attached here —
|
# basic auth) is defined in `middleware.yaml`. security-headers + rate-limit
|
||||||
# annotate this ingress to turn any of them on.
|
# are attached below via annotation.
|
||||||
apiVersion: networking.k8s.io/v1
|
apiVersion: networking.k8s.io/v1
|
||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
@@ -13,8 +12,15 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- api.myhoneydue.com
|
||||||
|
- myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: api.myhoneydue.com
|
- host: api.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
@@ -46,8 +52,19 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
# cloudflare-only + admin-auth wired in (audit F2/F3/CODE-L6). Order
|
||||||
|
# matters: reject non-Cloudflare IPs, then basic auth, then headers,
|
||||||
|
# then rate limit. The admin-basic-auth secret is created by
|
||||||
|
# 02-setup-secrets.sh from config.yaml admin.basic_auth_* — that runs
|
||||||
|
# before 03-deploy.sh, so the middleware always has its secret.
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-cloudflare-only@kubernetescrd,honeydue-admin-auth@kubernetescrd,honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- admin.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: admin.myhoneydue.com
|
- host: admin.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
@@ -67,8 +84,14 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- app.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: app.myhoneydue.com
|
- host: app.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
@@ -80,3 +103,98 @@ spec:
|
|||||||
name: web
|
name: web
|
||||||
port:
|
port:
|
||||||
number: 3000
|
number: 3000
|
||||||
|
---
|
||||||
|
# Auth-endpoint Ingress (audit F10 / LIVE-L12). A dedicated Ingress for the
|
||||||
|
# auth paths so Traefik gives their longer path-prefix routers a higher
|
||||||
|
# priority than honeydue-api's "/" router — these paths then get
|
||||||
|
# auth-rate-limit (5/min) instead of the general rate-limit (100/min).
|
||||||
|
# Anything not matched here falls through to honeydue-api unchanged.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: honeydue-api-auth
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-auth-rate-limit@kubernetescrd,honeydue-security-headers@kubernetescrd
|
||||||
|
spec:
|
||||||
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- api.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
|
rules:
|
||||||
|
- host: api.myhoneydue.com
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /api/auth/login
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/register
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/forgot-password
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/reset-password
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/residences/join-with-code
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/verify-reset-code
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/apple-sign-in
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/google-sign-in
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/refresh
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
- path: /api/auth/account
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: api
|
||||||
|
port:
|
||||||
|
number: 8000
|
||||||
|
|||||||
@@ -1,54 +0,0 @@
|
|||||||
# API Ingress — Cloudflare-only + security headers + rate limiting
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: honeydue-api
|
|
||||||
namespace: honeydue
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/part-of: honeydue
|
|
||||||
annotations:
|
|
||||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-cloudflare-only@kubernetescrd,honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
|
||||||
spec:
|
|
||||||
tls:
|
|
||||||
- hosts:
|
|
||||||
- api.myhoneydue.com
|
|
||||||
secretName: cloudflare-origin-cert
|
|
||||||
rules:
|
|
||||||
- host: api.myhoneydue.com
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: api
|
|
||||||
port:
|
|
||||||
number: 8000
|
|
||||||
|
|
||||||
---
|
|
||||||
# Admin Ingress — Cloudflare-only + security headers + rate limiting + basic auth
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: honeydue-admin
|
|
||||||
namespace: honeydue
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/part-of: honeydue
|
|
||||||
annotations:
|
|
||||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-cloudflare-only@kubernetescrd,honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd,honeydue-admin-auth@kubernetescrd
|
|
||||||
spec:
|
|
||||||
tls:
|
|
||||||
- hosts:
|
|
||||||
- admin.myhoneydue.com
|
|
||||||
secretName: cloudflare-origin-cert
|
|
||||||
rules:
|
|
||||||
- host: admin.myhoneydue.com
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: admin
|
|
||||||
port:
|
|
||||||
number: 3000
|
|
||||||
@@ -21,13 +21,24 @@ spec:
|
|||||||
headers:
|
headers:
|
||||||
frameDeny: true
|
frameDeny: true
|
||||||
contentTypeNosniff: true
|
contentTypeNosniff: true
|
||||||
browserXssFilter: true
|
# browserXssFilter removed (audit L7): it emits the deprecated
|
||||||
|
# X-XSS-Protection header, which can itself introduce XSS in legacy
|
||||||
|
# browsers. Modern browsers ignore it.
|
||||||
referrerPolicy: "strict-origin-when-cross-origin"
|
referrerPolicy: "strict-origin-when-cross-origin"
|
||||||
customResponseHeaders:
|
customResponseHeaders:
|
||||||
X-Content-Type-Options: "nosniff"
|
X-Content-Type-Options: "nosniff"
|
||||||
X-Frame-Options: "DENY"
|
X-Frame-Options: "DENY"
|
||||||
Strict-Transport-Security: "max-age=31536000; includeSubDomains"
|
# HSTS: 2-year max-age + preload (audit L5/CODE-L3). After this is
|
||||||
Content-Security-Policy: "default-src 'self'; frame-ancestors 'none'"
|
# live on api/admin/app, submit myhoneydue.com to hstspreload.org.
|
||||||
|
Strict-Transport-Security: "max-age=63072000; includeSubDomains; preload"
|
||||||
|
# Cross-origin isolation (audit F9). COEP (require-corp) is omitted —
|
||||||
|
# it commonly breaks third-party embeds; add only after testing.
|
||||||
|
Cross-Origin-Opener-Policy: "same-origin"
|
||||||
|
Cross-Origin-Resource-Policy: "same-origin"
|
||||||
|
# Content-Security-Policy is intentionally NOT set here — the Go API
|
||||||
|
# sets a CSP in internal/router/router.go that permits Google Fonts
|
||||||
|
# for the landing page. Two CSP headers would intersect and break it.
|
||||||
|
# admin and web apps set their own CSP via Next.js middleware.
|
||||||
Permissions-Policy: "camera=(), microphone=(), geolocation=()"
|
Permissions-Policy: "camera=(), microphone=(), geolocation=()"
|
||||||
X-Permitted-Cross-Domain-Policies: "none"
|
X-Permitted-Cross-Domain-Policies: "none"
|
||||||
|
|
||||||
@@ -80,3 +91,24 @@ spec:
|
|||||||
basicAuth:
|
basicAuth:
|
||||||
secret: admin-basic-auth
|
secret: admin-basic-auth
|
||||||
realm: "honeyDue Admin"
|
realm: "honeyDue Admin"
|
||||||
|
|
||||||
|
---
|
||||||
|
# Strict rate limit for auth endpoints (audit F10 / LIVE-L12).
|
||||||
|
# Applied via the honeydue-api-auth Ingress to login / register /
|
||||||
|
# forgot-password / reset-password / join-with-code and the other paths that Ingress lists. depth: 2 makes the
|
||||||
|
# limiter key on the real client IP rather than the Cloudflare edge IP
|
||||||
|
# (request path: client -> Cloudflare -> Traefik). This is the edge half;
|
||||||
|
# the per-account lockout in the Go app is the robust half.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: Middleware
|
||||||
|
metadata:
|
||||||
|
name: auth-rate-limit
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
rateLimit:
|
||||||
|
average: 5
|
||||||
|
burst: 10
|
||||||
|
period: 1m
|
||||||
|
sourceCriterion:
|
||||||
|
ipStrategy:
|
||||||
|
depth: 2
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
# Ory Kratos — honeyDue identity service (Phase 1: infrastructure)
|
||||||
|
|
||||||
|
This directory deploys [Ory Kratos](https://www.ory.sh/kratos/) into the
|
||||||
|
`honeydue` namespace as the identity provider — replacing the hand-rolled auth
|
||||||
|
in `internal/services/auth_service.go` etc.
|
||||||
|
|
||||||
|
**Phase 1 is infrastructure only.** Once deployed, Kratos runs but nothing uses
|
||||||
|
it yet — the honeyDue Go API still does its own auth. Phase 2 (backend swap)
|
||||||
|
and Phase 3 (KMP/web clients) follow. Migrating onto Kratos does not carry over
|
||||||
|
existing user data — honeyDue is pre-production, so no user import is done.
|
||||||
|
|
||||||
|
The deploy is **gated**: `03-deploy.sh` applies Kratos only when the
|
||||||
|
`kratos-secrets` Secret exists, and `02-setup-secrets.sh` creates that Secret
|
||||||
|
only when `config.yaml` has a `kratos:` block. Until then the existing stack
|
||||||
|
deploys completely unaffected.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | What |
|
||||||
|
|---|---|
|
||||||
|
| `configmap.yaml` | `kratos.yml`, identity schema, Google/Apple OIDC claim mappers (no secrets) |
|
||||||
|
| `migrate-job.yaml` | `kratos migrate sql` — schema migration, run before the Deployment |
|
||||||
|
| `kratos.yaml` | Deployment (×2), Service, NetworkPolicies |
|
||||||
|
| `ingress.yaml` | `auth.myhoneydue.com` → Kratos public API :4433 |
|
||||||
|
|
||||||
|
## Operator prerequisites (must be done before deploying)
|
||||||
|
|
||||||
|
1. **Kratos version** — Ory uses CalVer (`v25.x` / `v26.x`). Pick the current
|
||||||
|
stable, then replace `REPLACE_WITH_CURRENT_STABLE_TAG` in `kratos.yaml` and
|
||||||
|
`migrate-job.yaml` with `oryd/kratos:vXX.Y@sha256:<digest>`. Leave `version:` in
|
||||||
|
`configmap.yaml` unchanged unless the Kratos release notes require it — it is the config schema version, not the image tag.
|
||||||
|
|
||||||
|
2. **Kratos database** — create a separate Neon database named `kratos` (do not
|
||||||
|
share honeyDue's). Capture its connection string as the DSN.
|
||||||
|
|
||||||
|
3. **DNS** — add `auth.myhoneydue.com` in Cloudflare (proxied), pointing at the
|
||||||
|
cluster ingress like the other honeyDue hosts. Confirm the
|
||||||
|
`cloudflare-origin-cert` TLS secret covers `auth.myhoneydue.com`.
|
||||||
|
|
||||||
|
4. **Google OAuth client** — Google Cloud Console → create an OAuth 2.0 client.
|
||||||
|
Redirect URI: `https://auth.myhoneydue.com/self-service/methods/oidc/callback/google`.
|
||||||
|
Put the **client ID** into `configmap.yaml` (`GOOGLE_OAUTH_CLIENT_ID`); the
|
||||||
|
**client secret** goes in `config.yaml`.
|
||||||
|
|
||||||
|
5. **Apple Sign In** — Apple Developer → a Services ID + a Sign in with Apple
|
||||||
|
key. Return URL: `https://auth.myhoneydue.com/self-service/methods/oidc/callback/apple`.
|
||||||
|
Put the **Services ID / Team ID / Key ID** into `configmap.yaml`
|
||||||
|
(`APPLE_SERVICES_ID` / `APPLE_TEAM_ID` / `APPLE_PRIVATE_KEY_ID`); the **.p8
|
||||||
|
private key** goes in `config.yaml`.
|
||||||
|
|
||||||
|
6. **`config.yaml`** — add a `kratos:` block:
|
||||||
|
```yaml
|
||||||
|
kratos:
|
||||||
|
dsn: "postgres://USER:PASS@HOST/kratos?sslmode=require"
|
||||||
|
secrets_cookie: "<openssl rand -hex 16>" # generate ONCE, keep stable
|
||||||
|
secrets_cipher: "<openssl rand -hex 16>" # must be exactly 32 chars
|
||||||
|
smtp_connection_uri: "smtps://USER:PASS@smtp.fastmail.com:465/"
|
||||||
|
google_client_secret: "<from Google Cloud Console>"
|
||||||
|
apple_private_key: |
|
||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
...
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
|
```
|
||||||
|
`secrets_cookie` / `secrets_cipher` must stay stable forever — rotating them
|
||||||
|
invalidates every session and makes encrypted data unreadable.
|
||||||
|
|
||||||
|
## Deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd honeyDueAPI-go
|
||||||
|
export KUBECONFIG="$(pwd)/deploy-k3s/kubeconfig"
|
||||||
|
./deploy-k3s/scripts/02-setup-secrets.sh # creates kratos-secrets from config.yaml
|
||||||
|
./deploy-k3s/scripts/03-deploy.sh # applies kratos manifests, runs migrate, rolls
|
||||||
|
```
|
||||||
|
|
||||||
|
`03-deploy.sh` applies `configmap.yaml` → runs `migrate-job.yaml` → waits →
|
||||||
|
applies `kratos.yaml` + `ingress.yaml`.
|
||||||
|
|
||||||
|
## Verify
|
||||||
|
|
||||||
|
- `kubectl -n honeydue get pods -l app.kubernetes.io/name=kratos` — both replica pods Running and Ready
|
||||||
|
- `kubectl -n honeydue logs job/kratos-migrate` — migration succeeded
|
||||||
|
- `curl https://auth.myhoneydue.com/health/ready` — `{"status":"ok"}`
|
||||||
|
- `curl https://auth.myhoneydue.com/self-service/registration/api` — returns a flow
|
||||||
|
|
||||||
|
## Not yet done (later phases)
|
||||||
|
|
||||||
|
- **Phase 2** — honeyDue Go backend: swap `middleware/auth.go` for Kratos
|
||||||
|
session validation, drop the hand-rolled auth code, rebuild the `users`
|
||||||
|
table keyed on the Kratos identity ID.
|
||||||
|
- **Phase 3** — KMP mobile + Next.js web clients point at Kratos flows.
|
||||||
|
- Admin-panel auth stays on its own JWT (out of scope).
|
||||||
@@ -0,0 +1,232 @@
|
|||||||
|
# Ory Kratos configuration for honeyDue.
|
||||||
|
#
|
||||||
|
# Secrets are NOT in this ConfigMap. The DSN, cookie/cipher secrets, SMTP URI
|
||||||
|
# and OIDC client secrets are injected as environment variables from the
|
||||||
|
# kratos-secrets Secret (see kratos.yaml). Kratos is configured natively via
|
||||||
|
# env vars, so this is the idiomatic split — only non-secret config here.
|
||||||
|
#
|
||||||
|
# OIDC scope: Apple-only as of 2026-06-03. Google is intentionally absent;
|
||||||
|
# adding it later is additive — append a `- id: google` block under
|
||||||
|
# selfservice.methods.oidc.config.providers (it becomes index 1) and bind a
|
||||||
|
# matching CLIENT_SECRET env in kratos.yaml.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: kratos-config
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
data:
|
||||||
|
kratos.yml: |
|
||||||
|
# The image tag and the config schema version below are separate: kratos.yaml + migrate-job.yaml
|
||||||
|
# both pin oryd/kratos:v26.2.0 (2026-06-03). See kratos/README.md.
|
||||||
|
version: v1.3.0 # internal config schema version; do not change unless Kratos release notes require it
|
||||||
|
|
||||||
|
serve:
|
||||||
|
public:
|
||||||
|
base_url: https://auth.myhoneydue.com/
|
||||||
|
cors:
|
||||||
|
enabled: true
|
||||||
|
allowed_origins:
|
||||||
|
- https://myhoneydue.com
|
||||||
|
- https://app.myhoneydue.com
|
||||||
|
- https://admin.myhoneydue.com
|
||||||
|
allowed_methods: [GET, POST, PUT, PATCH, DELETE, OPTIONS]
|
||||||
|
allowed_headers: [Authorization, Content-Type, X-Session-Token, Cookie]
|
||||||
|
exposed_headers: [Content-Type, Set-Cookie]
|
||||||
|
# Required: the web clients call Kratos browser flows with
|
||||||
|
# credentials (the ory_kratos_session cookie). Safe here because
|
||||||
|
# allowed_origins is an explicit list, never a wildcard.
|
||||||
|
allow_credentials: true
|
||||||
|
admin:
|
||||||
|
base_url: http://kratos.honeydue.svc.cluster.local:4434/
|
||||||
|
|
||||||
|
selfservice:
|
||||||
|
default_browser_return_url: https://app.myhoneydue.com/
|
||||||
|
allowed_return_urls:
|
||||||
|
- https://app.myhoneydue.com
|
||||||
|
- https://myhoneydue.com
|
||||||
|
- honeydue://callback
|
||||||
|
|
||||||
|
methods:
|
||||||
|
password:
|
||||||
|
enabled: true
|
||||||
|
code: # email one-time codes (verify/recover)
|
||||||
|
enabled: true
|
||||||
|
oidc:
|
||||||
|
enabled: true
|
||||||
|
config:
|
||||||
|
providers:
|
||||||
|
# index 0 — Apple Sign In. apple_private_key (.p8 contents) is
|
||||||
|
# injected via env SELFSERVICE_METHODS_OIDC_CONFIG_PROVIDERS_0_APPLE_PRIVATE_KEY.
|
||||||
|
# client_id is the Apple Services ID (here: the bundle ID, which
|
||||||
|
# was configured as a Services ID with Sign In with Apple
|
||||||
|
# capability — see operator notes in README.md §5).
|
||||||
|
- id: apple
|
||||||
|
provider: apple
|
||||||
|
# Production bundle id. Apple issues id_tokens with
|
||||||
|
# `aud` = the requesting app's bundle id, so this is the
|
||||||
|
# primary audience Kratos verifies against.
|
||||||
|
client_id: com.myhoneydue.honeyDue
|
||||||
|
# Debug builds out of Xcode use a `.dev` bundle id (see
|
||||||
|
# iosApp/honeyDue.xcodeproj — Debug config). Their id_tokens
|
||||||
|
# therefore have `aud: com.myhoneydue.honeyDue.dev`, which
|
||||||
|
# the primary client_id check rejects. Whitelist the dev
|
||||||
|
# audience so Apple Sign In works from a non-Release Xcode
|
||||||
|
# build without per-build Kratos reconfiguration.
|
||||||
|
additional_id_token_audiences:
|
||||||
|
- com.myhoneydue.honeyDue.dev
|
||||||
|
apple_team_id: X86BR9WTLD
|
||||||
|
apple_private_key_id: HQD3NCF99C
|
||||||
|
mapper_url: file:///etc/kratos/oidc.apple.jsonnet
|
||||||
|
scope: [openid, email, name]
|
||||||
|
|
||||||
|
flows:
|
||||||
|
error:
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/error
|
||||||
|
login:
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/login
|
||||||
|
lifespan: 10m
|
||||||
|
registration:
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/registration
|
||||||
|
lifespan: 10m
|
||||||
|
after:
|
||||||
|
password:
|
||||||
|
hooks:
|
||||||
|
- hook: session # auto-login after registration
|
||||||
|
oidc:
|
||||||
|
hooks:
|
||||||
|
- hook: session
|
||||||
|
verification:
|
||||||
|
enabled: true
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/verification
|
||||||
|
use: code
|
||||||
|
after:
|
||||||
|
default_browser_return_url: https://app.myhoneydue.com/
|
||||||
|
recovery:
|
||||||
|
enabled: true
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/recovery
|
||||||
|
use: code
|
||||||
|
settings:
|
||||||
|
ui_url: https://app.myhoneydue.com/auth/settings
|
||||||
|
privileged_session_max_age: 15m
|
||||||
|
logout:
|
||||||
|
after:
|
||||||
|
default_browser_return_url: https://app.myhoneydue.com/
|
||||||
|
|
||||||
|
log:
|
||||||
|
level: info
|
||||||
|
format: json
|
||||||
|
leak_sensitive_values: false
|
||||||
|
|
||||||
|
ciphers:
|
||||||
|
algorithm: xchacha20-poly1305
|
||||||
|
|
||||||
|
hashers:
|
||||||
|
algorithm: bcrypt
|
||||||
|
bcrypt:
|
||||||
|
cost: 12
|
||||||
|
|
||||||
|
identity:
|
||||||
|
default_schema_id: honeydue
|
||||||
|
schemas:
|
||||||
|
- id: honeydue
|
||||||
|
url: file:///etc/kratos/identity.schema.json
|
||||||
|
|
||||||
|
courier:
|
||||||
|
smtp:
|
||||||
|
from_address: noreply@myhoneydue.com
|
||||||
|
from_name: honeyDue
|
||||||
|
# connection_uri is injected via env COURIER_SMTP_CONNECTION_URI
|
||||||
|
|
||||||
|
session:
|
||||||
|
lifespan: 720h # 30-day sessions (mobile)
|
||||||
|
cookie:
|
||||||
|
domain: myhoneydue.com
|
||||||
|
same_site: Lax
|
||||||
|
|
||||||
|
identity.schema.json: |
|
||||||
|
{
|
||||||
|
"$id": "https://honeydue.app/identity.schema.json",
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"title": "honeyDue user",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"traits": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"email": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "email",
|
||||||
|
"title": "Email",
|
||||||
|
"minLength": 3,
|
||||||
|
"maxLength": 320,
|
||||||
|
"ory.sh/kratos": {
|
||||||
|
"credentials": {
|
||||||
|
"password": { "identifier": true },
|
||||||
|
"code": { "identifier": true, "via": "email" },
|
||||||
|
"totp": { "account_name": true }
|
||||||
|
},
|
||||||
|
"verification": { "via": "email" },
|
||||||
|
"recovery": { "via": "email" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "object",
|
||||||
|
"title": "Name",
|
||||||
|
"properties": {
|
||||||
|
"first": { "type": "string", "title": "First name", "maxLength": 100 },
|
||||||
|
"last": { "type": "string", "title": "Last name", "maxLength": 100 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["email"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
oidc.google.jsonnet: |
|
||||||
|
// Maps Google OIDC claims onto the honeyDue identity schema.
|
||||||
|
local claims = std.extVar('claims');
|
||||||
|
{
|
||||||
|
identity: {
|
||||||
|
traits: {
|
||||||
|
email: claims.email,
|
||||||
|
[if 'given_name' in claims || 'family_name' in claims then 'name']: {
|
||||||
|
first: if 'given_name' in claims then claims.given_name else '',
|
||||||
|
last: if 'family_name' in claims then claims.family_name else '',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
oidc.apple.jsonnet: |
|
||||||
|
// Maps Apple OIDC claims onto the honeyDue identity schema. Apple only
|
||||||
|
// returns the name on the very first authorization and not in the ID
|
||||||
|
// token claims, so only email is mapped here.
|
||||||
|
//
|
||||||
|
// Sign in with Apple emails are marked verified UNCONDITIONALLY: completing
|
||||||
|
// SIWA cryptographically proves the user controls that Apple ID, and Apple
|
||||||
|
// owns/verifies the (relay or real) email, so a 6-digit code would be
|
||||||
|
// redundant. We deliberately do NOT gate this on Apple's `email_verified`
|
||||||
|
// claim — Apple omits that claim on many authorizations (only sends it on
|
||||||
|
// the first grant), which made auto-verification random: sometimes verified,
|
||||||
|
// sometimes a surprise code prompt (observed 2026-06-03). Marking it
|
||||||
|
// verified on every SIWA makes the behaviour consistent: Apple users never
|
||||||
|
// see a code; password sign-ups still verify via the honeyDue API flow.
|
||||||
|
local claims = std.extVar('claims');
|
||||||
|
{
|
||||||
|
identity: {
|
||||||
|
traits: {
|
||||||
|
email: claims.email,
|
||||||
|
},
|
||||||
|
verified_addresses: std.prune([
|
||||||
|
if 'email' in claims then {
|
||||||
|
via: 'email',
|
||||||
|
value: claims.email,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
# Public ingress for Ory Kratos — auth.myhoneydue.com → Kratos public API :4433.
|
||||||
|
#
|
||||||
|
# Middlewares match the honeyDue API ingress (security-headers + rate-limit).
|
||||||
|
# The cloudflare-only middleware is intentionally NOT applied here: on this
|
||||||
|
# cluster, klipper-lb SNATs the source IP before Traefik sees it, so
|
||||||
|
# cloudflare-only's IP allowlist rejects every legitimate Cloudflare request
|
||||||
|
# (verified 2026-06-03 — iOS Apple Sign In failed silently because Kratos
|
||||||
|
# never received the request). The api ingress doesn't use cloudflare-only
|
||||||
|
# for the same reason. DDoS protection still rides on Cloudflare's edge.
|
||||||
|
#
|
||||||
|
# Kratos's self-service flows are multi-request, so the strict auth-rate-limit
|
||||||
|
# (5/min) is intentionally NOT used here — Kratos applies its own per-flow
|
||||||
|
# protections.
|
||||||
|
#
|
||||||
|
# OPERATOR: confirm the cloudflare-origin-cert TLS secret covers
|
||||||
|
# auth.myhoneydue.com (apex + wildcard origin cert), and add the
|
||||||
|
# auth.myhoneydue.com DNS record in Cloudflare (proxied) → cluster ingress.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: honeydue-auth
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
|
spec:
|
||||||
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- auth.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
|
rules:
|
||||||
|
- host: auth.myhoneydue.com
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: kratos
|
||||||
|
port:
|
||||||
|
number: 4433
|
||||||
@@ -0,0 +1,208 @@
|
|||||||
|
# Ory Kratos — identity service for honeyDue.
|
||||||
|
#
|
||||||
|
# Deployed once the operator has completed the prerequisites in kratos/README.md
|
||||||
|
# (Neon `kratos` database, auth.myhoneydue.com DNS, Apple Sign In OIDC client,
|
||||||
|
# and the kratos-secrets Secret). Until then 03-deploy.sh skips the Kratos
|
||||||
|
# apply, so the existing stack is unaffected.
|
||||||
|
#
|
||||||
|
# IMAGE: pinned to oryd/kratos v26.2.0 (CalVer current stable as of 2026-06-03)
|
||||||
|
# with the linux/amd64 digest. The schema-migration Job is in migrate-job.yaml
|
||||||
|
# and runs before this Deployment rolls.
|
||||||
|
#
|
||||||
|
# OIDC: currently Apple-only (configmap.yaml providers[0]). Google was scoped
|
||||||
|
# out at deploy time; adding it later is additive — append to providers[] in
|
||||||
|
# configmap.yaml and add the matching CLIENT_SECRET env binding here.
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: kratos
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
strategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 0
|
||||||
|
maxSurge: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
automountServiceAccountToken: false
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: kratos
|
||||||
|
image: oryd/kratos:v26.2.0@sha256:92eedc292ff8e1a918ac442c88ed0abe44610c75121700963114549908a45ac3
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args:
|
||||||
|
- serve
|
||||||
|
- --config
|
||||||
|
- /etc/kratos/kratos.yml
|
||||||
|
- --watch-courier # send verification/recovery email in-process
|
||||||
|
ports:
|
||||||
|
- name: public
|
||||||
|
containerPort: 4433
|
||||||
|
- name: admin
|
||||||
|
containerPort: 4434
|
||||||
|
env:
|
||||||
|
# Kratos is configured natively via env vars; secrets come from
|
||||||
|
# the kratos-secrets Secret rather than the ConfigMap.
|
||||||
|
- name: DSN
|
||||||
|
valueFrom: { secretKeyRef: { name: kratos-secrets, key: dsn } }
|
||||||
|
- name: SECRETS_COOKIE
|
||||||
|
valueFrom: { secretKeyRef: { name: kratos-secrets, key: secrets_cookie } }
|
||||||
|
- name: SECRETS_CIPHER
|
||||||
|
valueFrom: { secretKeyRef: { name: kratos-secrets, key: secrets_cipher } }
|
||||||
|
- name: COURIER_SMTP_CONNECTION_URI
|
||||||
|
valueFrom: { secretKeyRef: { name: kratos-secrets, key: smtp_connection_uri } }
|
||||||
|
# OIDC provider secrets — index must match the providers list
|
||||||
|
# order in configmap.yaml. Apple-only for now (index 0).
|
||||||
|
- name: SELFSERVICE_METHODS_OIDC_CONFIG_PROVIDERS_0_APPLE_PRIVATE_KEY
|
||||||
|
valueFrom: { secretKeyRef: { name: kratos-secrets, key: apple_private_key } }
|
||||||
|
volumeMounts:
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/kratos
|
||||||
|
readOnly: true
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health/ready
|
||||||
|
port: 4434
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health/alive
|
||||||
|
port: 4434
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 30
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
cpu: "1"
|
||||||
|
memory: 512Mi
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
volumes:
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: kratos-config
|
||||||
|
- name: tmp
|
||||||
|
emptyDir:
|
||||||
|
sizeLimit: 64Mi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: kratos
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
ports:
|
||||||
|
- name: public
|
||||||
|
port: 4433
|
||||||
|
targetPort: 4433
|
||||||
|
- name: admin
|
||||||
|
port: 4434
|
||||||
|
targetPort: 4434
|
||||||
|
---
|
||||||
|
# Ingress to Kratos. Traefik (the auth.myhoneydue.com Ingress) reaches
|
||||||
|
# only the public API :4433. The honeyDue api pods reach the public API :4433
|
||||||
|
# (session whoami) AND the admin API :4434 (identity deletion on account
|
||||||
|
# close). The admin API :4434 takes no other cluster ingress.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-ingress-to-kratos
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
# Traefik ingress controller -> public API only.
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
ports:
|
||||||
|
- port: 4433
|
||||||
|
protocol: TCP
|
||||||
|
# honeyDue api pods -> public API (whoami) + admin API (identity deletion).
|
||||||
|
- from:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: api
|
||||||
|
ports:
|
||||||
|
- port: 4433
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4434
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# Kratos egress: DNS, the Neon Postgres database, SMTP, and HTTPS to the
|
||||||
|
# OIDC providers (Apple/Google token + JWKS endpoints).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-kratos
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
- to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# Neon Postgres (external)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
- 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 5432
|
||||||
|
protocol: TCP
|
||||||
|
# SMTP (Fastmail) + HTTPS to Apple/Google OIDC endpoints (external)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
- 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 465
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
# Ory Kratos schema migration — runs `kratos migrate sql` against the Kratos
|
||||||
|
# database before the Kratos Deployment rolls. 03-deploy.sh applies this,
|
||||||
|
# waits for completion, then applies kratos.yaml.
|
||||||
|
#
|
||||||
|
# IMAGE: pinned to oryd/kratos v26.2.0 (CalVer current stable as of 2026-06-03)
|
||||||
|
# with the linux/amd64 digest. Bump in sync with kratos.yaml's image.
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: kratos-migrate
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
backoffLimit: 0
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
restartPolicy: Never
|
||||||
|
automountServiceAccountToken: false
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: kratos-migrate
|
||||||
|
image: oryd/kratos:v26.2.0@sha256:92eedc292ff8e1a918ac442c88ed0abe44610c75121700963114549908a45ac3
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args: ["migrate", "sql", "-e", "--yes"]
|
||||||
|
env:
|
||||||
|
- name: DSN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: kratos-secrets
|
||||||
|
key: dsn
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 256Mi
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
# Kyverno image-signature verification policy (audit CODE-L5).
|
||||||
|
#
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# THIS MANIFEST IS NOT APPLIED BY 03-deploy.sh. It is intentionally outside
|
||||||
|
# the script's apply set. Applying it before the prerequisites are in place
|
||||||
|
# would block every honeydue Pod from scheduling. Operator steps:
|
||||||
|
#
|
||||||
|
# 1. Install Kyverno in the cluster (it is an admission controller):
|
||||||
|
# kubectl create -f https://github.com/kyverno/kyverno/releases/latest/download/install.yaml
|
||||||
|
# 2. Generate a cosign key pair and keep the private key safe:
|
||||||
|
# cosign generate-key-pair # -> cosign.key (PRIVATE) + cosign.pub
|
||||||
|
# Set COSIGN_KEY=cosign.key in the deploy environment so 03-deploy.sh
|
||||||
|
# signs images after pushing them (the signing step is already wired,
|
||||||
|
# guarded, into 03-deploy.sh).
|
||||||
|
# 3. Paste the contents of cosign.pub into the publicKeys block below.
|
||||||
|
# 4. Apply this policy: kubectl apply -f deploy-k3s/manifests/kyverno-verify-images.yaml
|
||||||
|
# 5. After confirming honeydue Pods still schedule, flip
|
||||||
|
# validationFailureAction from Audit to Enforce.
|
||||||
|
#
|
||||||
|
# Until then it is a documented, ready-to-use template — not active config.
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
apiVersion: kyverno.io/v1
|
||||||
|
kind: ClusterPolicy
|
||||||
|
metadata:
|
||||||
|
name: verify-honeydue-images
|
||||||
|
annotations:
|
||||||
|
policies.kyverno.io/title: Verify honeyDue image signatures
|
||||||
|
policies.kyverno.io/description: >-
|
||||||
|
Requires that honeyDue application images pulled into the honeydue
|
||||||
|
namespace carry a valid cosign signature made with the operator's key.
|
||||||
|
spec:
|
||||||
|
# Audit first — logs violations without blocking. Switch to Enforce once
|
||||||
|
# signing is confirmed working end to end.
|
||||||
|
validationFailureAction: Audit
|
||||||
|
background: false
|
||||||
|
webhookTimeoutSeconds: 30
|
||||||
|
rules:
|
||||||
|
- name: verify-gitea-image-signatures
|
||||||
|
match:
|
||||||
|
any:
|
||||||
|
- resources:
|
||||||
|
kinds:
|
||||||
|
- Pod
|
||||||
|
namespaces:
|
||||||
|
- honeydue
|
||||||
|
verifyImages:
|
||||||
|
# Only the images we build and sign. Public base images
|
||||||
|
# (redis, vmagent) are pinned by digest instead — see their manifests.
|
||||||
|
- imageReferences:
|
||||||
|
- "gitea.treytartt.com/admin/honeydue-api*"
|
||||||
|
- "gitea.treytartt.com/admin/honeydue-worker*"
|
||||||
|
- "gitea.treytartt.com/admin/honeydue-admin*"
|
||||||
|
- "gitea.treytartt.com/admin/honeydue-web*"
|
||||||
|
attestors:
|
||||||
|
- count: 1
|
||||||
|
entries:
|
||||||
|
- keys:
|
||||||
|
publicKeys: |-
|
||||||
|
-----BEGIN PUBLIC KEY-----
|
||||||
|
REPLACE_WITH_CONTENTS_OF_cosign.pub
|
||||||
|
-----END PUBLIC KEY-----
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
# One-shot migration Job. Runs goose against Neon's *direct* (non-pooler)
|
||||||
|
# endpoint, applies any pending migrations from /app/migrations (baked into
|
||||||
|
# the api image), exits.
|
||||||
|
#
|
||||||
|
# 03-deploy.sh deletes any prior Job, applies this one, waits for completion
|
||||||
|
# with `kubectl wait --for=condition=complete`, and rolls api/worker only
|
||||||
|
# after the Job succeeds. A Job failure aborts the whole deploy.
|
||||||
|
#
|
||||||
|
# We reuse the api image rather than build a separate one — the api Dockerfile
|
||||||
|
# already installs the goose CLI to /usr/local/bin/goose and copies the
|
||||||
|
# migrations directory to /app/migrations.
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: honeydue-migrate
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: migrate
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
backoffLimit: 0 # fail fast — no silent retries on a bad migration
|
||||||
|
ttlSecondsAfterFinished: 86400 # keep finished Job for 24h so logs are inspectable
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: migrate
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
restartPolicy: Never
|
||||||
|
# The migrate Job never calls the k8s API (audit F11).
|
||||||
|
automountServiceAccountToken: false
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: gitea-credentials
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
runAsGroup: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: goose
|
||||||
|
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh — same as api
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit
|
||||||
|
command: ["/bin/sh", "-c"]
|
||||||
|
# DB_HOST in the ConfigMap points at the -pooler endpoint for runtime.
|
||||||
|
# goose's session-scoped advisory lock can't survive PgBouncer
|
||||||
|
# transaction-mode, so we strip the -pooler segment for migrations.
|
||||||
|
# `set -e` so any sub-command failure exits non-zero.
|
||||||
|
args:
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
DIRECT_HOST=$(echo "$DB_HOST" | sed 's/-pooler\.\(.*\)$/.\1/')
|
||||||
|
echo "[migrate] running goose up against $DIRECT_HOST"
|
||||||
|
exec /usr/local/bin/goose \
|
||||||
|
-dir /app/migrations \
|
||||||
|
postgres "host=$DIRECT_HOST port=$DB_PORT user=$POSTGRES_USER password=$POSTGRES_PASSWORD dbname=$POSTGRES_DB sslmode=$DB_SSLMODE" \
|
||||||
|
up
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: honeydue-config
|
||||||
|
env:
|
||||||
|
- name: POSTGRES_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: honeydue-secrets
|
||||||
|
key: POSTGRES_PASSWORD
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 256Mi
|
||||||
@@ -47,10 +47,19 @@ spec:
|
|||||||
policyTypes:
|
policyTypes:
|
||||||
- Ingress
|
- Ingress
|
||||||
ingress:
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true, so traffic from it
|
||||||
|
# arrives with the NODE IP as source (not a pod IP). The cluster pod CIDR
|
||||||
|
# 10.42.0.0/16 covers any intra-cluster caller; the three node IPs
|
||||||
|
# cover Traefik on hostNetwork.
|
||||||
- from:
|
- from:
|
||||||
- namespaceSelector:
|
- ipBlock:
|
||||||
matchLabels:
|
cidr: 178.105.32.198/32 # ubuntu-8gb-nbg1-1
|
||||||
kubernetes.io/metadata.name: kube-system
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32 # ubuntu-8gb-nbg1-2
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32 # ubuntu-8gb-nbg1-3
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16 # cluster pod CIDR
|
||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 8000
|
port: 8000
|
||||||
@@ -69,10 +78,17 @@ spec:
|
|||||||
policyTypes:
|
policyTypes:
|
||||||
- Ingress
|
- Ingress
|
||||||
ingress:
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||||
|
# for the rationale. Same ipBlock list.
|
||||||
- from:
|
- from:
|
||||||
- namespaceSelector:
|
- ipBlock:
|
||||||
matchLabels:
|
cidr: 178.105.32.198/32
|
||||||
kubernetes.io/metadata.name: kube-system
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 3000
|
port: 3000
|
||||||
@@ -124,6 +140,20 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 6379
|
port: 6379
|
||||||
|
# Kratos (in-cluster). The auth middleware validates every session via
|
||||||
|
# http://kratos:4433/sessions/whoami; the AuthService also uses :4434
|
||||||
|
# for account deletion (DELETE /admin/identities/{id}). k3s evaluates
|
||||||
|
# egress rules AFTER kube-proxy DNAT (runbook §9.2), so this podSelector
|
||||||
|
# rule covers Service ClusterIP traffic correctly.
|
||||||
|
- to:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: kratos
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 4433
|
||||||
|
- protocol: TCP
|
||||||
|
port: 4434
|
||||||
# External services: Neon DB (5432), SMTP (587), HTTPS (443 — APNs, FCM, B2, PostHog)
|
# External services: Neon DB (5432), SMTP (587), HTTPS (443 — APNs, FCM, B2, PostHog)
|
||||||
- to:
|
- to:
|
||||||
- ipBlock:
|
- ipBlock:
|
||||||
@@ -200,3 +230,213 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 8000
|
port: 8000
|
||||||
|
|
||||||
|
---
|
||||||
|
# --- Web: allow ingress from Traefik (hostNetwork — matched by node IP, not by namespace) ---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-ingress-to-web
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||||
|
# for the rationale. Same ipBlock list.
|
||||||
|
- from:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.105.32.198/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 3000
|
||||||
|
|
||||||
|
---
|
||||||
|
# --- Web: allow egress for the Next.js server-side proxy routes ---
|
||||||
|
# Browser → app.myhoneydue.com → web pod (Node.js) → api.myhoneydue.com
|
||||||
|
# The web pod resolves api.myhoneydue.com via public DNS and hits
|
||||||
|
# Cloudflare. We don't know which Cloudflare IP it will be at policy time, so
|
||||||
|
# allow HTTPS to public ipBlock (except private CIDRs).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-web
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: web
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
# HTTPS to public (api.myhoneydue.com via CF, PostHog, any other remote)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.0.0.0/8
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 192.168.0.0/16
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 443
|
||||||
|
|
||||||
|
---
|
||||||
|
# vmagent egress.
|
||||||
|
#
|
||||||
|
# IMPORTANT (gotcha): k3s's built-in NetworkPolicy controller appears to
|
||||||
|
# evaluate egress rules AFTER kube-proxy's DNAT, not before (contrary to
|
||||||
|
# the k8s spec). So traffic from a pod to the kubernetes Service
|
||||||
|
# (ClusterIP 10.43.0.1:443) is policy-checked as dst=<node_public_ip>:6443.
|
||||||
|
# That's why we need an explicit rule for :6443 to public IPs, even though
|
||||||
|
# we already allow :443 to the cluster service CIDR.
|
||||||
|
#
|
||||||
|
# Without the :6443 rule, vmagent's k8s service discovery silently fails
|
||||||
|
# and zero pods get scraped. See deploy-k3s/RUNBOOK.md ("vmagent SD broken").
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
# DNS (cluster-internal)
|
||||||
|
- to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# k8s API server via ClusterIP (pre-DNAT view)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
# k8s API server post-DNAT (real path k3s NetPol enforcer sees) — REQUIRED
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# Scrape api Pods on :8000
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
# Scrape kube-state-metrics Pod on :8080 (pod CIDR)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# HTTPS to public (remote-write to obs.88oakapps.com via Cloudflare)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
- 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
|
||||||
|
---
|
||||||
|
# Allow vmagent → api ingress on :8000 so api pods accept scrapes.
|
||||||
|
# api Pods are otherwise locked down by default-deny-all + allow-ingress-to-api
|
||||||
|
# (which admits the Traefik node IPs and the pod CIDR). This adds vmagent explicitly by pod label.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-vmagent-to-api
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: api
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
ports:
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
|
||||||
|
---
|
||||||
|
# alloy-logs egress — Grafana Alloy discovers honeydue pods via the k8s API
|
||||||
|
# and pushes their logs to Loki at obs.88oakapps.com. Same k3s NetworkPolicy
|
||||||
|
# DNAT gotcha as vmagent: API-server traffic is policy-checked as
|
||||||
|
# dst=<node_public_ip>:6443, so an explicit :6443 rule is required.
|
||||||
|
# Alloy reads log FILES from a hostPath, so it needs no ingress and no
|
||||||
|
# egress to pod :8000/:8080 — only DNS, the API server, and obs HTTPS.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
# DNS (cluster-internal)
|
||||||
|
- to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# k8s API server via ClusterIP (pre-DNAT view)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
# k8s API server post-DNAT (real path k3s NetPol enforcer sees) — REQUIRED
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# HTTPS to public (log push to obs.88oakapps.com via Cloudflare)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.42.0.0/16
|
||||||
|
- 10.43.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
|||||||
@@ -0,0 +1,278 @@
|
|||||||
|
# honeyDue log shipper — Grafana Alloy as a DaemonSet.
|
||||||
|
#
|
||||||
|
# Each node runs one Alloy pod that tails the honeydue-namespace pod logs in
|
||||||
|
# /var/log/pods and pushes them to Loki at obs.88oakapps.com/loki/api/v1/push
|
||||||
|
# (the same nginx ingest endpoint + bearer token vmagent uses for metrics).
|
||||||
|
#
|
||||||
|
# Runs as root: /var/log/pods is 0750 root:root on the k3s nodes, so a
|
||||||
|
# non-root uid cannot even traverse it. The container is otherwise locked
|
||||||
|
# down — all capabilities dropped, read-only root filesystem, seccomp
|
||||||
|
# RuntimeDefault — and root inside the container reads only a read-only
|
||||||
|
# hostPath mount of /var/log/pods. This is the one root-running workload in
|
||||||
|
# the namespace (standard for log collectors); see docs/deployment.
|
||||||
|
#
|
||||||
|
# 03-deploy.sh substitutes TOKEN_PLACEHOLDER with OBS_INGEST_TOKEN from
|
||||||
|
# deploy/prod.env before applying — the token never lands in the repo.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
---
|
||||||
|
# Least privilege: Alloy's discovery.kubernetes only lists/watches pods, and
|
||||||
|
# only in the honeydue namespace — so a namespaced Role, not a ClusterRole.
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["pods"]
|
||||||
|
verbs: ["get", "list", "watch"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: alloy-logs
|
||||||
|
---
|
||||||
|
# Bearer token for the Loki push endpoint. TOKEN_PLACEHOLDER is replaced by
|
||||||
|
# 03-deploy.sh with OBS_INGEST_TOKEN (same token vmagent uses).
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs-auth
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
type: Opaque
|
||||||
|
stringData:
|
||||||
|
bearer_token: TOKEN_PLACEHOLDER
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
data:
|
||||||
|
config.alloy: |
|
||||||
|
// honeyDue log shipper. Each DaemonSet instance discovers honeydue-namespace
|
||||||
|
// pods via the Kubernetes API, tails the container log files present on its
|
||||||
|
// own node (/var/log/pods), and pushes them to Loki at obs.88oakapps.com.
|
||||||
|
|
||||||
|
logging {
|
||||||
|
level = "warn"
|
||||||
|
format = "logfmt"
|
||||||
|
}
|
||||||
|
|
||||||
|
discovery.kubernetes "pods" {
|
||||||
|
role = "pod"
|
||||||
|
namespaces {
|
||||||
|
names = ["honeydue"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn pod metadata into Loki labels and build the on-disk log path.
|
||||||
|
discovery.relabel "pod_logs" {
|
||||||
|
targets = discovery.kubernetes.pods.targets
|
||||||
|
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_namespace"]
|
||||||
|
action = "replace"
|
||||||
|
target_label = "namespace"
|
||||||
|
}
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_pod_name"]
|
||||||
|
action = "replace"
|
||||||
|
target_label = "pod"
|
||||||
|
}
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_pod_container_name"]
|
||||||
|
action = "replace"
|
||||||
|
target_label = "container"
|
||||||
|
}
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
|
||||||
|
action = "replace"
|
||||||
|
target_label = "app"
|
||||||
|
}
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_pod_node_name"]
|
||||||
|
action = "replace"
|
||||||
|
target_label = "node"
|
||||||
|
}
|
||||||
|
// /var/log/pods/<namespace>_<pod>_<uid>/<container>/<n>.log
|
||||||
|
rule {
|
||||||
|
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
|
||||||
|
separator = "/"
|
||||||
|
action = "replace"
|
||||||
|
replacement = "/var/log/pods/*$1/*.log"
|
||||||
|
target_label = "__path__"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
local.file_match "pod_logs" {
|
||||||
|
path_targets = discovery.relabel.pod_logs.output
|
||||||
|
}
|
||||||
|
|
||||||
|
loki.source.file "pod_logs" {
|
||||||
|
targets = local.file_match.pod_logs.targets
|
||||||
|
forward_to = [loki.process.pod_logs.receiver]
|
||||||
|
// With no stored read offset (fresh node, or positions wiped), start
|
||||||
|
// at the END of each file instead of re-shipping history — otherwise
|
||||||
|
// Loki rejects the now-too-old entries ("entry too far behind") and
|
||||||
|
// shipping stalls. Offsets persist on a hostPath (see volumes), so a
|
||||||
|
// normal pod restart resumes exactly where it left off.
|
||||||
|
tail_from_end = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the CRI log format (timestamp / stream / flags / message),
|
||||||
|
// then drop probe/scrape noise before shipping.
|
||||||
|
loki.process "pod_logs" {
|
||||||
|
forward_to = [loki.write.obs.receiver]
|
||||||
|
|
||||||
|
stage.cri {}
|
||||||
|
|
||||||
|
// Drop successful probe/scrape access logs. k8s liveness/readiness
|
||||||
|
// hits /api/health/ every few seconds and vmagent scrapes /metrics
|
||||||
|
// on a 15s interval — all 2xx, pure noise that drowns real logs.
|
||||||
|
// A non-2xx health check, or one logged above info level, does NOT
|
||||||
|
// match this regex and is kept.
|
||||||
|
stage.drop {
|
||||||
|
expression = "\"level\":\"info\".*\"path\":\"/(api/health/?|metrics)\".*\"status\":2[0-9][0-9]"
|
||||||
|
drop_counter_reason = "probe_access_ok"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
loki.write "obs" {
|
||||||
|
endpoint {
|
||||||
|
url = "https://obs.88oakapps.com/loki/api/v1/push"
|
||||||
|
bearer_token_file = "/etc/alloy-secrets/bearer_token"
|
||||||
|
}
|
||||||
|
external_labels = {
|
||||||
|
cluster = "honeydue-k3s",
|
||||||
|
environment = "prod",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: alloy-logs
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 1
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alloy-logs
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
serviceAccountName: alloy-logs
|
||||||
|
# Alloy needs its SA token — discovery.kubernetes talks to the API server.
|
||||||
|
automountServiceAccountToken: true
|
||||||
|
# Root is required to traverse /var/log/pods (0750 root:root). The
|
||||||
|
# container is otherwise fully confined (see container securityContext).
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsGroup: 0
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
tolerations:
|
||||||
|
# DaemonSet must run on every node, including nodes carrying the control-plane taint.
|
||||||
|
- key: node-role.kubernetes.io/control-plane
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
containers:
|
||||||
|
- name: alloy
|
||||||
|
image: grafana/alloy:v1.5.1@sha256:01a63f4e032ce54ee94b22049bc27f597e74f85566478c377f4b5c7f020c1eb3
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args:
|
||||||
|
- run
|
||||||
|
- /etc/alloy/config.alloy
|
||||||
|
- --storage.path=/tmp/alloy
|
||||||
|
- --server.http.listen-addr=0.0.0.0:12345
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 12345
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
volumeMounts:
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/alloy
|
||||||
|
readOnly: true
|
||||||
|
- name: auth
|
||||||
|
mountPath: /etc/alloy-secrets
|
||||||
|
readOnly: true
|
||||||
|
- name: varlogpods
|
||||||
|
mountPath: /var/log/pods
|
||||||
|
readOnly: true
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp/alloy
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/ready
|
||||||
|
port: 12345
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 20
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 150m
|
||||||
|
memory: 256Mi
|
||||||
|
volumes:
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: alloy-logs
|
||||||
|
- name: auth
|
||||||
|
secret:
|
||||||
|
secretName: alloy-logs-auth
|
||||||
|
defaultMode: 0400
|
||||||
|
- name: varlogpods
|
||||||
|
hostPath:
|
||||||
|
path: /var/log/pods
|
||||||
|
type: Directory
|
||||||
|
# Alloy's positions/WAL store. A hostPath (not emptyDir) so file read
|
||||||
|
# offsets survive pod restarts — otherwise every restart re-reads log
|
||||||
|
# files from the start and Loki rejects the now-too-old entries.
|
||||||
|
- name: tmp
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/honeydue-alloy-logs
|
||||||
|
type: DirectoryOrCreate
|
||||||
@@ -0,0 +1,223 @@
|
|||||||
|
# kube-state-metrics — exposes cluster object state (pods, deployments,
|
||||||
|
# services, etc.) as Prometheus metrics. vmagent scrapes it via the api
|
||||||
|
# group defined in vmagent-config; Grafana panels that count pods,
|
||||||
|
# replicas, etc. consume the `kube_*` metrics this produces.
|
||||||
|
#
|
||||||
|
# Lives in kube-system because it watches resources cluster-wide.
|
||||||
|
# RBAC is cluster-scoped (ClusterRole + ClusterRoleBinding).
|
||||||
|
#
|
||||||
|
# Image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
|
||||||
|
# (latest stable as of authoring; bump when a newer minor is released)
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: kube-state-metrics
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: kube-state-metrics
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
rules:
|
||||||
|
# Core resources
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- secrets
|
||||||
|
- nodes
|
||||||
|
- pods
|
||||||
|
- services
|
||||||
|
- serviceaccounts
|
||||||
|
- resourcequotas
|
||||||
|
- replicationcontrollers
|
||||||
|
- limitranges
|
||||||
|
- persistentvolumeclaims
|
||||||
|
- persistentvolumes
|
||||||
|
- namespaces
|
||||||
|
- endpoints
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Apps
|
||||||
|
- apiGroups: ["apps"]
|
||||||
|
resources:
|
||||||
|
- statefulsets
|
||||||
|
- daemonsets
|
||||||
|
- deployments
|
||||||
|
- replicasets
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Batch
|
||||||
|
- apiGroups: ["batch"]
|
||||||
|
resources:
|
||||||
|
- cronjobs
|
||||||
|
- jobs
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Autoscaling
|
||||||
|
- apiGroups: ["autoscaling"]
|
||||||
|
resources:
|
||||||
|
- horizontalpodautoscalers
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Authentication / authorization (used by some ksm collectors)
|
||||||
|
- apiGroups: ["authentication.k8s.io"]
|
||||||
|
resources: [tokenreviews]
|
||||||
|
verbs: [create]
|
||||||
|
- apiGroups: ["authorization.k8s.io"]
|
||||||
|
resources: [subjectaccessreviews]
|
||||||
|
verbs: [create]
|
||||||
|
# Policy
|
||||||
|
- apiGroups: ["policy"]
|
||||||
|
resources: [poddisruptionbudgets]
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Certificate signing
|
||||||
|
- apiGroups: ["certificates.k8s.io"]
|
||||||
|
resources: [certificatesigningrequests]
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Discovery
|
||||||
|
- apiGroups: ["discovery.k8s.io"]
|
||||||
|
resources: [endpointslices]
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Storage
|
||||||
|
- apiGroups: ["storage.k8s.io"]
|
||||||
|
resources:
|
||||||
|
- storageclasses
|
||||||
|
- volumeattachments
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Admission policy
|
||||||
|
- apiGroups: ["admissionregistration.k8s.io"]
|
||||||
|
resources:
|
||||||
|
- mutatingwebhookconfigurations
|
||||||
|
- validatingwebhookconfigurations
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Networking
|
||||||
|
- apiGroups: ["networking.k8s.io"]
|
||||||
|
resources:
|
||||||
|
- networkpolicies
|
||||||
|
- ingressclasses
|
||||||
|
- ingresses
|
||||||
|
verbs: [list, watch]
|
||||||
|
# Coordination (leader election)
|
||||||
|
- apiGroups: ["coordination.k8s.io"]
|
||||||
|
resources: [leases]
|
||||||
|
verbs: [list, watch]
|
||||||
|
# RBAC
|
||||||
|
- apiGroups: ["rbac.authorization.k8s.io"]
|
||||||
|
resources:
|
||||||
|
- clusterrolebindings
|
||||||
|
- clusterroles
|
||||||
|
- rolebindings
|
||||||
|
- roles
|
||||||
|
verbs: [list, watch]
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: kube-state-metrics
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: kube-state-metrics
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: kube-state-metrics
|
||||||
|
namespace: kube-system
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: kube-state-metrics
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
ports:
|
||||||
|
- name: http-metrics
|
||||||
|
port: 8080
|
||||||
|
targetPort: http-metrics
|
||||||
|
protocol: TCP
|
||||||
|
- name: telemetry
|
||||||
|
port: 8081
|
||||||
|
targetPort: telemetry
|
||||||
|
protocol: TCP
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: kube-state-metrics
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
|
app.kubernetes.io/part-of: honeydue-observability
|
||||||
|
spec:
|
||||||
|
serviceAccountName: kube-state-metrics
|
||||||
|
automountServiceAccountToken: true
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65534
|
||||||
|
fsGroup: 65534
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: kube-state-metrics
|
||||||
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http-metrics
|
||||||
|
- containerPort: 8081
|
||||||
|
name: telemetry
|
||||||
|
args:
|
||||||
|
- --port=8080
|
||||||
|
- --telemetry-port=8081
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 256Mi
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop: [ALL]
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /livez
|
||||||
|
port: http-metrics
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 30
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /readyz
|
||||||
|
port: http-metrics
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
# node-exporter — per-node host metrics (filesystem, memory, load, CPU).
|
||||||
|
# Runs as a normal pod (NOT hostNetwork) so vmagent scrapes it pod-to-pod over
|
||||||
|
# the cluster CIDR, avoiding any dependency on node public IPs (the netpol
|
||||||
|
# node-IP list is OVH-stale). Host /proc, /sys and / are bind-mounted read-only
|
||||||
|
# so the filesystem/memory/load collectors read the real host, not the pod ns.
|
||||||
|
# Added 2026-06-08 to close RUNBOOK §11.1 gap #9 (node disk/mem were unmonitored).
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: node-exporter
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
# Run on every node, including any tainted control-plane nodes.
|
||||||
|
tolerations:
|
||||||
|
- operator: Exists
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65534
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: node-exporter
|
||||||
|
image: quay.io/prometheus/node-exporter:v1.8.2 # TODO digest-pin (audit K3S-F14)
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args:
|
||||||
|
- --path.procfs=/host/proc
|
||||||
|
- --path.sysfs=/host/sys
|
||||||
|
- --path.rootfs=/host/root
|
||||||
|
# Only report real host mounts; drop the kubelet/container churn.
|
||||||
|
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/kubelet/.+|var/lib/docker/.+|var/lib/containerd/.+)($|/)
|
||||||
|
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
|
||||||
|
- --no-collector.wifi
|
||||||
|
- --no-collector.hwmon
|
||||||
|
- --web.listen-address=:9100
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 9100
|
||||||
|
protocol: TCP
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 30m
|
||||||
|
memory: 32Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 128Mi
|
||||||
|
volumeMounts:
|
||||||
|
- name: proc
|
||||||
|
mountPath: /host/proc
|
||||||
|
readOnly: true
|
||||||
|
- name: sys
|
||||||
|
mountPath: /host/sys
|
||||||
|
readOnly: true
|
||||||
|
- name: root
|
||||||
|
mountPath: /host/root
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: proc
|
||||||
|
hostPath:
|
||||||
|
path: /proc
|
||||||
|
- name: sys
|
||||||
|
hostPath:
|
||||||
|
path: /sys
|
||||||
|
- name: root
|
||||||
|
hostPath:
|
||||||
|
path: /
|
||||||
|
---
|
||||||
|
# default-deny-all blocks ingress; allow vmagent to scrape :9100.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-ingress-to-node-exporter
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: node-exporter
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
ports:
|
||||||
|
- port: 9100
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# vmagent's existing egress policy only opens :8000/:8080 to the pod CIDR.
|
||||||
|
# Additive policy (NetworkPolicies are OR'd) opening :9100 for the node-exporter
|
||||||
|
# scrape — leaves the working allow-egress-from-vmagent policy untouched.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-vmagent-to-node-exporter
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 9100
|
||||||
|
protocol: TCP
|
||||||
@@ -0,0 +1,289 @@
|
|||||||
|
# vmagent — scrapes Prometheus /metrics from in-cluster services and
|
||||||
|
# remote-writes them to https://obs.88oakapps.com/api/v1/write
|
||||||
|
# (VictoriaMetrics on 88oakappsUpdate, fronted by Cloudflare + nginx
|
||||||
|
# bearer-token auth). Single replica is fine — vmagent buffers locally
|
||||||
|
# during transient remote outages.
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: vmagent-config
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
data:
|
||||||
|
scrape.yaml: |
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
external_labels:
|
||||||
|
cluster: honeydue-k3s
|
||||||
|
environment: prod
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
# honeyDue Go API — exposes /metrics on :8000
|
||||||
|
- job_name: api
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: pod
|
||||||
|
namespaces:
|
||||||
|
names: [honeydue]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
|
||||||
|
action: keep
|
||||||
|
regex: api
|
||||||
|
- source_labels: [__meta_kubernetes_pod_container_port_number]
|
||||||
|
action: keep
|
||||||
|
regex: "8000"
|
||||||
|
- source_labels: [__meta_kubernetes_pod_name]
|
||||||
|
target_label: pod
|
||||||
|
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||||
|
target_label: node
|
||||||
|
- target_label: service
|
||||||
|
replacement: api
|
||||||
|
|
||||||
|
# kube-state-metrics — cluster object state (kube_pod_*, kube_deployment_*,
|
||||||
|
# etc.) needed for Grafana panels that count pods/replicas/etc.
|
||||||
|
- job_name: kube-state-metrics
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: endpoints
|
||||||
|
namespaces:
|
||||||
|
names: [kube-system]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name]
|
||||||
|
action: keep
|
||||||
|
regex: kube-state-metrics
|
||||||
|
- source_labels: [__meta_kubernetes_endpoint_port_name]
|
||||||
|
action: keep
|
||||||
|
regex: http-metrics
|
||||||
|
|
||||||
|
# node-exporter — per-node host metrics (node_filesystem_*, node_memory_*,
|
||||||
|
# node_load*). Pod-networked DaemonSet scraped on :9100 over the pod CIDR.
|
||||||
|
- job_name: node-exporter
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: pod
|
||||||
|
namespaces:
|
||||||
|
names: [honeydue]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
|
||||||
|
action: keep
|
||||||
|
regex: node-exporter
|
||||||
|
- source_labels: [__meta_kubernetes_pod_container_port_number]
|
||||||
|
action: keep
|
||||||
|
regex: "9100"
|
||||||
|
- source_labels: [__meta_kubernetes_pod_name]
|
||||||
|
target_label: pod
|
||||||
|
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||||
|
target_label: node
|
||||||
|
- target_label: service
|
||||||
|
replacement: node-exporter
|
||||||
|
|
||||||
|
# honeyDue worker — exposes /metrics on :6060 (apns/fcm/asynq/cache series).
|
||||||
|
- job_name: worker
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: pod
|
||||||
|
namespaces:
|
||||||
|
names: [honeydue]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
|
||||||
|
action: keep
|
||||||
|
regex: worker
|
||||||
|
- source_labels: [__meta_kubernetes_pod_container_port_number]
|
||||||
|
action: keep
|
||||||
|
regex: "6060"
|
||||||
|
- source_labels: [__meta_kubernetes_pod_name]
|
||||||
|
target_label: pod
|
||||||
|
- source_labels: [__meta_kubernetes_pod_node_name]
|
||||||
|
target_label: node
|
||||||
|
- target_label: service
|
||||||
|
replacement: worker
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: vmagent-remote-write
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
type: Opaque
|
||||||
|
stringData:
|
||||||
|
# Bearer token for obs.88oakapps.com. Provisioned at deploy time from
|
||||||
|
# deploy/prod.env (OBS_INGEST_TOKEN). The cluster-side token must match
|
||||||
|
# the token in /etc/honeydue-obs/ingest_token on 88oakappsUpdate.
|
||||||
|
bearer_token: TOKEN_PLACEHOLDER
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: [pods, services, endpoints]
|
||||||
|
verbs: [get, list, watch]
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
roleRef:
|
||||||
|
kind: Role
|
||||||
|
name: vmagent
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|
||||||
|
---
|
||||||
|
# Allow vmagent to discover the kube-state-metrics Service/Endpoints in
|
||||||
|
# kube-system so the kube-state-metrics scrape job can find its target.
|
||||||
|
# Cross-namespace SD needs an explicit RoleBinding here.
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: vmagent-kube-system
|
||||||
|
namespace: kube-system
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: [services, endpoints, pods]
|
||||||
|
verbs: [get, list, watch]
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: vmagent-kube-system
|
||||||
|
namespace: kube-system
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
roleRef:
|
||||||
|
kind: Role
|
||||||
|
name: vmagent-kube-system
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vmagent
|
||||||
|
namespace: honeydue
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
spec:
|
||||||
|
serviceAccountName: vmagent
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1000
|
||||||
|
fsGroup: 1000
|
||||||
|
seccompProfile:
|
||||||
|
type: RuntimeDefault
|
||||||
|
containers:
|
||||||
|
- name: vmagent
|
||||||
|
# Pinned by digest (audit K3S-F14).
|
||||||
|
image: victoriametrics/vmagent:v1.106.1@sha256:90208a667c0baf65f7536b92a84c40b6e35ffe8e88bda7e4447b97b06c6ba6b8
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit
|
||||||
|
# Container-level hardening (audit F7) — matches the other 5
|
||||||
|
# workloads. vmagent only writes to the /tmp/vmagent emptyDir
|
||||||
|
# (its remoteWrite buffer), so a read-only root filesystem holds.
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
|
args:
|
||||||
|
- "-promscrape.config=/etc/vmagent/scrape.yaml"
|
||||||
|
- "-remoteWrite.url=https://obs.88oakapps.com/api/v1/write"
|
||||||
|
- "-remoteWrite.bearerTokenFile=/etc/vmagent-secrets/bearer_token"
|
||||||
|
- "-remoteWrite.tmpDataPath=/tmp/vmagent"
|
||||||
|
- "-remoteWrite.maxDiskUsagePerURL=512MB"
|
||||||
|
- "-loggerLevel=INFO"
|
||||||
|
ports:
|
||||||
|
- containerPort: 8429
|
||||||
|
name: http
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 256Mi
|
||||||
|
volumeMounts:
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/vmagent
|
||||||
|
readOnly: true
|
||||||
|
- name: secrets
|
||||||
|
mountPath: /etc/vmagent-secrets
|
||||||
|
readOnly: true
|
||||||
|
- name: buffer
|
||||||
|
mountPath: /tmp/vmagent
|
||||||
|
# Process startup gate. /-/healthy returns 200 once vmagent has
|
||||||
|
# parsed config — gives the agent up to 2 min to come up before
|
||||||
|
# liveness starts evaluating.
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/healthy
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 24
|
||||||
|
# Real liveness check: are scrapes actually succeeding?
|
||||||
|
# /-/healthy was the old probe and returned 200 for 17 days even
|
||||||
|
# while vmagent had zero healthy targets (stale k8s SD watch).
|
||||||
|
# This exec probe queries vmagent's own targets API and fails if
|
||||||
|
# NO target is in state "up". Five consecutive failures (5 × 120s ≈ 10 min)
|
||||||
|
# → kubelet kills the pod → fresh SD watch.
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- 'n=$(wget -qO- -T 4 http://localhost:8429/api/v1/targets 2>/dev/null | grep -c ''"health":"up"''); [ "$n" -gt 0 ]'
|
||||||
|
initialDelaySeconds: 180
|
||||||
|
periodSeconds: 120
|
||||||
|
timeoutSeconds: 5
|
||||||
|
failureThreshold: 5
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/healthy
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
volumes:
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: vmagent-config
|
||||||
|
- name: secrets
|
||||||
|
secret:
|
||||||
|
secretName: vmagent-remote-write
|
||||||
|
defaultMode: 0400
|
||||||
|
- name: buffer
|
||||||
|
emptyDir:
|
||||||
|
sizeLimit: 512Mi
|
||||||
@@ -20,6 +20,9 @@ spec:
|
|||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: redis
|
serviceAccountName: redis
|
||||||
|
# Explicit pod-level opt-out (audit F11) — defense-in-depth on top of
|
||||||
|
# the ServiceAccount-level setting in rbac.yaml.
|
||||||
|
automountServiceAccountToken: false
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
honeydue/redis: "true"
|
honeydue/redis: "true"
|
||||||
securityContext:
|
securityContext:
|
||||||
@@ -31,12 +34,18 @@ spec:
|
|||||||
type: RuntimeDefault
|
type: RuntimeDefault
|
||||||
containers:
|
containers:
|
||||||
- name: redis
|
- name: redis
|
||||||
image: redis:7-alpine
|
# Pinned by digest (audit K3S-F14) — redis:7-alpine is 7.4.9-alpine.
|
||||||
|
image: redis:7-alpine@sha256:6ab0b6e7381779332f97b8ca76193e45b0756f38d4c0dcda72dbb3c32061ab99
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
- |
|
- |
|
||||||
ARGS="--appendonly yes --appendfsync everysec --maxmemory 256mb --maxmemory-policy noeviction"
|
# allkeys-lru: under memory pressure, evict the least-recently-used key.
|
||||||
|
# honeyDue uses Redis as a cache + asynq queue. The cache layer falls
|
||||||
|
# through to DB on miss, so eviction is graceful. asynq keys with TTLs
|
||||||
|
# would be evicted only after older cache entries are gone.
|
||||||
|
ARGS="--appendonly yes --appendfsync everysec --maxmemory 256mb --maxmemory-policy allkeys-lru"
|
||||||
if [ -n "$REDIS_PASSWORD" ]; then
|
if [ -n "$REDIS_PASSWORD" ]; then
|
||||||
ARGS="$ARGS --requirepass $REDIS_PASSWORD"
|
ARGS="$ARGS --requirepass $REDIS_PASSWORD"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -23,8 +23,11 @@ spec:
|
|||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: web
|
serviceAccountName: web
|
||||||
|
# Explicit pod-level opt-out (audit F11) — defense-in-depth on top of
|
||||||
|
# the ServiceAccount-level setting in rbac.yaml.
|
||||||
|
automountServiceAccountToken: false
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: ghcr-credentials
|
- name: gitea-credentials
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 1001
|
runAsUser: 1001
|
||||||
@@ -43,6 +46,7 @@ spec:
|
|||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh or manual sed
|
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh or manual sed
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit; images are SHA/digest-pinned
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 3000
|
- containerPort: 3000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
|||||||
@@ -27,8 +27,11 @@ spec:
|
|||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: worker
|
serviceAccountName: worker
|
||||||
|
# Explicit pod-level opt-out (audit F11) — defense-in-depth on top of
|
||||||
|
# the ServiceAccount-level setting in rbac.yaml.
|
||||||
|
automountServiceAccountToken: false
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: ghcr-credentials
|
- name: gitea-credentials
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
runAsUser: 1000
|
runAsUser: 1000
|
||||||
@@ -39,6 +42,12 @@ spec:
|
|||||||
containers:
|
containers:
|
||||||
- name: worker
|
- name: worker
|
||||||
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
image: IMAGE_PLACEHOLDER # Replaced by 03-deploy.sh
|
||||||
|
imagePullPolicy: IfNotPresent # audit CODE-L4 — explicit; images are SHA/digest-pinned
|
||||||
|
ports:
|
||||||
|
# health + Prometheus /metrics (in-cluster only; scraped by vmagent)
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 6060
|
||||||
|
protocol: TCP
|
||||||
securityContext:
|
securityContext:
|
||||||
allowPrivilegeEscalation: false
|
allowPrivilegeEscalation: false
|
||||||
readOnlyRootFilesystem: true
|
readOnlyRootFilesystem: true
|
||||||
@@ -47,34 +56,16 @@ spec:
|
|||||||
envFrom:
|
envFrom:
|
||||||
- configMapRef:
|
- configMapRef:
|
||||||
name: honeydue-config
|
name: honeydue-config
|
||||||
env:
|
# Audit CODE-F8: secrets are NOT injected as environment variables.
|
||||||
- name: POSTGRES_PASSWORD
|
# Env vars are readable for the life of the pod via /proc/<pid>/environ
|
||||||
valueFrom:
|
# and leak into crash dumps / child processes. honeydue-secrets is
|
||||||
secretKeyRef:
|
# mounted read-only at /etc/honeydue/secrets (mode 0400) and the Go
|
||||||
name: honeydue-secrets
|
# config layer (config.loadFileSecrets) reads each key from its file.
|
||||||
key: POSTGRES_PASSWORD
|
# Non-secret config still arrives via the configMapRef above.
|
||||||
- name: SECRET_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: SECRET_KEY
|
|
||||||
- name: EMAIL_HOST_PASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: EMAIL_HOST_PASSWORD
|
|
||||||
- name: FCM_SERVER_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: FCM_SERVER_KEY
|
|
||||||
- name: REDIS_PASSWORD
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: honeydue-secrets
|
|
||||||
key: REDIS_PASSWORD
|
|
||||||
optional: true
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
- name: app-secrets
|
||||||
|
mountPath: /etc/honeydue/secrets
|
||||||
|
readOnly: true
|
||||||
- name: apns-key
|
- name: apns-key
|
||||||
mountPath: /secrets/apns
|
mountPath: /secrets/apns
|
||||||
readOnly: true
|
readOnly: true
|
||||||
@@ -94,6 +85,12 @@ spec:
|
|||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
volumes:
|
volumes:
|
||||||
|
# Audit CODE-F8: the whole honeydue-secrets Secret, projected as files.
|
||||||
|
# defaultMode 0400 → readable only by the container's runAsUser (1000).
|
||||||
|
- name: app-secrets
|
||||||
|
secret:
|
||||||
|
secretName: honeydue-secrets
|
||||||
|
defaultMode: 0400
|
||||||
- name: apns-key
|
- name: apns-key
|
||||||
secret:
|
secret:
|
||||||
secretName: honeydue-apns-key
|
secretName: honeydue-apns-key
|
||||||
@@ -103,3 +100,46 @@ spec:
|
|||||||
- name: tmp
|
- name: tmp
|
||||||
emptyDir:
|
emptyDir:
|
||||||
sizeLimit: 64Mi
|
sizeLimit: 64Mi
|
||||||
|
---
|
||||||
|
# Allow vmagent to scrape the worker's /metrics on :6060 (default-deny-all is in
|
||||||
|
# force; the worker otherwise receives no ingress). Additive — see node-exporter.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-ingress-to-worker-metrics
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: worker
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
ports:
|
||||||
|
- port: 6060
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# vmagent's base egress policy only opens :8000/:8080 to the pod CIDR; this
|
||||||
|
# additive policy opens :6060 for the worker scrape (leaves the base untouched).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-vmagent-to-worker
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: vmagent
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- port: 6060
|
||||||
|
protocol: TCP
|
||||||
|
|||||||
@@ -68,6 +68,43 @@ SECRET_ARGS=(
|
|||||||
if [[ -n "${REDIS_PASSWORD}" ]]; then
|
if [[ -n "${REDIS_PASSWORD}" ]]; then
|
||||||
log " Including REDIS_PASSWORD in secrets"
|
log " Including REDIS_PASSWORD in secrets"
|
||||||
SECRET_ARGS+=(--from-literal="REDIS_PASSWORD=${REDIS_PASSWORD}")
|
SECRET_ARGS+=(--from-literal="REDIS_PASSWORD=${REDIS_PASSWORD}")
|
||||||
|
else
|
||||||
|
# Audit K3S-F1 (CRITICAL) / MEDIUM-4: refuse to deploy with an unauthenticated
|
||||||
|
# Redis. A previous version only warned here, which let a deploy from an
|
||||||
|
# unedited config.yaml silently bring Redis up with no password.
|
||||||
|
die "redis.password is empty in config.yaml — refusing to deploy: Redis would run with NO authentication (audit K3S-F1). Set a strong value, e.g.: openssl rand -base64 32"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# B2 (Backblaze) object-storage credentials. The api/worker manifests
|
||||||
|
# reference B2_KEY_ID / B2_APP_KEY as required secret keys, so honeydue-secrets
|
||||||
|
# MUST carry them or those pods fail to start. Sourced from config.yaml so the
|
||||||
|
# script and the manifests no longer drift (was a latent gap before 2026-05-16).
|
||||||
|
B2_KEY_ID_VAL="$(cfg storage.b2_key_id 2>/dev/null || true)"
|
||||||
|
B2_APP_KEY_VAL="$(cfg storage.b2_app_key 2>/dev/null || true)"
|
||||||
|
if [[ -n "${B2_KEY_ID_VAL}" && -n "${B2_APP_KEY_VAL}" ]]; then
|
||||||
|
log " Including B2_KEY_ID / B2_APP_KEY in secrets"
|
||||||
|
SECRET_ARGS+=(--from-literal="B2_KEY_ID=${B2_KEY_ID_VAL}")
|
||||||
|
SECRET_ARGS+=(--from-literal="B2_APP_KEY=${B2_APP_KEY_VAL}")
|
||||||
|
else
|
||||||
|
warn "storage.b2_key_id / b2_app_key not set in config.yaml — B2 uploads will be disabled."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Observability ingest credentials live in deploy/prod.env (gitignored) so
|
||||||
|
# the values aren't checked into config.yaml. Skipped silently when the
|
||||||
|
# file or keys are absent — the api/worker manifests mark these env vars
|
||||||
|
# optional, so the deployment still rolls without traces.
|
||||||
|
PROD_ENV_FILE="${DEPLOY_DIR}/../deploy/prod.env"
|
||||||
|
if [[ -f "${PROD_ENV_FILE}" ]]; then
|
||||||
|
OBS_TOKEN_VAL="$(grep -E '^OBS_INGEST_TOKEN=' "${PROD_ENV_FILE}" 2>/dev/null | cut -d= -f2- || true)"
|
||||||
|
OBS_URL_VAL="$(grep -E '^OBS_TRACES_URL=' "${PROD_ENV_FILE}" 2>/dev/null | cut -d= -f2- || true)"
|
||||||
|
if [[ -n "${OBS_TOKEN_VAL}" ]]; then
|
||||||
|
log " Including OBS_INGEST_TOKEN in secrets"
|
||||||
|
SECRET_ARGS+=(--from-literal="OBS_INGEST_TOKEN=${OBS_TOKEN_VAL}")
|
||||||
|
fi
|
||||||
|
if [[ -n "${OBS_URL_VAL}" ]]; then
|
||||||
|
log " Including OBS_TRACES_URL in secrets"
|
||||||
|
SECRET_ARGS+=(--from-literal="OBS_TRACES_URL=${OBS_URL_VAL}")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
kubectl create secret generic honeydue-secrets \
|
kubectl create secret generic honeydue-secrets \
|
||||||
@@ -82,22 +119,24 @@ kubectl create secret generic honeydue-apns-key \
|
|||||||
--from-file="apns_auth_key.p8=${SECRETS_DIR}/apns_auth_key.p8" \
|
--from-file="apns_auth_key.p8=${SECRETS_DIR}/apns_auth_key.p8" \
|
||||||
--dry-run=client -o yaml | kubectl apply -f -
|
--dry-run=client -o yaml | kubectl apply -f -
|
||||||
|
|
||||||
# --- Create GHCR registry credentials ---
|
# --- Create container registry credentials ---
|
||||||
|
# Secret name is gitea-credentials (audit F6): the registry is self-hosted
|
||||||
|
# Gitea, not GHCR. Every deployment manifest references this same name.
|
||||||
|
|
||||||
REGISTRY_SERVER="$(cfg registry.server)"
|
REGISTRY_SERVER="$(cfg registry.server)"
|
||||||
REGISTRY_USER="$(cfg registry.username)"
|
REGISTRY_USER="$(cfg registry.username)"
|
||||||
REGISTRY_TOKEN="$(cfg registry.token)"
|
REGISTRY_TOKEN="$(cfg registry.token)"
|
||||||
|
|
||||||
if [[ -n "${REGISTRY_SERVER}" && -n "${REGISTRY_USER}" && -n "${REGISTRY_TOKEN}" ]]; then
|
if [[ -n "${REGISTRY_SERVER}" && -n "${REGISTRY_USER}" && -n "${REGISTRY_TOKEN}" ]]; then
|
||||||
log "Creating ghcr-credentials..."
|
log "Creating gitea-credentials..."
|
||||||
kubectl create secret docker-registry ghcr-credentials \
|
kubectl create secret docker-registry gitea-credentials \
|
||||||
--namespace="${NAMESPACE}" \
|
--namespace="${NAMESPACE}" \
|
||||||
--docker-server="${REGISTRY_SERVER}" \
|
--docker-server="${REGISTRY_SERVER}" \
|
||||||
--docker-username="${REGISTRY_USER}" \
|
--docker-username="${REGISTRY_USER}" \
|
||||||
--docker-password="${REGISTRY_TOKEN}" \
|
--docker-password="${REGISTRY_TOKEN}" \
|
||||||
--dry-run=client -o yaml | kubectl apply -f -
|
--dry-run=client -o yaml | kubectl apply -f -
|
||||||
else
|
else
|
||||||
warn "Registry credentials incomplete in config.yaml — skipping ghcr-credentials."
|
warn "Registry credentials incomplete in config.yaml — skipping gitea-credentials."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Create Cloudflare origin cert ---
|
# --- Create Cloudflare origin cert ---
|
||||||
@@ -114,7 +153,8 @@ kubectl create secret tls cloudflare-origin-cert \
|
|||||||
if [[ -n "${ADMIN_AUTH_USER}" && -n "${ADMIN_AUTH_PASSWORD}" ]]; then
|
if [[ -n "${ADMIN_AUTH_USER}" && -n "${ADMIN_AUTH_PASSWORD}" ]]; then
|
||||||
command -v htpasswd >/dev/null 2>&1 || die "Missing: htpasswd (install apache2-utils)"
|
command -v htpasswd >/dev/null 2>&1 || die "Missing: htpasswd (install apache2-utils)"
|
||||||
log "Creating admin-basic-auth secret..."
|
log "Creating admin-basic-auth secret..."
|
||||||
HTPASSWD="$(htpasswd -nb "${ADMIN_AUTH_USER}" "${ADMIN_AUTH_PASSWORD}")"
|
# -B forces bcrypt (Traefik BasicAuth supports it; avoids weak apr1-MD5).
|
||||||
|
HTPASSWD="$(htpasswd -nbB "${ADMIN_AUTH_USER}" "${ADMIN_AUTH_PASSWORD}")"
|
||||||
kubectl create secret generic admin-basic-auth \
|
kubectl create secret generic admin-basic-auth \
|
||||||
--namespace="${NAMESPACE}" \
|
--namespace="${NAMESPACE}" \
|
||||||
--from-literal=users="${HTPASSWD}" \
|
--from-literal=users="${HTPASSWD}" \
|
||||||
@@ -124,6 +164,35 @@ else
|
|||||||
warn "Admin panel will NOT have basic auth protection."
|
warn "Admin panel will NOT have basic auth protection."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# --- Create Kratos secrets (Ory Kratos identity service) ---
|
||||||
|
# Created only when config.yaml has a kratos.dsn. Until then 03-deploy.sh skips
|
||||||
|
# the Kratos deploy entirely, so the existing stack is unaffected.
|
||||||
|
|
||||||
|
KRATOS_DSN="$(cfg kratos.dsn 2>/dev/null || true)"
|
||||||
|
if [[ -n "${KRATOS_DSN}" ]]; then
|
||||||
|
log "Creating kratos-secrets..."
|
||||||
|
KR_COOKIE="$(cfg kratos.secrets_cookie 2>/dev/null || true)"
|
||||||
|
KR_CIPHER="$(cfg kratos.secrets_cipher 2>/dev/null || true)"
|
||||||
|
KR_SMTP="$(cfg kratos.smtp_connection_uri 2>/dev/null || true)"
|
||||||
|
KR_GOOGLE="$(cfg kratos.google_client_secret 2>/dev/null || true)"
|
||||||
|
KR_APPLE="$(cfg kratos.apple_private_key 2>/dev/null || true)"
|
||||||
|
[[ -n "${KR_COOKIE}" && -n "${KR_CIPHER}" ]] \
|
||||||
|
|| die "kratos.secrets_cookie / secrets_cipher must be set (generate once: openssl rand -hex 16)"
|
||||||
|
[[ ${#KR_CIPHER} -eq 32 ]] \
|
||||||
|
|| die "kratos.secrets_cipher must be exactly 32 characters (openssl rand -hex 16)"
|
||||||
|
kubectl create secret generic kratos-secrets \
|
||||||
|
--namespace="${NAMESPACE}" \
|
||||||
|
--from-literal="dsn=${KRATOS_DSN}" \
|
||||||
|
--from-literal="secrets_cookie=${KR_COOKIE}" \
|
||||||
|
--from-literal="secrets_cipher=${KR_CIPHER}" \
|
||||||
|
--from-literal="smtp_connection_uri=${KR_SMTP}" \
|
||||||
|
--from-literal="google_client_secret=${KR_GOOGLE}" \
|
||||||
|
--from-literal="apple_private_key=${KR_APPLE}" \
|
||||||
|
--dry-run=client -o yaml | kubectl apply -f -
|
||||||
|
else
|
||||||
|
warn "config.yaml has no kratos.dsn — skipping kratos-secrets (Kratos not yet configured)."
|
||||||
|
fi
|
||||||
|
|
||||||
# --- Done ---
|
# --- Done ---
|
||||||
|
|
||||||
log ""
|
log ""
|
||||||
|
|||||||
+146
-12
@@ -81,20 +81,24 @@ if [[ "${SKIP_BUILD}" == "false" ]]; then
|
|||||||
log "Logging in to ${REGISTRY_SERVER}..."
|
log "Logging in to ${REGISTRY_SERVER}..."
|
||||||
printf '%s' "${REGISTRY_TOKEN}" | docker login "${REGISTRY_SERVER}" -u "${REGISTRY_USER}" --password-stdin >/dev/null
|
printf '%s' "${REGISTRY_TOKEN}" | docker login "${REGISTRY_SERVER}" -u "${REGISTRY_USER}" --password-stdin >/dev/null
|
||||||
|
|
||||||
log "Building API image: ${API_IMAGE}"
|
# k3s nodes are linux/amd64 (Hetzner CX). Force the build platform so
|
||||||
docker build --target api -t "${API_IMAGE}" "${REPO_DIR}"
|
# local arm64 Macs don't push images that crash with "exec format error".
|
||||||
|
BUILD_PLATFORM="linux/amd64"
|
||||||
|
|
||||||
log "Building Worker image: ${WORKER_IMAGE}"
|
log "Building API image: ${API_IMAGE} (${BUILD_PLATFORM})"
|
||||||
docker build --target worker -t "${WORKER_IMAGE}" "${REPO_DIR}"
|
docker build --platform "${BUILD_PLATFORM}" --target api -t "${API_IMAGE}" "${REPO_DIR}"
|
||||||
|
|
||||||
log "Building Admin image: ${ADMIN_IMAGE} (NEXT_PUBLIC_API_URL=${ADMIN_API_URL})"
|
log "Building Worker image: ${WORKER_IMAGE} (${BUILD_PLATFORM})"
|
||||||
docker build --target admin \
|
docker build --platform "${BUILD_PLATFORM}" --target worker -t "${WORKER_IMAGE}" "${REPO_DIR}"
|
||||||
|
|
||||||
|
log "Building Admin image: ${ADMIN_IMAGE} (${BUILD_PLATFORM}, NEXT_PUBLIC_API_URL=${ADMIN_API_URL})"
|
||||||
|
docker build --platform "${BUILD_PLATFORM}" --target admin \
|
||||||
--build-arg "NEXT_PUBLIC_API_URL=${ADMIN_API_URL}" \
|
--build-arg "NEXT_PUBLIC_API_URL=${ADMIN_API_URL}" \
|
||||||
-t "${ADMIN_IMAGE}" "${REPO_DIR}"
|
-t "${ADMIN_IMAGE}" "${REPO_DIR}"
|
||||||
|
|
||||||
if [[ -n "${WEB_REPO_DIR}" && -f "${WEB_REPO_DIR}/Dockerfile" ]]; then
|
if [[ -n "${WEB_REPO_DIR}" && -f "${WEB_REPO_DIR}/Dockerfile" ]]; then
|
||||||
log "Building Web image: ${WEB_IMAGE} (NEXT_PUBLIC_API_URL=${WEB_API_URL})"
|
log "Building Web image: ${WEB_IMAGE} (${BUILD_PLATFORM}, NEXT_PUBLIC_API_URL=${WEB_API_URL})"
|
||||||
docker build \
|
docker build --platform "${BUILD_PLATFORM}" \
|
||||||
--build-arg "NEXT_PUBLIC_API_URL=${WEB_API_URL}" \
|
--build-arg "NEXT_PUBLIC_API_URL=${WEB_API_URL}" \
|
||||||
--build-arg "NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}" \
|
--build-arg "NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}" \
|
||||||
--build-arg "NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}" \
|
--build-arg "NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST}" \
|
||||||
@@ -124,6 +128,56 @@ else
|
|||||||
warn "Skipping build. Using images for tag: ${DEPLOY_TAG}"
|
warn "Skipping build. Using images for tag: ${DEPLOY_TAG}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# --- Resolve immutable image digests (audit F5) ---
|
||||||
|
# A short-SHA tag is mutable — anyone who can push to the registry can
|
||||||
|
# overwrite it, and imagePullPolicy then pulls the new bits silently. We
|
||||||
|
# deploy by @sha256: digest instead, pinning the exact image that was just
|
||||||
|
# built and pushed. `docker push` populates RepoDigests; with --skip-build
|
||||||
|
# (no local image) resolve_ref falls back to the tag.
|
||||||
|
# resolve_ref IMAGE
#
# Prints (without a trailing newline) the reference to deploy for IMAGE:
#   - the immutable repo@sha256:<digest> form when `docker inspect` reports a
#     RepoDigest for the local image, or
#   - IMAGE unchanged (mutable tag) when no digest is available, after a
#     warning.
#
# Callers capture stdout with $(...), so only the reference may be written to
# stdout; the warning is redirected to stderr explicitly.
resolve_ref() {
  local img="$1" repo line first="" digest=""

  # Repository = IMAGE minus any @digest and minus a trailing :tag. Only the
  # last path component is inspected for ':' so that a registry port
  # (host:5000/name) is not mistaken for a tag.
  repo="${img%%@*}"
  if [[ "${repo##*/}" == *:* ]]; then
    repo="${repo%:*}"
  fi

  # An image that is tagged into several repositories can carry several
  # RepoDigests. Prefer the entry for THIS repository; fall back to the first
  # digest listed (the previous behaviour) when none matches by name.
  while IFS= read -r line; do
    [[ "${line}" == *@sha256:* ]] || continue
    if [[ -z "${first}" ]]; then
      first="${line}"
    fi
    if [[ "${line}" == "${repo}@sha256:"* ]]; then
      digest="${line}"
      break
    fi
  done < <(docker inspect --format='{{range .RepoDigests}}{{println .}}{{end}}' "${img}" 2>/dev/null || true)
  digest="${digest:-${first}}"

  if [[ -n "${digest}" ]]; then
    printf '%s' "${digest}"
  else
    warn "could not resolve a digest for ${img} — deploying by mutable tag" >&2
    printf '%s' "${img}"
  fi
}
|
||||||
|
API_REF="$(resolve_ref "${API_IMAGE}")"
|
||||||
|
WORKER_REF="$(resolve_ref "${WORKER_IMAGE}")"
|
||||||
|
ADMIN_REF="$(resolve_ref "${ADMIN_IMAGE}")"
|
||||||
|
WEB_REF="$(resolve_ref "${WEB_IMAGE}")"
|
||||||
|
log "Deploying by digest:"
|
||||||
|
log " API: ${API_REF}"
|
||||||
|
log " Worker: ${WORKER_REF}"
|
||||||
|
log " Admin: ${ADMIN_REF}"
|
||||||
|
|
||||||
|
# --- Image scan + signing (audit CODE-L5) ---
|
||||||
|
# Both steps are best-effort: the deploy does NOT fail if the tools are
|
||||||
|
# absent, so an operator who has not set up cosign/trivy yet is not blocked.
|
||||||
|
# Install trivy + cosign and export COSIGN_KEY to enforce. Cluster-side
|
||||||
|
# admission verification (Kyverno/Connaisseur) is a separate operator step.
|
||||||
|
# Best-effort image scan + signing. Runs only when images were built in this
# invocation (SKIP_BUILD=false). Neither step aborts the deploy: a missing
# tool, a finding, or a signing failure only produces a warning.
if [[ "${SKIP_BUILD}" == "false" ]]; then
  if command -v trivy >/dev/null 2>&1; then
    log "Scanning images with Trivy (HIGH,CRITICAL)..."
    for img in "${API_IMAGE}" "${WORKER_IMAGE}" "${ADMIN_IMAGE}"; do
      # --exit-code 1 makes Trivy exit non-zero when it finds HIGH/CRITICAL
      # issues. With --exit-code 0 Trivy exits 0 regardless of findings, so
      # the warning below could only fire on a scan error, never on findings.
      # The `|| warn` keeps the step non-blocking either way.
      trivy image --severity HIGH,CRITICAL --exit-code 1 --quiet "${img}" \
        || warn "Trivy reported findings for ${img}"
    done
  else
    warn "trivy not installed — skipping image vulnerability scan (audit L5)"
  fi

  # Signing needs both the cosign binary and a key supplied via COSIGN_KEY.
  if command -v cosign >/dev/null 2>&1 && [[ -n "${COSIGN_KEY:-}" ]]; then
    log "Signing images with cosign..."
    # Sign the resolved references (digest when available) rather than the
    # tags, so the signature is attached to the exact image being deployed.
    for ref in "${API_REF}" "${WORKER_REF}" "${ADMIN_REF}"; do
      cosign sign --yes --key "${COSIGN_KEY}" "${ref}" || warn "cosign sign failed for ${ref}"
    done
  else
    warn "cosign not configured (need cosign + COSIGN_KEY) — skipping image signing (audit L5)"
  fi
fi
|
||||||
|
|
||||||
# --- Generate and apply ConfigMap from config.yaml ---
|
# --- Generate and apply ConfigMap from config.yaml ---
|
||||||
|
|
||||||
log "Generating env from config.yaml..."
|
log "Generating env from config.yaml..."
|
||||||
@@ -142,24 +196,95 @@ kubectl create configmap honeydue-config \
|
|||||||
log "Applying manifests..."
|
log "Applying manifests..."
|
||||||
|
|
||||||
kubectl apply -f "${MANIFESTS}/namespace.yaml"
|
kubectl apply -f "${MANIFESTS}/namespace.yaml"
|
||||||
|
|
||||||
|
# NetworkPolicies first — default-deny-all + per-app allow rules.
|
||||||
|
# These MUST be applied; without them the cluster falls back to default-allow
|
||||||
|
# (worse posture) AND the vmagent egress rule for :6443 (which fixes a k3s
|
||||||
|
# post-DNAT enforcement quirk for k8s API discovery) is missing.
|
||||||
|
# See deploy-k3s/RUNBOOK.md ("vmagent SD broken on fresh deploy").
|
||||||
|
kubectl apply -f "${MANIFESTS}/network-policies.yaml"
|
||||||
|
|
||||||
kubectl apply -f "${MANIFESTS}/redis/"
|
kubectl apply -f "${MANIFESTS}/redis/"
|
||||||
kubectl apply -f "${MANIFESTS}/ingress/"
|
kubectl apply -f "${MANIFESTS}/ingress/"
|
||||||
|
|
||||||
|
# --- Run migrations BEFORE rolling api/worker ---
|
||||||
|
#
|
||||||
|
# goose-based migration Job. We delete any prior Job (Jobs are immutable —
|
||||||
|
# applying a duplicate name otherwise fails), apply a fresh one with the new
|
||||||
|
# api image (which includes /usr/local/bin/goose and /app/migrations), and
|
||||||
|
# block until it succeeds. A failure aborts the deploy before any new app
|
||||||
|
# pod sees a stale schema.
|
||||||
|
log "Running database migrations (goose Job)..."
|
||||||
|
kubectl delete job honeydue-migrate -n "${NAMESPACE}" --ignore-not-found --wait=true >/dev/null
|
||||||
|
sed "s|image: IMAGE_PLACEHOLDER|image: ${API_REF}|" "${MANIFESTS}/migrate/job.yaml" | kubectl apply -f -
|
||||||
|
if ! kubectl wait --namespace="${NAMESPACE}" --for=condition=complete --timeout=10m job/honeydue-migrate; then
|
||||||
|
warn "migration Job failed — see logs:"
|
||||||
|
kubectl logs -n "${NAMESPACE}" job/honeydue-migrate --tail=200 || true
|
||||||
|
die "migrations did not complete cleanly; aborting deploy"
|
||||||
|
fi
|
||||||
|
log "Migrations applied; proceeding with api/worker rollout"
|
||||||
|
|
||||||
# Apply deployments with image substitution
|
# Apply deployments with image substitution
|
||||||
sed "s|image: IMAGE_PLACEHOLDER|image: ${API_IMAGE}|" "${MANIFESTS}/api/deployment.yaml" | kubectl apply -f -
|
sed "s|image: IMAGE_PLACEHOLDER|image: ${API_REF}|" "${MANIFESTS}/api/deployment.yaml" | kubectl apply -f -
|
||||||
kubectl apply -f "${MANIFESTS}/api/service.yaml"
|
kubectl apply -f "${MANIFESTS}/api/service.yaml"
|
||||||
kubectl apply -f "${MANIFESTS}/api/hpa.yaml"
|
kubectl apply -f "${MANIFESTS}/api/hpa.yaml"
|
||||||
|
|
||||||
sed "s|image: IMAGE_PLACEHOLDER|image: ${WORKER_IMAGE}|" "${MANIFESTS}/worker/deployment.yaml" | kubectl apply -f -
|
sed "s|image: IMAGE_PLACEHOLDER|image: ${WORKER_REF}|" "${MANIFESTS}/worker/deployment.yaml" | kubectl apply -f -
|
||||||
|
|
||||||
sed "s|image: IMAGE_PLACEHOLDER|image: ${ADMIN_IMAGE}|" "${MANIFESTS}/admin/deployment.yaml" | kubectl apply -f -
|
sed "s|image: IMAGE_PLACEHOLDER|image: ${ADMIN_REF}|" "${MANIFESTS}/admin/deployment.yaml" | kubectl apply -f -
|
||||||
kubectl apply -f "${MANIFESTS}/admin/service.yaml"
|
kubectl apply -f "${MANIFESTS}/admin/service.yaml"
|
||||||
|
|
||||||
if [[ -d "${MANIFESTS}/web" ]]; then
|
if [[ -d "${MANIFESTS}/web" ]]; then
|
||||||
sed "s|image: IMAGE_PLACEHOLDER|image: ${WEB_IMAGE}|" "${MANIFESTS}/web/deployment.yaml" | kubectl apply -f -
|
sed "s|image: IMAGE_PLACEHOLDER|image: ${WEB_REF}|" "${MANIFESTS}/web/deployment.yaml" | kubectl apply -f -
|
||||||
kubectl apply -f "${MANIFESTS}/web/service.yaml"
|
kubectl apply -f "${MANIFESTS}/web/service.yaml"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Observability — vmagent scrapes api Pods :8000/metrics + kube-state-metrics
|
||||||
|
# :8080/metrics and remote-writes everything to obs.88oakapps.com. The bearer
|
||||||
|
# token comes from deploy/prod.env so it stays out of the repo; the manifest
|
||||||
|
# holds TOKEN_PLACEHOLDER. kube-state-metrics provides the kube_* metrics
|
||||||
|
# Grafana panels need to count pods, deployments, etc.
|
||||||
|
if [[ -d "${MANIFESTS}/observability" ]]; then
|
||||||
|
# kube-state-metrics — no secrets, plain apply
|
||||||
|
kubectl apply -f "${MANIFESTS}/observability/kube-state-metrics.yaml"
|
||||||
|
|
||||||
|
# vmagent — needs the bearer-token substitution
|
||||||
|
# prod.env lives at the repo's deploy/ dir (sibling of deploy-k3s/), not
|
||||||
|
# under deploy-k3s/. It's gitignored — operator copies values there once.
|
||||||
|
OBS_TOKEN="$(grep -E '^OBS_INGEST_TOKEN=' "${REPO_DIR}/deploy/prod.env" 2>/dev/null | cut -d= -f2- || true)"
|
||||||
|
if [[ -z "${OBS_TOKEN}" ]]; then
|
||||||
|
warn "OBS_INGEST_TOKEN not found in deploy/prod.env — skipping vmagent + alloy-logs apply"
|
||||||
|
else
|
||||||
|
sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/vmagent.yaml" | kubectl apply -f -
|
||||||
|
# alloy-logs — DaemonSet that tails honeydue pod logs and pushes them to
|
||||||
|
# Loki at obs.88oakapps.com. Same OBS_INGEST_TOKEN as vmagent.
|
||||||
|
if [[ -f "${MANIFESTS}/observability/alloy-logs.yaml" ]]; then
|
||||||
|
sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/alloy-logs.yaml" | kubectl apply -f -
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Ory Kratos (identity service) ---
|
||||||
|
# Applied only when kratos-secrets exists — i.e. the operator has completed the
|
||||||
|
# Kratos prerequisites in deploy-k3s/manifests/kratos/README.md. Otherwise
|
||||||
|
# skipped, so the existing stack deploys unaffected.
|
||||||
|
if kubectl -n "${NAMESPACE}" get secret kratos-secrets >/dev/null 2>&1; then
|
||||||
|
log "Deploying Ory Kratos..."
|
||||||
|
kubectl apply -f "${MANIFESTS}/kratos/configmap.yaml"
|
||||||
|
# The migrate Job is immutable — delete any prior run, then apply + wait.
|
||||||
|
kubectl delete job kratos-migrate -n "${NAMESPACE}" --ignore-not-found --wait=true >/dev/null
|
||||||
|
kubectl apply -f "${MANIFESTS}/kratos/migrate-job.yaml"
|
||||||
|
if ! kubectl wait --namespace="${NAMESPACE}" --for=condition=complete --timeout=5m job/kratos-migrate; then
|
||||||
|
warn "Kratos migration Job failed — logs:"
|
||||||
|
kubectl logs -n "${NAMESPACE}" job/kratos-migrate --tail=100 || true
|
||||||
|
die "aborting: Kratos schema migration failed"
|
||||||
|
fi
|
||||||
|
kubectl apply -f "${MANIFESTS}/kratos/kratos.yaml"
|
||||||
|
kubectl apply -f "${MANIFESTS}/kratos/ingress.yaml"
|
||||||
|
else
|
||||||
|
log "kratos-secrets not present — skipping Kratos deploy (see manifests/kratos/README.md)."
|
||||||
|
fi
|
||||||
|
|
||||||
# --- Wait for rollouts ---
|
# --- Wait for rollouts ---
|
||||||
|
|
||||||
log "Waiting for rollouts..."
|
log "Waiting for rollouts..."
|
||||||
@@ -171,6 +296,15 @@ kubectl rollout status deployment/admin -n "${NAMESPACE}" --timeout=300s
|
|||||||
if [[ -d "${MANIFESTS}/web" ]]; then
|
if [[ -d "${MANIFESTS}/web" ]]; then
|
||||||
kubectl rollout status deployment/web -n "${NAMESPACE}" --timeout=300s
|
kubectl rollout status deployment/web -n "${NAMESPACE}" --timeout=300s
|
||||||
fi
|
fi
|
||||||
|
if kubectl -n "${NAMESPACE}" get deployment vmagent >/dev/null 2>&1; then
|
||||||
|
kubectl rollout status deployment/vmagent -n "${NAMESPACE}" --timeout=120s
|
||||||
|
fi
|
||||||
|
if kubectl -n "${NAMESPACE}" get daemonset alloy-logs >/dev/null 2>&1; then
|
||||||
|
kubectl rollout status daemonset/alloy-logs -n "${NAMESPACE}" --timeout=120s
|
||||||
|
fi
|
||||||
|
if kubectl -n "${NAMESPACE}" get deployment kratos >/dev/null 2>&1; then
|
||||||
|
kubectl rollout status deployment/kratos -n "${NAMESPACE}" --timeout=180s
|
||||||
|
fi
|
||||||
|
|
||||||
# --- Done ---
|
# --- Done ---
|
||||||
|
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ lines = [
|
|||||||
# API
|
# API
|
||||||
'DEBUG=false',
|
'DEBUG=false',
|
||||||
f\"ALLOWED_HOSTS={d['api']},{d['base']}\",
|
f\"ALLOWED_HOSTS={d['api']},{d['base']}\",
|
||||||
f\"CORS_ALLOWED_ORIGINS=https://{d['base']},https://{d['admin']}\",
|
f\"CORS_ALLOWED_ORIGINS=https://{d['base']},https://{d['admin']},https://{d.get('app', 'app.' + d['base'])}\",
|
||||||
'TIMEZONE=UTC',
|
'TIMEZONE=UTC',
|
||||||
f\"BASE_URL=https://{d['base']}\",
|
f\"BASE_URL=https://{d['base']}\",
|
||||||
'PORT=8000',
|
'PORT=8000',
|
||||||
@@ -118,8 +118,15 @@ lines = [
|
|||||||
f\"DB_MAX_OPEN_CONNS={db['max_open_conns']}\",
|
f\"DB_MAX_OPEN_CONNS={db['max_open_conns']}\",
|
||||||
f\"DB_MAX_IDLE_CONNS={db['max_idle_conns']}\",
|
f\"DB_MAX_IDLE_CONNS={db['max_idle_conns']}\",
|
||||||
f\"DB_MAX_LIFETIME={db['max_lifetime']}\",
|
f\"DB_MAX_LIFETIME={db['max_lifetime']}\",
|
||||||
# Redis (K8s internal DNS — password injected if configured)
|
f\"DB_MAX_IDLE_TIME={db.get('max_idle_time', '0s')}\",
|
||||||
f\"REDIS_URL=redis://{':%s@' % val(rd.get('password')) if rd.get('password') else ''}redis.honeydue.svc.cluster.local:6379/0\",
|
# Redis — in-namespace DNS short form (works because pod /etc/resolv.conf
|
||||||
|
# searches honeydue.svc.cluster.local). Audit HIGH-1: the password is
|
||||||
|
# intentionally NOT embedded here. This URL is emitted into the
|
||||||
|
# honeydue-config ConfigMap, which is NOT encrypted at rest and is
|
||||||
|
# readable by anyone with `get configmap`. The Redis password travels
|
||||||
|
# only in honeydue-secrets as REDIS_PASSWORD (file-mounted, F8); the API
|
||||||
|
# applies it in cache_service.go and the worker onto its Asynq opt.
|
||||||
|
'REDIS_URL=redis://redis:6379/0',
|
||||||
'REDIS_DB=0',
|
'REDIS_DB=0',
|
||||||
# Email
|
# Email
|
||||||
f\"EMAIL_HOST={em['host']}\",
|
f\"EMAIL_HOST={em['host']}\",
|
||||||
@@ -139,12 +146,21 @@ lines = [
|
|||||||
f\"OVERDUE_REMINDER_HOUR={wk['overdue_reminder_hour']}\",
|
f\"OVERDUE_REMINDER_HOUR={wk['overdue_reminder_hour']}\",
|
||||||
f\"DAILY_DIGEST_HOUR={wk['daily_digest_hour']}\",
|
f\"DAILY_DIGEST_HOUR={wk['daily_digest_hour']}\",
|
||||||
# B2 Storage
|
# B2 Storage
|
||||||
f\"B2_KEY_ID={val(st['b2_key_id'])}\",
|
# B2_KEY_ID and B2_APP_KEY are intentionally NOT emitted into the
|
||||||
f\"B2_APP_KEY={val(st['b2_app_key'])}\",
|
# ConfigMap — they're credentials and belong in honeydue-secrets
|
||||||
|
# (set by 02-setup-secrets.sh). Wire them into the api/worker
|
||||||
|
# deployments via envFrom: secretRef when B2 uploads need to be
|
||||||
|
# active. Leaving them in cleartext here would leak via
|
||||||
|
# \"kubectl get cm\".
|
||||||
f\"B2_BUCKET_NAME={val(st['b2_bucket'])}\",
|
f\"B2_BUCKET_NAME={val(st['b2_bucket'])}\",
|
||||||
f\"B2_ENDPOINT={val(st['b2_endpoint'])}\",
|
f\"B2_ENDPOINT={val(st['b2_endpoint'])}\",
|
||||||
|
f\"B2_REGION={val(st.get('b2_region'))}\",
|
||||||
|
f\"B2_USE_SSL={b(st.get('b2_use_ssl', True))}\",
|
||||||
f\"STORAGE_MAX_FILE_SIZE={st['max_file_size']}\",
|
f\"STORAGE_MAX_FILE_SIZE={st['max_file_size']}\",
|
||||||
f\"STORAGE_ALLOWED_TYPES={st['allowed_types']}\",
|
f\"STORAGE_ALLOWED_TYPES={st['allowed_types']}\",
|
||||||
|
f\"STORAGE_UPLOAD_DIR={val(st.get('upload_dir', '/app/uploads'))}\",
|
||||||
|
f\"STORAGE_BASE_URL={val(st.get('base_url', '/uploads'))}\",
|
||||||
|
f\"STATIC_DIR={val(st.get('static_dir', '/app/static'))}\",
|
||||||
# Features
|
# Features
|
||||||
f\"FEATURE_PUSH_ENABLED={b(ft['push_enabled'])}\",
|
f\"FEATURE_PUSH_ENABLED={b(ft['push_enabled'])}\",
|
||||||
f\"FEATURE_EMAIL_ENABLED={b(ft['email_enabled'])}\",
|
f\"FEATURE_EMAIL_ENABLED={b(ft['email_enabled'])}\",
|
||||||
@@ -207,8 +223,18 @@ config = {
|
|||||||
'image': 'ubuntu-24.04',
|
'image': 'ubuntu-24.04',
|
||||||
},
|
},
|
||||||
'additional_packages': ['open-iscsi'],
|
'additional_packages': ['open-iscsi'],
|
||||||
'post_create_commands': ['sudo systemctl enable --now iscsid'],
|
# Audit K3S-CG2: harden the node OS at provision time — fail2ban for SSH
|
||||||
'k3s_config_file': 'secrets-encryption: true\n',
|
# brute-force, unattended-upgrades for automatic security patches.
|
||||||
|
'post_create_commands': [
|
||||||
|
'sudo systemctl enable --now iscsid',
|
||||||
|
'sudo apt-get update -qq',
|
||||||
|
'sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq fail2ban unattended-upgrades',
|
||||||
|
'sudo systemctl enable --now fail2ban',
|
||||||
|
'sudo dpkg-reconfigure -f noninteractive -plow unattended-upgrades',
|
||||||
|
],
|
||||||
|
# Audit K3S-CG1 / K3S-F4: encrypt Secrets at rest in etcd, and write the
|
||||||
|
# node kubeconfig as mode 0600 (not world-readable).
|
||||||
|
'k3s_config_file': 'secrets-encryption: true\nwrite-kubeconfig-mode: \"0600\"\n',
|
||||||
}
|
}
|
||||||
|
|
||||||
print(yaml.dump(config, default_flow_style=False, sort_keys=False))
|
print(yaml.dump(config, default_flow_style=False, sort_keys=False))
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
"$id": "https://honeydue.app/identity.schema.json",
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"title": "honeyDue user",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"traits": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"email": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "email",
|
||||||
|
"title": "Email",
|
||||||
|
"minLength": 3,
|
||||||
|
"maxLength": 320,
|
||||||
|
"ory.sh/kratos": {
|
||||||
|
"credentials": {
|
||||||
|
"password": { "identifier": true },
|
||||||
|
"code": { "identifier": true, "via": "email" },
|
||||||
|
"totp": { "account_name": true }
|
||||||
|
},
|
||||||
|
"verification": { "via": "email" },
|
||||||
|
"recovery": { "via": "email" }
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "object",
|
||||||
|
"title": "Name",
|
||||||
|
"properties": {
|
||||||
|
"first": { "type": "string", "title": "First name", "maxLength": 100 },
|
||||||
|
"last": { "type": "string", "title": "Last name", "maxLength": 100 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["email"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
version: v1.3.0
|
||||||
|
|
||||||
|
serve:
|
||||||
|
public:
|
||||||
|
base_url: http://localhost:4433/
|
||||||
|
cors:
|
||||||
|
enabled: true
|
||||||
|
allowed_origins:
|
||||||
|
- http://localhost
|
||||||
|
- http://localhost:3000
|
||||||
|
- http://localhost:8000
|
||||||
|
- http://127.0.0.1
|
||||||
|
allowed_methods: [GET, POST, PUT, PATCH, DELETE, OPTIONS]
|
||||||
|
allowed_headers: [Authorization, Content-Type, X-Session-Token, Cookie]
|
||||||
|
exposed_headers: [Content-Type, Set-Cookie]
|
||||||
|
allow_credentials: true
|
||||||
|
admin:
|
||||||
|
base_url: http://kratos:4434/
|
||||||
|
|
||||||
|
selfservice:
|
||||||
|
default_browser_return_url: http://localhost:8000/
|
||||||
|
allowed_return_urls:
|
||||||
|
- http://localhost:8000
|
||||||
|
- honeydue://callback
|
||||||
|
|
||||||
|
methods:
|
||||||
|
password:
|
||||||
|
enabled: true
|
||||||
|
config:
|
||||||
|
min_password_length: 8
|
||||||
|
identifier_similarity_check_enabled: false
|
||||||
|
code:
|
||||||
|
enabled: true
|
||||||
|
oidc:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
flows:
|
||||||
|
error:
|
||||||
|
ui_url: http://localhost:8000/auth/error
|
||||||
|
login:
|
||||||
|
ui_url: http://localhost:8000/auth/login
|
||||||
|
lifespan: 10m
|
||||||
|
registration:
|
||||||
|
ui_url: http://localhost:8000/auth/registration
|
||||||
|
lifespan: 10m
|
||||||
|
after:
|
||||||
|
password:
|
||||||
|
hooks:
|
||||||
|
- hook: session
|
||||||
|
verification:
|
||||||
|
enabled: true
|
||||||
|
ui_url: http://localhost:8000/auth/verification
|
||||||
|
use: code
|
||||||
|
after:
|
||||||
|
default_browser_return_url: http://localhost:8000/
|
||||||
|
recovery:
|
||||||
|
enabled: true
|
||||||
|
ui_url: http://localhost:8000/auth/recovery
|
||||||
|
use: code
|
||||||
|
settings:
|
||||||
|
ui_url: http://localhost:8000/auth/settings
|
||||||
|
privileged_session_max_age: 15m
|
||||||
|
logout:
|
||||||
|
after:
|
||||||
|
default_browser_return_url: http://localhost:8000/
|
||||||
|
|
||||||
|
log:
|
||||||
|
level: debug
|
||||||
|
format: text
|
||||||
|
leak_sensitive_values: true
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
cookie:
|
||||||
|
- local-dev-cookie-secret-please-change-this-32chars
|
||||||
|
cipher:
|
||||||
|
- 0123456789abcdef0123456789abcdef
|
||||||
|
|
||||||
|
ciphers:
|
||||||
|
algorithm: xchacha20-poly1305
|
||||||
|
|
||||||
|
hashers:
|
||||||
|
algorithm: bcrypt
|
||||||
|
bcrypt:
|
||||||
|
cost: 8
|
||||||
|
|
||||||
|
identity:
|
||||||
|
default_schema_id: honeydue
|
||||||
|
schemas:
|
||||||
|
- id: honeydue
|
||||||
|
url: file:///etc/config/kratos/identity.schema.json
|
||||||
|
|
||||||
|
courier:
|
||||||
|
smtp:
|
||||||
|
connection_uri: smtp://mailpit:1025/?disable_starttls=true
|
||||||
|
from_address: noreply@localhost
|
||||||
|
from_name: honeyDue Local
|
||||||
|
|
||||||
|
session:
|
||||||
|
lifespan: 720h
|
||||||
|
cookie:
|
||||||
|
same_site: Lax
|
||||||
@@ -35,7 +35,7 @@ DEFAULT_FROM_EMAIL=honeyDue <noreply@honeyDue.treytartt.com>
|
|||||||
# APNS private key goes in deploy/secrets/apns_auth_key.p8
|
# APNS private key goes in deploy/secrets/apns_auth_key.p8
|
||||||
APNS_AUTH_KEY_ID=CHANGEME_APNS_KEY_ID
|
APNS_AUTH_KEY_ID=CHANGEME_APNS_KEY_ID
|
||||||
APNS_TEAM_ID=CHANGEME_APNS_TEAM_ID
|
APNS_TEAM_ID=CHANGEME_APNS_TEAM_ID
|
||||||
APNS_TOPIC=com.tt.honeyDue
|
APNS_TOPIC=com.myhoneydue.honeyDue
|
||||||
APNS_USE_SANDBOX=false
|
APNS_USE_SANDBOX=false
|
||||||
APNS_PRODUCTION=true
|
APNS_PRODUCTION=true
|
||||||
|
|
||||||
@@ -80,7 +80,11 @@ FEATURE_PDF_REPORTS_ENABLED=true
|
|||||||
FEATURE_WORKER_ENABLED=true
|
FEATURE_WORKER_ENABLED=true
|
||||||
|
|
||||||
# Optional auth/iap values
|
# Optional auth/iap values
|
||||||
APPLE_CLIENT_ID=
|
# APPLE_CLIENT_ID must equal the iOS Release bundle ID. The Apple
|
||||||
|
# identity-token `aud` claim is verified against this value
|
||||||
|
# (internal/services/apple_auth.go::verifyAudience). Leaving it empty
|
||||||
|
# with DEBUG=false rejects every Apple token as invalid audience.
|
||||||
|
APPLE_CLIENT_ID=com.myhoneydue.honeyDue
|
||||||
APPLE_TEAM_ID=
|
APPLE_TEAM_ID=
|
||||||
GOOGLE_CLIENT_ID=
|
GOOGLE_CLIENT_ID=
|
||||||
GOOGLE_ANDROID_CLIENT_ID=
|
GOOGLE_ANDROID_CLIENT_ID=
|
||||||
|
|||||||
@@ -1,6 +1,31 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
# DEPRECATED — production migrated from Docker Swarm to k3s on 2026-04-24.
|
||||||
|
# This script targets the old Swarm manager + registry flow and will fail
|
||||||
|
# at the SSH/Swarm validation step because hetzner1 no longer runs dockerd.
|
||||||
|
#
|
||||||
|
# Use the k3s deploy stack instead:
|
||||||
|
#
|
||||||
|
# export KUBECONFIG="$(pwd)/deploy-k3s/kubeconfig"
|
||||||
|
# ./deploy-k3s/scripts/03-deploy.sh
|
||||||
|
#
|
||||||
|
# If you don't have deploy-k3s/kubeconfig locally, fetch it once:
|
||||||
|
# ssh -i ~/.ssh/hetzner deploy@hetzner1 'sudo cat /etc/rancher/k3s/k3s.yaml' \
|
||||||
|
# | sed 's|server: https://127.0.0.1:6443|server: https://178.104.247.152:6443|' \
|
||||||
|
# > deploy-k3s/kubeconfig
|
||||||
|
# chmod 600 deploy-k3s/kubeconfig
|
||||||
|
#
|
||||||
|
# To override and run anyway (do NOT do this casually), set:
|
||||||
|
# ALLOW_LEGACY_SWARM_DEPLOY=1 ./deploy/scripts/deploy_prod.sh
|
||||||
|
if [[ "${ALLOW_LEGACY_SWARM_DEPLOY:-0}" != "1" ]]; then
|
||||||
|
printf '[deploy][error] %s\n' \
|
||||||
|
"deploy_prod.sh is the legacy Docker Swarm flow. Production now runs on k3s." \
|
||||||
|
"Use ./deploy-k3s/scripts/03-deploy.sh instead (see top of this script for setup)." \
|
||||||
|
"If you really need the old Swarm path, set ALLOW_LEGACY_SWARM_DEPLOY=1." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
DEPLOY_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
DEPLOY_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||||
REPO_DIR="$(cd "${DEPLOY_DIR}/.." && pwd)"
|
REPO_DIR="$(cd "${DEPLOY_DIR}/.." && pwd)"
|
||||||
|
|||||||
+62
-2
@@ -14,6 +14,7 @@ services:
|
|||||||
POSTGRES_DB: ${POSTGRES_DB:-honeydue}
|
POSTGRES_DB: ${POSTGRES_DB:-honeydue}
|
||||||
volumes:
|
volumes:
|
||||||
- postgres_data:/var/lib/postgresql/data
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
- ./deploy/local/postgres-init:/docker-entrypoint-initdb.d:ro
|
||||||
ports:
|
ports:
|
||||||
- "${DB_PORT:-5433}:5432" # 5433 externally to avoid conflicts with local postgres
|
- "${DB_PORT:-5433}:5432" # 5433 externally to avoid conflicts with local postgres
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -85,12 +86,16 @@ services:
|
|||||||
APNS_AUTH_KEY_PATH: ${APNS_AUTH_KEY_PATH}
|
APNS_AUTH_KEY_PATH: ${APNS_AUTH_KEY_PATH}
|
||||||
APNS_AUTH_KEY_ID: ${APNS_AUTH_KEY_ID}
|
APNS_AUTH_KEY_ID: ${APNS_AUTH_KEY_ID}
|
||||||
APNS_TEAM_ID: ${APNS_TEAM_ID}
|
APNS_TEAM_ID: ${APNS_TEAM_ID}
|
||||||
APNS_TOPIC: ${APNS_TOPIC:-com.tt.honeyDue}
|
APNS_TOPIC: ${APNS_TOPIC:-com.myhoneydue.honeyDue.dev}
|
||||||
APNS_USE_SANDBOX: "true"
|
APNS_USE_SANDBOX: "true"
|
||||||
FCM_SERVER_KEY: ${FCM_SERVER_KEY}
|
FCM_SERVER_KEY: ${FCM_SERVER_KEY}
|
||||||
|
|
||||||
# Storage encryption
|
# Storage encryption
|
||||||
STORAGE_ENCRYPTION_KEY: ${STORAGE_ENCRYPTION_KEY}
|
STORAGE_ENCRYPTION_KEY: ${STORAGE_ENCRYPTION_KEY}
|
||||||
|
|
||||||
|
# Kratos (identity service)
|
||||||
|
KRATOS_PUBLIC_URL: "http://kratos:4433"
|
||||||
|
KRATOS_ADMIN_URL: "http://kratos:4434"
|
||||||
volumes:
|
volumes:
|
||||||
- ./push_certs:/certs:ro
|
- ./push_certs:/certs:ro
|
||||||
- ./uploads:/app/uploads
|
- ./uploads:/app/uploads
|
||||||
@@ -99,6 +104,8 @@ services:
|
|||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
redis:
|
redis:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
kratos:
|
||||||
|
condition: service_healthy
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://127.0.0.1:8000/api/health/"]
|
test: ["CMD", "curl", "-f", "http://127.0.0.1:8000/api/health/"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
@@ -158,7 +165,7 @@ services:
|
|||||||
APNS_AUTH_KEY_PATH: "/certs/apns_key.p8"
|
APNS_AUTH_KEY_PATH: "/certs/apns_key.p8"
|
||||||
APNS_AUTH_KEY_ID: ${APNS_AUTH_KEY_ID}
|
APNS_AUTH_KEY_ID: ${APNS_AUTH_KEY_ID}
|
||||||
APNS_TEAM_ID: ${APNS_TEAM_ID}
|
APNS_TEAM_ID: ${APNS_TEAM_ID}
|
||||||
APNS_TOPIC: ${APNS_TOPIC:-com.tt.honeyDue}
|
APNS_TOPIC: ${APNS_TOPIC:-com.myhoneydue.honeyDue.dev}
|
||||||
APNS_USE_SANDBOX: "true"
|
APNS_USE_SANDBOX: "true"
|
||||||
FCM_SERVER_KEY: ${FCM_SERVER_KEY}
|
FCM_SERVER_KEY: ${FCM_SERVER_KEY}
|
||||||
|
|
||||||
@@ -184,6 +191,59 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- honeydue-network
|
- honeydue-network
|
||||||
|
|
||||||
|
# Mailpit — local SMTP catcher (for Kratos email codes during onboarding)
|
||||||
|
mailpit:
|
||||||
|
image: axllent/mailpit:latest
|
||||||
|
container_name: honeydue-mailpit
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "${MAILPIT_SMTP_PORT:-1025}:1025"
|
||||||
|
- "${MAILPIT_HTTP_PORT:-8025}:8025"
|
||||||
|
networks:
|
||||||
|
- honeydue-network
|
||||||
|
|
||||||
|
# Kratos schema migration (one-shot, runs before kratos starts)
|
||||||
|
kratos-migrate:
|
||||||
|
image: oryd/kratos:v1.3.0
|
||||||
|
container_name: honeydue-kratos-migrate
|
||||||
|
command: ["migrate", "sql", "-e", "--yes"]
|
||||||
|
environment:
|
||||||
|
DSN: "postgres://${POSTGRES_USER:-honeydue}:${POSTGRES_PASSWORD:-honeydue_dev_password}@db:5432/kratos?sslmode=disable"
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- honeydue-network
|
||||||
|
restart: "no"
|
||||||
|
|
||||||
|
# Ory Kratos — identity service
|
||||||
|
kratos:
|
||||||
|
image: oryd/kratos:v1.3.0
|
||||||
|
container_name: honeydue-kratos
|
||||||
|
restart: unless-stopped
|
||||||
|
command: ["serve", "--config", "/etc/config/kratos/kratos.yml", "--watch-courier", "--dev"]
|
||||||
|
ports:
|
||||||
|
- "${KRATOS_PUBLIC_PORT:-4433}:4433"
|
||||||
|
- "${KRATOS_ADMIN_PORT:-4434}:4434"
|
||||||
|
environment:
|
||||||
|
DSN: "postgres://${POSTGRES_USER:-honeydue}:${POSTGRES_PASSWORD:-honeydue_dev_password}@db:5432/kratos?sslmode=disable"
|
||||||
|
LOG_LEVEL: "debug"
|
||||||
|
volumes:
|
||||||
|
- ./deploy/local/kratos:/etc/config/kratos:ro
|
||||||
|
depends_on:
|
||||||
|
kratos-migrate:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
mailpit:
|
||||||
|
condition: service_started
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://127.0.0.1:4434/health/ready"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 10
|
||||||
|
start_period: 10s
|
||||||
|
networks:
|
||||||
|
- honeydue-network
|
||||||
|
|
||||||
# Dozzle — lightweight real-time log viewer
|
# Dozzle — lightweight real-time log viewer
|
||||||
dozzle:
|
dozzle:
|
||||||
image: amir20/dozzle:latest
|
image: amir20/dozzle:latest
|
||||||
|
|||||||
@@ -194,10 +194,17 @@ See [Chapter 8](./08-database.md), [9](./09-storage.md), and
|
|||||||
until we have Apple Developer / Google Play accounts. The env vars are
|
until we have Apple Developer / Google Play accounts. The env vars are
|
||||||
set to sentinel values that let the Go app boot; `FEATURE_PUSH_ENABLED=false`
|
set to sentinel values that let the Go app boot; `FEATURE_PUSH_ENABLED=false`
|
||||||
gates all call sites.
|
gates all call sites.
|
||||||
- **External metrics/monitoring (Prometheus, Grafana, Betterstack).**
|
- **In-cluster Prometheus / Grafana.** Self-hosted Prometheus-compatible
|
||||||
Right now we rely on `kubectl logs`, `kubectl top`, and Cloudflare's own
|
metrics + tracing + dashboards live **outside** the k3s cluster on
|
||||||
analytics. See [Chapter 15](./15-observability.md) for what's there and
|
`88oakappsUpdate` (the same Linode VPS that hosts PostHog), reached
|
||||||
what we'd add.
|
via `https://obs.88oakapps.com` (Cloudflare-fronted, bearer-gated).
|
||||||
|
A `vmagent` sidecar in the honeydue namespace scrapes the api Pods
|
||||||
|
and remote-writes out. This frees ~700 MB of cluster RAM and means
|
||||||
|
observability survives a k3s control-plane incident. See
|
||||||
|
[Chapter 15](./15-observability.md).
|
||||||
|
- **Alerting.** No PagerDuty, Slack hooks, or pages-on-error wired up
|
||||||
|
yet. Histograms are flowing into Grafana — adding alert rules on top of them
|
||||||
|
is the next add. See [Chapter 15 — Future](./15-observability.md).
|
||||||
- **Automated backups of Redis state.** Redis is configured with AOF
|
- **Automated backups of Redis state.** Redis is configured with AOF
|
||||||
(append-only file) persistence, but the PVC is only on one node. Redis
|
(append-only file) persistence, but the PVC is only on one node. Redis
|
||||||
holds only cache + Asynq queue state; losing it re-populates on first
|
holds only cache + Asynq queue state; losing it re-populates on first
|
||||||
|
|||||||
@@ -27,23 +27,27 @@ that every legitimate port be enumerated in a rule.
|
|||||||
Run `sudo ufw status verbose` on any node to see the live ruleset. The
|
Run `sudo ufw status verbose` on any node to see the live ruleset. The
|
||||||
canonical ruleset below, grouped by purpose.
|
canonical ruleset below, grouped by purpose.
|
||||||
|
|
||||||
### Public-facing (anywhere)
|
### Public-facing
|
||||||
|
|
||||||
| Port | Protocol | From | Purpose | Comment |
|
| Port | Protocol | From | Purpose |
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|
|
||||||
| 22 | TCP | Anywhere | SSH | |
|
| 22 | TCP | Anywhere | SSH (key-only) |
|
||||||
| 80 | TCP | Anywhere | HTTP (Cloudflare → Traefik) | |
|
| 443 | TCP | Cloudflare ranges (15 IPv4 + 7 IPv6) | HTTPS (CF → Traefik, TLS-terminated at Traefik) |
|
||||||
| 443 | TCP | Anywhere | HTTPS (future, currently unused at origin) | |
|
|
||||||
|
|
||||||
**Why 443 is open but unused**: We're on Cloudflare SSL=Flexible, so
|
**Port :80 is closed** on all three nodes. CF is in Full (strict) mode
|
||||||
Cloudflare talks to origin over plain HTTP:80. Port 443 on origin is
|
and initiates every request on :443 to the origin. Cloudflare's
|
||||||
only hit by misconfigured clients (who bypass CF DNS and hit node IPs
|
"Always Use HTTPS" turns any plaintext client request into HTTPS at
|
||||||
directly). Traefik's config accepts it but we don't require it. Keeping
|
the edge, so the origin never needs to accept :80.
|
||||||
it open smooths a future switch to Full (strict) SSL mode.
|
|
||||||
|
|
||||||
**Future hardening**: Restrict 80 and 443 to Cloudflare's published IP
|
**Port :443 is restricted to Cloudflare** via 22 UFW allow rules per
|
||||||
ranges (15 IPv4 CIDRs, 7 IPv6 CIDRs). See [Chapter 13](./13-cloudflare.md)
|
node (one per CF CIDR). Direct-connect from any non-CF IP is dropped
|
||||||
for the ranges and the UFW rule format. Today they're open to anyone.
|
at the kernel. This closes the "node IP leak = bypass CF WAF/DDoS"
|
||||||
|
hole entirely. See [Chapter 13](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips)
|
||||||
|
for the exact ranges and UFW rule format.
|
||||||
|
|
||||||
|
**Refresh cadence**: CF updates its IP ranges rarely. A monthly
|
||||||
|
`curl https://www.cloudflare.com/ips-v4` (and `/ips-v6`) diff and UFW re-apply is
|
||||||
|
enough. Automation TODO (Chapter 20).
|
||||||
|
|
||||||
### SSH (operator access)
|
### SSH (operator access)
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,13 @@ long-haul components, and dedicated service accounts with dropped
|
|||||||
capabilities inside containers. This chapter documents each layer, the
|
capabilities inside containers. This chapter documents each layer, the
|
||||||
rationale, and what's currently missing (and why).
|
rationale, and what's currently missing (and why).
|
||||||
|
|
||||||
|
> **Updated 2026-05-15 — security remediation.** The 2026-05 audits
|
||||||
|
> (`live_scan_5_12.md`, `k3_audit_5_12.md`, `security_scan_5_12.md`) drove a
|
||||||
|
> full remediation pass. **`deploy-k3s/SECURITY.md` is the authoritative,
|
||||||
|
> per-finding current-state record.** This chapter is corrected for the
|
||||||
|
> major items below; where any other detail conflicts with `SECURITY.md`,
|
||||||
|
> `SECURITY.md` wins.
|
||||||
|
|
||||||
## Threat model
|
## Threat model
|
||||||
|
|
||||||
Who we're defending against, in rough order of likelihood:
|
Who we're defending against, in rough order of likelihood:
|
||||||
@@ -54,8 +61,8 @@ Cloudflare sits in front of every public request.
|
|||||||
- **Authorize requests** — that's the app's job
|
- **Authorize requests** — that's the app's job
|
||||||
- **Protect origin if origin IP leaks** — once someone knows a node IP
|
- **Protect origin if origin IP leaks** — once someone knows a node IP
|
||||||
they can bypass CF. Mitigation: keep origin firewall strict (Chapter 4).
|
they can bypass CF. Mitigation: keep origin firewall strict (Chapter 4).
|
||||||
- **Encrypt between CF and origin** — we're on SSL=Flexible, so CF↔origin
|
- **~~Encrypt between CF and origin~~** — done (2026-04-24): SSL mode is
|
||||||
is HTTP. This is in our TODO (Chapter 20, upgrade to Full-strict).
|
Full (strict); CF↔origin is TLS with a Cloudflare Origin CA cert.
|
||||||
|
|
||||||
### The proxy-IP problem
|
### The proxy-IP problem
|
||||||
|
|
||||||
@@ -75,8 +82,8 @@ This means a malicious request that bypasses CF (by hitting the node IP
|
|||||||
directly) can't spoof headers — Traefik ignores `X-Forwarded-*` unless
|
directly) can't spoof headers — Traefik ignores `X-Forwarded-*` unless
|
||||||
the source IP is in CF's ranges.
|
the source IP is in CF's ranges.
|
||||||
|
|
||||||
**TODO** (Chapter 20): Enforce at UFW level — allow 80/tcp only from
|
**Done (2026-04-24):** the node UFW allowlist permits `:443` only from
|
||||||
CF IP ranges. Today any IP can reach the origin on port 80.
|
Cloudflare's IP ranges; the `Anywhere` rules on `:80`/`:443` were removed.
|
||||||
|
|
||||||
## Layer 2 — Node (OS, SSH, firewall)
|
## Layer 2 — Node (OS, SSH, firewall)
|
||||||
|
|
||||||
@@ -297,15 +304,13 @@ The `deploy-k3s/manifests/network-policies.yaml` scaffold defines:
|
|||||||
reach api pods on port 8000
|
reach api pods on port 8000
|
||||||
- **allow-ingress-to-admin** — same, for admin:3000
|
- **allow-ingress-to-admin** — same, for admin:3000
|
||||||
|
|
||||||
**These are not currently applied.** Without them, our pods can freely
|
**Applied.** `03-deploy.sh` applies
|
||||||
talk to anything — including, theoretically, malicious destinations if
|
`deploy-k3s/manifests/network-policies.yaml` on every deploy — default-deny
|
||||||
an attacker gets RCE inside a pod.
|
plus the explicit per-app allows below. Traefik runs `hostNetwork`, so its
|
||||||
|
traffic is matched by node-IP `ipBlock`s plus the pod CIDR `10.42.0.0/16`,
|
||||||
|
not a `namespaceSelector`.
|
||||||
|
|
||||||
**TODO** (Chapter 20): Apply network policies. The scaffold is there; we
|
### What network policies prevent
|
||||||
just need to `kubectl apply -f deploy-k3s/manifests/network-policies.yaml`
|
|
||||||
and test that nothing breaks.
|
|
||||||
|
|
||||||
### What network policies would prevent
|
|
||||||
|
|
||||||
| Attack scenario | NetworkPolicy blocks |
|
| Attack scenario | NetworkPolicy blocks |
|
||||||
|---|---|
|
|---|---|
|
||||||
@@ -324,13 +329,10 @@ renewed Let's Encrypt or CF-managed cert for `*.myhoneydue.com`.
|
|||||||
|
|
||||||
### CF ↔ origin
|
### CF ↔ origin
|
||||||
|
|
||||||
**Plaintext HTTP** (SSL = Flexible). An attacker with access to the
|
**TLS — SSL = Full (strict)** (since 2026-04-24). A Cloudflare Origin CA
|
||||||
Cloudflare-to-Hetzner path could read traffic. In practice nobody who
|
certificate (`cloudflare-origin-cert` secret) is installed on all three
|
||||||
isn't Cloudflare or Hetzner sits on that path.
|
ingresses; Cloudflare validates it. Both user↔CF and CF↔origin are
|
||||||
|
encrypted, and a DNS-hijack MitM is defeated by the origin-cert check.
|
||||||
**TODO** (Chapter 20): Upgrade to SSL = Full (strict) with a Cloudflare
|
|
||||||
Origin CA certificate. This encrypts CF ↔ origin and verifies that
|
|
||||||
origin's cert is the CF-issued one (prevents MitM if DNS is compromised).
|
|
||||||
|
|
||||||
### API ↔ Neon Postgres
|
### API ↔ Neon Postgres
|
||||||
|
|
||||||
@@ -454,11 +456,14 @@ Mitigations:
|
|||||||
- Gitea itself is behind login; PAT is scoped to read:packages +
|
- Gitea itself is behind login; PAT is scoped to read:packages +
|
||||||
write:packages only
|
write:packages only
|
||||||
- Gitea runs on the operator's infrastructure (same operator account)
|
- Gitea runs on the operator's infrastructure (same operator account)
|
||||||
- Image tags are SHA-pinned (`:237c6b8`) not `:latest` → attacker can't
|
- Workloads deploy by immutable `@sha256:` digest, not by mutable tag
|
||||||
replace an existing tag's image without us noticing the digest change
|
(`03-deploy.sh` resolves the digest after push; the redis/vmagent/node
|
||||||
|
base images are digest-pinned too) — a swapped tag cannot reach the
|
||||||
|
cluster.
|
||||||
|
|
||||||
**TODO** (Chapter 20): Add cosign signing at build time, verify at pull
|
**TODO**: cosign signing is wired into `03-deploy.sh` (guarded — runs when
|
||||||
time.
|
`cosign` + `COSIGN_KEY` are present); cluster-side admission verification
|
||||||
|
(Kyverno/Connaisseur) is still pending. See `deploy-k3s/SECURITY.md` → L5.
|
||||||
|
|
||||||
## Operator workstation security
|
## Operator workstation security
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,13 @@
|
|||||||
# 06 — Traefik Ingress
|
# 06 — Traefik Ingress
|
||||||
|
|
||||||
|
> **Updated 2026-05-15 (security remediation):** the Traefik middleware set
|
||||||
|
> changed — `cloudflare-only` + `admin-auth` are now attached to the admin
|
||||||
|
> ingress, a strict `auth-rate-limit` middleware fronts the auth endpoints
|
||||||
|
> (via a dedicated `honeydue-api-auth` Ingress), and `security-headers`
|
||||||
|
> gained COOP/CORP + a 2-year preload HSTS and dropped the deprecated
|
||||||
|
> `X-XSS-Protection`. `deploy-k3s/SECURITY.md` is the authoritative
|
||||||
|
> current-state record.
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
Traefik is the reverse proxy that routes external HTTP requests to the
|
Traefik is the reverse proxy that routes external HTTP requests to the
|
||||||
@@ -280,16 +288,22 @@ most Ingress controllers and matches how users think about URL routing.
|
|||||||
|
|
||||||
## How requests flow
|
## How requests flow
|
||||||
|
|
||||||
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to one of three IPs
|
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to a CF edge IP
|
||||||
(round-robin). Say it picks `178.105.32.198` (hetzner2).
|
(client never sees the three origin IPs — CF proxies).
|
||||||
2. **Cloudflare edge** establishes TCP to `178.105.32.198:80` (plain HTTP,
|
2. **Cloudflare edge** terminates TLS from the browser, then opens a
|
||||||
SSL=Flexible). Original HTTPS terminated at CF.
|
fresh TCP connection to one of the origin IPs on `:443` (SSL=Full (strict)).
|
||||||
3. **UFW on hetzner2** accepts the SYN (80/tcp open from anywhere).
|
Say it picks `178.105.32.198` (hetzner2).
|
||||||
4. **Linux kernel** sees a listener on 0.0.0.0:80 (the Traefik pod).
|
3. **UFW on hetzner2** accepts the SYN — the source IP is in one of
|
||||||
Hands off the SYN.
|
the 15 CF IPv4 CIDRs allowed on `:443`. (Any non-CF source IP is
|
||||||
5. **Traefik accepts** the connection. Reads the HTTP request.
|
dropped at the kernel.)
|
||||||
|
4. **Linux kernel** sees a listener on `0.0.0.0:443` (the Traefik pod,
|
||||||
|
hostNetwork). Hands off the SYN.
|
||||||
|
5. **Traefik accepts** the connection, completes the TLS handshake
|
||||||
|
using the `cloudflare-origin-cert` secret (CF Origin CA — CF
|
||||||
|
verifies this chain on its side). Reads the decrypted HTTP request.
|
||||||
6. **Traefik matches** the `Host:` header against its router table.
|
6. **Traefik matches** the `Host:` header against its router table.
|
||||||
`Host: api.myhoneydue.com` → `honeydue-api` Ingress → `api` Service.
|
`Host: api.myhoneydue.com` → `honeydue-api` Ingress → `api` Service.
|
||||||
|
Attached middlewares (`security-headers`, `rate-limit`) run here.
|
||||||
7. **Traefik dials** `10.43.167.83:8000` (api Service ClusterIP). This
|
7. **Traefik dials** `10.43.167.83:8000` (api Service ClusterIP). This
|
||||||
goes through the cluster DNS (CoreDNS) and kube-proxy (IPVS).
|
goes through the cluster DNS (CoreDNS) and kube-proxy (IPVS).
|
||||||
8. **kube-proxy IPVS** rewrites the destination to a live api pod endpoint
|
8. **kube-proxy IPVS** rewrites the destination to a live api pod endpoint
|
||||||
|
|||||||
@@ -1,10 +1,17 @@
|
|||||||
# 07 — Services
|
# 07 — Services
|
||||||
|
|
||||||
|
> **Updated 2026-05-15 (security remediation):** Redis now requires a
|
||||||
|
> password (`config.yaml` `redis.password` → `honeydue-secrets`), all
|
||||||
|
> workloads deploy by immutable `@sha256:` digest, and the redis/vmagent
|
||||||
|
> base images are digest-pinned. `deploy-k3s/SECURITY.md` is the
|
||||||
|
> authoritative current-state record.
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
Four workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
Five workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
||||||
replicas), **admin** (Next.js panel, 1 replica), **worker** (Go background
|
replicas), **admin** (Next.js admin panel, 1 replica), **web** (Next.js
|
||||||
jobs, 1 replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
customer-facing app, 3 replicas), **worker** (Go background jobs, 1
|
||||||
|
replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
||||||
This chapter deep-dives each: container image, resource limits, probes,
|
This chapter deep-dives each: container image, resource limits, probes,
|
||||||
volumes, and why each knob is set the way it is.
|
volumes, and why each knob is set the way it is.
|
||||||
|
|
||||||
@@ -14,10 +21,11 @@ volumes, and why each knob is set the way it is.
|
|||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| `api` | `gitea.treytartt.com/admin/honeydue-api:<sha>` | 3 | 8000 | HTTP REST API |
|
| `api` | `gitea.treytartt.com/admin/honeydue-api:<sha>` | 3 | 8000 | HTTP REST API |
|
||||||
| `admin` | `gitea.treytartt.com/admin/honeydue-admin:<sha>` | 1 | 3000 | Next.js admin panel |
|
| `admin` | `gitea.treytartt.com/admin/honeydue-admin:<sha>` | 1 | 3000 | Next.js admin panel |
|
||||||
|
| `web` | `gitea.treytartt.com/admin/honeydue-web:<sha>` | 3 | 3000 | Next.js customer-facing web client at `app.myhoneydue.com` |
|
||||||
| `worker` | `gitea.treytartt.com/admin/honeydue-worker:<sha>` | 1 | — | Background job processor |
|
| `worker` | `gitea.treytartt.com/admin/honeydue-worker:<sha>` | 1 | — | Background job processor |
|
||||||
| `redis` | `redis:7-alpine` | 1 | 6379 | Cache + Asynq queue |
|
| `redis` | `redis:7-alpine` | 1 | 6379 | Cache + Asynq queue |
|
||||||
|
|
||||||
All four are Kubernetes `Deployment` workloads (not StatefulSets, not
|
All five are Kubernetes `Deployment` workloads (not StatefulSets, not
|
||||||
DaemonSets). They share:
|
DaemonSets). They share:
|
||||||
- ServiceAccount with `automountServiceAccountToken: false` (Chapter 5)
|
- ServiceAccount with `automountServiceAccountToken: false` (Chapter 5)
|
||||||
- `imagePullSecrets: [gitea-credentials]` (Chapter 11)
|
- `imagePullSecrets: [gitea-credentials]` (Chapter 11)
|
||||||
@@ -25,6 +33,66 @@ DaemonSets). They share:
|
|||||||
- Individual env vars wired to `honeydue-secrets` keys
|
- Individual env vars wired to `honeydue-secrets` keys
|
||||||
- Read-only root filesystem with `tmp` emptyDir mounted at `/tmp`
|
- Read-only root filesystem with `tmp` emptyDir mounted at `/tmp`
|
||||||
|
|
||||||
|
## Service — web (Next.js customer app)
|
||||||
|
|
||||||
|
### What it does
|
||||||
|
|
||||||
|
Lives at `https://app.myhoneydue.com`. Next.js 16 standalone build,
|
||||||
|
served by `node server.js` inside the container. Sibling repo:
|
||||||
|
`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-Web/`.
|
||||||
|
|
||||||
|
### Architecture: server-side proxy pattern
|
||||||
|
|
||||||
|
Unlike the admin panel (which makes CORS requests directly to
|
||||||
|
`api.myhoneydue.com`), the web app uses a proxy pattern:
|
||||||
|
|
||||||
|
```
|
||||||
|
Browser → https://app.myhoneydue.com/api/proxy/tasks/123/
|
||||||
|
→ Next.js route handler (src/app/api/proxy/[...path]/route.ts)
|
||||||
|
→ reads honeydue-token httpOnly cookie
|
||||||
|
→ attaches Authorization: Token <value>
|
||||||
|
→ https://api.myhoneydue.com/api/tasks/123/ (server-side fetch)
|
||||||
|
→ response flows back
|
||||||
|
```
|
||||||
|
|
||||||
|
**Consequences:**
|
||||||
|
- Browser never makes cross-origin requests. No CORS entry needed on
|
||||||
|
the Go API for `app.myhoneydue.com`.
|
||||||
|
- Auth tokens live in httpOnly cookies, not localStorage. XSS can't
|
||||||
|
exfiltrate them.
|
||||||
|
- The web pod needs outbound HTTPS to `api.myhoneydue.com` — covered
|
||||||
|
in the `allow-egress-from-web` NetworkPolicy (Chapter 5).
|
||||||
|
|
||||||
|
### Env vars
|
||||||
|
|
||||||
|
Build-time (baked into the client bundle by the Dockerfile `ARG`):
|
||||||
|
- `NEXT_PUBLIC_API_URL` — only used as a fallback; baked for safety
|
||||||
|
- `NEXT_PUBLIC_POSTHOG_KEY` — PostHog project API key
|
||||||
|
- `NEXT_PUBLIC_POSTHOG_HOST` — `https://analytics.88oakapps.com`
|
||||||
|
|
||||||
|
Runtime (ConfigMap):
|
||||||
|
- `API_URL=https://api.myhoneydue.com/api` — consumed by the
|
||||||
|
server-side proxy handlers
|
||||||
|
- `PORT=3000`, `HOSTNAME=0.0.0.0`
|
||||||
|
|
||||||
|
### Deployment spec highlights
|
||||||
|
|
||||||
|
- **3 replicas**, same as api — this is a production customer surface
|
||||||
|
- `topologySpreadConstraints` across `kubernetes.io/hostname` —
|
||||||
|
evicting one node kills at most one pod
|
||||||
|
- `readOnlyRootFilesystem: true`; `emptyDir`s at `/app/.next/cache`
|
||||||
|
(Next.js build cache) and `/tmp`
|
||||||
|
- PDB `web-pdb` with `minAvailable: 2`
|
||||||
|
- runAsUser/runAsGroup `1001` (matches the `nextjs` user created in
|
||||||
|
the Dockerfile)
|
||||||
|
|
||||||
|
### Why same availability as api
|
||||||
|
|
||||||
|
The web client is now the primary user-facing surface. Users hitting
|
||||||
|
`app.myhoneydue.com/login` should never see a 502 because a single
|
||||||
|
node went down. 3 replicas × `minAvailable: 2` guarantees at least
|
||||||
|
two pods stay up through any voluntary disruption.
|
||||||
|
|
||||||
## Service 1 — api (Go REST API)
|
## Service 1 — api (Go REST API)
|
||||||
|
|
||||||
### What it does
|
### What it does
|
||||||
@@ -113,13 +181,15 @@ doesn't run as root.
|
|||||||
file writes to the image layer. Go binary doesn't need to write to `/`;
|
file writes to the image layer. Go binary doesn't need to write to `/`;
|
||||||
only `/tmp` is mutable.
|
only `/tmp` is mutable.
|
||||||
|
|
||||||
**`startupProbe.failureThreshold: 48`** (= 48 × 5s = 240s grace) — this
|
**`startupProbe.failureThreshold: 48`** (= 48 × 5s = 240s grace) —
|
||||||
was bumped up from the scaffold default of 12. Reason: on first boot,
|
historically bumped from the scaffold default of 12 to absorb in-replica
|
||||||
the Go app runs `MigrateWithLock()` which acquires a Postgres advisory
|
migration time. Now that migrations run out-of-band as a Kubernetes
|
||||||
lock and runs AutoMigrate. First replica takes ~90s; subsequent
|
Job ([Chapter 8 §Schema management](./08-database.md)), pods boot in
|
||||||
replicas wait on the lock. With 3 replicas all starting simultaneously
|
seconds and only need a few probe failures of grace, but the budget
|
||||||
and the lock serializing them, 240s is the right grace. See
|
stays at 240s because cold pods on a fresh Hetzner node still pay
|
||||||
[Chapter 19](./19-postmortem-swarm.md) for the detailed story.
|
~10s for image pull + startup. See
|
||||||
|
[Chapter 19 §13](./19-postmortem-swarm.md) for the historical
|
||||||
|
context (the in-replica advisory-lock approach this replaced).
|
||||||
|
|
||||||
**`readinessProbe.initialDelaySeconds: 5`** — after the startupProbe
|
**`readinessProbe.initialDelaySeconds: 5`** — after the startupProbe
|
||||||
passes, wait 5s before starting readiness checks. Prevents a racy
|
passes, wait 5s before starting readiness checks. Prevents a racy
|
||||||
|
|||||||
+195
-75
@@ -4,8 +4,10 @@
|
|||||||
|
|
||||||
Authoritative user data lives in a Neon-managed Postgres database in AWS
|
Authoritative user data lives in a Neon-managed Postgres database in AWS
|
||||||
us-east-1. Connections use TLS (`DB_SSLMODE=require`). Schema is managed
|
us-east-1. Connections use TLS (`DB_SSLMODE=require`). Schema is managed
|
||||||
via GORM AutoMigrate inside the api binary, coordinated across replicas
|
via [pressly/goose](https://github.com/pressly/goose) running as a
|
||||||
by a Postgres advisory lock to prevent concurrent migration attempts.
|
one-shot Kubernetes Job before every api/worker rollout. See §Schema
|
||||||
|
management below for the full shape; ch19 §13 documents the previous
|
||||||
|
in-replica AutoMigrate approach this replaced.
|
||||||
|
|
||||||
## Why Neon
|
## Why Neon
|
||||||
|
|
||||||
@@ -32,7 +34,7 @@ Neon Launch won on:
|
|||||||
|
|
||||||
| Field | Value |
|
| Field | Value |
|
||||||
|---|---|
|
|---|---|
|
||||||
| Hostname | `ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech` |
|
| Hostname | `ep-floral-truth-amttbc5a-pooler.c-5.us-east-1.aws.neon.tech` |
|
||||||
| Port | 5432 |
|
| Port | 5432 |
|
||||||
| Username | `neondb_owner` |
|
| Username | `neondb_owner` |
|
||||||
| Database | `honeyDue` (case-sensitive!) |
|
| Database | `honeyDue` (case-sensitive!) |
|
||||||
@@ -58,9 +60,19 @@ paid tiers much higher.
|
|||||||
|
|
||||||
### PgBouncer on Neon
|
### PgBouncer on Neon
|
||||||
|
|
||||||
Neon provides a built-in PgBouncer at `-pooler` subdomain. Our hostname
|
Neon provides a built-in PgBouncer at the `-pooler` subdomain. The
|
||||||
already includes `-pooler` handling in the route, so connections go
|
non-pooler endpoint (`ep-floral-truth-amttbc5a.c-5.us-east-1...`) is
|
||||||
through PgBouncer transparently.
|
the direct compute endpoint and connects straight to Postgres,
|
||||||
|
paying the full TCP+TLS+startup handshake on every cold connection.
|
||||||
|
The `-pooler` endpoint multiplexes through PgBouncer in Neon's
|
||||||
|
infrastructure.
|
||||||
|
|
||||||
|
**We use the `-pooler` endpoint** because the direct endpoint paid
|
||||||
|
~440ms per cold handshake on a transatlantic link, visible as
|
||||||
|
1500ms-tail spikes in /api/tasks/ traces. The pooler keeps backend
|
||||||
|
Postgres connections warm in Neon's data center, so the only
|
||||||
|
latency our Go pods see is one TCP+TLS to PgBouncer (already
|
||||||
|
warm via our pool) plus one query round-trip.
|
||||||
|
|
||||||
Modes PgBouncer supports:
|
Modes PgBouncer supports:
|
||||||
- **session** — one server connection held per client session (transparent)
|
- **session** — one server connection held per client session (transparent)
|
||||||
@@ -68,26 +80,59 @@ Modes PgBouncer supports:
|
|||||||
- **statement** — per-statement (most aggressive; breaks many features)
|
- **statement** — per-statement (most aggressive; breaks many features)
|
||||||
|
|
||||||
Neon's pooler runs in **transaction mode**. This is compatible with GORM
|
Neon's pooler runs in **transaction mode**. This is compatible with GORM
|
||||||
out of the box (we don't use session-level features like prepared
|
runtime queries (we don't use session-level features like LISTEN/NOTIFY
|
||||||
statements or session variables).
|
or session-scope advisory locks in the data path). The one place this
|
||||||
|
matters is migrations: goose's session-scoped advisory lock can't
|
||||||
|
survive PgBouncer transaction-mode pooling. The migrate Job
|
||||||
|
(`deploy-k3s/manifests/migrate/job.yaml`) handles this by stripping
|
||||||
|
the `-pooler` segment from `DB_HOST` before invoking goose — runtime
|
||||||
|
keeps using the pooler, only migrations bypass it.
|
||||||
|
|
||||||
### Connection pool settings
|
### Connection pool settings
|
||||||
|
|
||||||
In `prod.env`:
|
In `config.yaml` (rendered into ConfigMap → env vars):
|
||||||
|
|
||||||
```
|
```yaml
|
||||||
DB_MAX_OPEN_CONNS=25
|
database:
|
||||||
DB_MAX_IDLE_CONNS=10
|
max_open_conns: 25
|
||||||
DB_MAX_LIFETIME=600s
|
max_idle_conns: 20
|
||||||
|
max_lifetime: "1800s"
|
||||||
|
max_idle_time: "0s"
|
||||||
```
|
```
|
||||||
|
|
||||||
These are the Go `database/sql` pool settings (GORM uses `database/sql`
|
These map to Go `database/sql` pool settings:
|
||||||
underneath):
|
|
||||||
|
|
||||||
- **MaxOpenConns: 25** — at most 25 concurrent connections per replica
|
- **MaxOpenConns: 25** — at most 25 concurrent connections per replica.
|
||||||
- **MaxIdleConns: 10** — keep up to 10 warm connections ready to reuse
|
- **MaxIdleConns: 20** — keep up to 20 warm connections per replica
|
||||||
- **MaxLifetime: 600s** — recycle connections after 10 min (prevents
|
ready to reuse. Bumped from 10 because the pooler tolerates many
|
||||||
stale state in long-lived connections, good for Neon's idle timeout)
|
client connections cheaply, and the cost of a cold handshake (~440ms
|
||||||
|
transatlantic) is far higher than the cost of holding an idle
|
||||||
|
connection.
|
||||||
|
- **MaxLifetime: 1800s** — recycle connections after 30 min. Bumped
|
||||||
|
from 600s; with the pooler keeping things warm, longer lifetime
|
||||||
|
reduces churn.
|
||||||
|
- **MaxIdleTime: 0s** — never close idle connections. Lifetime drives
|
||||||
|
recycling instead.
|
||||||
|
|
||||||
|
### Pool warm-up at boot
|
||||||
|
|
||||||
|
`database.Connect()` issues 20 parallel `PingContext` calls
|
||||||
|
immediately after opening the pool. This pre-establishes
|
||||||
|
`MaxIdleConns` connections to the pooler so the first user request
|
||||||
|
doesn't pay any handshake.
|
||||||
|
|
||||||
|
The warm-up is bounded by *one* round-trip time (~440ms cold), not
|
||||||
|
one round-trip per connection — pings run concurrently. Confirmed
|
||||||
|
in pod logs at boot:
|
||||||
|
|
||||||
|
```
|
||||||
|
{"level":"info","requested":20,"warmed":20,"message":"DB pool warm-up complete"}
|
||||||
|
```
|
||||||
|
|
||||||
|
If warm-up partially fails (e.g., 18/20 succeed), the pod still
|
||||||
|
starts; the pool fills the rest under traffic. Failure to ping at all
|
||||||
|
would be caught by the synchronous `sqlDB.Ping()` immediately before,
|
||||||
|
which is fatal.
|
||||||
|
|
||||||
### Worst-case connection count
|
### Worst-case connection count
|
||||||
|
|
||||||
@@ -107,66 +152,110 @@ the default 25/10. If we hit connection errors in prod, adjust.
|
|||||||
|
|
||||||
## Schema management
|
## Schema management
|
||||||
|
|
||||||
### GORM AutoMigrate
|
### goose
|
||||||
|
|
||||||
On startup, the Go API's `cmd/api/main.go` calls
|
We use [pressly/goose](https://github.com/pressly/goose) (pinned in the
|
||||||
`database.MigrateWithLock()` which:
|
api `Dockerfile` to v3.22.1) for schema migrations. Why goose specifically:
|
||||||
|
|
||||||
1. Opens a dedicated Postgres connection
|
- Each migration file runs inside its own transaction by default —
|
||||||
2. `SELECT pg_advisory_lock(1751412071)` — acquires a session-level
|
partial-failure recovery is built in (no "dirty" state to manually
|
||||||
advisory lock on a hardcoded key
|
unstick like golang-migrate).
|
||||||
3. Calls `db.AutoMigrate(&models.*{})` for every GORM model
|
- Locking is opt-in. We *don't* opt in. Migrations run as a single
|
||||||
4. `SELECT pg_advisory_unlock(...)` via deferred function
|
Kubernetes Job — that's the singleton process. No advisory-lock vs
|
||||||
5. Close the connection
|
PgBouncer-transaction-mode foot-gun.
|
||||||
|
- Plain SQL files. No DSL, no library integration in our Go code.
|
||||||
|
|
||||||
The advisory lock serializes migrations across replicas: when 3 api
|
See `docs/deployment/19-postmortem-swarm.md` (Schema Versioning section)
|
||||||
pods start simultaneously, one acquires the lock and migrates; the
|
for the AutoMigrate-with-advisory-lock approach this replaced and why.
|
||||||
others block on the lock. Once the first finishes (≤2s for already-
|
|
||||||
migrated schema, up to 90s on first cold boot), the next acquires and
|
|
||||||
sees the schema is current (no-op migrate).
|
|
||||||
|
|
||||||
### Why an advisory lock
|
### Migration files
|
||||||
|
|
||||||
Without it, concurrent `CREATE TABLE IF NOT EXISTS ...` statements from
|
Live under `migrations/`, named `<NNNNNN>_<short_name>.sql`. Each file
|
||||||
multiple replicas would race — Postgres usually handles it, but GORM's
|
has both the up and down migration in one file, separated by goose
|
||||||
AutoMigrate also alters tables (adds columns, indexes) which can deadlock
|
markers:
|
||||||
under concurrency.
|
|
||||||
|
|
||||||
The advisory lock pattern (also used by Rails + Django + Alembic) is the
|
```sql
|
||||||
canonical solution.
|
-- +goose Up
|
||||||
|
CREATE TABLE example (id bigint PRIMARY KEY);
|
||||||
|
|
||||||
### The lock key
|
-- +goose Down
|
||||||
|
DROP TABLE example;
|
||||||
|
```
|
||||||
|
|
||||||
`1751412071` is a hardcoded integer in `internal/database/database.go`.
|
Multi-statement constructs (`CREATE FUNCTION`, `DO $$ BEGIN ... END $$`)
|
||||||
Arbitrary but unique — as long as nothing else in the Postgres instance
|
need `-- +goose StatementBegin` / `-- +goose StatementEnd` wrappers
|
||||||
uses the same advisory lock key, no conflicts.
|
because goose splits on semicolons by default.
|
||||||
|
|
||||||
### First-boot behavior
|
`migrations/000001_init.sql` is the baseline — it captures every
|
||||||
|
table/index/sequence as it existed when goose was adopted, generated
|
||||||
|
via `pg_dump --schema-only --no-owner --no-privileges`. The pre-goose
|
||||||
|
hand-numbered migrations (002-022 in git history at commit
|
||||||
|
58e6997) had their effects folded into this baseline; they're gone
|
||||||
|
from the live tree but remain in git for archaeology.
|
||||||
|
|
||||||
On a **fresh database** (new Neon project), the first api pod runs
|
### Production migration flow
|
||||||
through every model's `CREATE TABLE` statement. This is ~50 tables for
|
|
||||||
honeyDue and takes ~90 seconds.
|
|
||||||
|
|
||||||
On a **warm database** (tables already exist), AutoMigrate is fast —
|
`deploy-k3s/scripts/03-deploy.sh` runs migrations as part of every
|
||||||
typically under 2 seconds. It still runs (GORM checks every model
|
deploy, **before** the api/worker rollout starts:
|
||||||
against the schema) but finds no work to do.
|
|
||||||
|
|
||||||
### Where this bit us
|
```
|
||||||
|
1. kubectl delete job honeydue-migrate (idempotent)
|
||||||
|
2. kubectl apply -f manifests/migrate/job.yaml (with current api image)
|
||||||
|
3. kubectl wait --for=condition=complete --timeout=10m job/honeydue-migrate
|
||||||
|
4. (only if Job succeeded) kubectl apply -f manifests/api/...
|
||||||
|
```
|
||||||
|
|
||||||
With 3 api pods starting simultaneously and migrations taking 90s first
|
The Job uses the api image — we install the goose CLI binary at
|
||||||
time, the lock queue for the last replica is ~180s. We needed a
|
`/usr/local/bin/goose` during the api Dockerfile build, so any pod that
|
||||||
startupProbe grace of 240s to cover this without false restart loops.
|
can run api can run goose. No separate image to build/push.
|
||||||
See Chapter 7 §startupProbe and Chapter 19 §MigrateWithLock.
|
|
||||||
|
|
||||||
### Downside: no schema versioning
|
The Job's `command` runs `goose ... up` against the **direct**
|
||||||
|
(non-pooler) Neon endpoint. Goose's session-scoped advisory lock can't
|
||||||
|
survive PgBouncer transaction-mode pooling, so the Job script strips
|
||||||
|
the `-pooler` segment from `DB_HOST` before connecting. The api/worker
|
||||||
|
runtime continues to use the pooler endpoint for everything else; only
|
||||||
|
this one Job needs the direct connection.
|
||||||
|
|
||||||
AutoMigrate can only *add* — new tables, new columns, new indexes. It
|
### Schema-version precondition
|
||||||
won't drop columns, rename them, or change types destructively. For
|
|
||||||
those we'd need raw SQL migrations (a tool like `golang-migrate` or
|
|
||||||
`dbmate`).
|
|
||||||
|
|
||||||
Today: we accept that schema changes are additive-only. When we need
|
`internal/database/database.go::RequireSchemaApplied()` runs at api and
|
||||||
destructive changes, we'd hand-write them.
|
worker startup. It queries `goose_db_version` for the highest applied
|
||||||
|
version and refuses to start if the table is missing or the latest row
|
||||||
|
is `is_applied=false`. This catches "operator forgot to run migrate" as
|
||||||
|
a clear boot error instead of a mysterious runtime "relation does not
|
||||||
|
exist" later.
|
||||||
|
|
||||||
|
### Local migration workflow
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Set the direct-endpoint DSN once
|
||||||
|
export DATABASE_URL="host=ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech \
|
||||||
|
user=neondb_owner password=$PG_PASSWORD dbname=honeyDue sslmode=require"
|
||||||
|
|
||||||
|
make migrate-status # what's pending
|
||||||
|
make migrate-up # apply
|
||||||
|
make migrate-down # roll back the latest
|
||||||
|
make migrate-new name=add_widget_col # scaffold a new SQL file
|
||||||
|
```
|
||||||
|
|
||||||
|
Each new migration file goes through code review like any other code
|
||||||
|
change. The deploy-script Job applies it on the next deploy.
|
||||||
|
|
||||||
|
### Bootstrap (one-time, when the prod DB already had a schema)
|
||||||
|
|
||||||
|
Bootstrapping a goose-managed DB whose schema already exists requires
|
||||||
|
seeding `goose_db_version` so goose treats version 1 as already-applied:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Once. After this, future migrations append normally.
|
||||||
|
goose -dir migrations postgres "$DATABASE_URL" version # creates the table
|
||||||
|
psql "$DATABASE_URL" -c \
|
||||||
|
"INSERT INTO goose_db_version (version_id, is_applied, tstamp) VALUES (1, true, NOW());"
|
||||||
|
```
|
||||||
|
|
||||||
|
This was done for honeyDue's prod Neon project at the time of goose
|
||||||
|
adoption — no need to repeat unless we set up a fresh DB from a
|
||||||
|
schema dump.
|
||||||
|
|
||||||
## What's in the database
|
## What's in the database
|
||||||
|
|
||||||
@@ -229,17 +318,45 @@ value.
|
|||||||
## Neon regions
|
## Neon regions
|
||||||
|
|
||||||
Neon's default region for new projects is `aws-us-east-1` (Virginia).
|
Neon's default region for new projects is `aws-us-east-1` (Virginia).
|
||||||
Our DB is there. Latency from Nuremberg to us-east-1 is **~90-120ms
|
Our DB is there. Latency from Nuremberg to us-east-1 is **~108ms one-way**
|
||||||
round trip**.
|
TCP-level (verified by `nc -z -w 5` from `hetzner1`), so **~220ms RTT
|
||||||
|
through Neon's pooler stack**.
|
||||||
|
|
||||||
This is the slowest hop in our data flow. Every api request that needs
|
This is the slowest hop in our data flow. Every api request that needs
|
||||||
a DB query (most of them) pays this latency at least once.
|
a DB query pays this latency at least once. Sub-millisecond Postgres
|
||||||
|
execution time (verified via `EXPLAIN ANALYZE`: 0.04-0.34 ms on every
|
||||||
|
hot path) means **wall-clock latency = network + Neon proxy overhead**.
|
||||||
|
|
||||||
**When this matters**: When we start seeing ~200ms+ response times from
|
### Optimizations layered on top to minimize round trips
|
||||||
complex endpoints, DB latency is likely the dominant cost. Options:
|
|
||||||
- Migrate Neon to `aws-eu-central-1` (Frankfurt) — shaves ~90ms off
|
We don't move the DB region (yet) but we cut the *number* of RTTs per
|
||||||
- Add Redis caching for hot reads (Chapter 7)
|
request via:
|
||||||
- Read replicas (Neon supports them on paid tiers)
|
|
||||||
|
1. **Auth caching** (Chapter 7 §Redis) — token + user lookups served
|
||||||
|
from Redis (1-hour TTL) and per-pod in-memory cache (5-min TTL).
|
||||||
|
On warm cache: 0 SQL round-trips for auth.
|
||||||
|
2. **JOIN consolidation** — two-step
|
||||||
|
`find residence-IDs → find tasks IN ids` collapsed into a single
|
||||||
|
query with a Postgres subquery. One RTT instead of two.
|
||||||
|
3. **Single-query auth** — token + user fetched in one INNER JOIN
|
||||||
|
instead of GORM's two-query Preload pattern.
|
||||||
|
4. **Residence-IDs Redis cache** — cached per user with 5-min TTL,
|
||||||
|
invalidated on Create/Delete/Join/Remove. Saves 1 RTT per
|
||||||
|
`/api/documents/`, `/api/contractors/`, `/api/residences/summary/`
|
||||||
|
request.
|
||||||
|
|
||||||
|
After these, a fully-warm `/api/tasks/` is **1 SQL round-trip total
|
||||||
|
(~220ms wall-clock)**. Verified via Jaeger trace — see Chapter 15.
|
||||||
|
|
||||||
|
### When this still matters
|
||||||
|
|
||||||
|
- Any cold-cache request still pays 2-3 RTTs (~500-700ms).
|
||||||
|
- Pod startup pays 1 RTT × 20 (warm-up), but that runs in parallel:
|
||||||
|
~440ms one-shot.
|
||||||
|
|
||||||
|
Long-term fix: migrate Neon to `aws-eu-central-1` (Frankfurt) — drops
|
||||||
|
RTT to ~5ms and brings warm-cache requests under 50ms. Tracked in
|
||||||
|
`docs/observability-plan.md` and Chapter 18 §migration triggers.
|
||||||
|
|
||||||
## Environment variables the app reads
|
## Environment variables the app reads
|
||||||
|
|
||||||
@@ -247,14 +364,15 @@ From ConfigMap:
|
|||||||
|
|
||||||
| Var | Purpose |
|
| Var | Purpose |
|
||||||
|---|---|
|
|---|---|
|
||||||
| `DB_HOST` | Neon pooler hostname |
|
| `DB_HOST` | Neon pooler hostname (`-pooler` suffix) |
|
||||||
| `DB_PORT` | 5432 |
|
| `DB_PORT` | 5432 |
|
||||||
| `POSTGRES_USER` | `neondb_owner` |
|
| `POSTGRES_USER` | `neondb_owner` |
|
||||||
| `POSTGRES_DB` | `honeyDue` |
|
| `POSTGRES_DB` | `honeyDue` |
|
||||||
| `DB_SSLMODE` | `require` |
|
| `DB_SSLMODE` | `require` |
|
||||||
| `DB_MAX_OPEN_CONNS` | 25 |
|
| `DB_MAX_OPEN_CONNS` | 25 |
|
||||||
| `DB_MAX_IDLE_CONNS` | 10 |
|
| `DB_MAX_IDLE_CONNS` | 20 |
|
||||||
| `DB_MAX_LIFETIME` | `600s` |
|
| `DB_MAX_LIFETIME` | `1800s` |
|
||||||
|
| `DB_MAX_IDLE_TIME` | `0s` (never close idle) |
|
||||||
|
|
||||||
From Secret (`honeydue-secrets`):
|
From Secret (`honeydue-secrets`):
|
||||||
|
|
||||||
@@ -288,11 +406,13 @@ GROUP BY usename, state, application_name;
|
|||||||
- [Neon docs][neon-docs]
|
- [Neon docs][neon-docs]
|
||||||
- [Neon pricing][neon-pricing]
|
- [Neon pricing][neon-pricing]
|
||||||
- [Postgres advisory locks][pg-locks]
|
- [Postgres advisory locks][pg-locks]
|
||||||
- [GORM AutoMigrate][gorm-automigrate]
|
- [pressly/goose][goose] — production migration tool
|
||||||
|
- [GORM AutoMigrate][gorm-automigrate] (tests only)
|
||||||
- [honeyDue task architecture][task-arch] (repo-local)
|
- [honeyDue task architecture][task-arch] (repo-local)
|
||||||
|
|
||||||
[neon-docs]: https://neon.com/docs/introduction
|
[neon-docs]: https://neon.com/docs/introduction
|
||||||
[neon-pricing]: https://neon.com/pricing
|
[neon-pricing]: https://neon.com/pricing
|
||||||
[pg-locks]: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS
|
[pg-locks]: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS
|
||||||
|
[goose]: https://github.com/pressly/goose
|
||||||
[gorm-automigrate]: https://gorm.io/docs/migration.html
|
[gorm-automigrate]: https://gorm.io/docs/migration.html
|
||||||
[task-arch]: ../../docs/TASK_LOGIC_ARCHITECTURE.md
|
[task-arch]: ../../docs/TASK_LOGIC_ARCHITECTURE.md
|
||||||
|
|||||||
+100
-33
@@ -150,18 +150,64 @@ Allowed MIME types: `image/jpeg`, `image/png`, `image/gif`, `image/webp`,
|
|||||||
|
|
||||||
## Access control
|
## Access control
|
||||||
|
|
||||||
### Upload flow
|
### Upload flow (current — direct-to-B2 with presigned POST)
|
||||||
|
|
||||||
1. Client POSTs to `/api/upload/`
|
Image and document uploads go **directly from the client to B2**. The
|
||||||
2. Go API validates the user is authenticated and authorized for the
|
api server only signs a short-lived POST policy; the bytes never
|
||||||
target resource
|
traverse our cluster. This is the WhatsApp / Slack architecture and
|
||||||
3. Go API streams the upload to B2 via minio-go's `PutObject`
|
sidesteps the api as a proxy bottleneck.
|
||||||
4. B2 returns a key
|
|
||||||
5. Go API stores the key in Postgres
|
|
||||||
6. Returns the key to the client
|
|
||||||
|
|
||||||
The B2 bucket is **private**. Clients can't GET directly; they always
|
1. Client `POST /api/uploads/presign` with `{category, content_type, content_length}`.
|
||||||
go through the Go API.
|
2. api validates auth, per-user quota (10 concurrent in-flight,
|
||||||
|
50/hour rate limit), allowed mime, and the 10 MB cap. On success it
|
||||||
|
creates a `pending_uploads` row, signs a B2 POST policy with a
|
||||||
|
`content-length-range` condition bound to the claimed length ±256
|
||||||
|
bytes, and returns `{id, upload_url, fields, key, expires_at}`.
|
||||||
|
3. Client multipart-POSTs the bytes directly to B2 using the returned
|
||||||
|
fields. **B2 enforces the size cap at the protocol level** — clients
|
||||||
|
can't bypass it by lying about Content-Length.
|
||||||
|
4. Client POSTs to the entity-creation endpoint (`/api/task-completions/`,
|
||||||
|
`/api/documents/`) with `upload_ids: [id]`. The service `HEAD`s each
|
||||||
|
B2 object, verifies size matches `expected_bytes`, marks the
|
||||||
|
`pending_uploads.claimed_at`, and writes the `task_completion_image`
|
||||||
|
/ `document_image` row referencing the upload.
|
||||||
|
|
||||||
|
The signed URL is valid for 15 minutes; presigns are not reusable.
|
||||||
|
|
||||||
|
The B2 bucket stays **private** — only the api ever holds the key
|
||||||
|
material. Clients can't list or GET directly without a presign.
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────┐ 1) presign ┌────────┐
|
||||||
|
│ client │ ──────────────────► │ api │
|
||||||
|
│ │ ◄────────────────── │ │ POST policy + key
|
||||||
|
│ │ └────────┘
|
||||||
|
│ │ row in
|
||||||
|
│ │ pending_uploads
|
||||||
|
│ │ (claimed_at NULL)
|
||||||
|
│ │ 2) POST bytes ┌────────┐
|
||||||
|
│ │ ──────────────────► │ B2 │ enforces policy
|
||||||
|
│ │ ◄────────────────── │ │
|
||||||
|
│ │ └────────┘
|
||||||
|
│ │ 3) attach ┌────────┐
|
||||||
|
│ │ ──────────────────► │ api │ HEAD B2 object,
|
||||||
|
│ │ upload_ids: [id] │ │ mark claimed_at,
|
||||||
|
│ │ └────────┘ insert image row
|
||||||
|
└──────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
Server-side enforcement summary:
|
||||||
|
|
||||||
|
| Check | Where | Reject if |
|
||||||
|
|---|---|---|
|
||||||
|
| Auth | api middleware | unauthenticated |
|
||||||
|
| Mime allowlist | `upload_service.go:allowedContentTypes` | not in list for category |
|
||||||
|
| Size cap (10 MB) | api before signing + B2 policy | content_length > 10 MiB |
|
||||||
|
| Concurrency cap (10) | `CountUnclaimedActiveForUser` | already 10 unclaimed in-flight |
|
||||||
|
| Rate limit (50/hr) | Redis sliding window `upload:presign:<uid>:<bucket>` | 51st presign in the same hour |
|
||||||
|
| Size at upload time | B2 (signed policy) | bytes outside content-length-range |
|
||||||
|
| Ownership at attach | `FindUnclaimedForUser` | upload_id belongs to a different user |
|
||||||
|
| Bytes match claim | `s3.Stat()` + bytes comparison | actual size differs from expected ±256 |
|
||||||
|
|
||||||
### Download flow (current)
|
### Download flow (current)
|
||||||
|
|
||||||
@@ -170,34 +216,55 @@ go through the Go API.
|
|||||||
3. Go API fetches from B2 and streams back to the client
|
3. Go API fetches from B2 and streams back to the client
|
||||||
|
|
||||||
This proxies every download through the api. For high-traffic media
|
This proxies every download through the api. For high-traffic media
|
||||||
that's inefficient (api becomes an egress bottleneck).
|
that's inefficient (api becomes an egress bottleneck) — could be
|
||||||
|
replaced with presigned GET URLs on the same bucket. Not yet shipped;
|
||||||
### Future: signed URLs
|
download volume is low enough that the proxy is fine for now.
|
||||||
|
|
||||||
We could generate time-limited signed URLs for B2 objects:
|
|
||||||
|
|
||||||
```go
|
|
||||||
url, err := s3Client.PresignedGetObject(ctx, bucket, key, 1*time.Hour, nil)
|
|
||||||
```
|
|
||||||
|
|
||||||
Returns a URL the client can GET directly from B2, scoped to a specific
|
|
||||||
object, valid for 1h. Saves api bandwidth and latency.
|
|
||||||
|
|
||||||
Not yet implemented. TODO (Chapter 20).
|
|
||||||
|
|
||||||
## Lifecycle and retention
|
## Lifecycle and retention
|
||||||
|
|
||||||
We have **no lifecycle rules** set on the bucket. Objects live forever
|
### Orphan cleanup (`pending_uploads`)
|
||||||
unless the app deletes them.
|
|
||||||
|
|
||||||
When a user deletes their account, the app should delete their B2
|
Every presign creates a row in `pending_uploads` with `expires_at =
|
||||||
objects. This is currently not automated — a compliance gap for any
|
now + 15 min`. If the client never finishes the upload, or finishes
|
||||||
"right to be forgotten" request.
|
but never calls the attach endpoint, the row stays unclaimed. An
|
||||||
|
hourly cron in the worker reaps them:
|
||||||
|
|
||||||
**TODO** (Chapter 20): Either:
|
- **`maintenance:upload_cleanup`** — cron `30 * * * *`. Selects
|
||||||
- Implement explicit cleanup in the user deletion handler, or
|
unclaimed rows past `expires_at`, deletes the corresponding B2
|
||||||
- Add B2 lifecycle rule tied to object metadata (tag objects with
|
object, deletes the row. Up to 500 per tick; the next tick picks up
|
||||||
user ID; rule deletes tagged objects when user is soft-deleted)
|
any overflow. Worker logs include `reaped` count.
|
||||||
|
|
||||||
|
The worker constructs a `StorageService` at startup; if storage init
|
||||||
|
fails (e.g. `B2_KEY_ID` / `B2_APP_KEY` not wired into the worker
|
||||||
|
deployment), the cleanup handler logs a warning and no-ops. See
|
||||||
|
`deploy-k3s/manifests/worker/deployment.yaml` — both B2 secrets are
|
||||||
|
required envs on this pod.
|
||||||
|
|
||||||
|
### Bucket lifecycle (backstop)
|
||||||
|
|
||||||
|
A B2 lifecycle rule on the `uploads/` prefix is the safety net if the
|
||||||
|
worker is offline for an extended period:
|
||||||
|
|
||||||
|
- Hide objects 7 days after upload.
|
||||||
|
- Delete 1 day after hidden.
|
||||||
|
|
||||||
|
This is configured manually via the Backblaze console (B2's S3
|
||||||
|
lifecycle API isn't fully implemented). See
|
||||||
|
`deploy-k3s/manifests/b2-lifecycle.md` for the exact rule and
|
||||||
|
`b2 bucket get-info` verification command.
|
||||||
|
|
||||||
|
### User-deletion cascade
|
||||||
|
|
||||||
|
When a user deletes their account, the app deletes their `task_*` /
|
||||||
|
`document` rows. The associated B2 objects survive — same compliance
|
||||||
|
gap as before, not yet automated. Two approaches:
|
||||||
|
|
||||||
|
- Walk the image rows on user delete and `RemoveObject` each (simple,
|
||||||
|
synchronous, slow for users with many uploads).
|
||||||
|
- Tag objects with a `user_id` metadata header at upload time, then
|
||||||
|
use a B2 lifecycle rule scoped to a deleted-users prefix.
|
||||||
|
|
||||||
|
Option 1 is the next item in the upload roadmap.
|
||||||
|
|
||||||
## Backup of B2
|
## Backup of B2
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
# 10 — Secrets & Config
|
# 10 — Secrets & Config
|
||||||
|
|
||||||
|
> **Updated 2026-05-15 (security remediation):** `honeydue-secrets` now
|
||||||
|
> carries `REDIS_PASSWORD`; an `admin-basic-auth` Secret backs the admin
|
||||||
|
> ingress; rotation is documented in `docs/runbooks/secret-rotation.md`;
|
||||||
|
> and the Go config can read file-mounted secrets (`HONEYDUE_SECRETS_DIR`).
|
||||||
|
> `deploy-k3s/SECURITY.md` is the authoritative current-state record.
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
Non-sensitive config (hostnames, ports, feature flags, etc.) lives in
|
Non-sensitive config (hostnames, ports, feature flags, etc.) lives in
|
||||||
@@ -55,7 +61,7 @@ APNS_AUTH_KEY_ID=DISABLED01
|
|||||||
APNS_AUTH_KEY_PATH=/secrets/apns/apns_auth_key.p8
|
APNS_AUTH_KEY_PATH=/secrets/apns/apns_auth_key.p8
|
||||||
APNS_PRODUCTION=false
|
APNS_PRODUCTION=false
|
||||||
APNS_TEAM_ID=DISABLED01
|
APNS_TEAM_ID=DISABLED01
|
||||||
APNS_TOPIC=com.tt.honeyDue
|
APNS_TOPIC=com.myhoneydue.honeyDue
|
||||||
APNS_USE_SANDBOX=false
|
APNS_USE_SANDBOX=false
|
||||||
BASE_URL=https://myhoneydue.com
|
BASE_URL=https://myhoneydue.com
|
||||||
B2_BUCKET_NAME=honeyDueProd
|
B2_BUCKET_NAME=honeyDueProd
|
||||||
|
|||||||
@@ -272,7 +272,7 @@ sequenceDiagram
|
|||||||
participant NewPod as api pod v2 (starting)
|
participant NewPod as api pod v2 (starting)
|
||||||
|
|
||||||
Note over NewPod: kubelet starts new pod
|
Note over NewPod: kubelet starts new pod
|
||||||
Note over NewPod: pod connects to Postgres<br/>MigrateWithLock runs (no-op)<br/>HTTP server starts<br/>readinessProbe passes
|
Note over NewPod: pod connects to Postgres<br/>RequireSchemaApplied checks goose_db_version<br/>HTTP server starts<br/>readinessProbe passes
|
||||||
Note over NewPod: kube-proxy updates endpoints<br/>NewPod added to Service pool
|
Note over NewPod: kube-proxy updates endpoints<br/>NewPod added to Service pool
|
||||||
CF->>Traefik: request 1
|
CF->>Traefik: request 1
|
||||||
Traefik->>OldPod: routed (old pod still in pool)
|
Traefik->>OldPod: routed (old pod still in pool)
|
||||||
|
|||||||
@@ -5,8 +5,9 @@
|
|||||||
Cloudflare sits in front of every public request. It provides DNS
|
Cloudflare sits in front of every public request. It provides DNS
|
||||||
(authoritative nameservers for `myhoneydue.com`), TLS termination at
|
(authoritative nameservers for `myhoneydue.com`), TLS termination at
|
||||||
the edge, DDoS mitigation, caching, and the round-robin fan-out across
|
the edge, DDoS mitigation, caching, and the round-robin fan-out across
|
||||||
our three node IPs. We use the Free plan. TLS mode is "Flexible"
|
our three node IPs. We use the Free plan. TLS mode is **Full (strict)**
|
||||||
(HTTP between CF and origin). This chapter documents every Cloudflare
|
— CF connects to origin over HTTPS and verifies the origin's cert
|
||||||
|
against CF's own Origin CA. This chapter documents every Cloudflare
|
||||||
setting that matters.
|
setting that matters.
|
||||||
|
|
||||||
## DNS
|
## DNS
|
||||||
@@ -72,53 +73,49 @@ when you want sub-second failover.
|
|||||||
|
|
||||||
## TLS
|
## TLS
|
||||||
|
|
||||||
### Mode: Flexible
|
### Mode: Full (strict)
|
||||||
|
|
||||||
CF Dashboard → SSL/TLS → Overview → **Flexible**.
|
CF Dashboard → SSL/TLS → Overview → **Full (strict)**.
|
||||||
|
|
||||||
**What this means:**
|
**What this means:**
|
||||||
- User ↔ Cloudflare: **TLS** (HTTPS)
|
- User ↔ Cloudflare: **TLS** (HTTPS) — CF serves its own Let's Encrypt cert
|
||||||
- Cloudflare ↔ Origin: **plaintext HTTP** (port 80)
|
- Cloudflare ↔ Origin: **TLS** (HTTPS :443) — origin serves our CF Origin CA cert; CF verifies it chains to CF's Origin CA root
|
||||||
|
|
||||||
**Why we chose it:**
|
**How it's wired:**
|
||||||
- No origin cert required on the Hetzner nodes
|
- k8s secret `cloudflare-origin-cert` (type `kubernetes.io/tls`) holds
|
||||||
- Zero Traefik cert-management complexity
|
`tls.crt` + `tls.key`. The cert is valid for `*.myhoneydue.com` +
|
||||||
- Fine for a site where CF terminates all user-facing TLS
|
`myhoneydue.com`, 15-year validity, issued by
|
||||||
|
`CloudFlare Origin CA SSL Certificate Authority`.
|
||||||
|
- All three `Ingress` resources in `deploy-k3s/manifests/ingress/ingress-simple.yaml`
|
||||||
|
reference the secret via `spec.tls[].secretName`.
|
||||||
|
- Traefik terminates TLS on :443 using the cert. Backend pods still
|
||||||
|
speak plain HTTP over the cluster network (Traefik → pod is an
|
||||||
|
intra-cluster hop, encrypted at the Flannel overlay layer).
|
||||||
|
|
||||||
**Downsides:**
|
**Why we chose Full (strict) over Flexible:**
|
||||||
- An attacker with network access between CF and Hetzner could read
|
- CF → origin traffic was plaintext on Flexible. Between Cloudflare's
|
||||||
traffic. Realistically: nobody between CF's POPs and Hetzner's
|
POPs and Hetzner Nuremberg is a lot of internet. Full (strict)
|
||||||
Nuremberg DC, but it's theoretically plaintext on the wire.
|
closes that gap.
|
||||||
- MitM risk if DNS gets hijacked and traffic is routed through an
|
- Origin cert is issued by a CF-internal-only CA, so it's useless to anyone who
|
||||||
unintended origin.
|
isn't CF. Non-CF clients that somehow bypass the UFW CF-IP allowlist
|
||||||
|
can't impersonate the origin because their cert wouldn't chain to
|
||||||
|
CF's Origin CA root.
|
||||||
|
|
||||||
### Future: Full (strict)
|
**Maintenance:** the Origin CA cert is valid for 15 years (expires
|
||||||
|
Apr 2041). No action needed until then. If rotation is ever required,
|
||||||
|
regenerate in CF dashboard → SSL/TLS → Origin Server, re-run the
|
||||||
|
`kubectl create secret tls cloudflare-origin-cert --dry-run=client -o yaml | kubectl apply -f -`
|
||||||
|
command, and Traefik picks it up on the next secret reload (no pod restart).
|
||||||
|
|
||||||
The next step up is **Full (strict)**: CF verifies origin's TLS cert
|
### Regenerating the cert (for the record)
|
||||||
and connects over HTTPS. Cloudflare provides free **Origin CA
|
|
||||||
certificates** for this: they're issued by a CF-internal CA that only
|
|
||||||
CF's own edge accepts. An attacker without a CF-signed cert can't
|
|
||||||
impersonate our origin.
|
|
||||||
|
|
||||||
Path to enable:
|
|
||||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
|
||||||
2. Download as PEM
|
|
||||||
3. Create k8s Secret `cloudflare-origin-cert`:
|
|
||||||
```bash
|
```bash
|
||||||
kubectl create secret tls cloudflare-origin-cert -n honeydue \
|
# After downloading cf-origin-cert.pem + cf-origin-key.pem from CF dashboard:
|
||||||
--cert=origin.crt --key=origin.key
|
kubectl -n honeydue create secret tls cloudflare-origin-cert \
|
||||||
|
--cert=cf-origin-cert.pem \
|
||||||
|
--key=cf-origin-key.pem \
|
||||||
|
--dry-run=client -o yaml | kubectl apply -f -
|
||||||
```
|
```
|
||||||
4. Add `tls:` block to our Ingress:
|
|
||||||
```yaml
|
|
||||||
spec:
|
|
||||||
tls:
|
|
||||||
- hosts: [api.myhoneydue.com]
|
|
||||||
secretName: cloudflare-origin-cert
|
|
||||||
```
|
|
||||||
5. Switch CF SSL mode to Full (strict)
|
|
||||||
|
|
||||||
Trade-off: the `cloudflare-origin-cert` expires (default 15 years), so
|
|
||||||
low maintenance. **TODO** (Chapter 20).
|
|
||||||
|
|
||||||
### Edge certificate
|
### Edge certificate
|
||||||
|
|
||||||
|
|||||||
@@ -8,23 +8,62 @@ No downtime if the change is backward-compatible. Rollback is
|
|||||||
`kubectl rollout undo`. This chapter walks through the full process,
|
`kubectl rollout undo`. This chapter walks through the full process,
|
||||||
plus alternate paths (config-only changes, manifest changes, hotfixes).
|
plus alternate paths (config-only changes, manifest changes, hotfixes).
|
||||||
|
|
||||||
## TL;DR for a code change
|
## TL;DR using the unified deploy script
|
||||||
|
|
||||||
|
The recommended path. `deploy-k3s/scripts/03-deploy.sh` builds all four
|
||||||
|
images (api, worker, admin, web), pushes to Gitea, regenerates the
|
||||||
|
ConfigMap from `config.yaml`, applies every manifest under
|
||||||
|
`deploy-k3s/manifests/` (including the observability vmagent), and
|
||||||
|
waits for all rollouts.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go
|
||||||
|
git add . && git commit -m "..." && git push gitea master
|
||||||
|
|
||||||
|
export KUBECONFIG=~/.kube/honeydue.yaml
|
||||||
|
bash deploy-k3s/scripts/03-deploy.sh # full build + push + rollout
|
||||||
|
# or, to redeploy without rebuilding:
|
||||||
|
bash deploy-k3s/scripts/03-deploy.sh --skip-build
|
||||||
|
# or, to pin a specific tag:
|
||||||
|
bash deploy-k3s/scripts/03-deploy.sh --tag d3708e6
|
||||||
|
```
|
||||||
|
|
||||||
|
What the script does, in order:
|
||||||
|
|
||||||
|
1. Read registry creds from `deploy-k3s/config.yaml`.
|
||||||
|
2. `docker login gitea.treytartt.com`.
|
||||||
|
3. Build all four images with `--platform linux/amd64` (so arm64 Macs
|
||||||
|
don't push images that crash on Hetzner amd64 nodes with
|
||||||
|
"exec format error").
|
||||||
|
4. Push to the gitea registry, plus tag and push `:latest`.
|
||||||
|
5. Generate the env file from `config.yaml` and apply as ConfigMap
|
||||||
|
`honeydue-config` (uses dry-run + apply for diff-free idempotence).
|
||||||
|
6. Apply `manifests/namespace.yaml`, `redis/`, `ingress/`,
|
||||||
|
`api/{deployment,service,hpa}`, `worker/`, `admin/`, `web/`.
|
||||||
|
7. Apply `manifests/observability/vmagent.yaml`, substituting
|
||||||
|
`TOKEN_PLACEHOLDER` with `OBS_INGEST_TOKEN` from `deploy/prod.env`
|
||||||
|
(gitignored). Skipped with a warning if the token isn't present.
|
||||||
|
8. `kubectl rollout status` for every Deployment, including vmagent.
|
||||||
|
|
||||||
|
~7–10 minutes for a full rebuild. ~1–2 minutes with `--skip-build`.
|
||||||
|
|
||||||
|
## TL;DR for a single-service code change (manual)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1. Commit + get SHA
|
# 1. Commit + get SHA
|
||||||
cd /Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go
|
cd /Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go
|
||||||
git add . && git commit -m "..." && SHA=$(git rev-parse --short HEAD)
|
git add . && git commit -m "..." && SHA=$(git rev-parse --short HEAD)
|
||||||
|
|
||||||
# 2. Login to Gitea registry
|
# 2. Log in to Gitea registry (creds in config.yaml)
|
||||||
set -a; source deploy/registry.env; set +a
|
docker login gitea.treytartt.com -u admin
|
||||||
printf '%s' "$REGISTRY_TOKEN" | docker login "$REGISTRY" -u "$REGISTRY_USERNAME" --password-stdin
|
|
||||||
|
|
||||||
# 3. Build + push amd64 image
|
# 3. Build + push amd64 image
|
||||||
docker buildx build --platform linux/amd64 --target api \
|
docker build --platform linux/amd64 --target api \
|
||||||
-t "gitea.treytartt.com/admin/honeydue-api:${SHA}" --push .
|
-t "gitea.treytartt.com/admin/honeydue-api:${SHA}" .
|
||||||
|
docker push "gitea.treytartt.com/admin/honeydue-api:${SHA}"
|
||||||
|
|
||||||
# 4. Roll it in
|
# 4. Roll it in
|
||||||
export KUBECONFIG=~/.kube/honeydue-k3s.yaml
|
export KUBECONFIG=~/.kube/honeydue.yaml
|
||||||
kubectl set image deployment/api -n honeydue \
|
kubectl set image deployment/api -n honeydue \
|
||||||
api="gitea.treytartt.com/admin/honeydue-api:${SHA}"
|
api="gitea.treytartt.com/admin/honeydue-api:${SHA}"
|
||||||
|
|
||||||
@@ -32,11 +71,18 @@ kubectl set image deployment/api -n honeydue \
|
|||||||
kubectl rollout status -n honeydue deployment/api
|
kubectl rollout status -n honeydue deployment/api
|
||||||
|
|
||||||
# 6. Log out
|
# 6. Log out
|
||||||
docker logout "$REGISTRY"
|
docker logout gitea.treytartt.com
|
||||||
```
|
```
|
||||||
|
|
||||||
~3–5 minutes end to end for api.
|
~3–5 minutes end to end for api.
|
||||||
|
|
||||||
|
> **Gotcha:** Deployments default to `imagePullPolicy: IfNotPresent`,
|
||||||
|
> which means kubelet won't re-fetch an image with a tag it already
|
||||||
|
> has cached locally — even if the registry now has different bytes
|
||||||
|
> at that tag. Always change tags (use the SHA), or temporarily flip
|
||||||
|
> `imagePullPolicy: Always` and `kubectl rollout restart` if you need
|
||||||
|
> to overwrite a tag.
|
||||||
|
|
||||||
## The build
|
## The build
|
||||||
|
|
||||||
### Step 1 — Prepare
|
### Step 1 — Prepare
|
||||||
@@ -201,6 +247,38 @@ kubectl patch secret honeydue-secrets -n honeydue \
|
|||||||
kubectl rollout restart -n honeydue deployment/api deployment/worker
|
kubectl rollout restart -n honeydue deployment/api deployment/worker
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## One-time B2 bucket lifecycle (manual)
|
||||||
|
|
||||||
|
The `pending_uploads` cleanup cron (`30 * * * *` on the worker) handles
|
||||||
|
the common case of reaping orphaned uploads. The B2 bucket lifecycle
|
||||||
|
rule on the `uploads/` prefix is the **backstop** if the worker is
|
||||||
|
offline for >24 hours. It's configured once via the Backblaze web
|
||||||
|
console — B2's S3 lifecycle API isn't fully implemented, so this can't
|
||||||
|
be in the deploy script.
|
||||||
|
|
||||||
|
One-time setup:
|
||||||
|
|
||||||
|
1. Open https://secure.backblaze.com/b2_buckets.htm → bucket
|
||||||
|
`honeyDueProd` → **Lifecycle Settings** → **Custom**
|
||||||
|
2. Add rule:
|
||||||
|
- File name prefix: `uploads/`
|
||||||
|
- Hide files older than: **7 days**
|
||||||
|
- Delete hidden files older than: **1 day**
|
||||||
|
|
||||||
|
Maximum total lifetime of an orphaned object under this rule (7 + 1): 8
|
||||||
|
days. The worker normally reaps within an hour, so the rule should
|
||||||
|
almost never trigger.
|
||||||
|
|
||||||
|
Verify:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Requires the b2 CLI: brew install b2-tools
|
||||||
|
b2 bucket get-info honeyDueProd | jq '.lifecycleRules'
|
||||||
|
```
|
||||||
|
|
||||||
|
See `deploy-k3s/manifests/b2-lifecycle.md` for the canonical rule
|
||||||
|
definition and a curl-based fallback if the b2 CLI isn't available.
|
||||||
|
|
||||||
## Manifest changes
|
## Manifest changes
|
||||||
|
|
||||||
When you add/modify a deployment YAML:
|
When you add/modify a deployment YAML:
|
||||||
@@ -271,10 +349,47 @@ Timeline (approximate, warm state):
|
|||||||
- t=60s: another old pod terminates
|
- t=60s: another old pod terminates
|
||||||
- ...continues until all on new RS
|
- ...continues until all on new RS
|
||||||
|
|
||||||
For cold-boot (e.g., first deploy on a rebuilt cluster), the
|
Migrations run as a separate Kubernetes Job that completes before any
|
||||||
MigrateWithLock advisory lock extends this to several minutes. But the
|
api/worker pod is rolled. So the rollout above never includes migration
|
||||||
rollout is serialized — only one pod starts per iteration, so the lock
|
work — pods that boot are guaranteed to find the schema already at the
|
||||||
queue is small.
|
expected version. See §"Migrations are gated, not interleaved" below.
|
||||||
|
|
||||||
|
## Migrations are gated, not interleaved
|
||||||
|
|
||||||
|
`03-deploy.sh` runs `goose up` as a one-shot Job before applying any
|
||||||
|
api/worker manifests:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. kubectl delete job honeydue-migrate (idempotent, removes prior run)
|
||||||
|
2. kubectl apply -f manifests/migrate/job.yaml (with current api image)
|
||||||
|
3. kubectl wait --for=condition=complete --timeout=10m job/honeydue-migrate
|
||||||
|
4. (only if Job succeeded) kubectl apply -f manifests/api/...
|
||||||
|
```
|
||||||
|
|
||||||
|
The Job uses the api image — `/usr/local/bin/goose` is baked in at
|
||||||
|
Dockerfile build time. The Job script strips the `-pooler` segment
|
||||||
|
from `DB_HOST` before connecting (goose's session-scoped advisory
|
||||||
|
lock can't survive PgBouncer transaction mode), then runs `goose up` and exits.
|
||||||
|
|
||||||
|
If the Job fails, the script aborts before any new app pod sees a
|
||||||
|
stale schema. To debug:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue logs job/honeydue-migrate --tail=200
|
||||||
|
kubectl -n honeydue describe job honeydue-migrate
|
||||||
|
```
|
||||||
|
|
||||||
|
After investigating, fix the migration file and re-run `03-deploy.sh`.
|
||||||
|
The Job is idempotent — successful migrations stay applied, only the
|
||||||
|
new/failed file gets retried.
|
||||||
|
|
||||||
|
api/worker pods run a `RequireSchemaApplied` check at startup that
|
||||||
|
queries `goose_db_version` and refuses to boot if the table is missing
|
||||||
|
or the latest row is `is_applied=false`. This is the fail-fast for
|
||||||
|
"someone bypassed the deploy script and the schema isn't current."
|
||||||
|
|
||||||
|
For full schema management background, see
|
||||||
|
[Chapter 8 §Schema management](./08-database.md).
|
||||||
|
|
||||||
## Hotfix workflow
|
## Hotfix workflow
|
||||||
|
|
||||||
@@ -314,14 +429,10 @@ Contrast: `deploy/scripts/deploy_prod.sh` (Swarm-era) did:
|
|||||||
9. Healthcheck the final URL; auto-rollback on failure
|
9. Healthcheck the final URL; auto-rollback on failure
|
||||||
10. Log out of registries
|
10. Log out of registries
|
||||||
|
|
||||||
Our current k3s deploy is more manual but simpler. We'd write a similar
|
The current k3s replacement, `deploy-k3s/scripts/03-deploy.sh`, covers
|
||||||
script for k3s if deploys become frequent:
|
the same ground in fewer steps because Kubernetes does the
|
||||||
|
versioning/rollout/health bookkeeping natively. See the TL;DR section
|
||||||
```bash
|
at the top of this chapter.
|
||||||
# deploy-k3s/scripts/04-deploy.sh (not yet updated for Gitea)
|
|
||||||
```
|
|
||||||
|
|
||||||
See the scaffold in `deploy-k3s/scripts/`.
|
|
||||||
|
|
||||||
## Common deploy failures
|
## Common deploy failures
|
||||||
|
|
||||||
|
|||||||
+302
-164
@@ -2,15 +2,119 @@
|
|||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
We have minimal observability today: `kubectl logs`, `kubectl top`,
|
Production has live metrics and tracing infrastructure as of 2026-04-25.
|
||||||
Cloudflare Analytics, and the Neon dashboard. No Prometheus, no Grafana,
|
A self-hosted **VictoriaMetrics + Jaeger + Grafana** stack runs on
|
||||||
no centralized log aggregator, no APM. This is adequate for the
|
`88oakappsUpdate` (Linode VPS, also home to the self-hosted PostHog
|
||||||
current traffic volume (low) but is a known gap. This chapter documents
|
deployment). A `vmagent` Deployment in the honeyDue k3s namespace scrapes
|
||||||
what we *have* and what we'd add as traffic grows.
|
the api Pods' `/metrics` endpoint every 15 seconds and remote-writes to
|
||||||
|
`https://obs.88oakapps.com/api/v1/write`. Grafana is at
|
||||||
|
`https://grafana.88oakapps.com` with a pre-provisioned RED dashboard.
|
||||||
|
|
||||||
|
What we still don't have: log aggregation (Dozzle and `kubectl logs`
|
||||||
|
fill the niche for now), alerting (no PagerDuty/Slack on errors), and
|
||||||
|
full distributed tracing (OTel SDK is wired in app code but app-side
|
||||||
|
instrumentation beyond HTTP routes hasn't shipped yet).
|
||||||
|
|
||||||
|
The whole observability stack costs **$0** incremental and uses ~700 MB
|
||||||
|
RAM on `88oakappsUpdate` (5% of its free RAM). It runs as a separate
|
||||||
|
docker-compose project from PostHog so neither product's lifecycle
|
||||||
|
touches the other.
|
||||||
|
|
||||||
## What we have
|
## What we have
|
||||||
|
|
||||||
### 1. `kubectl logs`
|
### 1. Metrics — VictoriaMetrics + vmagent
|
||||||
|
|
||||||
|
```
|
||||||
|
honeyDue k3s (Hetzner) 88oakappsUpdate (Linode)
|
||||||
|
┌───────────────────────────┐ ┌──────────────────────────┐
|
||||||
|
│ api Pods (3) :8000/metrics│ │ /opt/honeydue-obs/ │
|
||||||
|
│ prometheus/client_golang│ │ ┌──────────────────┐ │
|
||||||
|
│ │ │ │ VictoriaMetrics │ │
|
||||||
|
│ vmagent ──── scrape 15s │ │ │ 30d retention │ │
|
||||||
|
│ remote_write ─────┼────────────┼─→ /api/v1/write │ │
|
||||||
|
│ (HTTPS, bearer) │ │ │ (mem 256 MB) │ │
|
||||||
|
└───────────────────────────┘ │ └──────────────────┘ │
|
||||||
|
└──────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
The Go API exposes `/metrics` in Prometheus exposition format. Histograms
|
||||||
|
are defined in `internal/prom/metrics.go` and registered globally:
|
||||||
|
|
||||||
|
| Metric | Labels | Source |
|
||||||
|
|---|---|---|
|
||||||
|
| `http_request_duration_seconds` | `route, method, status` | Echo middleware around every handler |
|
||||||
|
| `gorm_query_duration_seconds` | `table, operation` | GORM before/after callbacks (no ctx threading needed) |
|
||||||
|
| `b2_upload_duration_seconds` | `bucket, result` | Wrapped `s.backend.Write` in `internal/services/storage_service.go` |
|
||||||
|
| `b2_upload_bytes_total` | `bucket, result` | Counter alongside the duration histogram |
|
||||||
|
| `apns_send_duration_seconds` | `result` (`ok`/`bad_token`/`error`) | Wrapped APNs `PushWithContext` in `internal/push/apns.go` |
|
||||||
|
| `fcm_send_duration_seconds` | `result` | Wrapped FCM HTTP v1 send in `internal/push/fcm.go` |
|
||||||
|
| `asynq_job_duration_seconds` | `task_type, result` | Histograms registered; middleware not yet attached (Step 3) |
|
||||||
|
| `go_*`, `process_*` | (standard) | `prometheus/client_golang/prometheus/collectors` defaults |
|
||||||
|
|
||||||
|
The previous custom monitoring at `/metrics` was renamed to
|
||||||
|
`/metrics/legacy` so the canonical `/metrics` emits proper histograms
|
||||||
|
suitable for `histogram_quantile()` rollups. The legacy endpoint stays
|
||||||
|
because the GoAdmin dashboard reads it.
|
||||||
|
|
||||||
|
#### vmagent in k3s
|
||||||
|
|
||||||
|
Lives at `deploy-k3s/manifests/observability/vmagent.yaml`. One replica,
|
||||||
|
`mem_limit: 256Mi`, scrapes by Kubernetes pod-discovery filtered to
|
||||||
|
`app.kubernetes.io/name=api` and remote-writes to
|
||||||
|
`https://obs.88oakapps.com/api/v1/write` with a bearer token from
|
||||||
|
`OBS_INGEST_TOKEN` in `deploy/prod.env` (substituted into a Secret at
|
||||||
|
deploy time).
|
||||||
|
|
||||||
|
The agent buffers locally to `/tmp/vmagent` (emptyDir, 512 MB cap), so
|
||||||
|
brief obs outages don't drop samples. Persistent queue replays on
|
||||||
|
reconnect.
|
||||||
|
|
||||||
|
NetworkPolicies in the honeydue namespace allow egress from vmagent to:
|
||||||
|
- DNS (kube-dns / coredns)
|
||||||
|
- Kubernetes API (`10.43.0.0/16:443`) for pod discovery
|
||||||
|
- api Pods on `10.42.0.0/16:8000`
|
||||||
|
- The public obs endpoint over `0.0.0.0/0:443`
|
||||||
|
|
||||||
|
These are scoped tight — vmagent can't reach Postgres or Redis. Note that the `0.0.0.0/0:443` rule still permits HTTPS egress to B2 or
|
||||||
|
any other external service.
|
||||||
|
|
||||||
|
### 2. Tracing — Jaeger all-in-one
|
||||||
|
|
||||||
|
Jaeger 1.62 with badger storage runs alongside VictoriaMetrics. The
|
||||||
|
collector accepts:
|
||||||
|
- OTLP/HTTP at `https://obs.88oakapps.com/v1/traces` (bearer-token gated)
|
||||||
|
- OTLP/gRPC at `:4317` (localhost-only)
|
||||||
|
- Native Jaeger protocols at `:14268` etc. (localhost-only)
|
||||||
|
|
||||||
|
Retention: ~7 days at current scale before badger rotates. UI at
|
||||||
|
`https://grafana.88oakapps.com` via the Jaeger datasource.
|
||||||
|
|
||||||
|
**Status of app-side instrumentation**: the histograms are populating
|
||||||
|
metrics. The OTel exporter wiring in `cmd/api/main.go` is **not yet
|
||||||
|
shipped**. When it does ship, every `POST /api/auth/login/` will produce
|
||||||
|
a flame-graph trace with HTTP → handler → SQL → B2 → APNs spans.
|
||||||
|
Tracking issue: gitea#3.
|
||||||
|
|
||||||
|
### 3. Dashboards — Grafana
|
||||||
|
|
||||||
|
`https://grafana.88oakapps.com` (Cloudflare-fronted, basic auth via
|
||||||
|
Grafana itself, admin credentials in `deploy/prod.env`).
|
||||||
|
|
||||||
|
Datasources auto-provisioned at container startup from
|
||||||
|
`/opt/honeydue-obs/data/grafana-provisioning/datasources/datasources.yaml`:
|
||||||
|
- VictoriaMetrics (Prometheus type, `http://victoriametrics:8428` in-network)
|
||||||
|
- Jaeger (`http://jaeger:16686` in-network)
|
||||||
|
|
||||||
|
Pre-provisioned dashboard: `honeyDue API — RED` at
|
||||||
|
`/d/honeydue-red`. Top row uses the legacy custom metrics
|
||||||
|
(`http_endpoint_requests_total`, `http_requests_total`) which started
|
||||||
|
flowing the moment vmagent attached. Lower rows use the new histograms
|
||||||
|
(`http_request_duration_seconds_bucket` p50/p95/p99 by route, GORM p95
|
||||||
|
by table, B2 upload p95, APNs/FCM send p95, Go memory + goroutines).
|
||||||
|
Lower rows populated immediately after the api rebuild that shipped
|
||||||
|
`internal/prom`.
|
||||||
|
|
||||||
|
### 4. `kubectl logs`
|
||||||
|
|
||||||
Every container's stdout/stderr is captured by containerd and readable
|
Every container's stdout/stderr is captured by containerd and readable
|
||||||
via kubectl:
|
via kubectl:
|
||||||
@@ -33,9 +137,10 @@ kubectl get events -n honeydue --sort-by=.lastTimestamp
|
|||||||
Only the last ~20 MB of logs is retained per container, on-disk on the
|
Only the last ~20 MB of logs is retained per container, on-disk on the
|
||||||
node. Once a pod is deleted, its logs are gone.
|
node. Once a pod is deleted, its logs are gone.
|
||||||
|
|
||||||
For persistent log access we'd need aggregation (see §what we'd add).
|
For persistent log access we'd need aggregation (see §What we still
|
||||||
|
don't have).
|
||||||
|
|
||||||
### 2. `kubectl top`
|
### 5. `kubectl top`
|
||||||
|
|
||||||
Pod and node resource usage via metrics-server:
|
Pod and node resource usage via metrics-server:
|
||||||
|
|
||||||
@@ -43,43 +148,32 @@ Pod and node resource usage via metrics-server:
|
|||||||
kubectl top nodes
|
kubectl top nodes
|
||||||
# NAME CPU(cores) CPU(%) MEMORY(bytes) MEMORY(%)
|
# NAME CPU(cores) CPU(%) MEMORY(bytes) MEMORY(%)
|
||||||
# ubuntu-8gb-nbg1-1 169m 4% 748Mi 9%
|
# ubuntu-8gb-nbg1-1 169m 4% 748Mi 9%
|
||||||
# ubuntu-8gb-nbg1-2 229m 5% 1043Mi 13%
|
|
||||||
# ubuntu-8gb-nbg1-3 124m 3% 770Mi 9%
|
|
||||||
|
|
||||||
kubectl top pods -n honeydue
|
kubectl top pods -n honeydue
|
||||||
```
|
```
|
||||||
|
|
||||||
**Retention**: In-memory only. Last few minutes of data. No
|
In-memory only; last few minutes of data. For historical trends use
|
||||||
historical view.
|
the Grafana dashboard, which exposes the same data via the `go_*` and
|
||||||
|
`container_*` (kubelet cAdvisor) metrics.
|
||||||
|
|
||||||
### 3. Cloudflare Analytics
|
### 6. Cloudflare Analytics
|
||||||
|
|
||||||
CF Dashboard → Analytics & Logs. Per-zone stats:
|
CF Dashboard → Analytics & Logs. Per-zone aggregate stats:
|
||||||
- Requests per second
|
requests/sec, bandwidth, cache hit ratio, top status codes, top paths,
|
||||||
- Bandwidth
|
bot traffic score. Good for spotting macro trends ("suddenly 10× more
|
||||||
- Cache hit ratio
|
502s today") that wouldn't show up in a single-pod sample.
|
||||||
- Top HTTP status codes
|
|
||||||
- Top request paths
|
|
||||||
- Bot traffic score
|
|
||||||
|
|
||||||
All aggregated, no individual request traces. Good for spotting macro
|
Free tier retention: 7 days of aggregate stats.
|
||||||
trends ("suddenly 10× more 502s today"), poor for debugging specific
|
|
||||||
issues.
|
|
||||||
|
|
||||||
Free tier retention: 7 days of aggregate stats. Pro extends this.
|
### 7. Neon dashboard
|
||||||
|
|
||||||
### 4. Neon dashboard
|
Neon console → project → Monitoring: compute utilization (CU-hours),
|
||||||
|
slow queries, active connections, storage usage. Useful for "is the
|
||||||
|
DB busy?" and free-tier limit watching. The new
|
||||||
|
`gorm_query_duration_seconds` histogram covers the application side
|
||||||
|
of the same question with much better latency tail visibility.
|
||||||
|
|
||||||
Neon console → project → Monitoring:
|
### 8. Kubernetes events
|
||||||
- Compute utilization (CU-hours consumed)
|
|
||||||
- Query performance (slow queries)
|
|
||||||
- Active connections
|
|
||||||
- Storage usage
|
|
||||||
|
|
||||||
Good for "is the DB busy?" and "am I close to my free tier limit?"
|
|
||||||
Not real-time.
|
|
||||||
|
|
||||||
### 5. Kubernetes events
|
|
||||||
|
|
||||||
`kubectl get events` shows cluster-level state changes: pod scheduling,
|
`kubectl get events` shows cluster-level state changes: pod scheduling,
|
||||||
failures, image pulls, probe failures. Useful for post-mortem on
|
failures, image pulls, probe failures. Useful for post-mortem on
|
||||||
@@ -87,7 +181,7 @@ deploys.
|
|||||||
|
|
||||||
Retention: events are stored in etcd but default to 1 hour.
|
Retention: events are stored in etcd but default to 1 hour.
|
||||||
|
|
||||||
## What we don't have (the gap)
|
## What we still don't have
|
||||||
|
|
||||||
### No log aggregation
|
### No log aggregation
|
||||||
|
|
||||||
@@ -98,64 +192,108 @@ all api pod logs for user X") we have to:
|
|||||||
# Query all at once with stern (if installed)
|
# Query all at once with stern (if installed)
|
||||||
stern -n honeydue api
|
stern -n honeydue api
|
||||||
|
|
||||||
# Or for specific pod
|
# Or per-pod
|
||||||
kubectl logs -n honeydue <pod> | grep user_id=12345
|
kubectl logs -n honeydue <pod> | grep user_id=12345
|
||||||
```
|
```
|
||||||
|
|
||||||
This works but doesn't scale. Grep across 3 pods for a specific
|
This works but doesn't scale across many pods.
|
||||||
user_id is OK. Across 30 pods, intractable.
|
|
||||||
|
|
||||||
**What we'd add**: [Loki](https://grafana.com/oss/loki/) — a lightweight
|
**What we'd add**: [Loki](https://grafana.com/oss/loki/) on
|
||||||
log aggregator designed for k8s. ~$0 to self-host; integrates with
|
`88oakappsUpdate` next to the existing obs stack. Adds ~512 MB RAM
|
||||||
Grafana for queries. Or [Betterstack](https://betterstack.com/logs)
|
plus a Promtail (or Vector/Alloy) DaemonSet in k3s. Defer until log
|
||||||
($10/mo, hosted).
|
search becomes a recurring pain point — `stern` + `grep` is fine at
|
||||||
|
current pod count.
|
||||||
### No metrics/dashboards
|
|
||||||
|
|
||||||
`kubectl top` tells us "is this pod hot right now?" but not "has CPU
|
|
||||||
been climbing over the past hour?" We'd need:
|
|
||||||
|
|
||||||
- **Prometheus** — scrapes metrics from kubelet and pods' `/metrics`
|
|
||||||
endpoints, stores time series
|
|
||||||
- **Grafana** — queries Prometheus, renders dashboards
|
|
||||||
|
|
||||||
K3s can install these via Helm in ~10 minutes. Adds ~500MB RAM to the
|
|
||||||
cluster. Stability and operational load: moderate.
|
|
||||||
|
|
||||||
**Alternative**: [Kubernetes Dashboard](https://github.com/kubernetes/dashboard)
|
|
||||||
bundled with k3s (disabled by default). Minimal UI over the existing
|
|
||||||
metrics API. Cheaper than Prometheus but less queryable.
|
|
||||||
|
|
||||||
### No distributed tracing
|
|
||||||
|
|
||||||
"This request took 800ms — which hop was slow?" is currently unanswerable
|
|
||||||
beyond "the DB query, probably." A real trace would show:
|
|
||||||
- TLS handshake time
|
|
||||||
- Traefik routing time
|
|
||||||
- Go handler time
|
|
||||||
- Postgres query time
|
|
||||||
- Redis call time
|
|
||||||
- Each B2 request time
|
|
||||||
|
|
||||||
We'd add OpenTelemetry to the Go app and export to Jaeger/Tempo. Work
|
|
||||||
is moderate; value kicks in when we have complex request flows.
|
|
||||||
|
|
||||||
### No alerting
|
### No alerting
|
||||||
|
|
||||||
No PagerDuty, no Slack webhooks, no email on "api is returning 500s."
|
No PagerDuty, no Slack webhooks, no email on "api is returning 500s."
|
||||||
The operator finds out when users complain.
|
The operator finds out when users complain.
|
||||||
|
|
||||||
Cheapest fix: [Uptime Kuma](https://github.com/louislam/uptime-kuma)
|
Cheapest fix path:
|
||||||
(self-hosted) or Better Stack Uptime (free for small teams). Ping
|
1. Grafana alerting (built into Grafana 11) — alert rules over the
|
||||||
`https://api.myhoneydue.com/api/health/` every minute; alert if it fails.
|
existing histograms (e.g., `histogram_quantile(0.95, ...) > 1s`).
|
||||||
|
Routes to Slack via webhook. **Zero infra cost.**
|
||||||
|
2. [Uptime Kuma](https://github.com/louislam/uptime-kuma) on
|
||||||
|
`88oakappsUpdate` — pings `/api/health/` from outside the cluster
|
||||||
|
every minute; complements the in-cluster view.
|
||||||
|
|
||||||
|
We'd want both eventually. Grafana alerting first because the data is
|
||||||
|
already there.
|
||||||
|
|
||||||
|
### Distributed tracing — fully integrated
|
||||||
|
|
||||||
|
The OTel SDK is wired in `cmd/api/main.go` and `cmd/worker/main.go` and
|
||||||
|
ships traces to Jaeger via `obs.88oakapps.com/v1/traces`. Every public
|
||||||
|
service method now takes `ctx context.Context` and routes its SQL through
|
||||||
|
`repo.WithContext(ctx)`, which means **every authenticated API endpoint
|
||||||
|
produces a fully-nested flame graph** in Jaeger.
|
||||||
|
|
||||||
|
| Span source | Status |
|
||||||
|
|---|---|
|
||||||
|
| `otelecho.Middleware` — span per HTTP request | ✅ live |
|
||||||
|
| Auth middleware DB lookups (`m.db.WithContext(ctx)`) | ✅ live |
|
||||||
|
| All repos via `repo.WithContext(ctx)` (`otelgorm` plugin) | ✅ live |
|
||||||
|
| Manual span around `storage_service.Upload` (B2 PutObject) | ✅ live |
|
||||||
|
| Manual span around APNs `Send` / `SendWithCategory` | ✅ live |
|
||||||
|
| Manual span around FCM `sendOne` | ✅ live |
|
||||||
|
| Asynq middleware — span per task type with retry/payload attrs | ✅ live |
|
||||||
|
|
||||||
|
Migrated services (every public method takes ctx):
|
||||||
|
- `AuthService` — login, register, refresh, logout, me, verify-email,
|
||||||
|
forgot/reset-password, update-profile
|
||||||
|
- `TaskService` — all 25+ task and completion methods
|
||||||
|
- `ResidenceService` — all 15 methods including share-codes
|
||||||
|
- `ContractorService` — all 9 methods
|
||||||
|
- `DocumentService` — all 10 methods
|
||||||
|
- `NotificationService` — all 12 methods
|
||||||
|
- `SubscriptionService` — all 12 methods including Apple/Google IAP
|
||||||
|
|
||||||
|
Sample trace for `GET /api/tasks/` (warm cache, post-optimization):
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/tasks/ (229ms)
|
||||||
|
└── service: SELECT * FROM task_task WHERE residence_id IN
|
||||||
|
(SELECT id FROM residence_residence WHERE...) (227ms)
|
||||||
|
```
|
||||||
|
|
||||||
|
Two spans total. The auth path runs entirely from Redis + in-memory
|
||||||
|
cache (zero SQL queries) thanks to the 1-hour token TTL and 5-min user
|
||||||
|
TTL. The residence-ID lookup is folded into the tasks query as a
|
||||||
|
Postgres subquery, so a single network round-trip to Neon services the
|
||||||
|
whole request. See Chapter 8 §"Optimizations layered on top" for the
|
||||||
|
optimization stack.
|
||||||
|
|
||||||
|
Earlier trace, before the optimization stack landed (commit 88fb175):
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/tasks/ (2473ms)
|
||||||
|
├── auth: SELECT * FROM user_authtoken WHERE key=... (1506ms)
|
||||||
|
├── auth: SELECT * FROM auth_user WHERE id=7 (333ms)
|
||||||
|
├── service: SELECT id FROM residence_residence WHERE... (736ms)
|
||||||
|
└── service: SELECT * FROM task_task WHERE residence_id IN(...) (226ms)
|
||||||
|
```
|
||||||
|
|
||||||
|
10× improvement from 2,473ms to 229ms by cutting query count
|
||||||
|
(4 SQL → 1 SQL on warm cache). The 227ms in the surviving query is
|
||||||
|
**1 transatlantic round-trip** to Neon us-east-1 from Hetzner
|
||||||
|
Nuremberg — the physical floor on the current setup. Eliminated by
|
||||||
|
migrating Neon to an EU region; tracked in [Chapter 18 §migration
|
||||||
|
triggers](./18-cost.md) and `docs/observability-plan.md`.
|
||||||
|
|
||||||
|
**Migration pattern (for any future services or middleware):** add
|
||||||
|
`ctx context.Context` as the first arg, change the handler call site
|
||||||
|
to pass `c.Request().Context()`, and replace `s.repo.X(...)` with
|
||||||
|
`s.repo.WithContext(ctx).X(...)`. Tests pass `context.Background()`.
|
||||||
|
|
||||||
### No APM (Application Performance Monitoring)
|
### No APM (Application Performance Monitoring)
|
||||||
|
|
||||||
No request-level profiling. We can't see "which endpoint has the highest
|
No continuous profiling. We can answer "which endpoint has the highest
|
||||||
p99 latency?" or "which SQL query is hot this week?"
|
p99 latency?" from the histograms, but not "where in the call stack is
|
||||||
|
the time going?" without ad-hoc `pprof` runs.
|
||||||
|
|
||||||
Options: Datadog, New Relic, Honeycomb, self-hosted Tempo+Grafana.
|
If/when needed: Grafana Pyroscope is the OSS continuous profiler that
|
||||||
All are meaningful work to set up and cost $$$.
|
fits our stack. Adds ~512 MB RAM. Defer until a CPU performance
|
||||||
|
incident shows up.
|
||||||
|
|
||||||
## The app's logging conventions
|
## The app's logging conventions
|
||||||
|
|
||||||
@@ -172,28 +310,12 @@ The Go app uses zerolog and emits structured JSON:
|
|||||||
```
|
```
|
||||||
|
|
||||||
Log levels: `debug`, `info`, `warn`, `error`, `fatal`. Controlled by
|
Log levels: `debug`, `info`, `warn`, `error`, `fatal`. Controlled by
|
||||||
`DEBUG=true|false` in ConfigMap (true sets level to debug, false sets
|
`DEBUG=true|false` in the ConfigMap (true sets level to debug, false
|
||||||
level to info).
|
sets level to info).
|
||||||
|
|
||||||
Every request is logged with:
|
Every request is logged with method, path, status, request_id, user_id
|
||||||
- Method, path, status code
|
(if authenticated), latency. Queryable by grep today; ready to ingest
|
||||||
- Request ID (for correlating logs across pods)
|
into Loki when we add it.
|
||||||
- User ID (if authenticated)
|
|
||||||
- Latency
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"level": "info",
|
|
||||||
"method": "GET",
|
|
||||||
"path": "/api/tasks/",
|
|
||||||
"status": 200,
|
|
||||||
"latency_ms": 42,
|
|
||||||
"user_id": 123,
|
|
||||||
"request_id": "a6b5db35-..."
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
This is queryable by grep. Better with log aggregation.
|
|
||||||
|
|
||||||
## Health endpoints
|
## Health endpoints
|
||||||
|
|
||||||
@@ -202,71 +324,58 @@ Each service exposes a health endpoint:
|
|||||||
| Service | Endpoint | What it checks |
|
| Service | Endpoint | What it checks |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| api | `/api/health/` | Process alive (doesn't verify DB) |
|
| api | `/api/health/` | Process alive (doesn't verify DB) |
|
||||||
|
| api | `/api/health/live` | Process alive |
|
||||||
| admin | `/` | Next.js is up |
|
| admin | `/` | Next.js is up |
|
||||||
| worker | (none public) | Internal Asynq status |
|
| worker | (none public) | Internal Asynq status |
|
||||||
|
| api | `/metrics` | Prometheus exposition (vmagent scrapes here) |
|
||||||
|
| api | `/metrics/legacy` | Custom monitoring metrics for GoAdmin |
|
||||||
|
|
||||||
Health endpoints are **shallow** — they return 200 if the process is
|
Health endpoints are **shallow** — they return 200 if the process is
|
||||||
running and listening. They don't try to reach Postgres/Redis/etc.
|
running and listening. They don't try to reach Postgres/Redis/etc.
|
||||||
Rationale: if Postgres is briefly down, we don't want all api pods to
|
Rationale: if Postgres is briefly down, we don't want all api pods to
|
||||||
start failing liveness and cascade-restart.
|
start failing liveness and cascade-restart.
|
||||||
|
|
||||||
## Dozzle (deprecated)
|
## obs.88oakapps.com — the ingest endpoint
|
||||||
|
|
||||||
The Swarm era had [Dozzle](https://github.com/amir20/dozzle) — a
|
Public hostname for cross-cluster metric and trace ingest. Cloudflare
|
||||||
lightweight web UI for Docker logs. Accessible via SSH tunnel to the
|
in front, nginx on `88oakappsUpdate` enforces a bearer-token check
|
||||||
manager node. Not deployed on k3s; `kubectl logs` + `stern` fills the
|
before forwarding to the local VM/Jaeger containers.
|
||||||
niche.
|
|
||||||
|
|
||||||
## Kubernetes metrics the k8s API exposes
|
| Path | Forwards to | Purpose |
|
||||||
|
|---|---|---|
|
||||||
|
| `/api/v1/write` | `http://127.0.0.1:8428` | Prometheus remote-write (vmagent → VM) |
|
||||||
|
| `/v1/traces` | `http://127.0.0.1:4318/v1/traces` | OTLP/HTTP traces (app → Jaeger) |
|
||||||
|
| `/health` | (returns 200) | Reachability probe — also requires auth |
|
||||||
|
| anything else | 404 | |
|
||||||
|
|
||||||
Even without Prometheus, these are queryable:
|
Token lives at `/etc/honeydue-obs/secrets.env` (mode 0600 on the box)
|
||||||
|
and at `OBS_INGEST_TOKEN=` in `deploy/prod.env` (gitignored). To rotate:
|
||||||
|
generate a new value, update both ends, restart vmagent.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Resource metrics (via metrics-server)
|
# Operator: rotate the bearer token
|
||||||
kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes
|
NEW=$(openssl rand -hex 32)
|
||||||
kubectl get --raw /apis/metrics.k8s.io/v1beta1/namespaces/honeydue/pods
|
ssh 88oakappsUpdate "sudo sed -i 's|OBS_INGEST_TOKEN=.*|OBS_INGEST_TOKEN=$NEW|' /etc/honeydue-obs/secrets.env"
|
||||||
|
ssh 88oakappsUpdate "sudo sed -i 's|Bearer [a-f0-9]\{64\}|Bearer $NEW|' /etc/nginx/sites-available/obs.88oakapps.com && sudo nginx -s reload"
|
||||||
# Core API (k8s state)
|
sed -i.bak "s|^OBS_INGEST_TOKEN=.*|OBS_INGEST_TOKEN=$NEW|" deploy/prod.env
|
||||||
kubectl get --raw /api/v1/namespaces/honeydue/pods/<name>
|
KUBECONFIG=~/.kube/honeydue.yaml kubectl -n honeydue create secret generic vmagent-remote-write \
|
||||||
|
--from-literal=bearer_token=$NEW --dry-run=client -o yaml | KUBECONFIG=~/.kube/honeydue.yaml kubectl apply -f -
|
||||||
# Kubelet metrics (per-node; requires tunneling)
|
KUBECONFIG=~/.kube/honeydue.yaml kubectl -n honeydue rollout restart deploy/vmagent
|
||||||
kubectl get --raw /api/v1/nodes/<node>/proxy/metrics
|
|
||||||
```
|
```
|
||||||
|
|
||||||
If we ever spin up Prometheus, these are the endpoints it would scrape.
|
## Resource budget
|
||||||
|
|
||||||
## Future: what to add and when
|
| Service | mem_limit | Disk | Retention |
|
||||||
|
|---|---|---|---|
|
||||||
|
| VictoriaMetrics | 256 MB | 10 GB | 30 days |
|
||||||
|
| Jaeger all-in-one (badger) | 256 MB | 10 GB | ~7 days |
|
||||||
|
| Grafana OSS | 256 MB | 1 GB | — |
|
||||||
|
| vmagent (in k3s) | 256 MB | 512 MB emptyDir | — |
|
||||||
|
| **Total** | **~1 GB hard cap** | **~21 GB** | |
|
||||||
|
|
||||||
| Trigger | Add |
|
Resident usage at idle is much lower (~90 MB on the obs side, ~30 MB
|
||||||
|---|---|
|
for vmagent). Hard limits exist so a memory leak in any one component
|
||||||
| 10k+ daily users | Loki + Grafana for logs |
|
can't squeeze the cohabiting PostHog stack on `88oakappsUpdate`.
|
||||||
| 100+ req/s sustained | Prometheus + Grafana for metrics |
|
|
||||||
| Performance incidents | OpenTelemetry tracing |
|
|
||||||
| Revenue > $5k/mo | Paid monitoring (Datadog or similar) |
|
|
||||||
| First production outage | Alerting to phone/Slack |
|
|
||||||
|
|
||||||
The overall philosophy: observability is an investment that compounds.
|
|
||||||
Add it before you need it, not after. But also don't over-invest at
|
|
||||||
idle.
|
|
||||||
|
|
||||||
**Next quarter**: set up Uptime Kuma + Loki at minimum.
|
|
||||||
|
|
||||||
## Checking what's installed
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# In kube-system namespace
|
|
||||||
kubectl get pods -n kube-system
|
|
||||||
# Should see: coredns, metrics-server, traefik, local-path-provisioner,
|
|
||||||
# and some k3s-related helm install jobs
|
|
||||||
|
|
||||||
# In honeydue namespace
|
|
||||||
kubectl get pods -n honeydue
|
|
||||||
# api, admin, worker, redis
|
|
||||||
|
|
||||||
# No monitoring namespace (yet)
|
|
||||||
kubectl get namespaces
|
|
||||||
# default, honeydue, kube-node-lease, kube-public, kube-system
|
|
||||||
```
|
|
||||||
|
|
||||||
## Operator cheat sheet
|
## Operator cheat sheet
|
||||||
|
|
||||||
@@ -274,32 +383,61 @@ kubectl get namespaces
|
|||||||
# Tail all logs in the namespace
|
# Tail all logs in the namespace
|
||||||
kubectl logs -n honeydue --all-containers=true --tail=50 -l app.kubernetes.io/part-of=honeydue
|
kubectl logs -n honeydue --all-containers=true --tail=50 -l app.kubernetes.io/part-of=honeydue
|
||||||
|
|
||||||
|
# Scrape state from vmagent self-metrics
|
||||||
|
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://127.0.0.1:8429/metrics \
|
||||||
|
| grep -E "scrapes_total|targets|remotewrite"
|
||||||
|
|
||||||
|
# Force vmagent to reload scrape config
|
||||||
|
kubectl -n honeydue rollout restart deploy/vmagent
|
||||||
|
|
||||||
|
# Query VictoriaMetrics directly (PromQL)
|
||||||
|
ssh 88oakappsUpdate 'curl -s "http://127.0.0.1:8428/api/v1/query?query=histogram_quantile(0.95,sum%20by%20(route,le)(rate(http_request_duration_seconds_bucket%5B5m%5D)))" | python3 -m json.tool'
|
||||||
|
|
||||||
|
# Restart the obs stack on 88oakappsUpdate
|
||||||
|
ssh 88oakappsUpdate 'cd /opt/honeydue-obs && sudo docker compose restart'
|
||||||
|
|
||||||
|
# Live obs container memory
|
||||||
|
ssh 88oakappsUpdate 'sudo docker stats --no-stream | grep honeydue-obs'
|
||||||
|
|
||||||
|
# Pod resource usage (k3s side)
|
||||||
|
kubectl top pods -n honeydue --sort-by=memory
|
||||||
|
|
||||||
# With stern (if installed: brew install stern)
|
# With stern (if installed: brew install stern)
|
||||||
stern -n honeydue .
|
stern -n honeydue .
|
||||||
|
|
||||||
# Follow specific pod, including previous runs
|
|
||||||
kubectl logs -n honeydue <pod> -f --previous=false
|
|
||||||
|
|
||||||
# Pod resource usage
|
|
||||||
kubectl top pods -n honeydue --sort-by=memory
|
|
||||||
kubectl top pods -n honeydue --sort-by=cpu
|
|
||||||
|
|
||||||
# Events (cluster-wide)
|
|
||||||
kubectl get events -A --sort-by=.lastTimestamp | tail -20
|
|
||||||
|
|
||||||
# Full state dump for a pod (debugging)
|
# Full state dump for a pod (debugging)
|
||||||
kubectl describe pod -n honeydue <pod> > /tmp/pod-dump.txt
|
kubectl describe pod -n honeydue <pod> > /tmp/pod-dump.txt
|
||||||
kubectl logs -n honeydue <pod> > /tmp/pod-logs.txt
|
kubectl logs -n honeydue <pod> > /tmp/pod-logs.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Future: what to add and when
|
||||||
|
|
||||||
|
| Trigger | Add |
|
||||||
|
|---|---|
|
||||||
|
| First production incident | Grafana alerting (free, data already there) |
|
||||||
|
| 10k+ daily users | Loki + Vector for log aggregation |
|
||||||
|
| Performance incident the histograms can't explain | Wire OTel exporter → Jaeger from the Go app |
|
||||||
|
| CPU pressure on api pods | Pyroscope continuous profiler |
|
||||||
|
| Multi-product obs needs | Migrate obs stack to dedicated CX32 ($8/mo) |
|
||||||
|
|
||||||
|
The overall philosophy: observability is an investment that compounds.
|
||||||
|
Add it before you need it, not after. But also don't over-invest at
|
||||||
|
idle.
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
- [Kubernetes metrics-server][ms]
|
- [VictoriaMetrics docs][vm]
|
||||||
- [K3s metrics][k3s-metrics]
|
- [vmagent kubernetes_sd_configs][vmagent-k8s]
|
||||||
- [Loki][loki]
|
- [Jaeger all-in-one with badger][jaeger]
|
||||||
|
- [prometheus/client_golang][promclient]
|
||||||
|
- [Grafana provisioning datasources][gf-prov]
|
||||||
|
- [Loki][loki] (future)
|
||||||
- [Stern (multi-pod log tail)][stern]
|
- [Stern (multi-pod log tail)][stern]
|
||||||
|
|
||||||
[ms]: https://github.com/kubernetes-sigs/metrics-server
|
[vm]: https://docs.victoriametrics.com/single-server-victoriametrics/
|
||||||
[k3s-metrics]: https://docs.k3s.io/advanced#enabling-metrics-server
|
[vmagent-k8s]: https://docs.victoriametrics.com/vmagent.html#kubernetes-monitoring-with-vmagent
|
||||||
|
[jaeger]: https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one
|
||||||
|
[promclient]: https://pkg.go.dev/github.com/prometheus/client_golang
|
||||||
|
[gf-prov]: https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources
|
||||||
[loki]: https://grafana.com/oss/loki/
|
[loki]: https://grafana.com/oss/loki/
|
||||||
[stern]: https://github.com/stern/stern
|
[stern]: https://github.com/stern/stern
|
||||||
|
|||||||
@@ -115,6 +115,41 @@ kubectl rollout restart deployment/coredns -n kube-system
|
|||||||
kubectl rollout restart deployment/metrics-server -n kube-system
|
kubectl rollout restart deployment/metrics-server -n kube-system
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### vmagent can't reach obs.88oakapps.com
|
||||||
|
|
||||||
|
**Symptom**: dashboards stop updating; vmagent logs show 401 / TLS /
|
||||||
|
network errors against `obs.88oakapps.com`. App is unaffected.
|
||||||
|
**Recovery**: vmagent buffers up to 512 MB locally and replays on
|
||||||
|
reconnect, so brief outages self-heal. If sustained:
|
||||||
|
```bash
|
||||||
|
# Is the obs endpoint up?
|
||||||
|
curl -s -o /dev/null -w "%{http_code}\n" https://obs.88oakapps.com/health \
|
||||||
|
-H "Authorization: Bearer $(grep ^OBS_INGEST_TOKEN= deploy/prod.env | cut -d= -f2)"
|
||||||
|
# 200 = ingest endpoint healthy.
|
||||||
|
|
||||||
|
# Inspect vmagent's failure metric
|
||||||
|
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://127.0.0.1:8429/metrics \
|
||||||
|
| grep -E "remotewrite_(packets|samples)_dropped|persistentqueue_blocks_dropped"
|
||||||
|
|
||||||
|
# Restart vmagent (forces config reload + drains queue)
|
||||||
|
kubectl -n honeydue rollout restart deploy/vmagent
|
||||||
|
```
|
||||||
|
**If 88oakappsUpdate itself is down** (PostHog runs there too):
|
||||||
|
SSH and check `sudo docker compose -f /opt/honeydue-obs/docker-compose.yml ps`.
|
||||||
|
**Non-critical**: nothing app-facing depends on the obs stack.
|
||||||
|
|
||||||
|
#### Grafana dashboard shows "no data"
|
||||||
|
|
||||||
|
**Possible causes, in order of frequency**:
|
||||||
|
1. New histogram name — query targets a metric the api hasn't emitted
|
||||||
|
yet. Check `kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://api:8000/metrics`
|
||||||
|
for the metric name.
|
||||||
|
2. vmagent isn't scraping (see above).
|
||||||
|
3. Time range is before the obs stack came up (2026-04-25). Adjust
|
||||||
|
the dashboard time picker.
|
||||||
|
4. Cardinality blowup — VM rejected high-label-count series. Check
|
||||||
|
`vm_rows_inserted_total` vs `vm_rows_dropped_total` on the obs box.
|
||||||
|
|
||||||
### Networking failures
|
### Networking failures
|
||||||
|
|
||||||
#### UFW rule accidentally blocks essential traffic
|
#### UFW rule accidentally blocks essential traffic
|
||||||
@@ -210,12 +245,58 @@ finds an empty data directory (or can't mount at all).
|
|||||||
- If the original node is gone: Redis starts empty. Cache regenerates.
|
- If the original node is gone: Redis starts empty. Cache regenerates.
|
||||||
Asynq queue state is lost; pending jobs re-queue on retry, cron
|
Asynq queue state is lost; pending jobs re-queue on retry, cron
|
||||||
fires re-schedule on next tick.
|
fires re-schedule on next tick.
|
||||||
|
- Auth caches (token + residence-IDs) regenerate on first user
|
||||||
|
request — first request per user pays full DB lookup, then warm
|
||||||
|
again. Visible as a brief latency spike in the Grafana RED
|
||||||
|
dashboard, not a functional failure.
|
||||||
- Ensure the node label `honeydue/redis=true` is on a healthy node:
|
- Ensure the node label `honeydue/redis=true` is on a healthy node:
|
||||||
```bash
|
```bash
|
||||||
kubectl label node <new-node> honeydue/redis=true --overwrite
|
kubectl label node <new-node> honeydue/redis=true --overwrite
|
||||||
kubectl label node <dead-node> honeydue/redis- 2>/dev/null || true
|
kubectl label node <dead-node> honeydue/redis- 2>/dev/null || true
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Stale residence-IDs cache (data freshness bug)
|
||||||
|
|
||||||
|
**Symptom**: a user accepts a share-code or has a residence
|
||||||
|
removed, but `/api/tasks/`, `/api/documents/`, `/api/contractors/`,
|
||||||
|
or `/api/residences/summary/` continues to show the old
|
||||||
|
membership for up to 5 minutes.
|
||||||
|
**Cause**: a residence-membership-mutating code path landed
|
||||||
|
without calling `cache.InvalidateResidenceIDsForUsers(...)`. The
|
||||||
|
cache TTL is 5 min so the issue self-heals, but it's user-visible.
|
||||||
|
**Recovery (immediate)**: flush the affected user's cache key
|
||||||
|
manually. See [Chapter 17 §23 — Invalidate residence-IDs cache for a user](./17-runbook.md).
|
||||||
|
**Prevention (permanent)**: every mutation that changes
|
||||||
|
`residence_residence.owner_id`, `residence_residence_users.user_id`,
|
||||||
|
or deletes a residence MUST invalidate. Existing call sites for
|
||||||
|
reference: `CreateResidence` (owner), `DeleteResidence`
|
||||||
|
(all members), `JoinWithCode` (joining user), `RemoveUser`
|
||||||
|
(removed user). The pattern lives in
|
||||||
|
`internal/services/residence_id_cache.go`.
|
||||||
|
|
||||||
|
#### Redis at maxmemory limit
|
||||||
|
|
||||||
|
**Symptom**: Redis logs `OOM command not allowed when used memory > 'maxmemory'`.
|
||||||
|
Should be rare — current production usage is ~2.4 MB against a 256 MB
|
||||||
|
limit and the policy is `allkeys-lru` (cache writes evict cold keys
|
||||||
|
instead of erroring).
|
||||||
|
**Recovery**: confirm the policy is still `allkeys-lru`:
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue exec deploy/redis -- redis-cli CONFIG GET maxmemory-policy
|
||||||
|
```
|
||||||
|
If it's somehow `noeviction`, set it live:
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue exec deploy/redis -- redis-cli CONFIG SET maxmemory-policy allkeys-lru
|
||||||
|
```
|
||||||
|
And re-apply the manifest at `deploy-k3s/manifests/redis/deployment.yaml`
|
||||||
|
so the change survives a pod restart.
|
||||||
|
|
||||||
|
If memory usage is genuinely climbing toward the cap, check for
|
||||||
|
runaway keys without TTLs:
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue exec deploy/redis -- redis-cli --bigkeys
|
||||||
|
```
|
||||||
|
|
||||||
### External service failures
|
### External service failures
|
||||||
|
|
||||||
#### Neon Postgres outage
|
#### Neon Postgres outage
|
||||||
@@ -229,6 +310,72 @@ until Neon is back.
|
|||||||
Postgres-level failover.
|
Postgres-level failover.
|
||||||
**Frequency**: Neon has had a handful of hours-scale outages since launch.
|
**Frequency**: Neon has had a handful of hours-scale outages since launch.
|
||||||
|
|
||||||
|
#### Neon pooler endpoint unreachable but direct endpoint up
|
||||||
|
|
||||||
|
**Symptom**: `dial tcp ep-floral-truth-amttbc5a-pooler.c-5...: i/o
|
||||||
|
timeout` in api logs but the direct compute endpoint is reachable.
|
||||||
|
Rare — Neon's pooler runs in their infra alongside compute — but
|
||||||
|
possible during pooler maintenance.
|
||||||
|
**Recovery (emergency)**: switch `DB_HOST` in `config.yaml` from the
|
||||||
|
`-pooler` to the direct hostname (drop the `-pooler` segment),
|
||||||
|
re-apply ConfigMap, rolling-restart api and worker:
|
||||||
|
```bash
|
||||||
|
# Edit deploy-k3s/config.yaml: database.host: ep-floral-truth-amttbc5a.c-5...
|
||||||
|
# Then:
|
||||||
|
KUBECONFIG=~/.kube/honeydue.yaml bash deploy-k3s/scripts/03-deploy.sh --skip-build
|
||||||
|
```
|
||||||
|
Cold-handshake latency goes back up (~440ms first hit) but the API
|
||||||
|
keeps serving. Switch back when the pooler recovers.
|
||||||
|
|
||||||
|
#### Migrate Job fails during deploy
|
||||||
|
|
||||||
|
**Symptom**: `03-deploy.sh` aborts at the migrations step:
|
||||||
|
```
|
||||||
|
[deploy][error] migrations did not complete cleanly; aborting deploy
|
||||||
|
```
|
||||||
|
api/worker pods are NOT updated — they keep running the previous
|
||||||
|
revision. This is the intentional fail-fast.
|
||||||
|
|
||||||
|
**Recovery**:
|
||||||
|
```bash
|
||||||
|
# 1. See the failure
|
||||||
|
kubectl -n honeydue logs job/honeydue-migrate --tail=200
|
||||||
|
|
||||||
|
# 2. Common cause: a SQL error in the migration file. Fix the file
|
||||||
|
# locally, commit, retry the deploy. The Job is idempotent —
|
||||||
|
# successful prior versions stay applied; only the failed file
|
||||||
|
# re-runs.
|
||||||
|
git add migrations/000NNN_*.sql
|
||||||
|
git commit -m "Fix migration NNN"
|
||||||
|
git push gitea master
|
||||||
|
bash deploy-k3s/scripts/03-deploy.sh
|
||||||
|
|
||||||
|
# 3. Other cause: Neon down or auth changed. Test direct connection:
|
||||||
|
DB_PASS=$(kubectl -n honeydue get secret honeydue-secrets \
|
||||||
|
-o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d)
|
||||||
|
docker run --rm -e PGPASSWORD="$DB_PASS" postgres:17-alpine \
|
||||||
|
psql "host=ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech \
|
||||||
|
user=neondb_owner dbname=honeyDue sslmode=require" -c "SELECT 1;"
|
||||||
|
```
|
||||||
|
**Why no automatic retry**: `backoffLimit: 0` on the Job is deliberate.
|
||||||
|
A failing migration almost never gets unstuck by retrying — needs an
|
||||||
|
operator to look. See [Chapter 17 §27](./17-runbook.md) for the recovery
|
||||||
|
playbook.
|
||||||
|
|
||||||
|
#### api refuses to start: "Schema precondition failed"
|
||||||
|
|
||||||
|
**Symptom**: api pods log `Schema precondition failed` and exit
|
||||||
|
immediately after DB connect.
|
||||||
|
**Cause**: `goose_db_version` table is missing or its latest row has
|
||||||
|
`is_applied=false`. Means the migrate Job either was never run or
|
||||||
|
ran and rolled back.
|
||||||
|
**Recovery**: run the migrate Job manually (see
|
||||||
|
[Chapter 17 §26](./17-runbook.md)). After it completes successfully,
|
||||||
|
restart the failing api pods so they re-run the schema check on boot:
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue rollout restart deploy/api
|
||||||
|
```
|
||||||
|
|
||||||
#### Backblaze B2 outage
|
#### Backblaze B2 outage
|
||||||
|
|
||||||
**Symptom**: image uploads fail; image downloads fail unless cached by
|
**Symptom**: image uploads fail; image downloads fail unless cached by
|
||||||
|
|||||||
@@ -358,6 +358,165 @@ Workaround: in each pod's logs, search for a unique user identifier:
|
|||||||
stern -n honeydue api | grep "user_id=12345"
|
stern -n honeydue api | grep "user_id=12345"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 23. Invalidate residence-IDs cache for a user
|
||||||
|
|
||||||
|
Used when a user reports stale data ("I joined a residence but my
|
||||||
|
tasks list still shows the old one"). The cache is keyed on user ID
|
||||||
|
with a 5-min TTL — most issues self-heal — but you can flush manually.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Single user
|
||||||
|
kubectl -n honeydue exec deploy/redis -- redis-cli DEL "residence_ids_user:7"
|
||||||
|
|
||||||
|
# All users (nuclear; everyone pays one DB lookup on next request)
|
||||||
|
kubectl -n honeydue exec deploy/redis -- redis-cli --scan --pattern "residence_ids_user:*" \
|
||||||
|
| xargs -r -n 100 kubectl -n honeydue exec deploy/redis -- redis-cli DEL
|
||||||
|
```
|
||||||
|
|
||||||
|
Mutation paths that should invalidate this cache automatically (any
|
||||||
|
new code that changes membership must call
|
||||||
|
`cache.InvalidateResidenceIDsForUsers(ctx, userIDs...)`):
|
||||||
|
|
||||||
|
- `ResidenceService.CreateResidence` → owner
|
||||||
|
- `ResidenceService.DeleteResidence` → all members
|
||||||
|
- `ResidenceService.JoinWithCode` → joining user
|
||||||
|
- `ResidenceService.RemoveUser` → removed user
|
||||||
|
|
||||||
|
If a user keeps reporting stale data, grep for missing invalidation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
grep -rn "residenceRepo.*Add\|RemoveUser\|residence_residence_users" internal/ \
|
||||||
|
| grep -v cache | grep -v _test
|
||||||
|
```
|
||||||
|
|
||||||
|
## 24. Verify DB pool warm-up is working
|
||||||
|
|
||||||
|
After a deploy, check the api pod log for the warm-up confirmation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue logs -l app.kubernetes.io/name=api --tail=50 \
|
||||||
|
| grep "DB pool warm-up complete"
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected output (per pod):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"level":"info","requested":20,"warmed":20,"message":"DB pool warm-up complete"}
|
||||||
|
```
|
||||||
|
|
||||||
|
If `warmed` < `requested`, the pool partially failed at boot — pod
|
||||||
|
still starts, fills from there. If `warmed=0`, something's wrong with
|
||||||
|
either Neon connectivity or auth — check the next log line for the
|
||||||
|
specific error.
|
||||||
|
|
||||||
|
To test impact: hit the api right after a rollout. With warm-up
|
||||||
|
working, the first request should be ~250ms (1 RTT). Without warm-up,
|
||||||
|
the first request is ~700ms (full handshake).
|
||||||
|
|
||||||
|
## 25. Switch DB host between pooler and direct endpoints
|
||||||
|
|
||||||
|
The pooler endpoint (`-pooler` suffix) is the default — it cuts
|
||||||
|
cold-handshake latency by ~3 RTTs. The direct endpoint
|
||||||
|
(`ep-floral-truth-amttbc5a.c-5...`) is the fallback.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Edit deploy-k3s/config.yaml — change database.host
|
||||||
|
# To pooler: ep-floral-truth-amttbc5a-pooler.c-5.us-east-1.aws.neon.tech
|
||||||
|
# To direct: ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech
|
||||||
|
|
||||||
|
KUBECONFIG=~/.kube/honeydue.yaml bash deploy-k3s/scripts/03-deploy.sh --skip-build
|
||||||
|
```
|
||||||
|
|
||||||
|
The pooler runs in transaction mode so any session-scope feature
|
||||||
|
(LISTEN/NOTIFY, session advisory locks) won't work over it. Migrations
|
||||||
|
already handle this — the migrate Job script strips `-pooler` from
|
||||||
|
`DB_HOST` before invoking goose. If you add new session-level features
|
||||||
|
in the data path, they'll need the same workaround.
|
||||||
|
|
||||||
|
## 26. Run migrations manually (rare)
|
||||||
|
|
||||||
|
Day-to-day, migrations run as part of every `03-deploy.sh`. But
|
||||||
|
sometimes you want to apply or inspect them outside a deploy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Direct-endpoint DSN (goose's advisory lock won't survive the pooler)
|
||||||
|
DB_PASS=$(kubectl -n honeydue get secret honeydue-secrets \
|
||||||
|
-o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d)
|
||||||
|
export DATABASE_URL="host=ep-floral-truth-amttbc5a.c-5.us-east-1.aws.neon.tech \
|
||||||
|
port=5432 user=neondb_owner password=$DB_PASS \
|
||||||
|
dbname=honeyDue sslmode=require"
|
||||||
|
|
||||||
|
# What's pending? (read-only; safe to run anytime)
|
||||||
|
make migrate-status
|
||||||
|
|
||||||
|
# Apply pending migrations (or `goose -dir migrations postgres "$DATABASE_URL" up`)
|
||||||
|
make migrate-up
|
||||||
|
|
||||||
|
# Roll back the most recent migration
|
||||||
|
make migrate-down
|
||||||
|
|
||||||
|
# Scaffold a new migration file
|
||||||
|
make migrate-new name=add_widget_count_to_residences
|
||||||
|
# → migrations/000002_add_widget_count_to_residences.sql
|
||||||
|
# Edit, then `make migrate-up` to test, then commit.
|
||||||
|
```
|
||||||
|
|
||||||
|
To run goose from inside the cluster (e.g., to bypass a network policy
|
||||||
|
that blocks Neon from your laptop), use the migrate Job manifest as a
|
||||||
|
one-shot:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Re-runs the latest migrate Job with whatever args you need
|
||||||
|
kubectl -n honeydue delete job honeydue-migrate --ignore-not-found
|
||||||
|
sed "s|image: IMAGE_PLACEHOLDER|image: $(kubectl -n honeydue get deploy api -o jsonpath='{.spec.template.spec.containers[0].image}')|" \
|
||||||
|
deploy-k3s/manifests/migrate/job.yaml | kubectl apply -f -
|
||||||
|
kubectl -n honeydue wait --for=condition=complete --timeout=5m job/honeydue-migrate
|
||||||
|
kubectl -n honeydue logs job/honeydue-migrate
|
||||||
|
```
|
||||||
|
|
||||||
|
## 27. Recover from a failed/dirty migration
|
||||||
|
|
||||||
|
If `goose up` fails partway through, the migration file's transaction
|
||||||
|
rolls back and `goose_db_version` reflects the last *complete*
|
||||||
|
version. Goose marks no row as "dirty" — that's a golang-migrate
|
||||||
|
concept. So recovery is just: fix the migration file, re-run.
|
||||||
|
|
||||||
|
If you've genuinely corrupted state (dropped tables you shouldn't have,
|
||||||
|
applied a destructive migration in error):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# See current goose state
|
||||||
|
make migrate-status
|
||||||
|
psql "$DATABASE_URL" -c \
|
||||||
|
"SELECT version_id, is_applied, tstamp FROM goose_db_version ORDER BY id DESC LIMIT 10;"
|
||||||
|
|
||||||
|
# To force the version table back to a known-good number after
|
||||||
|
# manually fixing the schema:
|
||||||
|
psql "$DATABASE_URL" -c \
|
||||||
|
"INSERT INTO goose_db_version (version_id, is_applied, tstamp) VALUES (<N>, true, NOW());"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 28. Bootstrap goose on a fresh clone of the schema
|
||||||
|
|
||||||
|
If you create a new Neon branch / dev DB and need to bring it under
|
||||||
|
goose management:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DATABASE_URL="...<the new DB>..."
|
||||||
|
|
||||||
|
# Option A: fresh DB, no schema → just run up
|
||||||
|
make migrate-up
|
||||||
|
|
||||||
|
# Option B: schema already populated (e.g., restored from a dump) →
|
||||||
|
# mark v1 as already-applied
|
||||||
|
goose -dir migrations postgres "$DATABASE_URL" version # creates table
|
||||||
|
psql "$DATABASE_URL" -c \
|
||||||
|
"INSERT INTO goose_db_version (version_id, is_applied, tstamp) VALUES (1, true, NOW());"
|
||||||
|
```
|
||||||
|
|
||||||
|
This is also what was done for the live prod DB at goose-adoption time
|
||||||
|
(commit `12b2f9d`).
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
- [kubectl cheat sheet][kubectl-cs]
|
- [kubectl cheat sheet][kubectl-cs]
|
||||||
|
|||||||
@@ -58,6 +58,20 @@ honeyDue.
|
|||||||
|---|---:|
|
|---|---:|
|
||||||
| Gitea container registry | **$0** |
|
| Gitea container registry | **$0** |
|
||||||
|
|
||||||
|
### Observability (88oakappsUpdate)
|
||||||
|
|
||||||
|
VictoriaMetrics + Jaeger + Grafana co-tenant on the existing Linode
|
||||||
|
VPS that hosts PostHog. ~700 MB RAM, 21 GB disk — fits inside the
|
||||||
|
existing instance. Not charged to honeyDue.
|
||||||
|
|
||||||
|
| Item | Monthly |
|
||||||
|
|---|---:|
|
||||||
|
| Self-hosted obs stack on `88oakappsUpdate` | **$0** |
|
||||||
|
|
||||||
|
Migration trigger: when the obs stack starts pressuring PostHog or
|
||||||
|
needs hard isolation, move to a dedicated Hetzner CX32 (~$8/mo).
|
||||||
|
See [Chapter 15 — When to move off](./15-observability.md).
|
||||||
|
|
||||||
### Total infrastructure
|
### Total infrastructure
|
||||||
|
|
||||||
| Category | Monthly |
|
| Category | Monthly |
|
||||||
@@ -67,6 +81,7 @@ honeyDue.
|
|||||||
| Storage | ~$0.30 |
|
| Storage | ~$0.30 |
|
||||||
| Edge | $0 |
|
| Edge | $0 |
|
||||||
| Registry | $0 |
|
| Registry | $0 |
|
||||||
|
| Observability | $0 |
|
||||||
| **Total** | **~$30** |
|
| **Total** | **~$30** |
|
||||||
|
|
||||||
## External SaaS
|
## External SaaS
|
||||||
|
|||||||
@@ -397,6 +397,35 @@ should reflect reality, not be optimistic.
|
|||||||
**Moral**: Healthchecks should be realistic, not aspirational. Know
|
**Moral**: Healthchecks should be realistic, not aspirational. Know
|
||||||
what your app actually does at startup.
|
what your app actually does at startup.
|
||||||
|
|
||||||
|
#### Postscript (2026-04-26): the whole `MigrateWithLock` shape was wrong
|
||||||
|
|
||||||
|
A few months after the Swarm migration, switching `DB_HOST` to Neon's
|
||||||
|
`-pooler` endpoint for runtime perf wins broke this code completely:
|
||||||
|
`pg_advisory_lock` is session-scoped, but PgBouncer transaction-mode
|
||||||
|
multiplexes statements across backend Postgres sessions, so the lock
|
||||||
|
appeared to be held but actually wasn't. Pods hung at
|
||||||
|
"Acquiring migration advisory lock..." and the startup probe killed
|
||||||
|
them in turn.
|
||||||
|
|
||||||
|
After a brief band-aid (route migrations through the direct endpoint;
|
||||||
|
bump probe to 600s to absorb 5-minute AutoMigrate runs over the slow
|
||||||
|
direct connection — both reverted), we abandoned the runtime-side
|
||||||
|
migration story entirely and adopted [pressly/goose](https://github.com/pressly/goose)
|
||||||
|
in commit `12b2f9d`:
|
||||||
|
|
||||||
|
- Migrations run as a one-shot Kubernetes Job before any api/worker
|
||||||
|
pod rolls. No more in-replica migration, no more advisory lock,
|
||||||
|
no more startup probe gymnastics.
|
||||||
|
- `RequireSchemaApplied` checks `goose_db_version` at startup and
|
||||||
|
refuses to boot on a stale schema — fail-fast for "operator
|
||||||
|
forgot to run migrate," instead of mysterious runtime errors.
|
||||||
|
- `failureThreshold` reverted to its pre-MigrateWithLock value.
|
||||||
|
Pods boot in seconds again.
|
||||||
|
|
||||||
|
See [Chapter 8 §Schema management](./08-database.md) for the goose
|
||||||
|
shape. This entire sub-section is preserved as historical context
|
||||||
|
for why we walked the path we did.
|
||||||
|
|
||||||
## What we learned
|
## What we learned
|
||||||
|
|
||||||
### Docker Swarm is in a bad place in 2026
|
### Docker Swarm is in a bad place in 2026
|
||||||
|
|||||||
@@ -19,84 +19,72 @@ minute, with Slack/email alerts on failure.
|
|||||||
**Effort**: ~30 min for Uptime Kuma deploy, ~10 min for Better Stack
|
**Effort**: ~30 min for Uptime Kuma deploy, ~10 min for Better Stack
|
||||||
signup.
|
signup.
|
||||||
|
|
||||||
### Cloudflare origin IP restriction
|
### ~~Cloudflare origin IP restriction~~ ✓ DONE (2026-04-24)
|
||||||
|
|
||||||
**Why**: UFW allows :80 from anywhere. If node IPs leak, direct-connect
|
Both `:80` and `:443` `Anywhere` rules removed on all 3 nodes. Only
|
||||||
attackers bypass CF's WAF/DDoS protection.
|
CF's 15 IPv4 + 7 IPv6 ranges allowed on `:443`. Direct-connect attempts
|
||||||
|
from non-CF IPs time out.
|
||||||
|
|
||||||
**How**: Replace the anywhere-80 UFW rule with 15 IPv4 + 7 IPv6 CF
|
**Still TODO**: monthly automated refresh of the CF IP list. Ranges
|
||||||
ranges. See [Chapter 13 §CF IP ranges](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips).
|
change rarely; manual re-run of `scripts/ufw-cf-refresh.sh` (not yet
|
||||||
|
written) on cadence is acceptable for now.
|
||||||
|
|
||||||
Automation: a small script that refreshes the CF IP list monthly and
|
### ~~Enable network policies in k3s~~ ✓ DONE (2026-04-24)
|
||||||
re-applies UFW rules.
|
|
||||||
|
|
||||||
**Effort**: 1 hour.
|
Applied with one scaffold correction: Traefik runs as a DaemonSet with
|
||||||
|
`hostNetwork: true`, so traffic from it arrives with the **node IP** as
|
||||||
|
source rather than a pod IP. The original scaffold used
|
||||||
|
`namespaceSelector: kube-system` which doesn't match hostNetwork
|
||||||
|
traffic. Fixed by using an `ipBlock` list of the three node IPs plus
|
||||||
|
the cluster pod CIDR `10.42.0.0/16`.
|
||||||
|
|
||||||
### Enable network policies in k3s
|
Also added policies for `web` (missing from the original scaffold).
|
||||||
|
|
||||||
**Why**: Currently pods can freely egress anywhere. A compromised pod
|
### ~~Apply Traefik security middleware~~ ✓ DONE (2026-04-24)
|
||||||
could exfiltrate data or attack lateral services.
|
|
||||||
|
|
||||||
**How**: `kubectl apply -f deploy-k3s/manifests/network-policies.yaml`.
|
`security-headers` + `rate-limit` attached to all three ingresses
|
||||||
The scaffold defines default-deny + explicit allows for:
|
(api, admin, web). `admin-auth` is defined but not attached (needs an
|
||||||
- DNS egress for all pods
|
`admin-basic-auth` secret we haven't created). `cloudflare-only` IP
|
||||||
- Traefik → api (port 8000)
|
allowlist exists but is redundant with the UFW-level CF restriction —
|
||||||
- Traefik → admin (port 3000)
|
keep for defense in depth if we ever expose another layer.
|
||||||
- api/worker → Redis
|
|
||||||
- api/worker → external services (Postgres, B2, Fastmail)
|
|
||||||
|
|
||||||
Then test that nothing breaks (might need to adjust allow rules).
|
One scaffold correction: the `Content-Security-Policy` header in
|
||||||
|
`security-headers.customResponseHeaders` was stripped. The Go API sets
|
||||||
**Effort**: 1-2 hours including testing.
|
its own CSP in `internal/router/router.go`, and two CSP headers combine
|
||||||
|
via intersection (most restrictive wins), which would break the Google
|
||||||
### Apply Traefik security middleware
|
Fonts on the marketing landing page. Next.js apps set their own via
|
||||||
|
middleware.
|
||||||
**Why**: Our current Ingress has no rate limiting or security headers
|
|
||||||
beyond what Traefik adds by default.
|
|
||||||
|
|
||||||
**How**: Apply `deploy-k3s/manifests/ingress/middleware.yaml`, annotate
|
|
||||||
Ingresses to use them:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
|
||||||
```
|
|
||||||
|
|
||||||
**Effort**: 15 min.
|
|
||||||
|
|
||||||
## Medium priority
|
## Medium priority
|
||||||
|
|
||||||
### Upgrade to CF Full (strict) SSL
|
### ~~Upgrade to CF Full (strict) SSL~~ ✓ DONE (2026-04-24)
|
||||||
|
|
||||||
**Why**: Currently CF↔origin is plain HTTP. An attacker between CF and
|
Origin CA cert (`*.myhoneydue.com` + `myhoneydue.com`, 15-year
|
||||||
Hetzner could read traffic. Full (strict) mode encrypts this leg with
|
validity) stored as `cloudflare-origin-cert` TLS secret. All three
|
||||||
a CF-issued origin cert.
|
ingresses reference it via `tls:` blocks. CF mode flipped from
|
||||||
|
Flexible to Full (strict). Verified by:
|
||||||
|
|
||||||
**How**:
|
- direct-connect to origin on `:443` serves the Origin cert (subject
|
||||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
`CN=CloudFlare Origin Certificate`)
|
||||||
2. Create `cloudflare-origin-cert` Secret in k8s
|
- CF edge continues to serve its own Let's Encrypt cert to browsers
|
||||||
3. Add `tls:` block to Ingresses
|
- both layers now TLS-encrypted
|
||||||
4. Switch CF SSL mode to Full (strict)
|
|
||||||
|
|
||||||
**Effort**: 30 min.
|
### ~~Migration Job for schema changes~~ ✓ DONE (2026-04-26, commit `12b2f9d`)
|
||||||
|
|
||||||
**Citations**: [Cloudflare Origin CA docs][cf-origin-ca]
|
**What shipped**: pressly/goose as the migration tool, run as a one-shot
|
||||||
|
Kubernetes Job from `deploy-k3s/manifests/migrate/job.yaml` before
|
||||||
|
api/worker rollout. The Job uses the api image (goose CLI is baked in
|
||||||
|
during the Dockerfile build), strips `-pooler` from `DB_HOST` for the
|
||||||
|
direct-endpoint connection migrations need, and exits in seconds when
|
||||||
|
there's nothing to apply. `RequireSchemaApplied` in the api/worker
|
||||||
|
startup checks `goose_db_version` and fails fast on a stale schema.
|
||||||
|
|
||||||
### Migration Job for schema changes
|
The Go-code-with-`--migrate-only` shape originally proposed here was
|
||||||
|
rejected in favor of using the upstream goose binary directly — see
|
||||||
|
[Chapter 8 §Schema management](./08-database.md) for the trade-offs.
|
||||||
|
|
||||||
**Why**: Currently every api pod runs `MigrateWithLock()` on startup,
|
Pre-goose `MigrateWithLock` is gone; Chapter 19 (bug #13) has the historical
|
||||||
serializing on a Postgres advisory lock. Adds 90-240s to cold startup
|
postmortem context.
|
||||||
and caused bug #13 in Chapter 19.
|
|
||||||
|
|
||||||
**How**: Create a Kubernetes `Job` resource that runs the api image
|
|
||||||
with a `--migrate-only` flag. Job runs once per deploy, completes when
|
|
||||||
schema is current. api pods get an initContainer that waits for the
|
|
||||||
Job to complete.
|
|
||||||
|
|
||||||
Requires Go code change to support `--migrate-only` flag.
|
|
||||||
|
|
||||||
**Effort**: 3-4 hours (code + job manifest + testing).
|
|
||||||
|
|
||||||
### Redis password
|
### Redis password
|
||||||
|
|
||||||
@@ -312,7 +300,16 @@ k3s server on each node with the new backend.
|
|||||||
As items are done, mark them here. Think of this as a running changelog.
|
As items are done, mark them here. Think of this as a running changelog.
|
||||||
|
|
||||||
- [x] k3s migration from Swarm (2026-04-24)
|
- [x] k3s migration from Swarm (2026-04-24)
|
||||||
- [x] Traefik DaemonSet + hostNetwork
|
- [x] Traefik DaemonSet + hostNetwork (2026-04-24)
|
||||||
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD
|
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD (2026-04-24)
|
||||||
- [x] Documentation book (this doc set)
|
- [x] Documentation book (this doc set) (2026-04-24)
|
||||||
|
- [x] Web client deployed at `app.myhoneydue.com` (2026-04-24) — Next.js 16 standalone, 3 replicas with PDB, proxy pattern to api, see Chapter 7.
|
||||||
|
- [x] Admin URL-baking fix (2026-04-24) — Dockerfile `ARG NEXT_PUBLIC_API_URL`, `.dockerignore` hardening for `admin/.env.*`.
|
||||||
|
- [x] Auto-seed initial data on first API boot (2026-04-24) — `20260414_seed_initial_data` migration populates lookups, admin user, task templates. See commit `4ec4bbb`.
|
||||||
|
- [x] APNs wired up (2026-04-24) — Key ID `5L5BVF5G48`, Team ID `X86BR9WTLD`, sandbox mode. Secret `honeydue-apns-key`, `FEATURE_PUSH_ENABLED=true`.
|
||||||
|
- [x] Traefik middleware: `security-headers` + `rate-limit` attached to all three ingresses (2026-04-24). CSP is stripped from the middleware because the Go API sets its own.
|
||||||
|
- [x] Admin liveness probe path fix (2026-04-24) — was hitting `/admin/` (404) and crashlooping every ~90s for 6 hours before the bug was caught. Fixed to `/`.
|
||||||
|
- [x] Network policies applied (2026-04-24) — default-deny + explicit allows. Traefik hostNetwork is matched via node IP `ipBlock`s, not namespaceSelector. See Chapter 5.
|
||||||
|
- [x] Cloudflare Full (strict) SSL (2026-04-24) — Origin CA cert installed as `cloudflare-origin-cert` secret, ingresses have `tls:` blocks, CF mode flipped from Flexible. Both user↔CF and CF↔origin now TLS.
|
||||||
|
- [x] UFW CF-IP allowlist on all 3 nodes (2026-04-24) — 15 IPv4 + 7 IPv6 CF ranges allow `:443`; `Anywhere` rules for `:80` and `:443` deleted. Direct-connect from non-CF IPs times out.
|
||||||
- [ ] All other items above
|
- [ ] All other items above
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ they do, and how to operate them.
|
|||||||
|
|
||||||
- [07 — Services](./07-services.md) — api, admin, worker, redis per-service deep dive
|
- [07 — Services](./07-services.md) — api, admin, worker, redis per-service deep dive
|
||||||
- [08 — Database](./08-database.md) — Neon Postgres, goose migrations (formerly advisory-lock AutoMigrate)
|
- [08 — Database](./08-database.md) — Neon Postgres, goose migrations (formerly advisory-lock AutoMigrate)
|
||||||
- [09 — Storage](./09-storage.md) — Backblaze B2, minio-go client details
|
- [09 — Storage](./09-storage.md) — Backblaze B2, minio-go, presigned-URL direct uploads
|
||||||
- [10 — Secrets & Config](./10-secrets-config.md) — ConfigMap, Secret, env mapping
|
- [10 — Secrets & Config](./10-secrets-config.md) — ConfigMap, Secret, env mapping
|
||||||
- [11 — Registry](./11-registry.md) — Gitea container registry, multi-arch builds
|
- [11 — Registry](./11-registry.md) — Gitea container registry, multi-arch builds
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ they do, and how to operate them.
|
|||||||
|
|
||||||
- [12 — Data Flow](./12-data-flow.md) — end-to-end request lifecycle
|
- [12 — Data Flow](./12-data-flow.md) — end-to-end request lifecycle
|
||||||
- [14 — Deployment Process](./14-deployment-process.md) — how to roll new code
|
- [14 — Deployment Process](./14-deployment-process.md) — how to roll new code
|
||||||
- [15 — Observability](./15-observability.md) — logs, metrics, tracing
|
- [15 — Observability](./15-observability.md) — VictoriaMetrics + Jaeger + Grafana on `obs.88oakapps.com`, vmagent in-cluster, Prometheus histograms in the Go API
|
||||||
- [16 — Failure Modes](./16-failure-modes.md) — what happens when X dies
|
- [16 — Failure Modes](./16-failure-modes.md) — what happens when X dies
|
||||||
- [17 — Runbook](./17-runbook.md) — common ops tasks
|
- [17 — Runbook](./17-runbook.md) — common ops tasks
|
||||||
|
|
||||||
|
|||||||
@@ -173,11 +173,21 @@ suffix. (Chapter 8)
|
|||||||
## Go + Asynq
|
## Go + Asynq
|
||||||
|
|
||||||
**AutoMigrate**: GORM function that syncs DB schema to Go structs.
|
**AutoMigrate**: GORM function that syncs DB schema to Go structs.
|
||||||
(Chapter 8)
|
We used this in production until 2026-04, replaced by goose. Tests
|
||||||
|
still use it via `testutil.SetupTestDB`. (Chapter 8)
|
||||||
|
|
||||||
**Asynq**: Go library for background job queues. Redis-backed.
|
**Asynq**: Go library for background job queues. Redis-backed.
|
||||||
(Chapter 7)
|
(Chapter 7)
|
||||||
|
|
||||||
|
**goose**: pressly/goose — the SQL migration tool we use in production
|
||||||
|
(commit 12b2f9d onward). Migration files live in `migrations/`, one
|
||||||
|
file per version with `-- +goose Up` / `-- +goose Down` markers.
|
||||||
|
(Chapter 8)
|
||||||
|
|
||||||
|
**goose_db_version**: goose's version-tracking table. One row per
|
||||||
|
applied migration. `RequireSchemaApplied` reads the latest row at
|
||||||
|
api/worker startup to fail fast on a stale schema. (Chapter 8)
|
||||||
|
|
||||||
**GORM**: Go ORM we use. (Chapter 8)
|
**GORM**: Go ORM we use. (Chapter 8)
|
||||||
|
|
||||||
**pgx**: Go Postgres driver used by GORM. (Chapter 8)
|
**pgx**: Go Postgres driver used by GORM. (Chapter 8)
|
||||||
|
|||||||
@@ -278,6 +278,43 @@ ssh -i ~/.ssh/hetzner deploy@<node> 'sudo systemctl start k3s'
|
|||||||
# then re-join via the k3s install command
|
# then re-join via the k3s install command
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Observability
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Hit api /metrics from inside the cluster
|
||||||
|
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://api:8000/metrics | head -30
|
||||||
|
|
||||||
|
# vmagent self-stats: scrapes succeeded, samples shipped, queue health
|
||||||
|
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://127.0.0.1:8429/metrics \
|
||||||
|
| grep -E "scrapes_total|targets|remotewrite_samples_dropped|persistentqueue_blocks_dropped"
|
||||||
|
|
||||||
|
# Force vmagent to reload config (after editing the ConfigMap)
|
||||||
|
kubectl -n honeydue rollout restart deploy/vmagent
|
||||||
|
|
||||||
|
# Query VictoriaMetrics by SSH'ing to the obs box
|
||||||
|
ssh 88oakappsUpdate 'curl -s "http://127.0.0.1:8428/api/v1/query?query=up"'
|
||||||
|
|
||||||
|
# p95 latency by route, last 5m
|
||||||
|
ssh 88oakappsUpdate 'curl -s "http://127.0.0.1:8428/api/v1/query?query=histogram_quantile(0.95,sum%20by%20(route,le)(rate(http_request_duration_seconds_bucket%5B5m%5D)))" | python3 -m json.tool'
|
||||||
|
|
||||||
|
# All metric names landing in VM
|
||||||
|
ssh 88oakappsUpdate 'curl -s http://127.0.0.1:8428/api/v1/label/__name__/values | python3 -m json.tool'
|
||||||
|
|
||||||
|
# Restart the obs stack on 88oakappsUpdate (VM + Jaeger + Grafana)
|
||||||
|
ssh 88oakappsUpdate 'cd /opt/honeydue-obs && sudo docker compose restart'
|
||||||
|
|
||||||
|
# Live RAM usage of the obs containers
|
||||||
|
ssh 88oakappsUpdate 'sudo docker stats --no-stream | grep honeydue-obs'
|
||||||
|
|
||||||
|
# Test the obs ingest endpoint with auth
|
||||||
|
TOKEN=$(grep ^OBS_INGEST_TOKEN= deploy/prod.env | cut -d= -f2-)
|
||||||
|
curl -s -o /dev/null -w "%{http_code}\n" https://obs.88oakapps.com/health \
|
||||||
|
-H "Authorization: Bearer $TOKEN" # 200 = healthy
|
||||||
|
```
|
||||||
|
|
||||||
|
Dashboards live at `https://grafana.88oakapps.com/d/honeydue-red`.
|
||||||
|
Admin credentials in `deploy/prod.env`.
|
||||||
|
|
||||||
## One-liners worth memorizing
|
## One-liners worth memorizing
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -34,6 +34,14 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
|||||||
| `~/.docker/config.json` | Docker CLI config. After `docker login` to Gitea, contains creds. **Log out after each deploy** to not leave PATs on disk. |
|
| `~/.docker/config.json` | Docker CLI config. After `docker login` to Gitea, contains creds. **Log out after each deploy** to not leave PATs on disk. |
|
||||||
| `~/Library/Containers/com.docker.docker/` | Docker Desktop state (macOS). |
|
| `~/Library/Containers/com.docker.docker/` | Docker Desktop state (macOS). |
|
||||||
|
|
||||||
|
### Apple / Cloudflare credentials on disk
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `~/Desktop/code/honeyDue/AuthKey_5L5BVF5G48.p8` | APNs auth key (Apple). Source file for the `honeydue-apns-key` k8s secret. Sensitive — treat as a credential. |
|
||||||
|
| `~/Desktop/code/honeyDue/cf-origin-cert.pem` | Cloudflare Origin CA cert (PEM). Source file for the `cloudflare-origin-cert` k8s secret. `*.myhoneydue.com` + `myhoneydue.com`, expires 2041. |
|
||||||
|
| `~/Desktop/code/honeyDue/cf-origin-key.pem` | Private key for the Origin cert. CF only shows this **once** at generation time. Sensitive — treat as a credential. |
|
||||||
|
|
||||||
## Git repo (`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/`)
|
## Git repo (`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/`)
|
||||||
|
|
||||||
### Top-level
|
### Top-level
|
||||||
@@ -90,19 +98,21 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
|||||||
| `deploy-k3s/manifests/namespace.yaml` | Creates `honeydue` namespace. |
|
| `deploy-k3s/manifests/namespace.yaml` | Creates `honeydue` namespace. |
|
||||||
| `deploy-k3s/manifests/rbac.yaml` | ServiceAccounts + `automountServiceAccountToken: false`. |
|
| `deploy-k3s/manifests/rbac.yaml` | ServiceAccounts + `automountServiceAccountToken: false`. |
|
||||||
| `deploy-k3s/manifests/pod-disruption-budgets.yaml` | PDBs for api (2/3) and worker (0/1). |
|
| `deploy-k3s/manifests/pod-disruption-budgets.yaml` | PDBs for api (2/3) and worker (0/1). |
|
||||||
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. NOT currently applied. |
|
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. **Applied.** Includes web policies; Traefik hostNetwork handled via node IP `ipBlock`s rather than namespaceSelector. |
|
||||||
| `deploy-k3s/manifests/api/deployment.yaml` | api Deployment. |
|
| `deploy-k3s/manifests/api/deployment.yaml` | api Deployment. |
|
||||||
| `deploy-k3s/manifests/api/service.yaml` | api ClusterIP Service. |
|
| `deploy-k3s/manifests/api/service.yaml` | api ClusterIP Service. |
|
||||||
| `deploy-k3s/manifests/api/hpa.yaml` | api HorizontalPodAutoscaler. NOT currently applied. |
|
| `deploy-k3s/manifests/api/hpa.yaml` | api HorizontalPodAutoscaler. NOT currently applied. |
|
||||||
| `deploy-k3s/manifests/admin/deployment.yaml` | admin Deployment. |
|
| `deploy-k3s/manifests/admin/deployment.yaml` | admin Deployment. |
|
||||||
| `deploy-k3s/manifests/admin/service.yaml` | admin Service. |
|
| `deploy-k3s/manifests/admin/service.yaml` | admin Service. |
|
||||||
|
| `deploy-k3s/manifests/web/deployment.yaml` | web Deployment (3 replicas, customer-facing Next.js at app.myhoneydue.com). |
|
||||||
|
| `deploy-k3s/manifests/web/service.yaml` | web ClusterIP Service. |
|
||||||
| `deploy-k3s/manifests/worker/deployment.yaml` | worker Deployment. |
|
| `deploy-k3s/manifests/worker/deployment.yaml` | worker Deployment. |
|
||||||
| `deploy-k3s/manifests/redis/deployment.yaml` | Redis Deployment. |
|
| `deploy-k3s/manifests/redis/deployment.yaml` | Redis Deployment. |
|
||||||
| `deploy-k3s/manifests/redis/service.yaml` | Redis Service. |
|
| `deploy-k3s/manifests/redis/service.yaml` | Redis Service. |
|
||||||
| `deploy-k3s/manifests/redis/pvc.yaml` | Redis PersistentVolumeClaim. |
|
| `deploy-k3s/manifests/redis/pvc.yaml` | Redis PersistentVolumeClaim. |
|
||||||
| `deploy-k3s/manifests/ingress/ingress.yaml` | Full Ingress with TLS + middleware (scaffold; needs CF origin cert). |
|
| `deploy-k3s/manifests/ingress/ingress.yaml` | Alternate full Ingress scaffold (unused; we apply ingress-simple.yaml). |
|
||||||
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | Simple Ingress without TLS (what we actually apply). |
|
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | **Primary Ingress**. TLS via CF Origin cert, `security-headers` + `rate-limit` middleware attached to all three rules (api/admin/web). |
|
||||||
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs. Not currently applied. |
|
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs (`rate-limit`, `security-headers`, `cloudflare-only`). Applied. `admin-auth` is defined in the file but was removed at runtime (it needs the `admin-basic-auth` secret, which has not been created yet). |
|
||||||
| `deploy-k3s/manifests/traefik-helmchartconfig.yaml` | Our DaemonSet + hostNetwork override for Traefik. |
|
| `deploy-k3s/manifests/traefik-helmchartconfig.yaml` | Our DaemonSet + hostNetwork override for Traefik. |
|
||||||
| `deploy-k3s/manifests/secrets.yaml.example` | Template (never deployed). |
|
| `deploy-k3s/manifests/secrets.yaml.example` | Template (never deployed). |
|
||||||
| `deploy-k3s/scripts/01-provision-cluster.sh` | hetzner-k3s provisioning (we didn't use it; existing nodes). |
|
| `deploy-k3s/scripts/01-provision-cluster.sh` | hetzner-k3s provisioning (we didn't use it; existing nodes). |
|
||||||
|
|||||||
@@ -65,7 +65,9 @@ Every external link cited anywhere in this book, grouped by topic.
|
|||||||
- [Neon usage-based pricing announcement][neon-blog]
|
- [Neon usage-based pricing announcement][neon-blog]
|
||||||
- [Neon connect from any app][neon-connect]
|
- [Neon connect from any app][neon-connect]
|
||||||
- [Postgres advisory locks][pg-locks]
|
- [Postgres advisory locks][pg-locks]
|
||||||
- [GORM AutoMigrate][gorm-automigrate]
|
- [GORM AutoMigrate][gorm-automigrate] (tests only — production migrations use goose)
|
||||||
|
- [pressly/goose — SQL migration tool][goose]
|
||||||
|
- [Goose documentation][goose-docs]
|
||||||
|
|
||||||
## Backblaze B2
|
## Backblaze B2
|
||||||
|
|
||||||
@@ -168,6 +170,8 @@ Every external link cited anywhere in this book, grouped by topic.
|
|||||||
[neon-connect]: https://neon.com/docs/connect/connect-from-any-app
|
[neon-connect]: https://neon.com/docs/connect/connect-from-any-app
|
||||||
[pg-locks]: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS
|
[pg-locks]: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS
|
||||||
[gorm-automigrate]: https://gorm.io/docs/migration.html
|
[gorm-automigrate]: https://gorm.io/docs/migration.html
|
||||||
|
[goose]: https://github.com/pressly/goose
|
||||||
|
[goose-docs]: https://pressly.github.io/goose/
|
||||||
|
|
||||||
<!-- B2 -->
|
<!-- B2 -->
|
||||||
[b2-docs]: https://www.backblaze.com/docs/
|
[b2-docs]: https://www.backblaze.com/docs/
|
||||||
|
|||||||
@@ -0,0 +1,166 @@
|
|||||||
|
# Observability Plan — honeyDue (100% self-hosted)
|
||||||
|
|
||||||
|
**Goal:** Live request-timing visibility (HTTP, DB, B2 uploads, APNs, asynq jobs) without paying any SaaS vendor.
|
||||||
|
|
||||||
|
**Deployment target:** `88oakappsUpdate` (Linode VPS at `185.143.228.16`, Ubuntu 24.04, 8 vCPU / 32 GB RAM / 193 GB disk). This box already runs the self-hosted PostHog stack and has nginx + Let's Encrypt set up for `*.88oakapps.com`. Free RAM at rest ≈ 15 GB; the obs stack budget is ≈ 700 MB → ~5% of free RAM. Costs $0 incremental.
|
||||||
|
|
||||||
|
**Why not in the honeyDue k3s cluster:** Frees ~700 MB across the 3 Hetzner nodes, no PVC plumbing, and no need to expose anything from k3s — everything is push-from-app to a public TLS endpoint.
|
||||||
|
|
||||||
|
**Status:** Fully shipped. VictoriaMetrics + Jaeger + Grafana on `obs.88oakapps.com`, vmagent in-cluster, OTel SDK and otelgorm wired into the api+worker, every authed endpoint produces nested HTTP→service→SQL flame graphs in Jaeger.
|
||||||
|
|
||||||
|
The first round of traces revealed every visible ms was network/proxy overhead — DB execution itself is sub-millisecond. The follow-up work (`internal/services/residence_id_cache.go`, GORM pool warm-up, auth-query JOIN consolidation, switching `DB_HOST` to Neon's `-pooler` endpoint, bumped cache TTLs) cut warm-cache `/api/tasks/` from 2,473 ms / 5 spans to **229 ms / 2 spans** — see commit `88fb175` and Chapter 8 §"Optimizations layered on top".
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Stack
|
||||||
|
|
||||||
|
| Role | Choice | Why this vs. the obvious alternative |
|
||||||
|
|---|---|---|
|
||||||
|
| Metrics store | **VictoriaMetrics** (single-node) | Drop-in Prometheus-compatible. ~2.5× lower RAM (~200 MB vs ~500 MB) and ~7× better compression. Single binary. |
|
||||||
|
| Tracing | **Jaeger all-in-one** | ~150 MB RAM with embedded badger storage. Tempo monolithic mode needs 1-2 GB minimum — overkill for honeyDue's scale. |
|
||||||
|
| Dashboards | **Grafana OSS** | Connects to both VM (Prometheus protocol) and Jaeger natively. |
|
||||||
|
| App instrumentation | **OpenTelemetry SDK** + `prometheus/client_golang` | OTel is vendor-neutral — backends are swappable without code change. |
|
||||||
|
| Logs | **Keep Dozzle**; add Loki only when log search becomes painful | Loki adds ~512 MB RAM + a daemonset for log shipping. Not worth it until there's a concrete pain point. |
|
||||||
|
|
||||||
|
### Why not the LGTM stack (Loki + Grafana + Tempo + Mimir)?
|
||||||
|
|
||||||
|
- **Tempo** wants 1-2 GB RAM minimum in monolithic mode ([Grafana community report](https://community.grafana.com/t/tempo-ram-usage-for-6k-spans-per-hour/63801)). Stacking that on top of Loki + Mimir would consume ~3-4 GB RAM. On a 3×8 GB cluster that's 12-17% of capacity for observability infra.
|
||||||
|
- **Mimir** is wonderful for multi-tenant Prometheus at scale — you have one tenant.
|
||||||
|
- **Loki** is great if you live in `kubectl logs` and need full-text search across them. You currently use Dozzle and are not feeling that pain.
|
||||||
|
|
||||||
|
VictoriaMetrics + Jaeger all-in-one gives you 90% of the value at 25% of the resource cost.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Resource budget on `88oakappsUpdate`
|
||||||
|
|
||||||
|
Three Docker containers in a separate compose project under `/opt/honeydue-obs/` — fully isolated from the existing PostHog compose stack so PostHog's lifecycle never touches the obs stack and vice versa.
|
||||||
|
|
||||||
|
| Service | `mem_limit` | Disk (bind mount) | Retention |
|
||||||
|
|---|---|---|---|
|
||||||
|
| VictoriaMetrics single-node | 256 MB | 10 GB | 30 days metrics |
|
||||||
|
| Jaeger all-in-one (badger storage) | 256 MB | 10 GB | 7 days traces |
|
||||||
|
| Grafana OSS | 256 MB | 1 GB | — |
|
||||||
|
| **Total** | **~768 MB hard cap** | **21 GB** | |
|
||||||
|
|
||||||
|
**~5% of the box's free RAM and ~14% of free disk.** The hard `mem_limit` per container matters: ClickHouse on the same VM can spike under PostHog analytics load, so bounding the obs stack prevents it from competing in a memory pinch.
|
||||||
|
|
||||||
|
**Don't reuse PostHog's ClickHouse / Kafka / Redis.** Tempting because they're sitting right there, but coupling honeyDue's observability to PostHog's storage means a PostHog incident takes honeyDue's incident-response telemetry down with it. Keep them fully separate.
|
||||||
|
|
||||||
|
**Shared blast radius caveat:** A kernel panic on `88oakappsUpdate` loses both PostHog and honeyDue obs at once. At current scale, fine — call it out, don't fix.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## App-side instrumentation
|
||||||
|
|
||||||
|
| Surface | Library / approach | Import path |
|
||||||
|
|---|---|---|
|
||||||
|
| Echo HTTP middleware | `otelecho` — span per request, tagged route/method/status | `go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho` |
|
||||||
|
| GORM queries | `uptrace/otelgorm` plugin — `db.Use(otelgorm.NewPlugin())`. Requires threading `ctx` through repositories so `db.WithContext(ctx)` works. | `github.com/uptrace/opentelemetry-go-extra/otelgorm` |
|
||||||
|
| B2 / minio-go uploads | Manual span around `storage_service.Upload` with attributes for bucket, object size, MIME type | `go.opentelemetry.io/otel` |
|
||||||
|
| APNs / FCM | Manual span in `internal/push/apns.go` and `fcm.go`; record device-token, response status code | `go.opentelemetry.io/otel` |
|
||||||
|
| asynq jobs | Custom `asynq.MiddlewareFunc` (~20 lines) — span per task type, attached to ctx, records duration + retry count | `go.opentelemetry.io/otel` + `asynq.MiddlewareFunc` |
|
||||||
|
| Prometheus `/metrics` endpoint | `prometheus/client_golang` direct — register histograms for HTTP duration / GORM op / B2 op / APNs send | `github.com/prometheus/client_golang/prometheus`, `.../prometheus/promhttp` |
|
||||||
|
| OTLP exporter | OTLP/HTTP → `https://obs.88oakapps.com/v1/traces` with bearer token. 100% sample in dev, 10% in prod. | `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp` |
|
||||||
|
| Metrics push | `vmagent` sidecar in k3s scrapes the api Pod's `/metrics` and remote-writes to `https://obs.88oakapps.com/api/v1/write` with bearer token. Cleaner than exposing `/metrics` publicly. | `victoriametrics/vmagent` image |
|
||||||
|
|
||||||
|
**Note on GORM context propagation:** the existing repository methods don't take `ctx context.Context`. Adding `otelgorm` requires plumbing ctx down from the Echo handler through the service layer to the repository call site. ~10 repository files, many call sites. Save for last because the diff is large.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation order (smallest first)
|
||||||
|
|
||||||
|
### Step 1 — Metrics + dashboards (highest immediate ROI)
|
||||||
|
|
||||||
|
**On `88oakappsUpdate`:**
|
||||||
|
1. `mkdir -p /opt/honeydue-obs/{data/vm,data/jaeger,data/grafana}` and create a `docker-compose.yml` defining the three services with `mem_limit: 256m`, bind mounts for persistence, and an isolated bridge network
|
||||||
|
2. Add nginx vhosts (DNS A records first):
|
||||||
|
- `grafana.88oakapps.com` → `127.0.0.1:3000` (basic auth via htpasswd, Let's Encrypt)
|
||||||
|
- `obs.88oakapps.com` → routes by path:
|
||||||
|
- `/api/v1/write` → `127.0.0.1:8428` (VictoriaMetrics remote-write, bearer-token check)
|
||||||
|
- `/v1/traces` → `127.0.0.1:4318` (OTLP/HTTP traces, bearer-token check)
|
||||||
|
3. Generate a 32-byte token, store it in `/etc/honeydue-obs/token` (mode 0600), and reference it from nginx via `auth_request` or a simple `if ($http_authorization != ...)` check
|
||||||
|
4. Pre-provision Grafana with the VM datasource pointing at `http://victoriametrics:8428` (in-network)
|
||||||
|
|
||||||
|
**On the honeyDue k3s cluster:**
|
||||||
|
5. Add `prometheus/client_golang` to `honeyDueAPI-go/go.mod` and a `/metrics` endpoint to the Go API
|
||||||
|
6. Register histograms:
|
||||||
|
- `http_request_duration_seconds{route,method,status}` via Echo middleware
|
||||||
|
- `gorm_query_duration_seconds{table,operation}` via a GORM `Plugin` callback (no ctx needed for this one — operates at the SQL string level)
|
||||||
|
- `b2_upload_duration_seconds{bucket,result}`
|
||||||
|
- `apns_send_duration_seconds{result}`
|
||||||
|
7. Deploy a `vmagent` sidecar (or DaemonSet) in the `honeydue` namespace with:
|
||||||
|
- Scrape: api Service `/metrics` every 15s
|
||||||
|
- `remote_write.url`: `https://obs.88oakapps.com/api/v1/write`
|
||||||
|
- `remote_write.bearer_token`: from k8s Secret
|
||||||
|
8. Build the RED dashboard in Grafana: rate, errors, duration p50/p95/p99 per route
|
||||||
|
|
||||||
|
**ROI:** "Is the API healthy? Where is time being spent right now?" answered live, served from `grafana.88oakapps.com`.
|
||||||
|
|
||||||
|
### Step 2 — Tracing baseline
|
||||||
|
|
||||||
|
(Jaeger is already up from Step 1. This step adds the app-side wiring.)
|
||||||
|
|
||||||
|
1. Add Grafana datasource for Jaeger pointing at `http://jaeger:16686` (in-network)
|
||||||
|
2. Wire OTel SDK in `cmd/api/main.go`:
|
||||||
|
- `otel.SetTracerProvider(tracerProvider)`
|
||||||
|
- `otelecho.Middleware("honeydue-api")` on Echo
|
||||||
|
- OTLP/HTTP exporter pointing at `https://obs.88oakapps.com/v1/traces` with `Authorization: Bearer <token>` header (token from env)
|
||||||
|
- Sampling: `TraceIDRatioBased(0.1)` in prod, `AlwaysSample()` in dev
|
||||||
|
3. Verify: a single `POST /api/auth/login/` produces a trace in Jaeger
|
||||||
|
|
||||||
|
**ROI:** "Why is this one request slow?" — answered with a flame graph.
|
||||||
|
|
||||||
|
### Step 3 — Manual spans for the work that actually matters
|
||||||
|
|
||||||
|
Wrap each in `tracer.Start(ctx, ...)` with attributes:
|
||||||
|
- `storage_service.Upload` → span "b2.PutObject" with `bucket`, `key`, `size_bytes`, result
|
||||||
|
- `push/apns.go` → span "apns.send" with `device_token_hash`, `status_code`, `reason`
|
||||||
|
- `asynq` middleware → span per task type with `task.type`, `retry_count`, `payload_size`
|
||||||
|
|
||||||
|
**ROI:** Specific high-value debugging questions ("why did this upload take 30 seconds", "why did these 5 push notifications fail") answered without code archaeology.
|
||||||
|
|
||||||
|
### Step 4 — Repository ctx + `otelgorm` (biggest diff, save for last)
|
||||||
|
|
||||||
|
1. Refactor every repository method to accept `ctx context.Context` as first arg
|
||||||
|
2. Update every call site to pass `c.Request().Context()` from handlers / propagate through services
|
||||||
|
3. Add `db.Use(otelgorm.NewPlugin())` in `internal/database/database.go`
|
||||||
|
4. Verify: a request now has nested spans `http → service → query → query → b2.PutObject → apns.send` with full SQL on the query spans
|
||||||
|
|
||||||
|
**ROI:** Every DB query in every trace, with SQL + table + rows. The "find the N+1" tool you'd otherwise build by hand.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Hard skips (revisit only when explicitly proven needed)
|
||||||
|
|
||||||
|
| Tool | Why skip |
|
||||||
|
|---|---|
|
||||||
|
| Loki / Promtail | Dozzle covers the immediate need. Loki adds 512 Mi RAM + a daemonset; defer until log search becomes a hot pain point. |
|
||||||
|
| Mimir / VM cluster mode | Single-node VM handles honeyDue scale for years. |
|
||||||
|
| Pyroscope continuous profiling | Overkill at 3 small nodes. Use `pprof` endpoints ad-hoc when CPU pressure shows up. |
|
||||||
|
| OTel Collector | Only worth running when 3+ services emit telemetry. App → Jaeger direct is fine for now. |
|
||||||
|
| Any SaaS vendor (Datadog, NR, Honeycomb, Grafana Cloud, Sentry Performance) | User constraint: nothing paid. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When to move off `88oakappsUpdate`
|
||||||
|
|
||||||
|
Triggers — any one is enough:
|
||||||
|
- `88oakappsUpdate` available memory drops below ~3 GB sustained (PostHog growth squeezing it)
|
||||||
|
- ClickHouse OOM events start showing up in `dmesg` (PostHog under load)
|
||||||
|
- You want fully separate failure domains for honeyDue vs. 88oakapps
|
||||||
|
|
||||||
|
Migration path: the obs stack is a single docker-compose project on a bind-mount, so moving it = `rsync /opt/honeydue-obs/` to a new box, update DNS for `grafana.88oakapps.com` and `obs.88oakapps.com`, `docker compose up -d`. ~30 min of work. Until then: cohabiting on `88oakappsUpdate` is correct.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick reference: what shows up where
|
||||||
|
|
||||||
|
| Question | Where to look |
|
||||||
|
|---|---|
|
||||||
|
| Is the API up right now? Latency? Errors? | Grafana RED dashboard |
|
||||||
|
| Why is this specific request slow? | Jaeger trace view |
|
||||||
|
| What did the slow part of that request actually do (which SQL, which B2 PUT)? | Span details inside the trace |
|
||||||
|
| Background job throughput / queue depth | VictoriaMetrics + asynq metrics |
|
||||||
|
| What did the app print to stdout 5 minutes ago? | Dozzle |
|
||||||
|
| What error did the app log? | Dozzle (search) — or Loki if/when added |
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
# Runbook — Secret Rotation
|
||||||
|
|
||||||
|
Closes audit finding `K3S-F12` (secrets unrotated since cluster bootstrap,
|
||||||
|
no rotation cadence). See `deploy-k3s/SECURITY.md` Stage 2.
|
||||||
|
|
||||||
|
**Cadence:** rotate every secret at least **annually**. Rotate
|
||||||
|
**immediately** on suspected exposure, on loss of an operator device, or when
|
||||||
|
anyone who has seen a secret leaves the project.
|
||||||
|
|
||||||
|
**Record keeping:** after each rotation, annotate the secret so the age is
|
||||||
|
visible:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n honeydue annotate secret <name> \
|
||||||
|
honeydue.dev/last-rotated="$(date -u +%Y-%m-%d)" --overwrite
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How rotation works
|
||||||
|
|
||||||
|
Every secret has a **source of truth** on the operator workstation. The
|
||||||
|
deploy scripts read those sources and (re)create the Kubernetes Secrets.
|
||||||
|
Rotation is always: **update the source → re-run `02-setup-secrets.sh` →
|
||||||
|
restart the pods that consume it → revoke the old credential at its
|
||||||
|
provider.**
|
||||||
|
|
||||||
|
`02-setup-secrets.sh` renders each Secret with `--dry-run=client -o yaml` and pipes it to `kubectl apply`,
|
||||||
|
so re-running it is idempotent and only changes what you changed.
|
||||||
|
|
||||||
|
| Kubernetes Secret | Source of truth | Consumed by |
|
||||||
|
|---|---|---|
|
||||||
|
| `honeydue-secrets` → `POSTGRES_PASSWORD` | `deploy-k3s/secrets/postgres_password.txt` | api, worker |
|
||||||
|
| `honeydue-secrets` → `SECRET_KEY` | `deploy-k3s/secrets/secret_key.txt` | api, worker |
|
||||||
|
| `honeydue-secrets` → `EMAIL_HOST_PASSWORD` | `deploy-k3s/secrets/email_host_password.txt` | api, worker |
|
||||||
|
| `honeydue-secrets` → `FCM_SERVER_KEY` | `deploy-k3s/secrets/fcm_server_key.txt` | api, worker |
|
||||||
|
| `honeydue-secrets` → `REDIS_PASSWORD` | `config.yaml` key `redis.password` | api, worker, redis |
|
||||||
|
| `honeydue-secrets` → `OBS_INGEST_TOKEN` | `deploy/prod.env` | api, worker |
|
||||||
|
| `honeydue-apns-key` → `apns_auth_key.p8` | `deploy-k3s/secrets/apns_auth_key.p8` | api, worker |
|
||||||
|
| `cloudflare-origin-cert` | `deploy-k3s/secrets/cloudflare-origin.{crt,key}` | Traefik ingress |
|
||||||
|
| `ghcr-credentials` | `config.yaml` block `registry.*` | image pulls (all pods) |
|
||||||
|
| `admin-basic-auth` | `config.yaml` keys `admin.basic_auth_user` / `..._password` | Traefik `admin-auth` middleware |
|
||||||
|
|
||||||
|
The `deploy-k3s/secrets/` directory and `config.yaml` are **gitignored** —
|
||||||
|
never commit them.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Standard rotation procedure
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd honeyDueAPI-go
|
||||||
|
export KUBECONFIG="$(pwd)/deploy-k3s/kubeconfig"
|
||||||
|
|
||||||
|
# 1. Update the source (file under deploy-k3s/secrets/, a config.yaml key, or deploy/prod.env)
|
||||||
|
# 2. Recreate the Kubernetes Secrets from sources
|
||||||
|
./deploy-k3s/scripts/02-setup-secrets.sh
|
||||||
|
|
||||||
|
# 3. Restart the consumers (see per-secret notes below for which)
|
||||||
|
kubectl -n honeydue rollout restart deploy/api deploy/worker
|
||||||
|
|
||||||
|
# 4. Confirm health
|
||||||
|
kubectl -n honeydue rollout status deploy/api
|
||||||
|
kubectl -n honeydue rollout status deploy/worker
|
||||||
|
|
||||||
|
# 5. Revoke the OLD credential at its provider (see per-secret notes)
|
||||||
|
# 6. Annotate the rotated secret with today's date
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Per-secret notes
|
||||||
|
|
||||||
|
### `POSTGRES_PASSWORD`
|
||||||
|
1. Rotate the role password in the Neon dashboard.
|
||||||
|
2. Write the new value to `deploy-k3s/secrets/postgres_password.txt`.
|
||||||
|
3. `02-setup-secrets.sh`, then `rollout restart deploy/api deploy/worker`.
|
||||||
|
4. Watch logs for connection errors; the old password stops working the
|
||||||
|
moment Neon applies the change, so do steps 2–3 promptly.
|
||||||
|
|
||||||
|
### `SECRET_KEY` ⚠️ user-visible
|
||||||
|
This signs auth tokens. **Rotating it logs every user out** — all existing
|
||||||
|
tokens become invalid and every client must re-authenticate.
|
||||||
|
1. Generate: `openssl rand -hex 32`.
|
||||||
|
2. Write to `deploy-k3s/secrets/secret_key.txt` (must be ≥32 chars — the
|
||||||
|
script enforces this; the app refuses to start in production without it).
|
||||||
|
3. `02-setup-secrets.sh`, then `rollout restart deploy/api deploy/worker`.
|
||||||
|
- Only rotate on a schedule or on suspected compromise — not casually.
|
||||||
|
- A future improvement (overlap window via a key-id header) would let old
|
||||||
|
tokens validate during the transition; not implemented today.
|
||||||
|
|
||||||
|
### `EMAIL_HOST_PASSWORD`
|
||||||
|
1. Generate a new app password in Fastmail; keep the old one alive briefly.
|
||||||
|
2. Write to `deploy-k3s/secrets/email_host_password.txt`.
|
||||||
|
3. `02-setup-secrets.sh`, then `rollout restart deploy/api deploy/worker`.
|
||||||
|
4. Delete the old Fastmail app password.
|
||||||
|
|
||||||
|
### `FCM_SERVER_KEY`
|
||||||
|
1. Rotate the key in the Firebase console.
|
||||||
|
2. Write to `deploy-k3s/secrets/fcm_server_key.txt`.
|
||||||
|
3. `02-setup-secrets.sh`, then `rollout restart deploy/api deploy/worker`.
|
||||||
|
|
||||||
|
### `REDIS_PASSWORD`
|
||||||
|
Source is `config.yaml` key `redis.password` (hex only — it is embedded in
|
||||||
|
the `REDIS_URL`, so non-hex characters would break URL parsing).
|
||||||
|
1. Generate: `openssl rand -hex 32`.
|
||||||
|
2. Set `redis.password` in `config.yaml`.
|
||||||
|
3. `02-setup-secrets.sh`.
|
||||||
|
4. Restart **redis as well as** api/worker so the new `--requirepass` and
|
||||||
|
the new `REDIS_URL` land together:
|
||||||
|
`kubectl -n honeydue rollout restart deploy/redis deploy/api deploy/worker`.
|
||||||
|
Expect a few seconds where api/worker reconnect.
|
||||||
|
|
||||||
|
### `apns_auth_key.p8`
|
||||||
|
1. Generate a new `.p8` in the Apple Developer console; revoke the old key last (as in the standard procedure) so pushes keep working during the switch.
|
||||||
|
2. Replace `deploy-k3s/secrets/apns_auth_key.p8`.
|
||||||
|
3. `02-setup-secrets.sh`, then `rollout restart deploy/api deploy/worker`.
|
||||||
|
4. If the Key ID changed, update `push.apns_key_id` in `config.yaml` too.
|
||||||
|
|
||||||
|
### `cloudflare-origin-cert`
|
||||||
|
1. Generate a new Origin CA certificate in the Cloudflare dashboard.
|
||||||
|
2. Replace `deploy-k3s/secrets/cloudflare-origin.crt` and `.key`.
|
||||||
|
3. `02-setup-secrets.sh`. Traefik picks up the new TLS secret; no app
|
||||||
|
restart needed. Verify the served cert with `openssl s_client`.
|
||||||
|
|
||||||
|
### `ghcr-credentials` (Gitea registry)
|
||||||
|
1. Generate a new PAT in Gitea (scope: `read:packages`).
|
||||||
|
2. Update the `registry.token` value in `config.yaml`.
|
||||||
|
3. `02-setup-secrets.sh`. No restart needed unless a pull is pending.
|
||||||
|
4. Revoke the old PAT in Gitea.
|
||||||
|
|
||||||
|
### `admin-basic-auth`
|
||||||
|
Source is `config.yaml` keys `admin.basic_auth_user` / `admin.basic_auth_password`.
|
||||||
|
1. Set a new `admin.basic_auth_password` in `config.yaml` (e.g. generated with `openssl rand -hex 24`).
|
||||||
|
2. `02-setup-secrets.sh` regenerates the bcrypt htpasswd secret.
|
||||||
|
3. No app restart needed — Traefik reloads the `admin-auth` middleware.
|
||||||
|
4. Distribute the new credential to whoever uses the admin panel.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## After any rotation
|
||||||
|
|
||||||
|
- Run `./deploy-k3s/scripts/04-verify.sh` and confirm no `✗` lines.
|
||||||
|
- Annotate the rotated secret (see "Record keeping" above).
|
||||||
|
- If the rotation was due to a compromise, also follow the relevant
|
||||||
|
playbook in `deploy-k3s/SECURITY.md` → Appendix (Incident response).
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
module github.com/treytartt/honeydue-api
|
module github.com/treytartt/honeydue-api
|
||||||
|
|
||||||
go 1.25
|
go 1.25.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-pdf/fpdf v0.9.0
|
github.com/go-pdf/fpdf v0.9.0
|
||||||
@@ -9,9 +9,10 @@ require (
|
|||||||
github.com/google/uuid v1.6.0
|
github.com/google/uuid v1.6.0
|
||||||
github.com/gorilla/websocket v1.5.3
|
github.com/gorilla/websocket v1.5.3
|
||||||
github.com/hibiken/asynq v0.25.1
|
github.com/hibiken/asynq v0.25.1
|
||||||
github.com/labstack/echo/v4 v4.11.4
|
github.com/labstack/echo/v4 v4.15.1
|
||||||
github.com/minio/minio-go/v7 v7.0.99
|
github.com/minio/minio-go/v7 v7.0.99
|
||||||
github.com/nicksnyder/go-i18n/v2 v2.6.0
|
github.com/nicksnyder/go-i18n/v2 v2.6.0
|
||||||
|
github.com/prometheus/client_golang v1.23.2
|
||||||
github.com/redis/go-redis/v9 v9.17.1
|
github.com/redis/go-redis/v9 v9.17.1
|
||||||
github.com/rs/zerolog v1.34.0
|
github.com/rs/zerolog v1.34.0
|
||||||
github.com/shirou/gopsutil/v3 v3.24.5
|
github.com/shirou/gopsutil/v3 v3.24.5
|
||||||
@@ -20,11 +21,17 @@ require (
|
|||||||
github.com/spf13/viper v1.20.1
|
github.com/spf13/viper v1.20.1
|
||||||
github.com/stretchr/testify v1.11.1
|
github.com/stretchr/testify v1.11.1
|
||||||
github.com/stripe/stripe-go/v81 v81.4.0
|
github.com/stripe/stripe-go/v81 v81.4.0
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2
|
||||||
github.com/wneessen/go-mail v0.7.2
|
github.com/wneessen/go-mail v0.7.2
|
||||||
golang.org/x/crypto v0.46.0
|
go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.68.0
|
||||||
golang.org/x/oauth2 v0.34.0
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0
|
||||||
golang.org/x/text v0.32.0
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
|
||||||
golang.org/x/time v0.14.0
|
go.opentelemetry.io/otel/sdk v1.43.0
|
||||||
|
golang.org/x/crypto v0.51.0
|
||||||
|
golang.org/x/oauth2 v0.35.0
|
||||||
|
golang.org/x/term v0.43.0
|
||||||
|
golang.org/x/text v0.37.0
|
||||||
|
golang.org/x/time v0.15.0
|
||||||
google.golang.org/api v0.257.0
|
google.golang.org/api v0.257.0
|
||||||
gopkg.in/yaml.v3 v3.0.1
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
gorm.io/driver/postgres v1.6.0
|
gorm.io/driver/postgres v1.6.0
|
||||||
@@ -33,17 +40,28 @@ require (
|
|||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
github.com/go-ini/ini v1.67.0 // indirect
|
github.com/go-ini/ini v1.67.0 // indirect
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
|
||||||
github.com/klauspost/compress v1.18.2 // indirect
|
github.com/klauspost/compress v1.18.2 // indirect
|
||||||
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
|
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
|
||||||
github.com/klauspost/crc32 v1.3.0 // indirect
|
github.com/klauspost/crc32 v1.3.0 // indirect
|
||||||
github.com/minio/crc64nvme v1.1.1 // indirect
|
github.com/minio/crc64nvme v1.1.1 // indirect
|
||||||
github.com/minio/md5-simd v1.1.2 // indirect
|
github.com/minio/md5-simd v1.1.2 // indirect
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/philhofer/fwd v1.2.0 // indirect
|
github.com/philhofer/fwd v1.2.0 // indirect
|
||||||
|
github.com/prometheus/client_model v0.6.2 // indirect
|
||||||
|
github.com/prometheus/common v0.66.1 // indirect
|
||||||
|
github.com/prometheus/procfs v0.16.1 // indirect
|
||||||
github.com/rs/xid v1.6.0 // indirect
|
github.com/rs/xid v1.6.0 // indirect
|
||||||
github.com/tinylib/msgp v1.6.1 // indirect
|
github.com/tinylib/msgp v1.6.1 // indirect
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 // indirect
|
||||||
|
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
|
||||||
|
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -51,7 +69,7 @@ require (
|
|||||||
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
|
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
|
||||||
cloud.google.com/go/compute/metadata v0.9.0 // indirect
|
cloud.google.com/go/compute/metadata v0.9.0 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||||
@@ -62,7 +80,6 @@ require (
|
|||||||
github.com/go-playground/locales v0.14.1 // indirect
|
github.com/go-playground/locales v0.14.1 // indirect
|
||||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||||
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
||||||
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect; TODO(S-19): Pulled by echo/v4 middleware — upgrade Echo to v4.12+ which removes built-in JWT middleware (uses echo-jwt/v4 with jwt/v5 instead), eliminating this vulnerable transitive dep
|
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
|
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
|
||||||
github.com/google/s2a-go v0.1.9 // indirect
|
github.com/google/s2a-go v0.1.9 // indirect
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
|
github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
|
||||||
@@ -76,11 +93,11 @@ require (
|
|||||||
github.com/labstack/gommon v0.4.2 // indirect
|
github.com/labstack/gommon v0.4.2 // indirect
|
||||||
github.com/leodido/go-urn v1.4.0 // indirect
|
github.com/leodido/go-urn v1.4.0 // indirect
|
||||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/mattn/go-sqlite3 v2.0.3+incompatible // indirect
|
github.com/mattn/go-sqlite3 v2.0.3+incompatible // indirect
|
||||||
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
|
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
github.com/sagikazarmark/locafero v0.9.0 // indirect
|
github.com/sagikazarmark/locafero v0.9.0 // indirect
|
||||||
@@ -97,13 +114,13 @@ require (
|
|||||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
||||||
go.opentelemetry.io/otel v1.38.0 // indirect
|
go.opentelemetry.io/otel v1.43.0
|
||||||
go.opentelemetry.io/otel/metric v1.38.0 // indirect
|
go.opentelemetry.io/otel/metric v1.43.0 // indirect
|
||||||
go.opentelemetry.io/otel/trace v1.38.0 // indirect
|
go.opentelemetry.io/otel/trace v1.43.0
|
||||||
golang.org/x/net v0.48.0 // indirect
|
golang.org/x/net v0.53.0 // indirect
|
||||||
golang.org/x/sync v0.19.0 // indirect
|
golang.org/x/sync v0.20.0
|
||||||
golang.org/x/sys v0.39.0 // indirect
|
golang.org/x/sys v0.44.0 // indirect
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846 // indirect
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
|
||||||
google.golang.org/grpc v1.77.0 // indirect
|
google.golang.org/grpc v1.80.0 // indirect
|
||||||
google.golang.org/protobuf v1.36.10 // indirect
|
google.golang.org/protobuf v1.36.11 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -8,16 +8,20 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg
|
|||||||
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
|
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
|
||||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||||
github.com/alecthomas/units v0.0.0-20201120081800-1786d5ef83d4/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
|
github.com/alecthomas/units v0.0.0-20201120081800-1786d5ef83d4/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
|
||||||
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
||||||
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
|
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
|
||||||
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
||||||
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||||
@@ -52,8 +56,6 @@ github.com/go-playground/validator/v10 v10.23.0/go.mod h1:dbuPbCMFw/DrkbEynArYaC
|
|||||||
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
|
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
|
||||||
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||||
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
|
|
||||||
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
|
|
||||||
github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
|
github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
|
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
|
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
|
||||||
@@ -74,6 +76,8 @@ github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81
|
|||||||
github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc=
|
github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc=
|
||||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
|
||||||
github.com/hibiken/asynq v0.25.1 h1:phj028N0nm15n8O2ims+IvJ2gz4k2auvermngh9JhTw=
|
github.com/hibiken/asynq v0.25.1 h1:phj028N0nm15n8O2ims+IvJ2gz4k2auvermngh9JhTw=
|
||||||
github.com/hibiken/asynq v0.25.1/go.mod h1:pazWNOLBu0FEynQRBvHA26qdIKRSmfdIfUm4HdsLmXg=
|
github.com/hibiken/asynq v0.25.1/go.mod h1:pazWNOLBu0FEynQRBvHA26qdIKRSmfdIfUm4HdsLmXg=
|
||||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||||
@@ -99,16 +103,19 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
|||||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||||
github.com/labstack/echo/v4 v4.11.4 h1:vDZmA+qNeh1pd/cCkEicDMrjtrnMGQ1QFI9gWN1zGq8=
|
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||||
github.com/labstack/echo/v4 v4.11.4/go.mod h1:noh7EvLwqDsmh/X/HWKPUl1AjzJrhyptRyEbQJfxen8=
|
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||||
|
github.com/labstack/echo/v4 v4.15.1 h1:S9keusg26gZpjMmPqB5hOEvNKnmd1lNmcHrbbH2lnFs=
|
||||||
|
github.com/labstack/echo/v4 v4.15.1/go.mod h1:xmw1clThob0BSVRX1CRQkGQ/vjwcpOMjQZSZa9fKA/c=
|
||||||
github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
|
github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
|
||||||
github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU=
|
github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU=
|
||||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
|
||||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
|
||||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
|
||||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||||
|
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
|
||||||
|
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
|
||||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
@@ -121,6 +128,8 @@ github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
|||||||
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
||||||
github.com/minio/minio-go/v7 v7.0.99 h1:2vH/byrwUkIpFQFOilvTfaUpvAX3fEFhEzO+DR3DlCE=
|
github.com/minio/minio-go/v7 v7.0.99 h1:2vH/byrwUkIpFQFOilvTfaUpvAX3fEFhEzO+DR3DlCE=
|
||||||
github.com/minio/minio-go/v7 v7.0.99/go.mod h1:EtGNKtlX20iL2yaYnxEigaIvj0G0GwSDnifnG8ClIdw=
|
github.com/minio/minio-go/v7 v7.0.99/go.mod h1:EtGNKtlX20iL2yaYnxEigaIvj0G0GwSDnifnG8ClIdw=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
github.com/nicksnyder/go-i18n/v2 v2.6.0 h1:C/m2NNWNiTB6SK4Ao8df5EWm3JETSTIGNXBpMJTxzxQ=
|
github.com/nicksnyder/go-i18n/v2 v2.6.0 h1:C/m2NNWNiTB6SK4Ao8df5EWm3JETSTIGNXBpMJTxzxQ=
|
||||||
github.com/nicksnyder/go-i18n/v2 v2.6.0/go.mod h1:88sRqr0C6OPyJn0/KRNaEz1uWorjxIKP7rUUcvycecE=
|
github.com/nicksnyder/go-i18n/v2 v2.6.0/go.mod h1:88sRqr0C6OPyJn0/KRNaEz1uWorjxIKP7rUUcvycecE=
|
||||||
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
|
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
|
||||||
@@ -128,10 +137,19 @@ github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8
|
|||||||
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
|
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
|
||||||
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
|
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
|
||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||||
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
|
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
|
||||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
||||||
|
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
|
||||||
|
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||||
|
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||||
|
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||||
|
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
|
||||||
|
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
|
||||||
|
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
|
||||||
|
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
||||||
github.com/redis/go-redis/v9 v9.17.1 h1:7tl732FjYPRT9H9aNfyTwKg9iTETjWjGKEJ2t/5iWTs=
|
github.com/redis/go-redis/v9 v9.17.1 h1:7tl732FjYPRT9H9aNfyTwKg9iTETjWjGKEJ2t/5iWTs=
|
||||||
github.com/redis/go-redis/v9 v9.17.1/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
|
github.com/redis/go-redis/v9 v9.17.1/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
|
||||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||||
@@ -180,6 +198,10 @@ github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFA
|
|||||||
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
|
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
|
||||||
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
||||||
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 h1:Jjn3zoRz13f8b1bR6LrXWglx93Sbh4kYfwgmPju3E2k=
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2/go.mod h1:wocb5pNrj/sjhWB9J5jctnC0K2eisSdz/nJJBNFHo+A=
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 h1:ZjUj9BLYf9PEqBn8W/OapxhPjVRdC6CsXTdULHsyk5c=
|
||||||
|
github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2/go.mod h1:O8bHQfyinKwTXKkiKNGmLQS7vRsqRxIQTFZpYpHK3IQ=
|
||||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||||
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
|
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
|
||||||
@@ -190,33 +212,45 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo
|
|||||||
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.68.0 h1:7N94HrYgVc2tng6xEjmbycupxteYLll7lPlEi/UK5ok=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.68.0/go.mod h1:1i+7wBOfx0kn7PSGRKZ8e7zIhs+AmvLCiCloySDUeck=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
||||||
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
|
go.opentelemetry.io/contrib/propagators/b3 v1.43.0 h1:CETqV3QLLPTy5yNrqyMr41VnAOOD4lsRved7n4QG00A=
|
||||||
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
|
go.opentelemetry.io/contrib/propagators/b3 v1.43.0/go.mod h1:Q4mCiCdziYzpNR0g+6UqVotAlCDZdzz6L8jwY4knOrw=
|
||||||
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
|
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
|
||||||
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
|
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
|
||||||
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k=
|
||||||
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A=
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc=
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak=
|
||||||
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
|
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
|
||||||
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
|
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
|
||||||
|
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
|
||||||
|
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
|
||||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||||
|
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||||
|
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||||
golang.org/x/crypto v0.0.0-20170512130425-ab89591268e0/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
golang.org/x/crypto v0.0.0-20170512130425-ab89591268e0/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||||
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
|
golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI=
|
||||||
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
|
golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8=
|
||||||
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
golang.org/x/net v0.0.0-20220403103023-749bd193bc2b/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
|
golang.org/x/net v0.0.0-20220403103023-749bd193bc2b/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
|
||||||
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
|
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
|
||||||
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
|
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
|
||||||
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
|
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
||||||
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||||
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
|
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||||
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
@@ -228,32 +262,34 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|||||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
|
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
||||||
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4=
|
||||||
|
golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk=
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
|
golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
|
||||||
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
|
golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
|
||||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
|
||||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
|
||||||
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
|
||||||
google.golang.org/api v0.257.0 h1:8Y0lzvHlZps53PEaw+G29SsQIkuKrumGWs9puiexNAA=
|
google.golang.org/api v0.257.0 h1:8Y0lzvHlZps53PEaw+G29SsQIkuKrumGWs9puiexNAA=
|
||||||
google.golang.org/api v0.257.0/go.mod h1:4eJrr+vbVaZSqs7vovFd1Jb/A6ml6iw2e6FBYf3GAO4=
|
google.golang.org/api v0.257.0/go.mod h1:4eJrr+vbVaZSqs7vovFd1Jb/A6ml6iw2e6FBYf3GAO4=
|
||||||
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4=
|
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4=
|
||||||
google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s=
|
google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s=
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4=
|
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo=
|
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846 h1:Wgl1rcDNThT+Zn47YyCXOXyX/COgMTIdhJ717F0l4xk=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
|
||||||
google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM=
|
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
|
||||||
google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig=
|
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
|
||||||
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
|
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||||
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
|||||||
@@ -1,215 +1,30 @@
|
|||||||
|
// apple_social_auth_handler is a stub — the user_applesocialauth table was
|
||||||
|
// dropped in the Ory Kratos migration (phase 2). Social sign-in is now
|
||||||
|
// handled by Kratos.
|
||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/admin/dto"
|
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// AdminAppleSocialAuthHandler handles admin Apple social auth management endpoints
|
// AdminAppleSocialAuthHandler is a no-op stub.
|
||||||
type AdminAppleSocialAuthHandler struct {
|
type AdminAppleSocialAuthHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminAppleSocialAuthHandler creates a new admin Apple social auth handler
|
|
||||||
func NewAdminAppleSocialAuthHandler(db *gorm.DB) *AdminAppleSocialAuthHandler {
|
func NewAdminAppleSocialAuthHandler(db *gorm.DB) *AdminAppleSocialAuthHandler {
|
||||||
return &AdminAppleSocialAuthHandler{db: db}
|
return &AdminAppleSocialAuthHandler{db: db}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AppleSocialAuthResponse represents the response for an Apple social auth entry
|
func (h *AdminAppleSocialAuthHandler) gone(c echo.Context) error {
|
||||||
type AppleSocialAuthResponse struct {
|
return c.JSON(http.StatusGone, map[string]string{"message": "Apple social auth is managed by Ory Kratos"})
|
||||||
ID uint `json:"id"`
|
|
||||||
UserID uint `json:"user_id"`
|
|
||||||
Username string `json:"username"`
|
|
||||||
UserEmail string `json:"user_email"`
|
|
||||||
AppleID string `json:"apple_id"`
|
|
||||||
Email string `json:"email"`
|
|
||||||
IsPrivateEmail bool `json:"is_private_email"`
|
|
||||||
CreatedAt string `json:"created_at"`
|
|
||||||
UpdatedAt string `json:"updated_at"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateAppleSocialAuthRequest represents the request to update an Apple social auth entry
|
|
||||||
type UpdateAppleSocialAuthRequest struct {
|
|
||||||
Email *string `json:"email"`
|
|
||||||
IsPrivateEmail *bool `json:"is_private_email"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// List handles GET /api/admin/apple-social-auth
|
|
||||||
func (h *AdminAppleSocialAuthHandler) List(c echo.Context) error {
|
|
||||||
var filters dto.PaginationParams
|
|
||||||
if err := c.Bind(&filters); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var entries []models.AppleSocialAuth
|
|
||||||
var total int64
|
|
||||||
|
|
||||||
query := h.db.Model(&models.AppleSocialAuth{}).Preload("User")
|
|
||||||
|
|
||||||
// Apply search
|
|
||||||
if filters.Search != "" {
|
|
||||||
search := "%" + filters.Search + "%"
|
|
||||||
query = query.Joins("JOIN auth_user ON auth_user.id = user_applesocialauth.user_id").
|
|
||||||
Where("user_applesocialauth.apple_id ILIKE ? OR user_applesocialauth.email ILIKE ? OR auth_user.username ILIKE ? OR auth_user.email ILIKE ?",
|
|
||||||
search, search, search, search)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get total count
|
|
||||||
query.Count(&total)
|
|
||||||
|
|
||||||
// Apply sorting (allowlist prevents SQL injection via sort_by parameter)
|
|
||||||
sortBy := filters.GetSafeSortBy([]string{
|
|
||||||
"id", "user_id", "apple_id", "email", "is_private_email",
|
|
||||||
"created_at", "updated_at",
|
|
||||||
}, "created_at")
|
|
||||||
query = query.Order(sortBy + " " + filters.GetSortDir())
|
|
||||||
|
|
||||||
// Apply pagination
|
|
||||||
query = query.Offset(filters.GetOffset()).Limit(filters.GetPerPage())
|
|
||||||
|
|
||||||
if err := query.Find(&entries).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch Apple social auth entries"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build response
|
|
||||||
responses := make([]AppleSocialAuthResponse, len(entries))
|
|
||||||
for i, entry := range entries {
|
|
||||||
responses[i] = h.toResponse(&entry)
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, dto.NewPaginatedResponse(responses, total, filters.GetPage(), filters.GetPerPage()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get handles GET /api/admin/apple-social-auth/:id
|
|
||||||
func (h *AdminAppleSocialAuthHandler) Get(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var entry models.AppleSocialAuth
|
|
||||||
if err := h.db.Preload("User").First(&entry, id).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Apple social auth entry not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, h.toResponse(&entry))
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetByUser handles GET /api/admin/apple-social-auth/user/:user_id
|
|
||||||
func (h *AdminAppleSocialAuthHandler) GetByUser(c echo.Context) error {
|
|
||||||
userID, err := strconv.ParseUint(c.Param("user_id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid user ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var entry models.AppleSocialAuth
|
|
||||||
if err := h.db.Preload("User").Where("user_id = ?", userID).First(&entry).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Apple social auth entry not found for user"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, h.toResponse(&entry))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update handles PUT /api/admin/apple-social-auth/:id
|
|
||||||
func (h *AdminAppleSocialAuthHandler) Update(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var entry models.AppleSocialAuth
|
|
||||||
if err := h.db.First(&entry, id).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Apple social auth entry not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var req UpdateAppleSocialAuthRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if req.Email != nil {
|
|
||||||
entry.Email = *req.Email
|
|
||||||
}
|
|
||||||
if req.IsPrivateEmail != nil {
|
|
||||||
entry.IsPrivateEmail = *req.IsPrivateEmail
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := h.db.Save(&entry).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to update Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
h.db.Preload("User").First(&entry, id)
|
|
||||||
return c.JSON(http.StatusOK, h.toResponse(&entry))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete handles DELETE /api/admin/apple-social-auth/:id
|
|
||||||
func (h *AdminAppleSocialAuthHandler) Delete(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var entry models.AppleSocialAuth
|
|
||||||
if err := h.db.First(&entry, id).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Apple social auth entry not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := h.db.Delete(&entry).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete Apple social auth entry"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Apple social auth entry deleted successfully"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// BulkDelete handles DELETE /api/admin/apple-social-auth/bulk
|
|
||||||
func (h *AdminAppleSocialAuthHandler) BulkDelete(c echo.Context) error {
|
|
||||||
var req dto.BulkDeleteRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Where("id IN ?", req.IDs).Delete(&models.AppleSocialAuth{})
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete Apple social auth entries"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Apple social auth entries deleted successfully", "count": result.RowsAffected})
|
|
||||||
}
|
|
||||||
|
|
||||||
// toResponse converts an AppleSocialAuth model to AppleSocialAuthResponse
|
|
||||||
func (h *AdminAppleSocialAuthHandler) toResponse(entry *models.AppleSocialAuth) AppleSocialAuthResponse {
|
|
||||||
response := AppleSocialAuthResponse{
|
|
||||||
ID: entry.ID,
|
|
||||||
UserID: entry.UserID,
|
|
||||||
AppleID: entry.AppleID,
|
|
||||||
Email: entry.Email,
|
|
||||||
IsPrivateEmail: entry.IsPrivateEmail,
|
|
||||||
CreatedAt: entry.CreatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
UpdatedAt: entry.UpdatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
|
|
||||||
if entry.User.ID != 0 {
|
|
||||||
response.Username = entry.User.Username
|
|
||||||
response.UserEmail = entry.User.Email
|
|
||||||
}
|
|
||||||
|
|
||||||
return response
|
|
||||||
}
|
}
|
||||||
|
func (h *AdminAppleSocialAuthHandler) List(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAppleSocialAuthHandler) Get(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAppleSocialAuthHandler) Delete(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAppleSocialAuthHandler) BulkDelete(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAppleSocialAuthHandler) Update(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAppleSocialAuthHandler) GetByUser(c echo.Context) error { return h.gone(c) }
|
||||||
|
|||||||
@@ -1,144 +1,27 @@
|
|||||||
|
// auth_token_handler is a stub — the user_authtoken table was dropped in the
|
||||||
|
// Ory Kratos migration (phase 2). Auth tokens are now Kratos sessions.
|
||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/admin/dto"
|
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// AdminAuthTokenHandler handles admin auth token management endpoints
|
// AdminAuthTokenHandler is a no-op stub.
|
||||||
type AdminAuthTokenHandler struct {
|
type AdminAuthTokenHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminAuthTokenHandler creates a new admin auth token handler
|
|
||||||
func NewAdminAuthTokenHandler(db *gorm.DB) *AdminAuthTokenHandler {
|
func NewAdminAuthTokenHandler(db *gorm.DB) *AdminAuthTokenHandler {
|
||||||
return &AdminAuthTokenHandler{db: db}
|
return &AdminAuthTokenHandler{db: db}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AuthTokenResponse represents an auth token in API responses
|
func (h *AdminAuthTokenHandler) gone(c echo.Context) error {
|
||||||
type AuthTokenResponse struct {
|
return c.JSON(http.StatusGone, map[string]string{"message": "auth tokens are managed by Ory Kratos"})
|
||||||
Key string `json:"key"`
|
|
||||||
UserID uint `json:"user_id"`
|
|
||||||
Username string `json:"username"`
|
|
||||||
Email string `json:"email"`
|
|
||||||
Created string `json:"created"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// List handles GET /api/admin/auth-tokens
|
|
||||||
func (h *AdminAuthTokenHandler) List(c echo.Context) error {
|
|
||||||
var filters dto.PaginationParams
|
|
||||||
if err := c.Bind(&filters); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var tokens []models.AuthToken
|
|
||||||
var total int64
|
|
||||||
|
|
||||||
query := h.db.Model(&models.AuthToken{}).Preload("User")
|
|
||||||
|
|
||||||
// Apply search (search by user info)
|
|
||||||
if filters.Search != "" {
|
|
||||||
search := "%" + filters.Search + "%"
|
|
||||||
query = query.Joins("JOIN auth_user ON auth_user.id = user_authtoken.user_id").
|
|
||||||
Where(
|
|
||||||
"auth_user.username ILIKE ? OR auth_user.email ILIKE ? OR user_authtoken.key ILIKE ?",
|
|
||||||
search, search, search,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get total count
|
|
||||||
query.Count(&total)
|
|
||||||
|
|
||||||
// Apply sorting (allowlist prevents SQL injection via sort_by parameter)
|
|
||||||
sortBy := filters.GetSafeSortBy([]string{
|
|
||||||
"created", "user_id",
|
|
||||||
}, "created")
|
|
||||||
query = query.Order(sortBy + " " + filters.GetSortDir())
|
|
||||||
|
|
||||||
// Apply pagination
|
|
||||||
query = query.Offset(filters.GetOffset()).Limit(filters.GetPerPage())
|
|
||||||
|
|
||||||
if err := query.Find(&tokens).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch auth tokens"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build response
|
|
||||||
responses := make([]AuthTokenResponse, len(tokens))
|
|
||||||
for i, token := range tokens {
|
|
||||||
responses[i] = AuthTokenResponse{
|
|
||||||
Key: token.Key,
|
|
||||||
UserID: token.UserID,
|
|
||||||
Username: token.User.Username,
|
|
||||||
Email: token.User.Email,
|
|
||||||
Created: token.Created.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, dto.NewPaginatedResponse(responses, total, filters.GetPage(), filters.GetPerPage()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get handles GET /api/admin/auth-tokens/:id (id is actually user_id)
|
|
||||||
func (h *AdminAuthTokenHandler) Get(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid user ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var token models.AuthToken
|
|
||||||
if err := h.db.Preload("User").Where("user_id = ?", id).First(&token).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Auth token not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch auth token"})
|
|
||||||
}
|
|
||||||
|
|
||||||
response := AuthTokenResponse{
|
|
||||||
Key: token.Key,
|
|
||||||
UserID: token.UserID,
|
|
||||||
Username: token.User.Username,
|
|
||||||
Email: token.User.Email,
|
|
||||||
Created: token.Created.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete handles DELETE /api/admin/auth-tokens/:id (revoke token)
|
|
||||||
func (h *AdminAuthTokenHandler) Delete(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid user ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Where("user_id = ?", id).Delete(&models.AuthToken{})
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to revoke token"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if result.RowsAffected == 0 {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Auth token not found"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Auth token revoked successfully"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// BulkDelete handles DELETE /api/admin/auth-tokens/bulk
|
|
||||||
func (h *AdminAuthTokenHandler) BulkDelete(c echo.Context) error {
|
|
||||||
var req dto.BulkDeleteRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Where("user_id IN ?", req.IDs).Delete(&models.AuthToken{})
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to revoke tokens"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Auth tokens revoked successfully", "count": result.RowsAffected})
|
|
||||||
}
|
}
|
||||||
|
func (h *AdminAuthTokenHandler) List(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAuthTokenHandler) Get(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAuthTokenHandler) Delete(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminAuthTokenHandler) BulkDelete(c echo.Context) error { return h.gone(c) }
|
||||||
|
|||||||
@@ -1,162 +1,28 @@
|
|||||||
|
// confirmation_code_handler is a stub — the user_confirmationcode table was
|
||||||
|
// dropped in the Ory Kratos migration (phase 2). Email verification is now
|
||||||
|
// handled by Kratos.
|
||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/admin/dto"
|
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// maskCode masks a confirmation code, showing only the last 4 characters.
|
// AdminConfirmationCodeHandler is a no-op stub.
|
||||||
func maskCode(code string) string {
|
|
||||||
if len(code) <= 4 {
|
|
||||||
return strings.Repeat("*", len(code))
|
|
||||||
}
|
|
||||||
return strings.Repeat("*", len(code)-4) + code[len(code)-4:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// AdminConfirmationCodeHandler handles admin confirmation code management endpoints
|
|
||||||
type AdminConfirmationCodeHandler struct {
|
type AdminConfirmationCodeHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminConfirmationCodeHandler creates a new admin confirmation code handler
|
|
||||||
func NewAdminConfirmationCodeHandler(db *gorm.DB) *AdminConfirmationCodeHandler {
|
func NewAdminConfirmationCodeHandler(db *gorm.DB) *AdminConfirmationCodeHandler {
|
||||||
return &AdminConfirmationCodeHandler{db: db}
|
return &AdminConfirmationCodeHandler{db: db}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConfirmationCodeResponse represents a confirmation code in API responses
|
func (h *AdminConfirmationCodeHandler) gone(c echo.Context) error {
|
||||||
type ConfirmationCodeResponse struct {
|
return c.JSON(http.StatusGone, map[string]string{"message": "confirmation codes are managed by Ory Kratos"})
|
||||||
ID uint `json:"id"`
|
|
||||||
UserID uint `json:"user_id"`
|
|
||||||
Username string `json:"username"`
|
|
||||||
Email string `json:"email"`
|
|
||||||
Code string `json:"code"`
|
|
||||||
ExpiresAt string `json:"expires_at"`
|
|
||||||
IsUsed bool `json:"is_used"`
|
|
||||||
CreatedAt string `json:"created_at"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// List handles GET /api/admin/confirmation-codes
|
|
||||||
func (h *AdminConfirmationCodeHandler) List(c echo.Context) error {
|
|
||||||
var filters dto.PaginationParams
|
|
||||||
if err := c.Bind(&filters); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var codes []models.ConfirmationCode
|
|
||||||
var total int64
|
|
||||||
|
|
||||||
query := h.db.Model(&models.ConfirmationCode{}).Preload("User")
|
|
||||||
|
|
||||||
// Apply search (search by user info or code)
|
|
||||||
if filters.Search != "" {
|
|
||||||
search := "%" + filters.Search + "%"
|
|
||||||
query = query.Joins("JOIN auth_user ON auth_user.id = user_confirmationcode.user_id").
|
|
||||||
Where(
|
|
||||||
"auth_user.username ILIKE ? OR auth_user.email ILIKE ? OR user_confirmationcode.code ILIKE ?",
|
|
||||||
search, search, search,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get total count
|
|
||||||
query.Count(&total)
|
|
||||||
|
|
||||||
// Apply sorting (allowlist prevents SQL injection via sort_by parameter)
|
|
||||||
sortBy := filters.GetSafeSortBy([]string{
|
|
||||||
"id", "user_id", "created_at", "expires_at", "is_used",
|
|
||||||
}, "created_at")
|
|
||||||
query = query.Order(sortBy + " " + filters.GetSortDir())
|
|
||||||
|
|
||||||
// Apply pagination
|
|
||||||
query = query.Offset(filters.GetOffset()).Limit(filters.GetPerPage())
|
|
||||||
|
|
||||||
if err := query.Find(&codes).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch confirmation codes"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build response
|
|
||||||
responses := make([]ConfirmationCodeResponse, len(codes))
|
|
||||||
for i, code := range codes {
|
|
||||||
responses[i] = ConfirmationCodeResponse{
|
|
||||||
ID: code.ID,
|
|
||||||
UserID: code.UserID,
|
|
||||||
Username: code.User.Username,
|
|
||||||
Email: code.User.Email,
|
|
||||||
Code: maskCode(code.Code),
|
|
||||||
ExpiresAt: code.ExpiresAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
IsUsed: code.IsUsed,
|
|
||||||
CreatedAt: code.CreatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, dto.NewPaginatedResponse(responses, total, filters.GetPage(), filters.GetPerPage()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get handles GET /api/admin/confirmation-codes/:id
|
|
||||||
func (h *AdminConfirmationCodeHandler) Get(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var code models.ConfirmationCode
|
|
||||||
if err := h.db.Preload("User").First(&code, id).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Confirmation code not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch confirmation code"})
|
|
||||||
}
|
|
||||||
|
|
||||||
response := ConfirmationCodeResponse{
|
|
||||||
ID: code.ID,
|
|
||||||
UserID: code.UserID,
|
|
||||||
Username: code.User.Username,
|
|
||||||
Email: code.User.Email,
|
|
||||||
Code: maskCode(code.Code),
|
|
||||||
ExpiresAt: code.ExpiresAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
IsUsed: code.IsUsed,
|
|
||||||
CreatedAt: code.CreatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete handles DELETE /api/admin/confirmation-codes/:id
|
|
||||||
func (h *AdminConfirmationCodeHandler) Delete(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Delete(&models.ConfirmationCode{}, id)
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete confirmation code"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if result.RowsAffected == 0 {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Confirmation code not found"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Confirmation code deleted successfully"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// BulkDelete handles DELETE /api/admin/confirmation-codes/bulk
|
|
||||||
func (h *AdminConfirmationCodeHandler) BulkDelete(c echo.Context) error {
|
|
||||||
var req dto.BulkDeleteRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Where("id IN ?", req.IDs).Delete(&models.ConfirmationCode{})
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete confirmation codes"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Confirmation codes deleted successfully", "count": result.RowsAffected})
|
|
||||||
}
|
}
|
||||||
|
func (h *AdminConfirmationCodeHandler) List(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminConfirmationCodeHandler) Get(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminConfirmationCodeHandler) Delete(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminConfirmationCodeHandler) BulkDelete(c echo.Context) error { return h.gone(c) }
|
||||||
|
|||||||
@@ -8,16 +8,18 @@ import (
|
|||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
"github.com/treytartt/honeydue-api/internal/models"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/services"
|
||||||
)
|
)
|
||||||
|
|
||||||
// AdminLimitationsHandler handles subscription limitations management
|
// AdminLimitationsHandler handles subscription limitations management
|
||||||
type AdminLimitationsHandler struct {
|
type AdminLimitationsHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
|
cache *services.CacheService
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminLimitationsHandler creates a new handler
|
// NewAdminLimitationsHandler creates a new handler. Cache is optional.
|
||||||
func NewAdminLimitationsHandler(db *gorm.DB) *AdminLimitationsHandler {
|
func NewAdminLimitationsHandler(db *gorm.DB, cache *services.CacheService) *AdminLimitationsHandler {
|
||||||
return &AdminLimitationsHandler{db: db}
|
return &AdminLimitationsHandler{db: db, cache: cache}
|
||||||
}
|
}
|
||||||
|
|
||||||
// === Settings (enable_limitations) ===
|
// === Settings (enable_limitations) ===
|
||||||
@@ -27,14 +29,25 @@ type LimitationsSettingsResponse struct {
|
|||||||
EnableLimitations bool `json:"enable_limitations"`
|
EnableLimitations bool `json:"enable_limitations"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetSettings handles GET /api/admin/limitations/settings
|
// GetSettings handles GET /api/admin/limitations/settings.
|
||||||
|
// Reads through Redis cache first; on miss falls through to DB.
|
||||||
func (h *AdminLimitationsHandler) GetSettings(c echo.Context) error {
|
func (h *AdminLimitationsHandler) GetSettings(c echo.Context) error {
|
||||||
|
ctx := c.Request().Context()
|
||||||
|
|
||||||
|
if h.cache != nil {
|
||||||
|
var cached models.SubscriptionSettings
|
||||||
|
if err := h.cache.GetCachedSubscriptionSettings(ctx, &cached); err == nil {
|
||||||
|
return c.JSON(http.StatusOK, LimitationsSettingsResponse{
|
||||||
|
EnableLimitations: cached.EnableLimitations,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var settings models.SubscriptionSettings
|
var settings models.SubscriptionSettings
|
||||||
if err := h.db.First(&settings, 1).Error; err != nil {
|
if err := h.db.WithContext(ctx).First(&settings, 1).Error; err != nil {
|
||||||
if err == gorm.ErrRecordNotFound {
|
if err == gorm.ErrRecordNotFound {
|
||||||
// Create default settings
|
|
||||||
settings = models.SubscriptionSettings{ID: 1, EnableLimitations: false}
|
settings = models.SubscriptionSettings{ID: 1, EnableLimitations: false}
|
||||||
if err := h.db.Create(&settings).Error; err != nil {
|
if err := h.db.WithContext(ctx).Create(&settings).Error; err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create default settings"})
|
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create default settings"})
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -42,6 +55,10 @@ func (h *AdminLimitationsHandler) GetSettings(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if h.cache != nil {
|
||||||
|
_ = h.cache.CacheSubscriptionSettings(ctx, &settings)
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, LimitationsSettingsResponse{
|
return c.JSON(http.StatusOK, LimitationsSettingsResponse{
|
||||||
EnableLimitations: settings.EnableLimitations,
|
EnableLimitations: settings.EnableLimitations,
|
||||||
})
|
})
|
||||||
@@ -60,7 +77,8 @@ func (h *AdminLimitationsHandler) UpdateSettings(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var settings models.SubscriptionSettings
|
var settings models.SubscriptionSettings
|
||||||
if err := h.db.First(&settings, 1).Error; err != nil {
|
ctx := c.Request().Context()
|
||||||
|
if err := h.db.WithContext(ctx).First(&settings, 1).Error; err != nil {
|
||||||
if err == gorm.ErrRecordNotFound {
|
if err == gorm.ErrRecordNotFound {
|
||||||
settings = models.SubscriptionSettings{ID: 1}
|
settings = models.SubscriptionSettings{ID: 1}
|
||||||
} else {
|
} else {
|
||||||
@@ -72,10 +90,15 @@ func (h *AdminLimitationsHandler) UpdateSettings(c echo.Context) error {
|
|||||||
settings.EnableLimitations = *req.EnableLimitations
|
settings.EnableLimitations = *req.EnableLimitations
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.db.Save(&settings).Error; err != nil {
|
if err := h.db.WithContext(ctx).Save(&settings).Error; err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to update settings"})
|
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to update settings"})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invalidate the cache so the new value is visible to all pods.
|
||||||
|
if h.cache != nil {
|
||||||
|
_ = h.cache.InvalidateSubscriptionSettings(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, LimitationsSettingsResponse{
|
return c.JSON(http.StatusOK, LimitationsSettingsResponse{
|
||||||
EnableLimitations: settings.EnableLimitations,
|
EnableLimitations: settings.EnableLimitations,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,159 +1,28 @@
|
|||||||
|
// password_reset_code_handler is a stub — the user_passwordresetcode table
|
||||||
|
// was dropped in the Ory Kratos migration (phase 2). Password resets are now
|
||||||
|
// handled by Kratos.
|
||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/admin/dto"
|
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// AdminPasswordResetCodeHandler handles admin password reset code management endpoints
|
// AdminPasswordResetCodeHandler is a no-op stub.
|
||||||
type AdminPasswordResetCodeHandler struct {
|
type AdminPasswordResetCodeHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminPasswordResetCodeHandler creates a new admin password reset code handler
|
|
||||||
func NewAdminPasswordResetCodeHandler(db *gorm.DB) *AdminPasswordResetCodeHandler {
|
func NewAdminPasswordResetCodeHandler(db *gorm.DB) *AdminPasswordResetCodeHandler {
|
||||||
return &AdminPasswordResetCodeHandler{db: db}
|
return &AdminPasswordResetCodeHandler{db: db}
|
||||||
}
|
}
|
||||||
|
|
||||||
// PasswordResetCodeResponse represents a password reset code in API responses
|
func (h *AdminPasswordResetCodeHandler) gone(c echo.Context) error {
|
||||||
type PasswordResetCodeResponse struct {
|
return c.JSON(http.StatusGone, map[string]string{"message": "password reset codes are managed by Ory Kratos"})
|
||||||
ID uint `json:"id"`
|
|
||||||
UserID uint `json:"user_id"`
|
|
||||||
Username string `json:"username"`
|
|
||||||
Email string `json:"email"`
|
|
||||||
ResetToken string `json:"reset_token"`
|
|
||||||
ExpiresAt string `json:"expires_at"`
|
|
||||||
Used bool `json:"used"`
|
|
||||||
Attempts int `json:"attempts"`
|
|
||||||
MaxAttempts int `json:"max_attempts"`
|
|
||||||
CreatedAt string `json:"created_at"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// List handles GET /api/admin/password-reset-codes
|
|
||||||
func (h *AdminPasswordResetCodeHandler) List(c echo.Context) error {
|
|
||||||
var filters dto.PaginationParams
|
|
||||||
if err := c.Bind(&filters); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var codes []models.PasswordResetCode
|
|
||||||
var total int64
|
|
||||||
|
|
||||||
query := h.db.Model(&models.PasswordResetCode{}).Preload("User")
|
|
||||||
|
|
||||||
// Apply search (search by user info or token)
|
|
||||||
if filters.Search != "" {
|
|
||||||
search := "%" + filters.Search + "%"
|
|
||||||
query = query.Joins("JOIN auth_user ON auth_user.id = user_passwordresetcode.user_id").
|
|
||||||
Where(
|
|
||||||
"auth_user.username ILIKE ? OR auth_user.email ILIKE ? OR user_passwordresetcode.reset_token ILIKE ?",
|
|
||||||
search, search, search,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get total count
|
|
||||||
query.Count(&total)
|
|
||||||
|
|
||||||
// Apply sorting (allowlist prevents SQL injection via sort_by parameter)
|
|
||||||
sortBy := filters.GetSafeSortBy([]string{
|
|
||||||
"id", "user_id", "created_at", "expires_at", "used",
|
|
||||||
}, "created_at")
|
|
||||||
query = query.Order(sortBy + " " + filters.GetSortDir())
|
|
||||||
|
|
||||||
// Apply pagination
|
|
||||||
query = query.Offset(filters.GetOffset()).Limit(filters.GetPerPage())
|
|
||||||
|
|
||||||
if err := query.Find(&codes).Error; err != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch password reset codes"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build response
|
|
||||||
responses := make([]PasswordResetCodeResponse, len(codes))
|
|
||||||
for i, code := range codes {
|
|
||||||
responses[i] = PasswordResetCodeResponse{
|
|
||||||
ID: code.ID,
|
|
||||||
UserID: code.UserID,
|
|
||||||
Username: code.User.Username,
|
|
||||||
Email: code.User.Email,
|
|
||||||
ResetToken: code.ResetToken[:8] + "..." + code.ResetToken[len(code.ResetToken)-4:], // Truncate for display
|
|
||||||
ExpiresAt: code.ExpiresAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
Used: code.Used,
|
|
||||||
Attempts: code.Attempts,
|
|
||||||
MaxAttempts: code.MaxAttempts,
|
|
||||||
CreatedAt: code.CreatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, dto.NewPaginatedResponse(responses, total, filters.GetPage(), filters.GetPerPage()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get handles GET /api/admin/password-reset-codes/:id
|
|
||||||
func (h *AdminPasswordResetCodeHandler) Get(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
var code models.PasswordResetCode
|
|
||||||
if err := h.db.Preload("User").First(&code, id).Error; err != nil {
|
|
||||||
if err == gorm.ErrRecordNotFound {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Password reset code not found"})
|
|
||||||
}
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to fetch password reset code"})
|
|
||||||
}
|
|
||||||
|
|
||||||
response := PasswordResetCodeResponse{
|
|
||||||
ID: code.ID,
|
|
||||||
UserID: code.UserID,
|
|
||||||
Username: code.User.Username,
|
|
||||||
Email: code.User.Email,
|
|
||||||
ResetToken: code.ResetToken[:8] + "..." + code.ResetToken[len(code.ResetToken)-4:],
|
|
||||||
ExpiresAt: code.ExpiresAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
Used: code.Used,
|
|
||||||
Attempts: code.Attempts,
|
|
||||||
MaxAttempts: code.MaxAttempts,
|
|
||||||
CreatedAt: code.CreatedAt.Format("2006-01-02T15:04:05Z"),
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete handles DELETE /api/admin/password-reset-codes/:id
|
|
||||||
func (h *AdminPasswordResetCodeHandler) Delete(c echo.Context) error {
|
|
||||||
id, err := strconv.ParseUint(c.Param("id"), 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid ID"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Delete(&models.PasswordResetCode{}, id)
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete password reset code"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if result.RowsAffected == 0 {
|
|
||||||
return c.JSON(http.StatusNotFound, map[string]interface{}{"error": "Password reset code not found"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Password reset code deleted successfully"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// BulkDelete handles DELETE /api/admin/password-reset-codes/bulk
|
|
||||||
func (h *AdminPasswordResetCodeHandler) BulkDelete(c echo.Context) error {
|
|
||||||
var req dto.BulkDeleteRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, map[string]interface{}{"error": "Invalid request body"})
|
|
||||||
}
|
|
||||||
|
|
||||||
result := h.db.Where("id IN ?", req.IDs).Delete(&models.PasswordResetCode{})
|
|
||||||
if result.Error != nil {
|
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to delete password reset codes"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]interface{}{"message": "Password reset codes deleted successfully", "count": result.RowsAffected})
|
|
||||||
}
|
}
|
||||||
|
func (h *AdminPasswordResetCodeHandler) List(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminPasswordResetCodeHandler) Get(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminPasswordResetCodeHandler) Delete(c echo.Context) error { return h.gone(c) }
|
||||||
|
func (h *AdminPasswordResetCodeHandler) BulkDelete(c echo.Context) error { return h.gone(c) }
|
||||||
|
|||||||
@@ -19,11 +19,13 @@ import (
|
|||||||
// AdminSettingsHandler handles system settings management
|
// AdminSettingsHandler handles system settings management
|
||||||
type AdminSettingsHandler struct {
|
type AdminSettingsHandler struct {
|
||||||
db *gorm.DB
|
db *gorm.DB
|
||||||
|
cache *services.CacheService
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdminSettingsHandler creates a new handler
|
// NewAdminSettingsHandler creates a new handler. The cache may be nil; the
|
||||||
func NewAdminSettingsHandler(db *gorm.DB) *AdminSettingsHandler {
|
// handler falls through to direct DB reads in that case.
|
||||||
return &AdminSettingsHandler{db: db}
|
func NewAdminSettingsHandler(db *gorm.DB, cache *services.CacheService) *AdminSettingsHandler {
|
||||||
|
return &AdminSettingsHandler{db: db, cache: cache}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SettingsResponse represents the settings response
|
// SettingsResponse represents the settings response
|
||||||
@@ -34,10 +36,29 @@ type SettingsResponse struct {
|
|||||||
TrialDurationDays int `json:"trial_duration_days"`
|
TrialDurationDays int `json:"trial_duration_days"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetSettings handles GET /api/admin/settings
|
// GetSettings handles GET /api/admin/settings.
|
||||||
|
//
|
||||||
|
// Reads through Redis (30-min TTL) before hitting Postgres so the same
|
||||||
|
// row that's checked on every authed request and every monitoring poll
|
||||||
|
// stays hot. Cache miss / first boot creates and caches the default row.
|
||||||
func (h *AdminSettingsHandler) GetSettings(c echo.Context) error {
|
func (h *AdminSettingsHandler) GetSettings(c echo.Context) error {
|
||||||
|
ctx := c.Request().Context()
|
||||||
|
|
||||||
|
// Try cache first.
|
||||||
|
if h.cache != nil {
|
||||||
|
var cached models.SubscriptionSettings
|
||||||
|
if err := h.cache.GetCachedSubscriptionSettings(ctx, &cached); err == nil {
|
||||||
|
return c.JSON(http.StatusOK, SettingsResponse{
|
||||||
|
EnableLimitations: cached.EnableLimitations,
|
||||||
|
EnableMonitoring: cached.EnableMonitoring,
|
||||||
|
TrialEnabled: cached.TrialEnabled,
|
||||||
|
TrialDurationDays: cached.TrialDurationDays,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var settings models.SubscriptionSettings
|
var settings models.SubscriptionSettings
|
||||||
if err := h.db.First(&settings, 1).Error; err != nil {
|
if err := h.db.WithContext(ctx).First(&settings, 1).Error; err != nil {
|
||||||
if err == gorm.ErrRecordNotFound {
|
if err == gorm.ErrRecordNotFound {
|
||||||
// Create default settings
|
// Create default settings
|
||||||
settings = models.SubscriptionSettings{
|
settings = models.SubscriptionSettings{
|
||||||
@@ -47,7 +68,7 @@ func (h *AdminSettingsHandler) GetSettings(c echo.Context) error {
|
|||||||
TrialEnabled: true,
|
TrialEnabled: true,
|
||||||
TrialDurationDays: 14,
|
TrialDurationDays: 14,
|
||||||
}
|
}
|
||||||
if err := h.db.Create(&settings).Error; err != nil {
|
if err := h.db.WithContext(ctx).Create(&settings).Error; err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create default settings"})
|
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create default settings"})
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -55,6 +76,10 @@ func (h *AdminSettingsHandler) GetSettings(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if h.cache != nil {
|
||||||
|
_ = h.cache.CacheSubscriptionSettings(ctx, &settings)
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, SettingsResponse{
|
return c.JSON(http.StatusOK, SettingsResponse{
|
||||||
EnableLimitations: settings.EnableLimitations,
|
EnableLimitations: settings.EnableLimitations,
|
||||||
EnableMonitoring: settings.EnableMonitoring,
|
EnableMonitoring: settings.EnableMonitoring,
|
||||||
@@ -79,7 +104,7 @@ func (h *AdminSettingsHandler) UpdateSettings(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var settings models.SubscriptionSettings
|
var settings models.SubscriptionSettings
|
||||||
if err := h.db.First(&settings, 1).Error; err != nil {
|
if err := h.db.WithContext(c.Request().Context()).First(&settings, 1).Error; err != nil {
|
||||||
if err == gorm.ErrRecordNotFound {
|
if err == gorm.ErrRecordNotFound {
|
||||||
settings = models.SubscriptionSettings{
|
settings = models.SubscriptionSettings{
|
||||||
ID: 1,
|
ID: 1,
|
||||||
@@ -108,10 +133,16 @@ func (h *AdminSettingsHandler) UpdateSettings(c echo.Context) error {
|
|||||||
settings.TrialDurationDays = *req.TrialDurationDays
|
settings.TrialDurationDays = *req.TrialDurationDays
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.db.Save(&settings).Error; err != nil {
|
if err := h.db.WithContext(c.Request().Context()).Save(&settings).Error; err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to update settings"})
|
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to update settings"})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invalidate the cache so all pods pick up the new value on their
|
||||||
|
// next read (instead of waiting for the 30-min TTL).
|
||||||
|
if h.cache != nil {
|
||||||
|
_ = h.cache.InvalidateSubscriptionSettings(c.Request().Context())
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, SettingsResponse{
|
return c.JSON(http.StatusOK, SettingsResponse{
|
||||||
EnableLimitations: settings.EnableLimitations,
|
EnableLimitations: settings.EnableLimitations,
|
||||||
EnableMonitoring: settings.EnableMonitoring,
|
EnableMonitoring: settings.EnableMonitoring,
|
||||||
@@ -217,137 +248,20 @@ func (h *AdminSettingsHandler) cacheAllLookups(ctx context.Context) (bool, error
|
|||||||
}
|
}
|
||||||
log.Debug().Int("count", len(taskTemplates)).Msg("Cached task templates")
|
log.Debug().Int("count", len(taskTemplates)).Msg("Cached task templates")
|
||||||
|
|
||||||
// Build and cache the unified seeded data response
|
// Invalidate the unified seeded-data cache for every locale. The combined
|
||||||
// Import the grouped response type
|
// response is localized (lookup display_name + home-profile options) and is
|
||||||
seededData := map[string]interface{}{
|
// rebuilt per-locale on demand by the static_data handler, so the correct
|
||||||
"residence_types": residenceTypes,
|
// action after a lookup change is to clear all language variants rather than
|
||||||
"task_categories": categories,
|
// pre-warm a single (non-localized) blob.
|
||||||
"task_priorities": priorities,
|
if err := cache.InvalidateSeededData(ctx); err != nil {
|
||||||
"task_frequencies": frequencies,
|
return false, fmt.Errorf("failed to invalidate seeded data: %w", err)
|
||||||
"contractor_specialties": specialties,
|
|
||||||
"task_templates": buildGroupedTemplates(taskTemplates),
|
|
||||||
}
|
}
|
||||||
|
log.Debug().Msg("Invalidated per-locale seeded data cache")
|
||||||
etag, err := cache.CacheSeededData(ctx, seededData)
|
|
||||||
if err != nil {
|
|
||||||
return false, fmt.Errorf("failed to cache seeded data: %w", err)
|
|
||||||
}
|
|
||||||
log.Debug().Str("etag", etag).Msg("Cached unified seeded data")
|
|
||||||
|
|
||||||
log.Info().Msg("All lookup data cached in Redis successfully")
|
log.Info().Msg("All lookup data cached in Redis successfully")
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildGroupedTemplates groups task templates by category for the seeded data response
|
|
||||||
func buildGroupedTemplates(templates []models.TaskTemplate) map[string]interface{} {
|
|
||||||
type templateResponse struct {
|
|
||||||
ID uint `json:"id"`
|
|
||||||
Title string `json:"title"`
|
|
||||||
Description string `json:"description"`
|
|
||||||
CategoryID *uint `json:"category_id"`
|
|
||||||
Category map[string]interface{} `json:"category,omitempty"`
|
|
||||||
FrequencyID *uint `json:"frequency_id"`
|
|
||||||
Frequency map[string]interface{} `json:"frequency,omitempty"`
|
|
||||||
IconIOS string `json:"icon_ios"`
|
|
||||||
IconAndroid string `json:"icon_android"`
|
|
||||||
Tags []string `json:"tags"`
|
|
||||||
DisplayOrder int `json:"display_order"`
|
|
||||||
IsActive bool `json:"is_active"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type categoryGroup struct {
|
|
||||||
CategoryName string `json:"category_name"`
|
|
||||||
CategoryID *uint `json:"category_id"`
|
|
||||||
Templates []templateResponse `json:"templates"`
|
|
||||||
Count int `json:"count"`
|
|
||||||
}
|
|
||||||
|
|
||||||
categoryMap := make(map[string]*categoryGroup)
|
|
||||||
categoryOrder := []string{}
|
|
||||||
|
|
||||||
for _, t := range templates {
|
|
||||||
categoryName := "Uncategorized"
|
|
||||||
var categoryID *uint
|
|
||||||
if t.Category != nil {
|
|
||||||
categoryName = t.Category.Name
|
|
||||||
categoryID = &t.Category.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, exists := categoryMap[categoryName]; !exists {
|
|
||||||
categoryMap[categoryName] = &categoryGroup{
|
|
||||||
CategoryName: categoryName,
|
|
||||||
CategoryID: categoryID,
|
|
||||||
Templates: []templateResponse{},
|
|
||||||
}
|
|
||||||
categoryOrder = append(categoryOrder, categoryName)
|
|
||||||
}
|
|
||||||
|
|
||||||
resp := templateResponse{
|
|
||||||
ID: t.ID,
|
|
||||||
Title: t.Title,
|
|
||||||
Description: t.Description,
|
|
||||||
CategoryID: t.CategoryID,
|
|
||||||
FrequencyID: t.FrequencyID,
|
|
||||||
IconIOS: t.IconIOS,
|
|
||||||
IconAndroid: t.IconAndroid,
|
|
||||||
Tags: parseTags(t.Tags),
|
|
||||||
DisplayOrder: t.DisplayOrder,
|
|
||||||
IsActive: t.IsActive,
|
|
||||||
}
|
|
||||||
|
|
||||||
if t.Category != nil {
|
|
||||||
resp.Category = map[string]interface{}{
|
|
||||||
"id": t.Category.ID,
|
|
||||||
"name": t.Category.Name,
|
|
||||||
"description": t.Category.Description,
|
|
||||||
"icon": t.Category.Icon,
|
|
||||||
"color": t.Category.Color,
|
|
||||||
"display_order": t.Category.DisplayOrder,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if t.Frequency != nil {
|
|
||||||
resp.Frequency = map[string]interface{}{
|
|
||||||
"id": t.Frequency.ID,
|
|
||||||
"name": t.Frequency.Name,
|
|
||||||
"days": t.Frequency.Days,
|
|
||||||
"display_order": t.Frequency.DisplayOrder,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
categoryMap[categoryName].Templates = append(categoryMap[categoryName].Templates, resp)
|
|
||||||
}
|
|
||||||
|
|
||||||
categories := make([]categoryGroup, len(categoryOrder))
|
|
||||||
totalCount := 0
|
|
||||||
for i, name := range categoryOrder {
|
|
||||||
group := categoryMap[name]
|
|
||||||
group.Count = len(group.Templates)
|
|
||||||
totalCount += group.Count
|
|
||||||
categories[i] = *group
|
|
||||||
}
|
|
||||||
|
|
||||||
return map[string]interface{}{
|
|
||||||
"categories": categories,
|
|
||||||
"total_count": totalCount,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// parseTags turns a comma-separated tag string into a slice of tags.
//
// Each piece is whitespace-trimmed and empty pieces are dropped. The result is
// always a non-nil slice, including for an empty input string.
func parseTags(tags string) []string {
	if len(tags) == 0 {
		return []string{}
	}

	pieces := strings.Split(tags, ",")
	kept := make([]string, 0, len(pieces))
	for i := range pieces {
		tag := strings.TrimSpace(pieces[i])
		if tag == "" {
			continue
		}
		kept = append(kept, tag)
	}
	return kept
}
|
|
||||||
|
|
||||||
// SeedTestData handles POST /api/admin/settings/seed-test-data
|
// SeedTestData handles POST /api/admin/settings/seed-test-data
|
||||||
func (h *AdminSettingsHandler) SeedTestData(c echo.Context) error {
|
func (h *AdminSettingsHandler) SeedTestData(c echo.Context) error {
|
||||||
if err := h.runSeedFile("002_test_data.sql"); err != nil {
|
if err := h.runSeedFile("002_test_data.sql"); err != nil {
|
||||||
|
|||||||
@@ -207,9 +207,7 @@ func (h *AdminUserHandler) Create(c echo.Context) error {
|
|||||||
user.IsSuperuser = *req.IsSuperuser
|
user.IsSuperuser = *req.IsSuperuser
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := user.SetPassword(req.Password); err != nil {
|
// Password management is handled by Ory Kratos; no local password hashing.
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to hash password"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := h.db.Create(&user).Error; err != nil {
|
if err := h.db.Create(&user).Error; err != nil {
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create user"})
|
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to create user"})
|
||||||
@@ -284,10 +282,9 @@ func (h *AdminUserHandler) Update(c echo.Context) error {
|
|||||||
if req.IsSuperuser != nil {
|
if req.IsSuperuser != nil {
|
||||||
user.IsSuperuser = *req.IsSuperuser
|
user.IsSuperuser = *req.IsSuperuser
|
||||||
}
|
}
|
||||||
|
// Password management is handled by Ory Kratos; local password update ignored.
|
||||||
if req.Password != nil {
|
if req.Password != nil {
|
||||||
if err := user.SetPassword(*req.Password); err != nil {
|
_ = req.Password // Password changes must go through Kratos admin API
|
||||||
return c.JSON(http.StatusInternalServerError, map[string]interface{}{"error": "Failed to hash password"})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.db.Save(&user).Error; err != nil {
|
if err := h.db.Save(&user).Error; err != nil {
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ type Dependencies struct {
|
|||||||
PushClient *push.Client
|
PushClient *push.Client
|
||||||
OnboardingService *services.OnboardingEmailService
|
OnboardingService *services.OnboardingEmailService
|
||||||
MonitoringHandler *monitoring.Handler
|
MonitoringHandler *monitoring.Handler
|
||||||
|
CacheService *services.CacheService
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetupRoutes configures all admin routes
|
// SetupRoutes configures all admin routes
|
||||||
@@ -380,7 +381,7 @@ func SetupRoutes(router *echo.Echo, db *gorm.DB, cfg *config.Config, deps *Depen
|
|||||||
}
|
}
|
||||||
|
|
||||||
// System settings management (super admin only)
|
// System settings management (super admin only)
|
||||||
settingsHandler := handlers.NewAdminSettingsHandler(db)
|
settingsHandler := handlers.NewAdminSettingsHandler(db, deps.CacheService)
|
||||||
settings := protected.Group("/settings")
|
settings := protected.Group("/settings")
|
||||||
settings.Use(middleware.RequireSuperAdmin())
|
settings.Use(middleware.RequireSuperAdmin())
|
||||||
{
|
{
|
||||||
@@ -394,7 +395,7 @@ func SetupRoutes(router *echo.Echo, db *gorm.DB, cfg *config.Config, deps *Depen
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Limitations management (tier limits, upgrade triggers)
|
// Limitations management (tier limits, upgrade triggers)
|
||||||
limitationsHandler := handlers.NewAdminLimitationsHandler(db)
|
limitationsHandler := handlers.NewAdminLimitationsHandler(db, deps.CacheService)
|
||||||
limitations := protected.Group("/limitations")
|
limitations := protected.Group("/limitations")
|
||||||
{
|
{
|
||||||
// Settings (enable_limitations toggle)
|
// Settings (enable_limitations toggle)
|
||||||
|
|||||||
+106
-24
@@ -1,6 +1,7 @@
|
|||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/rand"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
@@ -52,6 +53,7 @@ type DatabaseConfig struct {
|
|||||||
MaxOpenConns int
|
MaxOpenConns int
|
||||||
MaxIdleConns int
|
MaxIdleConns int
|
||||||
MaxLifetime time.Duration
|
MaxLifetime time.Duration
|
||||||
|
MaxIdleTime time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
type RedisConfig struct {
|
type RedisConfig struct {
|
||||||
@@ -88,8 +90,12 @@ type PushConfig struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type AppleAuthConfig struct {
|
type AppleAuthConfig struct {
|
||||||
ClientID string // Bundle ID (e.g., com.tt.honeyDue.honeyDueDev)
|
ClientID string // Bundle ID, used as the `aud` claim in Sign in with Apple identity tokens
|
||||||
TeamID string // Apple Developer Team ID
|
// TeamID is currently unused — services/apple_auth.go validates identity tokens
|
||||||
|
// against ClientID + Apple's JWKS only, with no server-to-server REST calls.
|
||||||
|
// Wire this in if/when token revocation or refresh-token exchange is added,
|
||||||
|
// since both require signing a client_secret JWT with team_id + key_id.
|
||||||
|
TeamID string
|
||||||
}
|
}
|
||||||
|
|
||||||
type GoogleAuthConfig struct {
|
type GoogleAuthConfig struct {
|
||||||
@@ -136,6 +142,13 @@ type SecurityConfig struct {
|
|||||||
MaxPasswordResetRate int // per hour
|
MaxPasswordResetRate int // per hour
|
||||||
TokenExpiryDays int // Number of days before auth tokens expire (default 90)
|
TokenExpiryDays int // Number of days before auth tokens expire (default 90)
|
||||||
TokenRefreshDays int // Token must be at least this many days old before refresh (default 60)
|
TokenRefreshDays int // Token must be at least this many days old before refresh (default 60)
|
||||||
|
// KratosPublicURL is the Ory Kratos public API base URL. The auth
|
||||||
|
// middleware validates sessions against {KratosPublicURL}/sessions/whoami.
|
||||||
|
KratosPublicURL string
|
||||||
|
// KratosAdminURL is the Ory Kratos admin API base URL. Account deletion
|
||||||
|
// removes the user's Kratos identity via
|
||||||
|
// {KratosAdminURL}/admin/identities/{id}.
|
||||||
|
KratosAdminURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
// StorageConfig holds file storage settings.
|
// StorageConfig holds file storage settings.
|
||||||
@@ -178,7 +191,7 @@ type FeatureFlags struct {
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
cfg *Config
|
cfg *Config
|
||||||
cfgOnce sync.Once
|
cfgMu sync.Mutex
|
||||||
)
|
)
|
||||||
|
|
||||||
// knownWeakSecretKeys contains well-known default or placeholder secret keys
|
// knownWeakSecretKeys contains well-known default or placeholder secret keys
|
||||||
@@ -191,11 +204,19 @@ var knownWeakSecretKeys = map[string]bool{
|
|||||||
"change-me-in-production-secret-key-12345": true,
|
"change-me-in-production-secret-key-12345": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load reads configuration from environment variables
|
// Load reads configuration from environment variables.
|
||||||
|
//
|
||||||
|
// Caches the result so repeated calls are cheap. On validation failure, the
|
||||||
|
// cache stays nil so a subsequent call (after env is corrected) can retry. The
|
||||||
|
// previous implementation used sync.Once with an in-Do reset of the Once
|
||||||
|
// itself, which races and panics with "sync: unlock of unlocked mutex".
|
||||||
func Load() (*Config, error) {
|
func Load() (*Config, error) {
|
||||||
var loadErr error
|
cfgMu.Lock()
|
||||||
|
defer cfgMu.Unlock()
|
||||||
|
if cfg != nil {
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
cfgOnce.Do(func() {
|
|
||||||
viper.SetEnvPrefix("")
|
viper.SetEnvPrefix("")
|
||||||
viper.AutomaticEnv()
|
viper.AutomaticEnv()
|
||||||
viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
|
viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
|
||||||
@@ -203,6 +224,11 @@ func Load() (*Config, error) {
|
|||||||
// Set defaults
|
// Set defaults
|
||||||
setDefaults()
|
setDefaults()
|
||||||
|
|
||||||
|
// Audit F8: overlay file-mounted secrets onto Viper. No-op when the
|
||||||
|
// directory is absent (local/dev), so this is safe to ship before the
|
||||||
|
// manifests mount honeydue-secrets as a volume.
|
||||||
|
loadFileSecrets()
|
||||||
|
|
||||||
// Parse DATABASE_URL if set (Dokku-style)
|
// Parse DATABASE_URL if set (Dokku-style)
|
||||||
dbConfig := DatabaseConfig{
|
dbConfig := DatabaseConfig{
|
||||||
Host: viper.GetString("DB_HOST"),
|
Host: viper.GetString("DB_HOST"),
|
||||||
@@ -214,6 +240,7 @@ func Load() (*Config, error) {
|
|||||||
MaxOpenConns: viper.GetInt("DB_MAX_OPEN_CONNS"),
|
MaxOpenConns: viper.GetInt("DB_MAX_OPEN_CONNS"),
|
||||||
MaxIdleConns: viper.GetInt("DB_MAX_IDLE_CONNS"),
|
MaxIdleConns: viper.GetInt("DB_MAX_IDLE_CONNS"),
|
||||||
MaxLifetime: viper.GetDuration("DB_MAX_LIFETIME"),
|
MaxLifetime: viper.GetDuration("DB_MAX_LIFETIME"),
|
||||||
|
MaxIdleTime: viper.GetDuration("DB_MAX_IDLE_TIME"),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Override with DATABASE_URL if present (F-16: log warning on parse failure)
|
// Override with DATABASE_URL if present (F-16: log warning on parse failure)
|
||||||
@@ -234,7 +261,7 @@ func Load() (*Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg = &Config{
|
c := &Config{
|
||||||
Server: ServerConfig{
|
Server: ServerConfig{
|
||||||
Port: viper.GetInt("PORT"),
|
Port: viper.GetInt("PORT"),
|
||||||
Debug: viper.GetBool("DEBUG"),
|
Debug: viper.GetBool("DEBUG"),
|
||||||
@@ -284,6 +311,8 @@ func Load() (*Config, error) {
|
|||||||
MaxPasswordResetRate: 3,
|
MaxPasswordResetRate: 3,
|
||||||
TokenExpiryDays: viper.GetInt("TOKEN_EXPIRY_DAYS"),
|
TokenExpiryDays: viper.GetInt("TOKEN_EXPIRY_DAYS"),
|
||||||
TokenRefreshDays: viper.GetInt("TOKEN_REFRESH_DAYS"),
|
TokenRefreshDays: viper.GetInt("TOKEN_REFRESH_DAYS"),
|
||||||
|
KratosPublicURL: viper.GetString("KRATOS_PUBLIC_URL"),
|
||||||
|
KratosAdminURL: viper.GetString("KRATOS_ADMIN_URL"),
|
||||||
},
|
},
|
||||||
Storage: StorageConfig{
|
Storage: StorageConfig{
|
||||||
UploadDir: viper.GetString("STORAGE_UPLOAD_DIR"),
|
UploadDir: viper.GetString("STORAGE_UPLOAD_DIR"),
|
||||||
@@ -334,19 +363,11 @@ func Load() (*Config, error) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate required fields
|
if err := validate(c); err != nil {
|
||||||
if err := validate(cfg); err != nil {
|
// Leave cfg nil so the next Load() retries after env is corrected.
|
||||||
loadErr = err
|
return nil, err
|
||||||
// Reset so a subsequent call can retry after env is fixed
|
|
||||||
cfg = nil
|
|
||||||
cfgOnce = sync.Once{}
|
|
||||||
}
|
}
|
||||||
})
|
cfg = c
|
||||||
|
|
||||||
if loadErr != nil {
|
|
||||||
return nil, loadErr
|
|
||||||
}
|
|
||||||
|
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -399,6 +420,8 @@ func setDefaults() {
|
|||||||
|
|
||||||
// Token expiry defaults
|
// Token expiry defaults
|
||||||
viper.SetDefault("TOKEN_EXPIRY_DAYS", 90) // Tokens expire after 90 days
|
viper.SetDefault("TOKEN_EXPIRY_DAYS", 90) // Tokens expire after 90 days
|
||||||
|
viper.SetDefault("KRATOS_PUBLIC_URL", "http://kratos:4433") // Ory Kratos public API
|
||||||
|
viper.SetDefault("KRATOS_ADMIN_URL", "http://kratos:4434") // Ory Kratos admin API
|
||||||
viper.SetDefault("TOKEN_REFRESH_DAYS", 60) // Tokens can be refreshed after 60 days
|
viper.SetDefault("TOKEN_REFRESH_DAYS", 60) // Tokens can be refreshed after 60 days
|
||||||
|
|
||||||
// Storage defaults
|
// Storage defaults
|
||||||
@@ -426,14 +449,67 @@ func isWeakSecretKey(key string) bool {
|
|||||||
return knownWeakSecretKeys[strings.ToLower(strings.TrimSpace(key))]
|
return knownWeakSecretKeys[strings.ToLower(strings.TrimSpace(key))]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadFileSecrets overlays file-mounted secrets onto Viper (audit F8). When
|
||||||
|
// the honeydue-secrets Secret is mounted as a volume at /etc/honeydue/secrets
|
||||||
|
// each key is a file; reading the value here and viper.Set-ing it (highest
|
||||||
|
// Viper precedence) keeps the secret out of the process environment
|
||||||
|
// (/proc/<pid>/environ), which plain env-var injection cannot. When the
|
||||||
|
// directory is absent it is a silent no-op and env vars are used as before.
|
||||||
|
func loadFileSecrets() {
|
||||||
|
dir := os.Getenv("HONEYDUE_SECRETS_DIR")
|
||||||
|
if dir == "" {
|
||||||
|
dir = "/etc/honeydue/secrets"
|
||||||
|
}
|
||||||
|
for _, k := range []string{
|
||||||
|
"POSTGRES_PASSWORD", "SECRET_KEY", "EMAIL_HOST_PASSWORD", "FCM_SERVER_KEY",
|
||||||
|
"REDIS_PASSWORD", "B2_KEY_ID", "B2_APP_KEY", "OBS_INGEST_TOKEN", "OBS_TRACES_URL",
|
||||||
|
} {
|
||||||
|
b, err := os.ReadFile(dir + "/" + k)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if v := strings.TrimSpace(string(b)); v != "" {
|
||||||
|
viper.Set(k, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SecretValue resolves a configuration value that is not part of the typed
|
||||||
|
// Config struct. It reads through Viper, so a value supplied via a file-mounted
|
||||||
|
// secret (audit F8, loaded by loadFileSecrets) is found just like an env var.
|
||||||
|
//
|
||||||
|
// Must be called after Load(). Used by cmd/api and cmd/worker for the
|
||||||
|
// observability endpoints, which are needed before the full Config is wired
|
||||||
|
// and would otherwise be read with os.Getenv — which misses file-mounted
|
||||||
|
// secrets entirely once F8 removes them from the process environment.
|
||||||
|
func SecretValue(key string) string {
|
||||||
|
return viper.GetString(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// randomHexKey reads n bytes from crypto/rand and returns them hex-encoded,
// i.e. a string of 2n lowercase hex characters. The error from the random
// source is returned unchanged, together with an empty string.
func randomHexKey(n int) (string, error) {
	raw := make([]byte, n)
	_, err := rand.Read(raw)
	if err != nil {
		return "", err
	}
	encoded := hex.EncodeToString(raw)
	return encoded, nil
}
|
||||||
|
|
||||||
func validate(cfg *Config) error {
|
func validate(cfg *Config) error {
|
||||||
// S-08: Validate SECRET_KEY against known weak defaults
|
// M8: SECRET_KEY validation — no static fallback secret in the binary.
|
||||||
if cfg.Security.SecretKey == "" {
|
if cfg.Security.SecretKey == "" {
|
||||||
if cfg.Server.Debug {
|
if cfg.Server.Debug {
|
||||||
// In debug mode, use a default key with a warning for local development
|
// Debug only: generate a random key per boot. Tokens signed with
|
||||||
cfg.Security.SecretKey = "change-me-in-production-secret-key-12345"
|
// it do not survive a restart, which is acceptable for local dev
|
||||||
fmt.Println("WARNING: SECRET_KEY not set, using default (debug mode only)")
|
// and far safer than a well-known hardcoded fallback.
|
||||||
fmt.Println("WARNING: *** DO NOT USE THIS DEFAULT KEY IN PRODUCTION ***")
|
randomKey, err := randomHexKey(32)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to generate ephemeral debug SECRET_KEY: %w", err)
|
||||||
|
}
|
||||||
|
cfg.Security.SecretKey = randomKey
|
||||||
|
fmt.Println("WARNING: SECRET_KEY not set, generated an ephemeral random key (debug mode only)")
|
||||||
|
fmt.Println("WARNING: tokens will not survive a restart — set SECRET_KEY for stable local sessions")
|
||||||
} else {
|
} else {
|
||||||
// In production, refuse to start without a proper secret key
|
// In production, refuse to start without a proper secret key
|
||||||
return fmt.Errorf("FATAL: SECRET_KEY environment variable is required in production (DEBUG=false)")
|
return fmt.Errorf("FATAL: SECRET_KEY environment variable is required in production (DEBUG=false)")
|
||||||
@@ -446,6 +522,12 @@ func validate(cfg *Config) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C4: fixed confirmation codes ("123456") must never be enabled outside
|
||||||
|
// debug — with DEBUG=false they are a full authentication bypass.
|
||||||
|
if cfg.Server.DebugFixedCodes && !cfg.Server.Debug {
|
||||||
|
return fmt.Errorf("FATAL: DEBUG_FIXED_CODES is enabled with DEBUG=false — fixed confirmation codes must never run in production")
|
||||||
|
}
|
||||||
|
|
||||||
// Database password might come from DATABASE_URL, don't require it separately
|
// Database password might come from DATABASE_URL, don't require it separately
|
||||||
// The actual connection will fail if credentials are wrong
|
// The actual connection will fail if credentials are wrong
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
@@ -11,8 +10,9 @@ import (
|
|||||||
|
|
||||||
// resetConfigState resets the package-level singleton so each test starts fresh.
|
// resetConfigState resets the package-level singleton so each test starts fresh.
|
||||||
func resetConfigState() {
|
func resetConfigState() {
|
||||||
|
cfgMu.Lock()
|
||||||
cfg = nil
|
cfg = nil
|
||||||
cfgOnce = sync.Once{}
|
cfgMu.Unlock()
|
||||||
viper.Reset()
|
viper.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,8 +106,10 @@ func TestLoad_Validation_MissingSecretKey_DebugMode(t *testing.T) {
|
|||||||
|
|
||||||
c, err := Load()
|
c, err := Load()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
// In debug mode, a default key is assigned
|
// Audit M8: in debug mode an ephemeral random key is generated per boot
|
||||||
assert.Equal(t, "change-me-in-production-secret-key-12345", c.Security.SecretKey)
|
// (no static fallback). It must be a non-empty 64-char hex string.
|
||||||
|
assert.Len(t, c.Security.SecretKey, 64)
|
||||||
|
assert.NotEqual(t, "change-me-in-production-secret-key-12345", c.Security.SecretKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLoad_Validation_WeakSecretKey_Production(t *testing.T) {
|
func TestLoad_Validation_WeakSecretKey_Production(t *testing.T) {
|
||||||
@@ -133,6 +135,33 @@ func TestLoad_Validation_WeakSecretKey_DebugMode(t *testing.T) {
|
|||||||
assert.Equal(t, "secret", c.Security.SecretKey)
|
assert.Equal(t, "secret", c.Security.SecretKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audit C4: DEBUG_FIXED_CODES makes confirmation codes a fixed "123456" — a
|
||||||
|
// full authentication bypass. With DEBUG=false, validate() must refuse to boot
|
||||||
|
// rather than ship that bypass to production.
|
||||||
|
func TestLoad_Validation_DebugFixedCodes_Production(t *testing.T) {
|
||||||
|
// validate() directly — avoids the sync.Once issue Load() has on failure.
|
||||||
|
cfg := &Config{
|
||||||
|
Server: ServerConfig{Debug: false, DebugFixedCodes: true},
|
||||||
|
Security: SecurityConfig{SecretKey: "a-strong-secret-key-for-tests"},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := validate(cfg)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "DEBUG_FIXED_CODES")
|
||||||
|
}
|
||||||
|
|
||||||
|
// With DEBUG=true the fixed codes are an intended local-dev convenience, so
|
||||||
|
// the same combination must NOT error.
|
||||||
|
func TestLoad_Validation_DebugFixedCodes_DebugMode(t *testing.T) {
|
||||||
|
cfg := &Config{
|
||||||
|
Server: ServerConfig{Debug: true, DebugFixedCodes: true},
|
||||||
|
Security: SecurityConfig{SecretKey: "a-strong-secret-key-for-tests"},
|
||||||
|
}
|
||||||
|
|
||||||
|
err := validate(cfg)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
func TestLoad_Validation_EncryptionKey_Valid(t *testing.T) {
|
func TestLoad_Validation_EncryptionKey_Valid(t *testing.T) {
|
||||||
resetConfigState()
|
resetConfigState()
|
||||||
t.Setenv("SECRET_KEY", "a-strong-secret-key-for-tests")
|
t.Setenv("SECRET_KEY", "a-strong-secret-key-for-tests")
|
||||||
|
|||||||
@@ -14,12 +14,10 @@ import (
|
|||||||
|
|
||||||
"github.com/treytartt/honeydue-api/internal/config"
|
"github.com/treytartt/honeydue-api/internal/config"
|
||||||
"github.com/treytartt/honeydue-api/internal/models"
|
"github.com/treytartt/honeydue-api/internal/models"
|
||||||
)
|
"github.com/treytartt/honeydue-api/internal/prom"
|
||||||
|
|
||||||
// migrationAdvisoryLockKey is the pg_advisory_lock key that serializes
|
"github.com/uptrace/opentelemetry-go-extra/otelgorm"
|
||||||
// Migrate() across API replicas booting in parallel. Value is arbitrary but
|
)
|
||||||
// stable ("hdmg" as bytes = honeydue migration).
|
|
||||||
const migrationAdvisoryLockKey int64 = 0x68646d67
|
|
||||||
|
|
||||||
// zerologGormWriter adapts zerolog for GORM's logger interface
|
// zerologGormWriter adapts zerolog for GORM's logger interface
|
||||||
type zerologGormWriter struct{}
|
type zerologGormWriter struct{}
|
||||||
@@ -68,25 +66,84 @@ func Connect(cfg *config.DatabaseConfig, debug bool) (*gorm.DB, error) {
|
|||||||
return nil, fmt.Errorf("failed to get underlying sql.DB: %w", err)
|
return nil, fmt.Errorf("failed to get underlying sql.DB: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure connection pool
|
// Configure connection pool. The Neon pooler endpoint keeps backend
|
||||||
|
// connections warm, so we keep our client-side pool warm too — that
|
||||||
|
// eliminates the ~440ms TCP+TLS+startup handshake on the first query
|
||||||
|
// after a cold pod / idle period.
|
||||||
sqlDB.SetMaxOpenConns(cfg.MaxOpenConns)
|
sqlDB.SetMaxOpenConns(cfg.MaxOpenConns)
|
||||||
sqlDB.SetMaxIdleConns(cfg.MaxIdleConns)
|
sqlDB.SetMaxIdleConns(cfg.MaxIdleConns)
|
||||||
sqlDB.SetConnMaxLifetime(cfg.MaxLifetime)
|
sqlDB.SetConnMaxLifetime(cfg.MaxLifetime)
|
||||||
|
if cfg.MaxIdleTime > 0 {
|
||||||
|
sqlDB.SetConnMaxIdleTime(cfg.MaxIdleTime)
|
||||||
|
}
|
||||||
|
// MaxIdleTime=0 means "never close idle" — the pool fills up to
|
||||||
|
// MaxIdleConns and they stay alive until MaxLifetime expires.
|
||||||
|
|
||||||
// Test connection
|
// Test connection
|
||||||
if err := sqlDB.Ping(); err != nil {
|
if err := sqlDB.Ping(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to ping database: %w", err)
|
return nil, fmt.Errorf("failed to ping database: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Eagerly warm the connection pool to MaxIdleConns. Without this, the
|
||||||
|
// first N user requests each pay the full handshake (~440ms over a
|
||||||
|
// transatlantic link). Pings are issued in parallel so warm-up is
|
||||||
|
// bounded by handshake time, not handshake-time × N.
|
||||||
|
warmUpPool(sqlDB, cfg.MaxIdleConns)
|
||||||
|
|
||||||
log.Info().
|
log.Info().
|
||||||
Str("host", cfg.Host).
|
Str("host", cfg.Host).
|
||||||
Int("port", cfg.Port).
|
Int("port", cfg.Port).
|
||||||
Str("database", cfg.Database).
|
Str("database", cfg.Database).
|
||||||
Msg("Connected to PostgreSQL database")
|
Msg("Connected to PostgreSQL database")
|
||||||
|
|
||||||
|
// Register Prometheus GORM callbacks — emits gorm_query_duration_seconds
|
||||||
|
// for every SQL operation. Operates at the statement level, so does not
|
||||||
|
// require ctx to be threaded through repositories.
|
||||||
|
if err := prom.RegisterGORMCallbacks(db); err != nil {
|
||||||
|
log.Warn().Err(err).Msg("failed to register prometheus GORM callbacks; metrics will be partial")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register otelgorm plugin — emits a span per SQL statement, attached to
|
||||||
|
// whatever trace context is set via db.WithContext(ctx). Repositories that
|
||||||
|
// have been migrated to use WithContext (see internal/repositories/*.go)
|
||||||
|
// will produce nested SQL spans inside the request trace; pre-migration
|
||||||
|
// repositories silently emit untraced queries.
|
||||||
|
if err := db.Use(otelgorm.NewPlugin(otelgorm.WithDBName(cfg.Database))); err != nil {
|
||||||
|
log.Warn().Err(err).Msg("failed to register otelgorm plugin; SQL spans disabled")
|
||||||
|
}
|
||||||
|
|
||||||
return db, nil
|
return db, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// warmUpPool issues N parallel pings so the pool fills with established
|
||||||
|
// connections before the first user request lands. Failures are logged but
|
||||||
|
// not fatal — the pool will fill on demand under traffic if pre-warm fails.
|
||||||
|
//
|
||||||
|
// On a transatlantic link to Neon (~110ms RTT, ~440ms cold handshake), this
|
||||||
|
// turns "first request pays the cold handshake" into "first request finds a
|
||||||
|
// warm pool" — at the cost of ~440ms during pod startup.
|
||||||
|
func warmUpPool(sqlDB interface {
|
||||||
|
PingContext(context.Context) error
|
||||||
|
}, n int) {
|
||||||
|
if n <= 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
done := make(chan error, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
go func() { done <- sqlDB.PingContext(ctx) }()
|
||||||
|
}
|
||||||
|
successes := 0
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
if err := <-done; err == nil {
|
||||||
|
successes++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Info().Int("requested", n).Int("warmed", successes).Msg("DB pool warm-up complete")
|
||||||
|
}
|
||||||
|
|
||||||
// Get returns the database instance
|
// Get returns the database instance
|
||||||
func Get() *gorm.DB {
|
func Get() *gorm.DB {
|
||||||
return db
|
return db
|
||||||
@@ -127,52 +184,46 @@ func Paginate(page, pageSize int) func(db *gorm.DB) *gorm.DB {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MigrateWithLock runs Migrate() under a Postgres session-level advisory lock
|
// RequireSchemaApplied verifies that goose's version table exists and has
|
||||||
// so that multiple API replicas booting in parallel don't race on AutoMigrate.
|
// at least one applied entry. This is the fail-fast that runs at api/worker
|
||||||
// On non-Postgres dialects (sqlite in tests) it falls through to Migrate().
|
// boot: if the operator forgot to run the migrate Job, the pod refuses to
|
||||||
func MigrateWithLock() error {
|
// start with a clear error instead of throwing mysterious "relation does
|
||||||
|
// not exist" errors deep in a request handler.
|
||||||
|
//
|
||||||
|
// On non-Postgres dialects (sqlite in tests) this is a no-op — tests use
|
||||||
|
// AutoMigrate via testutil.SetupTestDB to create a fresh schema per run.
|
||||||
|
// goose isn't involved in the test path.
|
||||||
|
func RequireSchemaApplied() error {
|
||||||
if db == nil {
|
if db == nil {
|
||||||
return fmt.Errorf("database not initialised")
|
return fmt.Errorf("database not initialised")
|
||||||
}
|
}
|
||||||
if db.Dialector.Name() != "postgres" {
|
if db.Dialector.Name() != "postgres" {
|
||||||
return Migrate()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
sqlDB, err := db.DB()
|
// goose_db_version stores one row per applied migration, not a single
|
||||||
|
// "current version" row — so we look for the highest version_id with
|
||||||
|
// is_applied=true. ORDER BY id DESC LIMIT 1 also catches the case where
|
||||||
|
// the table exists but is empty (no rows returned, scan leaves Version
|
||||||
|
// at zero).
|
||||||
|
type migrationRow struct {
|
||||||
|
VersionID int64 `gorm:"column:version_id"`
|
||||||
|
IsApplied bool `gorm:"column:is_applied"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var row migrationRow
|
||||||
|
err := db.Raw(`SELECT version_id, is_applied FROM goose_db_version ORDER BY id DESC LIMIT 1`).Scan(&row).Error
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("get underlying sql.DB: %w", err)
|
return fmt.Errorf("goose_db_version check failed (run the migrate Job to bootstrap): %w", err)
|
||||||
}
|
}
|
||||||
|
if !row.IsApplied {
|
||||||
// Give ourselves up to 5 min to acquire the lock — long enough for a
|
return fmt.Errorf("goose_db_version latest row is_applied=false at version=%d — last migration was rolled back or aborted; investigate before starting", row.VersionID)
|
||||||
// slow migration on a peer replica, short enough to fail fast if Postgres
|
|
||||||
// is hung.
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
conn, err := sqlDB.Conn(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("acquire dedicated migration connection: %w", err)
|
|
||||||
}
|
}
|
||||||
defer conn.Close()
|
if row.VersionID < 1 {
|
||||||
|
return fmt.Errorf("goose_db_version is empty — run goose up (or seed a row marking version 1 as applied if the schema already exists)")
|
||||||
log.Info().Int64("lock_key", migrationAdvisoryLockKey).Msg("Acquiring migration advisory lock...")
|
|
||||||
if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock($1)", migrationAdvisoryLockKey); err != nil {
|
|
||||||
return fmt.Errorf("pg_advisory_lock: %w", err)
|
|
||||||
}
|
}
|
||||||
log.Info().Msg("Migration advisory lock acquired")
|
log.Info().Int64("schema_version", row.VersionID).Msg("Schema precondition satisfied")
|
||||||
|
return nil
|
||||||
defer func() {
|
|
||||||
// Unlock with a fresh context — the outer ctx may have expired.
|
|
||||||
unlockCtx, unlockCancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
||||||
defer unlockCancel()
|
|
||||||
if _, err := conn.ExecContext(unlockCtx, "SELECT pg_advisory_unlock($1)", migrationAdvisoryLockKey); err != nil {
|
|
||||||
log.Warn().Err(err).Msg("Failed to release migration advisory lock (session close will also release)")
|
|
||||||
} else {
|
|
||||||
log.Info().Msg("Migration advisory lock released")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
return Migrate()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Migrate runs database migrations for all models
|
// Migrate runs database migrations for all models
|
||||||
@@ -193,12 +244,7 @@ func Migrate() error {
|
|||||||
|
|
||||||
// User and auth tables
|
// User and auth tables
|
||||||
&models.User{},
|
&models.User{},
|
||||||
&models.AuthToken{},
|
|
||||||
&models.UserProfile{},
|
&models.UserProfile{},
|
||||||
&models.ConfirmationCode{},
|
|
||||||
&models.PasswordResetCode{},
|
|
||||||
&models.AppleSocialAuth{},
|
|
||||||
&models.GoogleSocialAuth{},
|
|
||||||
|
|
||||||
// Admin users (separate from app users)
|
// Admin users (separate from app users)
|
||||||
&models.AdminUser{},
|
&models.AdminUser{},
|
||||||
|
|||||||
@@ -25,7 +25,12 @@ type CreateDocumentRequest struct {
|
|||||||
SerialNumber string `json:"serial_number" validate:"max=100"`
|
SerialNumber string `json:"serial_number" validate:"max=100"`
|
||||||
ModelNumber string `json:"model_number" validate:"max=100"`
|
ModelNumber string `json:"model_number" validate:"max=100"`
|
||||||
TaskID *uint `json:"task_id"`
|
TaskID *uint `json:"task_id"`
|
||||||
ImageURLs []string `json:"image_urls" validate:"omitempty,max=20,dive,max=500"` // Multiple image URLs
|
// UploadIDs claims pending_uploads rows produced by the presigned-URL
|
||||||
|
// upload flow and turns them into document_image rows. UploadIDs of
|
||||||
|
// category "document_file" attach to the document's main FileURL +
|
||||||
|
// FileName fields instead — the service infers placement from the
|
||||||
|
// row's category.
|
||||||
|
UploadIDs []uint `json:"upload_ids" validate:"omitempty,max=20"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateDocumentRequest represents the request to update a document
|
// UpdateDocumentRequest represents the request to update a document
|
||||||
|
|||||||
@@ -100,14 +100,20 @@ type UpdateTaskRequest struct {
|
|||||||
ContractorID *uint `json:"contractor_id"`
|
ContractorID *uint `json:"contractor_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateTaskCompletionRequest represents the request to create a task completion
|
// CreateTaskCompletionRequest represents the request to create a task completion.
|
||||||
|
//
|
||||||
|
// Image attachments arrive via the presigned-URL flow: the client uploads
|
||||||
|
// each image directly to B2 (see /api/uploads/presign) and passes the
|
||||||
|
// resulting pending_uploads.id values in UploadIDs. The service claims
|
||||||
|
// those rows and creates the linked task_completion_image rows.
|
||||||
type CreateTaskCompletionRequest struct {
|
type CreateTaskCompletionRequest struct {
|
||||||
TaskID uint `json:"task_id" validate:"required"`
|
TaskID uint `json:"task_id" validate:"required"`
|
||||||
CompletedAt *time.Time `json:"completed_at"` // Defaults to now
|
CompletedAt *time.Time `json:"completed_at"` // Defaults to now
|
||||||
Notes string `json:"notes" validate:"max=10000"`
|
Notes string `json:"notes" validate:"max=10000"`
|
||||||
ActualCost *decimal.Decimal `json:"actual_cost"`
|
ActualCost *decimal.Decimal `json:"actual_cost"`
|
||||||
Rating *int `json:"rating" validate:"omitempty,min=1,max=5"` // 1-5 star rating
|
Rating *int `json:"rating" validate:"omitempty,min=1,max=5"` // 1-5 star rating
|
||||||
ImageURLs []string `json:"image_urls" validate:"omitempty,max=20,dive,max=500"` // Multiple image URLs
|
|
||||||
|
UploadIDs []uint `json:"upload_ids" validate:"omitempty,max=20"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateTaskCompletionRequest represents the request to update a task completion
|
// UpdateTaskCompletionRequest represents the request to update a task completion
|
||||||
@@ -115,7 +121,6 @@ type UpdateTaskCompletionRequest struct {
|
|||||||
Notes *string `json:"notes" validate:"omitempty,max=10000"`
|
Notes *string `json:"notes" validate:"omitempty,max=10000"`
|
||||||
ActualCost *decimal.Decimal `json:"actual_cost"`
|
ActualCost *decimal.Decimal `json:"actual_cost"`
|
||||||
Rating *int `json:"rating" validate:"omitempty,min=1,max=5"`
|
Rating *int `json:"rating" validate:"omitempty,min=1,max=5"`
|
||||||
ImageURLs []string `json:"image_urls" validate:"omitempty,max=20,dive,max=500"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// CompletionImageInput represents an image to add to a completion
|
// CompletionImageInput represents an image to add to a completion
|
||||||
|
|||||||
@@ -0,0 +1,22 @@
|
|||||||
|
package requests
|
||||||
|
|
||||||
|
// PresignUploadRequest is the body for POST /api/uploads/presign. The client
|
||||||
|
// describes what it's about to upload; the server validates against quota,
|
||||||
|
// rate limits, and per-category caps before returning a signed PUT URL.
|
||||||
|
type PresignUploadRequest struct {
|
||||||
|
// Category gates allowed mime types and the size cap. One of:
|
||||||
|
// "completion" — task completion photos
|
||||||
|
// "document_image" — image attached to a Document
|
||||||
|
// "document_file" — file (e.g. PDF) attached to a Document
|
||||||
|
Category string `json:"category" validate:"required,oneof=completion document_image document_file"`
|
||||||
|
|
||||||
|
// ContentType is the MIME type the client will upload (e.g. image/jpeg).
|
||||||
|
// Bound to the policy so the actual upload must match exactly.
|
||||||
|
ContentType string `json:"content_type" validate:"required,min=3,max=127"`
|
||||||
|
|
||||||
|
// ContentLength is the exact byte count the client intends to upload.
|
||||||
|
// The signed policy permits a small slack window around this value
|
||||||
|
// (server-side constant) so the client can encode in one pass without
|
||||||
|
// having to predict the byte count perfectly.
|
||||||
|
ContentLength int64 `json:"content_length" validate:"required,min=1"`
|
||||||
|
}
|
||||||
@@ -9,7 +9,10 @@ import (
|
|||||||
// ContractorSpecialtyResponse represents a contractor specialty
|
// ContractorSpecialtyResponse represents a contractor specialty
|
||||||
type ContractorSpecialtyResponse struct {
|
type ContractorSpecialtyResponse struct {
|
||||||
ID uint `json:"id"`
|
ID uint `json:"id"`
|
||||||
|
// Name is the stable English identifier (clients match on this).
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
// DisplayName is the localized label for the request's Accept-Language.
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
Icon string `json:"icon"`
|
Icon string `json:"icon"`
|
||||||
DisplayOrder int `json:"display_order"`
|
DisplayOrder int `json:"display_order"`
|
||||||
|
|||||||
@@ -11,7 +11,10 @@ import (
|
|||||||
// ResidenceTypeResponse represents a residence type in the API response
|
// ResidenceTypeResponse represents a residence type in the API response
|
||||||
type ResidenceTypeResponse struct {
|
type ResidenceTypeResponse struct {
|
||||||
ID uint `json:"id"`
|
ID uint `json:"id"`
|
||||||
|
// Name is the stable English identifier (clients match on this).
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
// DisplayName is the localized label for the request's Accept-Language.
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResidenceUserResponse represents a user with access to a residence
|
// ResidenceUserResponse represents a user with access to a residence
|
||||||
|
|||||||
@@ -14,7 +14,10 @@ import (
|
|||||||
// TaskCategoryResponse represents a task category
|
// TaskCategoryResponse represents a task category
|
||||||
type TaskCategoryResponse struct {
|
type TaskCategoryResponse struct {
|
||||||
ID uint `json:"id"`
|
ID uint `json:"id"`
|
||||||
|
// Name is the stable English identifier (clients match on this).
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
// DisplayName is the localized label for the request's Accept-Language.
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
Icon string `json:"icon"`
|
Icon string `json:"icon"`
|
||||||
Color string `json:"color"`
|
Color string `json:"color"`
|
||||||
@@ -25,6 +28,7 @@ type TaskCategoryResponse struct {
|
|||||||
type TaskPriorityResponse struct {
|
type TaskPriorityResponse struct {
|
||||||
ID uint `json:"id"`
|
ID uint `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
Level int `json:"level"`
|
Level int `json:"level"`
|
||||||
Color string `json:"color"`
|
Color string `json:"color"`
|
||||||
DisplayOrder int `json:"display_order"`
|
DisplayOrder int `json:"display_order"`
|
||||||
@@ -34,6 +38,7 @@ type TaskPriorityResponse struct {
|
|||||||
type TaskFrequencyResponse struct {
|
type TaskFrequencyResponse struct {
|
||||||
ID uint `json:"id"`
|
ID uint `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
Days *int `json:"days"`
|
Days *int `json:"days"`
|
||||||
DisplayOrder int `json:"display_order"`
|
DisplayOrder int `json:"display_order"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,38 @@
|
|||||||
|
package responses
|
||||||
|
|
||||||
|
// PresignUploadResponse is what /api/uploads/presign returns to the client.
|
||||||
|
//
|
||||||
|
// Flow: the client makes one PUT request to URL with the raw object bytes
|
||||||
|
// as the body and Headers as the request headers (verbatim — the signature
|
||||||
|
// binds them). On success, the client passes ID back via upload_ids[] on
|
||||||
|
// POST /api/task-completions/ or POST /api/documents/ to claim and attach
|
||||||
|
// the object.
|
||||||
|
//
|
||||||
|
// We use PUT (not POST) because Backblaze B2's S3-compatible endpoint does
|
||||||
|
// not implement the S3 POST Object form upload — it returns HTTP 501 on
|
||||||
|
// every request style. PUT works against AWS S3, B2, and MinIO uniformly.
|
||||||
|
type PresignUploadResponse struct {
|
||||||
|
// ID is the pending_uploads.id the client passes back via upload_ids[].
|
||||||
|
ID uint `json:"id"`
|
||||||
|
|
||||||
|
// URL is the signed PUT URL. Includes all auth as query parameters.
|
||||||
|
URL string `json:"upload_url"`
|
||||||
|
|
||||||
|
// Method is always "PUT" — emitted explicitly so clients don't have to
|
||||||
|
// hardcode it. Reserved for the rare case we ever offer alternative
|
||||||
|
// upload mechanisms.
|
||||||
|
Method string `json:"method"`
|
||||||
|
|
||||||
|
// Headers must be sent verbatim on the PUT request. Currently includes
|
||||||
|
// Content-Type and Content-Length; both are signed, and B2 will reject
|
||||||
|
// any PUT whose headers don't match.
|
||||||
|
Headers map[string]string `json:"headers"`
|
||||||
|
|
||||||
|
// Key is the object key chosen by the server. Echoed for client logging
|
||||||
|
// and debugging; the canonical reference is via ID.
|
||||||
|
Key string `json:"key"`
|
||||||
|
|
||||||
|
// ExpiresAt is when the signed URL stops working. Clients should retry
|
||||||
|
// with a fresh presign rather than relying on long-lived URLs.
|
||||||
|
ExpiresAt string `json:"expires_at"`
|
||||||
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
@@ -13,20 +12,22 @@ import (
|
|||||||
"github.com/treytartt/honeydue-api/internal/middleware"
|
"github.com/treytartt/honeydue-api/internal/middleware"
|
||||||
"github.com/treytartt/honeydue-api/internal/services"
|
"github.com/treytartt/honeydue-api/internal/services"
|
||||||
"github.com/treytartt/honeydue-api/internal/validator"
|
"github.com/treytartt/honeydue-api/internal/validator"
|
||||||
|
"github.com/treytartt/honeydue-api/internal/worker"
|
||||||
)
|
)
|
||||||
|
|
||||||
// AuthHandler handles authentication endpoints
|
// AuthHandler handles user profile and account management endpoints.
|
||||||
|
// Session lifecycle (login, register, logout, password reset) is delegated
|
||||||
|
// to Ory Kratos; this handler only deals with the honeyDue user record.
|
||||||
type AuthHandler struct {
|
type AuthHandler struct {
|
||||||
authService *services.AuthService
|
authService *services.AuthService
|
||||||
emailService *services.EmailService
|
emailService *services.EmailService
|
||||||
cache *services.CacheService
|
cache *services.CacheService
|
||||||
appleAuthService *services.AppleAuthService
|
|
||||||
googleAuthService *services.GoogleAuthService
|
|
||||||
storageService *services.StorageService
|
storageService *services.StorageService
|
||||||
auditService *services.AuditService
|
auditService *services.AuditService
|
||||||
|
enqueuer worker.Enqueuer
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAuthHandler creates a new auth handler
|
// NewAuthHandler creates a new auth handler.
|
||||||
func NewAuthHandler(authService *services.AuthService, emailService *services.EmailService, cache *services.CacheService) *AuthHandler {
|
func NewAuthHandler(authService *services.AuthService, emailService *services.EmailService, cache *services.CacheService) *AuthHandler {
|
||||||
return &AuthHandler{
|
return &AuthHandler{
|
||||||
authService: authService,
|
authService: authService,
|
||||||
@@ -35,139 +36,108 @@ func NewAuthHandler(authService *services.AuthService, emailService *services.Em
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetAppleAuthService sets the Apple auth service (called after initialization)
|
// SetStorageService sets the storage service for file deletion during account deletion.
|
||||||
func (h *AuthHandler) SetAppleAuthService(appleAuth *services.AppleAuthService) {
|
|
||||||
h.appleAuthService = appleAuth
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetGoogleAuthService sets the Google auth service (called after initialization)
|
|
||||||
func (h *AuthHandler) SetGoogleAuthService(googleAuth *services.GoogleAuthService) {
|
|
||||||
h.googleAuthService = googleAuth
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetStorageService sets the storage service for file deletion during account deletion
|
|
||||||
func (h *AuthHandler) SetStorageService(storageService *services.StorageService) {
|
func (h *AuthHandler) SetStorageService(storageService *services.StorageService) {
|
||||||
h.storageService = storageService
|
h.storageService = storageService
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetAuditService sets the audit service for logging security events
|
// SetAuditService sets the audit service for logging security events.
|
||||||
func (h *AuthHandler) SetAuditService(auditService *services.AuditService) {
|
func (h *AuthHandler) SetAuditService(auditService *services.AuditService) {
|
||||||
h.auditService = auditService
|
h.auditService = auditService
|
||||||
}
|
}
|
||||||
|
|
||||||
// Login handles POST /api/auth/login/
|
// SetEnqueuer sets the async task enqueuer (used by the GDPR data-export endpoint).
|
||||||
func (h *AuthHandler) Login(c echo.Context) error {
|
func (h *AuthHandler) SetEnqueuer(enqueuer worker.Enqueuer) {
|
||||||
var req requests.LoginRequest
|
h.enqueuer = enqueuer
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.authService.Login(&req)
|
// ExportData handles POST /api/auth/export/ — queues a GDPR data-export job that
|
||||||
|
// emails the user a zip of all their data. Async (202) because gathering,
|
||||||
|
// zipping, and emailing can take seconds; doing it inline would block the request.
|
||||||
|
func (h *AuthHandler) ExportData(c echo.Context) error {
|
||||||
|
noStore(c)
|
||||||
|
user, err := middleware.MustGetAuthUser(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().Err(err).Str("identifier", req.Username).Msg("Login failed")
|
|
||||||
if h.auditService != nil {
|
|
||||||
h.auditService.LogEvent(c, nil, services.AuditEventLoginFailed, map[string]interface{}{
|
|
||||||
"identifier": req.Username,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if h.enqueuer == nil {
|
||||||
|
return echo.NewHTTPError(http.StatusServiceUnavailable, "data export is temporarily unavailable")
|
||||||
|
}
|
||||||
|
if err := h.enqueuer.EnqueueDataExport(user.ID); err != nil {
|
||||||
|
log.Error().Err(err).Uint("user_id", user.ID).Msg("Failed to enqueue data export")
|
||||||
|
return echo.NewHTTPError(http.StatusInternalServerError, "failed to queue data export")
|
||||||
|
}
|
||||||
if h.auditService != nil {
|
if h.auditService != nil {
|
||||||
userID := response.User.ID
|
h.auditService.LogEvent(c, &user.ID, services.AuditEventDataExport, map[string]interface{}{
|
||||||
h.auditService.LogEvent(c, &userID, services.AuditEventLogin, nil)
|
"user_id": user.ID,
|
||||||
|
"email": user.Email,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return c.JSON(http.StatusAccepted, map[string]string{
|
||||||
|
"message": "Your data export has been queued. You'll receive an email with your data shortly.",
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
// noStore marks a response as non-cacheable.
|
||||||
|
func noStore(c echo.Context) {
|
||||||
|
c.Response().Header().Set("Cache-Control", "no-store")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register handles POST /api/auth/register/
|
// Register handles POST /api/auth/register/ — creates a new password account.
|
||||||
|
//
|
||||||
|
// The identity is admin-created in Kratos with an unverified email and no
|
||||||
|
// auto-sent code (see services.AuthService.Register). The client logs in right
|
||||||
|
// after to get a session, then completes email verification. Returns 201 with
|
||||||
|
// no token; 409 if the email is taken; 400 on a weak password.
|
||||||
func (h *AuthHandler) Register(c echo.Context) error {
|
func (h *AuthHandler) Register(c echo.Context) error {
|
||||||
var req requests.RegisterRequest
|
var req requests.RegisterRequest
|
||||||
if err := c.Bind(&req); err != nil {
|
if err := c.Bind(&req); err != nil {
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
return apperrors.BadRequest("error.invalid_request_body")
|
||||||
}
|
}
|
||||||
if err := c.Validate(&req); err != nil {
|
if err := c.Validate(&req); err != nil {
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
||||||
}
|
}
|
||||||
|
if err := h.authService.Register(c.Request().Context(), &req); err != nil {
|
||||||
response, confirmationCode, err := h.authService.Register(&req)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Msg("Registration failed")
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
return c.JSON(http.StatusCreated, map[string]string{
|
||||||
if h.auditService != nil {
|
"message": "Account created. Please verify your email.",
|
||||||
userID := response.User.ID
|
|
||||||
h.auditService.LogEvent(c, &userID, services.AuditEventRegister, map[string]interface{}{
|
|
||||||
"username": req.Username,
|
|
||||||
"email": req.Email,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send welcome email with confirmation code (async)
|
|
||||||
if h.emailService != nil && confirmationCode != "" {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", req.Email).Msg("Panic in welcome email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendWelcomeEmail(req.Email, req.FirstName, confirmationCode); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", req.Email).Msg("Failed to send welcome email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusCreated, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logout handles POST /api/auth/logout/
|
|
||||||
func (h *AuthHandler) Logout(c echo.Context) error {
|
|
||||||
token := middleware.GetAuthToken(c)
|
|
||||||
if token == "" {
|
|
||||||
return apperrors.Unauthorized("error.not_authenticated")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log audit event before invalidating the token
|
|
||||||
if h.auditService != nil {
|
|
||||||
user := middleware.GetAuthUser(c)
|
|
||||||
if user != nil {
|
|
||||||
h.auditService.LogEvent(c, &user.ID, services.AuditEventLogout, nil)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Invalidate token in database
|
|
||||||
if err := h.authService.Logout(token); err != nil {
|
|
||||||
log.Warn().Err(err).Msg("Failed to delete token from database")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Invalidate token in cache
|
|
||||||
if h.cache != nil {
|
|
||||||
if err := h.cache.InvalidateAuthToken(c.Request().Context(), token); err != nil {
|
|
||||||
log.Warn().Err(err).Msg("Failed to invalidate token in cache")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.MessageResponse{Message: "Logged out successfully"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// CurrentUser handles GET /api/auth/me/
|
// CurrentUser handles GET /api/auth/me/
|
||||||
func (h *AuthHandler) CurrentUser(c echo.Context) error {
|
func (h *AuthHandler) CurrentUser(c echo.Context) error {
|
||||||
|
noStore(c)
|
||||||
user, err := middleware.MustGetAuthUser(c)
|
user, err := middleware.MustGetAuthUser(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.authService.GetCurrentUser(user.ID)
|
response, err := h.authService.GetCurrentUser(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Uint("user_id", user.ID).Msg("Failed to get current user")
|
log.Error().Err(err).Uint("user_id", user.ID).Msg("Failed to get current user")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// user_profile.verified is a one-time mirror set at provision time
|
||||||
|
// (see middleware/kratos_auth.go::provision). Kratos remains the source
|
||||||
|
// of truth for email-verification state — it can flip from false → true
|
||||||
|
// the instant the user completes the verification flow, and nothing
|
||||||
|
// updates the local column. Override the response with the live value
|
||||||
|
// the Kratos auth middleware already stashed in context so /auth/me
|
||||||
|
// reflects current reality. Also opportunistically sync the DB mirror
|
||||||
|
// (best-effort, ignore error) so background queries that read the
|
||||||
|
// column see the same answer.
|
||||||
|
if verified, ok := c.Get(middleware.AuthVerifiedKey).(bool); ok {
|
||||||
|
mirrorStale := response.Profile != nil && response.Profile.Verified != verified
|
||||||
|
if response.Profile != nil {
|
||||||
|
response.Profile.Verified = verified
|
||||||
|
}
|
||||||
|
if verified && mirrorStale {
|
||||||
|
_ = h.authService.MarkUserVerified(c.Request().Context(), user.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
return c.JSON(http.StatusOK, response)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -186,7 +156,7 @@ func (h *AuthHandler) UpdateProfile(c echo.Context) error {
|
|||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.authService.UpdateProfile(user.ID, &req)
|
response, err := h.authService.UpdateProfile(c.Request().Context(), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Failed to update profile")
|
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Failed to update profile")
|
||||||
return err
|
return err
|
||||||
@@ -195,296 +165,6 @@ func (h *AuthHandler) UpdateProfile(c echo.Context) error {
|
|||||||
return c.JSON(http.StatusOK, response)
|
return c.JSON(http.StatusOK, response)
|
||||||
}
|
}
|
||||||
|
|
||||||
// VerifyEmail handles POST /api/auth/verify-email/
|
|
||||||
func (h *AuthHandler) VerifyEmail(c echo.Context) error {
|
|
||||||
user, err := middleware.MustGetAuthUser(c)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
var req requests.VerifyEmailRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
err = h.authService.VerifyEmail(user.ID, req.Code)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Email verification failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send post-verification welcome email with tips (async)
|
|
||||||
if h.emailService != nil {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", user.Email).Msg("Panic in post-verification email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendPostVerificationEmail(user.Email, user.FirstName); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", user.Email).Msg("Failed to send post-verification email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.VerifyEmailResponse{
|
|
||||||
Message: "Email verified successfully",
|
|
||||||
Verified: true,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResendVerification handles POST /api/auth/resend-verification/
|
|
||||||
func (h *AuthHandler) ResendVerification(c echo.Context) error {
|
|
||||||
user, err := middleware.MustGetAuthUser(c)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
code, err := h.authService.ResendVerificationCode(user.ID)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Failed to resend verification")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send verification email (async)
|
|
||||||
if h.emailService != nil {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", user.Email).Msg("Panic in verification email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendVerificationEmail(user.Email, user.FirstName, code); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", user.Email).Msg("Failed to send verification email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.MessageResponse{Message: "Verification email sent"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForgotPassword handles POST /api/auth/forgot-password/
|
|
||||||
func (h *AuthHandler) ForgotPassword(c echo.Context) error {
|
|
||||||
var req requests.ForgotPasswordRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
code, user, err := h.authService.ForgotPassword(req.Email)
|
|
||||||
if err != nil {
|
|
||||||
var appErr *apperrors.AppError
|
|
||||||
if errors.As(err, &appErr) && appErr.Code == http.StatusTooManyRequests {
|
|
||||||
// Only reveal rate limit errors
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Error().Err(err).Str("email", req.Email).Msg("Forgot password failed")
|
|
||||||
// Don't reveal other errors to prevent email enumeration
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send password reset email (async) - only if user found
|
|
||||||
if h.emailService != nil && code != "" && user != nil {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", user.Email).Msg("Panic in password reset email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendPasswordResetEmail(user.Email, user.FirstName, code); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", user.Email).Msg("Failed to send password reset email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.auditService != nil {
|
|
||||||
h.auditService.LogEvent(c, nil, services.AuditEventPasswordReset, map[string]interface{}{
|
|
||||||
"email": req.Email,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Always return success to prevent email enumeration
|
|
||||||
return c.JSON(http.StatusOK, responses.ForgotPasswordResponse{
|
|
||||||
Message: "Password reset email sent",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// VerifyResetCode handles POST /api/auth/verify-reset-code/
|
|
||||||
func (h *AuthHandler) VerifyResetCode(c echo.Context) error {
|
|
||||||
var req requests.VerifyResetCodeRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
resetToken, err := h.authService.VerifyResetCode(req.Email, req.Code)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Str("email", req.Email).Msg("Verify reset code failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.VerifyResetCodeResponse{
|
|
||||||
Message: "Reset code verified",
|
|
||||||
ResetToken: resetToken,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResetPassword handles POST /api/auth/reset-password/
|
|
||||||
func (h *AuthHandler) ResetPassword(c echo.Context) error {
|
|
||||||
var req requests.ResetPasswordRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
err := h.authService.ResetPassword(req.ResetToken, req.NewPassword)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Msg("Password reset failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.auditService != nil {
|
|
||||||
h.auditService.LogEvent(c, nil, services.AuditEventPasswordChanged, map[string]interface{}{
|
|
||||||
"method": "reset_token",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.ResetPasswordResponse{
|
|
||||||
Message: "Password reset successful",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// AppleSignIn handles POST /api/auth/apple-sign-in/
|
|
||||||
func (h *AuthHandler) AppleSignIn(c echo.Context) error {
|
|
||||||
var req requests.AppleSignInRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.appleAuthService == nil {
|
|
||||||
log.Error().Msg("Apple auth service not configured")
|
|
||||||
return &apperrors.AppError{
|
|
||||||
Code: 500,
|
|
||||||
MessageKey: "error.apple_signin_not_configured",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
response, err := h.authService.AppleSignIn(c.Request().Context(), h.appleAuthService, &req)
|
|
||||||
if err != nil {
|
|
||||||
// Check for legacy Apple Sign In error (not yet migrated)
|
|
||||||
if errors.Is(err, services.ErrAppleSignInFailed) {
|
|
||||||
log.Debug().Err(err).Msg("Apple Sign In failed (legacy error)")
|
|
||||||
return apperrors.Unauthorized("error.invalid_apple_token")
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debug().Err(err).Msg("Apple Sign In failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send welcome email for new users (async)
|
|
||||||
if response.IsNewUser && h.emailService != nil && response.User.Email != "" {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", response.User.Email).Msg("Panic in Apple welcome email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendAppleWelcomeEmail(response.User.Email, response.User.FirstName); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", response.User.Email).Msg("Failed to send Apple welcome email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GoogleSignIn handles POST /api/auth/google-sign-in/
|
|
||||||
func (h *AuthHandler) GoogleSignIn(c echo.Context) error {
|
|
||||||
var req requests.GoogleSignInRequest
|
|
||||||
if err := c.Bind(&req); err != nil {
|
|
||||||
return apperrors.BadRequest("error.invalid_request")
|
|
||||||
}
|
|
||||||
if err := c.Validate(&req); err != nil {
|
|
||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.googleAuthService == nil {
|
|
||||||
log.Error().Msg("Google auth service not configured")
|
|
||||||
return &apperrors.AppError{
|
|
||||||
Code: 500,
|
|
||||||
MessageKey: "error.google_signin_not_configured",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
response, err := h.authService.GoogleSignIn(c.Request().Context(), h.googleAuthService, &req)
|
|
||||||
if err != nil {
|
|
||||||
// Check for legacy Google Sign In error (not yet migrated)
|
|
||||||
if errors.Is(err, services.ErrGoogleSignInFailed) {
|
|
||||||
log.Debug().Err(err).Msg("Google Sign In failed (legacy error)")
|
|
||||||
return apperrors.Unauthorized("error.invalid_google_token")
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debug().Err(err).Msg("Google Sign In failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send welcome email for new users (async)
|
|
||||||
if response.IsNewUser && h.emailService != nil && response.User.Email != "" {
|
|
||||||
go func() {
|
|
||||||
defer func() {
|
|
||||||
if r := recover(); r != nil {
|
|
||||||
log.Error().Interface("panic", r).Str("email", response.User.Email).Msg("Panic in Google welcome email goroutine")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
if err := h.emailService.SendGoogleWelcomeEmail(response.User.Email, response.User.FirstName); err != nil {
|
|
||||||
log.Error().Err(err).Str("email", response.User.Email).Msg("Failed to send Google welcome email")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RefreshToken handles POST /api/auth/refresh/
|
|
||||||
func (h *AuthHandler) RefreshToken(c echo.Context) error {
|
|
||||||
user, err := middleware.MustGetAuthUser(c)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
token := middleware.GetAuthToken(c)
|
|
||||||
if token == "" {
|
|
||||||
return apperrors.Unauthorized("error.not_authenticated")
|
|
||||||
}
|
|
||||||
|
|
||||||
response, err := h.authService.RefreshToken(token, user.ID)
|
|
||||||
if err != nil {
|
|
||||||
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Token refresh failed")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the token was refreshed (new token), invalidate the old one from cache
|
|
||||||
if response.Token != token && h.cache != nil {
|
|
||||||
if cacheErr := h.cache.InvalidateAuthToken(c.Request().Context(), token); cacheErr != nil {
|
|
||||||
log.Warn().Err(cacheErr).Msg("Failed to invalidate old token from cache during refresh")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteAccount handles DELETE /api/auth/account/
|
// DeleteAccount handles DELETE /api/auth/account/
|
||||||
func (h *AuthHandler) DeleteAccount(c echo.Context) error {
|
func (h *AuthHandler) DeleteAccount(c echo.Context) error {
|
||||||
user, err := middleware.MustGetAuthUser(c)
|
user, err := middleware.MustGetAuthUser(c)
|
||||||
@@ -497,7 +177,7 @@ func (h *AuthHandler) DeleteAccount(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_request")
|
return apperrors.BadRequest("error.invalid_request")
|
||||||
}
|
}
|
||||||
|
|
||||||
fileURLs, err := h.authService.DeleteAccount(user.ID, req.Password, req.Confirmation)
|
fileURLs, err := h.authService.DeleteAccount(c.Request().Context(), user.ID, req.Password, req.Confirmation)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Account deletion failed")
|
log.Debug().Err(err).Uint("user_id", user.ID).Msg("Account deletion failed")
|
||||||
return err
|
return err
|
||||||
@@ -527,13 +207,5 @@ func (h *AuthHandler) DeleteAccount(c echo.Context) error {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Invalidate auth token from cache
|
|
||||||
token := middleware.GetAuthToken(c)
|
|
||||||
if h.cache != nil && token != "" {
|
|
||||||
if err := h.cache.InvalidateAuthToken(c.Request().Context(), token); err != nil {
|
|
||||||
log.Warn().Err(err).Msg("Failed to invalidate token in cache after account deletion")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, responses.MessageResponse{Message: "Account deleted successfully"})
|
return c.JSON(http.StatusOK, responses.MessageResponse{Message: "Account deleted successfully"})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,26 +35,25 @@ func setupDeleteAccountHandler(t *testing.T) (*AuthHandler, *echo.Echo, *gorm.DB
|
|||||||
return handler, e, db
|
return handler, e, db
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAuthHandler_DeleteAccount_EmailUser(t *testing.T) {
|
// TestAuthHandler_DeleteAccount_WithConfirmation verifies that DELETE /account/
|
||||||
|
// succeeds when the user sends confirmation: "DELETE".
|
||||||
|
// Post-Kratos: all users (regardless of provider) must confirm with "DELETE".
|
||||||
|
func TestAuthHandler_DeleteAccount_WithConfirmation(t *testing.T) {
|
||||||
handler, e, db := setupDeleteAccountHandler(t)
|
handler, e, db := setupDeleteAccountHandler(t)
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "deletetest", "delete@test.com", "Password123")
|
user := testutil.CreateTestUser(t, db, "deletetest", "delete@test.com", "ignored")
|
||||||
|
|
||||||
// Create profile for the user
|
// Create profile for the user
|
||||||
profile := &models.UserProfile{UserID: user.ID, Verified: true}
|
profile := &models.UserProfile{UserID: user.ID, Verified: true}
|
||||||
require.NoError(t, db.Create(profile).Error)
|
require.NoError(t, db.Create(profile).Error)
|
||||||
|
|
||||||
// Create auth token
|
|
||||||
testutil.CreateTestToken(t, db, user.ID)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
authGroup := e.Group("/api/auth")
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
authGroup.Use(testutil.MockAuthMiddleware(user))
|
||||||
authGroup.DELETE("/account/", handler.DeleteAccount)
|
authGroup.DELETE("/account/", handler.DeleteAccount)
|
||||||
|
|
||||||
t.Run("successful deletion with correct password", func(t *testing.T) {
|
t.Run("successful deletion with DELETE confirmation", func(t *testing.T) {
|
||||||
password := "Password123"
|
|
||||||
req := map[string]interface{}{
|
req := map[string]interface{}{
|
||||||
"password": password,
|
"confirmation": "DELETE",
|
||||||
}
|
}
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
||||||
@@ -74,106 +73,15 @@ func TestAuthHandler_DeleteAccount_EmailUser(t *testing.T) {
|
|||||||
// Verify profile is deleted
|
// Verify profile is deleted
|
||||||
db.Model(&models.UserProfile{}).Where("user_id = ?", user.ID).Count(&count)
|
db.Model(&models.UserProfile{}).Where("user_id = ?", user.ID).Count(&count)
|
||||||
assert.Equal(t, int64(0), count)
|
assert.Equal(t, int64(0), count)
|
||||||
|
|
||||||
// Verify auth token is deleted
|
|
||||||
db.Model(&models.AuthToken{}).Where("user_id = ?", user.ID).Count(&count)
|
|
||||||
assert.Equal(t, int64(0), count)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAuthHandler_DeleteAccount_WrongPassword(t *testing.T) {
|
// TestAuthHandler_DeleteAccount_MissingConfirmation verifies that a missing
|
||||||
|
// confirmation string is rejected with 400.
|
||||||
|
func TestAuthHandler_DeleteAccount_MissingConfirmation(t *testing.T) {
|
||||||
handler, e, db := setupDeleteAccountHandler(t)
|
handler, e, db := setupDeleteAccountHandler(t)
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "wrongpw", "wrongpw@test.com", "Password123")
|
user := testutil.CreateTestUser(t, db, "nopw", "nopw@test.com", "ignored")
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.DELETE("/account/", handler.DeleteAccount)
|
|
||||||
|
|
||||||
t.Run("wrong password returns 401", func(t *testing.T) {
|
|
||||||
wrongPw := "wrongpassword"
|
|
||||||
req := map[string]interface{}{
|
|
||||||
"password": wrongPw,
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusUnauthorized)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_DeleteAccount_MissingPassword(t *testing.T) {
|
|
||||||
handler, e, db := setupDeleteAccountHandler(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "nopw", "nopw@test.com", "Password123")
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.DELETE("/account/", handler.DeleteAccount)
|
|
||||||
|
|
||||||
t.Run("missing password returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_DeleteAccount_SocialAuthUser(t *testing.T) {
|
|
||||||
handler, e, db := setupDeleteAccountHandler(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "appleuser", "apple@test.com", "randompassword")
|
|
||||||
|
|
||||||
// Create Apple social auth record
|
|
||||||
appleAuth := &models.AppleSocialAuth{
|
|
||||||
UserID: user.ID,
|
|
||||||
AppleID: "apple_sub_123",
|
|
||||||
Email: "apple@test.com",
|
|
||||||
}
|
|
||||||
require.NoError(t, db.Create(appleAuth).Error)
|
|
||||||
|
|
||||||
// Create profile
|
|
||||||
profile := &models.UserProfile{UserID: user.ID, Verified: true}
|
|
||||||
require.NoError(t, db.Create(profile).Error)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.DELETE("/account/", handler.DeleteAccount)
|
|
||||||
|
|
||||||
t.Run("successful deletion with DELETE confirmation", func(t *testing.T) {
|
|
||||||
confirmation := "DELETE"
|
|
||||||
req := map[string]interface{}{
|
|
||||||
"confirmation": confirmation,
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
// Verify user is deleted
|
|
||||||
var count int64
|
|
||||||
db.Model(&models.User{}).Where("id = ?", user.ID).Count(&count)
|
|
||||||
assert.Equal(t, int64(0), count)
|
|
||||||
|
|
||||||
// Verify apple auth is deleted
|
|
||||||
db.Model(&models.AppleSocialAuth{}).Where("user_id = ?", user.ID).Count(&count)
|
|
||||||
assert.Equal(t, int64(0), count)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_DeleteAccount_SocialAuthMissingConfirmation(t *testing.T) {
|
|
||||||
handler, e, db := setupDeleteAccountHandler(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "googleuser", "google@test.com", "randompassword")
|
|
||||||
|
|
||||||
// Create Google social auth record
|
|
||||||
googleAuth := &models.GoogleSocialAuth{
|
|
||||||
UserID: user.ID,
|
|
||||||
GoogleID: "google_sub_456",
|
|
||||||
Email: "google@test.com",
|
|
||||||
}
|
|
||||||
require.NoError(t, db.Create(googleAuth).Error)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
authGroup := e.Group("/api/auth")
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
authGroup.Use(testutil.MockAuthMiddleware(user))
|
||||||
@@ -188,9 +96,8 @@ func TestAuthHandler_DeleteAccount_SocialAuthMissingConfirmation(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("wrong confirmation returns 400", func(t *testing.T) {
|
t.Run("wrong confirmation returns 400", func(t *testing.T) {
|
||||||
wrongConfirmation := "delete"
|
|
||||||
req := map[string]interface{}{
|
req := map[string]interface{}{
|
||||||
"confirmation": wrongConfirmation,
|
"confirmation": "delete", // lowercase — must be exact "DELETE"
|
||||||
}
|
}
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "test-token")
|
||||||
@@ -199,6 +106,8 @@ func TestAuthHandler_DeleteAccount_SocialAuthMissingConfirmation(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAuthHandler_DeleteAccount_Unauthenticated verifies that 401 is returned
|
||||||
|
// when no auth middleware is set.
|
||||||
func TestAuthHandler_DeleteAccount_Unauthenticated(t *testing.T) {
|
func TestAuthHandler_DeleteAccount_Unauthenticated(t *testing.T) {
|
||||||
handler, e, _ := setupDeleteAccountHandler(t)
|
handler, e, _ := setupDeleteAccountHandler(t)
|
||||||
|
|
||||||
@@ -207,7 +116,7 @@ func TestAuthHandler_DeleteAccount_Unauthenticated(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("unauthenticated request returns 401", func(t *testing.T) {
|
t.Run("unauthenticated request returns 401", func(t *testing.T) {
|
||||||
req := map[string]interface{}{
|
req := map[string]interface{}{
|
||||||
"password": "Password123",
|
"confirmation": "DELETE",
|
||||||
}
|
}
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "")
|
w := testutil.MakeRequest(e, "DELETE", "/api/auth/account/", req, "")
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
// auth_handler_test.go tests the auth handler endpoints that survived the
|
||||||
|
// Ory Kratos migration: GET /me/ and PUT/PATCH /profile/.
|
||||||
|
// Login, register, logout, forgot-password, and social sign-in are now
|
||||||
|
// handled by Kratos.
|
||||||
package handlers
|
package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -34,204 +38,32 @@ func setupAuthHandler(t *testing.T) (*AuthHandler, *echo.Echo, *repositories.Use
|
|||||||
return handler, e, userRepo
|
return handler, e, userRepo
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAuthHandler_Register(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/register/", handler.Register)
|
|
||||||
|
|
||||||
t.Run("successful registration", func(t *testing.T) {
|
|
||||||
req := requests.RegisterRequest{
|
|
||||||
Username: "newuser",
|
|
||||||
Email: "new@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
FirstName: "New",
|
|
||||||
LastName: "User",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusCreated)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "token")
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "user")
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "message")
|
|
||||||
|
|
||||||
user := response["user"].(map[string]interface{})
|
|
||||||
assert.Equal(t, "newuser", user["username"])
|
|
||||||
assert.Equal(t, "new@test.com", user["email"])
|
|
||||||
assert.Equal(t, "New", user["first_name"])
|
|
||||||
assert.Equal(t, "User", user["last_name"])
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("registration with missing fields", func(t *testing.T) {
|
|
||||||
req := map[string]string{
|
|
||||||
"username": "test",
|
|
||||||
// Missing email and password
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "error")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("registration with short password", func(t *testing.T) {
|
|
||||||
req := requests.RegisterRequest{
|
|
||||||
Username: "testuser",
|
|
||||||
Email: "test@test.com",
|
|
||||||
Password: "short", // Less than 8 chars
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("registration with duplicate username", func(t *testing.T) {
|
|
||||||
// First registration
|
|
||||||
req := requests.RegisterRequest{
|
|
||||||
Username: "duplicate",
|
|
||||||
Email: "unique1@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusCreated)
|
|
||||||
|
|
||||||
// Try to register again with same username
|
|
||||||
req.Email = "unique2@test.com"
|
|
||||||
w = testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusConflict) // 409 for duplicate resource
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
assert.Contains(t, response["error"], "Username already taken")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("registration with duplicate email", func(t *testing.T) {
|
|
||||||
// First registration
|
|
||||||
req := requests.RegisterRequest{
|
|
||||||
Username: "user1",
|
|
||||||
Email: "duplicate@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusCreated)
|
|
||||||
|
|
||||||
// Try to register again with same email
|
|
||||||
req.Username = "user2"
|
|
||||||
w = testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusConflict) // 409 for duplicate resource
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
assert.Contains(t, response["error"], "Email already registered")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_Login(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/register/", handler.Register)
|
|
||||||
e.POST("/api/auth/login/", handler.Login)
|
|
||||||
|
|
||||||
// Create a test user
|
|
||||||
registerReq := requests.RegisterRequest{
|
|
||||||
Username: "logintest",
|
|
||||||
Email: "login@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
FirstName: "Test",
|
|
||||||
LastName: "User",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", registerReq, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusCreated)
|
|
||||||
|
|
||||||
t.Run("successful login with username", func(t *testing.T) {
|
|
||||||
req := requests.LoginRequest{
|
|
||||||
Username: "logintest",
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/login/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "token")
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "user")
|
|
||||||
|
|
||||||
user := response["user"].(map[string]interface{})
|
|
||||||
assert.Equal(t, "logintest", user["username"])
|
|
||||||
assert.Equal(t, "login@test.com", user["email"])
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("successful login with email", func(t *testing.T) {
|
|
||||||
req := requests.LoginRequest{
|
|
||||||
Username: "login@test.com", // Using email as username
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/login/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("login with wrong password", func(t *testing.T) {
|
|
||||||
req := requests.LoginRequest{
|
|
||||||
Username: "logintest",
|
|
||||||
Password: "wrongpassword",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/login/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusUnauthorized)
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
assert.Contains(t, response["error"], "Invalid credentials")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("login with non-existent user", func(t *testing.T) {
|
|
||||||
req := requests.LoginRequest{
|
|
||||||
Username: "nonexistent",
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/login/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusUnauthorized)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("login with missing fields", func(t *testing.T) {
|
|
||||||
req := map[string]string{
|
|
||||||
"username": "logintest",
|
|
||||||
// Missing password
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/login/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_CurrentUser(t *testing.T) {
|
func TestAuthHandler_CurrentUser(t *testing.T) {
|
||||||
handler, e, userRepo := setupAuthHandler(t)
|
handler, e, _ := setupAuthHandler(t)
|
||||||
|
|
||||||
db := testutil.SetupTestDB(t)
|
db := testutil.SetupTestDB(t)
|
||||||
user := testutil.CreateTestUser(t, db, "metest", "me@test.com", "Password123")
|
user := testutil.CreateTestUser(t, db, "metest", "me@test.com", "")
|
||||||
user.FirstName = "Test"
|
user.FirstName = "Test"
|
||||||
user.LastName = "User"
|
user.LastName = "User"
|
||||||
userRepo.Update(user)
|
// Use the userRepo from setupAuthHandler's DB, but since we need the user
|
||||||
|
// in the same DB we re-create it there.
|
||||||
|
db2 := testutil.SetupTestDB(t)
|
||||||
|
user2 := testutil.CreateTestUser(t, db2, "metest2", "me2@test.com", "")
|
||||||
|
user2.FirstName = "Test"
|
||||||
|
user2.LastName = "User"
|
||||||
|
userRepo2 := repositories.NewUserRepository(db2)
|
||||||
|
require.NoError(t, userRepo2.Update(user2))
|
||||||
|
|
||||||
|
// Build handler against db2
|
||||||
|
cfg := &config.Config{}
|
||||||
|
authService2 := services.NewAuthService(userRepo2, cfg)
|
||||||
|
handler2 := NewAuthHandler(authService2, nil, nil)
|
||||||
|
|
||||||
// Set up route with mock auth middleware
|
|
||||||
authGroup := e.Group("/api/auth")
|
authGroup := e.Group("/api/auth")
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
authGroup.Use(testutil.MockAuthMiddleware(user2))
|
||||||
authGroup.GET("/me/", handler.CurrentUser)
|
authGroup.GET("/me/", handler2.CurrentUser)
|
||||||
|
|
||||||
|
_ = handler // avoid unused
|
||||||
|
|
||||||
t.Run("get current user", func(t *testing.T) {
|
t.Run("get current user", func(t *testing.T) {
|
||||||
w := testutil.MakeRequest(e, "GET", "/api/auth/me/", nil, "test-token")
|
w := testutil.MakeRequest(e, "GET", "/api/auth/me/", nil, "test-token")
|
||||||
@@ -242,23 +74,26 @@ func TestAuthHandler_CurrentUser(t *testing.T) {
|
|||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
err := json.Unmarshal(w.Body.Bytes(), &response)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, "metest", response["username"])
|
assert.Equal(t, "metest2", response["username"])
|
||||||
assert.Equal(t, "me@test.com", response["email"])
|
assert.Equal(t, "me2@test.com", response["email"])
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAuthHandler_UpdateProfile(t *testing.T) {
|
func TestAuthHandler_UpdateProfile(t *testing.T) {
|
||||||
handler, e, userRepo := setupAuthHandler(t)
|
|
||||||
|
|
||||||
db := testutil.SetupTestDB(t)
|
db := testutil.SetupTestDB(t)
|
||||||
user := testutil.CreateTestUser(t, db, "updatetest", "update@test.com", "Password123")
|
userRepo := repositories.NewUserRepository(db)
|
||||||
userRepo.Update(user)
|
cfg := &config.Config{}
|
||||||
|
authService := services.NewAuthService(userRepo, cfg)
|
||||||
|
handler := NewAuthHandler(authService, nil, nil)
|
||||||
|
e := testutil.SetupTestRouter()
|
||||||
|
|
||||||
|
user := testutil.CreateTestUser(t, db, "updatetest", "update@test.com", "")
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
authGroup := e.Group("/api/auth")
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
authGroup.Use(testutil.MockAuthMiddleware(user))
|
||||||
authGroup.PUT("/profile/", handler.UpdateProfile)
|
authGroup.PUT("/profile/", handler.UpdateProfile)
|
||||||
|
|
||||||
t.Run("update profile", func(t *testing.T) {
|
t.Run("update first and last name", func(t *testing.T) {
|
||||||
firstName := "Updated"
|
firstName := "Updated"
|
||||||
lastName := "Name"
|
lastName := "Name"
|
||||||
req := requests.UpdateProfileRequest{
|
req := requests.UpdateProfileRequest{
|
||||||
@@ -278,130 +113,3 @@ func TestAuthHandler_UpdateProfile(t *testing.T) {
|
|||||||
assert.Equal(t, "Name", response["last_name"])
|
assert.Equal(t, "Name", response["last_name"])
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAuthHandler_ForgotPassword(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/register/", handler.Register)
|
|
||||||
e.POST("/api/auth/forgot-password/", handler.ForgotPassword)
|
|
||||||
|
|
||||||
// Create a test user
|
|
||||||
registerReq := requests.RegisterRequest{
|
|
||||||
Username: "forgottest",
|
|
||||||
Email: "forgot@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
}
|
|
||||||
testutil.MakeRequest(e, "POST", "/api/auth/register/", registerReq, "")
|
|
||||||
|
|
||||||
t.Run("forgot password with valid email", func(t *testing.T) {
|
|
||||||
req := requests.ForgotPasswordRequest{
|
|
||||||
Email: "forgot@test.com",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/forgot-password/", req, "")
|
|
||||||
|
|
||||||
// Always returns 200 to prevent email enumeration
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
testutil.AssertJSONFieldExists(t, response, "message")
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("forgot password with invalid email", func(t *testing.T) {
|
|
||||||
req := requests.ForgotPasswordRequest{
|
|
||||||
Email: "nonexistent@test.com",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/forgot-password/", req, "")
|
|
||||||
|
|
||||||
// Still returns 200 to prevent email enumeration
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_Logout(t *testing.T) {
|
|
||||||
handler, e, userRepo := setupAuthHandler(t)
|
|
||||||
|
|
||||||
db := testutil.SetupTestDB(t)
|
|
||||||
user := testutil.CreateTestUser(t, db, "logouttest", "logout@test.com", "Password123")
|
|
||||||
userRepo.Update(user)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.POST("/logout/", handler.Logout)
|
|
||||||
|
|
||||||
t.Run("successful logout", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/logout/", nil, "test-token")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
response := testutil.ParseJSON(t, w.Body.Bytes())
|
|
||||||
assert.Contains(t, response["message"], "Logged out successfully")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_JSONResponses(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/register/", handler.Register)
|
|
||||||
e.POST("/api/auth/login/", handler.Login)
|
|
||||||
|
|
||||||
t.Run("register response has correct JSON structure", func(t *testing.T) {
|
|
||||||
req := requests.RegisterRequest{
|
|
||||||
Username: "jsontest",
|
|
||||||
Email: "json@test.com",
|
|
||||||
Password: "Password123",
|
|
||||||
FirstName: "JSON",
|
|
||||||
LastName: "Test",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusCreated)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
// Verify top-level structure
|
|
||||||
assert.Contains(t, response, "token")
|
|
||||||
assert.Contains(t, response, "user")
|
|
||||||
assert.Contains(t, response, "message")
|
|
||||||
|
|
||||||
// Verify token is not empty
|
|
||||||
assert.NotEmpty(t, response["token"])
|
|
||||||
|
|
||||||
// Verify user structure
|
|
||||||
user := response["user"].(map[string]interface{})
|
|
||||||
assert.Contains(t, user, "id")
|
|
||||||
assert.Contains(t, user, "username")
|
|
||||||
assert.Contains(t, user, "email")
|
|
||||||
assert.Contains(t, user, "first_name")
|
|
||||||
assert.Contains(t, user, "last_name")
|
|
||||||
assert.Contains(t, user, "is_active")
|
|
||||||
assert.Contains(t, user, "date_joined")
|
|
||||||
|
|
||||||
// Verify types
|
|
||||||
assert.IsType(t, float64(0), user["id"]) // JSON numbers are float64
|
|
||||||
assert.IsType(t, "", user["username"])
|
|
||||||
assert.IsType(t, "", user["email"])
|
|
||||||
assert.IsType(t, true, user["is_active"])
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("error response has correct JSON structure", func(t *testing.T) {
|
|
||||||
req := map[string]string{
|
|
||||||
"username": "test",
|
|
||||||
}
|
|
||||||
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/register/", req, "")
|
|
||||||
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
assert.Contains(t, response, "error")
|
|
||||||
assert.IsType(t, "", response["error"])
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ func (h *ContractorHandler) ListContractors(c echo.Context) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
response, err := h.contractorService.ListContractors(user.ID)
|
response, err := h.contractorService.ListContractors(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return apperrors.Internal(err)
|
return apperrors.Internal(err)
|
||||||
}
|
}
|
||||||
@@ -48,7 +48,7 @@ func (h *ContractorHandler) GetContractor(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_contractor_id")
|
return apperrors.BadRequest("error.invalid_contractor_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.GetContractor(uint(contractorID), user.ID)
|
response, err := h.contractorService.GetContractor(c.Request().Context(), uint(contractorID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -69,7 +69,7 @@ func (h *ContractorHandler) CreateContractor(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.CreateContractor(&req, user.ID)
|
response, err := h.contractorService.CreateContractor(c.Request().Context(), &req, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -95,7 +95,7 @@ func (h *ContractorHandler) UpdateContractor(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.UpdateContractor(uint(contractorID), user.ID, &req)
|
response, err := h.contractorService.UpdateContractor(c.Request().Context(), uint(contractorID), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -113,7 +113,7 @@ func (h *ContractorHandler) DeleteContractor(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_contractor_id")
|
return apperrors.BadRequest("error.invalid_contractor_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.contractorService.DeleteContractor(uint(contractorID), user.ID)
|
err = h.contractorService.DeleteContractor(c.Request().Context(), uint(contractorID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -131,7 +131,7 @@ func (h *ContractorHandler) ToggleFavorite(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_contractor_id")
|
return apperrors.BadRequest("error.invalid_contractor_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.ToggleFavorite(uint(contractorID), user.ID)
|
response, err := h.contractorService.ToggleFavorite(c.Request().Context(), uint(contractorID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -149,7 +149,7 @@ func (h *ContractorHandler) GetContractorTasks(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_contractor_id")
|
return apperrors.BadRequest("error.invalid_contractor_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.GetContractorTasks(uint(contractorID), user.ID)
|
response, err := h.contractorService.GetContractorTasks(c.Request().Context(), uint(contractorID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -167,7 +167,7 @@ func (h *ContractorHandler) ListContractorsByResidence(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_residence_id")
|
return apperrors.BadRequest("error.invalid_residence_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.contractorService.ListContractorsByResidence(uint(residenceID), user.ID)
|
response, err := h.contractorService.ListContractorsByResidence(c.Request().Context(), uint(residenceID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -176,7 +176,7 @@ func (h *ContractorHandler) ListContractorsByResidence(c echo.Context) error {
|
|||||||
|
|
||||||
// GetSpecialties handles GET /api/contractors/specialties/
|
// GetSpecialties handles GET /api/contractors/specialties/
|
||||||
func (h *ContractorHandler) GetSpecialties(c echo.Context) error {
|
func (h *ContractorHandler) GetSpecialties(c echo.Context) error {
|
||||||
specialties, err := h.contractorService.GetSpecialties()
|
specialties, err := h.contractorService.GetSpecialties(c.Request().Context())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return apperrors.Internal(err)
|
return apperrors.Internal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ func (h *DocumentHandler) ListDocuments(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.ListDocuments(user.ID, filter)
|
response, err := h.documentService.ListDocuments(c.Request().Context(), user.ID, filter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -88,7 +88,7 @@ func (h *DocumentHandler) GetDocument(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_document_id")
|
return apperrors.BadRequest("error.invalid_document_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.GetDocument(uint(documentID), user.ID)
|
response, err := h.documentService.GetDocument(c.Request().Context(), uint(documentID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -101,7 +101,7 @@ func (h *DocumentHandler) ListWarranties(c echo.Context) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
response, err := h.documentService.ListWarranties(user.ID)
|
response, err := h.documentService.ListWarranties(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return apperrors.Internal(err)
|
return apperrors.Internal(err)
|
||||||
}
|
}
|
||||||
@@ -201,7 +201,7 @@ func (h *DocumentHandler) CreateDocument(c echo.Context) error {
|
|||||||
if h.storageService == nil {
|
if h.storageService == nil {
|
||||||
return apperrors.Internal(nil)
|
return apperrors.Internal(nil)
|
||||||
}
|
}
|
||||||
result, err := h.storageService.Upload(uploadedFile, "documents")
|
result, err := h.storageService.Upload(c.Request().Context(), uploadedFile, "documents")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return apperrors.BadRequest("error.failed_to_upload_file")
|
return apperrors.BadRequest("error.failed_to_upload_file")
|
||||||
}
|
}
|
||||||
@@ -222,7 +222,7 @@ func (h *DocumentHandler) CreateDocument(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.CreateDocument(&req, user.ID)
|
response, err := h.documentService.CreateDocument(c.Request().Context(), &req, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -248,7 +248,7 @@ func (h *DocumentHandler) UpdateDocument(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.UpdateDocument(uint(documentID), user.ID, &req)
|
response, err := h.documentService.UpdateDocument(c.Request().Context(), uint(documentID), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -266,7 +266,7 @@ func (h *DocumentHandler) DeleteDocument(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_document_id")
|
return apperrors.BadRequest("error.invalid_document_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.documentService.DeleteDocument(uint(documentID), user.ID)
|
err = h.documentService.DeleteDocument(c.Request().Context(), uint(documentID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -284,7 +284,7 @@ func (h *DocumentHandler) ActivateDocument(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_document_id")
|
return apperrors.BadRequest("error.invalid_document_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.ActivateDocument(uint(documentID), user.ID)
|
response, err := h.documentService.ActivateDocument(c.Request().Context(), uint(documentID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -302,7 +302,7 @@ func (h *DocumentHandler) DeactivateDocument(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_document_id")
|
return apperrors.BadRequest("error.invalid_document_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.DeactivateDocument(uint(documentID), user.ID)
|
response, err := h.documentService.DeactivateDocument(c.Request().Context(), uint(documentID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -342,14 +342,14 @@ func (h *DocumentHandler) UploadDocumentImage(c echo.Context) error {
|
|||||||
return apperrors.Internal(nil)
|
return apperrors.Internal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
result, err := h.storageService.Upload(uploadedFile, "images")
|
result, err := h.storageService.Upload(c.Request().Context(), uploadedFile, "images")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return apperrors.BadRequest("error.failed_to_upload_file")
|
return apperrors.BadRequest("error.failed_to_upload_file")
|
||||||
}
|
}
|
||||||
|
|
||||||
caption := c.FormValue("caption")
|
caption := c.FormValue("caption")
|
||||||
|
|
||||||
response, err := h.documentService.UploadDocumentImage(uint(documentID), user.ID, result.URL, caption)
|
response, err := h.documentService.UploadDocumentImage(c.Request().Context(), uint(documentID), user.ID, result.URL, caption)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -372,7 +372,7 @@ func (h *DocumentHandler) DeleteDocumentImage(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_image_id")
|
return apperrors.BadRequest("error.invalid_image_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.documentService.DeleteDocumentImage(uint(documentID), uint(imageID), user.ID)
|
response, err := h.documentService.DeleteDocumentImage(c.Request().Context(), uint(documentID), uint(imageID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -506,232 +506,6 @@ func TestTaskHandler_CreateCompletion_NoTaskID(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Auth Handler - Additional Coverage
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
func TestAuthHandler_AppleSignIn_NotConfigured(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/apple-sign-in/", handler.AppleSignIn)
|
|
||||||
|
|
||||||
t.Run("returns 500 when apple auth not configured", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{
|
|
||||||
"id_token": "fake-token",
|
|
||||||
"user_id": "fake-user-id",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/apple-sign-in/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusInternalServerError)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing identity_token returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/apple-sign-in/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_GoogleSignIn_NotConfigured(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/google-sign-in/", handler.GoogleSignIn)
|
|
||||||
|
|
||||||
t.Run("returns 500 when google auth not configured", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{
|
|
||||||
"id_token": "fake-token",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/google-sign-in/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusInternalServerError)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing id_token returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/google-sign-in/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// setupAuthHandlerWithDB is like setupAuthHandler but also returns the underlying *gorm.DB
|
|
||||||
// for tests that need to create records like ConfirmationCode directly.
|
|
||||||
func setupAuthHandlerWithDB(t *testing.T) (*AuthHandler, *echo.Echo, *gorm.DB) {
|
|
||||||
db := testutil.SetupTestDB(t)
|
|
||||||
userRepo := repositories.NewUserRepository(db)
|
|
||||||
cfg := &config.Config{
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
SecretKey: "test-secret-key",
|
|
||||||
PasswordResetExpiry: 15 * time.Minute,
|
|
||||||
ConfirmationExpiry: 24 * time.Hour,
|
|
||||||
MaxPasswordResetRate: 3,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
authService := services.NewAuthService(userRepo, cfg)
|
|
||||||
handler := NewAuthHandler(authService, nil, nil)
|
|
||||||
e := testutil.SetupTestRouter()
|
|
||||||
return handler, e, db
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_VerifyEmail(t *testing.T) {
|
|
||||||
handler, e, db := setupAuthHandlerWithDB(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "verifytest", "verify@test.com", "Password123")
|
|
||||||
|
|
||||||
// Create confirmation code
|
|
||||||
confirmCode := &models.ConfirmationCode{
|
|
||||||
UserID: user.ID,
|
|
||||||
Code: "123456",
|
|
||||||
ExpiresAt: time.Now().Add(24 * time.Hour),
|
|
||||||
IsUsed: false,
|
|
||||||
}
|
|
||||||
require.NoError(t, db.Create(confirmCode).Error)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.POST("/verify-email/", handler.VerifyEmail)
|
|
||||||
|
|
||||||
t.Run("successful verification", func(t *testing.T) {
|
|
||||||
req := requests.VerifyEmailRequest{
|
|
||||||
Code: "123456",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/verify-email/", req, "test-token")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, true, response["verified"])
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("wrong code returns error", func(t *testing.T) {
|
|
||||||
req := requests.VerifyEmailRequest{
|
|
||||||
Code: "999999",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/verify-email/", req, "test-token")
|
|
||||||
// Code already used or wrong code
|
|
||||||
assert.True(t, w.Code == http.StatusBadRequest || w.Code == http.StatusNotFound,
|
|
||||||
"expected 400 or 404, got %d", w.Code)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing code returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/verify-email/", req, "test-token")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_ResendVerification(t *testing.T) {
|
|
||||||
handler, e, db := setupAuthHandlerWithDB(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "resendtest", "resend@test.com", "Password123")
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(testutil.MockAuthMiddleware(user))
|
|
||||||
authGroup.POST("/resend-verification/", handler.ResendVerification)
|
|
||||||
|
|
||||||
t.Run("successful resend", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/resend-verification/", nil, "test-token")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Contains(t, response, "message")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_RefreshToken(t *testing.T) {
|
|
||||||
handler, e, db := setupAuthHandlerWithDB(t)
|
|
||||||
|
|
||||||
user := testutil.CreateTestUser(t, db, "refreshtest", "refresh@test.com", "Password123")
|
|
||||||
|
|
||||||
// Create auth token and use its actual key in the middleware
|
|
||||||
authToken := testutil.CreateTestToken(t, db, user.ID)
|
|
||||||
|
|
||||||
authGroup := e.Group("/api/auth")
|
|
||||||
authGroup.Use(func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
||||||
return func(c echo.Context) error {
|
|
||||||
c.Set("auth_user", user)
|
|
||||||
c.Set("auth_token", authToken.Key)
|
|
||||||
return next(c)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
authGroup.POST("/refresh/", handler.RefreshToken)
|
|
||||||
|
|
||||||
t.Run("successful refresh", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/refresh/", nil, authToken.Key)
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusOK)
|
|
||||||
|
|
||||||
var response map[string]interface{}
|
|
||||||
err := json.Unmarshal(w.Body.Bytes(), &response)
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Contains(t, response, "token")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_VerifyResetCode(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/register/", handler.Register)
|
|
||||||
e.POST("/api/auth/verify-reset-code/", handler.VerifyResetCode)
|
|
||||||
|
|
||||||
t.Run("invalid code returns error", func(t *testing.T) {
|
|
||||||
req := requests.VerifyResetCodeRequest{
|
|
||||||
Email: "nonexistent@test.com",
|
|
||||||
Code: "999999",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/verify-reset-code/", req, "")
|
|
||||||
// Should not be 200 since no valid code exists
|
|
||||||
assert.NotEqual(t, http.StatusOK, w.Code)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing fields returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/verify-reset-code/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_ResetPassword(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/reset-password/", handler.ResetPassword)
|
|
||||||
|
|
||||||
t.Run("invalid reset token returns error", func(t *testing.T) {
|
|
||||||
req := requests.ResetPasswordRequest{
|
|
||||||
ResetToken: "invalid-token",
|
|
||||||
NewPassword: "NewPassword123",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/reset-password/", req, "")
|
|
||||||
assert.NotEqual(t, http.StatusOK, w.Code)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("missing fields returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/reset-password/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("short password returns 400", func(t *testing.T) {
|
|
||||||
req := requests.ResetPasswordRequest{
|
|
||||||
ResetToken: "some-token",
|
|
||||||
NewPassword: "short",
|
|
||||||
}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/reset-password/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAuthHandler_ForgotPassword_MissingEmail(t *testing.T) {
|
|
||||||
handler, e, _ := setupAuthHandler(t)
|
|
||||||
|
|
||||||
e.POST("/api/auth/forgot-password/", handler.ForgotPassword)
|
|
||||||
|
|
||||||
t.Run("missing email returns 400", func(t *testing.T) {
|
|
||||||
req := map[string]interface{}{}
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/auth/forgot-password/", req, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Residence Handler - Additional Error Paths
|
// Residence Handler - Additional Error Paths
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -1781,45 +1555,11 @@ func TestStaticDataHandler_RefreshStaticData(t *testing.T) {
|
|||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Upload Handler - Additional Error Paths
|
// Upload Handler - Additional Error Paths
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
//
|
||||||
func TestUploadHandler_UploadImage_NoFile(t *testing.T) {
|
// Multipart upload handlers (UploadImage / UploadDocument / UploadCompletion)
|
||||||
storageSvc := newTestStorageService("/var/uploads")
|
// were removed alongside the legacy /api/uploads/{image,document,completion}
|
||||||
handler := NewUploadHandler(storageSvc, nil)
|
// routes. The presigned-URL flow (POST /api/uploads/presign) is exercised by
|
||||||
e := testutil.SetupTestRouter()
|
// integration tests that hit the full pipeline.
|
||||||
|
|
||||||
e.POST("/api/uploads/image", handler.UploadImage)
|
|
||||||
|
|
||||||
t.Run("no file returns 400", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/uploads/image", nil, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUploadHandler_UploadDocument_NoFile(t *testing.T) {
|
|
||||||
storageSvc := newTestStorageService("/var/uploads")
|
|
||||||
handler := NewUploadHandler(storageSvc, nil)
|
|
||||||
e := testutil.SetupTestRouter()
|
|
||||||
|
|
||||||
e.POST("/api/uploads/document", handler.UploadDocument)
|
|
||||||
|
|
||||||
t.Run("no file returns 400", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/uploads/document", nil, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUploadHandler_UploadCompletion_NoFile(t *testing.T) {
|
|
||||||
storageSvc := newTestStorageService("/var/uploads")
|
|
||||||
handler := NewUploadHandler(storageSvc, nil)
|
|
||||||
e := testutil.SetupTestRouter()
|
|
||||||
|
|
||||||
e.POST("/api/uploads/completion", handler.UploadCompletion)
|
|
||||||
|
|
||||||
t.Run("no file returns 400", func(t *testing.T) {
|
|
||||||
w := testutil.MakeRequest(e, "POST", "/api/uploads/completion", nil, "")
|
|
||||||
testutil.AssertStatusCode(t, w, http.StatusBadRequest)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUploadHandler_DeleteFile_OwnershipDenied(t *testing.T) {
|
func TestUploadHandler_DeleteFile_OwnershipDenied(t *testing.T) {
|
||||||
storageSvc := newTestStorageService("/var/uploads")
|
storageSvc := newTestStorageService("/var/uploads")
|
||||||
|
|||||||
@@ -37,6 +37,23 @@ func NewMediaHandler(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// safeContentDisposition builds an inline Content-Disposition header value
|
||||||
|
// with a sanitized filename (audit M1). Control characters (including CR/LF),
|
||||||
|
// double-quote and backslash are stripped so an attacker-controlled upload
|
||||||
|
// filename cannot inject additional response headers (CWE-113).
|
||||||
|
func safeContentDisposition(filename string) string {
|
||||||
|
cleaned := strings.Map(func(r rune) rune {
|
||||||
|
if r < 0x20 || r == 0x7f || r == '"' || r == '\\' {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}, filename)
|
||||||
|
if cleaned == "" {
|
||||||
|
cleaned = "download"
|
||||||
|
}
|
||||||
|
return `inline; filename="` + cleaned + `"`
|
||||||
|
}
|
||||||
|
|
||||||
// ServeDocument serves a document file with access control
|
// ServeDocument serves a document file with access control
|
||||||
// GET /api/media/document/:id
|
// GET /api/media/document/:id
|
||||||
func (h *MediaHandler) ServeDocument(c echo.Context) error {
|
func (h *MediaHandler) ServeDocument(c echo.Context) error {
|
||||||
@@ -71,7 +88,7 @@ func (h *MediaHandler) ServeDocument(c echo.Context) error {
|
|||||||
// Set caching and disposition headers
|
// Set caching and disposition headers
|
||||||
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
||||||
if doc.FileName != "" {
|
if doc.FileName != "" {
|
||||||
c.Response().Header().Set("Content-Disposition", "inline; filename=\""+doc.FileName+"\"")
|
c.Response().Header().Set("Content-Disposition", safeContentDisposition(doc.FileName))
|
||||||
}
|
}
|
||||||
return c.Blob(http.StatusOK, mimeType, data)
|
return c.Blob(http.StatusOK, mimeType, data)
|
||||||
}
|
}
|
||||||
@@ -114,7 +131,7 @@ func (h *MediaHandler) ServeDocumentImage(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
||||||
c.Response().Header().Set("Content-Disposition", "inline; filename=\""+filepath.Base(img.ImageURL)+"\"")
|
c.Response().Header().Set("Content-Disposition", safeContentDisposition(filepath.Base(img.ImageURL)))
|
||||||
return c.Blob(http.StatusOK, mimeType, data)
|
return c.Blob(http.StatusOK, mimeType, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -162,7 +179,7 @@ func (h *MediaHandler) ServeCompletionImage(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
c.Response().Header().Set("Cache-Control", "private, max-age=3600")
|
||||||
c.Response().Header().Set("Content-Disposition", "inline; filename=\""+filepath.Base(img.ImageURL)+"\"")
|
c.Response().Header().Set("Content-Disposition", safeContentDisposition(filepath.Base(img.ImageURL)))
|
||||||
return c.Blob(http.StatusOK, mimeType, data)
|
return c.Blob(http.StatusOK, mimeType, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ func (h *NotificationHandler) ListNotifications(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
notifications, err := h.notificationService.GetNotifications(user.ID, limit, offset)
|
notifications, err := h.notificationService.GetNotifications(c.Request().Context(), user.ID, limit, offset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -64,7 +64,7 @@ func (h *NotificationHandler) GetUnreadCount(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
count, err := h.notificationService.GetUnreadCount(user.ID)
|
count, err := h.notificationService.GetUnreadCount(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -84,7 +84,7 @@ func (h *NotificationHandler) MarkAsRead(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_notification_id")
|
return apperrors.BadRequest("error.invalid_notification_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.notificationService.MarkAsRead(uint(notificationID), user.ID)
|
err = h.notificationService.MarkAsRead(c.Request().Context(), uint(notificationID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -99,7 +99,7 @@ func (h *NotificationHandler) MarkAllAsRead(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.notificationService.MarkAllAsRead(user.ID)
|
err = h.notificationService.MarkAllAsRead(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -114,7 +114,7 @@ func (h *NotificationHandler) GetPreferences(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
prefs, err := h.notificationService.GetPreferences(user.ID)
|
prefs, err := h.notificationService.GetPreferences(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -137,7 +137,7 @@ func (h *NotificationHandler) UpdatePreferences(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
prefs, err := h.notificationService.UpdatePreferences(user.ID, &req)
|
prefs, err := h.notificationService.UpdatePreferences(c.Request().Context(), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -160,7 +160,7 @@ func (h *NotificationHandler) RegisterDevice(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
device, err := h.notificationService.RegisterDevice(user.ID, &req)
|
device, err := h.notificationService.RegisterDevice(c.Request().Context(), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -175,7 +175,7 @@ func (h *NotificationHandler) ListDevices(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
devices, err := h.notificationService.ListDevices(user.ID)
|
devices, err := h.notificationService.ListDevices(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -208,7 +208,7 @@ func (h *NotificationHandler) UnregisterDevice(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_platform")
|
return apperrors.BadRequest("error.invalid_platform")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.notificationService.UnregisterDevice(req.RegistrationID, req.Platform, user.ID)
|
err = h.notificationService.UnregisterDevice(c.Request().Context(), req.RegistrationID, req.Platform, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -236,7 +236,7 @@ func (h *NotificationHandler) DeleteDevice(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_platform")
|
return apperrors.BadRequest("error.invalid_platform")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.notificationService.DeleteDevice(uint(deviceID), platform, user.ID)
|
err = h.notificationService.DeleteDevice(c.Request().Context(), uint(deviceID), platform, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ func (h *ResidenceHandler) ListResidences(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.residenceService.ListResidences(user.ID)
|
response, err := h.residenceService.ListResidences(c.Request().Context(), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -55,7 +55,7 @@ func (h *ResidenceHandler) GetMyResidences(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
userNow := middleware.GetUserNow(c)
|
userNow := middleware.GetUserNow(c)
|
||||||
|
|
||||||
response, err := h.residenceService.GetMyResidences(user.ID, userNow)
|
response, err := h.residenceService.GetMyResidences(c.Request().Context(), user.ID, userNow)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -72,7 +72,7 @@ func (h *ResidenceHandler) GetSummary(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
userNow := middleware.GetUserNow(c)
|
userNow := middleware.GetUserNow(c)
|
||||||
|
|
||||||
summary, err := h.residenceService.GetSummary(user.ID, userNow)
|
summary, err := h.residenceService.GetSummary(c.Request().Context(), user.ID, userNow)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -93,7 +93,7 @@ func (h *ResidenceHandler) GetResidence(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
userNow := middleware.GetUserNow(c)
|
userNow := middleware.GetUserNow(c)
|
||||||
response, err := h.residenceService.GetResidence(uint(residenceID), user.ID, userNow)
|
response, err := h.residenceService.GetResidence(c.Request().Context(), uint(residenceID), user.ID, userNow)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -116,7 +116,7 @@ func (h *ResidenceHandler) CreateResidence(c echo.Context) error {
|
|||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.residenceService.CreateResidence(&req, user.ID)
|
response, err := h.residenceService.CreateResidence(c.Request().Context(), &req, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -144,7 +144,7 @@ func (h *ResidenceHandler) UpdateResidence(c echo.Context) error {
|
|||||||
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
return c.JSON(http.StatusBadRequest, validator.FormatValidationErrors(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.residenceService.UpdateResidence(uint(residenceID), user.ID, &req)
|
response, err := h.residenceService.UpdateResidence(c.Request().Context(), uint(residenceID), user.ID, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -164,7 +164,7 @@ func (h *ResidenceHandler) DeleteResidence(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_residence_id")
|
return apperrors.BadRequest("error.invalid_residence_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.residenceService.DeleteResidence(uint(residenceID), user.ID)
|
response, err := h.residenceService.DeleteResidence(c.Request().Context(), uint(residenceID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -185,7 +185,7 @@ func (h *ResidenceHandler) GetShareCode(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_residence_id")
|
return apperrors.BadRequest("error.invalid_residence_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
shareCode, err := h.residenceService.GetShareCode(uint(residenceID), user.ID)
|
shareCode, err := h.residenceService.GetShareCode(c.Request().Context(), uint(residenceID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -213,7 +213,7 @@ func (h *ResidenceHandler) GenerateShareCode(c echo.Context) error {
|
|||||||
// Request body is optional
|
// Request body is optional
|
||||||
c.Bind(&req)
|
c.Bind(&req)
|
||||||
|
|
||||||
response, err := h.residenceService.GenerateShareCode(uint(residenceID), user.ID, req.ExpiresInHours)
|
response, err := h.residenceService.GenerateShareCode(c.Request().Context(), uint(residenceID), user.ID, req.ExpiresInHours)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -238,7 +238,7 @@ func (h *ResidenceHandler) GenerateSharePackage(c echo.Context) error {
|
|||||||
// Request body is optional (for expires_in_hours)
|
// Request body is optional (for expires_in_hours)
|
||||||
c.Bind(&req)
|
c.Bind(&req)
|
||||||
|
|
||||||
response, err := h.residenceService.GenerateSharePackage(uint(residenceID), user.ID, req.ExpiresInHours)
|
response, err := h.residenceService.GenerateSharePackage(c.Request().Context(), uint(residenceID), user.ID, req.ExpiresInHours)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -261,7 +261,7 @@ func (h *ResidenceHandler) JoinWithCode(c echo.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := h.residenceService.JoinWithCode(req.Code, user.ID)
|
response, err := h.residenceService.JoinWithCode(c.Request().Context(), req.Code, user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -281,7 +281,7 @@ func (h *ResidenceHandler) GetResidenceUsers(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_residence_id")
|
return apperrors.BadRequest("error.invalid_residence_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
users, err := h.residenceService.GetResidenceUsers(uint(residenceID), user.ID)
|
users, err := h.residenceService.GetResidenceUsers(c.Request().Context(), uint(residenceID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -306,7 +306,7 @@ func (h *ResidenceHandler) RemoveResidenceUser(c echo.Context) error {
|
|||||||
return apperrors.BadRequest("error.invalid_user_id")
|
return apperrors.BadRequest("error.invalid_user_id")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = h.residenceService.RemoveUser(uint(residenceID), uint(userIDToRemove), user.ID)
|
err = h.residenceService.RemoveUser(c.Request().Context(), uint(residenceID), uint(userIDToRemove), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -316,7 +316,7 @@ func (h *ResidenceHandler) RemoveResidenceUser(c echo.Context) error {
|
|||||||
|
|
||||||
// GetResidenceTypes handles GET /api/residences/types/
|
// GetResidenceTypes handles GET /api/residences/types/
|
||||||
func (h *ResidenceHandler) GetResidenceTypes(c echo.Context) error {
|
func (h *ResidenceHandler) GetResidenceTypes(c echo.Context) error {
|
||||||
types, err := h.residenceService.GetResidenceTypes()
|
types, err := h.residenceService.GetResidenceTypes(c.Request().Context())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -348,7 +348,7 @@ func (h *ResidenceHandler) GenerateTasksReport(c echo.Context) error {
|
|||||||
c.Bind(&req)
|
c.Bind(&req)
|
||||||
|
|
||||||
// Generate the report data
|
// Generate the report data
|
||||||
report, err := h.residenceService.GenerateTasksReport(uint(residenceID), user.ID)
|
report, err := h.residenceService.GenerateTasksReport(c.Request().Context(), uint(residenceID), user.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user