Compare commits
2 Commits
15359401fa
...
7e77e3bbab
| Author | SHA1 | Date | |
|---|---|---|---|
| 7e77e3bbab | |||
| ace03d2340 |
@@ -82,7 +82,7 @@ spec:
|
||||
timeoutSeconds: 5
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /admin/
|
||||
path: /
|
||||
port: 3000
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
# Simple hostname-based Ingress — no TLS (Cloudflare Flexible handles edge
|
||||
# TLS, CF→origin is plain HTTP on 80). Upgrade to Full (strict) by
|
||||
# adding back a `tls:` block with a Cloudflare Origin CA cert stored in
|
||||
# secret/cloudflare-origin-cert.
|
||||
# Hostname-based Ingress with TLS terminated at Traefik using the
|
||||
# Cloudflare Origin CA cert (secret/cloudflare-origin-cert). CF→origin
|
||||
# encryption enables CF SSL mode "Full (strict)".
|
||||
#
|
||||
# Middleware chain (security headers, rate limit, CF-only allowlist, admin
|
||||
# basic auth) is defined in `middleware.yaml` but NOT attached here —
|
||||
# annotate this ingress to turn any of them on.
|
||||
# basic auth) is defined in `middleware.yaml`. security-headers + rate-limit
|
||||
# are attached below via annotation.
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
@@ -13,8 +12,15 @@ metadata:
|
||||
namespace: honeydue
|
||||
labels:
|
||||
app.kubernetes.io/part-of: honeydue
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||
spec:
|
||||
ingressClassName: traefik
|
||||
tls:
|
||||
- hosts:
|
||||
- api.myhoneydue.com
|
||||
- myhoneydue.com
|
||||
secretName: cloudflare-origin-cert
|
||||
rules:
|
||||
- host: api.myhoneydue.com
|
||||
http:
|
||||
@@ -46,8 +52,14 @@ metadata:
|
||||
namespace: honeydue
|
||||
labels:
|
||||
app.kubernetes.io/part-of: honeydue
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||
spec:
|
||||
ingressClassName: traefik
|
||||
tls:
|
||||
- hosts:
|
||||
- admin.myhoneydue.com
|
||||
secretName: cloudflare-origin-cert
|
||||
rules:
|
||||
- host: admin.myhoneydue.com
|
||||
http:
|
||||
@@ -67,8 +79,14 @@ metadata:
|
||||
namespace: honeydue
|
||||
labels:
|
||||
app.kubernetes.io/part-of: honeydue
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||
spec:
|
||||
ingressClassName: traefik
|
||||
tls:
|
||||
- hosts:
|
||||
- app.myhoneydue.com
|
||||
secretName: cloudflare-origin-cert
|
||||
rules:
|
||||
- host: app.myhoneydue.com
|
||||
http:
|
||||
|
||||
@@ -27,7 +27,10 @@ spec:
|
||||
X-Content-Type-Options: "nosniff"
|
||||
X-Frame-Options: "DENY"
|
||||
Strict-Transport-Security: "max-age=31536000; includeSubDomains"
|
||||
Content-Security-Policy: "default-src 'self'; frame-ancestors 'none'"
|
||||
# Content-Security-Policy is intentionally NOT set here — the Go API
|
||||
# sets a CSP in internal/router/router.go that permits Google Fonts
|
||||
# for the landing page. Two CSP headers would intersect and break it.
|
||||
# admin and web apps set their own CSP via Next.js middleware.
|
||||
Permissions-Policy: "camera=(), microphone=(), geolocation=()"
|
||||
X-Permitted-Cross-Domain-Policies: "none"
|
||||
|
||||
|
||||
@@ -47,10 +47,19 @@ spec:
|
||||
policyTypes:
|
||||
- Ingress
|
||||
ingress:
|
||||
# Traefik runs as DaemonSet with hostNetwork=true, so traffic from it
|
||||
# arrives with the NODE IP as source (not a pod IP). The cluster pod CIDR
|
||||
# 10.42.0.0/16 covers any intra-cluster caller; the three node IPs
|
||||
# cover Traefik on hostNetwork.
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
- ipBlock:
|
||||
cidr: 178.105.32.198/32 # ubuntu-8gb-nbg1-1
|
||||
- ipBlock:
|
||||
cidr: 178.104.247.152/32 # ubuntu-8gb-nbg1-2
|
||||
- ipBlock:
|
||||
cidr: 178.104.249.189/32 # ubuntu-8gb-nbg1-3
|
||||
- ipBlock:
|
||||
cidr: 10.42.0.0/16 # cluster pod CIDR
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8000
|
||||
@@ -69,10 +78,17 @@ spec:
|
||||
policyTypes:
|
||||
- Ingress
|
||||
ingress:
|
||||
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||
# for the rationale. Same ipBlock list.
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
- ipBlock:
|
||||
cidr: 178.105.32.198/32
|
||||
- ipBlock:
|
||||
cidr: 178.104.247.152/32
|
||||
- ipBlock:
|
||||
cidr: 178.104.249.189/32
|
||||
- ipBlock:
|
||||
cidr: 10.42.0.0/16
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 3000
|
||||
@@ -200,3 +216,62 @@ spec:
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8000
|
||||
|
||||
---
|
||||
# --- Web: allow ingress from Traefik (hostNetwork, so matched by node IP) ---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: allow-ingress-to-web
|
||||
namespace: honeydue
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: web
|
||||
policyTypes:
|
||||
- Ingress
|
||||
ingress:
|
||||
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||
# for the rationale. Same ipBlock list.
|
||||
- from:
|
||||
- ipBlock:
|
||||
cidr: 178.105.32.198/32
|
||||
- ipBlock:
|
||||
cidr: 178.104.247.152/32
|
||||
- ipBlock:
|
||||
cidr: 178.104.249.189/32
|
||||
- ipBlock:
|
||||
cidr: 10.42.0.0/16
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 3000
|
||||
|
||||
---
|
||||
# --- Web: allow egress for the Next.js server-side proxy routes ---
|
||||
# Browser → app.myhoneydue.com → web pod (Node.js) → api.myhoneydue.com
|
||||
# The web pod resolves api.myhoneydue.com via public DNS and hits
|
||||
# Cloudflare's public edge. We can't know which CF IP at policy time, so
|
||||
# allow HTTPS to any public IP (ipBlock 0.0.0.0/0 except the private CIDRs).
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: allow-egress-from-web
|
||||
namespace: honeydue
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: web
|
||||
policyTypes:
|
||||
- Egress
|
||||
egress:
|
||||
# HTTPS to public (api.myhoneydue.com via CF, PostHog, any other remote)
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 0.0.0.0/0
|
||||
except:
|
||||
- 10.0.0.0/8
|
||||
- 172.16.0.0/12
|
||||
- 192.168.0.0/16
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 443
|
||||
|
||||
@@ -27,23 +27,27 @@ that every legitimate port be enumerated in a rule.
|
||||
Run `sudo ufw status verbose` on any node to see the live ruleset. The
|
||||
canonical ruleset is below, grouped by purpose.
|
||||
|
||||
### Public-facing (anywhere)
|
||||
### Public-facing
|
||||
|
||||
| Port | Protocol | From | Purpose | Comment |
|
||||
|---|---|---|---|---|
|
||||
| 22 | TCP | Anywhere | SSH | |
|
||||
| 80 | TCP | Anywhere | HTTP (Cloudflare → Traefik) | |
|
||||
| 443 | TCP | Anywhere | HTTPS (future, currently unused at origin) | |
|
||||
| Port | Protocol | From | Purpose |
|
||||
|---|---|---|---|
|
||||
| 22 | TCP | Anywhere | SSH (key-only) |
|
||||
| 443 | TCP | Cloudflare ranges (15 IPv4 + 7 IPv6) | HTTPS (CF → Traefik, TLS-terminated at Traefik) |
|
||||
|
||||
**Why 443 is open but unused**: We're on Cloudflare SSL=Flexible, so
|
||||
Cloudflare talks to origin over plain HTTP:80. Port 443 on origin is
|
||||
only hit by misconfigured clients (who bypass CF DNS and hit node IPs
|
||||
directly). Traefik's config accepts it but we don't require it. Keeping
|
||||
it open smooths a future switch to Full (strict) SSL mode.
|
||||
**Port :80 is closed** on all three nodes. CF is in Full (strict) mode
|
||||
and initiates every request on :443 to the origin. Cloudflare's
|
||||
"Always Use HTTPS" turns any plaintext client request into HTTPS at
|
||||
the edge, so the origin never needs to accept :80.
|
||||
|
||||
**Future hardening**: Restrict 80 and 443 to Cloudflare's published IP
|
||||
ranges (15 IPv4 CIDRs, 7 IPv6 CIDRs). See [Chapter 13](./13-cloudflare.md)
|
||||
for the ranges and the UFW rule format. Today they're open to anyone.
|
||||
**Port :443 is restricted to Cloudflare** via 22 UFW allow rules per
|
||||
node (one per CF CIDR). Direct-connect from any non-CF IP is dropped
|
||||
at the kernel. This closes the "node IP leak = bypass CF WAF/DDoS"
|
||||
hole entirely. See [Chapter 13](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips)
|
||||
for the exact ranges and UFW rule format.
|
||||
|
||||
**Refresh cadence**: CF updates its IP ranges rarely. A monthly
|
||||
`curl https://www.cloudflare.com/ips-v4` diff and UFW re-apply is
|
||||
enough. Automation TODO (Chapter 20).
|
||||
|
||||
### SSH (operator access)
|
||||
|
||||
|
||||
@@ -280,16 +280,22 @@ most Ingress controllers and matches how users think about URL routing.
|
||||
|
||||
## How requests flow
|
||||
|
||||
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to one of three IPs
|
||||
(round-robin). Say it picks `178.105.32.198` (hetzner2).
|
||||
2. **Cloudflare edge** establishes TCP to `178.105.32.198:80` (plain HTTP,
|
||||
SSL=Flexible). Original HTTPS terminated at CF.
|
||||
3. **UFW on hetzner2** accepts the SYN (80/tcp open from anywhere).
|
||||
4. **Linux kernel** sees a listener on 0.0.0.0:80 (the Traefik pod).
|
||||
Hands off the SYN.
|
||||
5. **Traefik accepts** the connection. Reads the HTTP request.
|
||||
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to a CF edge IP
|
||||
(client never sees the three origin IPs — CF proxies).
|
||||
2. **Cloudflare edge** terminates TLS from the browser, then opens a
|
||||
fresh TCP to one of the origin IPs on `:443` (SSL=Full (strict)).
|
||||
Say it picks `178.105.32.198` (hetzner2).
|
||||
3. **UFW on hetzner2** accepts the SYN — the source IP is in one of
|
||||
the 15 CF IPv4 CIDRs allowed on `:443`. (Any non-CF source IP is
|
||||
dropped at the kernel.)
|
||||
4. **Linux kernel** sees a listener on `0.0.0.0:443` (the Traefik pod,
|
||||
hostNetwork). Hands off the SYN.
|
||||
5. **Traefik accepts** the connection, completes the TLS handshake
|
||||
using the `cloudflare-origin-cert` secret (CF Origin CA — CF
|
||||
verifies this chain on its side). Reads the plaintext HTTP request.
|
||||
6. **Traefik matches** the `Host:` header against its router table.
|
||||
`Host: api.myhoneydue.com` → `honeydue-api` Ingress → `api` Service.
|
||||
Attached middlewares (`security-headers`, `rate-limit`) run here.
|
||||
7. **Traefik dials** `10.43.167.83:8000` (api Service ClusterIP). This
|
||||
goes through the cluster DNS (CoreDNS) and kube-proxy (IPVS).
|
||||
8. **kube-proxy IPVS** rewrites the destination to a live api pod endpoint
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
## Summary
|
||||
|
||||
Four workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
||||
replicas), **admin** (Next.js panel, 1 replica), **worker** (Go background
|
||||
jobs, 1 replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
||||
Five workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
||||
replicas), **admin** (Next.js admin panel, 1 replica), **web** (Next.js
|
||||
customer-facing app, 3 replicas), **worker** (Go background jobs, 1
|
||||
replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
||||
This chapter deep-dives each: container image, resource limits, probes,
|
||||
volumes, and why each knob is set the way it is.
|
||||
|
||||
@@ -14,10 +15,11 @@ volumes, and why each knob is set the way it is.
|
||||
|---|---|---|---|---|
|
||||
| `api` | `gitea.treytartt.com/admin/honeydue-api:<sha>` | 3 | 8000 | HTTP REST API |
|
||||
| `admin` | `gitea.treytartt.com/admin/honeydue-admin:<sha>` | 1 | 3000 | Next.js admin panel |
|
||||
| `web` | `gitea.treytartt.com/admin/honeydue-web:<sha>` | 3 | 3000 | Next.js customer-facing web client at `app.myhoneydue.com` |
|
||||
| `worker` | `gitea.treytartt.com/admin/honeydue-worker:<sha>` | 1 | — | Background job processor |
|
||||
| `redis` | `redis:7-alpine` | 1 | 6379 | Cache + Asynq queue |
|
||||
|
||||
All four are Kubernetes `Deployment` workloads (not StatefulSets, not
|
||||
All five are Kubernetes `Deployment` workloads (not StatefulSets, not
|
||||
DaemonSets). They share:
|
||||
- ServiceAccount with `automountServiceAccountToken: false` (Chapter 5)
|
||||
- `imagePullSecrets: [gitea-credentials]` (Chapter 11)
|
||||
@@ -25,6 +27,66 @@ DaemonSets). They share:
|
||||
- Individual env vars wired to `honeydue-secrets` keys
|
||||
- Read-only root filesystem with `tmp` emptyDir mounted at `/tmp`
|
||||
|
||||
## Service — web (Next.js customer app)
|
||||
|
||||
### What it does
|
||||
|
||||
Lives at `https://app.myhoneydue.com`. Next.js 16 standalone build,
|
||||
served by `node server.js` inside the container. Sibling repo:
|
||||
`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-Web/`.
|
||||
|
||||
### Architecture: server-side proxy pattern
|
||||
|
||||
Unlike the admin panel (which makes CORS requests directly to
|
||||
`api.myhoneydue.com`), the web app uses a proxy pattern:
|
||||
|
||||
```
|
||||
Browser → https://app.myhoneydue.com/api/proxy/tasks/123/
|
||||
→ Next.js route handler (src/app/api/proxy/[...path]/route.ts)
|
||||
→ reads honeydue-token httpOnly cookie
|
||||
→ attaches Authorization: Token <value>
|
||||
→ https://api.myhoneydue.com/api/tasks/123/ (server-side fetch)
|
||||
→ response flows back
|
||||
```
|
||||
|
||||
**Consequences:**
|
||||
- Browser never makes cross-origin requests. No CORS entry needed on
|
||||
the Go API for `app.myhoneydue.com`.
|
||||
- Auth tokens live in httpOnly cookies, not localStorage. XSS can't
|
||||
exfiltrate them.
|
||||
- The web pod needs outbound HTTPS to `api.myhoneydue.com` — covered
|
||||
in the `allow-egress-from-web` NetworkPolicy (Chapter 5).
|
||||
|
||||
### Env vars
|
||||
|
||||
Build-time (baked into the client bundle by the Dockerfile `ARG`):
|
||||
- `NEXT_PUBLIC_API_URL` — only used as a fallback; baked for safety
|
||||
- `NEXT_PUBLIC_POSTHOG_KEY` — PostHog project API key
|
||||
- `NEXT_PUBLIC_POSTHOG_HOST` — `https://analytics.88oakapps.com`
|
||||
|
||||
Runtime (ConfigMap):
|
||||
- `API_URL=https://api.myhoneydue.com/api` — consumed by the
|
||||
server-side proxy handlers
|
||||
- `PORT=3000`, `HOSTNAME=0.0.0.0`
|
||||
|
||||
### Deployment spec highlights
|
||||
|
||||
- **3 replicas**, same as api — this is a production customer surface
|
||||
- `topologySpreadConstraints` across `kubernetes.io/hostname` —
|
||||
evicting one node kills at most one pod
|
||||
- `readOnlyRootFilesystem: true`; `emptyDir`s at `/app/.next/cache`
|
||||
(Next.js build cache) and `/tmp`
|
||||
- PDB `web-pdb` with `minAvailable: 2`
|
||||
- runAsUser/runAsGroup `1001` (matches the `nextjs` user created in
|
||||
the Dockerfile)
|
||||
|
||||
### Why same availability as api
|
||||
|
||||
The web client is now the primary user-facing surface. Users hitting
|
||||
`app.myhoneydue.com/login` should never see a 502 because a single
|
||||
node went down. 3 replicas × `minAvailable: 2` guarantees at least
|
||||
two pods stay up through any voluntary disruption.
|
||||
|
||||
## Service 1 — api (Go REST API)
|
||||
|
||||
### What it does
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
Cloudflare sits in front of every public request. It provides DNS
|
||||
(authoritative nameservers for `myhoneydue.com`), TLS termination at
|
||||
the edge, DDoS mitigation, caching, and the round-robin fan-out across
|
||||
our three node IPs. We use the Free plan. TLS mode is "Flexible"
|
||||
(HTTP between CF and origin). This chapter documents every Cloudflare
|
||||
our three node IPs. We use the Free plan. TLS mode is **Full (strict)**
|
||||
— CF connects to origin over HTTPS and verifies the origin's cert
|
||||
against CF's own Origin CA. This chapter documents every Cloudflare
|
||||
setting that matters.
|
||||
|
||||
## DNS
|
||||
@@ -72,53 +73,49 @@ when you want sub-second failover.
|
||||
|
||||
## TLS
|
||||
|
||||
### Mode: Flexible
|
||||
### Mode: Full (strict)
|
||||
|
||||
CF Dashboard → SSL/TLS → Overview → **Flexible**.
|
||||
CF Dashboard → SSL/TLS → Overview → **Full (strict)**.
|
||||
|
||||
**What this means:**
|
||||
- User ↔ Cloudflare: **TLS** (HTTPS)
|
||||
- Cloudflare ↔ Origin: **plaintext HTTP** (port 80)
|
||||
- User ↔ Cloudflare: **TLS** (HTTPS) — CF serves its own Let's Encrypt cert
|
||||
- Cloudflare ↔ Origin: **TLS** (HTTPS :443) — origin serves our CF Origin CA cert; CF verifies it chains to CF's Origin CA root
|
||||
|
||||
**Why we chose it:**
|
||||
- No origin cert required on the Hetzner nodes
|
||||
- Zero Traefik cert-management complexity
|
||||
- Fine for a site where CF terminates all user-facing TLS
|
||||
**How it's wired:**
|
||||
- k8s secret `cloudflare-origin-cert` (type `kubernetes.io/tls`) holds
|
||||
`tls.crt` + `tls.key`. The cert is valid for `*.myhoneydue.com` +
|
||||
`myhoneydue.com`, 15-year validity, issued by
|
||||
`CloudFlare Origin CA SSL Certificate Authority`.
|
||||
- All three `Ingress` resources in `deploy-k3s/manifests/ingress/ingress-simple.yaml`
|
||||
reference the secret via `spec.tls[].secretName`.
|
||||
- Traefik terminates TLS on :443 using the cert. Backend pods still
|
||||
speak plain HTTP over the cluster network (Traefik → pod is an
|
||||
intra-cluster hop, encrypted at the Flannel overlay layer).
|
||||
|
||||
**Downsides:**
|
||||
- An attacker with network access between CF and Hetzner could read
|
||||
traffic. Realistically: nobody between CF's POPs and Hetzner's
|
||||
Nuremberg DC, but it's theoretically plaintext on the wire.
|
||||
- MitM risk if DNS gets hijacked and traffic is routed through an
|
||||
unintended origin.
|
||||
**Why we chose Full (strict) over Flexible:**
|
||||
- CF → origin traffic was plaintext on Flexible. Between Cloudflare's
|
||||
POPs and Hetzner Nuremberg is a lot of internet. Full (strict)
|
||||
closes that gap.
|
||||
- The Origin cert is issued by a CF-internal-only CA, so it's useless to anyone who
|
||||
isn't CF. Non-CF clients that somehow bypass the UFW CF-IP allowlist
|
||||
can't impersonate the origin because their cert wouldn't chain to
|
||||
CF's Origin CA root.
|
||||
|
||||
### Future: Full (strict)
|
||||
**Maintenance:** the Origin CA cert is valid for 15 years (expires
|
||||
Apr 2041). No action needed until then. If rotation is ever required,
|
||||
regenerate in CF dashboard → SSL/TLS → Origin Server, re-run the
|
||||
`kubectl -n honeydue create secret tls cloudflare-origin-cert --cert=cf-origin-cert.pem --key=cf-origin-key.pem --dry-run=client -o yaml | kubectl apply -f -`
|
||||
command; Traefik picks it up on the next secret reload (no pod restart).
|
||||
|
||||
The next step up is **Full (strict)**: CF verifies origin's TLS cert
|
||||
and connects over HTTPS. Cloudflare provides free **Origin CA
|
||||
certificates** for this: they're issued by a CF-internal CA that only
|
||||
CF's own edge accepts. An attacker without a CF-signed cert can't
|
||||
impersonate our origin.
|
||||
### Regenerating the cert (for the record)
|
||||
|
||||
Path to enable:
|
||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
||||
2. Download as PEM
|
||||
3. Create k8s Secret `cloudflare-origin-cert`:
|
||||
```bash
|
||||
kubectl create secret tls cloudflare-origin-cert -n honeydue \
|
||||
--cert=origin.crt --key=origin.key
|
||||
```
|
||||
4. Add `tls:` block to our Ingress:
|
||||
```yaml
|
||||
spec:
|
||||
tls:
|
||||
- hosts: [api.myhoneydue.com]
|
||||
secretName: cloudflare-origin-cert
|
||||
```
|
||||
5. Switch CF SSL mode to Full (strict)
|
||||
|
||||
Trade-off: the `cloudflare-origin-cert` expires (default 15 years), so
|
||||
low maintenance. **TODO** (Chapter 20).
|
||||
```bash
|
||||
# After downloading cf-origin-cert.pem + cf-origin-key.pem from CF dashboard:
|
||||
kubectl -n honeydue create secret tls cloudflare-origin-cert \
|
||||
--cert=cf-origin-cert.pem \
|
||||
--key=cf-origin-key.pem \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
```
|
||||
|
||||
### Edge certificate
|
||||
|
||||
|
||||
@@ -19,69 +19,55 @@ minute, with Slack/email alerts on failure.
|
||||
**Effort**: ~30 min for Uptime Kuma deploy, ~10 min for Better Stack
|
||||
signup.
|
||||
|
||||
### Cloudflare origin IP restriction
|
||||
### ~~Cloudflare origin IP restriction~~ ✓ DONE (2026-04-24)
|
||||
|
||||
**Why**: UFW allows :80 from anywhere. If node IPs leak, direct-connect
|
||||
attackers bypass CF's WAF/DDoS protection.
|
||||
Both `:80` and `:443` `Anywhere` rules removed on all 3 nodes. Only
|
||||
CF's 15 IPv4 + 7 IPv6 ranges allowed on `:443`. Direct-connect attempts
|
||||
from non-CF IPs time out.
|
||||
|
||||
**How**: Replace the anywhere-80 UFW rule with 15 IPv4 + 7 IPv6 CF
|
||||
ranges. See [Chapter 13 §CF IP ranges](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips).
|
||||
**Still TODO**: monthly automated refresh of the CF IP list. Ranges
|
||||
change rarely; manual re-run of `scripts/ufw-cf-refresh.sh` (not yet
|
||||
written) on cadence is acceptable for now.
|
||||
|
||||
Automation: a small script that refreshes the CF IP list monthly and
|
||||
re-applies UFW rules.
|
||||
### ~~Enable network policies in k3s~~ ✓ DONE (2026-04-24)
|
||||
|
||||
**Effort**: 1 hour.
|
||||
Applied with one scaffold correction: Traefik runs as a DaemonSet with
|
||||
`hostNetwork: true`, so traffic from it arrives with the **node IP** as
|
||||
source rather than a pod IP. The original scaffold used
|
||||
`namespaceSelector: kube-system` which doesn't match hostNetwork
|
||||
traffic. Fixed by using an `ipBlock` list of the three node IPs plus
|
||||
the cluster pod CIDR `10.42.0.0/16`.
|
||||
|
||||
### Enable network policies in k3s
|
||||
Also added policies for `web` (missing from the original scaffold).
|
||||
|
||||
**Why**: Currently pods can freely egress anywhere. A compromised pod
|
||||
could exfiltrate data or attack lateral services.
|
||||
### ~~Apply Traefik security middleware~~ ✓ DONE (2026-04-24)
|
||||
|
||||
**How**: `kubectl apply -f deploy-k3s/manifests/network-policies.yaml`.
|
||||
The scaffold defines default-deny + explicit allows for:
|
||||
- DNS egress for all pods
|
||||
- Traefik → api (port 8000)
|
||||
- Traefik → admin (port 3000)
|
||||
- api/worker → Redis
|
||||
- api/worker → external services (Postgres, B2, Fastmail)
|
||||
`security-headers` + `rate-limit` attached to all three ingresses
|
||||
(api, admin, web). `admin-auth` is defined but not attached (needs an
|
||||
`admin-basic-auth` secret we haven't created). `cloudflare-only` IP
|
||||
allowlist exists but is redundant with the UFW-level CF restriction —
|
||||
keep for defense in depth if we ever expose another layer.
|
||||
|
||||
Then test that nothing breaks (might need to adjust allow rules).
|
||||
|
||||
**Effort**: 1-2 hours including testing.
|
||||
|
||||
### Apply Traefik security middleware
|
||||
|
||||
**Why**: Our current Ingress has no rate limiting or security headers
|
||||
beyond what Traefik adds by default.
|
||||
|
||||
**How**: Apply `deploy-k3s/manifests/ingress/middleware.yaml`, annotate
|
||||
Ingresses to use them:
|
||||
|
||||
```yaml
|
||||
metadata:
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||
```
|
||||
|
||||
**Effort**: 15 min.
|
||||
One scaffold correction: the `Content-Security-Policy` header in
|
||||
`security-headers.customResponseHeaders` was stripped. The Go API sets
|
||||
its own CSP in `internal/router/router.go`, and two CSP headers combine
|
||||
via intersection (most restrictive wins), which would break the Google
|
||||
Fonts on the marketing landing page. Next.js apps set their own via
|
||||
middleware.
|
||||
|
||||
## Medium priority
|
||||
|
||||
### Upgrade to CF Full (strict) SSL
|
||||
### ~~Upgrade to CF Full (strict) SSL~~ ✓ DONE (2026-04-24)
|
||||
|
||||
**Why**: Currently CF↔origin is plain HTTP. An attacker between CF and
|
||||
Hetzner could read traffic. Full (strict) mode encrypts this leg with
|
||||
a CF-issued origin cert.
|
||||
Origin CA cert (`*.myhoneydue.com` + `myhoneydue.com`, 15-year
|
||||
validity) stored as `cloudflare-origin-cert` TLS secret. All three
|
||||
ingresses reference it via `tls:` blocks. CF mode flipped from
|
||||
Flexible to Full (strict). Verified by:
|
||||
|
||||
**How**:
|
||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
||||
2. Create `cloudflare-origin-cert` Secret in k8s
|
||||
3. Add `tls:` block to Ingresses
|
||||
4. Switch CF SSL mode to Full (strict)
|
||||
|
||||
**Effort**: 30 min.
|
||||
|
||||
**Citations**: [Cloudflare Origin CA docs][cf-origin-ca]
|
||||
- direct-connect to origin on `:443` serves the Origin cert (subject
|
||||
`CN=CloudFlare Origin Certificate`)
|
||||
- CF edge continues to serve its own Let's Encrypt cert to browsers
|
||||
- both layers now TLS-encrypted
|
||||
|
||||
### Migration Job for schema changes
|
||||
|
||||
@@ -312,7 +298,16 @@ k3s server on each node with the new backend.
|
||||
As items are done, mark them here. Think of this as a running changelog.
|
||||
|
||||
- [x] k3s migration from Swarm (2026-04-24)
|
||||
- [x] Traefik DaemonSet + hostNetwork
|
||||
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD
|
||||
- [x] Documentation book (this doc set)
|
||||
- [x] Traefik DaemonSet + hostNetwork (2026-04-24)
|
||||
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD (2026-04-24)
|
||||
- [x] Documentation book (this doc set) (2026-04-24)
|
||||
- [x] Web client deployed at `app.myhoneydue.com` (2026-04-24) — Next.js 16 standalone, 3 replicas with PDB, proxy pattern to api, see Chapter 7.
|
||||
- [x] Admin URL-baking fix (2026-04-24) — Dockerfile `ARG NEXT_PUBLIC_API_URL`, `.dockerignore` hardening for `admin/.env.*`.
|
||||
- [x] Auto-seed initial data on first API boot (2026-04-24) — `20260414_seed_initial_data` migration populates lookups, admin user, task templates. See commit `4ec4bbb`.
|
||||
- [x] APNs wired up (2026-04-24) — Key ID `5L5BVF5G48`, Team ID `X86BR9WTLD`, sandbox mode. Secret `honeydue-apns-key`, `FEATURE_PUSH_ENABLED=true`.
|
||||
- [x] Traefik middleware: `security-headers` + `rate-limit` attached to all three ingresses (2026-04-24). CSP is stripped from the middleware because the Go API sets its own.
|
||||
- [x] Admin liveness probe path fix (2026-04-24) — was hitting `/admin/` (404) and crashlooping every ~90s for 6 hours before the bug was caught. Fixed to `/`.
|
||||
- [x] Network policies applied (2026-04-24) — default-deny + explicit allows. Traefik hostNetwork is matched via node IP `ipBlock`s, not namespaceSelector. See Chapter 5.
|
||||
- [x] Cloudflare Full (strict) SSL (2026-04-24) — Origin CA cert installed as `cloudflare-origin-cert` secret, ingresses have `tls:` blocks, CF mode flipped from Flexible. Both user↔CF and CF↔origin now TLS.
|
||||
- [x] UFW CF-IP allowlist on all 3 nodes (2026-04-24) — 15 IPv4 + 7 IPv6 CF ranges allow `:443`; `Anywhere` rules for `:80` and `:443` deleted. Direct-connect from non-CF IPs times out.
|
||||
- [ ] All other items above
|
||||
|
||||
@@ -34,6 +34,14 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
||||
| `~/.docker/config.json` | Docker CLI config. After `docker login` to Gitea, contains creds. **Log out after each deploy** to not leave PATs on disk. |
|
||||
| `~/Library/Containers/com.docker.docker/` | Docker Desktop state (macOS). |
|
||||
|
||||
### Apple / Cloudflare credentials on disk
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `~/Desktop/code/honeyDue/AuthKey_5L5BVF5G48.p8` | APNs auth key (Apple). Source file for the `honeydue-apns-key` k8s secret. Sensitive — treat as a credential. |
|
||||
| `~/Desktop/code/honeyDue/cf-origin-cert.pem` | Cloudflare Origin CA cert (PEM). Source file for the `cloudflare-origin-cert` k8s secret. `*.myhoneydue.com` + `myhoneydue.com`, expires 2041. |
|
||||
| `~/Desktop/code/honeyDue/cf-origin-key.pem` | Private key for the Origin cert. CF only shows this **once** at generation time. Sensitive — treat as a credential. |
|
||||
|
||||
## Git repo (`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/`)
|
||||
|
||||
### Top-level
|
||||
@@ -90,19 +98,21 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
||||
| `deploy-k3s/manifests/namespace.yaml` | Creates `honeydue` namespace. |
|
||||
| `deploy-k3s/manifests/rbac.yaml` | ServiceAccounts + `automountServiceAccountToken: false`. |
|
||||
| `deploy-k3s/manifests/pod-disruption-budgets.yaml` | PDBs for api (2/3) and worker (0/1). |
|
||||
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. NOT currently applied. |
|
||||
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. **Applied.** Includes web policies; Traefik hostNetwork handled via node IP `ipBlock`s rather than namespaceSelector. |
|
||||
| `deploy-k3s/manifests/api/deployment.yaml` | api Deployment. |
|
||||
| `deploy-k3s/manifests/api/service.yaml` | api ClusterIP Service. |
|
||||
| `deploy-k3s/manifests/api/hpa.yaml` | api HorizontalPodAutoscaler. NOT currently applied. |
|
||||
| `deploy-k3s/manifests/admin/deployment.yaml` | admin Deployment. |
|
||||
| `deploy-k3s/manifests/admin/service.yaml` | admin Service. |
|
||||
| `deploy-k3s/manifests/web/deployment.yaml` | web Deployment (3 replicas, customer-facing Next.js at app.myhoneydue.com). |
|
||||
| `deploy-k3s/manifests/web/service.yaml` | web ClusterIP Service. |
|
||||
| `deploy-k3s/manifests/worker/deployment.yaml` | worker Deployment. |
|
||||
| `deploy-k3s/manifests/redis/deployment.yaml` | Redis Deployment. |
|
||||
| `deploy-k3s/manifests/redis/service.yaml` | Redis Service. |
|
||||
| `deploy-k3s/manifests/redis/pvc.yaml` | Redis PersistentVolumeClaim. |
|
||||
| `deploy-k3s/manifests/ingress/ingress.yaml` | Full Ingress with TLS + middleware (scaffold; needs CF origin cert). |
|
||||
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | Simple Ingress without TLS (what we actually apply). |
|
||||
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs. Not currently applied. |
|
||||
| `deploy-k3s/manifests/ingress/ingress.yaml` | Alternate full Ingress scaffold (unused; we apply ingress-simple.yaml). |
|
||||
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | **Primary Ingress**. TLS via CF Origin cert, `security-headers` + `rate-limit` middleware attached to all three rules (api/admin/web). |
|
||||
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs (`rate-limit`, `security-headers`, `cloudflare-only`). Applied. `admin-auth` was defined but removed at runtime (it needs a basic-auth secret that has not been created yet). |
|
||||
| `deploy-k3s/manifests/traefik-helmchartconfig.yaml` | Our DaemonSet + hostNetwork override for Traefik. |
|
||||
| `deploy-k3s/manifests/secrets.yaml.example` | Template (never deployed). |
|
||||
| `deploy-k3s/scripts/01-provision-cluster.sh` | hetzner-k3s provisioning (we didn't use it; existing nodes). |
|
||||
|
||||
Reference in New Issue
Block a user