Compare commits
2 Commits
15359401fa
...
7e77e3bbab
| Author | SHA1 | Date | |
|---|---|---|---|
| 7e77e3bbab | |||
| ace03d2340 |
@@ -82,7 +82,7 @@ spec:
|
|||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /admin/
|
path: /
|
||||||
port: 3000
|
port: 3000
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
# Simple hostname-based Ingress — no TLS (Cloudflare Flexible handles edge
|
# Hostname-based Ingress with TLS terminated at Traefik using the
|
||||||
# TLS, CF→origin is plain HTTP on 80). Upgrade to Full (strict) by
|
# Cloudflare Origin CA cert (secret/cloudflare-origin-cert). CF→origin
|
||||||
# adding back a `tls:` block with a Cloudflare Origin CA cert stored in
|
# encryption enables CF SSL mode "Full (strict)".
|
||||||
# secret/cloudflare-origin-cert.
|
|
||||||
#
|
#
|
||||||
# Middleware chain (security headers, rate limit, CF-only allowlist, admin
|
# Middleware chain (security headers, rate limit, CF-only allowlist, admin
|
||||||
# basic auth) is defined in `middleware.yaml` but NOT attached here —
|
# basic auth) is defined in `middleware.yaml`. security-headers + rate-limit
|
||||||
# annotate this ingress to turn any of them on.
|
# are attached below via annotation.
|
||||||
apiVersion: networking.k8s.io/v1
|
apiVersion: networking.k8s.io/v1
|
||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
@@ -13,8 +12,15 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- api.myhoneydue.com
|
||||||
|
- myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: api.myhoneydue.com
|
- host: api.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
@@ -46,8 +52,14 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- admin.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: admin.myhoneydue.com
|
- host: admin.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
@@ -67,8 +79,14 @@ metadata:
|
|||||||
namespace: honeydue
|
namespace: honeydue
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: honeydue
|
app.kubernetes.io/part-of: honeydue
|
||||||
|
annotations:
|
||||||
|
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
||||||
spec:
|
spec:
|
||||||
ingressClassName: traefik
|
ingressClassName: traefik
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- app.myhoneydue.com
|
||||||
|
secretName: cloudflare-origin-cert
|
||||||
rules:
|
rules:
|
||||||
- host: app.myhoneydue.com
|
- host: app.myhoneydue.com
|
||||||
http:
|
http:
|
||||||
|
|||||||
@@ -27,7 +27,10 @@ spec:
|
|||||||
X-Content-Type-Options: "nosniff"
|
X-Content-Type-Options: "nosniff"
|
||||||
X-Frame-Options: "DENY"
|
X-Frame-Options: "DENY"
|
||||||
Strict-Transport-Security: "max-age=31536000; includeSubDomains"
|
Strict-Transport-Security: "max-age=31536000; includeSubDomains"
|
||||||
Content-Security-Policy: "default-src 'self'; frame-ancestors 'none'"
|
# Content-Security-Policy is intentionally NOT set here — the Go API
|
||||||
|
# sets a CSP in internal/router/router.go that permits Google Fonts
|
||||||
|
# for the landing page. Two CSP headers would intersect and break it.
|
||||||
|
# admin and web apps set their own CSP via Next.js middleware.
|
||||||
Permissions-Policy: "camera=(), microphone=(), geolocation=()"
|
Permissions-Policy: "camera=(), microphone=(), geolocation=()"
|
||||||
X-Permitted-Cross-Domain-Policies: "none"
|
X-Permitted-Cross-Domain-Policies: "none"
|
||||||
|
|
||||||
|
|||||||
@@ -47,10 +47,19 @@ spec:
|
|||||||
policyTypes:
|
policyTypes:
|
||||||
- Ingress
|
- Ingress
|
||||||
ingress:
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true, so traffic from it
|
||||||
|
# arrives with the NODE IP as source (not a pod IP). The cluster pod CIDR
|
||||||
|
# 10.42.0.0/16 covers any intra-cluster caller; the three node IPs
|
||||||
|
# cover Traefik on hostNetwork.
|
||||||
- from:
|
- from:
|
||||||
- namespaceSelector:
|
- ipBlock:
|
||||||
matchLabels:
|
cidr: 178.105.32.198/32 # ubuntu-8gb-nbg1-1
|
||||||
kubernetes.io/metadata.name: kube-system
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32 # ubuntu-8gb-nbg1-2
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32 # ubuntu-8gb-nbg1-3
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16 # cluster pod CIDR
|
||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 8000
|
port: 8000
|
||||||
@@ -69,10 +78,17 @@ spec:
|
|||||||
policyTypes:
|
policyTypes:
|
||||||
- Ingress
|
- Ingress
|
||||||
ingress:
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||||
|
# for the rationale. Same ipBlock list.
|
||||||
- from:
|
- from:
|
||||||
- namespaceSelector:
|
- ipBlock:
|
||||||
matchLabels:
|
cidr: 178.105.32.198/32
|
||||||
kubernetes.io/metadata.name: kube-system
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 3000
|
port: 3000
|
||||||
@@ -200,3 +216,62 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 8000
|
port: 8000
|
||||||
|
|
||||||
|
---
|
||||||
|
# --- Web: allow ingress from Traefik (hostNetwork, matched by node-IP ipBlocks) ---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-ingress-to-web
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
ingress:
|
||||||
|
# Traefik runs as DaemonSet with hostNetwork=true — see allow-ingress-to-api
|
||||||
|
# for the rationale. Same ipBlock list.
|
||||||
|
- from:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.105.32.198/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.247.152/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 178.104.249.189/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 3000
|
||||||
|
|
||||||
|
---
|
||||||
|
# --- Web: allow egress for the Next.js server-side proxy routes ---
|
||||||
|
# Browser → app.myhoneydue.com → web pod (Node.js) → api.myhoneydue.com
|
||||||
|
# The web pod resolves api.myhoneydue.com via public DNS and hits
|
||||||
|
# Cloudflare. The CF edge IP is not known when this policy is written, so
|
||||||
|
# allow HTTPS to public ipBlock (except private CIDRs).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: allow-egress-from-web
|
||||||
|
namespace: honeydue
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: web
|
||||||
|
policyTypes:
|
||||||
|
- Egress
|
||||||
|
egress:
|
||||||
|
# HTTPS to public (api.myhoneydue.com via CF, PostHog, any other remote)
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 10.0.0.0/8
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 192.168.0.0/16
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 443
|
||||||
|
|||||||
@@ -27,23 +27,27 @@ that every legitimate port be enumerated in a rule.
|
|||||||
Run `sudo ufw status verbose` on any node to see the live ruleset. The
|
Run `sudo ufw status verbose` on any node to see the live ruleset. The
|
||||||
canonical ruleset below, grouped by purpose.
|
canonical ruleset below, grouped by purpose.
|
||||||
|
|
||||||
### Public-facing (anywhere)
|
### Public-facing
|
||||||
|
|
||||||
| Port | Protocol | From | Purpose | Comment |
|
| Port | Protocol | From | Purpose |
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|
|
||||||
| 22 | TCP | Anywhere | SSH | |
|
| 22 | TCP | Anywhere | SSH (key-only) |
|
||||||
| 80 | TCP | Anywhere | HTTP (Cloudflare → Traefik) | |
|
| 443 | TCP | Cloudflare ranges (15 IPv4 + 7 IPv6) | HTTPS (CF → Traefik, TLS-terminated at Traefik) |
|
||||||
| 443 | TCP | Anywhere | HTTPS (future, currently unused at origin) | |
|
|
||||||
|
|
||||||
**Why 443 is open but unused**: We're on Cloudflare SSL=Flexible, so
|
**Port :80 is closed** on all three nodes. CF is in Full (strict) mode
|
||||||
Cloudflare talks to origin over plain HTTP:80. Port 443 on origin is
|
and initiates every request on :443 to the origin. Cloudflare's
|
||||||
only hit by misconfigured clients (who bypass CF DNS and hit node IPs
|
"Always Use HTTPS" turns any plaintext client request into HTTPS at
|
||||||
directly). Traefik's config accepts it but we don't require it. Keeping
|
the edge, so the origin never needs to accept :80.
|
||||||
it open smooths a future switch to Full (strict) SSL mode.
|
|
||||||
|
|
||||||
**Future hardening**: Restrict 80 and 443 to Cloudflare's published IP
|
**Port :443 is restricted to Cloudflare** via 22 UFW allow rules per
|
||||||
ranges (15 IPv4 CIDRs, 7 IPv6 CIDRs). See [Chapter 13](./13-cloudflare.md)
|
node (one per CF CIDR). Direct-connect from any non-CF IP is dropped
|
||||||
for the ranges and the UFW rule format. Today they're open to anyone.
|
at the kernel. This closes the "node IP leak = bypass CF WAF/DDoS"
|
||||||
|
hole entirely. See [Chapter 13](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips)
|
||||||
|
for the exact ranges and UFW rule format.
|
||||||
|
|
||||||
|
**Refresh cadence**: CF updates its IP ranges rarely. A monthly
|
||||||
|
`curl https://www.cloudflare.com/ips-v4` diff and UFW re-apply is
|
||||||
|
enough. Automation TODO (Chapter 20).
|
||||||
|
|
||||||
### SSH (operator access)
|
### SSH (operator access)
|
||||||
|
|
||||||
|
|||||||
@@ -280,16 +280,22 @@ most Ingress controllers and matches how users think about URL routing.
|
|||||||
|
|
||||||
## How requests flow
|
## How requests flow
|
||||||
|
|
||||||
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to one of three IPs
|
1. **Cloudflare DNS** resolves `api.myhoneydue.com` to a CF edge IP
|
||||||
(round-robin). Say it picks `178.105.32.198` (hetzner2).
|
(client never sees the three origin IPs — CF proxies).
|
||||||
2. **Cloudflare edge** establishes TCP to `178.105.32.198:80` (plain HTTP,
|
2. **Cloudflare edge** terminates TLS from the browser, then opens a
|
||||||
SSL=Flexible). Original HTTPS terminated at CF.
|
fresh TCP to one of the origin IPs on `:443` (SSL=Full (strict)).
|
||||||
3. **UFW on hetzner2** accepts the SYN (80/tcp open from anywhere).
|
Say it picks `178.105.32.198` (hetzner2).
|
||||||
4. **Linux kernel** sees a listener on 0.0.0.0:80 (the Traefik pod).
|
3. **UFW on hetzner2** accepts the SYN — the source IP is in one of
|
||||||
Hands off the SYN.
|
the 15 CF IPv4 CIDRs allowed on `:443`. (Any non-CF source IP is
|
||||||
5. **Traefik accepts** the connection. Reads the HTTP request.
|
dropped at the kernel.)
|
||||||
|
4. **Linux kernel** sees a listener on `0.0.0.0:443` (the Traefik pod,
|
||||||
|
hostNetwork). Hands off the SYN.
|
||||||
|
5. **Traefik accepts** the connection, completes the TLS handshake
|
||||||
|
using the `cloudflare-origin-cert` secret (CF Origin CA — CF
|
||||||
|
verifies this chain on its side). Reads the decrypted HTTP request.
|
||||||
6. **Traefik matches** the `Host:` header against its router table.
|
6. **Traefik matches** the `Host:` header against its router table.
|
||||||
`Host: api.myhoneydue.com` → `honeydue-api` Ingress → `api` Service.
|
`Host: api.myhoneydue.com` → `honeydue-api` Ingress → `api` Service.
|
||||||
|
Attached middlewares (`security-headers`, `rate-limit`) run here.
|
||||||
7. **Traefik dials** `10.43.167.83:8000` (api Service ClusterIP). This
|
7. **Traefik dials** `10.43.167.83:8000` (api Service ClusterIP). This
|
||||||
goes through the cluster DNS (CoreDNS) and kube-proxy (IPVS).
|
goes through the cluster DNS (CoreDNS) and kube-proxy (IPVS).
|
||||||
8. **kube-proxy IPVS** rewrites the destination to a live api pod endpoint
|
8. **kube-proxy IPVS** rewrites the destination to a live api pod endpoint
|
||||||
|
|||||||
@@ -2,9 +2,10 @@
|
|||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
Four workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
Five workloads run in the `honeydue` namespace: **api** (Go REST API, 3
|
||||||
replicas), **admin** (Next.js panel, 1 replica), **worker** (Go background
|
replicas), **admin** (Next.js admin panel, 1 replica), **web** (Next.js
|
||||||
jobs, 1 replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
customer-facing app, 3 replicas), **worker** (Go background jobs, 1
|
||||||
|
replica), and **redis** (cache + job queue, 1 replica, PVC-backed).
|
||||||
This chapter deep-dives each: container image, resource limits, probes,
|
This chapter deep-dives each: container image, resource limits, probes,
|
||||||
volumes, and why each knob is set the way it is.
|
volumes, and why each knob is set the way it is.
|
||||||
|
|
||||||
@@ -14,10 +15,11 @@ volumes, and why each knob is set the way it is.
|
|||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| `api` | `gitea.treytartt.com/admin/honeydue-api:<sha>` | 3 | 8000 | HTTP REST API |
|
| `api` | `gitea.treytartt.com/admin/honeydue-api:<sha>` | 3 | 8000 | HTTP REST API |
|
||||||
| `admin` | `gitea.treytartt.com/admin/honeydue-admin:<sha>` | 1 | 3000 | Next.js admin panel |
|
| `admin` | `gitea.treytartt.com/admin/honeydue-admin:<sha>` | 1 | 3000 | Next.js admin panel |
|
||||||
|
| `web` | `gitea.treytartt.com/admin/honeydue-web:<sha>` | 3 | 3000 | Next.js customer-facing web client at `app.myhoneydue.com` |
|
||||||
| `worker` | `gitea.treytartt.com/admin/honeydue-worker:<sha>` | 1 | — | Background job processor |
|
| `worker` | `gitea.treytartt.com/admin/honeydue-worker:<sha>` | 1 | — | Background job processor |
|
||||||
| `redis` | `redis:7-alpine` | 1 | 6379 | Cache + Asynq queue |
|
| `redis` | `redis:7-alpine` | 1 | 6379 | Cache + Asynq queue |
|
||||||
|
|
||||||
All four are Kubernetes `Deployment` workloads (not StatefulSets, not
|
All five are Kubernetes `Deployment` workloads (not StatefulSets, not
|
||||||
DaemonSets). They share:
|
DaemonSets). They share:
|
||||||
- ServiceAccount with `automountServiceAccountToken: false` (Chapter 5)
|
- ServiceAccount with `automountServiceAccountToken: false` (Chapter 5)
|
||||||
- `imagePullSecrets: [gitea-credentials]` (Chapter 11)
|
- `imagePullSecrets: [gitea-credentials]` (Chapter 11)
|
||||||
@@ -25,6 +27,66 @@ DaemonSets). They share:
|
|||||||
- Individual env vars wired to `honeydue-secrets` keys
|
- Individual env vars wired to `honeydue-secrets` keys
|
||||||
- Read-only root filesystem with `tmp` emptyDir mounted at `/tmp`
|
- Read-only root filesystem with `tmp` emptyDir mounted at `/tmp`
|
||||||
|
|
||||||
|
## Service — web (Next.js customer app)
|
||||||
|
|
||||||
|
### What it does
|
||||||
|
|
||||||
|
Lives at `https://app.myhoneydue.com`. Next.js 16 standalone build,
|
||||||
|
served by `node server.js` inside the container. Sibling repo:
|
||||||
|
`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-Web/`.
|
||||||
|
|
||||||
|
### Architecture: server-side proxy pattern
|
||||||
|
|
||||||
|
Unlike the admin panel (which makes CORS requests directly to
|
||||||
|
`api.myhoneydue.com`), the web app uses a proxy pattern:
|
||||||
|
|
||||||
|
```
|
||||||
|
Browser → https://app.myhoneydue.com/api/proxy/tasks/123/
|
||||||
|
→ Next.js route handler (src/app/api/proxy/[...path]/route.ts)
|
||||||
|
→ reads honeydue-token httpOnly cookie
|
||||||
|
→ attaches Authorization: Token <value>
|
||||||
|
→ https://api.myhoneydue.com/api/tasks/123/ (server-side fetch)
|
||||||
|
→ response flows back
|
||||||
|
```
|
||||||
|
|
||||||
|
**Consequences:**
|
||||||
|
- Browser never makes cross-origin requests. No CORS entry needed on
|
||||||
|
the Go API for `app.myhoneydue.com`.
|
||||||
|
- Auth tokens live in httpOnly cookies, not localStorage. XSS can't
|
||||||
|
exfiltrate them.
|
||||||
|
- The web pod needs outbound HTTPS to `api.myhoneydue.com` — covered
|
||||||
|
in the `allow-egress-from-web` NetworkPolicy (Chapter 5).
|
||||||
|
|
||||||
|
### Env vars
|
||||||
|
|
||||||
|
Build-time (baked into the client bundle by the Dockerfile `ARG`):
|
||||||
|
- `NEXT_PUBLIC_API_URL` — only used as a fallback; baked for safety
|
||||||
|
- `NEXT_PUBLIC_POSTHOG_KEY` — PostHog project API key
|
||||||
|
- `NEXT_PUBLIC_POSTHOG_HOST` — `https://analytics.88oakapps.com`
|
||||||
|
|
||||||
|
Runtime (ConfigMap):
|
||||||
|
- `API_URL=https://api.myhoneydue.com/api` — consumed by the
|
||||||
|
server-side proxy handlers
|
||||||
|
- `PORT=3000`, `HOSTNAME=0.0.0.0`
|
||||||
|
|
||||||
|
### Deployment spec highlights
|
||||||
|
|
||||||
|
- **3 replicas**, same as api — this is a production customer surface
|
||||||
|
- `topologySpreadConstraints` across `kubernetes.io/hostname` —
|
||||||
|
losing or draining one node takes down at most one pod
|
||||||
|
- `readOnlyRootFilesystem: true`; `emptyDir`s at `/app/.next/cache`
|
||||||
|
(Next.js build cache) and `/tmp`
|
||||||
|
- PDB `web-pdb` with `minAvailable: 2`
|
||||||
|
- runAsUser/runAsGroup `1001` (matches the `nextjs` user created in
|
||||||
|
the Dockerfile)
|
||||||
|
|
||||||
|
### Why same availability as api
|
||||||
|
|
||||||
|
The web client is now the primary user-facing surface. Users hitting
|
||||||
|
`app.myhoneydue.com/login` should never see a 502 because a single
|
||||||
|
node went down. 3 replicas × `minAvailable: 2` guarantees at least
|
||||||
|
two pods stay up through any voluntary disruption.
|
||||||
|
|
||||||
## Service 1 — api (Go REST API)
|
## Service 1 — api (Go REST API)
|
||||||
|
|
||||||
### What it does
|
### What it does
|
||||||
|
|||||||
@@ -5,8 +5,9 @@
|
|||||||
Cloudflare sits in front of every public request. It provides DNS
|
Cloudflare sits in front of every public request. It provides DNS
|
||||||
(authoritative nameservers for `myhoneydue.com`), TLS termination at
|
(authoritative nameservers for `myhoneydue.com`), TLS termination at
|
||||||
the edge, DDoS mitigation, caching, and the round-robin fan-out across
|
the edge, DDoS mitigation, caching, and the round-robin fan-out across
|
||||||
our three node IPs. We use the Free plan. TLS mode is "Flexible"
|
our three node IPs. We use the Free plan. TLS mode is **Full (strict)**
|
||||||
(HTTP between CF and origin). This chapter documents every Cloudflare
|
— CF connects to origin over HTTPS and verifies the origin's cert
|
||||||
|
against CF's own Origin CA. This chapter documents every Cloudflare
|
||||||
setting that matters.
|
setting that matters.
|
||||||
|
|
||||||
## DNS
|
## DNS
|
||||||
@@ -72,53 +73,49 @@ when you want sub-second failover.
|
|||||||
|
|
||||||
## TLS
|
## TLS
|
||||||
|
|
||||||
### Mode: Flexible
|
### Mode: Full (strict)
|
||||||
|
|
||||||
CF Dashboard → SSL/TLS → Overview → **Flexible**.
|
CF Dashboard → SSL/TLS → Overview → **Full (strict)**.
|
||||||
|
|
||||||
**What this means:**
|
**What this means:**
|
||||||
- User ↔ Cloudflare: **TLS** (HTTPS)
|
- User ↔ Cloudflare: **TLS** (HTTPS) — CF serves its own Let's Encrypt cert
|
||||||
- Cloudflare ↔ Origin: **plaintext HTTP** (port 80)
|
- Cloudflare ↔ Origin: **TLS** (HTTPS :443) — origin serves our CF Origin CA cert; CF verifies it chains to CF's Origin CA root
|
||||||
|
|
||||||
**Why we chose it:**
|
**How it's wired:**
|
||||||
- No origin cert required on the Hetzner nodes
|
- k8s secret `cloudflare-origin-cert` (type `kubernetes.io/tls`) holds
|
||||||
- Zero Traefik cert-management complexity
|
`tls.crt` + `tls.key`. The cert is valid for `*.myhoneydue.com` +
|
||||||
- Fine for a site where CF terminates all user-facing TLS
|
`myhoneydue.com`, 15-year validity, issued by
|
||||||
|
`CloudFlare Origin CA SSL Certificate Authority`.
|
||||||
|
- All three `Ingress` resources in `deploy-k3s/manifests/ingress/ingress-simple.yaml`
|
||||||
|
reference the secret via `spec.tls[].secretName`.
|
||||||
|
- Traefik terminates TLS on :443 using the cert. Backend pods still
|
||||||
|
speak plain HTTP over the cluster network (Traefik → pod is an
|
||||||
|
intra-cluster hop; it is encrypted on the wire only if Flannel uses the `wireguard-native` backend — k3s's default VXLAN backend is not encrypted).
|
||||||
|
|
||||||
**Downsides:**
|
**Why we chose Full (strict) over Flexible:**
|
||||||
- An attacker with network access between CF and Hetzner could read
|
- CF → origin traffic was plaintext on Flexible. Between Cloudflare's
|
||||||
traffic. Realistically: nobody between CF's POPs and Hetzner's
|
POPs and Hetzner Nuremberg is a lot of internet. Full (strict)
|
||||||
Nuremberg DC, but it's theoretically plaintext on the wire.
|
closes that gap.
|
||||||
- MitM risk if DNS gets hijacked and traffic is routed through an
|
- Origin cert is a CF-internal-only CA, so it's useless to anyone who
|
||||||
unintended origin.
|
isn't CF. Non-CF clients that somehow bypass the UFW CF-IP allowlist
|
||||||
|
can't impersonate the origin because their cert wouldn't chain to
|
||||||
|
CF's Origin CA root.
|
||||||
|
|
||||||
### Future: Full (strict)
|
**Maintenance:** the Origin CA cert is valid for 15 years (expires
|
||||||
|
Apr 2041). No action needed until then. If rotation is ever required,
|
||||||
|
regenerate in CF dashboard → SSL/TLS → Origin Server, re-run the
|
||||||
|
`kubectl -n honeydue create secret tls cloudflare-origin-cert --cert=cf-origin-cert.pem --key=cf-origin-key.pem --dry-run=client -o yaml | kubectl apply -f -`
|
||||||
|
command, and Traefik picks up the new cert on its next secret reload (no pod restart).
|
||||||
|
|
||||||
The next step up is **Full (strict)**: CF verifies origin's TLS cert
|
### Regenerating the cert (for the record)
|
||||||
and connects over HTTPS. Cloudflare provides free **Origin CA
|
|
||||||
certificates** for this: they're issued by a CF-internal CA that only
|
|
||||||
CF's own edge accepts. An attacker without a CF-signed cert can't
|
|
||||||
impersonate our origin.
|
|
||||||
|
|
||||||
Path to enable:
|
|
||||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
|
||||||
2. Download as PEM
|
|
||||||
3. Create k8s Secret `cloudflare-origin-cert`:
|
|
||||||
```bash
|
```bash
|
||||||
kubectl create secret tls cloudflare-origin-cert -n honeydue \
|
# After downloading cf-origin-cert.pem + cf-origin-key.pem from CF dashboard:
|
||||||
--cert=origin.crt --key=origin.key
|
kubectl -n honeydue create secret tls cloudflare-origin-cert \
|
||||||
|
--cert=cf-origin-cert.pem \
|
||||||
|
--key=cf-origin-key.pem \
|
||||||
|
--dry-run=client -o yaml | kubectl apply -f -
|
||||||
```
|
```
|
||||||
4. Add `tls:` block to our Ingress:
|
|
||||||
```yaml
|
|
||||||
spec:
|
|
||||||
tls:
|
|
||||||
- hosts: [api.myhoneydue.com]
|
|
||||||
secretName: cloudflare-origin-cert
|
|
||||||
```
|
|
||||||
5. Switch CF SSL mode to Full (strict)
|
|
||||||
|
|
||||||
Trade-off: the `cloudflare-origin-cert` expires (default 15 years), so
|
|
||||||
low maintenance. **TODO** (Chapter 20).
|
|
||||||
|
|
||||||
### Edge certificate
|
### Edge certificate
|
||||||
|
|
||||||
|
|||||||
@@ -19,69 +19,55 @@ minute, with Slack/email alerts on failure.
|
|||||||
**Effort**: ~30 min for Uptime Kuma deploy, ~10 min for Better Stack
|
**Effort**: ~30 min for Uptime Kuma deploy, ~10 min for Better Stack
|
||||||
signup.
|
signup.
|
||||||
|
|
||||||
### Cloudflare origin IP restriction
|
### ~~Cloudflare origin IP restriction~~ ✓ DONE (2026-04-24)
|
||||||
|
|
||||||
**Why**: UFW allows :80 from anywhere. If node IPs leak, direct-connect
|
Both `:80` and `:443` `Anywhere` rules removed on all 3 nodes. Only
|
||||||
attackers bypass CF's WAF/DDoS protection.
|
CF's 15 IPv4 + 7 IPv6 ranges allowed on `:443`. Direct-connect attempts
|
||||||
|
from non-CF IPs time out.
|
||||||
|
|
||||||
**How**: Replace the anywhere-80 UFW rule with 15 IPv4 + 7 IPv6 CF
|
**Still TODO**: monthly automated refresh of the CF IP list. Ranges
|
||||||
ranges. See [Chapter 13 §CF IP ranges](./13-cloudflare.md#cloudflare-ip-ranges-used-in-traefik-trustedips).
|
change rarely; manual re-run of `scripts/ufw-cf-refresh.sh` (not yet
|
||||||
|
written) on cadence is acceptable for now.
|
||||||
|
|
||||||
Automation: a small script that refreshes the CF IP list monthly and
|
### ~~Enable network policies in k3s~~ ✓ DONE (2026-04-24)
|
||||||
re-applies UFW rules.
|
|
||||||
|
|
||||||
**Effort**: 1 hour.
|
Applied with one scaffold correction: Traefik runs as a DaemonSet with
|
||||||
|
`hostNetwork: true`, so traffic from it arrives with the **node IP** as
|
||||||
|
source rather than a pod IP. The original scaffold used
|
||||||
|
`namespaceSelector: kube-system` which doesn't match hostNetwork
|
||||||
|
traffic. Fixed by using an `ipBlock` list of the three node IPs plus
|
||||||
|
the cluster pod CIDR `10.42.0.0/16`.
|
||||||
|
|
||||||
### Enable network policies in k3s
|
Also added policies for `web` (missing from the original scaffold).
|
||||||
|
|
||||||
**Why**: Currently pods can freely egress anywhere. A compromised pod
|
### ~~Apply Traefik security middleware~~ ✓ DONE (2026-04-24)
|
||||||
could exfiltrate data or attack lateral services.
|
|
||||||
|
|
||||||
**How**: `kubectl apply -f deploy-k3s/manifests/network-policies.yaml`.
|
`security-headers` + `rate-limit` attached to all three ingresses
|
||||||
The scaffold defines default-deny + explicit allows for:
|
(api, admin, web). `admin-auth` is defined but not attached (needs an
|
||||||
- DNS egress for all pods
|
`admin-basic-auth` secret we haven't created). `cloudflare-only` IP
|
||||||
- Traefik → api (port 8000)
|
allowlist exists but is redundant with the UFW-level CF restriction —
|
||||||
- Traefik → admin (port 3000)
|
keep for defense in depth if we ever expose another layer.
|
||||||
- api/worker → Redis
|
|
||||||
- api/worker → external services (Postgres, B2, Fastmail)
|
|
||||||
|
|
||||||
Then test that nothing breaks (might need to adjust allow rules).
|
One scaffold correction: the `Content-Security-Policy` header in
|
||||||
|
`security-headers.customResponseHeaders` was stripped. The Go API sets
|
||||||
**Effort**: 1-2 hours including testing.
|
its own CSP in `internal/router/router.go`, and two CSP headers combine
|
||||||
|
via intersection (most restrictive wins), which would break the Google
|
||||||
### Apply Traefik security middleware
|
Fonts on the marketing landing page. Next.js apps set their own via
|
||||||
|
middleware.
|
||||||
**Why**: Our current Ingress has no rate limiting or security headers
|
|
||||||
beyond what Traefik adds by default.
|
|
||||||
|
|
||||||
**How**: Apply `deploy-k3s/manifests/ingress/middleware.yaml`, annotate
|
|
||||||
Ingresses to use them:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
traefik.ingress.kubernetes.io/router.middlewares: honeydue-security-headers@kubernetescrd,honeydue-rate-limit@kubernetescrd
|
|
||||||
```
|
|
||||||
|
|
||||||
**Effort**: 15 min.
|
|
||||||
|
|
||||||
## Medium priority
|
## Medium priority
|
||||||
|
|
||||||
### Upgrade to CF Full (strict) SSL
|
### ~~Upgrade to CF Full (strict) SSL~~ ✓ DONE (2026-04-24)
|
||||||
|
|
||||||
**Why**: Currently CF↔origin is plain HTTP. An attacker between CF and
|
Origin CA cert (`*.myhoneydue.com` + `myhoneydue.com`, 15-year
|
||||||
Hetzner could read traffic. Full (strict) mode encrypts this leg with
|
validity) stored as `cloudflare-origin-cert` TLS secret. All three
|
||||||
a CF-issued origin cert.
|
ingresses reference it via `tls:` blocks. CF mode flipped from
|
||||||
|
Flexible to Full (strict). Verified by:
|
||||||
|
|
||||||
**How**:
|
- direct-connect to origin on `:443` serves the Origin cert (subject
|
||||||
1. Generate Origin CA cert in CF dashboard → SSL/TLS → Origin Server
|
`CN=CloudFlare Origin Certificate`)
|
||||||
2. Create `cloudflare-origin-cert` Secret in k8s
|
- CF edge continues to serve its own Let's Encrypt cert to browsers
|
||||||
3. Add `tls:` block to Ingresses
|
- both layers now TLS-encrypted
|
||||||
4. Switch CF SSL mode to Full (strict)
|
|
||||||
|
|
||||||
**Effort**: 30 min.
|
|
||||||
|
|
||||||
**Citations**: [Cloudflare Origin CA docs][cf-origin-ca]
|
|
||||||
|
|
||||||
### Migration Job for schema changes
|
### Migration Job for schema changes
|
||||||
|
|
||||||
@@ -312,7 +298,16 @@ k3s server on each node with the new backend.
|
|||||||
As items are done, mark them here. Think of this as a running changelog.
|
As items are done, mark them here. Think of this as a running changelog.
|
||||||
|
|
||||||
- [x] k3s migration from Swarm (2026-04-24)
|
- [x] k3s migration from Swarm (2026-04-24)
|
||||||
- [x] Traefik DaemonSet + hostNetwork
|
- [x] Traefik DaemonSet + hostNetwork (2026-04-24)
|
||||||
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD
|
- [x] Admin seed via ADMIN_EMAIL + ADMIN_PASSWORD (2026-04-24)
|
||||||
- [x] Documentation book (this doc set)
|
- [x] Documentation book (this doc set) (2026-04-24)
|
||||||
|
- [x] Web client deployed at `app.myhoneydue.com` (2026-04-24) — Next.js 16 standalone, 3 replicas with PDB, proxy pattern to api, see Chapter 7.
|
||||||
|
- [x] Admin URL-baking fix (2026-04-24) — Dockerfile `ARG NEXT_PUBLIC_API_URL`, `.dockerignore` hardening for `admin/.env.*`.
|
||||||
|
- [x] Auto-seed initial data on first API boot (2026-04-24) — `20260414_seed_initial_data` migration populates lookups, admin user, task templates. See commit `4ec4bbb`.
|
||||||
|
- [x] APNs wired up (2026-04-24) — Key ID `5L5BVF5G48`, Team ID `X86BR9WTLD`, sandbox mode. Secret `honeydue-apns-key`, `FEATURE_PUSH_ENABLED=true`.
|
||||||
|
- [x] Traefik middleware: `security-headers` + `rate-limit` attached to all three ingresses (2026-04-24). CSP is stripped from the middleware because the Go API sets its own.
|
||||||
|
- [x] Admin liveness probe path fix (2026-04-24) — was hitting `/admin/` (404) and crashlooping every ~90s for 6 hours before the bug was caught. Fixed to `/`.
|
||||||
|
- [x] Network policies applied (2026-04-24) — default-deny + explicit allows. Traefik hostNetwork is matched via node IP `ipBlock`s, not namespaceSelector. See Chapter 5.
|
||||||
|
- [x] Cloudflare Full (strict) SSL (2026-04-24) — Origin CA cert installed as `cloudflare-origin-cert` secret, ingresses have `tls:` blocks, CF mode flipped from Flexible. Both user↔CF and CF↔origin now TLS.
|
||||||
|
- [x] UFW CF-IP allowlist on all 3 nodes (2026-04-24) — 15 IPv4 + 7 IPv6 CF ranges allow `:443`; `Anywhere` rules for `:80` and `:443` deleted. Direct-connect from non-CF IPs times out.
|
||||||
- [ ] All other items above
|
- [ ] All other items above
|
||||||
|
|||||||
@@ -34,6 +34,14 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
|||||||
| `~/.docker/config.json` | Docker CLI config. After `docker login` to Gitea, contains creds. **Log out after each deploy** to not leave PATs on disk. |
|
| `~/.docker/config.json` | Docker CLI config. After `docker login` to Gitea, contains creds. **Log out after each deploy** to not leave PATs on disk. |
|
||||||
| `~/Library/Containers/com.docker.docker/` | Docker Desktop state (macOS). |
|
| `~/Library/Containers/com.docker.docker/` | Docker Desktop state (macOS). |
|
||||||
|
|
||||||
|
### Apple / Cloudflare credentials on disk
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `~/Desktop/code/honeyDue/AuthKey_5L5BVF5G48.p8` | APNs auth key (Apple). Source file for the `honeydue-apns-key` k8s secret. Sensitive — treat as a credential. |
|
||||||
|
| `~/Desktop/code/honeyDue/cf-origin-cert.pem` | Cloudflare Origin CA cert (PEM). Source file for the `cloudflare-origin-cert` k8s secret. `*.myhoneydue.com` + `myhoneydue.com`, expires 2041. |
|
||||||
|
| `~/Desktop/code/honeyDue/cf-origin-key.pem` | Private key for the Origin cert. CF only shows this **once** at generation time. Sensitive — treat as a credential. |
|
||||||
|
|
||||||
## Git repo (`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/`)
|
## Git repo (`/Users/treyt/Desktop/code/honeyDue/honeyDueAPI-go/`)
|
||||||
|
|
||||||
### Top-level
|
### Top-level
|
||||||
@@ -90,19 +98,21 @@ ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBU9xTTBD78tYUqHijgyU9PDqtmS4NuM/6uy8XgDzva+
|
|||||||
| `deploy-k3s/manifests/namespace.yaml` | Creates `honeydue` namespace. |
|
| `deploy-k3s/manifests/namespace.yaml` | Creates `honeydue` namespace. |
|
||||||
| `deploy-k3s/manifests/rbac.yaml` | ServiceAccounts + `automountServiceAccountToken: false`. |
|
| `deploy-k3s/manifests/rbac.yaml` | ServiceAccounts + `automountServiceAccountToken: false`. |
|
||||||
| `deploy-k3s/manifests/pod-disruption-budgets.yaml` | PDBs for api (2/3) and worker (0/1). |
|
| `deploy-k3s/manifests/pod-disruption-budgets.yaml` | PDBs for api (2/3) and worker (0/1). |
|
||||||
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. NOT currently applied. |
|
| `deploy-k3s/manifests/network-policies.yaml` | Default-deny + allows. **Applied.** Includes web policies; Traefik hostNetwork handled via node IP `ipBlock`s rather than namespaceSelector. |
|
||||||
| `deploy-k3s/manifests/api/deployment.yaml` | api Deployment. |
|
| `deploy-k3s/manifests/api/deployment.yaml` | api Deployment. |
|
||||||
| `deploy-k3s/manifests/api/service.yaml` | api ClusterIP Service. |
|
| `deploy-k3s/manifests/api/service.yaml` | api ClusterIP Service. |
|
||||||
| `deploy-k3s/manifests/api/hpa.yaml` | api HorizontalPodAutoscaler. NOT currently applied. |
|
| `deploy-k3s/manifests/api/hpa.yaml` | api HorizontalPodAutoscaler. NOT currently applied. |
|
||||||
| `deploy-k3s/manifests/admin/deployment.yaml` | admin Deployment. |
|
| `deploy-k3s/manifests/admin/deployment.yaml` | admin Deployment. |
|
||||||
| `deploy-k3s/manifests/admin/service.yaml` | admin Service. |
|
| `deploy-k3s/manifests/admin/service.yaml` | admin Service. |
|
||||||
|
| `deploy-k3s/manifests/web/deployment.yaml` | web Deployment (3 replicas, customer-facing Next.js at app.myhoneydue.com). |
|
||||||
|
| `deploy-k3s/manifests/web/service.yaml` | web ClusterIP Service. |
|
||||||
| `deploy-k3s/manifests/worker/deployment.yaml` | worker Deployment. |
|
| `deploy-k3s/manifests/worker/deployment.yaml` | worker Deployment. |
|
||||||
| `deploy-k3s/manifests/redis/deployment.yaml` | Redis Deployment. |
|
| `deploy-k3s/manifests/redis/deployment.yaml` | Redis Deployment. |
|
||||||
| `deploy-k3s/manifests/redis/service.yaml` | Redis Service. |
|
| `deploy-k3s/manifests/redis/service.yaml` | Redis Service. |
|
||||||
| `deploy-k3s/manifests/redis/pvc.yaml` | Redis PersistentVolumeClaim. |
|
| `deploy-k3s/manifests/redis/pvc.yaml` | Redis PersistentVolumeClaim. |
|
||||||
| `deploy-k3s/manifests/ingress/ingress.yaml` | Full Ingress with TLS + middleware (scaffold; needs CF origin cert). |
|
| `deploy-k3s/manifests/ingress/ingress.yaml` | Alternate full Ingress scaffold (unused; we apply ingress-simple.yaml). |
|
||||||
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | Simple Ingress without TLS (what we actually apply). |
|
| `deploy-k3s/manifests/ingress/ingress-simple.yaml` | **Primary Ingress**. TLS via CF Origin cert, `security-headers` + `rate-limit` middleware attached to all three rules (api/admin/web). |
|
||||||
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs. Not currently applied. |
|
| `deploy-k3s/manifests/ingress/middleware.yaml` | Traefik middleware CRDs (`rate-limit`, `security-headers`, `cloudflare-only`). Applied. `admin-auth` was defined but removed at runtime (it requires a basic-auth secret that has not been set). |
|
||||||
| `deploy-k3s/manifests/traefik-helmchartconfig.yaml` | Our DaemonSet + hostNetwork override for Traefik. |
|
| `deploy-k3s/manifests/traefik-helmchartconfig.yaml` | Our DaemonSet + hostNetwork override for Traefik. |
|
||||||
| `deploy-k3s/manifests/secrets.yaml.example` | Template (never deployed). |
|
| `deploy-k3s/manifests/secrets.yaml.example` | Template (never deployed). |
|
||||||
| `deploy-k3s/scripts/01-provision-cluster.sh` | hetzner-k3s provisioning (we didn't use it; existing nodes). |
|
| `deploy-k3s/scripts/01-provision-cluster.sh` | hetzner-k3s provisioning (we didn't use it; existing nodes). |
|
||||||
|
|||||||
Reference in New Issue
Block a user