c77ff07ce9
Remediation of the 2026-05-12/13 audits (78 findings + cluster gaps), tracked in deploy-k3s/SECURITY.md, plus fixes from two independent post-remediation reviews. Auth & sessions: - SHA-256 hashed auth-token storage (C1); prior-token cache eviction on re-login (MEDIUM-1) - local Google JWKS verification, iss/aud/exp checks (C2/C3) - constant-time login + generic errors (L1/LIVE-L11/LIVE-L13) - per-account login lockout keyed on distinct source IPs (M5/MEDIUM-3) - verified-email gating, login rate limiting (LIVE-L19, H1-H3) IAP & webhooks: - Apple/Google cross-account replay protection (C5/C6/C10/C13, H5/H6) - migrations 000003-000006 (token hashing, IAP replay, audit_log + webhook_event_log table creation, append-only audit log) Authorization & races: - file-ownership owner-OR-member fix (C7), atomic share-code join (C9/H9), device-token reassignment (C8/LOW-3) Secrets & deploy: - secrets file-mounted at /etc/honeydue/secrets, not env (F8); Redis password out of the ConfigMap (HIGH-1); B2 keys reconciled - digest-pinned images, admin ingress hardening, CSP/HSTS, /metrics lockdown; kubeconfig 0600, etcd secrets-encryption, fail2ban + unattended-upgrades at provision; secret-rotation runbook Build, vet, and the full test suite (incl. -race) pass; the goose migration chain is verified against PostgreSQL 16. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
262 lines
7.8 KiB
YAML
262 lines
7.8 KiB
YAML
# vmagent — scrapes Prometheus /metrics from in-cluster services and
|
|
# remote-writes them to https://obs.88oakapps.com/api/v1/write
|
|
# (VictoriaMetrics on 88oakappsUpdate, fronted by Cloudflare + nginx
|
|
# bearer-token auth). Single replica is fine — vmagent buffers locally
|
|
# during transient remote outages.
|
|
|
|
---
|
|
# Scrape configuration for vmagent. The vmagent Deployment mounts this
# ConfigMap at /etc/vmagent and loads the file through
# -promscrape.config=/etc/vmagent/scrape.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vmagent-config
  namespace: honeydue
  labels:
    app.kubernetes.io/name: vmagent
    app.kubernetes.io/part-of: honeydue
data:
  # Literal block scalar: everything below is handed to vmagent verbatim,
  # including the comments inside it.
  scrape.yaml: |
    global:
      scrape_interval: 15s
      external_labels:
        cluster: honeydue-k3s
        environment: prod

    scrape_configs:
      # honeyDue Go API — exposes /metrics on :8000
      - job_name: api
        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names: [honeydue]
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
            action: keep
            regex: api
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            action: keep
            regex: "8000"
          - source_labels: [__meta_kubernetes_pod_name]
            target_label: pod
          - source_labels: [__meta_kubernetes_pod_node_name]
            target_label: node
          - target_label: service
            replacement: api

      # kube-state-metrics — cluster object state (kube_pod_*, kube_deployment_*,
      # etc.) needed for Grafana panels that count pods/replicas/etc.
      - job_name: kube-state-metrics
        kubernetes_sd_configs:
          - role: endpoints
            namespaces:
              names: [kube-system]
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name]
            action: keep
            regex: kube-state-metrics
          - source_labels: [__meta_kubernetes_endpoint_port_name]
            action: keep
            regex: http-metrics

      # honeyDue worker — also exposes /metrics if/when we add it.
      # Keep this stanza commented until the worker has a /metrics endpoint;
      # uncommented form drops scrapes silently.
      # - job_name: worker
      #   kubernetes_sd_configs:
      #     - role: pod
      #       namespaces:
      #         names: [honeydue]
      #   relabel_configs:
      #     - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
      #       action: keep
      #       regex: worker
|
|
|
|
---
|
|
# Remote-write credential for vmagent. The vmagent Deployment mounts this
# Secret at /etc/vmagent-secrets and reads the token through
# -remoteWrite.bearerTokenFile.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: vmagent-remote-write
  namespace: honeydue
  labels:
    app.kubernetes.io/name: vmagent
    app.kubernetes.io/part-of: honeydue
stringData:
  # Bearer token accepted by obs.88oakapps.com. The real value is provisioned
  # at deploy time from deploy/prod.env (OBS_INGEST_TOKEN) and has to be
  # identical to /etc/honeydue-obs/ingest_token on 88oakappsUpdate.
  # NOTE(review): the value below is a stand-in, presumably substituted by
  # the deploy tooling — confirm before changing its spelling or quoting.
  bearer_token: TOKEN_PLACEHOLDER
|
|
|
|
---
|
|
# Read-only access to pods, services and endpoints in the honeydue
# namespace, bound to the vmagent ServiceAccount for Kubernetes service
# discovery.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: vmagent
  namespace: honeydue
rules:
  - apiGroups:
      - ""  # core API group
    resources:
      - pods
      - services
      - endpoints
    verbs:
      - get
      - list
      - watch
|
|
|
|
---
|
|
# Identity the vmagent pod runs as (spec.serviceAccountName in the
# Deployment); the RoleBindings in this file grant it discovery access.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: honeydue
  name: vmagent
|
|
|
|
---
|
|
# Grants the honeydue-namespace Role "vmagent" to the vmagent
# ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: vmagent
  namespace: honeydue
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: vmagent
subjects:
  - kind: ServiceAccount
    name: vmagent
    namespace: honeydue
|
|
|
|
---
|
|
# The kube-state-metrics scrape job discovers its target through
# Service/Endpoints objects that live in kube-system. A Role only covers its
# own namespace, so vmagent (running in honeydue) needs this separate
# kube-system Role, plus a RoleBinding to it, for that discovery to work.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: vmagent-kube-system
  namespace: kube-system
rules:
  - apiGroups:
      - ""  # core API group
    resources:
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
|
|
|
|
---
|
|
# Binds the kube-system Role "vmagent-kube-system" to the vmagent
# ServiceAccount, which lives in the honeydue namespace (hence the explicit
# subject namespace).
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: vmagent-kube-system
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: vmagent-kube-system
subjects:
  - kind: ServiceAccount
    name: vmagent
    namespace: honeydue
|
|
|
|
---
|
|
# vmagent workload: a single replica that scrapes the targets defined in the
# vmagent-config ConfigMap and remote-writes them to obs.88oakapps.com using
# the bearer token from the vmagent-remote-write Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vmagent
  namespace: honeydue
  labels:
    app.kubernetes.io/name: vmagent
    app.kubernetes.io/part-of: honeydue
spec:
  replicas: 1
  # Recreate: the old pod is stopped before the new one starts, so two
  # vmagent instances never scrape and remote-write at the same time.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: vmagent
  template:
    metadata:
      labels:
        app.kubernetes.io/name: vmagent
        app.kubernetes.io/part-of: honeydue
    spec:
      # ServiceAccount that the Roles/RoleBindings for Kubernetes service
      # discovery are attached to.
      serviceAccountName: vmagent
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: vmagent
          # Pinned by digest (audit K3S-F14).
          image: victoriametrics/vmagent:v1.106.1@sha256:90208a667c0baf65f7536b92a84c40b6e35ffe8e88bda7e4447b97b06c6ba6b8
          imagePullPolicy: IfNotPresent  # audit CODE-L4 — explicit
          # Container-level hardening (audit F7) — matches the other 5
          # workloads. vmagent only writes to the /tmp/vmagent emptyDir
          # (its remoteWrite buffer), so a read-only root filesystem holds.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: ["ALL"]
          args:
            - "-promscrape.config=/etc/vmagent/scrape.yaml"
            - "-remoteWrite.url=https://obs.88oakapps.com/api/v1/write"
            - "-remoteWrite.bearerTokenFile=/etc/vmagent-secrets/bearer_token"
            - "-remoteWrite.tmpDataPath=/tmp/vmagent"
            - "-remoteWrite.maxDiskUsagePerURL=512MB"
            - "-loggerLevel=INFO"
          ports:
            - containerPort: 8429
              name: http
          resources:
            requests:
              cpu: 25m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 256Mi
          volumeMounts:
            - name: config
              mountPath: /etc/vmagent
              readOnly: true
            - name: secrets
              mountPath: /etc/vmagent-secrets
              readOnly: true
            - name: buffer
              mountPath: /tmp/vmagent
          # Process startup gate. /-/healthy returns 200 once vmagent has
          # parsed config — 5 s initial delay plus 24 attempts at a 5 s
          # period gives the agent about 2 min to come up before liveness
          # starts evaluating.
          startupProbe:
            httpGet:
              path: /-/healthy
              port: http
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 24
          # Real liveness check: are scrapes actually succeeding?
          # /-/healthy was the old probe and returned 200 for 17 days even
          # while vmagent had zero healthy targets (stale k8s SD watch).
          # This exec probe queries vmagent's own targets API and fails if
          # NO target is in state "up" (or if the API cannot be reached).
          # Five consecutive failures at a 120 s period (about 10 min)
          # → kubelet restarts the container → fresh SD watch.
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                # The pipeline's exit status is grep's: 0 only when at least
                # one "health":"up" entry is present. wget's 4 s timeout
                # stays below the probe's own 5 s timeoutSeconds.
                - >-
                  wget -qO- -T 4 http://localhost:8429/api/v1/targets 2>/dev/null
                  | grep -q '"health":"up"'
            initialDelaySeconds: 180
            periodSeconds: 120
            timeoutSeconds: 5
            failureThreshold: 5
          # Readiness only needs the process to be serving; scrape health is
          # covered by the liveness probe.
          readinessProbe:
            httpGet:
              path: /-/healthy
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
      volumes:
        - name: config
          configMap:
            name: vmagent-config
        - name: secrets
          secret:
            secretName: vmagent-remote-write
            # Octal file mode (owner read-only).
            defaultMode: 0400
        - name: buffer
          # On-disk remote-write queue (-remoteWrite.tmpDataPath).
          # NOTE(review): 512Mi (~537 MB) leaves only ~25 MB of headroom
          # over -remoteWrite.maxDiskUsagePerURL=512MB. If the queue ever
          # overshoots its cap the kubelet evicts the pod and the buffered
          # samples are lost — confirm this headroom is sufficient.
          emptyDir:
            sizeLimit: 512Mi
|