From 93fddc376968d26b3b22e1a218212b669003c9cf Mon Sep 17 00:00:00 2001 From: Trey t Date: Sun, 17 May 2026 20:04:09 -0500 Subject: [PATCH] feat(observability): ship pod logs to Loki via Grafana Alloy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Grafana Alloy DaemonSet that tails honeydue-namespace pod logs from /var/log/pods and pushes them to Loki at obs.88oakapps.com, reusing the existing OBS_INGEST_TOKEN (14-day retention). - deploy-k3s/manifests/observability/alloy-logs.yaml — DaemonSet + RBAC + token Secret + Alloy config. Runs as root (/var/log/pods is 0750 root:root) but otherwise locked down: all caps dropped, read-only root filesystem, seccomp RuntimeDefault, read-only hostPath mount. - network-policies.yaml — allow-egress-from-alloy-logs (DNS + k8s API + obs HTTPS), mirroring the vmagent egress policy. - 03-deploy.sh — applies alloy-logs with the OBS_INGEST_TOKEN substitution and waits for the DaemonSet rollout. The Loki container, nginx /loki/api/v1/push route, and Grafana Loki datasource live on the obs server and are not repo-managed. Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy-k3s/manifests/network-policies.yaml | 54 ++++ .../manifests/observability/alloy-logs.yaml | 257 ++++++++++++++++++ deploy-k3s/scripts/03-deploy.sh | 10 +- 3 files changed, 320 insertions(+), 1 deletion(-) create mode 100644 deploy-k3s/manifests/observability/alloy-logs.yaml diff --git a/deploy-k3s/manifests/network-policies.yaml b/deploy-k3s/manifests/network-policies.yaml index 5bb59e1..41c571f 100644 --- a/deploy-k3s/manifests/network-policies.yaml +++ b/deploy-k3s/manifests/network-policies.yaml @@ -372,3 +372,57 @@ spec: ports: - port: 8000 protocol: TCP + +--- +# alloy-logs egress — Grafana Alloy discovers honeydue pods via the k8s API +# and pushes their logs to Loki at obs.88oakapps.com. 
Same k3s NetworkPolicy +# DNAT gotcha as vmagent: API-server traffic is policy-checked as +# dst=<node-ip>:6443, so an explicit :6443 rule is required. +# Alloy reads log FILES from a hostPath, so it needs no ingress and no +# egress to pod :8000/:8080 — only DNS, the API server, and obs HTTPS. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-egress-from-alloy-logs + namespace: honeydue +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: alloy-logs + policyTypes: + - Egress + egress: + # DNS (cluster-internal) + - to: + - namespaceSelector: {} + ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + # k8s API server via ClusterIP (pre-DNAT view) + - to: + - ipBlock: + cidr: 10.43.0.0/16 + ports: + - port: 443 + protocol: TCP + # k8s API server post-DNAT (real path k3s NetPol enforcer sees) — REQUIRED + - to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + - 10.42.0.0/16 + ports: + - port: 6443 + protocol: TCP + # HTTPS to public (log push to obs.88oakapps.com via Cloudflare) + - to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + - 10.42.0.0/16 + - 10.43.0.0/16 + ports: + - port: 443 + protocol: TCP diff --git a/deploy-k3s/manifests/observability/alloy-logs.yaml b/deploy-k3s/manifests/observability/alloy-logs.yaml new file mode 100644 index 0000000..09ec02f --- /dev/null +++ b/deploy-k3s/manifests/observability/alloy-logs.yaml @@ -0,0 +1,257 @@ +# honeyDue log shipper — Grafana Alloy as a DaemonSet. +# +# Each node runs one Alloy pod that tails the honeydue-namespace pod logs in +# /var/log/pods and pushes them to Loki at obs.88oakapps.com/loki/api/v1/push +# (the same nginx ingest endpoint + bearer token vmagent uses for metrics). +# +# Runs as root: /var/log/pods is 0750 root:root on the k3s nodes, so a +# non-root uid cannot even traverse it. 
The container is otherwise locked +# down — all capabilities dropped, read-only root filesystem, seccomp +# RuntimeDefault — and root inside the container reads only a read-only +# hostPath mount of /var/log/pods. This is the one root-running workload in +# the namespace (standard for log collectors); see docs/deployment. +# +# 03-deploy.sh substitutes TOKEN_PLACEHOLDER with OBS_INGEST_TOKEN from +# deploy/prod.env before applying — the token never lands in the repo. +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alloy-logs + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +--- +# Least privilege: Alloy's discovery.kubernetes only lists/watches pods, and +# only in the honeydue namespace — so a namespaced Role, not a ClusterRole. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: alloy-logs + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: alloy-logs + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +subjects: + - kind: ServiceAccount + name: alloy-logs + namespace: honeydue +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: alloy-logs +--- +# Bearer token for the Loki push endpoint. TOKEN_PLACEHOLDER is replaced by +# 03-deploy.sh with OBS_INGEST_TOKEN (same token vmagent uses). 
+apiVersion: v1 +kind: Secret +metadata: + name: alloy-logs-auth + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +type: Opaque +stringData: + bearer_token: TOKEN_PLACEHOLDER +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-logs + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +data: + config.alloy: | + // honeyDue log shipper. Each DaemonSet instance discovers honeydue-namespace + // pods via the Kubernetes API, tails the container log files present on its + // own node (/var/log/pods), and pushes them to Loki at obs.88oakapps.com. + + logging { + level = "warn" + format = "logfmt" + } + + discovery.kubernetes "pods" { + role = "pod" + namespaces { + names = ["honeydue"] + } + } + + // Turn pod metadata into Loki labels and build the on-disk log path. + discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pods.targets + + rule { + source_labels = ["__meta_kubernetes_namespace"] + action = "replace" + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + action = "replace" + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "container" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + action = "replace" + target_label = "app" + } + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + action = "replace" + target_label = "node" + } + // /var/log/pods/<namespace>_<pod-name>_<pod-uid>/<container>/<restart-count>.log + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + } + + local.file_match "pod_logs" { + path_targets = discovery.relabel.pod_logs.output + } + + loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = 
[loki.process.pod_logs.receiver] + } + + // Parse the CRI log format (timestamp / stream / flags / message). + loki.process "pod_logs" { + forward_to = [loki.write.obs.receiver] + + stage.cri {} + } + + loki.write "obs" { + endpoint { + url = "https://obs.88oakapps.com/loki/api/v1/push" + bearer_token_file = "/etc/alloy-secrets/bearer_token" + } + external_labels = { + cluster = "honeydue-k3s", + environment = "prod", + } + } +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: alloy-logs + namespace: honeydue + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue +spec: + selector: + matchLabels: + app.kubernetes.io/name: alloy-logs + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + app.kubernetes.io/name: alloy-logs + app.kubernetes.io/part-of: honeydue + spec: + serviceAccountName: alloy-logs + # Alloy needs its SA token — discovery.kubernetes talks to the API server. + automountServiceAccountToken: true + # Root is required to traverse /var/log/pods (0750 root:root). The + # container is otherwise fully confined (see container securityContext). + securityContext: + runAsUser: 0 + runAsGroup: 0 + seccompProfile: + type: RuntimeDefault + tolerations: + # DaemonSet must run on every node, including control-plane nodes (tolerates their NoSchedule taint). 
+ - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + containers: + - name: alloy + image: grafana/alloy:v1.5.1@sha256:01a63f4e032ce54ee94b22049bc27f597e74f85566478c377f4b5c7f020c1eb3 + imagePullPolicy: IfNotPresent + args: + - run + - /etc/alloy/config.alloy + - --storage.path=/tmp/alloy + - --server.http.listen-addr=0.0.0.0:12345 + ports: + - name: http + containerPort: 12345 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + volumeMounts: + - name: config + mountPath: /etc/alloy + readOnly: true + - name: auth + mountPath: /etc/alloy-secrets + readOnly: true + - name: varlogpods + mountPath: /var/log/pods + readOnly: true + - name: tmp + mountPath: /tmp/alloy + readinessProbe: + httpGet: + path: /-/ready + port: 12345 + initialDelaySeconds: 10 + periodSeconds: 20 + resources: + requests: + cpu: 25m + memory: 64Mi + limits: + cpu: 150m + memory: 256Mi + volumes: + - name: config + configMap: + name: alloy-logs + - name: auth + secret: + secretName: alloy-logs-auth + defaultMode: 0400 + - name: varlogpods + hostPath: + path: /var/log/pods + type: Directory + - name: tmp + emptyDir: + sizeLimit: 256Mi diff --git a/deploy-k3s/scripts/03-deploy.sh b/deploy-k3s/scripts/03-deploy.sh index 9942b5b..e49dbfc 100755 --- a/deploy-k3s/scripts/03-deploy.sh +++ b/deploy-k3s/scripts/03-deploy.sh @@ -253,9 +253,14 @@ if [[ -d "${MANIFESTS}/observability" ]]; then # under deploy-k3s/. It's gitignored — operator copies values there once. 
OBS_TOKEN="$(grep -E '^OBS_INGEST_TOKEN=' "${REPO_DIR}/deploy/prod.env" 2>/dev/null | cut -d= -f2- || true)" if [[ -z "${OBS_TOKEN}" ]]; then - warn "OBS_INGEST_TOKEN not found in deploy/prod.env — skipping vmagent apply" + warn "OBS_INGEST_TOKEN not found in deploy/prod.env — skipping vmagent + alloy-logs apply" else sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/vmagent.yaml" | kubectl apply -f - + # alloy-logs — DaemonSet that tails honeydue pod logs and pushes them to + # Loki at obs.88oakapps.com. Same OBS_INGEST_TOKEN as vmagent. + if [[ -f "${MANIFESTS}/observability/alloy-logs.yaml" ]]; then + sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/alloy-logs.yaml" | kubectl apply -f - + fi fi fi @@ -273,6 +278,9 @@ fi if kubectl -n "${NAMESPACE}" get deployment vmagent >/dev/null 2>&1; then kubectl rollout status deployment/vmagent -n "${NAMESPACE}" --timeout=120s fi +if kubectl -n "${NAMESPACE}" get daemonset alloy-logs >/dev/null 2>&1; then + kubectl rollout status daemonset/alloy-logs -n "${NAMESPACE}" --timeout=120s +fi # --- Done ---