feat(observability): ship pod logs to Loki via Grafana Alloy
Adds a Grafana Alloy DaemonSet that tails honeydue-namespace pod logs from /var/log/pods and pushes them to Loki at obs.88oakapps.com, reusing the existing OBS_INGEST_TOKEN (14-day retention). - deploy-k3s/manifests/observability/alloy-logs.yaml — DaemonSet + RBAC + token Secret + Alloy config. Runs as root (/var/log/pods is 0750 root:root) but otherwise locked down: all caps dropped, read-only root filesystem, seccomp RuntimeDefault, read-only hostPath mount. - network-policies.yaml — allow-egress-from-alloy-logs (DNS + k8s API + obs HTTPS), mirroring the vmagent egress policy. - 03-deploy.sh — applies alloy-logs with the OBS_INGEST_TOKEN substitution and waits for the DaemonSet rollout. The Loki container, nginx /loki/api/v1/push route, and Grafana Loki datasource live on the obs server and are not repo-managed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -372,3 +372,57 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- port: 8000
|
- port: 8000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
|
||||||
|
---
|
||||||
|
# Egress allow-list for the alloy-logs (Grafana Alloy) pods.
#
# Alloy finds honeydue pods through the Kubernetes API and ships their logs
# to Loki at obs.88oakapps.com. The vmagent DNAT gotcha applies here as well:
# the k3s NetworkPolicy enforcer checks API-server traffic as
# dst=<node_public_ip>:6443, so the explicit :6443 rule below is required.
# Logs are read from FILES on a hostPath, so this policy needs no ingress and
# no egress to pod :8000/:8080 — only DNS, the API server, and obs HTTPS.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  namespace: honeydue
  name: allow-egress-from-alloy-logs
spec:
  policyTypes: [Egress]
  podSelector:
    matchLabels:
      app.kubernetes.io/name: alloy-logs
  egress:
    # 1) Cluster-internal DNS, over both UDP and TCP.
    - ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
      to:
        - namespaceSelector: {}
    # 2) Kubernetes API server through its ClusterIP (the pre-DNAT view).
    - ports:
        - protocol: TCP
          port: 443
      to:
        - ipBlock:
            cidr: 10.43.0.0/16
    # 3) Kubernetes API server after DNAT — the path the k3s NetworkPolicy
    #    enforcer actually sees. REQUIRED.
    - ports:
        - protocol: TCP
          port: 6443
      to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.42.0.0/16
    # 4) Public HTTPS: the log push to obs.88oakapps.com (via Cloudflare).
    - ports:
        - protocol: TCP
          port: 443
      to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.42.0.0/16
              - 10.43.0.0/16
|
||||||
|
|||||||
@@ -0,0 +1,257 @@
|
|||||||
|
# honeyDue log shipper — Grafana Alloy as a DaemonSet.
|
||||||
|
#
|
||||||
|
# Each node runs one Alloy pod that tails the honeydue-namespace pod logs in
|
||||||
|
# /var/log/pods and pushes them to Loki at obs.88oakapps.com/loki/api/v1/push
|
||||||
|
# (the same nginx ingest endpoint + bearer token vmagent uses for metrics).
|
||||||
|
#
|
||||||
|
# Runs as root: /var/log/pods is 0750 root:root on the k3s nodes, so a
|
||||||
|
# non-root uid cannot even traverse it. The container is otherwise locked
|
||||||
|
# down — all capabilities dropped, read-only root filesystem, seccomp
|
||||||
|
# RuntimeDefault — and root inside the container reads only a read-only
|
||||||
|
# hostPath mount of /var/log/pods. This is the one root-running workload in
|
||||||
|
# the namespace (standard for log collectors); see docs/deployment.
|
||||||
|
#
|
||||||
|
# 03-deploy.sh substitutes TOKEN_PLACEHOLDER with OBS_INGEST_TOKEN from
|
||||||
|
# deploy/prod.env before applying — the token never lands in the repo.
|
||||||
|
---
|
||||||
|
# Identity the alloy-logs DaemonSet pods run under; the Role/RoleBinding of
# the same name grant it read access to pods in this namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: honeydue
  name: alloy-logs
  labels:
    app.kubernetes.io/part-of: honeydue
    app.kubernetes.io/name: alloy-logs
|
||||||
|
---
|
||||||
|
# Least privilege: Alloy's discovery.kubernetes only needs to list/watch pods,
# and only inside the honeydue namespace, hence a namespaced Role rather than
# a ClusterRole.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: honeydue
  name: alloy-logs
  labels:
    app.kubernetes.io/part-of: honeydue
    app.kubernetes.io/name: alloy-logs
rules:
  # Core API group ("") — read-only access to pods.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - list
      - watch
|
||||||
|
---
|
||||||
|
# Grants the alloy-logs Role to the alloy-logs ServiceAccount in honeydue.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: honeydue
  name: alloy-logs
  labels:
    app.kubernetes.io/part-of: honeydue
    app.kubernetes.io/name: alloy-logs
roleRef:
  kind: Role
  name: alloy-logs
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    namespace: honeydue
    name: alloy-logs
|
||||||
|
---
|
||||||
|
# Bearer token for the Loki push endpoint. TOKEN_PLACEHOLDER is replaced by
# 03-deploy.sh with OBS_INGEST_TOKEN (same token vmagent uses).
#
# The placeholder is single-quoted on purpose: the substitution is a plain
# text replacement done before YAML parsing, so an unquoted token that is all
# digits, looks like a boolean/null, or contains " #" or ": " would be
# re-typed or truncated by the parser, and stringData only accepts strings.
# Inside single quotes only a literal ' is special, so the token must not
# contain a single quote.
apiVersion: v1
kind: Secret
metadata:
  name: alloy-logs-auth
  namespace: honeydue
  labels:
    app.kubernetes.io/name: alloy-logs
    app.kubernetes.io/part-of: honeydue
type: Opaque
stringData:
  bearer_token: 'TOKEN_PLACEHOLDER'
|
||||||
|
---
|
||||||
|
# Alloy pipeline definition, mounted into the DaemonSet at /etc/alloy.
# Pipeline: discovery.kubernetes -> discovery.relabel -> local.file_match ->
# loki.source.file -> loki.process (CRI parsing) -> loki.write.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: honeydue
  name: alloy-logs
  labels:
    app.kubernetes.io/part-of: honeydue
    app.kubernetes.io/name: alloy-logs
data:
  config.alloy: |
    // honeyDue log shipper. Each DaemonSet instance discovers honeydue-namespace
    // pods via the Kubernetes API, tails the container log files present on its
    // own node (/var/log/pods), and pushes them to Loki at obs.88oakapps.com.

    logging {
      level  = "warn"
      format = "logfmt"
    }

    discovery.kubernetes "pods" {
      role = "pod"
      namespaces {
        names = ["honeydue"]
      }
    }

    // Turn pod metadata into Loki labels and build the on-disk log path.
    discovery.relabel "pod_logs" {
      targets = discovery.kubernetes.pods.targets

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }
      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }
      rule {
        source_labels = ["__meta_kubernetes_pod_container_name"]
        action        = "replace"
        target_label  = "container"
      }
      rule {
        source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
        action        = "replace"
        target_label  = "app"
      }
      rule {
        source_labels = ["__meta_kubernetes_pod_node_name"]
        action        = "replace"
        target_label  = "node"
      }
      // /var/log/pods/<namespace>_<pod>_<uid>/<container>/<n>.log
      rule {
        source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
        separator     = "/"
        action        = "replace"
        replacement   = "/var/log/pods/*$1/*.log"
        target_label  = "__path__"
      }
    }

    local.file_match "pod_logs" {
      path_targets = discovery.relabel.pod_logs.output
    }

    loki.source.file "pod_logs" {
      targets    = local.file_match.pod_logs.targets
      forward_to = [loki.process.pod_logs.receiver]
    }

    // Parse the CRI log format (timestamp / stream / flags / message).
    loki.process "pod_logs" {
      forward_to = [loki.write.obs.receiver]

      stage.cri {}
    }

    loki.write "obs" {
      endpoint {
        url               = "https://obs.88oakapps.com/loki/api/v1/push"
        bearer_token_file = "/etc/alloy-secrets/bearer_token"
      }
      external_labels = {
        cluster     = "honeydue-k3s",
        environment = "prod",
      }
    }
|
||||||
|
---
|
||||||
|
# One Alloy pod per node: tails /var/log/pods through a read-only hostPath and
# pushes to Loki using the config from the alloy-logs ConfigMap and the bearer
# token from the alloy-logs-auth Secret.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: honeydue
  name: alloy-logs
  labels:
    app.kubernetes.io/part-of: honeydue
    app.kubernetes.io/name: alloy-logs
spec:
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: alloy-logs
  template:
    metadata:
      labels:
        app.kubernetes.io/part-of: honeydue
        app.kubernetes.io/name: alloy-logs
    spec:
      # discovery.kubernetes talks to the API server, so the ServiceAccount
      # token has to be mounted.
      automountServiceAccountToken: true
      serviceAccountName: alloy-logs
      # /var/log/pods is 0750 root:root, so only root can traverse it. All
      # other confinement lives in the container-level securityContext.
      securityContext:
        seccompProfile:
          type: RuntimeDefault
        runAsGroup: 0
        runAsUser: 0
      tolerations:
        # Tolerate the control-plane NoSchedule taint so tainted
        # control-plane nodes get a log shipper as well.
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/control-plane
      volumes:
        - name: tmp
          emptyDir:
            sizeLimit: 256Mi
        - name: varlogpods
          hostPath:
            type: Directory
            path: /var/log/pods
        - name: auth
          secret:
            defaultMode: 0400
            secretName: alloy-logs-auth
        - name: config
          configMap:
            name: alloy-logs
      containers:
        - name: alloy
          imagePullPolicy: IfNotPresent
          image: grafana/alloy:v1.5.1@sha256:01a63f4e032ce54ee94b22049bc27f597e74f85566478c377f4b5c7f020c1eb3
          args:
            - run
            - /etc/alloy/config.alloy
            - --storage.path=/tmp/alloy
            - --server.http.listen-addr=0.0.0.0:12345
          ports:
            - containerPort: 12345
              name: http
          resources:
            limits:
              memory: 256Mi
              cpu: 150m
            requests:
              memory: 64Mi
              cpu: 25m
          readinessProbe:
            periodSeconds: 20
            initialDelaySeconds: 10
            httpGet:
              port: 12345
              path: /-/ready
          securityContext:
            capabilities:
              drop: ["ALL"]
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
          volumeMounts:
            # Writable scratch space for --storage.path; the root filesystem
            # itself is read-only.
            - mountPath: /tmp/alloy
              name: tmp
            - mountPath: /var/log/pods
              name: varlogpods
              readOnly: true
            - mountPath: /etc/alloy-secrets
              name: auth
              readOnly: true
            - mountPath: /etc/alloy
              name: config
              readOnly: true
|
||||||
@@ -253,9 +253,14 @@ if [[ -d "${MANIFESTS}/observability" ]]; then
|
|||||||
# under deploy-k3s/. It's gitignored — operator copies values there once.
|
# under deploy-k3s/. It's gitignored — operator copies values there once.
|
||||||
OBS_TOKEN="$(grep -E '^OBS_INGEST_TOKEN=' "${REPO_DIR}/deploy/prod.env" 2>/dev/null | cut -d= -f2- || true)"
|
OBS_TOKEN="$(grep -E '^OBS_INGEST_TOKEN=' "${REPO_DIR}/deploy/prod.env" 2>/dev/null | cut -d= -f2- || true)"
|
||||||
if [[ -z "${OBS_TOKEN}" ]]; then
|
if [[ -z "${OBS_TOKEN}" ]]; then
|
||||||
warn "OBS_INGEST_TOKEN not found in deploy/prod.env — skipping vmagent apply"
|
warn "OBS_INGEST_TOKEN not found in deploy/prod.env — skipping vmagent + alloy-logs apply"
|
||||||
else
|
else
|
||||||
sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/vmagent.yaml" | kubectl apply -f -
|
sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN}|" "${MANIFESTS}/observability/vmagent.yaml" | kubectl apply -f -
|
||||||
|
# alloy-logs — DaemonSet that tails honeydue pod logs and pushes them to
# Loki at obs.88oakapps.com. Same OBS_INGEST_TOKEN as vmagent.
if [[ -f "${MANIFESTS}/observability/alloy-logs.yaml" ]]; then
  # The token is spliced into a sed replacement, where '\', '&' and the '|'
  # delimiter are special. Escape them so the token is inserted literally
  # instead of corrupting the manifest or aborting sed.
  OBS_TOKEN_SED_SAFE="$(printf '%s' "${OBS_TOKEN}" | sed -e 's/[\\&|]/\\&/g')"
  sed "s|TOKEN_PLACEHOLDER|${OBS_TOKEN_SED_SAFE}|" "${MANIFESTS}/observability/alloy-logs.yaml" | kubectl apply -f -
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -273,6 +278,9 @@ fi
|
|||||||
if kubectl -n "${NAMESPACE}" get deployment vmagent >/dev/null 2>&1; then
|
if kubectl -n "${NAMESPACE}" get deployment vmagent >/dev/null 2>&1; then
|
||||||
kubectl rollout status deployment/vmagent -n "${NAMESPACE}" --timeout=120s
|
kubectl rollout status deployment/vmagent -n "${NAMESPACE}" --timeout=120s
|
||||||
fi
|
fi
|
||||||
|
# Wait for the alloy-logs rollout, but only when the DaemonSet exists; it may
# be absent, e.g. when the apply step was skipped for lack of OBS_INGEST_TOKEN.
if kubectl get daemonset alloy-logs --namespace "${NAMESPACE}" &>/dev/null; then
  kubectl --namespace "${NAMESPACE}" rollout status daemonset/alloy-logs --timeout=120s
fi
|
||||||
|
|
||||||
# --- Done ---
|
# --- Done ---
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user