139a990ebc
vmagent's k8s service discovery has been silently broken for 17+ days
because k3s's NetworkPolicy controller evaluates egress AFTER kube-proxy's
DNAT (contrary to the k8s spec). Pod → ClusterIP 10.43.0.1:443 was
DNAT'd to <node_public_ip>:6443, and the resulting :6443 destination
matched none of vmagent's egress rules → TCP RST → "connection refused"
on every SD watch attempt. Grafana panels using kube_* or up{} metrics
returned empty as a result.
Changes:
- network-policies.yaml: commit the previously-cluster-only NetPols
(allow-egress-from-vmagent, allow-vmagent-to-api) so a fresh deploy
produces a working cluster. The vmagent egress rule now includes :6443
to public IPs (the post-DNAT path) and :8080 to the pod CIDR (for
scraping kube-state-metrics).
- observability/kube-state-metrics.yaml: new manifest. Provides the
kube_pod_*, kube_deployment_*, kube_service_* metrics that Grafana
panels need to count pods, replicas, etc. Runs in kube-system with
cluster-scoped RBAC.
- observability/vmagent.yaml:
* add kube-state-metrics scrape job to the ConfigMap
* add vmagent-kube-system Role+RoleBinding so cross-namespace SD works
* replace the misleading liveness probe (was /-/healthy, which lies
while SD is broken) with an exec probe that checks /api/v1/targets
for at least one healthy target — automatic recovery from future
stale-SD incidents
- scripts/03-deploy.sh: actually apply network-policies.yaml (was
committed but never applied) and apply kube-state-metrics.yaml.
- RUNBOOK.md (new): documents the post-DNAT gotcha, the liveness probe
trap, bearer-token recovery procedure, drift-detection diff, and a
post-redeploy verification checklist.
- .gitignore: cover kubeconfig.tunnel (created during SSH-tunnelled
kubectl sessions) so admin client cert can't be committed by accident.
Verified via kubectl --dry-run on all three modified manifests.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
224 lines
5.6 KiB
YAML
224 lines
5.6 KiB
YAML
# kube-state-metrics — exposes cluster object state (pods, deployments,
# services, etc.) as Prometheus metrics. vmagent scrapes it via the api
# group defined in vmagent-config; Grafana panels that count pods,
# replicas, etc. consume the `kube_*` metrics this produces.
#
# Lives in kube-system because it watches resources cluster-wide.
# RBAC is cluster-scoped (ClusterRole + ClusterRoleBinding).
#
# Image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
# (latest stable as of authoring; bump when a newer minor is released)
|
|
|
|
---
|
|
# Identity the kube-state-metrics pod runs as. The ClusterRoleBinding named
# kube-state-metrics references this account by name and namespace, and the
# Deployment selects it through serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: kube-state-metrics
  labels:
    app.kubernetes.io/part-of: honeydue-observability
    app.kubernetes.io/name: kube-state-metrics
|
|
|
|
---
|
|
# Cluster-wide permissions for kube-state-metrics. Every rule below is
# read-only (list + watch) except the two review APIs, which grant `create`.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/part-of: honeydue-observability
    app.kubernetes.io/name: kube-state-metrics
rules:
  # Core ("") API group.
  # NOTE(review): list/watch on `secrets` returns full Secret objects to this
  # ServiceAccount in every namespace — confirm that is intended.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - serviceaccounts
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs:
      - list
      - watch

  # Workload controllers.
  - apiGroups:
      - apps
    resources:
      - statefulsets
      - daemonsets
      - deployments
      - replicasets
    verbs:
      - list
      - watch

  # Jobs and CronJobs.
  - apiGroups:
      - batch
    resources:
      - cronjobs
      - jobs
    verbs:
      - list
      - watch

  # HorizontalPodAutoscalers.
  - apiGroups:
      - autoscaling
    resources:
      - horizontalpodautoscalers
    verbs:
      - list
      - watch

  # Review APIs — the only rules that grant `create`.
  # NOTE(review): presumably carried over from the upstream manifest; confirm
  # which component in this deployment actually issues these requests.
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create

  # PodDisruptionBudgets.
  - apiGroups:
      - policy
    resources:
      - poddisruptionbudgets
    verbs:
      - list
      - watch

  # CertificateSigningRequests.
  - apiGroups:
      - certificates.k8s.io
    resources:
      - certificatesigningrequests
    verbs:
      - list
      - watch

  # EndpointSlices.
  - apiGroups:
      - discovery.k8s.io
    resources:
      - endpointslices
    verbs:
      - list
      - watch

  # StorageClasses and VolumeAttachments.
  - apiGroups:
      - storage.k8s.io
    resources:
      - storageclasses
      - volumeattachments
    verbs:
      - list
      - watch

  # Admission webhook configurations.
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs:
      - list
      - watch

  # NetworkPolicies, IngressClasses and Ingresses.
  - apiGroups:
      - networking.k8s.io
    resources:
      - networkpolicies
      - ingressclasses
      - ingresses
    verbs:
      - list
      - watch

  # Lease objects. Read-only: list/watch alone cannot create or update a
  # Lease, so this rule does not let the pod hold a leader-election lock.
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - list
      - watch

  # RBAC objects themselves.
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - clusterrolebindings
      - clusterroles
      - rolebindings
      - roles
    verbs:
      - list
      - watch
|
|
|
|
---
|
|
# Grants the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in kube-system, cluster-wide.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/part-of: honeydue-observability
    app.kubernetes.io/name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
|
|
|
|
---
|
|
# ClusterIP Service in front of the kube-state-metrics pod. Each Service port
# forwards to the container port of the same name on pods carrying the
# app.kubernetes.io/name=kube-state-metrics label.
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-state-metrics
  labels:
    app.kubernetes.io/part-of: honeydue-observability
    app.kubernetes.io/name: kube-state-metrics
spec:
  type: ClusterIP
  ports:
    # 8080 is the value the Deployment passes as --port.
    - name: http-metrics
      protocol: TCP
      port: 8080
      targetPort: http-metrics
    # 8081 is the value the Deployment passes as --telemetry-port.
    - name: telemetry
      protocol: TCP
      port: 8081
      targetPort: telemetry
  selector:
    app.kubernetes.io/name: kube-state-metrics
|
|
|
|
---
|
|
# Single-replica kube-state-metrics Deployment in kube-system.
#
# The pod runs as the kube-state-metrics ServiceAccount with a locked-down
# security context (non-root UID 65534, no added capabilities, read-only root
# filesystem, RuntimeDefault seccomp) and listens on two ports:
#   http-metrics (8080, --port)           — cluster object-state metrics
#   telemetry    (8081, --telemetry-port) — the exporter's own telemetry
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/part-of: honeydue-observability
spec:
  replicas: 1
  # Recreate: the existing pod is terminated before its replacement starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/part-of: honeydue-observability
    spec:
      serviceAccountName: kube-state-metrics
      # The ServiceAccount token is mounted explicitly; the ClusterRole bound
      # to this account is what authorises the list/watch calls.
      automountServiceAccountToken: true
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        fsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: kube-state-metrics
          image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8080
              name: http-metrics
            - containerPort: 8081
              name: telemetry
          args:
            - --port=8080
            - --telemetry-port=8081
          resources:
            requests:
              cpu: 25m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: [ALL]
            readOnlyRootFilesystem: true
          # /livez is documented upstream as served on the metrics listener
          # (--port), so it is probed on http-metrics.
          livenessProbe:
            httpGet:
              path: /livez
              port: http-metrics
            initialDelaySeconds: 5
            periodSeconds: 30
          # /readyz is documented upstream as served on the telemetry
          # listener (--telemetry-port), not on the metrics listener. Probing
          # it on http-metrics targets a port that is not documented to serve
          # it, which can leave the pod permanently NotReady and the Service
          # without endpoints. Probe the telemetry port instead.
          readinessProbe:
            httpGet:
              path: /readyz
              port: telemetry
            initialDelaySeconds: 5
            periodSeconds: 10
|