# Appendix B — kubectl Cheat Sheet

Specific to this deployment. Assumes:

```bash
export KUBECONFIG=~/.kube/honeydue-k3s.yaml
```

## Viewing state

```bash
# All pods in our namespace
kubectl get pods -n honeydue

# With node placement + IPs
kubectl get pods -n honeydue -o wide

# All resources in our namespace
kubectl get all -n honeydue

# Cluster-wide pod overview
kubectl get pods -A

# Node health
kubectl get nodes
kubectl top nodes

# What's using RAM
kubectl top pods -n honeydue --sort-by=memory

# What's using CPU
kubectl top pods -n honeydue --sort-by=cpu
```

## Logs

```bash
# Follow all api pod logs
kubectl logs -n honeydue -l app.kubernetes.io/name=api -f --prefix

# One specific pod
kubectl logs -n honeydue <pod-name>

# Previous container instance's logs (after a crash/restart of the same pod)
kubectl logs -n honeydue <pod-name> --previous

# Filtered
kubectl logs -n honeydue deploy/api | grep -i error
kubectl logs -n honeydue deploy/api --since=1h

# stern is nicer for multi-pod (if installed)
stern -n honeydue api
```

## Deploying new code

```bash
SHA=$(git rev-parse --short HEAD)

# Build + push (requires docker login to Gitea first)
docker buildx build --platform linux/amd64 --target api \
  -t "gitea.treytartt.com/admin/honeydue-api:${SHA}" --push .

# Roll it in
kubectl set image deployment/api -n honeydue \
  api="gitea.treytartt.com/admin/honeydue-api:${SHA}"

# Watch
kubectl rollout status -n honeydue deployment/api
```

## Rolling update controls

```bash
# Pause a rollout in progress (new pods stop being created)
kubectl rollout pause deployment/api -n honeydue

# Resume
kubectl rollout resume deployment/api -n honeydue

# Rollback to previous version
kubectl rollout undo deployment/api -n honeydue

# Rollback to specific revision
kubectl rollout history deployment/api -n honeydue
kubectl rollout undo deployment/api -n honeydue --to-revision=3

# Force restart (re-pulls image if digest changed; reloads ConfigMap)
kubectl rollout restart deployment/api -n honeydue
```

## Scaling

```bash
# Scale up
kubectl scale deployment/api -n honeydue --replicas=5

# Scale down
kubectl scale deployment/api -n honeydue --replicas=3

# Kill everything (emergency)
kubectl scale deployment -n honeydue --all --replicas=0

# Bring back
kubectl scale deployment/api -n honeydue --replicas=3
kubectl scale deployment/admin deployment/worker deployment/redis -n honeydue --replicas=1
```

## Debugging a pod

```bash
# Describe = events + state + restart history
kubectl describe pod -n honeydue <pod-name>

# Shell in
kubectl exec -it -n honeydue deploy/api -- /bin/sh

# Inside:
# Test HTTP locally (bypasses Traefik, Service, overlay)
wget -qO- http://127.0.0.1:8000/api/health/
# Test cross-Service DNS
getent hosts redis
getent hosts admin
getent hosts postgres

# Run arbitrary command (one-shot)
kubectl exec -n honeydue deploy/api -- env | grep POSTGRES
```

## Networking checks

```bash
# Resolve a Service from a pod
kubectl exec -n honeydue deploy/api -- nslookup redis

# Check Service endpoints (the actual IPs behind a ClusterIP)
kubectl get endpoints -n honeydue api

# Traffic test via Service
kubectl run test --rm -it --image=alpine/curl -- sh
# curl http://api.honeydue.svc:8000/api/health/

# List all Ingresses
kubectl get ingress -A
```

## Secret / Config

```bash
# List
kubectl get secrets -n honeydue
kubectl get configmap -n honeydue

# Describe (shows keys, not values)
kubectl describe secret honeydue-secrets -n honeydue

# Read a value (DANGER: plaintext to stdout)
kubectl get secret honeydue-secrets -n honeydue \
  -o jsonpath='{.data.POSTGRES_PASSWORD}' | base64 -d; echo

# Update a single secret key
kubectl patch secret honeydue-secrets -n honeydue \
  --type=merge -p "{\"data\":{\"SECRET_KEY\":\"$(echo -n 'new-val' | base64)\"}}"

# Regenerate ConfigMap from prod.env
kubectl create configmap honeydue-config -n honeydue \
  --from-env-file=deploy/prod.env \
  --dry-run=client -o yaml | kubectl apply -f -

# Edit a ConfigMap interactively (does NOT restart pods)
kubectl edit configmap honeydue-config -n honeydue
```

## Node management

```bash
# Prevent scheduling on a node
kubectl cordon <node>

# Prevent scheduling + evict existing pods
kubectl drain <node> --ignore-daemonsets --delete-emptydir-data

# Allow scheduling again
kubectl uncordon <node>

# Label a node
kubectl label node <node> honeydue/redis=true --overwrite

# Remove a label
kubectl label node <node> honeydue/redis-
```

## Events (the timeline)

```bash
# All events, newest last
kubectl get events -A --sort-by=.lastTimestamp

# Watch live
kubectl get events -A --sort-by=.lastTimestamp -w

# Only warnings
kubectl get events -A --field-selector type=Warning

# Events for a specific pod
kubectl describe pod -n honeydue <pod-name> | awk '/Events:/,0'
```

## Traefik-specific

```bash
# All Traefik pods (DaemonSet, so one per node)
kubectl get pods -n kube-system -l app.kubernetes.io/name=traefik -o wide

# Restart Traefik across all nodes
kubectl rollout restart daemonset/traefik -n kube-system

# View Traefik config (via ConfigMap)
kubectl get cm -n kube-system traefik -o yaml | less

# See the HelmChartConfig we applied
kubectl get helmchartconfig -n kube-system traefik -o yaml

# Force Helm re-reconcile
kubectl delete job -n kube-system helm-install-traefik
```

## Cluster-wide operations

```bash
# API server health
kubectl cluster-info

# All namespaces
kubectl get namespaces

# All k3s-system pods
kubectl get pods -n kube-system

# All ServiceAccounts in our namespace
kubectl get sa -n honeydue

# Check what an SA can do
kubectl auth can-i --list --as=system:serviceaccount:honeydue:api
```

## Hetzner SSH (not kubectl but often needed)

```bash
# SSH in
ssh -i ~/.ssh/hetzner deploy@hetzner1

# Check k3s service
ssh -i ~/.ssh/hetzner deploy@hetzner1 'sudo systemctl status k3s'

# Per-node commands, one node at a time (e.g., apt upgrade)
for h in hetzner1 hetzner2 hetzner3; do
  ssh -i ~/.ssh/hetzner "deploy@$h" 'sudo apt update && sudo apt upgrade -y'
done
```

## Emergency: cluster is wedged

```bash
# Check all nodes Ready
kubectl get nodes

# If one is NotReady
ssh -i ~/.ssh/hetzner deploy@<node> 'sudo systemctl restart k3s'

# If still bad, kill k3s on that node and check
ssh -i ~/.ssh/hetzner deploy@<node> 'sudo /usr/local/bin/k3s-killall.sh'
ssh -i ~/.ssh/hetzner deploy@<node> 'sudo systemctl start k3s'

# Last resort: uninstall + rejoin
# ssh -i ~/.ssh/hetzner deploy@<node> 'sudo /usr/local/bin/k3s-uninstall.sh'
# then re-join via the k3s install command
```

## Observability

```bash
# Hit api /metrics from inside the cluster
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://api:8000/metrics | head -30

# vmagent self-stats: scrapes succeeded, samples shipped, queue health
kubectl -n honeydue exec deploy/vmagent -- wget -qO- http://127.0.0.1:8429/metrics \
  | grep -E "scrapes_total|targets|remotewrite_samples_dropped|persistentqueue_blocks_dropped"

# Force vmagent to reload config (after editing the ConfigMap)
kubectl -n honeydue rollout restart deploy/vmagent

# Query VictoriaMetrics by SSH'ing to the obs box
ssh 88oakappsUpdate 'curl -s "http://127.0.0.1:8428/api/v1/query?query=up"'

# p95 latency by route, last 5m
ssh 88oakappsUpdate 'curl -s "http://127.0.0.1:8428/api/v1/query?query=histogram_quantile(0.95,sum%20by%20(route,le)(rate(http_request_duration_seconds_bucket%5B5m%5D)))" | python3 -m json.tool'

# All metric names landing in VM
ssh 88oakappsUpdate 'curl -s http://127.0.0.1:8428/api/v1/label/__name__/values | python3 -m json.tool'

# Restart the obs stack on 88oakappsUpdate (VM + Jaeger + Grafana)
ssh 88oakappsUpdate 'cd /opt/honeydue-obs && sudo docker compose restart'

# Live RAM usage of the obs containers
ssh 88oakappsUpdate 'sudo docker stats --no-stream | grep honeydue-obs'

# Test the obs ingest endpoint with auth
# (-f2- keeps everything after the first '=', so tokens containing '=' survive)
TOKEN=$(grep ^OBS_INGEST_TOKEN= deploy/prod.env | cut -d= -f2-)
curl -s -o /dev/null -w "%{http_code}\n" https://obs.88oakapps.com/health \
  -H "Authorization: Bearer $TOKEN"
# 200 = healthy
```

Dashboards live at `https://grafana.88oakapps.com/d/honeydue-red`. Admin credentials are in `deploy/prod.env`.

## One-liners worth memorizing

```bash
# Heavy smoke test through CF
for url in https://api.myhoneydue.com/api/health/ https://admin.myhoneydue.com/ https://myhoneydue.com/; do
  ok=0
  for i in $(seq 1 20); do
    [[ "$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "$url")" == "200" ]] && ok=$((ok+1))
  done
  printf "%-45s %d/20\n" "$url" "$ok"
done

# Pods not ready (with -A the columns are NAMESPACE NAME READY STATUS ..., so STATUS is $4)
kubectl get pods -A | awk '$4!="Running" && $4!="Completed" && $4!="STATUS"'

# Restart everything in our namespace
for d in api admin worker redis; do
  kubectl rollout restart deploy/$d -n honeydue
done

# Watch all rollouts simultaneously
for d in api admin worker redis; do
  kubectl rollout status deploy/$d -n honeydue &
done; wait
```