b54493f785
BE-3 observability: expose the worker's Prometheus metrics on :6060/metrics (apns/fcm/asynq histograms + a new cache_ops_total counter were recorded all along but never scraped — which is why those dashboard panels read empty); add the worker containerPort, the vmagent worker scrape job, and two additive NetworkPolicies. Instrument cache Get/Set hit/miss. BE-2 retention: three periodic Asynq cleanup crons mirroring the reminder-log cleanup — notifications (90d), webhook dedup log (180d), audit_log (365d). BE-1 GDPR data export: POST /api/auth/export/ enqueues a low-priority Asynq job that gathers all of the user's data (owned residences + their tasks/contractors/documents/share-codes, plus profile/notifications/prefs/push-tokens/subscription/audit log), zips one JSON file per category, and emails it as an attachment. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
221 lines
7.2 KiB
Go
221 lines
7.2 KiB
Go
package prom
|
|
|
|
import (
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
var (
|
|
Registry = prometheus.NewRegistry()
|
|
|
|
httpRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "http_request_duration_seconds",
|
|
Help: "Duration of HTTP requests in seconds.",
|
|
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
|
|
}, []string{"route", "method", "status"})
|
|
|
|
gormQueryDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "gorm_query_duration_seconds",
|
|
Help: "Duration of GORM database queries in seconds.",
|
|
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
|
|
}, []string{"table", "operation"})
|
|
|
|
b2UploadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "b2_upload_duration_seconds",
|
|
Help: "Duration of B2/S3 upload operations in seconds.",
|
|
Buckets: []float64{0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60},
|
|
}, []string{"bucket", "result"})
|
|
|
|
b2UploadBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "b2_upload_bytes_total",
|
|
Help: "Total bytes uploaded to B2/S3.",
|
|
}, []string{"bucket", "result"})
|
|
|
|
apnsSendDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "apns_send_duration_seconds",
|
|
Help: "Duration of APNs push notification sends in seconds.",
|
|
Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
|
|
}, []string{"result"})
|
|
|
|
fcmSendDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "fcm_send_duration_seconds",
|
|
Help: "Duration of FCM push notification sends in seconds.",
|
|
Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
|
|
}, []string{"result"})
|
|
|
|
asynqJobDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "asynq_job_duration_seconds",
|
|
Help: "Duration of asynq background job execution in seconds.",
|
|
Buckets: []float64{0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 60, 300},
|
|
}, []string{"task_type", "result"})
|
|
|
|
cacheOps = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "cache_ops_total",
|
|
Help: "Redis cache operations by type and result.",
|
|
}, []string{"operation", "result"}) // operation: get|set; result: hit|miss|ok|error
|
|
)
|
|
|
|
func init() {
|
|
Registry.MustRegister(
|
|
collectors.NewGoCollector(),
|
|
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
|
httpRequestDuration,
|
|
gormQueryDuration,
|
|
b2UploadDuration,
|
|
b2UploadBytes,
|
|
apnsSendDuration,
|
|
fcmSendDuration,
|
|
asynqJobDuration,
|
|
cacheOps,
|
|
)
|
|
}
|
|
|
|
// Handler returns a promhttp Handler bound to the package Registry, suitable for
|
|
// mounting at GET /metrics on Echo.
|
|
func Handler() echo.HandlerFunc {
|
|
h := promhttp.HandlerFor(Registry, promhttp.HandlerOpts{Registry: Registry})
|
|
return echo.WrapHandler(h)
|
|
}
|
|
|
|
// HTTPHandler returns a net/http handler bound to the package Registry, for the
|
|
// worker's plain http.ServeMux (the api uses Handler() for Echo). This is what
|
|
// lets the worker's apns/fcm/asynq histograms actually get scraped — they were
|
|
// recorded all along but the worker exposed no /metrics endpoint.
|
|
func HTTPHandler() http.Handler {
|
|
return promhttp.HandlerFor(Registry, promhttp.HandlerOpts{Registry: Registry})
|
|
}
|
|
|
|
// ObserveCacheOp records a Redis cache operation. operation is "get" or "set";
|
|
// result is "hit"/"miss"/"error" for gets and "ok"/"error" for sets.
|
|
func ObserveCacheOp(operation, result string) {
|
|
cacheOps.WithLabelValues(operation, result).Inc()
|
|
}
|
|
|
|
// HTTPMiddleware records http_request_duration_seconds for every request,
|
|
// labeled by Echo route pattern, method, and status code.
|
|
func HTTPMiddleware() echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
start := time.Now()
|
|
err := next(c)
|
|
route := c.Path()
|
|
if route == "" {
|
|
route = "unknown"
|
|
}
|
|
httpRequestDuration.WithLabelValues(
|
|
route,
|
|
c.Request().Method,
|
|
strconv.Itoa(c.Response().Status),
|
|
).Observe(time.Since(start).Seconds())
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// RegisterGORMCallbacks attaches before/after callbacks on a *gorm.DB so every
|
|
// SQL operation records gorm_query_duration_seconds{table,operation}.
|
|
//
|
|
// Operates at the SQL/statement level — does NOT require ctx to be threaded
|
|
// through repositories (that comes later when otelgorm lands).
|
|
func RegisterGORMCallbacks(db *gorm.DB) error {
|
|
const startKey = "honeydue:prom_start"
|
|
|
|
registerBefore := func(name string) error {
|
|
cb := db.Callback().Create().Before("gorm:create")
|
|
switch name {
|
|
case "create":
|
|
cb = db.Callback().Create().Before("gorm:create")
|
|
case "query":
|
|
cb = db.Callback().Query().Before("gorm:query")
|
|
case "update":
|
|
cb = db.Callback().Update().Before("gorm:update")
|
|
case "delete":
|
|
cb = db.Callback().Delete().Before("gorm:delete")
|
|
case "row":
|
|
cb = db.Callback().Row().Before("gorm:row")
|
|
case "raw":
|
|
cb = db.Callback().Raw().Before("gorm:raw")
|
|
}
|
|
return cb.Register("prom:before_"+name, func(tx *gorm.DB) {
|
|
tx.InstanceSet(startKey, time.Now())
|
|
})
|
|
}
|
|
|
|
registerAfter := func(name string) error {
|
|
cb := db.Callback().Create().After("gorm:create")
|
|
switch name {
|
|
case "create":
|
|
cb = db.Callback().Create().After("gorm:create")
|
|
case "query":
|
|
cb = db.Callback().Query().After("gorm:query")
|
|
case "update":
|
|
cb = db.Callback().Update().After("gorm:update")
|
|
case "delete":
|
|
cb = db.Callback().Delete().After("gorm:delete")
|
|
case "row":
|
|
cb = db.Callback().Row().After("gorm:row")
|
|
case "raw":
|
|
cb = db.Callback().Raw().After("gorm:raw")
|
|
}
|
|
return cb.Register("prom:after_"+name, func(tx *gorm.DB) {
|
|
startVal, ok := tx.InstanceGet(startKey)
|
|
if !ok {
|
|
return
|
|
}
|
|
start, ok := startVal.(time.Time)
|
|
if !ok {
|
|
return
|
|
}
|
|
table := tx.Statement.Table
|
|
if table == "" {
|
|
table = "unknown"
|
|
}
|
|
gormQueryDuration.WithLabelValues(table, name).Observe(time.Since(start).Seconds())
|
|
})
|
|
}
|
|
|
|
for _, name := range []string{"create", "query", "update", "delete", "row", "raw"} {
|
|
if err := registerBefore(name); err != nil {
|
|
return err
|
|
}
|
|
if err := registerAfter(name); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ObserveB2Upload records duration + bytes for a B2/S3 upload. result is "ok"
|
|
// or "error".
|
|
func ObserveB2Upload(bucket, result string, dur time.Duration, bytes int64) {
|
|
b2UploadDuration.WithLabelValues(bucket, result).Observe(dur.Seconds())
|
|
if bytes > 0 {
|
|
b2UploadBytes.WithLabelValues(bucket, result).Add(float64(bytes))
|
|
}
|
|
}
|
|
|
|
// ObserveAPNsSend records duration of a single APNs send. result is "ok",
|
|
// "bad_token", or "error".
|
|
func ObserveAPNsSend(result string, dur time.Duration) {
|
|
apnsSendDuration.WithLabelValues(result).Observe(dur.Seconds())
|
|
}
|
|
|
|
// ObserveFCMSend records duration of a single FCM send. result is "ok",
|
|
// "bad_token", or "error".
|
|
func ObserveFCMSend(result string, dur time.Duration) {
|
|
fcmSendDuration.WithLabelValues(result).Observe(dur.Seconds())
|
|
}
|
|
|
|
// ObserveAsynqJob records duration of a single asynq job execution. result is
|
|
// "ok", "retry", or "error".
|
|
func ObserveAsynqJob(taskType, result string, dur time.Duration) {
|
|
asynqJobDuration.WithLabelValues(taskType, result).Observe(dur.Seconds())
|
|
}
|