Wire OpenTelemetry tracing — HTTP, B2, APNs, FCM, asynq, GORM (partial)
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled

Step 1 — OTel SDK: cmd/api and cmd/worker initialize a tracer provider
that exports OTLP/HTTP to obs.88oakapps.com (Jaeger all-in-one). Sampling
is AlwaysSample in dev (DEBUG=true) and TraceIDRatioBased(0.1) in prod,
overridable via OTEL_TRACES_SAMPLER_ARG. Service names are honeydue-api
and honeydue-worker. otelecho.Middleware opens a span per HTTP request.

Step 2 — Manual spans: storage_service.Upload now takes ctx and emits
storage.upload + b2.PutObject spans (size_bytes, key, mime_type, bucket,
result attrs). APNs Send/SendWithCategory and FCM sendOne emit per-token
spans with topic, status_code, reason. Asynq middleware emits
asynq.handle:<task_type> per job with retry/payload attrs and records
asynq_job_duration_seconds.

Step 3 — Database: otelgorm plugin registered in database.Connect, so
any SQL emitted via db.WithContext(ctx) attaches to the request span.
Every repository now exposes WithContext(ctx) *XRepository as the
migration helper. TaskService.ListTasks and GetTasksByResidence are
migrated end-to-end (ctx threaded through handler → service → repo);
remaining services adopt the same pattern incrementally — pre-migration
methods still emit untraced SQL via the unchanged db field.

OBS_TRACES_URL and OBS_INGEST_TOKEN flow from deploy/prod.env →
honeydue-secrets → api+worker Deployments via secretKeyRef (optional).
02-setup-secrets.sh sources them from prod.env on next run; manifests
mark both env vars optional so the deployment rolls without traces if
the secret is absent.

ch15 observability doc now lists what produces spans today vs the
remaining migration work, with the explicit per-method pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-04-25 15:28:05 -05:00
parent 77cfcc0b27
commit bc3da007db
30 changed files with 655 additions and 91 deletions
+34
View File
@@ -19,6 +19,7 @@ import (
"github.com/treytartt/honeydue-api/internal/push"
"github.com/treytartt/honeydue-api/internal/router"
"github.com/treytartt/honeydue-api/internal/services"
"github.com/treytartt/honeydue-api/internal/tracing"
"github.com/treytartt/honeydue-api/pkg/utils"
)
@@ -50,6 +51,27 @@ func main() {
Str("redis_url", config.MaskURLCredentials(cfg.Redis.URL)).
Msg("Starting HoneyDue API server")
// Initialize OpenTelemetry tracing — exports to obs.88oakapps.com
// (Jaeger via OTLP/HTTP) when OBS_TRACES_URL is set; otherwise installs
// a no-op tracer so call sites can use otel.Tracer() unconditionally.
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
ServiceName: "honeydue-api",
Environment: deploymentEnvironment(cfg.Server.Debug),
EndpointURL: os.Getenv("OBS_TRACES_URL"),
BearerToken: os.Getenv("OBS_INGEST_TOKEN"),
SampleRatio: tracing.SampleRatioFromEnv(),
})
if err != nil {
log.Error().Err(err).Msg("tracing init failed — continuing without traces")
}
defer func() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := tracingShutdown(shutdownCtx); err != nil {
log.Warn().Err(err).Msg("tracing shutdown error")
}
}()
// Connect to database (retry with backoff)
var db *gorm.DB
var dbErr error
@@ -217,3 +239,15 @@ func main() {
log.Info().Msg("Server exited")
}
// deploymentEnvironment picks the environment label that spans are tagged
// with. A non-empty DEPLOYMENT_ENVIRONMENT variable takes precedence;
// otherwise the Debug flag selects "dev" (true) or "prod" (false).
func deploymentEnvironment(debug bool) string {
	override := os.Getenv("DEPLOYMENT_ENVIRONMENT")
	switch {
	case override != "":
		return override
	case debug:
		return "dev"
	default:
		return "prod"
	}
}
+72
View File
@@ -11,13 +11,18 @@ import (
"github.com/hibiken/asynq"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog/log"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"github.com/treytartt/honeydue-api/internal/config"
"github.com/treytartt/honeydue-api/internal/database"
"github.com/treytartt/honeydue-api/internal/monitoring"
"github.com/treytartt/honeydue-api/internal/prom"
"github.com/treytartt/honeydue-api/internal/push"
"github.com/treytartt/honeydue-api/internal/repositories"
"github.com/treytartt/honeydue-api/internal/services"
"github.com/treytartt/honeydue-api/internal/tracing"
"github.com/treytartt/honeydue-api/internal/worker/jobs"
"github.com/treytartt/honeydue-api/pkg/utils"
)
@@ -40,6 +45,27 @@ func main() {
os.Exit(0)
}
// Initialize OpenTelemetry tracing for the worker process. Same OTLP
// destination as the api; service.name distinguishes them in Jaeger.
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
ServiceName: "honeydue-worker",
Environment: workerDeploymentEnv(cfg.Server.Debug),
EndpointURL: os.Getenv("OBS_TRACES_URL"),
BearerToken: os.Getenv("OBS_INGEST_TOKEN"),
SampleRatio: tracing.SampleRatioFromEnv(),
})
if err != nil {
log.Error().Err(err).Msg("worker tracing init failed — continuing without traces")
}
defer func() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := tracingShutdown(shutdownCtx); err != nil {
log.Warn().Err(err).Msg("worker tracing shutdown error")
}
}()
asynqTracer := tracing.Tracer("honeydue/worker/asynq")
// Initialize database
db, err := database.Connect(&cfg.Database, cfg.Server.Debug)
if err != nil {
@@ -143,6 +169,11 @@ func main() {
// Create Asynq mux and register handlers
mux := asynq.NewServeMux()
// Tracing + metrics middleware: every job runs inside a span and emits
// asynq_job_duration_seconds{task_type,result}.
mux.Use(asynqTracingMiddleware(asynqTracer))
mux.HandleFunc(jobs.TypeSmartReminder, jobHandler.HandleSmartReminder)
mux.HandleFunc(jobs.TypeDailyDigest, jobHandler.HandleDailyDigest)
mux.HandleFunc(jobs.TypeSendEmail, jobHandler.HandleSendEmail)
@@ -238,3 +269,44 @@ func main() {
log.Info().Msg("Worker stopped")
}
// asynqTracingMiddleware returns an asynq.MiddlewareFunc that wraps every
// task execution in a span named "asynq.handle:<task_type>" and reports the
// handler's wall-clock duration through prom.ObserveAsynqJob.
//
// Span attributes: asynq.task_type, asynq.payload_bytes and asynq.result
// ("ok" or "error"), plus asynq.queue and asynq.retry_count whenever asynq
// exposes them on the handler context. A handler error is recorded on the
// span, marks the span status as Error, and is returned to asynq unchanged.
func asynqTracingMiddleware(tracer trace.Tracer) asynq.MiddlewareFunc {
	return func(next asynq.Handler) asynq.Handler {
		return asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error {
			attrs := []attribute.KeyValue{
				attribute.String("asynq.task_type", t.Type()),
				attribute.Int("asynq.payload_bytes", len(t.Payload())),
			}
			// Queue name and retry count live in task metadata that asynq
			// attaches to the handler context; the ok flag is false when the
			// context did not come from an asynq server, in which case the
			// attribute is simply omitted.
			if queue, ok := asynq.GetQueueName(ctx); ok {
				attrs = append(attrs, attribute.String("asynq.queue", queue))
			}
			if retries, ok := asynq.GetRetryCount(ctx); ok {
				attrs = append(attrs, attribute.Int("asynq.retry_count", retries))
			}

			ctx, span := tracer.Start(ctx, "asynq.handle:"+t.Type(),
				trace.WithAttributes(attrs...),
			)
			defer span.End()

			// Time only the downstream handler, not span setup.
			start := time.Now()
			err := next.ProcessTask(ctx, t)
			dur := time.Since(start)

			result := "ok"
			if err != nil {
				result = "error"
				span.SetStatus(codes.Error, err.Error())
				span.RecordError(err)
			}
			span.SetAttributes(attribute.String("asynq.result", result))
			prom.ObserveAsynqJob(t.Type(), result, dur)
			return err
		})
	}
}
// workerDeploymentEnv mirrors deploymentEnvironment in cmd/api/main.go: it
// returns DEPLOYMENT_ENVIRONMENT when that variable is non-empty, and
// otherwise maps the Debug flag to "dev" (true) or "prod" (false).
func workerDeploymentEnv(debug bool) string {
	// Start from the flag-derived label, then let the env var override it.
	label := "prod"
	if debug {
		label = "dev"
	}
	if override := os.Getenv("DEPLOYMENT_ENVIRONMENT"); override != "" {
		label = override
	}
	return label
}