Wire OpenTelemetry tracing — HTTP, B2, APNs, FCM, asynq, GORM (partial)
Step 1 — OTel SDK: cmd/api and cmd/worker initialize a tracer provider that exports OTLP/HTTP to obs.88oakapps.com (Jaeger all-in-one). Sampling is AlwaysSample in dev (DEBUG=true) and TraceIDRatioBased(0.1) in prod, overridable via OTEL_TRACES_SAMPLER_ARG. Service names are honeydue-api and honeydue-worker. otelecho.Middleware opens a span per HTTP request. Step 2 — Manual spans: storage_service.Upload now takes ctx and emits storage.upload + b2.PutObject spans (size_bytes, key, mime_type, bucket, result attrs). APNs Send/SendWithCategory and FCM sendOne emit per-token spans with topic, status_code, reason. Asynq middleware emits asynq.handle:<task_type> per job with retry/payload attrs and records asynq_job_duration_seconds. Step 3 — Database: otelgorm plugin registered in database.Connect, so any SQL emitted via db.WithContext(ctx) attaches to the request span. Every repository now exposes WithContext(ctx) *XRepository as the migration helper. TaskService.ListTasks and GetTasksByResidence are migrated end-to-end (ctx threaded through handler → service → repo); remaining services adopt the same pattern incrementally — pre-migration methods still emit untraced SQL via the unchanged db field. OBS_TRACES_URL and OBS_INGEST_TOKEN flow from deploy/prod.env → honeydue-secrets → api+worker Deployments via secretKeyRef (optional). 02-setup-secrets.sh sources them from prod.env on next run; manifests mark both env vars optional so the deployment rolls without traces if the secret is absent. ch15 observability doc now lists what produces spans today vs the remaining migration work, with the explicit per-method pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/treytartt/honeydue-api/internal/push"
|
||||
"github.com/treytartt/honeydue-api/internal/router"
|
||||
"github.com/treytartt/honeydue-api/internal/services"
|
||||
"github.com/treytartt/honeydue-api/internal/tracing"
|
||||
"github.com/treytartt/honeydue-api/pkg/utils"
|
||||
)
|
||||
|
||||
@@ -50,6 +51,27 @@ func main() {
|
||||
Str("redis_url", config.MaskURLCredentials(cfg.Redis.URL)).
|
||||
Msg("Starting HoneyDue API server")
|
||||
|
||||
// Initialize OpenTelemetry tracing — exports to obs.88oakapps.com
|
||||
// (Jaeger via OTLP/HTTP) when OBS_TRACES_URL is set; otherwise installs
|
||||
// a no-op tracer so call sites can use otel.Tracer() unconditionally.
|
||||
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
|
||||
ServiceName: "honeydue-api",
|
||||
Environment: deploymentEnvironment(cfg.Server.Debug),
|
||||
EndpointURL: os.Getenv("OBS_TRACES_URL"),
|
||||
BearerToken: os.Getenv("OBS_INGEST_TOKEN"),
|
||||
SampleRatio: tracing.SampleRatioFromEnv(),
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("tracing init failed — continuing without traces")
|
||||
}
|
||||
defer func() {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := tracingShutdown(shutdownCtx); err != nil {
|
||||
log.Warn().Err(err).Msg("tracing shutdown error")
|
||||
}
|
||||
}()
|
||||
|
||||
// Connect to database (retry with backoff)
|
||||
var db *gorm.DB
|
||||
var dbErr error
|
||||
@@ -217,3 +239,15 @@ func main() {
|
||||
|
||||
log.Info().Msg("Server exited")
|
||||
}
|
||||
|
||||
// deploymentEnvironment picks the environment label that spans get tagged
// with. A non-empty DEPLOYMENT_ENVIRONMENT variable takes precedence;
// otherwise the Debug flag selects "dev" (true) or "prod" (false).
func deploymentEnvironment(debug bool) string {
	override := os.Getenv("DEPLOYMENT_ENVIRONMENT")
	switch {
	case override != "":
		return override
	case debug:
		return "dev"
	default:
		return "prod"
	}
}
|
||||
|
||||
@@ -11,13 +11,18 @@ import (
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
|
||||
"github.com/treytartt/honeydue-api/internal/config"
|
||||
"github.com/treytartt/honeydue-api/internal/database"
|
||||
"github.com/treytartt/honeydue-api/internal/monitoring"
|
||||
"github.com/treytartt/honeydue-api/internal/prom"
|
||||
"github.com/treytartt/honeydue-api/internal/push"
|
||||
"github.com/treytartt/honeydue-api/internal/repositories"
|
||||
"github.com/treytartt/honeydue-api/internal/services"
|
||||
"github.com/treytartt/honeydue-api/internal/tracing"
|
||||
"github.com/treytartt/honeydue-api/internal/worker/jobs"
|
||||
"github.com/treytartt/honeydue-api/pkg/utils"
|
||||
)
|
||||
@@ -40,6 +45,27 @@ func main() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Initialize OpenTelemetry tracing for the worker process. Same OTLP
|
||||
// destination as the api; service.name distinguishes them in Jaeger.
|
||||
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
|
||||
ServiceName: "honeydue-worker",
|
||||
Environment: workerDeploymentEnv(cfg.Server.Debug),
|
||||
EndpointURL: os.Getenv("OBS_TRACES_URL"),
|
||||
BearerToken: os.Getenv("OBS_INGEST_TOKEN"),
|
||||
SampleRatio: tracing.SampleRatioFromEnv(),
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("worker tracing init failed — continuing without traces")
|
||||
}
|
||||
defer func() {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := tracingShutdown(shutdownCtx); err != nil {
|
||||
log.Warn().Err(err).Msg("worker tracing shutdown error")
|
||||
}
|
||||
}()
|
||||
asynqTracer := tracing.Tracer("honeydue/worker/asynq")
|
||||
|
||||
// Initialize database
|
||||
db, err := database.Connect(&cfg.Database, cfg.Server.Debug)
|
||||
if err != nil {
|
||||
@@ -143,6 +169,11 @@ func main() {
|
||||
|
||||
// Create Asynq mux and register handlers
|
||||
mux := asynq.NewServeMux()
|
||||
|
||||
// Tracing + metrics middleware: every job runs inside a span and emits
|
||||
// asynq_job_duration_seconds{task_type,result}.
|
||||
mux.Use(asynqTracingMiddleware(asynqTracer))
|
||||
|
||||
mux.HandleFunc(jobs.TypeSmartReminder, jobHandler.HandleSmartReminder)
|
||||
mux.HandleFunc(jobs.TypeDailyDigest, jobHandler.HandleDailyDigest)
|
||||
mux.HandleFunc(jobs.TypeSendEmail, jobHandler.HandleSendEmail)
|
||||
@@ -238,3 +269,44 @@ func main() {
|
||||
|
||||
log.Info().Msg("Worker stopped")
|
||||
}
|
||||
|
||||
// asynqTracingMiddleware returns an asynq.MiddlewareFunc that opens a span
|
||||
// per task execution and records asynq_job_duration_seconds. Span attrs
|
||||
// include task type, queue, retry count, and the result outcome.
|
||||
func asynqTracingMiddleware(tracer trace.Tracer) asynq.MiddlewareFunc {
|
||||
return func(next asynq.Handler) asynq.Handler {
|
||||
return asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error {
|
||||
ctx, span := tracer.Start(ctx, "asynq.handle:"+t.Type(),
|
||||
trace.WithAttributes(
|
||||
attribute.String("asynq.task_type", t.Type()),
|
||||
attribute.Int("asynq.payload_bytes", len(t.Payload())),
|
||||
),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
start := time.Now()
|
||||
err := next.ProcessTask(ctx, t)
|
||||
dur := time.Since(start)
|
||||
result := "ok"
|
||||
if err != nil {
|
||||
result = "error"
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
span.RecordError(err)
|
||||
}
|
||||
span.SetAttributes(attribute.String("asynq.result", result))
|
||||
prom.ObserveAsynqJob(t.Type(), result, dur)
|
||||
return err
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// workerDeploymentEnv resolves the environment label for the worker's
// spans, mirroring deploymentEnvironment in cmd/api/main.go: the Debug flag
// chooses between "dev" and "prod", and a non-empty DEPLOYMENT_ENVIRONMENT
// variable replaces either.
func workerDeploymentEnv(debug bool) string {
	label := "prod"
	if debug {
		label = "dev"
	}
	if fromEnv := os.Getenv("DEPLOYMENT_ENVIRONMENT"); fromEnv != "" {
		label = fromEnv
	}
	return label
}
|
||||
|
||||
Reference in New Issue
Block a user