Files
honeyDueAPI/internal/tracing/tracing.go
T
Trey t bc3da007db
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled
Wire OpenTelemetry tracing — HTTP, B2, APNs, FCM, asynq, GORM (partial)
Step 1 — OTel SDK: cmd/api and cmd/worker initialize a tracer provider
that exports OTLP/HTTP to obs.88oakapps.com (Jaeger all-in-one). Sampling
is AlwaysSample in dev (DEBUG=true) and TraceIDRatioBased(0.1) in prod,
overridable via OTEL_TRACES_SAMPLER_ARG. Service names are honeydue-api
and honeydue-worker. otelecho.Middleware opens a span per HTTP request.

Step 2 — Manual spans: storage_service.Upload now takes ctx and emits
storage.upload + b2.PutObject spans (size_bytes, key, mime_type, bucket,
result attrs). APNs Send/SendWithCategory and FCM sendOne emit per-token
spans with topic, status_code, reason. Asynq middleware emits
asynq.handle:<task_type> per job with retry/payload attrs and records
asynq_job_duration_seconds.

Step 3 — Database: otelgorm plugin registered in database.Connect, so
any SQL emitted via db.WithContext(ctx) attaches to the request span.
Every repository now exposes WithContext(ctx) *XRepository as the
migration helper. TaskService.ListTasks and GetTasksByResidence are
migrated end-to-end (ctx threaded through handler → service → repo);
remaining services adopt the same pattern incrementally — pre-migration
methods still emit untraced SQL via the unchanged db field.

OBS_TRACES_URL and OBS_INGEST_TOKEN flow from deploy/prod.env →
honeydue-secrets → api+worker Deployments via secretKeyRef (optional).
02-setup-secrets.sh sources them from prod.env on next run; manifests
mark both env vars optional so the deployment rolls without traces if
the secret is absent.

ch15 observability doc now lists what produces spans today vs the
remaining migration work, with the explicit per-method pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 15:28:05 -05:00

162 lines
5.0 KiB
Go

// Package tracing wires the OpenTelemetry SDK with an OTLP/HTTP exporter
// targeting obs.88oakapps.com (Jaeger all-in-one behind nginx + bearer auth).
//
// The package owns the global TracerProvider for whichever process calls Init
// (both the api and the worker binaries do); everything else acquires a tracer
// via tracing.Tracer(name).
//
// Sampling defaults to AlwaysSample in DEBUG mode and TraceIDRatioBased(0.1)
// otherwise, controllable via OTEL_TRACES_SAMPLER_ARG.
package tracing
import (
	"context"
	"fmt"
	"math"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/rs/zerolog/log"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
	"go.opentelemetry.io/otel/propagation"
	"go.opentelemetry.io/otel/sdk/resource"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.27.0"
	"go.opentelemetry.io/otel/trace"
	"go.opentelemetry.io/otel/trace/noop"
)
// Config is the set of knobs Init reads when it builds and installs the
// global tracer provider.
type Config struct {
	// ServiceName is reported as the service.name resource attribute on
	// every exported span. Callers are expected to set it.
	ServiceName string

	// Environment is reported as the deployment environment resource
	// attribute. By convention one of "prod", "dev" or "local".
	Environment string

	// EndpointURL is the complete OTLP/HTTP traces URL, for example
	// https://obs.88oakapps.com/v1/traces. Leaving it empty turns tracing
	// off: Init installs a no-op provider instead of an exporter.
	EndpointURL string

	// BearerToken is sent as "Authorization: Bearer <token>" on every
	// export request when non-empty; when empty no auth header is added.
	BearerToken string

	// SampleRatio selects the sampler:
	//   - negative: sample everything (debug)
	//   - 0:        sample nothing
	//   - (0, 1):   sample that fraction of root traces (0.1 = 10%)
	//   - >= 1:     sample everything
	SampleRatio float64

	// Insecure forces plain HTTP regardless of the URL scheme. Intended
	// for local testing only.
	Insecure bool
}
// Init configures the global TracerProvider and text-map propagator, and
// returns a shutdown function. Call shutdown on graceful exit so spans that
// are still queued in the batch processor get flushed.
//
// When cfg.EndpointURL is empty, Init installs a no-op provider and returns a
// no-op shutdown with a nil error. Otherwise EndpointURL must be an absolute
// http or https URL with a host; anything else is rejected up front instead
// of producing an exporter that fails on every export.
//
// Sampler selection from cfg.SampleRatio:
//   - negative or >= 1: AlwaysSample
//   - 0:                NeverSample
//   - otherwise:        ParentBased(TraceIDRatioBased(ratio))
//
// Init returns an error if the URL is invalid, the exporter cannot be
// created, or the resource cannot be built.
func Init(ctx context.Context, cfg Config) (shutdown func(context.Context) error, err error) {
	if cfg.EndpointURL == "" {
		log.Info().Msg("tracing: no OBS_TRACES_URL configured, installing no-op tracer")
		otel.SetTracerProvider(noop.NewTracerProvider())
		return func(context.Context) error { return nil }, nil
	}

	parsed, err := url.Parse(cfg.EndpointURL)
	if err != nil {
		return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: %w", cfg.EndpointURL, err)
	}
	// url.Parse happily accepts scheme-less values such as
	// "obs.example.com/v1/traces" (empty Host, everything in Path). Passing
	// an empty host to the exporter only surfaces later as failed exports,
	// so fail fast here. url.Parse lower-cases the scheme for us.
	if parsed.Host == "" || (parsed.Scheme != "http" && parsed.Scheme != "https") {
		return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: must be an absolute http(s) URL", cfg.EndpointURL)
	}

	opts := []otlptracehttp.Option{
		otlptracehttp.WithEndpoint(parsed.Host),
		otlptracehttp.WithURLPath(parsed.Path),
		otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
		otlptracehttp.WithTimeout(10 * time.Second),
	}
	if cfg.Insecure || parsed.Scheme == "http" {
		opts = append(opts, otlptracehttp.WithInsecure())
	}
	if cfg.BearerToken != "" {
		opts = append(opts, otlptracehttp.WithHeaders(map[string]string{
			"Authorization": "Bearer " + cfg.BearerToken,
		}))
	}

	exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
	if err != nil {
		return nil, fmt.Errorf("create OTLP exporter: %w", err)
	}

	// The service attributes are merged as a schemaless resource so the
	// result keeps resource.Default()'s schema URL. Tagging them with this
	// file's semconv.SchemaURL makes Merge fail with a schema URL conflict
	// whenever the SDK bundles a different semconv version than the one
	// imported here.
	res, err := resource.Merge(resource.Default(), resource.NewSchemaless(
		semconv.ServiceName(cfg.ServiceName),
		semconv.DeploymentEnvironmentName(cfg.Environment),
	))
	if err != nil {
		// Best-effort cleanup: the exporter was already started and nothing
		// else will own it. The resource error is the one worth reporting.
		_ = exporter.Shutdown(ctx)
		return nil, fmt.Errorf("build resource: %w", err)
	}

	var sampler sdktrace.Sampler
	switch {
	case cfg.SampleRatio < 0, cfg.SampleRatio >= 1:
		sampler = sdktrace.AlwaysSample()
	case cfg.SampleRatio == 0:
		sampler = sdktrace.NeverSample()
	default:
		// ParentBased so the inbound parent's sampling decision wins;
		// otherwise root-span ratio applies.
		sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.SampleRatio))
	}

	tp := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter,
			sdktrace.WithBatchTimeout(5*time.Second),
			sdktrace.WithMaxExportBatchSize(512),
		),
		sdktrace.WithResource(res),
		sdktrace.WithSampler(sampler),
	)
	otel.SetTracerProvider(tp)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))

	// Log whether auth is configured, never the token itself.
	log.Info().
		Str("endpoint", cfg.EndpointURL).
		Str("service", cfg.ServiceName).
		Str("env", cfg.Environment).
		Float64("sample_ratio", cfg.SampleRatio).
		Bool("auth", cfg.BearerToken != "").
		Msg("tracing: OTLP exporter initialized")

	return tp.Shutdown, nil
}
// Tracer hands back a tracer with the given instrumentation name, obtained
// from whatever TracerProvider is currently registered globally. It is safe
// to call before Init: spans started then are not exported until a provider
// has been installed.
func Tracer(name string) trace.Tracer {
	provider := otel.GetTracerProvider()
	return provider.Tracer(name)
}
// SampleRatioFromEnv derives the sample ratio for Config.SampleRatio from the
// process environment.
//
// If OTEL_TRACES_SAMPLER_ARG (surrounding whitespace ignored) parses as a
// float, that value is returned as-is. An unset, empty, unparsable or NaN
// value is ignored and the defaults apply: -1 ("always") when DEBUG equals
// "true" case-insensitively, 0.1 ("10%") otherwise.
func SampleRatioFromEnv() float64 {
	if raw := strings.TrimSpace(os.Getenv("OTEL_TRACES_SAMPLER_ARG")); raw != "" {
		// ParseFloat accepts "NaN" with a nil error. NaN fails every ordered
		// comparison, so it would slip past any range check on the ratio;
		// treat it like any other invalid value.
		if ratio, err := strconv.ParseFloat(raw, 64); err == nil && !math.IsNaN(ratio) {
			return ratio
		}
	}
	if strings.EqualFold(os.Getenv("DEBUG"), "true") {
		return -1
	}
	return 0.1
}