bc3da007db
Step 1 — OTel SDK: cmd/api and cmd/worker initialize a tracer provider that exports OTLP/HTTP to obs.88oakapps.com (Jaeger all-in-one). Sampling is AlwaysSample in dev (DEBUG=true) and TraceIDRatioBased(0.1) in prod, overridable via OTEL_TRACES_SAMPLER_ARG. Service names are honeydue-api and honeydue-worker. otelecho.Middleware opens a span per HTTP request. Step 2 — Manual spans: storage_service.Upload now takes ctx and emits storage.upload + b2.PutObject spans (size_bytes, key, mime_type, bucket, result attrs). APNs Send/SendWithCategory and FCM sendOne emit per-token spans with topic, status_code, reason. Asynq middleware emits asynq.handle:<task_type> per job with retry/payload attrs and records asynq_job_duration_seconds. Step 3 — Database: otelgorm plugin registered in database.Connect, so any SQL emitted via db.WithContext(ctx) attaches to the request span. Every repository now exposes WithContext(ctx) *XRepository as the migration helper. TaskService.ListTasks and GetTasksByResidence are migrated end-to-end (ctx threaded through handler → service → repo); remaining services adopt the same pattern incrementally — pre-migration methods still emit untraced SQL via the unchanged db field. OBS_TRACES_URL and OBS_INGEST_TOKEN flow from deploy/prod.env → honeydue-secrets → api+worker Deployments via secretKeyRef (optional). 02-setup-secrets.sh sources them from prod.env on next run; manifests mark both env vars optional so the deployment rolls without traces if the secret is absent. ch15 observability doc now lists what produces spans today vs the remaining migration work, with the explicit per-method pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
162 lines
5.0 KiB
Go
162 lines
5.0 KiB
Go
// Package tracing wires the OpenTelemetry SDK with an OTLP/HTTP exporter
// targeting obs.88oakapps.com (Jaeger all-in-one behind nginx + bearer auth).
//
// The package owns the global TracerProvider for whichever process calls Init
// (the api and the worker each do so); everything else acquires a tracer via
// tracing.Tracer(name).
//
// Sampling defaults to AlwaysSample in DEBUG mode and
// ParentBased(TraceIDRatioBased(0.1)) otherwise, controllable via
// OTEL_TRACES_SAMPLER_ARG (see SampleRatioFromEnv).
|
|
package tracing
|
|
|
|
import (
	"context"
	"fmt"
	"math"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/rs/zerolog/log"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
	"go.opentelemetry.io/otel/propagation"
	"go.opentelemetry.io/otel/sdk/resource"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.27.0"
	"go.opentelemetry.io/otel/trace"
	"go.opentelemetry.io/otel/trace/noop"
)
|
|
|
|
// Config is the set of knobs Init reads when it installs the process-wide
// tracer provider.
type Config struct {
	// ServiceName becomes the service.name resource attribute attached to
	// every span. Callers are expected to set it; Init does not validate it.
	ServiceName string

	// Environment becomes the deployment environment resource attribute.
	// Conventional values are "prod", "dev" and "local".
	Environment string

	// EndpointURL is the complete OTLP/HTTP traces URL, for example
	// https://obs.88oakapps.com/v1/traces. Leaving it empty turns tracing
	// off: Init installs a no-op provider instead of an exporter.
	EndpointURL string

	// BearerToken is sent as "Authorization: Bearer <token>" on every export
	// request. An empty token means no Authorization header is added.
	BearerToken string

	// SampleRatio selects the sampler:
	//   - negative (conventionally -1): sample everything (debug),
	//   - 0: sample nothing,
	//   - between 0 and 1: that fraction of root traces (0.1 = 10%), with an
	//     inbound parent's decision taking precedence,
	//   - 1 or more: sample everything.
	SampleRatio float64

	// Insecure makes the exporter use plain HTTP even for an https URL.
	// Intended for local testing; an http:// EndpointURL has the same effect.
	Insecure bool
}
|
|
|
|
// Init configures the global TracerProvider and returns a shutdown function.
|
|
// Call shutdown on graceful exit so spans in flight get flushed.
|
|
//
|
|
// Init is safe to call when EndpointURL is empty: it installs a no-op
|
|
// provider and returns a no-op shutdown.
|
|
func Init(ctx context.Context, cfg Config) (shutdown func(context.Context) error, err error) {
|
|
if cfg.EndpointURL == "" {
|
|
log.Info().Msg("tracing: no OBS_TRACES_URL configured, installing no-op tracer")
|
|
otel.SetTracerProvider(noop.NewTracerProvider())
|
|
return func(context.Context) error { return nil }, nil
|
|
}
|
|
|
|
parsed, err := url.Parse(cfg.EndpointURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: %w", cfg.EndpointURL, err)
|
|
}
|
|
|
|
opts := []otlptracehttp.Option{
|
|
otlptracehttp.WithEndpoint(parsed.Host),
|
|
otlptracehttp.WithURLPath(parsed.Path),
|
|
otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
|
|
otlptracehttp.WithTimeout(10 * time.Second),
|
|
}
|
|
if cfg.Insecure || parsed.Scheme == "http" {
|
|
opts = append(opts, otlptracehttp.WithInsecure())
|
|
}
|
|
if cfg.BearerToken != "" {
|
|
opts = append(opts, otlptracehttp.WithHeaders(map[string]string{
|
|
"Authorization": "Bearer " + cfg.BearerToken,
|
|
}))
|
|
}
|
|
|
|
exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create OTLP exporter: %w", err)
|
|
}
|
|
|
|
res, err := resource.Merge(resource.Default(), resource.NewWithAttributes(
|
|
semconv.SchemaURL,
|
|
semconv.ServiceName(cfg.ServiceName),
|
|
semconv.DeploymentEnvironmentName(cfg.Environment),
|
|
))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build resource: %w", err)
|
|
}
|
|
|
|
var sampler sdktrace.Sampler
|
|
switch {
|
|
case cfg.SampleRatio < 0:
|
|
sampler = sdktrace.AlwaysSample()
|
|
case cfg.SampleRatio == 0:
|
|
sampler = sdktrace.NeverSample()
|
|
case cfg.SampleRatio >= 1:
|
|
sampler = sdktrace.AlwaysSample()
|
|
default:
|
|
// ParentBased so the inbound parent's sampling decision wins;
|
|
// otherwise root-span ratio applies.
|
|
sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.SampleRatio))
|
|
}
|
|
|
|
tp := sdktrace.NewTracerProvider(
|
|
sdktrace.WithBatcher(exporter,
|
|
sdktrace.WithBatchTimeout(5*time.Second),
|
|
sdktrace.WithMaxExportBatchSize(512),
|
|
),
|
|
sdktrace.WithResource(res),
|
|
sdktrace.WithSampler(sampler),
|
|
)
|
|
|
|
otel.SetTracerProvider(tp)
|
|
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
|
propagation.TraceContext{},
|
|
propagation.Baggage{},
|
|
))
|
|
|
|
log.Info().
|
|
Str("endpoint", cfg.EndpointURL).
|
|
Str("service", cfg.ServiceName).
|
|
Str("env", cfg.Environment).
|
|
Float64("sample_ratio", cfg.SampleRatio).
|
|
Bool("auth", cfg.BearerToken != "").
|
|
Msg("tracing: OTLP exporter initialized")
|
|
|
|
return tp.Shutdown, nil
|
|
}
|
|
|
|
// Tracer returns a named tracer from the global provider. Safe to call before
|
|
// Init (returns a no-op tracer in that case).
|
|
func Tracer(name string) trace.Tracer {
|
|
return otel.Tracer(name)
|
|
}
|
|
|
|
// SampleRatioFromEnv returns the value to use for Config.SampleRatio, derived
// from the environment.
//
// If OTEL_TRACES_SAMPLER_ARG (surrounding whitespace ignored) parses as a
// float, that value is returned as-is, negatives included. When it is unset,
// blank, unparsable or NaN, the default applies: -1 ("always sample") if DEBUG
// equals "true" ignoring case and surrounding whitespace, 0.1 ("10%")
// otherwise.
func SampleRatioFromEnv() float64 {
	if raw := strings.TrimSpace(os.Getenv("OTEL_TRACES_SAMPLER_ARG")); raw != "" {
		// ParseFloat accepts "NaN" without error. NaN compares false against
		// every threshold, so it is not a usable ratio; treat it like any
		// other invalid value and fall through to the defaults.
		if ratio, err := strconv.ParseFloat(raw, 64); err == nil && !math.IsNaN(ratio) {
			return ratio
		}
	}
	if strings.EqualFold(strings.TrimSpace(os.Getenv("DEBUG")), "true") {
		return -1
	}
	return 0.1
}
|