Files
honeyDueAPI/internal/tracing/tracing.go
T
Trey t 3f5bf21e09
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled
tracing: bump semconv to v1.40.0 to match runtime resource schema
Pods crashed at startup with "build resource: conflicting Schema URL:
https://opentelemetry.io/schemas/1.40.0 and https://opentelemetry.io/schemas/1.27.0"
because resource.Default() in the SDK targets v1.40.0. Aligning here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 15:35:46 -05:00

162 lines
5.0 KiB
Go

// Package tracing wires the OpenTelemetry SDK with an OTLP/HTTP exporter
// targeting obs.88oakapps.com (Jaeger all-in-one behind nginx + bearer auth).
//
// The package owns the global TracerProvider for the api process; everything
// else acquires a tracer via tracing.Tracer(name).
//
// Sampling defaults to AlwaysSample when DEBUG=true and to a parent-based
// TraceIDRatioBased(0.1) otherwise; set OTEL_TRACES_SAMPLER_ARG to override
// the ratio.
package tracing
import (
"context"
"fmt"
"net/url"
"os"
"strconv"
"strings"
"time"
"github.com/rs/zerolog/log"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
)
// Config carries the settings Init uses to build and globally install the
// tracer provider.
type Config struct {
	// ServiceName is attached to the resource as service.name. Callers are
	// expected to set it; Init does not validate it.
	ServiceName string

	// Environment is attached to the resource through
	// semconv.DeploymentEnvironmentName. Conventional values are "prod",
	// "dev" and "local".
	Environment string

	// EndpointURL is the complete OTLP/HTTP traces URL, for example
	// https://obs.88oakapps.com/v1/traces. Leaving it empty turns tracing
	// off: Init then installs a no-op provider.
	EndpointURL string

	// BearerToken, when set, is sent with every export request as
	// "Authorization: Bearer <token>".
	BearerToken string

	// SampleRatio selects the sampler:
	//   - negative (conventionally -1) or >= 1: every trace is sampled;
	//   - exactly 0: nothing is sampled;
	//   - anything in between: that fraction of root traces is sampled,
	//     while a sampling decision made by a parent span takes precedence.
	SampleRatio float64

	// Insecure makes the exporter use plain HTTP instead of TLS. Intended
	// for local testing; an http:// EndpointURL has the same effect.
	Insecure bool
}
// Init configures the global TracerProvider and text-map propagator and
// returns a shutdown function. Call shutdown on graceful exit so spans still
// in flight get flushed.
//
// Init is safe to call when cfg.EndpointURL is empty: it installs a no-op
// provider and returns a no-op shutdown.
//
// An error is returned when cfg.EndpointURL cannot be parsed or has no host,
// when the resource cannot be built, or when the OTLP exporter cannot be
// created. In every error case the returned shutdown function is nil.
func Init(ctx context.Context, cfg Config) (shutdown func(context.Context) error, err error) {
	if cfg.EndpointURL == "" {
		log.Info().Msg("tracing: no OBS_TRACES_URL configured, installing no-op tracer")
		otel.SetTracerProvider(noop.NewTracerProvider())
		return func(context.Context) error { return nil }, nil
	}

	parsed, err := url.Parse(cfg.EndpointURL)
	if err != nil {
		return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: %w", cfg.EndpointURL, err)
	}
	// A scheme-less value such as "obs.example.com/v1/traces" parses without
	// error but leaves Host empty; reject it here instead of building an
	// exporter that has no endpoint to send to.
	if parsed.Host == "" {
		return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: missing host (expected a full URL such as https://host/v1/traces)", cfg.EndpointURL)
	}

	// Build the resource before the exporter: if this step fails there is
	// then no exporter left behind that nothing would ever shut down.
	res, err := resource.Merge(resource.Default(), resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName(cfg.ServiceName),
		semconv.DeploymentEnvironmentName(cfg.Environment),
	))
	if err != nil {
		return nil, fmt.Errorf("build resource: %w", err)
	}

	var sampler sdktrace.Sampler
	switch {
	case cfg.SampleRatio < 0, cfg.SampleRatio >= 1:
		// Negative is the "debug" convention; >= 1 means 100%.
		sampler = sdktrace.AlwaysSample()
	case cfg.SampleRatio == 0:
		sampler = sdktrace.NeverSample()
	default:
		// ParentBased so the inbound parent's sampling decision wins;
		// otherwise root-span ratio applies.
		sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.SampleRatio))
	}

	opts := []otlptracehttp.Option{
		otlptracehttp.WithEndpoint(parsed.Host),
		otlptracehttp.WithURLPath(parsed.Path),
		otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
		otlptracehttp.WithTimeout(10 * time.Second),
	}
	if cfg.Insecure || parsed.Scheme == "http" {
		opts = append(opts, otlptracehttp.WithInsecure())
	}
	if cfg.BearerToken != "" {
		opts = append(opts, otlptracehttp.WithHeaders(map[string]string{
			"Authorization": "Bearer " + cfg.BearerToken,
		}))
	}

	// Nothing after this point can fail, so the exporter is always handed to
	// the tracer provider, whose Shutdown is what the caller receives.
	exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
	if err != nil {
		return nil, fmt.Errorf("create OTLP exporter: %w", err)
	}

	tp := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter,
			sdktrace.WithBatchTimeout(5*time.Second),
			sdktrace.WithMaxExportBatchSize(512),
		),
		sdktrace.WithResource(res),
		sdktrace.WithSampler(sampler),
	)
	otel.SetTracerProvider(tp)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))

	log.Info().
		// Redacted masks a password embedded in the URL's userinfo, if any,
		// so credentials never reach the logs.
		Str("endpoint", parsed.Redacted()).
		Str("service", cfg.ServiceName).
		Str("env", cfg.Environment).
		Float64("sample_ratio", cfg.SampleRatio).
		Bool("auth", cfg.BearerToken != "").
		Msg("tracing: OTLP exporter initialized")

	return tp.Shutdown, nil
}
// Tracer hands back a tracer with the given instrumentation name, obtained
// from whichever TracerProvider is currently registered globally with otel.
//
// It may be called before Init; in that case the tracer comes from otel's
// default global provider and behaves as a no-op.
// NOTE(review): otel's default provider is documented to forward to a provider
// installed later — confirm against the otel version in use.
func Tracer(name string) trace.Tracer {
	provider := otel.GetTracerProvider()
	return provider.Tracer(name)
}
// SampleRatioFromEnv derives the trace sample ratio from the environment.
//
// If OTEL_TRACES_SAMPLER_ARG (surrounding whitespace ignored) holds a valid
// number, that number is returned as-is. If it is unset, empty, unparsable or
// NaN, the result is -1 ("always") when DEBUG equals "true"
// (case-insensitive) and 0.1 ("10%") otherwise.
func SampleRatioFromEnv() float64 {
	if raw := strings.TrimSpace(os.Getenv("OTEL_TRACES_SAMPLER_ARG")); raw != "" {
		// ParseFloat accepts "NaN" with a nil error, and NaN fails every
		// ordered comparison a caller could use to pick a sampler. ratio ==
		// ratio is false only for NaN, so such input is treated like any
		// other invalid value and falls through to the defaults.
		if ratio, err := strconv.ParseFloat(raw, 64); err == nil && ratio == ratio {
			return ratio
		}
	}
	if strings.EqualFold(os.Getenv("DEBUG"), "true") {
		return -1
	}
	return 0.1
}