3f5bf21e09
Pods crashed at startup with "build resource: conflicting Schema URL: https://opentelemetry.io/schemas/1.40.0 and https://opentelemetry.io/schemas/1.27.0" because resource.Default() in the SDK targets v1.40.0. Aligning here. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
162 lines
5.0 KiB
Go
162 lines
5.0 KiB
Go
// Package tracing wires the OpenTelemetry SDK with an OTLP/HTTP exporter
|
|
// targeting obs.88oakapps.com (Jaeger all-in-one behind nginx + bearer auth).
|
|
//
|
|
// The package owns the global TracerProvider for the api process; everything
|
|
// else acquires a tracer via tracing.Tracer(name).
|
|
//
|
|
// Sampling defaults to AlwaysSample in DEBUG mode and
// ParentBased(TraceIDRatioBased(0.1)) otherwise; the ratio is controllable
// via OTEL_TRACES_SAMPLER_ARG.
|
|
package tracing
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
"go.opentelemetry.io/otel"
|
|
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
|
|
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
|
"go.opentelemetry.io/otel/propagation"
|
|
"go.opentelemetry.io/otel/sdk/resource"
|
|
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
|
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
|
"go.opentelemetry.io/otel/trace"
|
|
"go.opentelemetry.io/otel/trace/noop"
|
|
)
|
|
|
|
// Config describes the tracer provider that Init installs as the global
// provider.
type Config struct {
	// ServiceName is recorded on the resource as service.name. Required.
	ServiceName string

	// Environment is recorded on the resource as the deployment environment
	// name. By convention one of "prod", "dev" or "local".
	Environment string

	// EndpointURL is the complete OTLP/HTTP traces URL, for example
	// https://obs.88oakapps.com/v1/traces. When it is empty tracing is
	// disabled and Init installs a no-op provider.
	EndpointURL string

	// BearerToken is sent as "Authorization: Bearer <token>" when non-empty.
	BearerToken string

	// SampleRatio is the fraction of root traces to sample: 1.0 samples
	// everything, 0.1 samples 10%, and 0 samples nothing. A negative value
	// (conventionally -1) selects AlwaysSample, intended for debugging.
	SampleRatio float64

	// Insecure forces plain HTTP. Only useful for local testing.
	Insecure bool
}
|
|
|
|
// Init configures the global TracerProvider and returns a shutdown function.
|
|
// Call shutdown on graceful exit so spans in flight get flushed.
|
|
//
|
|
// Init is safe to call when EndpointURL is empty: it installs a no-op
|
|
// provider and returns a no-op shutdown.
|
|
func Init(ctx context.Context, cfg Config) (shutdown func(context.Context) error, err error) {
|
|
if cfg.EndpointURL == "" {
|
|
log.Info().Msg("tracing: no OBS_TRACES_URL configured, installing no-op tracer")
|
|
otel.SetTracerProvider(noop.NewTracerProvider())
|
|
return func(context.Context) error { return nil }, nil
|
|
}
|
|
|
|
parsed, err := url.Parse(cfg.EndpointURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid OBS_TRACES_URL %q: %w", cfg.EndpointURL, err)
|
|
}
|
|
|
|
opts := []otlptracehttp.Option{
|
|
otlptracehttp.WithEndpoint(parsed.Host),
|
|
otlptracehttp.WithURLPath(parsed.Path),
|
|
otlptracehttp.WithCompression(otlptracehttp.GzipCompression),
|
|
otlptracehttp.WithTimeout(10 * time.Second),
|
|
}
|
|
if cfg.Insecure || parsed.Scheme == "http" {
|
|
opts = append(opts, otlptracehttp.WithInsecure())
|
|
}
|
|
if cfg.BearerToken != "" {
|
|
opts = append(opts, otlptracehttp.WithHeaders(map[string]string{
|
|
"Authorization": "Bearer " + cfg.BearerToken,
|
|
}))
|
|
}
|
|
|
|
exporter, err := otlptrace.New(ctx, otlptracehttp.NewClient(opts...))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create OTLP exporter: %w", err)
|
|
}
|
|
|
|
res, err := resource.Merge(resource.Default(), resource.NewWithAttributes(
|
|
semconv.SchemaURL,
|
|
semconv.ServiceName(cfg.ServiceName),
|
|
semconv.DeploymentEnvironmentName(cfg.Environment),
|
|
))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build resource: %w", err)
|
|
}
|
|
|
|
var sampler sdktrace.Sampler
|
|
switch {
|
|
case cfg.SampleRatio < 0:
|
|
sampler = sdktrace.AlwaysSample()
|
|
case cfg.SampleRatio == 0:
|
|
sampler = sdktrace.NeverSample()
|
|
case cfg.SampleRatio >= 1:
|
|
sampler = sdktrace.AlwaysSample()
|
|
default:
|
|
// ParentBased so the inbound parent's sampling decision wins;
|
|
// otherwise root-span ratio applies.
|
|
sampler = sdktrace.ParentBased(sdktrace.TraceIDRatioBased(cfg.SampleRatio))
|
|
}
|
|
|
|
tp := sdktrace.NewTracerProvider(
|
|
sdktrace.WithBatcher(exporter,
|
|
sdktrace.WithBatchTimeout(5*time.Second),
|
|
sdktrace.WithMaxExportBatchSize(512),
|
|
),
|
|
sdktrace.WithResource(res),
|
|
sdktrace.WithSampler(sampler),
|
|
)
|
|
|
|
otel.SetTracerProvider(tp)
|
|
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
|
propagation.TraceContext{},
|
|
propagation.Baggage{},
|
|
))
|
|
|
|
log.Info().
|
|
Str("endpoint", cfg.EndpointURL).
|
|
Str("service", cfg.ServiceName).
|
|
Str("env", cfg.Environment).
|
|
Float64("sample_ratio", cfg.SampleRatio).
|
|
Bool("auth", cfg.BearerToken != "").
|
|
Msg("tracing: OTLP exporter initialized")
|
|
|
|
return tp.Shutdown, nil
|
|
}
|
|
|
|
// Tracer returns a named tracer from the global provider. Safe to call before
|
|
// Init (returns a no-op tracer in that case).
|
|
func Tracer(name string) trace.Tracer {
|
|
return otel.Tracer(name)
|
|
}
|
|
|
|
// SampleRatioFromEnv derives a sample ratio from the process environment.
//
// If OTEL_TRACES_SAMPLER_ARG (surrounding whitespace ignored) parses as a
// float, that value is returned as-is. If it is unset, empty, unparsable, or
// NaN, the result is -1 ("always") when DEBUG equals "true"
// (case-insensitive) and 0.1 ("10%") otherwise.
func SampleRatioFromEnv() float64 {
	if raw := strings.TrimSpace(os.Getenv("OTEL_TRACES_SAMPLER_ARG")); raw != "" {
		// ParseFloat accepts "NaN" without an error. NaN compares false
		// against every threshold a caller could test it with, so treat it
		// like any other unusable value and fall through to the defaults.
		// ratio == ratio is false only for NaN.
		if ratio, err := strconv.ParseFloat(raw, 64); err == nil && ratio == ratio {
			return ratio
		}
	}
	if strings.EqualFold(os.Getenv("DEBUG"), "true") {
		return -1
	}
	return 0.1
}
|