From 92d7101180e4bc6d77fd8b14193e8b040c134d55 Mon Sep 17 00:00:00 2001 From: Jean-Hadrien Chabran Date: Tue, 18 Oct 2022 12:23:21 +0200 Subject: [PATCH] otel: add tailsamplingprocessor to collector build (#42444) Co-authored-by: William Bezuidenhout --- .../builder.template.yaml | 2 + .../configs/jaeger-tailsampling.yaml | 59 +++++++++++++++++++ .../configs/jaeger.yaml | 2 +- internal/instrumentation/http.go | 37 ++++++++++++ sg.config.yaml | 2 +- 5 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 docker-images/opentelemetry-collector/configs/jaeger-tailsampling.yaml diff --git a/docker-images/opentelemetry-collector/builder.template.yaml b/docker-images/opentelemetry-collector/builder.template.yaml index 82087ec56ad..55250a350e6 100644 --- a/docker-images/opentelemetry-collector/builder.template.yaml +++ b/docker-images/opentelemetry-collector/builder.template.yaml @@ -36,4 +36,6 @@ extensions: - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension v$OTEL_COLLECTOR_VERSION processors: + # Contrib extensions - https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor v$OTEL_COLLECTOR_VERSION + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor v$OTEL_COLLECTOR_VERSION diff --git a/docker-images/opentelemetry-collector/configs/jaeger-tailsampling.yaml b/docker-images/opentelemetry-collector/configs/jaeger-tailsampling.yaml new file mode 100644 index 00000000000..214b824c439 --- /dev/null +++ b/docker-images/opentelemetry-collector/configs/jaeger-tailsampling.yaml @@ -0,0 +1,59 @@ +# Export traces to a Jaeger instance, with tail sampling configured. 
+# +# Variables: +# +# - $JAEGER_HOST + +receivers: + otlp: + protocols: + grpc: # port 4317 + http: # port 4318 + +exporters: + jaeger: + # Default Jaeger GRPC server + endpoint: "$JAEGER_HOST:14250" + tls: + insecure: true + +extensions: + health_check: + port: 13133 + zpages: + endpoint: ":55679" + +service: + extensions: [health_check,zpages] + pipelines: + traces: + receivers: [otlp] + processors: [tail_sampling] + exporters: [jaeger] + +processors: + tail_sampling: + # Wait time since the first span of a trace before making a sampling decision + decision_wait: 30s # default value = 30s + # Number of traces kept in memory + num_traces: 50000 # default value = 50000 + # Expected number of new traces (helps in allocating data structures) + expected_new_traces_per_sec: 10 # default value = 0 + # Recommended reading to understand how the policies are applied: + # https://sourcegraph.com/github.com/open-telemetry/opentelemetry-collector-contrib@71dd19d2e59cd1f8aa9844461089d5c17efaa0ca/-/blob/processor/tailsamplingprocessor/processor.go?L214 + policies: + [ + { + # If a span contains `sampling.retain: true`, it will always be sampled (not dropped), + # regardless of the probabilistic sampling. + name: policy-retain, + type: string_attribute, + string_attribute: {key: sampling.retain, values: ['true']}, + }, + { + # Only keep 10% of the traces. 
+ name: policy-probalistic, + type: probabilistic, + probabilistic: {sampling_percentage: 10} + } + ] diff --git a/docker-images/opentelemetry-collector/configs/jaeger.yaml b/docker-images/opentelemetry-collector/configs/jaeger.yaml index 6e0d6c49618..fc4fbba8602 100644 --- a/docker-images/opentelemetry-collector/configs/jaeger.yaml +++ b/docker-images/opentelemetry-collector/configs/jaeger.yaml @@ -21,7 +21,7 @@ extensions: health_check: port: 13133 zpages: - endpoint: "localhost:55679" + endpoint: ":55679" service: extensions: [health_check,zpages] diff --git a/internal/instrumentation/http.go b/internal/instrumentation/http.go index 2fb489bb421..8e207f10cc3 100644 --- a/internal/instrumentation/http.go +++ b/internal/instrumentation/http.go @@ -1,10 +1,14 @@ package instrumentation import ( + "context" "fmt" "net/http" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "github.com/sourcegraph/sourcegraph/internal/trace/policy" ) @@ -21,6 +25,7 @@ func HTTPMiddleware(operation string, h http.Handler, opts ...otelhttp.Option) h instrumentedHandler := otelhttp.NewHandler(h, operation, append( []otelhttp.Option{ + otelhttp.WithTracerProvider(&samplingRetainTracerProvider{}), otelhttp.WithFilter(func(r *http.Request) bool { return policy.ShouldTrace(r.Context()) }), @@ -48,3 +53,35 @@ func HTTPMiddleware(operation string, h http.Handler, opts ...otelhttp.Option) h instrumentedHandler.ServeHTTP(w, r.WithContext(policy.WithShouldTrace(r.Context(), trace))) }) } + +// Experimental: in order to mitigate the amount of traces sent by components which are not +// respecting the tracing policy, we can delegate the final decision to the collector, +// and merely indicate that when it's selective or all, we want requests to be retained. 
+// +// By setting the "sampling.retain" attribute on the span, a sampling policy will match on the OTEL Collector +// and explicitly sample (i.e. keep it) the present trace. +// +// To achieve that, it shims the default TracerProvider with samplingRetainTracerProvider to inject +// the attribute at the beginning of the span, which is mandatory to perform sampling. +type samplingRetainTracerProvider struct{} +type samplingRetainTracer struct { + tracer trace.Tracer +} + +func (p *samplingRetainTracerProvider) Tracer(instrumentationName string, opts ...trace.TracerOption) trace.Tracer { + return &samplingRetainTracer{tracer: otel.GetTracerProvider().Tracer(instrumentationName, opts...)} +} + +// samplingRetainKey is the attribute key used to mark a span as to be retained. +var samplingRetainKey = "sampling.retain" + +// Start will only inject the attribute if this trace has been explicitly asked to be traced. +func (t *samplingRetainTracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { + if policy.ShouldTrace(ctx) { + attrOpts := []trace.SpanStartOption{ + trace.WithAttributes(attribute.String(samplingRetainKey, "true")), + } + return t.tracer.Start(ctx, spanName, append(attrOpts, opts...)...) + } + return t.tracer.Start(ctx, spanName, opts...) +} diff --git a/sg.config.yaml b/sg.config.yaml index feb29152a0a..dbb3a75d211 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -729,7 +729,7 @@ commands: docker container rm otel-collector docker run --rm --name=otel-collector $DOCKER_NET $DOCKER_ARGS \ - -p 4317:4317 -p 4318:4318 -p 55679:55679 \ + -p 4317:4317 -p 4318:4318 -p 55679:55679 -p 55670:55670 \ -e JAEGER_HOST=$JAEGER_HOST \ -e HONEYCOMB_API_KEY=$HONEYCOMB_API_KEY \ -e HONEYCOMB_DATASET=$HONEYCOMB_DATASET \