otel: add tailsamplingprocessor to collector build (#42444)

Co-authored-by: William Bezuidenhout <william.bezuidenhout@sourcegraph.com>
Jean-Hadrien Chabran 2022-10-18 12:23:21 +02:00 committed by GitHub
parent 9c0b451195
commit 92d7101180
5 changed files with 100 additions and 2 deletions


@@ -36,4 +36,6 @@ extensions:
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension v$OTEL_COLLECTOR_VERSION
processors:
# Contrib extensions - https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor v$OTEL_COLLECTOR_VERSION
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor v$OTEL_COLLECTOR_VERSION


@@ -0,0 +1,59 @@
# Export traces to a Jaeger instance, with tail sampling configured.
#
# Variables:
#
# - $JAEGER_HOST
receivers:
otlp:
protocols:
grpc: # port 4317
http: # port 4318
exporters:
jaeger:
# Default Jaeger GRPC server
endpoint: "$JAEGER_HOST:14250"
tls:
insecure: true
extensions:
health_check:
port: 13133
zpages:
endpoint: ":55679"
service:
extensions: [health_check,zpages]
pipelines:
traces:
receivers: [otlp]
processors: [tail_sampling]
exporters: [jaeger]
processors:
tail_sampling:
# Wait time since the first span of a trace before making a sampling decision
decision_wait: 30s # default value = 30s
# Number of traces kept in memory
num_traces: 50000 # default value = 50000
# Expected number of new traces (helps in allocating data structures)
expected_new_traces_per_sec: 10 # default value = 0
# Recommended reading to understand how the policies are applied:
# https://sourcegraph.com/github.com/open-telemetry/opentelemetry-collector-contrib@71dd19d2e59cd1f8aa9844461089d5c17efaa0ca/-/blob/processor/tailsamplingprocessor/processor.go?L214
policies:
[
{
# If a span carries the attribute `sampling.retain: "true"`, it will always be sampled (kept, not dropped),
# regardless of the probabilistic sampling.
name: policy-retain,
type: string_attribute,
string_attribute: {key: sampling.retain, values: ['true']},
},
{
# Only keep 10% of the traces.
name: policy-probabilistic,
type: probabilistic,
probabilistic: {sampling_percentage: 10}
}
]
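For illustration, a minimal sketch in Go (package, function, and span names here are made up) of how an instrumented service causes the policy-retain rule above to match, assuming, per the linked processor.go, that a trace is kept as soon as any one policy decides to sample it. Setting the attribute at span start mirrors what the samplingRetainTracerProvider shim later in this commit does automatically.

package example

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// handleImportantRequest starts a span carrying sampling.retain="true", which the
// string_attribute policy (policy-retain) above matches, so the whole trace is kept
// even though policy-probabilistic drops roughly 90% of ordinary traffic.
func handleImportantRequest(ctx context.Context) {
	tracer := otel.Tracer("example")
	ctx, span := tracer.Start(ctx, "important-request",
		// Attribute set at span start, as the samplingRetainTracerProvider shim does.
		trace.WithAttributes(attribute.String("sampling.retain", "true")),
	)
	defer span.End()

	_ = ctx // ... do the actual work with ctx ...
}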


@@ -21,7 +21,7 @@ extensions:
health_check:
port: 13133
zpages:
endpoint: "localhost:55679"
endpoint: ":55679"
service:
extensions: [health_check,zpages]
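With the zpages endpoint now bound to all interfaces (matching the ports published by the docker run command at the end of this commit), the collector's debug pages and health check are reachable from the host. A minimal sketch, assuming the extensions' default paths (/ for health_check on 13133, /debug/tracez for zpages on 55679) and a locally running collector:

package example

import (
	"fmt"
	"net/http"
)

// checkCollector pings the health_check and zpages extensions configured above.
// The URLs assume the collector's ports are published on localhost.
func checkCollector() error {
	for _, url := range []string{
		"http://localhost:13133/",             // health_check extension
		"http://localhost:55679/debug/tracez", // zpages trace debug page
	} {
		resp, err := http.Get(url)
		if err != nil {
			return err
		}
		resp.Body.Close()
		fmt.Println(url, resp.Status)
	}
	return nil
}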


@@ -1,10 +1,14 @@
package instrumentation
import (
"context"
"fmt"
"net/http"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"github.com/sourcegraph/sourcegraph/internal/trace/policy"
)
@@ -21,6 +25,7 @@ func HTTPMiddleware(operation string, h http.Handler, opts ...otelhttp.Option) h
instrumentedHandler := otelhttp.NewHandler(h, operation,
append(
[]otelhttp.Option{
otelhttp.WithTracerProvider(&samplingRetainTracerProvider{}),
otelhttp.WithFilter(func(r *http.Request) bool {
return policy.ShouldTrace(r.Context())
}),
@@ -48,3 +53,35 @@ func HTTPMiddleware(operation string, h http.Handler, opts ...otelhttp.Option) h
instrumentedHandler.ServeHTTP(w, r.WithContext(policy.WithShouldTrace(r.Context(), trace)))
})
}
// Experimental: in order to mitigate the number of traces sent by components that do not
// respect the tracing policy, we can delegate the final decision to the collector and merely
// indicate, when the policy is "selective" or "all", that we want the request's trace to be retained.
//
// By setting the "sampling.retain" attribute on the span, a sampling policy on the OTel Collector
// will match and explicitly sample (i.e. keep) the present trace.
//
// To achieve that, samplingRetainTracerProvider shims the default TracerProvider to inject
// the attribute at the beginning of the span, which is necessary for the sampling decision to take it into account.
type samplingRetainTracerProvider struct{}
type samplingRetainTracer struct {
tracer trace.Tracer
}
func (p *samplingRetainTracerProvider) Tracer(instrumentationName string, opts ...trace.TracerOption) trace.Tracer {
return &samplingRetainTracer{tracer: otel.GetTracerProvider().Tracer(instrumentationName, opts...)}
}
// samplingRetainKey is the attribute key used to mark a span as one to be retained.
var samplingRetainKey = "sampling.retain"
// Start only injects the attribute if this trace has explicitly been asked to be traced.
func (t *samplingRetainTracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
if policy.ShouldTrace(ctx) {
attrOpts := []trace.SpanStartOption{
trace.WithAttributes(attribute.String(samplingRetainKey, "true")),
}
return t.tracer.Start(ctx, spanName, append(attrOpts, opts...)...)
}
return t.tracer.Start(ctx, spanName, opts...)
}
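For context, a rough sketch of how a service wires this middleware (the import path and handler are illustrative, not taken from this commit): requests whose context carries the should-trace flag get spans stamped with sampling.retain, which the policy-retain rule in the tail sampling config above then keeps.

package main

import (
	"net/http"

	// Illustrative import path; the package shown in this diff is named "instrumentation".
	"github.com/sourcegraph/sourcegraph/internal/instrumentation"
)

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte("ok"))
	})

	// HTTPMiddleware only starts spans when policy.ShouldTrace(r.Context()) is true;
	// samplingRetainTracerProvider then adds sampling.retain="true" to those spans so
	// the collector's tail_sampling policy retains the whole trace.
	handler := instrumentation.HTTPMiddleware("frontend", mux)

	_ = http.ListenAndServe(":8080", handler)
}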


@@ -729,7 +729,7 @@ commands:
docker container rm otel-collector
docker run --rm --name=otel-collector $DOCKER_NET $DOCKER_ARGS \
-p 4317:4317 -p 4318:4318 -p 55679:55679 \
-p 4317:4317 -p 4318:4318 -p 55679:55679 -p 55670:55670 \
-e JAEGER_HOST=$JAEGER_HOST \
-e HONEYCOMB_API_KEY=$HONEYCOMB_API_KEY \
-e HONEYCOMB_DATASET=$HONEYCOMB_DATASET \