diff --git a/CHANGELOG.md b/CHANGELOG.md index cd5778a5474..8196266879e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ All notable changes to Sourcegraph are documented in this file. ### Added -- +- Added experimental support for exporting traces to an OpenTelemetry collector with `"observability.tracing": { "type": "opentelemetry" }` [#37984](https://github.com/sourcegraph/sourcegraph/pull/37984) ### Changed diff --git a/client/search-ui/src/results/progress/StreamingProgressCount.tsx b/client/search-ui/src/results/progress/StreamingProgressCount.tsx index 67191d01af1..3827aff7c11 100644 --- a/client/search-ui/src/results/progress/StreamingProgressCount.tsx +++ b/client/search-ui/src/results/progress/StreamingProgressCount.tsx @@ -80,7 +80,7 @@ export const StreamingProgressCount: React.FunctionComponent< )} {showTrace && progress.trace && ( - + View trace diff --git a/client/search-ui/src/results/progress/__snapshots__/StreamingProgressCount.test.tsx.snap b/client/search-ui/src/results/progress/__snapshots__/StreamingProgressCount.test.tsx.snap index edd3b56cf37..c64acae552e 100644 --- a/client/search-ui/src/results/progress/__snapshots__/StreamingProgressCount.test.tsx.snap +++ b/client/search-ui/src/results/progress/__snapshots__/StreamingProgressCount.test.tsx.snap @@ -297,7 +297,7 @@ exports[`StreamingProgressCount should render correctly when a trace url is prov NOTE: For how to *use* Sourcegraph's observability and an overview of our observability features, refer to the [observability for site administrators documentation](../../admin/observability/index.md). diff --git a/doc/dev/how-to/otel_local_dev.md b/doc/dev/how-to/otel_local_dev.md new file mode 100644 index 00000000000..d4f280fc09b --- /dev/null +++ b/doc/dev/how-to/otel_local_dev.md @@ -0,0 +1,11 @@ +# Set up local Sourcegraph OpenTelemetry development + +> WARNING: OpenTelemetry support is a work in progress, and so are these docs! + +## Tracing + +1. 
Set `dev-private` site config to use `opentelemetry` +2. `sg start otel` -> runs `otel-collector` and `jaeger` +3. `sg start` +4. Run a complex query with `&trace=1`, e.g. [`foobar(...) patterntype:structural`](https://sourcegraph.test:3443/search?q=context%3Aglobal+foobar%28...%29&patternType=structural&trace=1) +5. Click `View trace` diff --git a/doc/dev/index.md b/doc/dev/index.md index 4269c0332a8..90bee734449 100644 --- a/doc/dev/index.md +++ b/doc/dev/index.md @@ -164,6 +164,8 @@ Guides to help with troubleshooting, configuring test instances, debugging, and - [How to add logging](how-to/add_logging.md) - [How to find monitoring](how-to/find_monitoring.md) - [How to add monitoring](how-to/add_monitoring.md) +- [Set up local monitoring development](how-to/monitoring_local_dev.md) +- [Set up local OpenTelemetry development](how-to/otel_local_dev.md) ### Documentation diff --git a/enterprise/internal/insights/background/historical_enqueuer.go b/enterprise/internal/insights/background/historical_enqueuer.go index 3218ef2dac9..00a10b1ff00 100644 --- a/enterprise/internal/insights/background/historical_enqueuer.go +++ b/enterprise/internal/insights/background/historical_enqueuer.go @@ -505,7 +505,7 @@ func (a *backfillAnalyzer) buildForRepo(ctx context.Context, definitions []itype } span.Finish() }() - traceId := trace.IDFromSpan(span) + traceId := trace.ID(ctx) // We are encountering a problem where it seems repositories go missing, so this is overly-noisy logging to try and get a complete picture log15.Info("[historical_enqueuer_backfill] buildForRepo start", "repo_id", id, "repo_name", repoName, "traceId", traceId) diff --git a/go.mod b/go.mod index 65287a03a4d..2164407e1aa 100644 --- a/go.mod +++ b/go.mod @@ -164,6 +164,12 @@ require ( github.com/xeonx/timeago v1.0.0-rc4 github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9 go.etcd.io/bbolt v1.3.6 + go.opentelemetry.io/contrib/propagators/ot v1.7.0 + go.opentelemetry.io/otel v1.7.0 +
go.opentelemetry.io/otel/bridge/opentracing v1.7.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 + go.opentelemetry.io/otel/sdk v1.7.0 + go.opentelemetry.io/otel/trace v1.7.0 go.uber.org/atomic v1.9.0 go.uber.org/automaxprocs v1.5.1 go.uber.org/ratelimit v0.2.0 @@ -190,6 +196,15 @@ require ( sigs.k8s.io/yaml v1.3.0 ) +require ( + github.com/cenkalti/backoff/v4 v4.1.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect + go.opentelemetry.io/proto/otlp v0.16.0 // indirect +) + require ( bitbucket.org/creachadair/shell v0.0.7 // indirect cloud.google.com/go v0.101.0 // indirect @@ -390,7 +405,7 @@ require ( golang.org/x/text v0.3.7 // indirect golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/grpc v1.46.0 // indirect + google.golang.org/grpc v1.46.0 gopkg.in/DataDog/dd-trace-go.v1 v1.38.1 // indirect gopkg.in/alexcesaro/statsd.v2 v2.0.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index d79ceabb44e..68693d592a1 100644 --- a/go.sum +++ b/go.sum @@ -415,6 +415,8 @@ github.com/cenkalti/backoff v1.1.1-0.20171020064038-309aa717adbf/go.mod h1:90ReR github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/certifi/gocertifi v0.0.0-20210507211836-431795d63e8d h1:S2NE3iHSwP0XV47EEXL8mWmRdEfGscSJ+7EgePNgt0s= @@ -829,8 +831,11 @@ github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KE github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= @@ -1306,6 +1311,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/grpc-ecosystem/grpc-gateway v1.9.2/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-opentracing 
v0.0.0-20180507213350-8e809c8a8645/go.mod h1:6iZfnjpejD4L/4DwD7NryNaJyCQdzwWwH2MWhCA90Kw= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hanwen/go-fuse v1.0.0/go.mod h1:unqXarDXqzAk0rt98O2tVndEPIpUgLD9+rwFisZH3Ok= @@ -2227,6 +2234,7 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stripe/stripe-go v70.15.0+incompatible h1:hNML7M1zx8RgtepEMlxyu/FpVPrP7KZm1gPFQquJQvM= @@ -2442,19 +2450,37 @@ go.opentelemetry.io/contrib v0.21.0/go.mod h1:EH4yDYeNoaTqn/8yCWQmfNB78VHfGX2Jt2 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.21.0/go.mod h1:Vm5u/mtkj1OMhtao0v+BGo2LUoLCgHYXvRmj0jWITlE= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.21.0/go.mod h1:a9cocRplhIBkUAJmak+BPDx+LVL7cTmqUPB0uBcTA4k= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.21.0/go.mod h1:JQAtechjxLEL81EjmbRwxBq/XEzGaHcsPuDHAx54hg4= +go.opentelemetry.io/contrib/propagators/ot v1.7.0 h1:KPPToDRxyY/HI3qD4RqwWRbaQ65RIpF8uKWDqWkFHDA= +go.opentelemetry.io/contrib/propagators/ot v1.7.0/go.mod h1:5qxBZR730yb71uXc3bazxt2Si8o8LQK3iJTnSLca/BU= go.opentelemetry.io/otel v0.11.0/go.mod h1:G8UCk+KooF2HLkgo8RHX9epABH/aRGYET7gQOqBVdB0= go.opentelemetry.io/otel v1.0.0-RC1/go.mod h1:x9tRa9HK4hSSq7jf2TKbqFbtt58/TGk0f9XiEYISI1I= +go.opentelemetry.io/otel v1.7.0 
h1:Z2lA3Tdch0iDcrhJXDIlC94XE+bxok1F9B+4Lz/lGsM= +go.opentelemetry.io/otel v1.7.0/go.mod h1:5BdUoMIz5WEs0vt0CUEMtSSaTSHBBVwrhnz7+nrD5xk= +go.opentelemetry.io/otel/bridge/opentracing v1.7.0 h1:eNKHKfoez0+vGdJiatcvRrA3kO4GRPOm8hbTe0zGfCA= +go.opentelemetry.io/otel/bridge/opentracing v1.7.0/go.mod h1:JUzUxkMgJUc9QjHk4R+6na0LRq6TuQivCodD2LX1vH8= go.opentelemetry.io/otel/exporters/jaeger v1.0.0-RC1/go.mod h1:FXJnjGCoTQL6nQ8OpFJ0JI1DrdOvMoVx49ic0Hg4+D4= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 h1:7Yxsak1q4XrJ5y7XBnNwqWx9amMZvoidCctv62XOQ6Y= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0/go.mod h1:M1hVZHNxcbkAlcvrOMlpQ4YOO3Awf+4N2dxkZL3xm04= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.0.0-RC1/go.mod h1:FliQjImlo7emZVjixV8nbDMAa4iAkcWTE9zzSEOiEPw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 h1:cMDtmgJ5FpRvqx9x2Aq+Mm0O6K/zcUkH73SFz20TuBw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0/go.mod h1:ceUgdyfNv4h4gLxHR0WNfDiiVmZFodZhZSbOLhpxqXE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.0.0-RC1/go.mod h1:cDwRc2Jrh5Gku1peGK8p9rRuX/Uq2OtVmLicjlw2WYU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 h1:MFAyzUPrTwLOwCi+cltN0ZVyy4phU41lwH+lyMyQTS4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0/go.mod h1:E+/KKhwOSw8yoPxSSuUHG6vKppkvhN+S1Jc7Nib3k3o= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.0.0-RC1/go.mod h1:OYKzEoxgXFvehW7X12WYT4/a2BlASJK9l7RtG4A91fg= go.opentelemetry.io/otel/internal/metric v0.21.0/go.mod h1:iOfAaY2YycsXfYD4kaRSbLx2LKmfpKObWBEv9QK5zFo= go.opentelemetry.io/otel/metric v0.21.0/go.mod h1:JWCt1bjivC4iCrz/aCrM1GSw+ZcvY44KCbaeeRhzHnc= go.opentelemetry.io/otel/oteltest v1.0.0-RC1/go.mod h1:+eoIG0gdEOaPNftuy1YScLr1Gb4mL/9lpDkZ0JjMRq4= go.opentelemetry.io/otel/sdk v1.0.0-RC1/go.mod h1:kj6yPn7Pgt5ByRuwesbaWcRLA+V7BSDg3Hf8xRvsvf8= +go.opentelemetry.io/otel/sdk v1.7.0 
h1:4OmStpcKVOfvDOgCt7UriAPtKolwIhxpnSNI/yK+1B0= +go.opentelemetry.io/otel/sdk v1.7.0/go.mod h1:uTEOTwaqIVuTGiJN7ii13Ibp75wJmYUDe374q6cZwUU= go.opentelemetry.io/otel/trace v1.0.0-RC1/go.mod h1:86UHmyHWFEtWjfWPSbu0+d0Pf9Q6e1U+3ViBOc+NXAg= +go.opentelemetry.io/otel/trace v1.7.0 h1:O37Iogk1lEkMRXewVtZ1BBTVn5JEp8GrJvP92bJqC6o= +go.opentelemetry.io/otel/trace v1.7.0/go.mod h1:fzLSB9nqR2eXzxPXb2JW9IKE+ScyXA48yyE4TNvoHqU= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.9.0/go.mod h1:1vKfU9rv61e9EVGthD1zNvUbiwPcimSsOPU9brfSHJg= +go.opentelemetry.io/proto/otlp v0.16.0 h1:WHzDWdXUvbc5bG2ObdrGfaNpQz7ft7QN9HHmJlbiB1E= +go.opentelemetry.io/proto/otlp v0.16.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5/go.mod h1:nmDLcffg48OtT/PSW0Hg7FvpRQsQh5OSqIylirxKC7o= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -2466,8 +2492,9 @@ go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/automaxprocs v1.5.1 h1:e1YG66Lrk73dn4qhg8WFSvhF0JuFQF0ERIp4rpuV8Qk= go.uber.org/automaxprocs v1.5.1/go.mod h1:BF4eumQw0P9GtnuxxovUd06vwm1o18oMzFtK66vU6XU= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= -go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= @@ -2794,6 +2821,7 @@ 
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210502180810-71e4cd670f79/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -3178,6 +3206,7 @@ google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= google.golang.org/grpc v1.46.0 h1:oCjezcn6g6A75TGoKYBPgKmVBLexhYLM6MebdrPApP8= diff --git a/internal/search/backend/metered_searcher.go b/internal/search/backend/metered_searcher.go index b71f194ffba..8f92af54f0a 100644 --- a/internal/search/backend/metered_searcher.go +++ b/internal/search/backend/metered_searcher.go @@ -7,12 +7,12 @@ import ( "github.com/google/zoekt" "github.com/google/zoekt/query" - "github.com/inconshreveable/log15" "github.com/keegancsmith/rpc" "github.com/opentracing/opentracing-go" 
"github.com/opentracing/opentracing-go/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + sglog "github.com/sourcegraph/log" "github.com/sourcegraph/sourcegraph/internal/honey" "github.com/sourcegraph/sourcegraph/internal/trace" @@ -30,12 +30,14 @@ type meteredSearcher struct { zoekt.Streamer hostname string + log sglog.Logger } func NewMeteredSearcher(hostname string, z zoekt.Streamer) zoekt.Streamer { return &meteredSearcher{ Streamer: z, hostname: hostname, + log: sglog.Scoped("meteredSearcher", "wraps zoekt.Streamer with observability"), } } @@ -101,7 +103,7 @@ func (m *meteredSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoe newOpts.SpanContext = spanContext opts = &newOpts } else { - log15.Warn("meteredSearcher: Error injecting new span context into map: %s", err) + m.log.Warn("error injecting new span context into map", sglog.Error(err)) } } diff --git a/internal/trace/context.go b/internal/trace/context.go index e1d389c2383..e173fbc5a86 100644 --- a/internal/trace/context.go +++ b/internal/trace/context.go @@ -6,6 +6,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/sourcegraph/log/otfields" "github.com/uber/jaeger-client-go" + oteltrace "go.opentelemetry.io/otel/trace" "github.com/sourcegraph/sourcegraph/internal/trace/policy" ) @@ -45,20 +46,11 @@ func CopyContext(ctx context.Context, from context.Context) context.Context { // ID returns a trace ID, if any, found in the given context. If you need both trace and // span ID, use trace.Context. func ID(ctx context.Context) string { - span := opentracing.SpanFromContext(ctx) + span := Context(ctx) if span == nil { return "" } - return IDFromSpan(span) -} - -// IDFromSpan returns a trace ID, if any, found in the given span. 
-func IDFromSpan(span opentracing.Span) string { - traceCtx := ContextFromSpan(span) - if traceCtx == nil { - return "" - } - return traceCtx.TraceID + return span.TraceID } // Context retrieves the full trace context, if any, from context - this includes @@ -68,19 +60,23 @@ func Context(ctx context.Context) *otfields.TraceContext { if span == nil { return nil } - return ContextFromSpan(span) -} -// Context retrieves the full trace context, if any, from the span - this includes -// both TraceID and SpanID. -func ContextFromSpan(span opentracing.Span) *otfields.TraceContext { - spanCtx, ok := span.Context().(jaeger.SpanContext) - if ok { + // try Jaeger ("opentracing") span + if jaegerSpan, ok := span.Context().(jaeger.SpanContext); ok { return &otfields.TraceContext{ - TraceID: spanCtx.TraceID().String(), - SpanID: spanCtx.SpanID().String(), + TraceID: jaegerSpan.TraceID().String(), + SpanID: jaegerSpan.SpanID().String(), } } + // try bridged OpenTelemetry span + if otelSpan := oteltrace.SpanFromContext(ctx).SpanContext(); otelSpan.IsValid() { + return &otfields.TraceContext{ + TraceID: otelSpan.TraceID().String(), + SpanID: otelSpan.SpanID().String(), + } + } + + // no span found return nil } diff --git a/internal/trace/httptrace.go b/internal/trace/httptrace.go index b3ff2ea1ac3..4bd35bb6027 100644 --- a/internal/trace/httptrace.go +++ b/internal/trace/httptrace.go @@ -158,7 +158,7 @@ func HTTPMiddleware(logger log.Logger, next http.Handler, siteConfig conftypes.S defer span.Finish() // get trace ID - trace := ContextFromSpan(span) + trace := Context(ctx) var traceURL string if trace != nil && trace.TraceID != "" { var traceType string diff --git a/internal/trace/logger.go b/internal/trace/logger.go index 0b7482377cb..1e4ff67894e 100644 --- a/internal/trace/logger.go +++ b/internal/trace/logger.go @@ -17,7 +17,7 @@ func Logger(ctx context.Context, l log.Logger) log.Logger { if t.family != "" { l = l.Scoped(t.family, "trace family") } - if tc := 
ContextFromSpan(t.span); tc != nil { + if tc := Context(ctx); tc != nil { l = l.WithTrace(*tc) } } diff --git a/internal/tracer/jaeger.go b/internal/tracer/jaeger.go new file mode 100644 index 00000000000..4c2fb4c7f06 --- /dev/null +++ b/internal/tracer/jaeger.go @@ -0,0 +1,53 @@ +package tracer + +import ( + "fmt" + "io" + "reflect" + + "github.com/opentracing/opentracing-go" + "github.com/sourcegraph/log" + "github.com/uber/jaeger-client-go" + jaegercfg "github.com/uber/jaeger-client-go/config" + jaegermetrics "github.com/uber/jaeger-lib/metrics" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// newJaegerTracer creates a Jaeger tracer that serves as the underlying default tracer +// when using opentracing. +func newJaegerTracer(logger log.Logger, opts *options) (opentracing.Tracer, io.Closer, error) { + cfg, err := jaegercfg.FromEnv() + if err != nil { // check first: cfg must not be dereferenced when FromEnv fails + return nil, nil, errors.Wrap(err, "jaegercfg.FromEnv failed") + } + cfg.ServiceName = opts.resource.Name + cfg.Tags = append(cfg.Tags, + opentracing.Tag{Key: "service.version", Value: opts.resource.Version}, + opentracing.Tag{Key: "service.env", Value: opts.resource.Namespace}) + if reflect.DeepEqual(cfg.Sampler, &jaegercfg.SamplerConfig{}) { + // Default sampler configuration for when it is not specified via + // JAEGER_SAMPLER_* env vars. In most cases, this is sufficient + // enough to connect Sourcegraph to Jaeger without any env vars.
+ cfg.Sampler.Type = jaeger.SamplerTypeConst + cfg.Sampler.Param = 1 + } + tracer, closer, err := cfg.NewTracer( + jaegercfg.Logger(jaegerLoggerShim{logger: logger.Scoped("jaeger", "Jaeger tracer")}), + jaegercfg.Metrics(jaegermetrics.NullFactory), + ) + if err != nil { + return nil, nil, errors.Wrap(err, "jaegercfg.NewTracer failed") + } + return tracer, closer, err +} + +type jaegerLoggerShim struct { + logger log.Logger +} + +func (l jaegerLoggerShim) Error(msg string) { l.logger.Error(msg) } + +func (l jaegerLoggerShim) Infof(msg string, args ...any) { + l.logger.Info(fmt.Sprintf(msg, args...)) +} diff --git a/internal/tracer/otel.go b/internal/tracer/otel.go new file mode 100644 index 00000000000..d9e47f045f9 --- /dev/null +++ b/internal/tracer/otel.go @@ -0,0 +1,187 @@ +package tracer + +import ( + "context" + "io" + "time" + + "github.com/opentracing/opentracing-go" + "github.com/sourcegraph/log" + otpropagator "go.opentelemetry.io/contrib/propagators/ot" + "go.opentelemetry.io/otel" + otelbridge "go.opentelemetry.io/otel/bridge/opentracing" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + oteltracesdk "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + oteltrace "go.opentelemetry.io/otel/trace" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "github.com/sourcegraph/sourcegraph/internal/env" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// If the OpenTelemetry Collector is running on a local cluster (minikube or +// microk8s), it should be accessible through the NodePort service at the +// `localhost:30080` endpoint. Otherwise, replace `localhost` with the +// endpoint of your cluster. 
If you run the app inside k8s, then you can +// probably connect directly to the service through dns +// +// OTEL_EXPORTER_OTLP_ENDPOINT is the name chosen because it is used in other +// projects: https://sourcegraph.com/search?q=OTEL_EXPORTER_OTLP_ENDPOINT+-f:vendor +var otelCollectorEndpoint = env.Get("OTEL_EXPORTER_OTLP_ENDPOINT", "127.0.0.1:4317", "Address of OpenTelemetry collector") + +// newOTelBridgeTracer creates an opentracing.Tracer that exports all OpenTracing traces +// as OpenTelemetry traces to an OpenTelemetry collector (effectively "bridging" the two +// APIs). This enables us to continue leveraging the OpenTracing API (which is a predecessor +// to OpenTelemetry tracing) without making changes to existing tracing code. +func newOTelBridgeTracer(logger log.Logger, opts *options) (opentracing.Tracer, io.Closer, error) { + logger = logger.Scoped("otel", "OpenTelemetry tracer"). + With(log.String("otel-collector.endpoint", otelCollectorEndpoint)) + + // Ensure propagation between services continues to work. This is also done by another + // project that uses the OpenTracing bridge: + // https://sourcegraph.com/github.com/thanos-io/thanos/-/blob/pkg/tracing/migration/bridge.go?L62 + compositePropagator := propagation.NewCompositeTextMapPropagator(otpropagator.OT{}, propagation.TraceContext{}, propagation.Baggage{}) + otel.SetTextMapPropagator(compositePropagator) + + // Initialize OpenTelemetry processor and tracer provider + processor, err := newOTelCollectorExporter(context.Background(), logger, opts.debug) + if err != nil { + return nil, nil, err + } + provider := oteltracesdk.NewTracerProvider( + oteltracesdk.WithResource(newResource(opts.resource)), + oteltracesdk.WithSampler(oteltracesdk.AlwaysSample()), + oteltracesdk.WithSpanProcessor(processor), + ) + + // Set up bridge for converting opentracing API calls to OpenTelemetry. 
+ bridge, otelTracerProvider := otelbridge.NewTracerPair(provider.Tracer("tracer.global")) + bridge.SetTextMapPropagator(propagation.TraceContext{}) + + // Set OTel provider globally - this gets unset by otelBridgeCloser + otel.SetTracerProvider(otelTracerProvider) + + // Set up logging + otelLogger := logger.AddCallerSkip(1) // no additional scope needed, this is already otel scope + otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { otelLogger.Warn("error encountered", log.Error(err)) })) + bridgeLogger := logger.AddCallerSkip(1).Scoped("bridge", "OpenTracing to OpenTelemetry compatibility layer") + bridge.SetWarningHandler(func(msg string) { bridgeLogger.Debug(msg) }) + + // Done + return &otelBridgeTracer{bridge}, &otelBridgeCloser{provider}, nil +} + +// newOTelCollectorExporter creates a processor that exports spans to an OpenTelemetry +// collector. +func newOTelCollectorExporter(ctx context.Context, logger log.Logger, debug bool) (oteltracesdk.SpanProcessor, error) { + conn, err := grpc.DialContext(ctx, otelCollectorEndpoint, + grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, errors.Wrap(err, "failed to create gRPC connection to collector") + } + + // Set up a trace exporter + traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithGRPCConn(conn)) + if err != nil { + return nil, errors.Wrap(err, "failed to create trace exporter") + } + + // If in debug mode, we use a synchronous span processor to force spans to get pushed + // immediately. + if debug { + logger.Warn("using synchronous span processor - disable 'observability.debug' to use something more suitable for production") + return oteltracesdk.NewSimpleSpanProcessor(traceExporter), nil + } + return oteltracesdk.NewBatchSpanProcessor(traceExporter), nil +} + +// otelBridgeCloser shuts down the wrapped TracerProvider, and unsets the global OTel +// trace provider. 
+type otelBridgeCloser struct{ *oteltracesdk.TracerProvider } + +var _ io.Closer = &otelBridgeCloser{} + +func (p otelBridgeCloser) Close() error { + // unset the global provider + otel.SetTracerProvider(oteltrace.NewNoopTracerProvider()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + return p.Shutdown(ctx) +} + +// otelBridgeTracer wraps bridge.BridgeTracer with extended Inject/Extract support for +// opentracing.TextMap which is used in the codebase and similar carriers. It is adapted +// from the 'thanos-io/thanos' project +// https://sourcegraph.com/github.com/thanos-io/thanos@4de555db87d38d69b78602c1e1d0fb8ed6e0371b/-/blob/pkg/tracing/migration/bridge.go?L76-88#tab=references +// but behaves differently, disregarding the provided format if we decide to override it. +// +// The main issue is that bridge.BridgeTracer currently supports injection / +// extraction of only single carrier type which is opentracing.HTTPHeadersCarrier. See: +// +// - https://github.com/open-telemetry/opentelemetry-go/blob/c2dc940e0b48e61712e4f8f6f2320d8fd4c9aac6/bridge/opentracing/bridge.go#L634-L638 +// - https://github.com/open-telemetry/opentelemetry-go/blob/c2dc940e0b48e61712e4f8f6f2320d8fd4c9aac6/bridge/opentracing/bridge.go#L664-L668 +type otelBridgeTracer struct{ bridge *otelbridge.BridgeTracer } + +var _ opentracing.Tracer = &otelBridgeTracer{} + +func (b *otelBridgeTracer) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span { + return b.bridge.StartSpan(operationName, opts...) +} + +func (b *otelBridgeTracer) Inject(span opentracing.SpanContext, format interface{}, carrier interface{}) error { + // Inject into a blank HTTPHeaders carrier first - we use this as a source for our + // wrapped Inject implementation. 
+ otCarrier := opentracing.HTTPHeadersCarrier{} + err := b.bridge.Inject(span, opentracing.HTTPHeaders, otCarrier) + if err != nil { + return err + } + + // Regardless of format, inject context into the TextMapWriter if there is one. If we + // do this, there is no need to pass this on to the underlying Inject implementation + if tmw, ok := carrier.(opentracing.TextMapWriter); ok { + return otCarrier.ForeachKey(func(key, val string) error { + tmw.Set(key, val) + return nil + }) + } + + // If we are receiving some other non-TextMapWriter type, pass it on and hope for the + // best. + return b.bridge.Inject(span, format, carrier) +} + +func (b *otelBridgeTracer) Extract(format interface{}, carrier interface{}) (opentracing.SpanContext, error) { + // Regardless of format, extract TextMapReader content into an HTTPHeadersCarrier + if tmr, ok := carrier.(opentracing.TextMapReader); ok { + otCarrier := opentracing.HTTPHeadersCarrier{} + err := tmr.ForeachKey(func(key, val string) error { + otCarrier.Set(key, val) + return nil + }) + if err != nil { + return nil, err + } + + return b.bridge.Extract(opentracing.HTTPHeaders, otCarrier) + } + + return b.bridge.Extract(format, carrier) +} + +// newResource adapts sourcegraph/log.Resource into the OpenTelemetry package's Resource +// type.
+func newResource(r log.Resource) *resource.Resource { + return resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(r.Name), + semconv.ServiceNamespaceKey.String(r.Namespace), + semconv.ServiceInstanceIDKey.String(r.InstanceID), + semconv.ServiceVersionKey.String(r.Version)) +} diff --git a/internal/tracer/switchable.go b/internal/tracer/switchable.go new file mode 100644 index 00000000000..49594749f28 --- /dev/null +++ b/internal/tracer/switchable.go @@ -0,0 +1,81 @@ +package tracer + +import ( + "fmt" + "io" + "sync" + + "github.com/opentracing/opentracing-go" + "github.com/sourcegraph/log" +) + +// switchableTracer implements opentracing.Tracer, and is used to configure the global +// tracer implementations. The underlying opentracer used is switchable (set via the `set` +// method), so as to support live configuration. +type switchableTracer struct { + mu sync.RWMutex + tracer opentracing.Tracer + tracerCloser io.Closer + + log bool + logger log.Logger +} + +var _ opentracing.Tracer = &switchableTracer{} + +func newSwitchableTracer(logger log.Logger) *switchableTracer { + var t opentracing.NoopTracer + return &switchableTracer{ + tracer: t, + logger: logger.With(log.String("tracer", fmt.Sprintf("%T", t))).AddCallerSkip(1), + } +} + +func (t *switchableTracer) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span { + t.mu.RLock() + defer t.mu.RUnlock() + if t.log { + t.logger.Info("StartSpan", + log.String("operationName", operationName)) + } + return t.tracer.StartSpan(operationName, opts...) 
+} + +func (t *switchableTracer) Inject(sm opentracing.SpanContext, format any, carrier any) error { + t.mu.RLock() + defer t.mu.RUnlock() + if t.log { + t.logger.Info("Inject") + } + return t.tracer.Inject(sm, format, carrier) +} + +func (t *switchableTracer) Extract(format any, carrier any) (opentracing.SpanContext, error) { + t.mu.RLock() + defer t.mu.RUnlock() + if t.log { + t.logger.Info("Extract") + } + return t.tracer.Extract(format, carrier) +} + +func (t *switchableTracer) set( + logger log.Logger, + tracer opentracing.Tracer, + tracerCloser io.Closer, + shouldLog bool, +) { + t.mu.Lock() + defer t.mu.Unlock() + if tc := t.tracerCloser; tc != nil { + // Close the old tracerCloser outside the critical zone + go tc.Close() + } + + t.tracerCloser = tracerCloser + t.tracer = tracer + t.log = shouldLog + t.logger = logger.With(log.String("tracer", fmt.Sprintf("%T", tracer))).AddCallerSkip(1) + + t.logger.Info("tracer set") +} diff --git a/internal/tracer/tracer.go b/internal/tracer/tracer.go index af7ac519ae4..fb41f884644 100644 --- a/internal/tracer/tracer.go +++ b/internal/tracer/tracer.go @@ -1,24 +1,17 @@ package tracer import ( - "fmt" "io" - "reflect" - "sync" - "github.com/inconshreveable/log15" "github.com/opentracing/opentracing-go" "github.com/sourcegraph/log" - "github.com/uber/jaeger-client-go" - jaegercfg "github.com/uber/jaeger-client-go/config" - jaegermetrics "github.com/uber/jaeger-lib/metrics" "go.uber.org/automaxprocs/maxprocs" "github.com/sourcegraph/sourcegraph/internal/conf/conftypes" "github.com/sourcegraph/sourcegraph/internal/env" + "github.com/sourcegraph/sourcegraph/internal/hostname" "github.com/sourcegraph/sourcegraph/internal/trace/policy" "github.com/sourcegraph/sourcegraph/internal/version" - "github.com/sourcegraph/sourcegraph/lib/errors" ) // options control the behavior of a TracerType @@ -27,23 +20,22 @@ type options struct { externalURL string debug bool // these values are not configurable by site config - serviceName 
string - version string - env string + resource log.Resource } type TracerType string const ( - None TracerType = "none" - OpenTracing TracerType = "opentracing" + None TracerType = "none" + OpenTracing TracerType = "opentracing" + OpenTelemetry TracerType = "opentelemetry" ) // isSetByUser returns true if the TracerType is one supported by the schema // should be kept in sync with ObservabilityTracing.Type in schema/site.schema.json func (t TracerType) isSetByUser() bool { switch t { - case OpenTracing: + case OpenTracing, OpenTelemetry: return true } return false @@ -61,14 +53,19 @@ func Init(logger log.Logger, c conftypes.WatchableSiteConfig) { logger.Error("automaxprocs failed", log.Error(err)) } - opts := &options{} - opts.serviceName = env.MyName - if version.IsDev(version.Version()) { - opts.env = "dev" + // Resource mirrors the initialization used by our OpenTelemetry logger. + resource := log.Resource{ + Name: env.MyName, + Version: version.Version(), + InstanceID: hostname.Get(), } - opts.version = version.Version() - initTracer(logger, opts, c) + // Additionally set a dev namespace + if version.IsDev(version.Version()) { + resource.Namespace = "dev" + } + + initTracer(logger, &options{resource: resource}, c) } // initTracer is a helper that should be called exactly once (from Init). @@ -76,15 +73,10 @@ func initTracer(logger log.Logger, opts *options, c conftypes.WatchableSiteConfi globalTracer := newSwitchableTracer(logger.Scoped("global", "the global tracer")) opentracing.SetGlobalTracer(globalTracer) - // initial tracks if it's our first run of conf.Watch. This is used to - // prevent logging "changes" when it's the first run. - initial := true - - // Initially everything is disabled since we haven't read conf yet. + // Initially everything is disabled since we haven't read conf yet. This variable is + // also updated to compare against new version of configuration. 
oldOpts := options{ - serviceName: opts.serviceName, - version: opts.version, - env: opts.env, + resource: opts.resource, // the values below may change TracerType: None, debug: false, @@ -93,163 +85,77 @@ func initTracer(logger log.Logger, opts *options, c conftypes.WatchableSiteConfi // Watch loop go c.Watch(func() { - siteConfig := c.SiteConfig() + var ( + siteConfig = c.SiteConfig() + debug = false + setTracer = None + ) - samplingStrategy := policy.TraceNone - shouldLog := false - setTracer := None if tracingConfig := siteConfig.ObservabilityTracing; tracingConfig != nil { - switch tracingConfig.Sampling { - case "all": - samplingStrategy = policy.TraceAll - setTracer = OpenTracing - case "selective": - samplingStrategy = policy.TraceSelective - setTracer = OpenTracing + debug = tracingConfig.Debug + + // If sampling policy is set, update the strategy and set our tracer to be + // OpenTracing by default. + previousPolicy := policy.GetTracePolicy() + switch p := policy.TracePolicy(tracingConfig.Sampling); p { + case policy.TraceAll, policy.TraceSelective: + policy.SetTracePolicy(p) + setTracer = OpenTracing // enable the default tracer type + default: + policy.SetTracePolicy(policy.TraceNone) } + if newPolicy := policy.GetTracePolicy(); newPolicy != previousPolicy { + logger.Info("updating TracePolicy", + log.String("oldValue", string(previousPolicy)), + log.String("newValue", string(newPolicy))) + } + + // If the tracer type is configured, also set the tracer type if t := TracerType(tracingConfig.Type); t.isSetByUser() { setTracer = t } - shouldLog = tracingConfig.Debug } - if tracePolicy := policy.GetTracePolicy(); tracePolicy != samplingStrategy && !initial { - log15.Info("opentracing: TracePolicy", "oldValue", tracePolicy, "newValue", samplingStrategy) - } - initial = false - policy.SetTracePolicy(samplingStrategy) opts := options{ - externalURL: siteConfig.ExternalURL, TracerType: setTracer, - debug: shouldLog, - serviceName: opts.serviceName, - version:
opts.version, - env: opts.env, + externalURL: siteConfig.ExternalURL, + debug: debug, + // Stays the same + resource: oldOpts.resource, } - if opts == oldOpts { // Nothing changed return } - prevTracer := oldOpts.TracerType + + // update old opts for comparison oldOpts = opts - t, closer, err := newTracer(logger, &opts, prevTracer) + // create the new tracer and assign it globally + tracerLogger := logger.With( + log.String("tracerType", string(opts.TracerType)), + log.Bool("debug", opts.debug)) + t, closer, err := newTracer(tracerLogger, &opts) if err != nil { - logger.Warn("Could not initialize tracer", - log.String("tracer", string(opts.TracerType)), - log.Error(err)) + tracerLogger.Warn("failed to initialize tracer", log.Error(err)) return } - globalTracer.set(t, closer, opts.debug) + globalTracer.set(tracerLogger, t, closer, opts.debug) }) } -// TODO Use openTelemetry https://github.com/sourcegraph/sourcegraph/issues/27386 -func newTracer(logger log.Logger, opts *options, prevTracer TracerType) (opentracing.Tracer, io.Closer, error) { - if opts.TracerType == None { - logger.Info("tracing disabled") +// newTracer creates a tracer based on options +func newTracer(logger log.Logger, opts *options) (opentracing.Tracer, io.Closer, error) { + logger.Debug("configuring tracer") + + switch opts.TracerType { + case OpenTracing: + return newJaegerTracer(logger, opts) + + case OpenTelemetry: + return newOTelBridgeTracer(logger, opts) + + default: return opentracing.NoopTracer{}, nil, nil } - - logger.Info("opentracing: enabled") - cfg, err := jaegercfg.FromEnv() - cfg.ServiceName = opts.serviceName - if err != nil { - return nil, nil, errors.Wrap(err, "jaegercfg.FromEnv failed") - } - cfg.Tags = append(cfg.Tags, opentracing.Tag{Key: "service.version", Value: opts.version}, opentracing.Tag{Key: "service.env", Value: opts.env}) - if reflect.DeepEqual(cfg.Sampler, &jaegercfg.SamplerConfig{}) { - // Default sampler configuration for when it is not specified via - // 
JAEGER_SAMPLER_* env vars. In most cases, this is sufficient - // enough to connect Sourcegraph to Jaeger without any env vars. - cfg.Sampler.Type = jaeger.SamplerTypeConst - cfg.Sampler.Param = 1 - } - tracer, closer, err := cfg.NewTracer( - jaegercfg.Logger(jaegerLoggerShim{logger: logger.Scoped("jaeger", "Jaeger tracer")}), - jaegercfg.Metrics(jaegermetrics.NullFactory), - ) - if err != nil { - return nil, nil, errors.Wrap(err, "jaegercfg.NewTracer failed") - } - - return tracer, closer, nil -} - -type jaegerLoggerShim struct { - logger log.Logger -} - -func (l jaegerLoggerShim) Error(msg string) { l.logger.Error(msg) } - -func (l jaegerLoggerShim) Infof(msg string, args ...any) { - l.logger.Info(fmt.Sprintf(msg, args...)) -} - -// move to OpenTelemetry https://github.com/sourcegraph/sourcegraph/issues/27386 -// switchableTracer implements opentracing.Tracer. The underlying opentracer used is switchable (set via -// the `set` method). -type switchableTracer struct { - mu sync.RWMutex - opentracer opentracing.Tracer - tracerCloser io.Closer - - log bool - logger log.Logger - parentLogger log.Logger // used to create logger -} - -var _ opentracing.Tracer = &switchableTracer{} - -// move to OpenTelemetry https://github.com/sourcegraph/sourcegraph/issues/27386 -func newSwitchableTracer(logger log.Logger) *switchableTracer { - var t opentracing.NoopTracer - return &switchableTracer{ - opentracer: t, - logger: logger.With(log.String("opentracer", fmt.Sprintf("%T", t))), - parentLogger: logger, - } -} - -func (t *switchableTracer) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span { - t.mu.RLock() - defer t.mu.RUnlock() - if t.log { - t.logger.Info("opentracing: StartSpan", - log.String("operationName", operationName)) - } - return t.opentracer.StartSpan(operationName, opts...) 
-} - -func (t *switchableTracer) Inject(sm opentracing.SpanContext, format any, carrier any) error { - t.mu.RLock() - defer t.mu.RUnlock() - if t.log { - t.logger.Info("opentracing: Inject") - } - return t.opentracer.Inject(sm, format, carrier) -} - -func (t *switchableTracer) Extract(format any, carrier any) (opentracing.SpanContext, error) { - t.mu.RLock() - defer t.mu.RUnlock() - if t.log { - t.logger.Info("opentracing: Extract") - } - return t.opentracer.Extract(format, carrier) -} - -func (t *switchableTracer) set(tracer opentracing.Tracer, tracerCloser io.Closer, shouldLog bool) { - t.mu.Lock() - defer t.mu.Unlock() - if tc := t.tracerCloser; tc != nil { - // Close the old tracerCloser outside the critical zone - go tc.Close() - } - - t.tracerCloser = tracerCloser - t.opentracer = tracer - t.log = shouldLog - t.logger = t.parentLogger.With(log.String("opentracer", fmt.Sprintf("%T", t))) } diff --git a/internal/workerutil/worker.go b/internal/workerutil/worker.go index 885ad8c27db..ae819abe475 100644 --- a/internal/workerutil/worker.go +++ b/internal/workerutil/worker.go @@ -285,7 +285,7 @@ func (w *Worker) dequeueAndHandle() (dequeued bool, err error) { // Create context and span based on the root context workerSpan, workerCtxWithSpan := ot.StartSpanFromContext(policy.WithShouldTrace(w.rootCtx, true), w.options.Name) handleCtx, cancel := context.WithCancel(workerCtxWithSpan) - processLog := w.options.Metrics.logger.WithTrace(log.TraceContext{TraceID: trace.IDFromSpan(workerSpan)}) + processLog := trace.Logger(workerCtxWithSpan, w.options.Metrics.logger) // Register the record as running so it is included in heartbeat updates. 
if !w.runningIDSet.Add(record.RecordID(), cancel) { diff --git a/lib/go.mod b/lib/go.mod index ba19f0e1f1c..56527d62e65 100644 --- a/lib/go.mod +++ b/lib/go.mod @@ -24,6 +24,7 @@ require ( github.com/sourcegraph/log v0.0.0-20220704182018-fbd708c153e2 github.com/stretchr/testify v1.7.2 github.com/xeipuuv/gojsonschema v1.2.0 + go.uber.org/atomic v1.9.0 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 @@ -72,7 +73,6 @@ require ( github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/yuin/goldmark v1.4.4 // indirect github.com/yuin/goldmark-emoji v1.0.1 // indirect - go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.8.0 // indirect go.uber.org/zap v1.21.0 // indirect golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect diff --git a/lib/go.sum b/lib/go.sum index 963dbc46c9d..38b85317800 100644 --- a/lib/go.sum +++ b/lib/go.sum @@ -309,10 +309,6 @@ github.com/sourcegraph/go-diff v0.6.1 h1:hmA1LzxW0n1c3Q4YbrFgg4P99GSnebYa3x8gr0H github.com/sourcegraph/go-diff v0.6.1/go.mod h1:iBszgVvyxdc8SFZ7gm69go2KDdt3ag071iBaWPF6cjs= github.com/sourcegraph/jsonx v0.0.0-20200629203448-1a936bd500cf h1:oAdWFqhStsWiiMP/vkkHiMXqFXzl1XfUNOdxKJbd6bI= github.com/sourcegraph/jsonx v0.0.0-20200629203448-1a936bd500cf/go.mod h1:ppFaPm6kpcHnZGqQTFhUIAQRIEhdQDWP1PCv4/ON354= -github.com/sourcegraph/log v0.0.0-20220621231153-3bee7082c87e h1:7MnFFZ85BBwLNDkrQJB503/znGuSoLirDLFWRcLqHlM= -github.com/sourcegraph/log v0.0.0-20220621231153-3bee7082c87e/go.mod h1:A+9F6IicYvBbl2aT0R81lMraKcXjVfdfw352yPe2yJI= -github.com/sourcegraph/log v0.0.0-20220630091133-9e50e760eceb h1:GSHKGxgY56eKzxw+CdlczrIP78MVjutgTVkuOLyfz2U= -github.com/sourcegraph/log v0.0.0-20220630091133-9e50e760eceb/go.mod h1:A+9F6IicYvBbl2aT0R81lMraKcXjVfdfw352yPe2yJI= github.com/sourcegraph/log v0.0.0-20220704182018-fbd708c153e2 
h1:zJfrKH3roMV69m7n06C5y2Xk00HextnxvlG0gnpQolI= github.com/sourcegraph/log v0.0.0-20220704182018-fbd708c153e2/go.mod h1:zWEPlKrWBUVpko/tOgDS+qrp7BmzaCcmUrh9+ver1iQ= github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o= @@ -461,8 +457,6 @@ golang.org/x/sys v0.0.0-20211102192858-4dd72447c267/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211117180635-dee7805ff2e1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211213223007-03aa0b5f6827/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c h1:aFV+BgZ4svzjfabn8ERpuB4JI4N6/rdy1iusx77G3oU= -golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e h1:CsOuNlbOuf0mzxJIefr6Q4uAUetRUwZE4qt7VfzP+xo= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/schema/schema.go b/schema/schema.go index d44cd092c5f..edbfb4ce2d7 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -1254,11 +1254,11 @@ type ObservabilityAlerts struct { // ObservabilityTracing description: Controls the settings for distributed tracing. type ObservabilityTracing struct { - // Debug description: Turns on debug logging of opentracing client requests. This can be useful for debugging connectivity issues between the tracing client and the Jaeger agent, the performance overhead of tracing, and other issues related to the use of distributed tracing. + // Debug description: Turns on debug logging of tracing client requests. 
This can be useful for debugging connectivity issues between the tracing client and tracing backend, the performance overhead of tracing, and other issues related to the use of distributed tracing. May have performance implications in production. Debug bool `json:"debug,omitempty"` - // Sampling description: Determines the requests for which distributed traces are recorded. "none" (default) turns off tracing entirely. "selective" sends traces whenever `?trace=1` is present in the URL. "all" sends traces on every request. Note that this only affects the behavior of the distributed tracing client. The Jaeger instance must be running for traces to be collected (as described in the Sourcegraph installation instructions). Additional downsampling can be configured in Jaeger, itself (https://www.jaegertracing.io/docs/1.17/sampling) + // Sampling description: Determines the requests for which distributed traces are recorded. "none" (default) turns off tracing entirely. "selective" sends traces whenever `?trace=1` is present in the URL. "all" sends traces on every request. Note that this only affects the behavior of the distributed tracing client. An appropriate tracing backend must be running for traces to be collected (for "opentracing", a Jaeger instance must be running as described in the Sourcegraph installation instructions). Additional downsampling can be configured in tracing backend (for Jaeger, see https://www.jaegertracing.io/docs/1.17/sampling). Sampling string `json:"sampling,omitempty"` - // Type description: Determines what tracing provider to enable. Supports "opentracing" ("datadog" support is deprecated) + // Type description: Determines what tracing provider to enable. For "opentracing", the required backend is a Jaeger instance. For "opentelemetry" (EXPERIMENTAL), the required backend is a OpenTelemetry collector instance. "datadog" support has been removed, and the configuration option will be removed in a future release. 
Type string `json:"type,omitempty"` } diff --git a/schema/site.schema.json b/schema/site.schema.json index 902722b70a4..8f40eb3269c 100644 --- a/schema/site.schema.json +++ b/schema/site.schema.json @@ -1100,19 +1100,19 @@ "type": "object", "properties": { "type": { - "description": "Determines what tracing provider to enable. Supports \"opentracing\" (\"datadog\" support is deprecated)", + "description": "Determines what tracing provider to enable. For \"opentracing\", the required backend is a Jaeger instance. For \"opentelemetry\" (EXPERIMENTAL), the required backend is a OpenTelemetry collector instance. \"datadog\" support has been removed, and the configuration option will be removed in a future release.", "type": "string", - "enum": ["datadog", "opentracing"], + "enum": ["opentracing", "opentelemetry", "datadog"], "default": "opentracing" }, "sampling": { - "description": "Determines the requests for which distributed traces are recorded. \"none\" (default) turns off tracing entirely. \"selective\" sends traces whenever `?trace=1` is present in the URL. \"all\" sends traces on every request. Note that this only affects the behavior of the distributed tracing client. The Jaeger instance must be running for traces to be collected (as described in the Sourcegraph installation instructions). Additional downsampling can be configured in Jaeger, itself (https://www.jaegertracing.io/docs/1.17/sampling)", + "description": "Determines the requests for which distributed traces are recorded. \"none\" (default) turns off tracing entirely. \"selective\" sends traces whenever `?trace=1` is present in the URL. \"all\" sends traces on every request. Note that this only affects the behavior of the distributed tracing client. An appropriate tracing backend must be running for traces to be collected (for \"opentracing\", a Jaeger instance must be running as described in the Sourcegraph installation instructions). 
Additional downsampling can be configured in tracing backend (for Jaeger, see https://www.jaegertracing.io/docs/1.17/sampling).", "type": "string", "enum": ["selective", "all", "none"], "default": "selective" }, "debug": { - "description": "Turns on debug logging of opentracing client requests. This can be useful for debugging connectivity issues between the tracing client and the Jaeger agent, the performance overhead of tracing, and other issues related to the use of distributed tracing.", + "description": "Turns on debug logging of tracing client requests. This can be useful for debugging connectivity issues between the tracing client and tracing backend, the performance overhead of tracing, and other issues related to the use of distributed tracing. May have performance implications in production.", "type": "boolean", "default": false } diff --git a/sg.config.yaml b/sg.config.yaml index 87752a39df8..2138f6311e5 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -652,6 +652,25 @@ commands: LOKI_VERSION: '2.3.0' LOKI_LOG_FILE: $HOME/.sourcegraph-dev/logs/loki/loki.log + otel-collector: + cmd: | + JAEGER_HOST='host.docker.internal' + if [[ "$OSTYPE" == "linux-gnu" ]]; then + # Jaeger generally runs outside of Docker, so to access it we need to be + # able to access ports on the host, because the Docker host only exists on + # MacOS. --net=host is a very dirty way of enabling this. 
+ DOCKER_NET="--net=host" + JAEGER_HOST="localhost" + fi + + docker run --rm --name=otel-collector $DOCKER_NET \ + -p 4317:4317 -p 55679:55679 \ + -e JAEGER_HOST=$JAEGER_HOST \ + -v "$(pwd)"/dev/otel-collector/config.yaml:/etc/otel-collector/config.yaml \ + otel/opentelemetry-collector:${OTEL_COLLECTOR_VERISON} --config /etc/otel-collector/config.yaml + env: + OTEL_COLLECTOR_VERISON: '0.54.0' + storybook: cmd: yarn storybook install: yarn @@ -897,6 +916,14 @@ commandsets: - web-standalone-http-prod - caddy + # For testing out OpenTelemetry stack + otel: + checks: + - docker + commands: + - otel-collector + - jaeger + tests: # These can be run with `sg test [name]` backend: