chore: Enable traces for uploads (#63025)

We've been getting more i/o timeout errors and similar lately.
Let's enable traces so that we have more information to go on
when investigating these kinds of errors. It might take some
rounds of iteration (adding more events, etc.) to figure out
what exactly is going wrong.
This commit is contained in:
Varun Gandhi 2024-06-03 19:17:50 +08:00 committed by GitHub
parent bab01ccaac
commit fa5a08d4b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 26 additions and 13 deletions

View File

@@ -18,6 +18,7 @@ go_library(
deps = [
"//internal/metrics",
"//internal/observation",
"//internal/trace/policy",
"//internal/uploadstore",
"//lib/errors",
"@com_github_aws_aws_sdk_go_v2_feature_s3_manager//:manager",

View File

@@ -13,6 +13,7 @@ import (
sglog "github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/trace/policy"
"github.com/sourcegraph/sourcegraph/internal/uploadstore"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
@@ -75,7 +76,10 @@ func (h *UploadHandler[T]) handleEnqueue(w http.ResponseWriter, r *http.Request)
// easily. The remainder of the function simply serializes the result to the
// HTTP response writer.
payload, statusCode, err := func() (_ any, statusCode int, err error) {
ctx, trace, endObservation := h.operations.handleEnqueue.With(r.Context(), &err, observation.Args{})
// Always enable tracing for uploads, since a typical Sourcegraph instance doesn't
// have that many uploads, and lack of traces by default makes debugging harder.
ctx := policy.WithShouldTrace(r.Context(), true)
ctx, trace, endObservation := h.operations.handleEnqueue.With(ctx, &err, observation.Args{})
defer func() {
endObservation(1, observation.Args{Attrs: []attribute.KeyValue{
attribute.Int("statusCode", statusCode),
@@ -86,17 +90,7 @@ func (h *UploadHandler[T]) handleEnqueue(w http.ResponseWriter, r *http.Request)
if err != nil {
return nil, statusCode, err
}
trace.AddEvent(
"finished constructUploadState",
attribute.Int("uploadID", uploadState.uploadID),
attribute.Int("numParts", uploadState.numParts),
attribute.Int("numUploadedParts", len(uploadState.uploadedParts)),
attribute.Bool("multipart", uploadState.multipart),
attribute.Bool("suppliedIndex", uploadState.suppliedIndex),
attribute.Int("index", uploadState.index),
attribute.Bool("done", uploadState.done),
attribute.String("metadata", fmt.Sprintf("%#v", uploadState.metadata)),
)
trace.AddEvent("finished constructUploadState", uploadState.Attrs()...)
if uploadHandlerFunc := h.selectUploadHandlerFunc(uploadState); uploadHandlerFunc != nil {
return uploadHandlerFunc(ctx, uploadState, r.Body)

View File

@@ -2,9 +2,12 @@ package uploadhandler
import (
"context"
"fmt"
"net/http"
"strconv"
"go.opentelemetry.io/otel/attribute"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
@@ -14,13 +17,28 @@ type uploadState[T any] struct {
numParts int
uploadedParts []int
multipart bool
suppliedIndex bool
// suppliedIndex is true if the part index was supplied in the query parameters.
suppliedIndex bool
// index is 0-based part number for multi-part uploads
index int
done bool
uncompressedSize *int64
metadata T
}
// Attrs returns the upload state's fields as OpenTelemetry key/value
// attributes, suitable for attaching to a trace span or event.
// The metadata field is rendered via %#v since its type is generic.
func (s *uploadState[T]) Attrs() []attribute.KeyValue {
	attrs := make([]attribute.KeyValue, 0, 8)
	attrs = append(attrs,
		attribute.Int("uploadID", s.uploadID),
		attribute.Int("numParts", s.numParts),
		attribute.Int("numUploadedParts", len(s.uploadedParts)),
		attribute.Bool("multipart", s.multipart),
	)
	attrs = append(attrs,
		attribute.Bool("suppliedIndex", s.suppliedIndex),
		attribute.Int("index", s.index),
		attribute.Bool("done", s.done),
		attribute.String("metadata", fmt.Sprintf("%#v", s.metadata)),
	)
	return attrs
}
// constructUploadState reads the query args of the given HTTP request and populates an upload state object.
// This function should be used instead of reading directly from the request as the upload state's fields are
// backfilled/denormalized from the database, depending on the type of request.