fix(cody): use client-provided timeout for completions requests (#63875)

Closes
[CODY-2775](https://linear.app/sourcegraph/issue/CODY-2775/%5Bautocomplete-latency%5D-apply-the-same-timeout-on-the-cody-gateway-side)



Enables client control over the request processing timeout on the server
(both Sourcegraph backend and Cody Gateway). The context timeout is set
to the value, in milliseconds, provided in the `X-Timeout-Ms` header of
the client request. If the header is not provided or cannot be parsed as
an integer, the default context timeout is used (1 minute on both
Sourcegraph backend and Cody Gateway).

Previously, we only had a default timeout on the Sourcegraph backend
side (8 minutes).

Corresponding client change:
- https://github.com/sourcegraph/cody/pull/4921

<!-- 💡 To write a useful PR description, make sure that your description
covers:
- WHAT this PR is changing:
    - How was it PREVIOUSLY.
    - How it will be from NOW on.
- WHY this PR is needed.
- CONTEXT, i.e. to which initiative, project or RFC it belongs.

The structure of the description doesn't matter as much as covering
these points, so use
your best judgement based on your context.
Learn how to write good pull request description:
https://www.notion.so/sourcegraph/Write-a-good-pull-request-description-610a7fd3e613496eb76f450db5a49b6e?pvs=4
-->


## Test plan
- Manually tested and confirmed that if the request contains the
`X-Timeout-Ms` header, its value is used. If not, the default maximum
request duration is applied.
- CI
<!-- All pull requests REQUIRE a test plan:
https://docs-legacy.sourcegraph.com/dev/background-information/testing_principles
-->


## Changelog
- Use the timeout provided in the `X-Timeout-Ms` request header if
available; otherwise use the default maximum request duration (1 minute)

<!--
1. Ensure your pull request title is formatted as: $type($domain): $what
2. Add bullet list items for each additional detail you want to cover
(see example below)
3. You can edit this after the pull request was merged, as long as
release shipping it hasn't been promoted to the public.
4. For more information, please see this how-to
https://www.notion.so/sourcegraph/Writing-a-changelog-entry-dd997f411d524caabf0d8d38a24a878c?

Audience: TS/CSE > Customers > Teammates (in that order).

Cheat sheet: $type = chore|fix|feat $domain:
source|search|ci|release|plg|cody|local|...
-->

<!--
Example:

Title: fix(search): parse quotes with the appropriate context
Changelog section:

## Changelog

- When a quote is used with regexp pattern type, then ...
- Refactored underlying code.
-->
This commit is contained in:
Taras Yemets 2024-07-19 17:17:02 +03:00 committed by GitHub
parent 619fc57074
commit 26df35a69f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 3 deletions

View File

@ -116,6 +116,10 @@ type UpstreamRequest interface {
BuildPrompt() string
}
// maxRequestDuration is the default time budget for a request. The handler
// applies it as the context timeout whenever the client does not send a
// usable X-Timeout-Ms header; once it elapses the request is cancelled as
// DeadlineExceeded.
const maxRequestDuration = time.Minute
type UpstreamHandlerConfig struct {
// defaultRetryAfterSeconds sets the retry-after policy on upstream rate
// limit events in case a retry-after is not provided by the upstream
@ -161,7 +165,20 @@ func makeUpstreamHandler[ReqT UpstreamRequest](
// in order to call the upstream API. e.g. calling the upstreamHandlerMethods in
// the correct order, enforcing rate limits and anti-abuse mechanisms, etc.
upstreamHandler := func(w http.ResponseWriter, downstreamRequest *http.Request) {
ctx := downstreamRequest.Context()
// Set the context timeout: use the timeout from the request header if provided,
// otherwise use the default maximum request duration.
ctxTimeout := maxRequestDuration
if v := downstreamRequest.Header.Get("X-Timeout-Ms"); v != "" {
if t, err := strconv.Atoi(v); err != nil {
baseLogger.Warn("error parsing X-Timeout-Ms header", log.Error(err))
} else {
ctxTimeout = time.Duration(t) * time.Millisecond
}
}
ctx, cancel := context.WithTimeout(downstreamRequest.Context(), ctxTimeout)
defer cancel()
act := actor.FromContext(ctx)
// TODO: Investigate using actor propagation handler for extracting
@ -395,7 +412,9 @@ func makeUpstreamHandler[ReqT UpstreamRequest](
logger.Error("failed to log event", log.Error(err))
}
}()
resp, err := httpClient.Do(upstreamRequest)
if err != nil {
// Ignore reporting errors where client disconnected
if upstreamRequest.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {

View File

@ -40,7 +40,7 @@ import (
// maxRequestDuration is the maximum amount of time a request can take before
// being cancelled as DeadlineExceeded.
const maxRequestDuration = 8 * time.Minute
const maxRequestDuration = 1 * time.Minute
var timeToFirstEventMetrics = metrics.NewREDMetrics(
prometheus.DefaultRegisterer,
@ -67,7 +67,17 @@ func newCompletionsHandler(
return
}
ctx, cancel := context.WithTimeout(r.Context(), maxRequestDuration)
// Set the context timeout: use the timeout from the request header if provided,
// otherwise use the default maximum request duration.
ctxTimeout := maxRequestDuration
if v := r.Header.Get("X-Timeout-Ms"); v != "" {
if t, err := strconv.Atoi(v); err != nil {
logger.Warn("error parsing X-Timeout-Ms header", log.Error(err))
} else {
ctxTimeout = time.Duration(t) * time.Millisecond
}
}
ctx, cancel := context.WithTimeout(r.Context(), ctxTimeout)
defer cancel()
// First check that Cody is enabled for this Sourcegraph instance.