httpcli: add Prometheus metric for monitoring the rate at which Sourcegraph issues requests to external services

commit-id:0f3120ed
This commit is contained in:
ggilmore 2024-03-22 13:05:10 -07:00 committed by Geoffrey Gilmore
parent 1a0c31675d
commit d3786cb9fd
5 changed files with 357 additions and 55 deletions

View File

@ -23,6 +23,7 @@ All notable changes to Sourcegraph are documented in this file.
- Search Jobs now supports diff, commit and path searches. Before, only file searches were supported. [#60883](https://github.com/sourcegraph/sourcegraph/pull/60883)
- Auth providers now support a `noSignIn` option that, when set to true, will hide the auth provider from the sign in page, but still allow users to connect the external account from their Account Security page for permissions syncing. [#60722](https://github.com/sourcegraph/sourcegraph/pull/60722)
- Added a "Commits" button to the folders in repos that shows commits for the items in that folder. [#60909](https://github.com/sourcegraph/sourcegraph/pull/60909)
- The frontend Grafana dashboard has a new Prometheus metric that tracks the rate of requests that Sourcegraph issues to external services. [#61348](https://github.com/sourcegraph/sourcegraph/pull/61348)
### Changed

View File

@ -4914,6 +4914,56 @@ sum(rate(src_frontend_account_lockouts_total[1m]))
<br />
### Frontend: External HTTP Request Rate
#### frontend: external_http_request_rate_by_host
<p class="subtitle">Rate of external HTTP requests by host over 1m</p>
Shows the rate of external HTTP requests made by Sourcegraph to other services, broken down by host.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103000` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
<details>
<summary>Technical details</summary>
Query:
```
sum by (host) (rate(src_http_client_external_request_count{host=~`${httpRequestHost:regex}`}[1m]))
```
</details>
<br />
#### frontend: external_http_request_rate_by_host_by_code
<p class="subtitle">Rate of external HTTP requests by host and response code over 1m</p>
Shows the rate of external HTTP requests made by Sourcegraph to other services, broken down by host and response code.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103010` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
<details>
<summary>Technical details</summary>
Query:
```
sum by (host, status_code) (rate(src_http_client_external_request_count{host=~`${httpRequestHost:regex}`}[1m]))
```
</details>
<br />
### Frontend: Cody API requests
#### frontend: cody_api_rate
@ -4924,7 +4974,7 @@ Rate (QPS) of requests to cody related endpoints. completions.stream is for the
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103000` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103100` on your Sourcegraph instance.
<details>
@ -4947,7 +4997,7 @@ sum by (route, code)(irate(src_http_request_duration_seconds_count{route=~"^comp
Refer to the [alerts reference](./alerts.md#frontend-cloudkms-cryptographic-requests) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103100` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103200` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
@ -4971,7 +5021,7 @@ sum(increase(src_cloudkms_cryptographic_total[1m]))
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103101` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103201` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
@ -4995,7 +5045,7 @@ min by (kubernetes_name) (src_encryption_cache_hit_total/(src_encryption_cache_h
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103102` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103202` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
@ -5019,7 +5069,7 @@ sum by (kubernetes_name) (irate(src_encryption_cache_eviction_total[5m]))
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103200` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103300` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5041,7 +5091,7 @@ sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="frontend"})
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103201` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103301` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5063,7 +5113,7 @@ sum by (app_name, db_name) (src_pgsql_conns_open{app_name="frontend"})
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103210` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103310` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5085,7 +5135,7 @@ sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="frontend"})
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103211` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103311` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5107,7 +5157,7 @@ sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="frontend"})
Refer to the [alerts reference](./alerts.md#frontend-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103220` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103320` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5129,7 +5179,7 @@ sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="f
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103230` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103330` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5151,7 +5201,7 @@ sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="f
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103231` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103331` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5173,7 +5223,7 @@ sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_nam
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103232` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103332` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5207,7 +5257,7 @@ value change independent of deployment events (such as an upgrade), it could ind
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103300` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103400` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5229,7 +5279,7 @@ count by(name) ((time() - container_last_seen{name=~"^(frontend|sourcegraph-fron
Refer to the [alerts reference](./alerts.md#frontend-container-cpu-usage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103301` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103401` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5251,7 +5301,7 @@ cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-fron
Refer to the [alerts reference](./alerts.md#frontend-container-memory-usage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103302` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103402` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5276,7 +5326,7 @@ When extremely high, this can indicate a resource usage problem, or can cause pr
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103303` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103403` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5300,7 +5350,7 @@ sum by(name) (rate(container_fs_reads_total{name=~"^(frontend|sourcegraph-fronte
Refer to the [alerts reference](./alerts.md#frontend-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103400` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103500` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5322,7 +5372,7 @@ quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^(f
Refer to the [alerts reference](./alerts.md#frontend-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103401` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103501` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5344,7 +5394,7 @@ max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(frontend
Refer to the [alerts reference](./alerts.md#frontend-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103410` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103510` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5366,7 +5416,7 @@ max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|so
Refer to the [alerts reference](./alerts.md#frontend-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103411` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103511` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5391,7 +5441,7 @@ When it occurs frequently, it is an indicator of underprovisioning.
Refer to the [alerts reference](./alerts.md#frontend-container-oomkill-events-total) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103412` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103512` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5417,7 +5467,7 @@ A high value here indicates a possible goroutine leak.
Refer to the [alerts reference](./alerts.md#frontend-go-goroutines) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103500` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103600` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5439,7 +5489,7 @@ max by(instance) (go_goroutines{job=~".*(frontend|sourcegraph-frontend)"})
Refer to the [alerts reference](./alerts.md#frontend-go-gc-duration-seconds) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103501` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103601` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5463,7 +5513,7 @@ max by(instance) (go_gc_duration_seconds{job=~".*(frontend|sourcegraph-frontend)
Refer to the [alerts reference](./alerts.md#frontend-pods-available-percentage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103600` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103700` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5489,7 +5539,7 @@ The total number of search clicks across all search types over a 6 hour window.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103700` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103800` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Search Platform team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*</sub>
@ -5513,7 +5563,7 @@ The percent of clicks that were on the top search result, excluding searches wit
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103701` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103801` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Search Platform team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*</sub>
@ -5537,7 +5587,7 @@ The percent of clicks that were on the first 3 search results, excluding searche
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103702` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103802` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Search Platform team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*</sub>
@ -5561,7 +5611,7 @@ The distribution of clicked search results by result type. At every point in tim
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103710` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103810` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Search Platform team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*</sub>
@ -5585,7 +5635,7 @@ The percent of Zoekt searches that hit the flush time limit. These searches don`
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103711` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103811` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Search Platform team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*</sub>
@ -5609,7 +5659,7 @@ sum(increase(zoekt_final_aggregate_size_count{reason="timer_expired"}[1d])) / su
Refer to the [alerts reference](./alerts.md#frontend-email-delivery-failures) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103800` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103900` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5633,7 +5683,7 @@ Total emails successfully delivered.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103810` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103910` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5657,7 +5707,7 @@ Emails successfully delivered by source, i.e. product feature.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103811` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103911` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
@ -5683,7 +5733,7 @@ Mean search duration for all successful sentinel queries
Refer to the [alerts reference](./alerts.md#frontend-mean-successful-sentinel-duration-over-2h) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103900` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104000` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5707,7 +5757,7 @@ Mean time to first result for all successful streaming sentinel queries
Refer to the [alerts reference](./alerts.md#frontend-mean-sentinel-stream-latency-over-2h) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103901` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104001` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5731,7 +5781,7 @@ sum(rate(src_search_streaming_latency_seconds_sum{source=~"searchblitz.*"}[2h]))
Refer to the [alerts reference](./alerts.md#frontend-90th-percentile-successful-sentinel-duration-over-2h) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103910` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104010` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5755,7 +5805,7 @@ histogram_quantile(0.90, sum by (le)(label_replace(rate(src_search_response_late
Refer to the [alerts reference](./alerts.md#frontend-90th-percentile-sentinel-stream-latency-over-2h) for 2 alerts related to this panel.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103911` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104011` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5779,7 +5829,7 @@ Mean search duration for successful sentinel queries, broken down by query. Usef
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103920` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104020` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5803,7 +5853,7 @@ Mean time to first result for successful streaming sentinel queries, broken down
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103921` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104021` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5827,7 +5877,7 @@ sum(rate(src_search_streaming_latency_seconds_sum{source=~"searchblitz.*"}[$sent
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103930` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104030` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5851,7 +5901,7 @@ histogram_quantile(0.90, sum(rate(src_search_response_latency_seconds_bucket{sou
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103931` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104031` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5875,7 +5925,7 @@ histogram_quantile(0.90, sum(rate(src_search_streaming_latency_seconds_bucket{so
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103940` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104040` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5899,7 +5949,7 @@ histogram_quantile(0.90, sum(rate(src_search_response_latency_seconds_bucket{sou
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103950` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104050` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5923,7 +5973,7 @@ histogram_quantile(0.75, sum(rate(src_search_response_latency_seconds_bucket{sou
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103951` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104051` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5947,7 +5997,7 @@ histogram_quantile(0.75, sum(rate(src_search_streaming_latency_seconds_bucket{so
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103960` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104060` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -5971,7 +6021,7 @@ The rate of unsuccessful sentinel queries, broken down by failure type.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=103970` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104070` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6001,7 +6051,7 @@ p95 response time to incoming webhook requests from code hosts.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104000` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104100` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*</sub>
@ -6025,7 +6075,7 @@ histogram_quantile(0.95, sum (rate(src_http_request_duration_seconds_bucket{rou
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104100` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104200` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6047,7 +6097,7 @@ sum(increase(src_insights_aggregations_total{job=~"^(frontend|sourcegraph-fronte
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104101` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104201` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6069,7 +6119,7 @@ sum by (le)(rate(src_insights_aggregations_duration_seconds_bucket{job=~"^(fron
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104102` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104202` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6091,7 +6141,7 @@ sum(increase(src_insights_aggregations_errors_total{job=~"^(frontend|sourcegraph
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104103` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104203` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6113,7 +6163,7 @@ sum(increase(src_insights_aggregations_errors_total{job=~"^(frontend|sourcegraph
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104110` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104210` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6135,7 +6185,7 @@ sum by (op,extended_mode)(increase(src_insights_aggregations_total{job=~"^(front
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104111` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104211` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6157,7 +6207,7 @@ histogram_quantile(0.99, sum by (le,op,extended_mode)(rate(src_insights_aggrega
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104112` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104212` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>
@ -6179,7 +6229,7 @@ sum by (op,extended_mode)(increase(src_insights_aggregations_errors_total{job=~"
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104113` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=104213` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code Search team](https://handbook.sourcegraph.com/departments/engineering/teams/code-search).*</sub>

View File

@ -136,6 +136,7 @@ func newExternalClientFactory(cache bool, testOpt bool, middleware ...Middleware
ContextErrorMiddleware,
HeadersMiddleware("User-Agent", "Sourcegraph-Bot"),
redisLoggerMiddleware(),
externalRequestCountMetricsMiddleware,
}
mw = append(mw, middleware...)
@ -564,6 +565,43 @@ var metricRetry = promauto.NewCounter(prometheus.CounterOpts{
Help: "Total number of times we retry HTTP requests.",
})
// metricExternalRequestCount counts HTTP requests observed by
// externalRequestCountMetricsMiddleware. Each observation is partitioned by
// three labels:
//   - "host": the host the request was addressed to.
//   - "method": the HTTP method of the request.
//   - "status_code": the response status code rendered as a decimal string;
//     "-1" is recorded when the request returned an error and therefore has
//     no status code.
var metricExternalRequestCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "src_http_client_external_request_count",
Help: "Count of external HTTP requests made by the Sourcegraph HTTP client.",
}, []string{"host", "method", "status_code"})
// externalRequestCountMetricsMiddleware wraps next so that every request sent
// through it increments the metricExternalRequestCount counter, labelled with
// the request's host, its method, and the response status code formatted as a
// decimal string.
func externalRequestCountMetricsMiddleware(next Doer) Doer {
	// record translates one observation into a counter increment.
	record := func(host, method string, statusCode int) {
		metricExternalRequestCount.
			WithLabelValues(host, method, strconv.Itoa(statusCode)).
			Inc()
	}

	return doExternalRequestCountMetricsMiddleware(next, record)
}
// doExternalRequestCountMetricsMiddleware returns a Doer that forwards every
// request to next and then reports the outcome through observe. The reporting
// callback is injected so that tests can assert on the observed values without
// touching the global Prometheus registry.
//
// observe is called once after next.Do returns, with:
//   - host: req.Host when set, otherwise req.URL.Host, otherwise "<unknown>".
//   - method: req.Method, with the empty string reported as "GET" because
//     net/http treats an empty method on a client request as GET.
//   - statusCode: the response status code, or -1 when next.Do returned an
//     error or no response, since no status code is available in that case.
//
// The response and error produced by next are returned to the caller unchanged.
func doExternalRequestCountMetricsMiddleware(next Doer, observe func(host, method string, statusCode int)) Doer {
	return DoerFunc(func(req *http.Request) (*http.Response, error) {
		// Prefer the explicit Host override, then fall back to the URL's host.
		host := "<unknown>"
		switch {
		case req.Host != "":
			host = req.Host
		case req.URL != nil && req.URL.Host != "":
			host = req.URL.Host
		}

		// Normalize the implicit default so that GET requests built without an
		// explicit method do not end up under a separate, empty label value.
		method := req.Method
		if method == "" {
			method = http.MethodGet
		}

		resp, err := next.Do(req)

		// -1 indicates an unknown status code. The nil check keeps the metrics
		// layer from panicking if a Doer returns neither a response nor an error.
		statusCode := -1
		if err == nil && resp != nil {
			statusCode = resp.StatusCode
		}

		observe(host, method, statusCode)
		return resp, err
	})
}
// A regular expression to match the error returned by net/http when the
// configured number of redirects is exhausted. This error isn't typed
// specifically so we resort to matching on the error string.

View File

@ -1074,3 +1074,168 @@ func TestRetryBasedOnStatusCode(t *testing.T) {
})
}
}
// TestDoExternalRequestCountMetricsMiddleware verifies the (host, method,
// statusCode) values that doExternalRequestCountMetricsMiddleware hands to its
// observe callback, and that the callback is invoked for every request.
//
// All cases issue a POST through a fake Doer that answers with 200 OK (or
// with an error, when nextErr is set). They differ only in how the request's
// Host / URL.Host fields are set up and in the values expected by observe, so
// they are expressed as a table instead of five copy-pasted subtests.
func TestDoExternalRequestCountMetricsMiddleware(t *testing.T) {
	expectedError := errors.New("fake error")

	tests := []struct {
		name string
		// urlHost is the host the request URL is built from ("http://<urlHost>").
		urlHost string
		// adjust, when non-nil, modifies the request after it has been built.
		adjust func(req *http.Request)
		// nextErr is the error returned by the wrapped Doer; nil means success.
		nextErr error

		wantHost       string
		wantStatusCode int
	}{
		{
			name:           "Success",
			urlHost:        "example.com",
			wantHost:       "example.com",
			wantStatusCode: http.StatusOK,
		},
		{
			name:           "returns -1 status code if next Doer fails",
			urlHost:        "example.com",
			nextErr:        expectedError,
			wantHost:       "example.com",
			wantStatusCode: -1,
		},
		{
			name:    "prefers request.Host over request.URL.Host",
			urlHost: "example.com",
			// http.NewRequest copied the URL host into req.Host, so only
			// URL.Host carries the wrong value afterwards.
			adjust:         func(req *http.Request) { req.URL.Host = "example.org" },
			wantHost:       "example.com",
			wantStatusCode: http.StatusOK,
		},
		{
			name:           "falls back to request.URL.Host if request.Host is empty",
			urlHost:        "example.com",
			adjust:         func(req *http.Request) { req.Host = "" },
			wantHost:       "example.com",
			wantStatusCode: http.StatusOK,
		},
		{
			name: "host is unknown if both request.Host and request.URL.Host are empty",
			// An empty urlHost yields the URL "http://", whose Host is empty.
			urlHost:        "",
			adjust:         func(req *http.Request) { req.Host = "" },
			wantHost:       "<unknown>",
			wantStatusCode: http.StatusOK,
		},
	}

	for _, tc := range tests {
		// Subtests run sequentially (no t.Parallel), so closing over tc is
		// safe regardless of the Go version's loop-variable semantics.
		t.Run(tc.name, func(t *testing.T) {
			observeFuncRan := false
			observe := func(host, method string, statusCode int) {
				observeFuncRan = true

				if diff := cmp.Diff(tc.wantHost, host); diff != "" {
					t.Errorf("unexpected host (-want +got):\n%s", diff)
				}
				if diff := cmp.Diff("POST", method); diff != "" {
					t.Errorf("unexpected method (-want +got):\n%s", diff)
				}
				if diff := cmp.Diff(tc.wantStatusCode, statusCode); diff != "" {
					t.Errorf("unexpected status code (-want +got):\n%s", diff)
				}
			}

			next := newFakeClient(http.StatusOK, nil, tc.nextErr)
			cli := doExternalRequestCountMetricsMiddleware(next, observe)

			req, _ := http.NewRequest("POST", fmt.Sprintf("http://%s", tc.urlHost), nil)
			if tc.adjust != nil {
				tc.adjust(req)
			}

			_, err := cli.Do(req)
			if tc.nextErr != nil {
				// The middleware must pass the wrapped Doer's error through.
				if !errors.Is(err, tc.nextErr) {
					t.Errorf("unexpected error: %s", err)
				}
			} else if err != nil {
				t.Fatal(err)
			}

			if !observeFuncRan {
				t.Error("observe() was not called")
			}
		})
	}
}

View File

@ -2,6 +2,7 @@ package definitions
import (
"fmt"
"strings"
"time"
"github.com/sourcegraph/sourcegraph/monitoring/definitions/shared"
@ -18,7 +19,8 @@ func Frontend() *monitoring.Dashboard {
grpcZoektConfigurationServiceName = "sourcegraph.zoekt.configuration.v1.ZoektConfigurationService"
grpcInternalAPIServiceName = "api.internalapi.v1.ConfigService"
scrapeJobRegex = `(sourcegraph-)?frontend`
scrapeJobRegex = `(sourcegraph-)?frontend`
httpRequestHostVariableName = "httpRequestHost"
)
var sentinelSamplingIntervals []string
@ -62,6 +64,16 @@ func Frontend() *monitoring.Dashboard {
},
Multi: true,
},
{
Label: "HTTP request hostname",
Name: httpRequestHostVariableName,
OptionsLabelValues: monitoring.ContainerVariableOptionsLabelValues{
Query: "src_http_client_external_request_count",
LabelName: "host",
ExampleOption: "pings.sourcegraph.com",
},
Multi: true,
},
grpcMethodVariableFrontendZoektConfiguration,
grpcMethodVariableFrontendInternalAPI,
},
@ -632,6 +644,42 @@ func Frontend() *monitoring.Dashboard {
},
},
},
{
Title: "External HTTP Request Rate",
Hidden: true,
Rows: []monitoring.Row{
{
{
Name: "external_http_request_rate_by_host",
Description: "rate of external HTTP requests by host over 1m",
Query: fmt.Sprintf("sum by (host) (rate(src_http_client_external_request_count{host=~`%s`}[1m]))", fmt.Sprintf("${%s:regex}", httpRequestHostVariableName)),
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.RequestsPerSecond).LegendFormat("{{host}}").
With(monitoring.PanelOptions.ZeroIfNoData()).
With(monitoring.PanelOptions.LegendOnRight()),
Owner: monitoring.ObservableOwnerSource,
Interpretation: strings.TrimSpace(`
Shows the rate of external HTTP requests made by Sourcegraph to other services, broken down by host.
`),
},
},
{
{
Name: "external_http_request_rate_by_host_by_code",
Description: "rate of external HTTP requests by host and response code over 1m",
Query: fmt.Sprintf("sum by (host, status_code) (rate(src_http_client_external_request_count{host=~`%s`}[1m]))", fmt.Sprintf("${%s:regex}", httpRequestHostVariableName)),
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.RequestsPerSecond).LegendFormat("{{host}}:{{status_code}}").
With(monitoring.PanelOptions.ZeroIfNoData()).
With(monitoring.PanelOptions.LegendOnRight()),
Owner: monitoring.ObservableOwnerSource,
Interpretation: strings.TrimSpace(`
Shows the rate of external HTTP requests made by Sourcegraph to other services, broken down by host and response code.
`),
},
},
},
},
{
Title: "Cody API requests",
Hidden: true,