From 711ee1a4956fe11949cb0433b0b612c2b174eef5 Mon Sep 17 00:00:00 2001 From: Erik Seliger Date: Thu, 14 Sep 2023 12:43:40 -0500 Subject: [PATCH] Remove GitHub proxy service (#56485) This service is being replaced by a redsync.Mutex that lives directly in the GitHub client. By this change we will: - Simplify deployments by removing one service - Centralize GitHub access control in the client instead of splitting it across services - Remove the dependency on a non-HA service to talk to GitHub.com successfully Other repos referencing this service will be updated once this has shipped to dotcom and proven to work over the course of a couple days. --- .vscode/launch.json | 12 - CHANGELOG.md | 1 + .../httpapi/releasecache/main_test.go | 3 +- .../internal/authz/integration_test.go | 2 + cmd/server/BUILD.bazel | 1 - cmd/server/image_tests/test_services.sh | 1 - cmd/server/shared/shared.go | 2 - cmd/sourcegraph/BUILD.bazel | 1 - cmd/sourcegraph/main.go | 2 - dev/prometheus/all/prometheus_targets.yml | 5 - dev/prometheus/linux/prometheus_targets.yml | 5 - doc/admin/deploy/docker-compose/operations.md | 3 - doc/admin/deploy/kubernetes/operations.md | 1 - doc/admin/deploy/scale.md | 35 -- doc/admin/observability/alerts.md | 303 ++---------------- doc/admin/observability/dashboards.md | 298 ++++------------- doc/admin/updates/kubernetes.md | 4 + .../architecture/architecture.dot | 11 +- .../architecture/architecture.svg | 27 -- doc/dev/background-information/sg/index.md | 1 - .../background-information/sg/reference.md | 1 - go.mod | 2 +- internal/batches/sources/github_test.go | 2 + internal/batches/sources/main_test.go | 2 +- internal/extsvc/azuredevops/client.go | 8 +- internal/extsvc/bitbucketcloud/client.go | 4 +- internal/extsvc/github/BUILD.bazel | 7 +- internal/extsvc/github/common.go | 51 +-- internal/extsvc/github/common_test.go | 9 - internal/extsvc/github/globallock.go | 185 +++++++++++ internal/extsvc/github/v3_test.go | 3 + internal/extsvc/gitlab/client.go | 4 +- 
internal/httpcli/client.go | 12 - internal/httptestutil/recorder.go | 7 +- internal/oauthutil/oauth2.go | 9 +- internal/oauthutil/oauth2_test.go | 5 +- internal/repos/github_test.go | 14 + internal/repos/sources_test_utils.go | 3 +- lib/servicecatalog/service-catalog.yaml | 1 - monitoring/BUILD.bazel | 4 +- monitoring/definitions/BUILD.bazel | 2 +- monitoring/definitions/containers.go | 2 +- monitoring/definitions/dashboards.go | 2 +- monitoring/definitions/github.go | 80 +++++ monitoring/definitions/github_proxy.go | 43 --- sg.config.yaml | 31 -- 46 files changed, 433 insertions(+), 778 deletions(-) create mode 100644 internal/extsvc/github/globallock.go create mode 100644 monitoring/definitions/github.go delete mode 100644 monitoring/definitions/github_proxy.go diff --git a/.vscode/launch.json b/.vscode/launch.json index 168dfff12e6..442ac684005 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -192,18 +192,6 @@ "env": {}, "args": [] }, - { - "name": "Attach to github-proxy", - "type": "go", - "request": "launch", - "mode": "remote", - "remotePath": "${workspaceRoot}", - "port": 2351, - "host": "127.0.0.1", - "program": "${workspaceRoot}", - "env": {}, - "args": [] - }, { "name": "Attach to frontend", "type": "go", diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f587c91c7b..0c4e79b585e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ All notable changes to Sourcegraph are documented in this file. - OpenTelemetry Collector has been upgraded to v0.81, and OpenTelemetry packages have been upgraded to v1.16. [#54969](https://github.com/sourcegraph/sourcegraph/pull/54969), [#54999](https://github.com/sourcegraph/sourcegraph/pull/54999) - Bitbucket Cloud code host connections no longer automatically syncs the repository of the username used. The appropriate workspace name will have to be added to the `teams` list if repositories for that account need to be synced. 
[#55095](https://github.com/sourcegraph/sourcegraph/pull/55095) - Newly created access tokens are now hidden by default in the Sourcegraph UI. To view a token, click "show" button next to the token. [#56481](https://github.com/sourcegraph/sourcegraph/pull/56481) +- The GitHub proxy service has been removed and is no longer required. You can safely remove it from your deployment. [#55290](https://github.com/sourcegraph/sourcegraph/issues/55290) ### Fixed diff --git a/cmd/frontend/internal/httpapi/releasecache/main_test.go b/cmd/frontend/internal/httpapi/releasecache/main_test.go index 45f53c8c808..61f46452ec6 100644 --- a/cmd/frontend/internal/httpapi/releasecache/main_test.go +++ b/cmd/frontend/internal/httpapi/releasecache/main_test.go @@ -32,8 +32,7 @@ func TestMain(m *testing.M) { func newClientFactory(t testing.TB, name string) (*httpcli.Factory, func(testing.TB)) { cassetteName := filepath.Join("testdata", strings.ReplaceAll(name, " ", "-")) rec := newRecorder(t, cassetteName, update(name)) - mw := httpcli.NewMiddleware(httpcli.GitHubProxyRedirectMiddleware) - return httpcli.NewFactory(mw, httptestutil.NewRecorderOpt(rec)), + return httpcli.NewFactory(httpcli.NewMiddleware(), httptestutil.NewRecorderOpt(rec)), func(t testing.TB) { save(t, rec) } } diff --git a/cmd/repo-updater/internal/authz/integration_test.go b/cmd/repo-updater/internal/authz/integration_test.go index 7f662508f53..4620367179c 100644 --- a/cmd/repo-updater/internal/authz/integration_test.go +++ b/cmd/repo-updater/internal/authz/integration_test.go @@ -25,6 +25,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/database/dbtest" "github.com/sourcegraph/sourcegraph/internal/extsvc" "github.com/sourcegraph/sourcegraph/internal/extsvc/auth" + "github.com/sourcegraph/sourcegraph/internal/extsvc/github" extsvcGitHub "github.com/sourcegraph/sourcegraph/internal/extsvc/github" "github.com/sourcegraph/sourcegraph/internal/httptestutil" "github.com/sourcegraph/sourcegraph/internal/ratelimit" @@ 
-53,6 +54,7 @@ func TestIntegration_GitHubPermissions(t *testing.T) { t.Skip() } + github.SetupForTest(t) ratelimit.SetupForTest(t) logger := logtest.Scoped(t) diff --git a/cmd/server/BUILD.bazel b/cmd/server/BUILD.bazel index 70c95170900..ecd5d904fbe 100644 --- a/cmd/server/BUILD.bazel +++ b/cmd/server/BUILD.bazel @@ -90,7 +90,6 @@ pkg_tar( ) DEPS = [ - "//cmd/github-proxy", "//cmd/precise-code-intel-worker", "//cmd/searcher", "//enterprise/cmd/embeddings", diff --git a/cmd/server/image_tests/test_services.sh b/cmd/server/image_tests/test_services.sh index 4c155ade35d..d3a71640f43 100755 --- a/cmd/server/image_tests/test_services.sh +++ b/cmd/server/image_tests/test_services.sh @@ -5,7 +5,6 @@ export SANITY_CHECK=true services=( embeddings frontend - github-proxy gitserver migrator precise-code-intel-worker diff --git a/cmd/server/shared/shared.go b/cmd/server/shared/shared.go index ceaefbae4c0..0555cd0dc6d 100644 --- a/cmd/server/shared/shared.go +++ b/cmd/server/shared/shared.go @@ -45,7 +45,6 @@ var DefaultEnv = map[string]string{ "SRC_HTTP_ADDR": ":8080", "SRC_HTTPS_ADDR": ":8443", "SRC_FRONTEND_INTERNAL": FrontendInternalHost, - "GITHUB_BASE_URL": "http://127.0.0.1:3180", // points to github-proxy "GRAFANA_SERVER_URL": "http://127.0.0.1:3370", "PROMETHEUS_URL": "http://127.0.0.1:9090", @@ -162,7 +161,6 @@ func Main() { gitserverLine, `symbols: symbols`, `searcher: searcher`, - `github-proxy: github-proxy`, `worker: worker`, `repo-updater: repo-updater`, `precise-code-intel-worker: precise-code-intel-worker`, diff --git a/cmd/sourcegraph/BUILD.bazel b/cmd/sourcegraph/BUILD.bazel index 19b97055568..7c9b4951f7c 100644 --- a/cmd/sourcegraph/BUILD.bazel +++ b/cmd/sourcegraph/BUILD.bazel @@ -9,7 +9,6 @@ go_library( "//cmd/blobstore/shared", "//cmd/executor/singlebinary", "//cmd/frontend/shared", - "//cmd/github-proxy/shared", "//cmd/gitserver/shared", "//cmd/precise-code-intel-worker/shared", "//cmd/repo-updater/shared", diff --git a/cmd/sourcegraph/main.go 
b/cmd/sourcegraph/main.go index 838bdb10b6c..6a42200931c 100644 --- a/cmd/sourcegraph/main.go +++ b/cmd/sourcegraph/main.go @@ -12,7 +12,6 @@ import ( blobstore_shared "github.com/sourcegraph/sourcegraph/cmd/blobstore/shared" executor_singlebinary "github.com/sourcegraph/sourcegraph/cmd/executor/singlebinary" frontend_shared "github.com/sourcegraph/sourcegraph/cmd/frontend/shared" - githubproxy_shared "github.com/sourcegraph/sourcegraph/cmd/github-proxy/shared" gitserver_shared "github.com/sourcegraph/sourcegraph/cmd/gitserver/shared" precise_code_intel_worker_shared "github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker/shared" repoupdater_shared "github.com/sourcegraph/sourcegraph/cmd/repo-updater/shared" @@ -34,7 +33,6 @@ var services = []service.Service{ blobstore_shared.Service, symbols_shared.Service, worker_shared.Service, - githubproxy_shared.Service, precise_code_intel_worker_shared.Service, executor_singlebinary.Service, servegit.Service, diff --git a/dev/prometheus/all/prometheus_targets.yml b/dev/prometheus/all/prometheus_targets.yml index 6e06ae64f71..eba41d0a13d 100644 --- a/dev/prometheus/all/prometheus_targets.yml +++ b/dev/prometheus/all/prometheus_targets.yml @@ -58,11 +58,6 @@ targets: # postgres exporter - host.docker.internal:9187 -- labels: - job: github-proxy - targets: - # github proxy - - host.docker.internal:6090 - labels: job: otel-collector targets: diff --git a/dev/prometheus/linux/prometheus_targets.yml b/dev/prometheus/linux/prometheus_targets.yml index fcea3e2dc5c..3594197ca0c 100644 --- a/dev/prometheus/linux/prometheus_targets.yml +++ b/dev/prometheus/linux/prometheus_targets.yml @@ -58,11 +58,6 @@ targets: # postgres exporter - 127.0.0.1:9187 -- labels: - job: github-proxy - targets: - # github proxy - - 127.0.0.1:6090 - labels: job: otel-collector targets: diff --git a/doc/admin/deploy/docker-compose/operations.md b/doc/admin/deploy/docker-compose/operations.md index fdd90e8ce9b..c27e990740d 100644 --- 
a/doc/admin/deploy/docker-compose/operations.md +++ b/doc/admin/deploy/docker-compose/operations.md @@ -59,7 +59,6 @@ caddy caddy run --config /etc/ca ... Up cadvisor /usr/bin/cadvisor -logtost ... Up (health: starting) 8080/tcp codeinsights-db docker-entrypoint.sh postgres Up 5432/tcp codeintel-db /postgres.sh Up (healthy) 5432/tcp -github-proxy /sbin/tini -- /usr/local/b ... Up gitserver-0 /sbin/tini -- /usr/local/b ... Up grafana /entry.sh Up 3000/tcp, 0.0.0.0:3370->3370/tcp jaeger /go/bin/all-in-one-linux - ... Up 0.0.0.0:14250->14250/tcp, 14268/tcp, 0.0.0.0:16686->16686/tcp, 5775/udp, 0.0.0.0:5778->5778/tcp, @@ -137,7 +136,6 @@ caddy caddy run --config /etc/ca ... Up cadvisor /usr/bin/cadvisor -logtost ... Up (health: starting) 8080/tcp codeinsights-db docker-entrypoint.sh postgres Up 5432/tcp codeintel-db /postgres.sh Up (healthy) 5432/tcp -github-proxy /sbin/tini -- /usr/local/b ... Up gitserver-0 /sbin/tini -- /usr/local/b ... Up grafana /entry.sh Up 3000/tcp, 0.0.0.0:3370->3370/tcp jaeger /go/bin/all-in-one-linux - ... Up 0.0.0.0:14250->14250/tcp, 14268/tcp, 0.0.0.0:16686->16686/tcp, 5775/udp, 0.0.0.0:5778->5778/tcp, @@ -205,7 +203,6 @@ caddy caddy run --config /etc/ca ... Up cadvisor /usr/bin/cadvisor -logtost ... Up (health: starting) 8080/tcp codeinsights-db docker-entrypoint.sh postgres Up 5432/tcp codeintel-db /postgres.sh Up (healthy) 5432/tcp -github-proxy /sbin/tini -- /usr/local/b ... Up gitserver-0 /sbin/tini -- /usr/local/b ... Up grafana /entry.sh Up 3000/tcp, 0.0.0.0:3370->3370/tcp jaeger /go/bin/all-in-one-linux - ... 
Up 0.0.0.0:14250->14250/tcp, 14268/tcp, 0.0.0.0:16686->16686/tcp, 5775/udp, 0.0.0.0:5778->5778/tcp, diff --git a/doc/admin/deploy/kubernetes/operations.md b/doc/admin/deploy/kubernetes/operations.md index 3008e04c5bf..f9e7620c575 100644 --- a/doc/admin/deploy/kubernetes/operations.md +++ b/doc/admin/deploy/kubernetes/operations.md @@ -434,7 +434,6 @@ blobstore ClusterIP 10.72.3.144 9000/TC cadvisor ClusterIP 10.72.14.130 48080/TCP 23h codeinsights-db ClusterIP 10.72.6.240 5432/TCP,9187/TCP 25h codeintel-db ClusterIP 10.72.5.10 5432/TCP,9187/TCP 25h -github-proxy ClusterIP 10.72.10.117 80/TCP,6060/TCP 25h gitserver ClusterIP None 10811/TCP 25h grafana ClusterIP 10.72.6.245 30070/TCP 25h indexed-search ClusterIP None 6070/TCP 25h diff --git a/doc/admin/deploy/scale.md b/doc/admin/deploy/scale.md index 0ada73ee201..b583eaaee2b 100644 --- a/doc/admin/deploy/scale.md +++ b/doc/admin/deploy/scale.md @@ -46,7 +46,6 @@ Here is a list of components you can find in a typical Sourcegraph deployment: | | | | :-------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | [`frontend`](scale.md#frontend) | Serves the web application, extensions, and graphQL services. Almost every service has a link back to the frontend, from which it gathers configuration updates. | -| [`github-proxy`](scale.md#github-proxy) | Proxies all requests to github.com to keep track of rate limits and prevent triggering abuse mechanisms. | | [`gitserver`](scale.md#gitserver) | Mirrors repositories from their code host. All other Sourcegraph services talk to gitserver when they need data from git. | | [`precise-code-intel`](scale.md#precise-code-intel) | Converts LSIF upload file into Postgres data. The entire index must be read into memory to be correlated. 
| | [`repo-updater`](scale.md#repo-updater) | Tracks the state of repositories. It is responsible for automatically scheduling updates using gitserver and for synchronizing metadata between code hosts and external services. | @@ -232,40 +231,6 @@ Serves the Sourcegraph web application, extensions, and graphQL API services. --- -### github-proxy - -``` -Proxies all requests to github.com to keep track of rate limits. -It also prevents triggering abuse mechanisms. -``` - -| Replica | | -| :---------- | :------------------------------------------------------ | -| `Overview` | Singleton | -| `Factors` | - | -| `Guideline` | A Singleton service should not have more than 1 replica | - -| CPU | | -| :---------- | :----------------------------------------------------- | -| `Overview` | A thread is dispatched per request | -| `Factors` | Number of API requests to GitHub and GitHub Enterprise | -| `Guideline` | The default value should work for all deployments | - -| Memory | | -| :---------- | :---------------------------------------------------------- | -| `Overview` | Linear to the concurrent number of API requests proxied | -| `Factors` | Number of API requests to GitHub and GitHub Enterprise | -| `Guideline` | The default setup should be sufficient for most deployments | - -| Storage | | -| :---------- | :--- | -| `Overview` | - | -| `Factors` | - | -| `Guideline` | - | -| `Type` | None | - ---- - ### gitserver ``` diff --git a/doc/admin/observability/alerts.md b/doc/admin/observability/alerts.md index 2b44cfbcc04..cc132f46a93 100644 --- a/doc/admin/observability/alerts.md +++ b/doc/admin/observability/alerts.md @@ -1862,24 +1862,25 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*gitserver"})
-## github-proxy: github_proxy_waiting_requests +## github: src_githubcom_concurrency_lock_waiting_requests

number of requests waiting on the global mutex

**Descriptions** -- warning github-proxy: 100+ number of requests waiting on the global mutex for 5m0s +- warning github: 100+ number of requests waiting on the global mutex for 5m0s **Next steps** -- - **Check github-proxy logs for network connection issues. - - **Check github status. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-github-proxy-waiting-requests). +- - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. + - **Check redis-store health. + - **Check GitHub status. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-src-githubcom-concurrency-lock-waiting-requests). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json "observability.silenceAlerts": [ - "warning_github-proxy_github_proxy_waiting_requests" + "warning_github_src_githubcom_concurrency_lock_waiting_requests" ] ``` @@ -1888,323 +1889,71 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*gitserver"})
Technical details -Generated query for warning alert: `max((max(github_proxy_waiting_requests)) >= 100)` +Generated query for warning alert: `max((max(src_githubcom_concurrency_lock_waiting_requests)) >= 100)`

-## github-proxy: container_cpu_usage +## github: src_githubcom_concurrency_lock_failed_lock_requests -

container cpu usage total (1m average) across all cores by instance

+

number of lock failures

**Descriptions** -- warning github-proxy: 99%+ container cpu usage total (1m average) across all cores by instance +- warning github: 100+ number of lock failures for 5m0s **Next steps** -- **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. -- **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-container-cpu-usage). +- - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. + - **Check redis-store health. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-src-githubcom-concurrency-lock-failed-lock-requests). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json "observability.silenceAlerts": [ - "warning_github-proxy_container_cpu_usage" + "warning_github_src_githubcom_concurrency_lock_failed_lock_requests" ] ``` -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*
Technical details -Generated query for warning alert: `max((cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}) >= 99)` +Generated query for warning alert: `max((sum(rate(src_githubcom_concurrency_lock_failed_lock_requests[5m]))) >= 100)`

-## github-proxy: container_memory_usage +## github: src_githubcom_concurrency_lock_failed_unlock_requests -

container memory usage by instance

+

number of unlock failures

**Descriptions** -- warning github-proxy: 99%+ container memory usage by instance +- warning github: 100+ number of unlock failures for 5m0s **Next steps** -- **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. -- **Docker Compose:** Consider increasing `memory:` of github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-container-memory-usage). +- - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. + - **Check redis-store health. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-src-githubcom-concurrency-lock-failed-unlock-requests). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json "observability.silenceAlerts": [ - "warning_github-proxy_container_memory_usage" + "warning_github_src_githubcom_concurrency_lock_failed_unlock_requests" ] ``` -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*
Technical details -Generated query for warning alert: `max((cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}) >= 99)` - -
- -
- -## github-proxy: provisioning_container_cpu_usage_long_term - -

container cpu usage total (90th percentile over 1d) across all cores by instance

- -**Descriptions** - -- warning github-proxy: 80%+ container cpu usage total (90th percentile over 1d) across all cores by instance for 336h0m0s - -**Next steps** - -- **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the github-proxy service. -- **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-cpu-usage-long-term). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_provisioning_container_cpu_usage_long_term" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[1d])) >= 80)` - -
- -
- -## github-proxy: provisioning_container_memory_usage_long_term - -

container memory usage (1d maximum) by instance

- -**Descriptions** - -- warning github-proxy: 80%+ container memory usage (1d maximum) by instance for 336h0m0s - -**Next steps** - -- **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the github-proxy service. -- **Docker Compose:** Consider increasing `memory:` of the github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-memory-usage-long-term). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_provisioning_container_memory_usage_long_term" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[1d])) >= 80)` - -
- -
- -## github-proxy: provisioning_container_cpu_usage_short_term - -

container cpu usage total (5m maximum) across all cores by instance

- -**Descriptions** - -- warning github-proxy: 90%+ container cpu usage total (5m maximum) across all cores by instance for 30m0s - -**Next steps** - -- **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. -- **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-cpu-usage-short-term). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_provisioning_container_cpu_usage_short_term" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[5m])) >= 90)` - -
- -
- -## github-proxy: provisioning_container_memory_usage_short_term - -

container memory usage (5m maximum) by instance

- -**Descriptions** - -- warning github-proxy: 90%+ container memory usage (5m maximum) by instance - -**Next steps** - -- **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. -- **Docker Compose:** Consider increasing `memory:` of github-proxy container in `docker-compose.yml`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-memory-usage-short-term). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_provisioning_container_memory_usage_short_term" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[5m])) >= 90)` - -
- -
- -## github-proxy: container_oomkill_events_total - -

container OOMKILL events total by instance

- -**Descriptions** - -- warning github-proxy: 1+ container OOMKILL events total by instance - -**Next steps** - -- **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. -- **Docker Compose:** Consider increasing `memory:` of github-proxy container in `docker-compose.yml`. -- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#github-proxy-container-oomkill-events-total). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_container_oomkill_events_total" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max by (name) (container_oom_events_total{name=~"^github-proxy.*"})) >= 1)` - -
- -
- -## github-proxy: go_goroutines - -

maximum active goroutines

- -**Descriptions** - -- warning github-proxy: 10000+ maximum active goroutines for 10m0s - -**Next steps** - -- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#github-proxy-go-goroutines). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_go_goroutines" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max by (instance) (go_goroutines{job=~".*github-proxy"})) >= 10000)` - -
- -
- -## github-proxy: go_gc_duration_seconds - -

maximum go garbage collection duration

- -**Descriptions** - -- warning github-proxy: 2s+ maximum go garbage collection duration - -**Next steps** - -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-go-gc-duration-seconds). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_github-proxy_go_gc_duration_seconds" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for warning alert: `max((max by (instance) (go_gc_duration_seconds{job=~".*github-proxy"})) >= 2)` - -
- -
- -## github-proxy: pods_available_percentage - -

percentage pods available

- -**Descriptions** - -- critical github-proxy: less than 90% percentage pods available for 10m0s - -**Next steps** - -- Determine if the pod was OOM killed using `kubectl describe pod github-proxy` (look for `OOMKilled: true`) and, if so, consider increasing the memory limit in the relevant `Deployment.yaml`. -- Check the logs before the container restarted to see if there are `panic:` messages or similar using `kubectl logs -p github-proxy`. -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-pods-available-percentage). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "critical_github-proxy_pods_available_percentage" -] -``` - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Generated query for critical alert: `min((sum by (app) (up{app=~".*github-proxy"}) / count by (app) (up{app=~".*github-proxy"}) * 100) <= 90)` +Generated query for warning alert: `max((sum(rate(src_githubcom_concurrency_lock_failed_unlock_requests[5m]))) >= 100)`
diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md index d22b51d1f98..c381db788a0 100644 --- a/doc/admin/observability/dashboards.md +++ b/doc/admin/observability/dashboards.md @@ -7710,282 +7710,108 @@ Query: `sum by(app) (up{app=~".*gitserver"}) / count by (app) (up{app=~".*gitser
-## GitHub Proxy +## GitHub -

Proxies all requests to github.com, keeping track of and managing rate limits.

+

Dashboard to track requests and global concurrency locks for talking to github.com.

-To see this dashboard, visit `/-/debug/grafana/d/github-proxy/github-proxy` on your Sourcegraph instance. +To see this dashboard, visit `/-/debug/grafana/d/github/github` on your Sourcegraph instance. -### GitHub Proxy: GitHub API monitoring +### GitHub: GitHub API monitoring -#### github-proxy: github_proxy_waiting_requests +#### github: src_githubcom_concurrency_lock_waiting_requests

Number of requests waiting on the global mutex

-Refer to the [alerts reference](./alerts.md#github-proxy-github-proxy-waiting-requests) for 1 alert related to this panel. +Refer to the [alerts reference](./alerts.md#github-src-githubcom-concurrency-lock-waiting-requests) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100000` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/github/github?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*
Technical details -Query: `max(github_proxy_waiting_requests)` +Query: `max(src_githubcom_concurrency_lock_waiting_requests)`

-### GitHub Proxy: Container monitoring (not available on server) +#### github: src_githubcom_concurrency_lock_failed_lock_requests -#### github-proxy: container_missing +

Number of lock failures

-

Container missing

+Refer to the [alerts reference](./alerts.md#github-src-githubcom-concurrency-lock-failed-lock-requests) for 1 alert related to this panel. -This value is the number of times a container has not been seen for more than one minute. If you observe this -value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. +To see this panel, visit `/-/debug/grafana/d/github/github?viewPanel=100010` on your Sourcegraph instance. -- **Kubernetes:** - - Determine if the pod was OOM killed using `kubectl describe pod github-proxy` (look for `OOMKilled: true`) and, if so, consider increasing the memory limit in the relevant `Deployment.yaml`. - - Check the logs before the container restarted to see if there are `panic:` messages or similar using `kubectl logs -p github-proxy`. -- **Docker Compose:** - - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' github-proxy` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the github-proxy container in `docker-compose.yml`. - - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs github-proxy` (note this will include logs from the previous and currently running container). +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).* + +
+Technical details + +Query: `sum(rate(src_githubcom_concurrency_lock_failed_lock_requests[5m]))` + +
+ +
+ +#### github: src_githubcom_concurrency_lock_failed_unlock_requests + +

Number of unlock failures

+ +Refer to the [alerts reference](./alerts.md#github-src-githubcom-concurrency-lock-failed-unlock-requests) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github/github?viewPanel=100011` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).* + +
+Technical details + +Query: `sum(rate(src_githubcom_concurrency_lock_failed_unlock_requests[5m]))` + +
+ +
+ +#### github: src_githubcom_concurrency_lock_requests + +

Number of locks taken global mutex

+ +A high number of locks indicates heavy usage of the GitHub API. This might not be a problem, but you should check if request counts are expected. This panel has no related alerts. -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100100` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/github/github?viewPanel=100020` on your Sourcegraph instance. -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*
Technical details -Query: `count by(name) ((time() - container_last_seen{name=~"^github-proxy.*"}) > 60)` +Query: `sum(rate(src_githubcom_concurrency_lock_requests[5m]))`

-#### github-proxy: container_cpu_usage +#### github: src_githubcom_concurrency_lock_acquire_duration_seconds_latency_p75 -

Container cpu usage total (1m average) across all cores by instance

+

75 percentile latency of src_githubcom_concurrency_lock_acquire_duration_seconds

-Refer to the [alerts reference](./alerts.md#github-proxy-container-cpu-usage) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100101` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}` - -
- -
- -#### github-proxy: container_memory_usage - -

Container memory usage by instance

- -Refer to the [alerts reference](./alerts.md#github-proxy-container-memory-usage) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100102` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}` - -
- -
- -#### github-proxy: fs_io_operations - -

Filesystem reads and writes rate by instance over 1h

- -This value indicates the number of filesystem read and write operations by containers of this service. -When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with {{CONTAINER_NAME}} issues. +75 percentile latency of acquiring the global GitHub concurrency lock. This panel has no related alerts. -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100103` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/github/github?viewPanel=100021` on your Sourcegraph instance. -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* +*Managed by the [Sourcegraph Source team](https://handbook.sourcegraph.com/departments/engineering/teams/source).*
Technical details -Query: `sum by(name) (rate(container_fs_reads_total{name=~"^github-proxy.*"}[1h]) + rate(container_fs_writes_total{name=~"^github-proxy.*"}[1h]))` - -
- -
- -### GitHub Proxy: Provisioning indicators (not available on server) - -#### github-proxy: provisioning_container_cpu_usage_long_term - -

Container cpu usage total (90th percentile over 1d) across all cores by instance

- -Refer to the [alerts reference](./alerts.md#github-proxy-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100200` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[1d])` - -
- -
- -#### github-proxy: provisioning_container_memory_usage_long_term - -

Container memory usage (1d maximum) by instance

- -Refer to the [alerts reference](./alerts.md#github-proxy-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100201` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[1d])` - -
- -
- -#### github-proxy: provisioning_container_cpu_usage_short_term - -

Container cpu usage total (5m maximum) across all cores by instance

- -Refer to the [alerts reference](./alerts.md#github-proxy-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100210` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[5m])` - -
- -
- -#### github-proxy: provisioning_container_memory_usage_short_term - -

Container memory usage (5m maximum) by instance

- -Refer to the [alerts reference](./alerts.md#github-proxy-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100211` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[5m])` - -
- -
- -#### github-proxy: container_oomkill_events_total - -

Container OOMKILL events total by instance

- -This value indicates the total number of times the container main process or child processes were terminated by OOM killer. -When it occurs frequently, it is an indicator of underprovisioning. - -Refer to the [alerts reference](./alerts.md#github-proxy-container-oomkill-events-total) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100212` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max by (name) (container_oom_events_total{name=~"^github-proxy.*"})` - -
- -
- -### GitHub Proxy: Golang runtime monitoring - -#### github-proxy: go_goroutines - -

Maximum active goroutines

- -A high value here indicates a possible goroutine leak. - -Refer to the [alerts reference](./alerts.md#github-proxy-go-goroutines) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100300` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max by(instance) (go_goroutines{job=~".*github-proxy"})` - -
- -
- -#### github-proxy: go_gc_duration_seconds - -

Maximum go garbage collection duration

- -Refer to the [alerts reference](./alerts.md#github-proxy-go-gc-duration-seconds) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100301` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `max by(instance) (go_gc_duration_seconds{job=~".*github-proxy"})` - -
- -
- -### GitHub Proxy: Kubernetes monitoring (only available on Kubernetes) - -#### github-proxy: pods_available_percentage - -

Percentage pods available

- -Refer to the [alerts reference](./alerts.md#github-proxy-pods-available-percentage) for 1 alert related to this panel. - -To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100400` on your Sourcegraph instance. - -*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).* - -
-Technical details - -Query: `sum by(app) (up{app=~".*github-proxy"}) / count by (app) (up{app=~".*github-proxy"}) * 100` +Query: `histogram_quantile(0.75, sum(rate(src_githubcom_concurrency_lock_acquire_duration_seconds_bucket[5m])) by (le))`
@@ -23908,7 +23734,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}` +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}`
@@ -23929,7 +23755,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}` +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}`
@@ -23952,7 +23778,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}[5m]) >= 80` +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}[5m]) >= 80`
@@ -23973,7 +23799,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}[5m]) >= 80` +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}[5m]) >= 80`
@@ -23995,7 +23821,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `max by (name) (container_oom_events_total{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}) >= 1` +Query: `max by (name) (container_oom_events_total{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}) >= 1`
@@ -24017,7 +23843,7 @@ To see this panel, visit `/-/debug/grafana/d/containers/containers?viewPanel=100
Technical details -Query: `count by(name) ((time() - container_last_seen{name=~"^(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}) > 60)` +Query: `count by(name) ((time() - container_last_seen{name=~"^(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger).*"}) > 60)`
diff --git a/doc/admin/updates/kubernetes.md b/doc/admin/updates/kubernetes.md index 45df150a066..a8e5863841c 100644 --- a/doc/admin/updates/kubernetes.md +++ b/doc/admin/updates/kubernetes.md @@ -16,6 +16,10 @@ For upgrade procedures or general info about sourcegraph versioning see the link ## Unreleased +#### Notes for 5.2: + +- The GitHub proxy service has been removed and is no longer required. You can safely remove it. [#55290](https://github.com/sourcegraph/sourcegraph/issues/55290) + No applicable notes for unreleased versions. diff --git a/doc/dev/background-information/architecture/architecture.dot b/doc/dev/background-information/architecture/architecture.dot index 94cf8161c81..0d39b1efbed 100644 --- a/doc/dev/background-information/architecture/architecture.dot +++ b/doc/dev/background-information/architecture/architecture.dot @@ -133,12 +133,6 @@ digraph architecture { URL="https://github.com/sourcegraph/sourcegraph/tree/main/cmd/repo-updater" ] - github_proxy [ - label="github proxy" - fillcolor="#aaaaff" - URL="https://github.com/sourcegraph/sourcegraph/tree/main/cmd/github-proxy" - ] - syntect_server [ label="syntect\nserver" fillcolor="#cc0085" @@ -198,7 +192,6 @@ digraph architecture { web_app -> frontend[ltail=cluster_clients, fillcolor="#fff0d0"] gitserver -> {bitbucket_server} [lhead=cluster_codehosts, fillcolor="#cd5c5c"] repo_updater -> {bitbucket_server} [lhead=cluster_codehosts, fillcolor="#05a167"] - github_proxy -> github_dot_com [fillcolor="#aaaaff"] /* To databases */ frontend -> {postgres, codeintel_db, codeinsights_db} [fillcolor="#7e78dc"] @@ -217,7 +210,7 @@ digraph architecture { /* Unconstrained internal routes */ codeintel_worker -> {blob_store} [fillcolor="#eac1c1", constraint=false] - gitserver -> {github_proxy} [fillcolor="#cd5c5c", constraint=false] - repo_updater -> {github_proxy} [fillcolor="#05a167"] + gitserver -> {github_dot_com} [fillcolor="#cd5c5c", constraint=false] + repo_updater -> {github_dot_com} 
[fillcolor="#05a167"] repo_updater -> {postgres} [fillcolor="#05a167", constraint=false] } diff --git a/doc/dev/background-information/architecture/architecture.svg b/doc/dev/background-information/architecture/architecture.svg index 7c1469fddb8..9de5ee8a48f 100644 --- a/doc/dev/background-information/architecture/architecture.svg +++ b/doc/dev/background-information/architecture/architecture.svg @@ -405,21 +405,6 @@ - - -github_proxy - - -github proxy - - - - - -gitserver->github_proxy - - - bitbucket_server @@ -433,12 +418,6 @@ - - -repo_updater->github_proxy - - - repo_updater->postgres @@ -457,12 +436,6 @@ github.com - - -github_proxy->github_dot_com - - - executor diff --git a/doc/dev/background-information/sg/index.md b/doc/dev/background-information/sg/index.md index f2cf062b8ff..ec203b047e7 100644 --- a/doc/dev/background-information/sg/index.md +++ b/doc/dev/background-information/sg/index.md @@ -164,7 +164,6 @@ commandsets: - searcher - symbols - caddy - - github-proxy - zoekt-indexserver-0 - zoekt-indexserver-1 - zoekt-webserver-0 diff --git a/doc/dev/background-information/sg/reference.md b/doc/dev/background-information/sg/reference.md index 85f20835c2e..d1b1fb36f2a 100644 --- a/doc/dev/background-information/sg/reference.md +++ b/doc/dev/background-information/sg/reference.md @@ -112,7 +112,6 @@ Available commands in `sg.config.yaml`: * executor-kubernetes-template * executor-template * frontend: Frontend -* github-proxy * gitserver * gitserver-0 * gitserver-1 diff --git a/go.mod b/go.mod index f0f8743028c..8bbd758fe96 100644 --- a/go.mod +++ b/go.mod @@ -132,7 +132,6 @@ require ( github.com/google/uuid v1.3.0 github.com/gorilla/context v1.1.1 github.com/gorilla/csrf v1.7.1 - github.com/gorilla/handlers v1.5.1 github.com/gorilla/mux v1.8.0 github.com/gorilla/schema v1.2.0 github.com/gorilla/securecookie v1.1.1 @@ -270,6 +269,7 @@ require ( github.com/aws/jsii-runtime-go v1.84.0 github.com/edsrzf/mmap-go v1.1.0 github.com/go-redsync/redsync/v4 v4.8.1 + 
github.com/gorilla/handlers v1.5.1 github.com/hashicorp/cronexpr v1.1.1 github.com/hashicorp/go-tfe v1.32.1 github.com/hashicorp/terraform-cdk-go/cdktf v0.17.3 diff --git a/internal/batches/sources/github_test.go b/internal/batches/sources/github_test.go index c6dcbd8124b..05350dfa8c9 100644 --- a/internal/batches/sources/github_test.go +++ b/internal/batches/sources/github_test.go @@ -116,6 +116,7 @@ func TestGithubSource_CreateChangeset(t *testing.T) { } func TestGithubSource_CreateChangeset_CreationLimit(t *testing.T) { + github.SetupForTest(t) cli := new(mockDoer) // Version lookup versionMatchedBy := func(req *http.Request) bool { @@ -888,6 +889,7 @@ func setup(t *testing.T, ctx context.Context, tName string) (src *GitHubSource, // The GithubSource uses the github.Client under the hood, which uses rcache, a // caching layer that uses Redis. We need to clear the cache before we run the tests rcache.SetupForTest(t) + github.SetupForTest(t) cf, save := newClientFactory(t, tName) diff --git a/internal/batches/sources/main_test.go b/internal/batches/sources/main_test.go index 1e8f2a19b10..7ef912d7181 100644 --- a/internal/batches/sources/main_test.go +++ b/internal/batches/sources/main_test.go @@ -39,7 +39,7 @@ func TestMain(m *testing.M) { func newClientFactory(t testing.TB, name string) (*httpcli.Factory, func(testing.TB)) { cassete := filepath.Join("testdata", "sources", strings.ReplaceAll(name, " ", "-")) rec := newRecorder(t, cassete, update(name)) - mw := httpcli.NewMiddleware(httpcli.GitHubProxyRedirectMiddleware, gitserverRedirectMiddleware) + mw := httpcli.NewMiddleware(gitserverRedirectMiddleware) return httpcli.NewFactory(mw, httptestutil.NewRecorderOpt(rec)), func(t testing.TB) { save(t, rec) } } diff --git a/internal/extsvc/azuredevops/client.go b/internal/extsvc/azuredevops/client.go index 5efa309d93a..2a82e0134e6 100644 --- a/internal/extsvc/azuredevops/client.go +++ b/internal/extsvc/azuredevops/client.go @@ -134,7 +134,9 @@ func (c *client) do(ctx 
context.Context, req *http.Request, urlOverride string, } logger := log.Scoped("azuredevops.Client", "azuredevops Client logger") - resp, err := oauthutil.DoRequest(ctx, logger, c.httpClient, req, c.auth) + resp, err := oauthutil.DoRequest(ctx, logger, c.httpClient, req, c.auth, func(r *http.Request) (*http.Response, error) { + return c.httpClient.Do(r) + }) if err != nil { return "", err } @@ -149,7 +151,9 @@ func (c *client) do(ctx context.Context, req *http.Request, urlOverride string, _ = c.externalRateLimiter.WaitForRateLimit(ctx, 1) req.Body = io.NopCloser(bytes.NewReader(reqBody)) - resp, err = oauthutil.DoRequest(ctx, logger, c.httpClient, req, c.auth) + resp, err = oauthutil.DoRequest(ctx, logger, c.httpClient, req, c.auth, func(r *http.Request) (*http.Response, error) { + return c.httpClient.Do(r) + }) numRetries++ } diff --git a/internal/extsvc/bitbucketcloud/client.go b/internal/extsvc/bitbucketcloud/client.go index e2ade07d7d2..1118f5a77e6 100644 --- a/internal/extsvc/bitbucketcloud/client.go +++ b/internal/extsvc/bitbucketcloud/client.go @@ -241,7 +241,9 @@ func (c *client) do(ctx context.Context, req *http.Request, result any) (code in var resp *http.Response sleepTime := 10 * time.Second for { - resp, err = oauthutil.DoRequest(ctx, nil, c.httpClient, req, c.Auth) + resp, err = oauthutil.DoRequest(ctx, nil, c.httpClient, req, c.Auth, func(r *http.Request) (*http.Response, error) { + return c.httpClient.Do(r) + }) if resp != nil { code = resp.StatusCode } diff --git a/internal/extsvc/github/BUILD.bazel b/internal/extsvc/github/BUILD.bazel index d18f4bce9c9..ac25740f2dc 100644 --- a/internal/extsvc/github/BUILD.bazel +++ b/internal/extsvc/github/BUILD.bazel @@ -6,6 +6,7 @@ go_library( srcs = [ "common.go", "doc.go", + "globallock.go", "v3.go", "v4.go", ], @@ -14,7 +15,6 @@ go_library( deps = [ "//internal/api", "//internal/conf", - "//internal/conf/deploy", "//internal/encryption", "//internal/env", "//internal/extsvc", @@ -23,14 +23,19 @@ go_library( 
"//internal/metrics", "//internal/oauthutil", "//internal/ratelimit", + "//internal/redispool", "//internal/trace", "//lib/errors", + "@com_github_go_redsync_redsync_v4//:redsync", + "@com_github_go_redsync_redsync_v4//redis/redigo", "@com_github_google_go_github//github", "@com_github_google_go_github_v41//github", "@com_github_graphql_go_graphql//language/ast", "@com_github_graphql_go_graphql//language/parser", "@com_github_graphql_go_graphql//language/visitor", "@com_github_masterminds_semver//:semver", + "@com_github_prometheus_client_golang//prometheus", + "@com_github_prometheus_client_golang//prometheus/promauto", "@com_github_segmentio_fasthash//fnv1", "@com_github_sourcegraph_log//:log", "@io_opentelemetry_go_otel//attribute", diff --git a/internal/extsvc/github/common.go b/internal/extsvc/github/common.go index 6f05f429abe..eb5a361ee71 100644 --- a/internal/extsvc/github/common.go +++ b/internal/extsvc/github/common.go @@ -23,7 +23,6 @@ import ( "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/conf" - "github.com/sourcegraph/sourcegraph/internal/conf/deploy" "github.com/sourcegraph/sourcegraph/internal/encryption" "github.com/sourcegraph/sourcegraph/internal/env" "github.com/sourcegraph/sourcegraph/internal/extsvc" @@ -1531,35 +1530,13 @@ func ExternalRepoSpec(repo *Repository, baseURL *url.URL) api.ExternalRepoSpec { } } -func githubBaseURLDefault() string { - if deploy.IsSingleBinary() { - return "" - } - return "http://github-proxy" -} - var ( gitHubDisable, _ = strconv.ParseBool(env.Get("SRC_GITHUB_DISABLE", "false", "disables communication with GitHub instances. Used to test GitHub service degradation")) // The metric generated here will be named as "src_github_requests_total". 
requestCounter = metrics.NewRequestMeter("github", "Total number of requests sent to the GitHub API.") - - // Get raw proxy URL at service startup, but only get parsed URL at runtime with getGithubProxyURL - githubProxyRawURL = env.Get("GITHUB_BASE_URL", githubBaseURLDefault(), "base URL for GitHub.com API (used for github-proxy)") ) -func getGithubProxyURL() (*url.URL, bool) { - if githubProxyRawURL == "" { - return nil, false - } - parsedUrl, err := url.Parse(githubProxyRawURL) - if err != nil { - log.Scoped("extsvc.github", "github package").Fatal("Error parsing GITHUB_BASE_URL", log.Error(err)) - return nil, false - } - return parsedUrl, true -} - // APIRoot returns the root URL of the API using the base URL of the GitHub instance. func APIRoot(baseURL *url.URL) (apiURL *url.URL, githubDotCom bool) { if hostname := strings.ToLower(baseURL.Hostname()); hostname == "github.com" || hostname == "www.github.com" { @@ -1605,7 +1582,15 @@ func doRequest(ctx context.Context, logger log.Logger, apiURL *url.URL, auther a }() req = req.WithContext(ctx) - resp, err = oauthutil.DoRequest(ctx, logger, httpClient, req, auther) + resp, err = oauthutil.DoRequest(ctx, logger, httpClient, req, auther, func(r *http.Request) (*http.Response, error) { + // For GitHub.com, to avoid running into rate limits we're limiting concurrency + // per auth token to 1 globally. + if urlIsGitHubDotCom(r.URL) { + return restrictGitHubDotComConcurrency(logger, httpClient, r) + } + + return httpClient.Do(r) + }) if err != nil { return nil, errors.Wrap(err, "request failed") } @@ -1650,11 +1635,9 @@ func doRequest(ctx context.Context, logger log.Logger, apiURL *url.URL, auther a func canonicalizedURL(apiURL *url.URL) *url.URL { if urlIsGitHubDotCom(apiURL) { - // For GitHub.com API requests, use github-proxy (which adds our OAuth2 client ID/secret to get a much higher - // rate limit). 
- u, ok := getGithubProxyURL() - if ok { - return u + return &url.URL{ + Scheme: "https", + Host: "api.github.com", } } return apiURL @@ -1662,15 +1645,7 @@ func canonicalizedURL(apiURL *url.URL) *url.URL { func urlIsGitHubDotCom(apiURL *url.URL) bool { hostname := strings.ToLower(apiURL.Hostname()) - if hostname == "api.github.com" || hostname == "github.com" || hostname == "www.github.com" { - return true - } - - if u, ok := getGithubProxyURL(); ok { - return apiURL.String() == u.String() - } - - return false + return hostname == "api.github.com" || hostname == "github.com" || hostname == "www.github.com" } var ErrRepoNotFound = &RepoNotFoundError{} diff --git a/internal/extsvc/github/common_test.go b/internal/extsvc/github/common_test.go index ff3c87a6380..789ebb172e5 100644 --- a/internal/extsvc/github/common_test.go +++ b/internal/extsvc/github/common_test.go @@ -1,7 +1,6 @@ package github import ( - "fmt" "io" "net/http" "strings" @@ -39,11 +38,3 @@ func (s *mockHTTPResponseBody) Do(req *http.Request) (*http.Response, error) { Body: io.NopCloser(strings.NewReader(s.responseBody)), }, nil } - -func stringForRepoList(repos []*Repository) string { - repoStrings := []string{} - for _, repo := range repos { - repoStrings = append(repoStrings, fmt.Sprintf("%#v", repo)) - } - return "{\n" + strings.Join(repoStrings, ",\n") + "}\n" -} diff --git a/internal/extsvc/github/globallock.go b/internal/extsvc/github/globallock.go new file mode 100644 index 00000000000..a1d477c44e8 --- /dev/null +++ b/internal/extsvc/github/globallock.go @@ -0,0 +1,185 @@ +package github + +import ( + "context" + "crypto/sha256" + "fmt" + "net/http" + "strings" + "sync" + "time" + + "github.com/go-redsync/redsync/v4" + "github.com/go-redsync/redsync/v4/redis/redigo" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/sourcegraph/log" + + "github.com/sourcegraph/sourcegraph/internal/httpcli" + 
 "github.com/sourcegraph/sourcegraph/internal/redispool" +) + +var metricWaitingRequestsGauge = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "src_githubcom_concurrency_lock_waiting_requests", + Help: "Number of requests to GitHub.com waiting on the mutex", +}) + +var metricLockRequestsGauge = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "src_githubcom_concurrency_lock_requests", + Help: "Number of requests to GitHub.com that require the mutex", +}) + +var metricFailedLockRequestsGauge = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "src_githubcom_concurrency_lock_failed_lock_requests", + Help: "Number of requests to GitHub.com that failed acquiring the mutex", +}) + +var metricFailedUnlockRequestsGauge = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "src_githubcom_concurrency_lock_failed_unlock_requests", + Help: "Number of requests to GitHub.com that failed unlocking the mutex", +}) + +var metricLockRequestDurationGauge = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "src_githubcom_concurrency_lock_acquire_duration_seconds", + Help: "Time in seconds spent acquiring the GitHub.com concurrency lock.", + Buckets: prometheus.ExponentialBuckets(1, 2, 10), +}) + +func restrictGitHubDotComConcurrency(logger log.Logger, doer httpcli.Doer, r *http.Request) (*http.Response, error) { + logger = logger.Scoped("githubcom-concurrency-limiter", "Limits concurrency to 1 per token against GitHub.com to prevent abuse detection") + var token string + if v := r.Header["Authorization"]; len(v) > 0 { + fields := strings.Fields(v[0]) + token = fields[len(fields)-1] + } + + lock := lockForToken(logger, token) + + metricLockRequestsGauge.Inc() + metricWaitingRequestsGauge.Inc() + start := time.Now() + didGetLock := false + if err := lock.LockContext(r.Context()); err != nil { + metricFailedLockRequestsGauge.Inc() + // Note that we do NOT fail the request here, this lock is considered best + // effort. 
+ logger.Error("failed to get mutex for GitHub.com, concurrent requests may occur and rate limits can happen", log.Error(err)) + } else { + didGetLock = true + } + metricLockRequestDurationGauge.Observe(float64(time.Since(start) / time.Second)) + metricWaitingRequestsGauge.Dec() + + resp, err := doer.Do(r) + + // We use a background context to still successfully unlock the mutex + // in case the request has been canceled. + if didGetLock { + if _, err := lock.UnlockContext(context.Background()); err != nil { + metricFailedUnlockRequestsGauge.Inc() + logger.Error("failed to unlock mutex, GitHub.com requests may be delayed briefly", log.Error(err)) + } + } + + return resp, err +} + +type lock interface { + LockContext(context.Context) error + UnlockContext(context.Context) (bool, error) +} + +var testLock *mockLock + +// TB is a subset of testing.TB +type TB interface { + Name() string + Skip(args ...any) + Helper() + Fatalf(string, ...any) +} + +func SetupForTest(t TB) { + t.Helper() + + testLock = &mockLock{} +} + +type mockLock struct{} + +func (m *mockLock) LockContext(_ context.Context) error { + return nil +} + +func (m *mockLock) UnlockContext(_ context.Context) (bool, error) { + return false, nil +} + +func lockForToken(logger log.Logger, token string) lock { + if testLock != nil { + return testLock + } + // We hash the token so we don't store it as plain-text in redis. 
+ hash := sha256.New() + hashedToken := "hash-failed" + if _, err := hash.Write([]byte(token)); err != nil { + logger.Error("failed to hash token", log.Error(err)) + } else { + hashedToken = string(hash.Sum(nil)) + } + + pool, ok := redispool.Store.Pool() + if !ok { + return globalLockMap.get(hashedToken) + } + + locker := redsync.New(redigo.NewPool(pool)) + return locker.NewMutex(fmt.Sprintf("github-concurrency:%s", hashedToken)) +} + +type inMemoryLock struct{ mu *sync.Mutex } + +func (l *inMemoryLock) LockContext(ctx context.Context) error { + l.mu.Lock() + return nil +} + +func (l *inMemoryLock) UnlockContext(ctx context.Context) (bool, error) { + l.mu.Unlock() + return true, nil +} + +var globalLockMap = lockMap{ + locks: make(map[string]*sync.Mutex), +} + +// lockMap is a map of strings to mutexes. It's used to serialize github.com API +// requests of each access token in order to prevent abuse rate limiting due +// to concurrency in App mode, where redis is not available. +type lockMap struct { + init sync.Once + mu sync.RWMutex + locks map[string]*sync.Mutex +} + +func (m *lockMap) get(k string) lock { + m.init.Do(func() { m.locks = make(map[string]*sync.Mutex) }) + + m.mu.RLock() + lock, ok := m.locks[k] + m.mu.RUnlock() + + if ok { + return &inMemoryLock{mu: lock} + } + + m.mu.Lock() + lock, ok = m.locks[k] + if !ok { + lock = &sync.Mutex{} + m.locks[k] = lock + } + m.mu.Unlock() + + return &inMemoryLock{mu: lock} +} diff --git a/internal/extsvc/github/v3_test.go b/internal/extsvc/github/v3_test.go index 1d24bf2a7fc..de22d1a77b3 100644 --- a/internal/extsvc/github/v3_test.go +++ b/internal/extsvc/github/v3_test.go @@ -36,6 +36,7 @@ func newTestClient(t *testing.T, cli httpcli.Doer) *V3Client { } func newTestClientWithAuthenticator(t *testing.T, auth auth.Authenticator, cli httpcli.Doer) *V3Client { + SetupForTest(t) rcache.SetupForTest(t) ratelimit.SetupForTest(t) @@ -967,6 +968,7 @@ func TestV3Client_UpdateRef(t *testing.T) { func newV3TestClient(t 
testing.TB, name string) (*V3Client, func()) { t.Helper() + SetupForTest(t) cf, save := httptestutil.NewGitHubRecorderFactory(t, update(name), name) uri, err := url.Parse("https://github.com") @@ -987,6 +989,7 @@ func newV3TestClient(t testing.TB, name string) (*V3Client, func()) { func newV3TestEnterpriseClient(t testing.TB, name string) (*V3Client, func()) { t.Helper() + SetupForTest(t) cf, save := httptestutil.NewGitHubRecorderFactory(t, update(name), name) uri, err := url.Parse("https://ghe.sgdev.org/api/v3") diff --git a/internal/extsvc/gitlab/client.go b/internal/extsvc/gitlab/client.go index dc875328f0a..f7080503f35 100644 --- a/internal/extsvc/gitlab/client.go +++ b/internal/extsvc/gitlab/client.go @@ -268,7 +268,9 @@ func (c *Client) doWithBaseURL(ctx context.Context, req *http.Request, result an // to cache server-side req.Header.Set("Cache-Control", "max-age=0") - resp, err = oauthutil.DoRequest(ctx, log.Scoped("gitlab client", "do request"), c.httpClient, req, c.Auth) + resp, err = oauthutil.DoRequest(ctx, log.Scoped("gitlab client", "do request"), c.httpClient, req, c.Auth, func(r *http.Request) (*http.Response, error) { + return c.httpClient.Do(r) + }) if resp != nil { c.externalRateLimiter.Update(resp.Header) } diff --git a/internal/httpcli/client.go b/internal/httpcli/client.go index cbb35f02814..e7887c6d718 100644 --- a/internal/httpcli/client.go +++ b/internal/httpcli/client.go @@ -301,18 +301,6 @@ func ContextErrorMiddleware(cli Doer) Doer { }) } -// GitHubProxyRedirectMiddleware rewrites requests to the "github-proxy" host -// to "https://api.github.com". -func GitHubProxyRedirectMiddleware(cli Doer) Doer { - return DoerFunc(func(req *http.Request) (*http.Response, error) { - if req.URL.Hostname() == "github-proxy" { - req.URL.Host = "api.github.com" - req.URL.Scheme = "https" - } - return cli.Do(req) - }) -} - // requestContextKey is used to denote keys to fields that should be logged by the logging // middleware. 
They should be set to the request context associated with a response. type requestContextKey int diff --git a/internal/httptestutil/recorder.go b/internal/httptestutil/recorder.go index 1eef7b00688..7692ee77333 100644 --- a/internal/httptestutil/recorder.go +++ b/internal/httptestutil/recorder.go @@ -53,8 +53,7 @@ func NewRecorderOpt(rec *recorder.Recorder) httpcli.Opt { } } -// NewGitHubRecorderFactory returns a *http.Factory that rewrites HTTP requests -// to github-proxy to github.com and records all HTTP requests in +// NewGitHubRecorderFactory returns a *http.Factory that records all HTTP requests in // "testdata/vcr/{name}" with {name} being the name that's passed in. // // If update is true, the HTTP requests are recorded, otherwise they're replayed @@ -70,9 +69,7 @@ func NewGitHubRecorderFactory(t testing.TB, update bool, name string) (*httpcli. t.Fatal(err) } - mw := httpcli.NewMiddleware(httpcli.GitHubProxyRedirectMiddleware) - - hc := httpcli.NewFactory(mw, httpcli.CachedTransportOpt, NewRecorderOpt(rec)) + hc := httpcli.NewFactory(httpcli.NewMiddleware(), httpcli.CachedTransportOpt, NewRecorderOpt(rec)) return hc, func() { if err := rec.Stop(); err != nil { diff --git a/internal/oauthutil/oauth2.go b/internal/oauthutil/oauth2.go index 3f6a2206ae5..bb4f7a3778d 100644 --- a/internal/oauthutil/oauth2.go +++ b/internal/oauthutil/oauth2.go @@ -9,6 +9,7 @@ import ( "golang.org/x/oauth2" "github.com/sourcegraph/log" + "github.com/sourcegraph/sourcegraph/internal/extsvc/auth" "github.com/sourcegraph/sourcegraph/internal/httpcli" "github.com/sourcegraph/sourcegraph/lib/errors" @@ -42,9 +43,9 @@ type TokenRefresher func(ctx context.Context, doer httpcli.Doer, oauthCtx OAuthC // If the Authenticator implements the AuthenticatorWithRefresh interface, // it will also attempt to refresh the token in case of a 401 response. // If the token is updated successfully, the same request will be retried exactly once. 
-func DoRequest(ctx context.Context, logger log.Logger, doer httpcli.Doer, req *http.Request, auther auth.Authenticator) (*http.Response, error) { +func DoRequest(ctx context.Context, logger log.Logger, doer httpcli.Doer, req *http.Request, auther auth.Authenticator, doRequest func(*http.Request) (*http.Response, error)) (*http.Response, error) { if auther == nil { - return doer.Do(req.WithContext(ctx)) + return doRequest(req.WithContext(ctx)) } // Try a pre-emptive token refresh in case we know it is definitely expired @@ -70,7 +71,7 @@ func DoRequest(ctx context.Context, logger log.Logger, doer httpcli.Doer, req *h } req.Body = io.NopCloser(bytes.NewBuffer(reqBody)) // Do first request - resp, err := doer.Do(req.WithContext(ctx)) + resp, err := doRequest(req.WithContext(ctx)) if err != nil { return resp, err } @@ -87,7 +88,7 @@ func DoRequest(ctx context.Context, logger log.Logger, doer httpcli.Doer, req *h } // We need to reset the body before retrying the request req.Body = io.NopCloser(bytes.NewBuffer(reqBody)) - resp, err = doer.Do(req.WithContext(ctx)) + resp, err = doRequest(req.WithContext(ctx)) } return resp, err diff --git a/internal/oauthutil/oauth2_test.go b/internal/oauthutil/oauth2_test.go index ec88d25477b..000e900a969 100644 --- a/internal/oauthutil/oauth2_test.go +++ b/internal/oauthutil/oauth2_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/sourcegraph/log/logtest" + "github.com/sourcegraph/sourcegraph/internal/extsvc/auth" "github.com/sourcegraph/sourcegraph/internal/httpcli" ) @@ -94,7 +95,9 @@ func TestDoRequest(t *testing.T) { } } - resp, err := DoRequest(ctx, logger, http.DefaultClient, req, auther) + resp, err := DoRequest(ctx, logger, http.DefaultClient, req, auther, func(r *http.Request) (*http.Response, error) { + return http.DefaultClient.Do(r) + }) if err != nil { t.Fatal(err) } diff --git a/internal/repos/github_test.go b/internal/repos/github_test.go index 4658a999cac..a8359d9797e 100644 --- a/internal/repos/github_test.go +++ 
b/internal/repos/github_test.go @@ -136,6 +136,7 @@ func TestPublicRepos_PaginationTerminatesGracefully(t *testing.T) { // We need to clear the cache before we run the tests rcache.SetupForTest(t) ratelimit.SetupForTest(t) + github.SetupForTest(t) fixtureName := "GITHUB-ENTERPRISE/list-public-repos" gheToken := prepareGheToken(t, fixtureName) @@ -193,6 +194,7 @@ func prepareGheToken(t *testing.T, fixtureName string) string { } func TestGithubSource_GetRepo(t *testing.T) { + github.SetupForTest(t) testCases := []struct { name string nameWithOwner string @@ -293,6 +295,7 @@ func TestGithubSource_GetRepo(t *testing.T) { } func TestGithubSource_GetRepo_Enterprise(t *testing.T) { + github.SetupForTest(t) testCases := []struct { name string nameWithOwner string @@ -408,6 +411,7 @@ func TestMakeRepo_NullCharacter(t *testing.T) { // uses rcache, a caching layer that uses Redis. // We need to clear the cache before we run the tests rcache.SetupForTest(t) + github.SetupForTest(t) r := &github.Repository{ Description: "Fun nulls \x00\x00\x00", @@ -429,6 +433,7 @@ func TestMakeRepo_NullCharacter(t *testing.T) { } func TestGithubSource_makeRepo(t *testing.T) { + github.SetupForTest(t) b, err := os.ReadFile(filepath.Join("testdata", "github-repos.json")) if err != nil { t.Fatal(err) @@ -523,6 +528,7 @@ func TestMatchOrg(t *testing.T) { } func TestGitHubSource_doRecursively(t *testing.T) { + github.SetupForTest(t) ctx := context.Background() testCases := map[string]struct { @@ -606,6 +612,7 @@ func TestGitHubSource_doRecursively(t *testing.T) { } func TestGithubSource_ListRepos(t *testing.T) { + github.SetupForTest(t) assertAllReposListed := func(want []string) typestest.ReposAssertion { return func(t testing.TB, rs types.Repos) { t.Helper() @@ -793,6 +800,7 @@ func TestGithubSource_WithAuthenticator(t *testing.T) { // uses rcache, a caching layer that uses Redis. 
// We need to clear the cache before we run the tests rcache.SetupForTest(t) + github.SetupForTest(t) svc := &types.ExternalService{ Kind: extsvc.KindGitHub, @@ -827,6 +835,7 @@ func TestGithubSource_excludes_disabledAndLocked(t *testing.T) { // uses rcache, a caching layer that uses Redis. // We need to clear the cache before we run the tests rcache.SetupForTest(t) + github.SetupForTest(t) svc := &types.ExternalService{ Kind: extsvc.KindGitHub, @@ -854,6 +863,7 @@ func TestGithubSource_excludes_disabledAndLocked(t *testing.T) { } func TestGithubSource_GetVersion(t *testing.T) { + github.SetupForTest(t) logger := logtest.Scoped(t) t.Run("github.com", func(t *testing.T) { // The GitHubSource uses the github.Client under the hood, which @@ -925,6 +935,7 @@ func TestGithubSource_GetVersion(t *testing.T) { } func TestRepositoryQuery_DoWithRefinedWindow(t *testing.T) { + github.SetupForTest(t) for _, tc := range []struct { name string query string @@ -992,6 +1003,7 @@ func TestRepositoryQuery_DoWithRefinedWindow(t *testing.T) { } func TestRepositoryQuery_DoSingleRequest(t *testing.T) { + github.SetupForTest(t) for _, tc := range []struct { name string query string @@ -1064,6 +1076,7 @@ func TestRepositoryQuery_DoSingleRequest(t *testing.T) { } func TestGithubSource_SearchRepositories(t *testing.T) { + github.SetupForTest(t) assertReposSearched := func(want []string) typestest.ReposAssertion { return func(t testing.TB, rs types.Repos) { t.Helper() @@ -1269,6 +1282,7 @@ func (c *mockDoer) Do(r *http.Request) (*http.Response, error) { // tests for GitHub App and non-GitHub App connections can be updated separately, // as setting up credentials for a GitHub App VCR test is significantly more effort. func TestGithubSource_ListRepos_GitHubApp(t *testing.T) { + github.SetupForTest(t) // This private key is no longer valid. If this VCR test needs to be updated, // a new GitHub App with new keys and secrets will have to be created // and deleted afterwards. 
diff --git a/internal/repos/sources_test_utils.go b/internal/repos/sources_test_utils.go index a9f9fe9f317..9ff00136077 100644 --- a/internal/repos/sources_test_utils.go +++ b/internal/repos/sources_test_utils.go @@ -11,6 +11,7 @@ import ( "github.com/dnaeon/go-vcr/cassette" "github.com/dnaeon/go-vcr/recorder" + "github.com/sourcegraph/sourcegraph/internal/extsvc" "github.com/sourcegraph/sourcegraph/internal/httpcli" "github.com/sourcegraph/sourcegraph/internal/httptestutil" @@ -40,7 +41,7 @@ func Update(name string) bool { func TestClientFactorySetup(t testing.TB, name string, mws ...httpcli.Middleware) (httpcli.Middleware, *recorder.Recorder) { cassete := filepath.Join("testdata", "sources", strings.ReplaceAll(name, " ", "-")) rec := NewRecorder(t, cassete, Update(name)) - mws = append(mws, httpcli.GitHubProxyRedirectMiddleware, GitserverRedirectMiddleware) + mws = append(mws, GitserverRedirectMiddleware) mw := httpcli.NewMiddleware(mws...) return mw, rec } diff --git a/lib/servicecatalog/service-catalog.yaml b/lib/servicecatalog/service-catalog.yaml index d23fff85a4e..77623edc52f 100644 --- a/lib/servicecatalog/service-catalog.yaml +++ b/lib/servicecatalog/service-catalog.yaml @@ -31,7 +31,6 @@ protected_services: consumers: - blobstore - frontend - - github-proxy - gitserver - migrator - repo-updater diff --git a/monitoring/BUILD.bazel b/monitoring/BUILD.bazel index f60a08ac35a..2ee6e7fc107 100644 --- a/monitoring/BUILD.bazel +++ b/monitoring/BUILD.bazel @@ -75,7 +75,7 @@ genrule( "outputs/grafana/embeddings.json", "outputs/grafana/executor.json", "outputs/grafana/frontend.json", - "outputs/grafana/github-proxy.json", + "outputs/grafana/github.json", "outputs/grafana/gitserver.json", "outputs/grafana/home.json", "outputs/grafana/otel-collector.json", @@ -99,7 +99,7 @@ genrule( "outputs/prometheus/embeddings_alert_rules.yml", "outputs/prometheus/executor_alert_rules.yml", "outputs/prometheus/frontend_alert_rules.yml", - 
"outputs/prometheus/github_proxy_alert_rules.yml", + "outputs/prometheus/github_alert_rules.yml", "outputs/prometheus/gitserver_alert_rules.yml", "outputs/prometheus/otel_collector_alert_rules.yml", "outputs/prometheus/postgres_alert_rules.yml", diff --git a/monitoring/definitions/BUILD.bazel b/monitoring/definitions/BUILD.bazel index 586e78b2324..00d4333dfc0 100644 --- a/monitoring/definitions/BUILD.bazel +++ b/monitoring/definitions/BUILD.bazel @@ -14,7 +14,7 @@ go_library( "executor.go", "frontend.go", "git_server.go", - "github_proxy.go", + "github.go", "otel_collector.go", "postgres.go", "precise_code_intel_worker.go", diff --git a/monitoring/definitions/containers.go b/monitoring/definitions/containers.go index b9764cd0242..3c4db83d515 100644 --- a/monitoring/definitions/containers.go +++ b/monitoring/definitions/containers.go @@ -23,7 +23,7 @@ func Containers() *monitoring.Dashboard { // - review what's changed in the commits // - check if the commit contains changes to the container name query in each dashboard definition // - update this container name query accordingly - containerNameQuery = shared.CadvisorContainerNameMatcher("(frontend|sourcegraph-frontend|gitserver|github-proxy|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger)") + containerNameQuery = shared.CadvisorContainerNameMatcher("(frontend|sourcegraph-frontend|gitserver|pgsql|codeintel-db|codeinsights|precise-code-intel-worker|prometheus|redis-cache|redis-store|redis-exporter|repo-updater|searcher|symbols|syntect-server|worker|zoekt-indexserver|zoekt-webserver|indexed-search|grafana|blobstore|jaeger)") ) return &monitoring.Dashboard{ diff --git a/monitoring/definitions/dashboards.go b/monitoring/definitions/dashboards.go index 6ff955aab96..bc5d0680e19 100644 --- a/monitoring/definitions/dashboards.go +++ 
b/monitoring/definitions/dashboards.go @@ -12,7 +12,7 @@ func Default() Dashboards { return []*monitoring.Dashboard{ Frontend(), GitServer(), - GitHubProxy(), + GitHub(), Postgres(), PreciseCodeIntelWorker(), Redis(), diff --git a/monitoring/definitions/github.go b/monitoring/definitions/github.go new file mode 100644 index 00000000000..f437bc9417e --- /dev/null +++ b/monitoring/definitions/github.go @@ -0,0 +1,80 @@ +package definitions + +import ( + "time" + + "github.com/sourcegraph/sourcegraph/monitoring/monitoring" +) + +func GitHub() *monitoring.Dashboard { + return &monitoring.Dashboard{ + Name: "github", + Title: "GitHub", + Description: "Dashboard to track requests and global concurrency locks for talking to github.com.", + Groups: []monitoring.Group{ + { + Title: "GitHub API monitoring", + Rows: []monitoring.Row{ + { + { + Name: "src_githubcom_concurrency_lock_waiting_requests", + Description: "number of requests waiting on the global mutex", + Query: `max(src_githubcom_concurrency_lock_waiting_requests)`, + Warning: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute), + Panel: monitoring.Panel().LegendFormat("requests waiting"), + Owner: monitoring.ObservableOwnerSource, + NextSteps: ` + - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. + - **Check redis-store health. + - **Check GitHub status.`, + }, + }, + { + { + Name: "src_githubcom_concurrency_lock_failed_lock_requests", + Description: "number of lock failures", + Query: `sum(rate(src_githubcom_concurrency_lock_failed_lock_requests[5m]))`, + Warning: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute), + Panel: monitoring.Panel().LegendFormat("failed lock requests"), + Owner: monitoring.ObservableOwnerSource, + NextSteps: ` + - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. 
+ - **Check redis-store health.`, + }, + { + Name: "src_githubcom_concurrency_lock_failed_unlock_requests", + Description: "number of unlock failures", + Query: `sum(rate(src_githubcom_concurrency_lock_failed_unlock_requests[5m]))`, + Warning: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute), + Panel: monitoring.Panel().LegendFormat("failed unlock requests"), + Owner: monitoring.ObservableOwnerSource, + NextSteps: ` + - **Check container logs for network connection issues and log entries from the githubcom-concurrency-limiter logger. + - **Check redis-store health.`, + }, + }, + { + { + Name: "src_githubcom_concurrency_lock_requests", + Description: "number of locks taken on the global mutex", + Query: `sum(rate(src_githubcom_concurrency_lock_requests[5m]))`, + NoAlert: true, + Panel: monitoring.Panel().LegendFormat("number of requests"), + Owner: monitoring.ObservableOwnerSource, + Interpretation: "A high number of locks indicates heavy usage of the GitHub API. This might not be a problem, but you should check if request counts are expected.", + }, + { + Name: "src_githubcom_concurrency_lock_acquire_duration_seconds_latency_p75", + Description: "75 percentile latency of src_githubcom_concurrency_lock_acquire_duration_seconds", + Query: `histogram_quantile(0.75, sum(rate(src_githubcom_concurrency_lock_acquire_duration_seconds_bucket[5m])) by (le))`, + NoAlert: true, + Panel: monitoring.Panel().LegendFormat("lock acquire latency").Unit(monitoring.Seconds), + Owner: monitoring.ObservableOwnerSource, + Interpretation: `75 percentile latency of acquiring the global GitHub concurrency lock.`, + }, + }, + }, + }, + }, + } +} diff --git a/monitoring/definitions/github_proxy.go b/monitoring/definitions/github_proxy.go deleted file mode 100644 index 0c31220fc86..00000000000 --- a/monitoring/definitions/github_proxy.go +++ /dev/null @@ -1,43 +0,0 @@ -package definitions - -import ( - "time" - - "github.com/sourcegraph/sourcegraph/monitoring/definitions/shared" - 
"github.com/sourcegraph/sourcegraph/monitoring/monitoring" -) - -func GitHubProxy() *monitoring.Dashboard { - const containerName = "github-proxy" - - return &monitoring.Dashboard{ - Name: "github-proxy", - Title: "GitHub Proxy", - Description: "Proxies all requests to github.com, keeping track of and managing rate limits.", - Groups: []monitoring.Group{ - { - Title: "GitHub API monitoring", - Rows: []monitoring.Row{ - { - { - Name: "github_proxy_waiting_requests", - Description: "number of requests waiting on the global mutex", - Query: `max(github_proxy_waiting_requests)`, - Warning: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute), - Panel: monitoring.Panel().LegendFormat("requests waiting"), - Owner: monitoring.ObservableOwnerSource, - NextSteps: ` - - **Check github-proxy logs for network connection issues. - - **Check github status.`, - }, - }, - }, - }, - - shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil), - shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerDevOps, nil), - shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil), - shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil), - }, - } -} diff --git a/sg.config.yaml b/sg.config.yaml index 4887befc7f5..5043497a8cf 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -31,7 +31,6 @@ env: SRC_HTTP_ADDR: ":3082" - GITHUB_BASE_URL: http://127.0.0.1:3180 # I don't think we even need to set these? 
SEARCHER_URL: http://127.0.0.1:3181 REPO_UPDATER_URL: http://127.0.0.1:3182 @@ -56,7 +55,6 @@ env: { "Name": "symbols", "Host": "127.0.0.1:6071" }, { "Name": "repo-updater", "Host": "127.0.0.1:6074" }, { "Name": "codeintel-worker", "Host": "127.0.0.1:6088" }, - { "Name": "github-proxy", "Host": "127.0.0.1:6090" }, { "Name": "worker", "Host": "127.0.0.1:6089" }, { "Name": "worker-executors", "Host": "127.0.0.1:6996" }, { "Name": "embeddings", "Host": "127.0.0.1:6099" }, @@ -210,19 +208,6 @@ commands: SRC_REPOS_DIR: $HOME/.sourcegraph/repos_2 SRC_PROF_HTTP: 127.0.0.1:3552 - github-proxy: - cmd: .bin/github-proxy - install: | - if [ -n "$DELVE" ]; then - export GCFLAGS='all=-N -l' - fi - go build -gcflags="$GCFLAGS" -o .bin/github-proxy github.com/sourcegraph/sourcegraph/cmd/github-proxy - checkBinary: .bin/github-proxy - watch: - - lib - - internal - - cmd/github-proxy - repo-updater: cmd: | export SOURCEGRAPH_LICENSE_GENERATION_KEY=$(cat ../dev-private/enterprise/dev/test-license-generation-key.pem) @@ -959,8 +944,6 @@ bazelCommands: ROCKET_KEEP_ALIVE: "0" ROCKET_PORT: "9238" QUIET: "true" - github-proxy: - target: //cmd/github-proxy frontend: description: Enterprise frontend target: //cmd/frontend @@ -1062,7 +1045,6 @@ commandsets: - searcher - symbols - syntax-highlighter - - github-proxy commands: - web - docsite @@ -1091,7 +1073,6 @@ commandsets: - symbols - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1129,7 +1110,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1159,7 +1139,6 @@ commandsets: - searcher - symbols - syntax-highlighter - - github-proxy - codeintel-worker - codeintel-executor commands: @@ -1193,7 +1172,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1225,7 +1203,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - 
zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1257,7 +1234,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1288,7 +1264,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1323,7 +1298,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1348,7 +1322,6 @@ commandsets: - gitserver-1 - searcher - symbols - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1374,7 +1347,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1402,7 +1374,6 @@ commandsets: - caddy - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0 @@ -1425,7 +1396,6 @@ commandsets: - gitserver-0 - gitserver-1 - caddy - - github-proxy monitoring: checks: @@ -1511,7 +1481,6 @@ commandsets: - symbols - docsite - syntax-highlighter - - github-proxy - zoekt-index-0 - zoekt-index-1 - zoekt-web-0