From 4933ac4e8194357d004ea3b41a2bc5bbdbc07811 Mon Sep 17 00:00:00 2001 From: Robert Lin Date: Thu, 2 Sep 2021 16:05:36 -0400 Subject: [PATCH] monitoring: dashboards docs improvements (#24563) - Render Description more prominently because that's how panels are identified in dashboards - Render link to panel and dashboards - Render full query in collapsible section for reference - Show number of alerts and indicate that no alerts are defined more prominently - Alerts now always link to the panel docs - Fix code-insights owner slug that lead to handbook 404 - Fix some `MetricDescriptionRoot` using underscore instead of space --- doc/admin/observability/alert_solutions.md | 346 +- doc/admin/observability/dashboards.md | 9514 +++++++++++++++-- monitoring/definitions/shared/codeinsights.go | 2 +- monitoring/definitions/shared/codeintel.go | 6 +- monitoring/definitions/worker.go | 6 +- monitoring/monitoring/documentation.go | 50 +- monitoring/monitoring/monitoring.go | 24 +- monitoring/monitoring/util.go | 7 + 8 files changed, 8733 insertions(+), 1222 deletions(-) diff --git a/doc/admin/observability/alert_solutions.md b/doc/admin/observability/alert_solutions.md index 5c8ae8ffc0e..d1d81c40931 100644 --- a/doc/admin/observability/alert_solutions.md +++ b/doc/admin/observability/alert_solutions.md @@ -21,6 +21,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-99th-percentile-search-request-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -47,6 +48,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-90th-percentile-search-request-duration). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -70,6 +72,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-hard-timeout-search-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -94,6 +97,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-hard-error-search-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -117,6 +121,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-partial-timeout-search-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -140,6 +145,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - This indicates your user`s are making syntax errors or similar user errors. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-search-alert-user-suggestions). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -164,6 +170,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Confirm that the Sourcegraph frontend has enough CPU/memory using the provisioning panels. - Trace a request to see what the slowest part is: https://docs.sourcegraph.com/admin/observability/tracing +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-page-load-latency). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -188,6 +195,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Confirm that the Sourcegraph frontend has enough CPU/memory using the provisioning panels. - Trace a request to see what the slowest part is: https://docs.sourcegraph.com/admin/observability/tracing +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-blob-load-latency). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -214,6 +222,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-99th-percentile-search-codeintel-request-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -240,6 +249,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-90th-percentile-search-codeintel-request-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -263,6 +273,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-hard-timeout-search-codeintel-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -287,6 +298,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-hard-error-search-codeintel-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -310,6 +322,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-partial-timeout-search-codeintel-responses). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -333,6 +346,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - This indicates a bug in Sourcegraph, please [open an issue](https://github.com/sourcegraph/sourcegraph/issues/new/choose). +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-search-codeintel-alert-user-suggestions). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -359,6 +373,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-99th-percentile-search-api-request-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -385,6 +400,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check that most repositories are indexed** by visiting https://sourcegraph.example.com/site-admin/repositories?filter=needs-index (it should show few or no results.) - **Kubernetes:** Check CPU usage of zoekt-webserver in the indexed-search pod, consider increasing CPU limits in the `indexed-search.Deployment.yaml` if regularly hitting max CPU utilization. - **Docker Compose:** Check CPU usage on the Zoekt Web Server dashboard, consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml` if regularly hitting max CPU utilization. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-90th-percentile-search-api-request-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -408,6 +424,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-hard-error-search-api-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -431,6 +448,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-partial-timeout-search-api-responses). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -454,6 +472,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - This indicates your user`s search API requests have syntax errors or a similar user error. Check the responses the API sends back for an explanation. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-search-api-alert-user-suggestions). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -477,6 +496,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - Check the Zoekt Web Server dashboard for indications it might be unhealthy. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-internal-indexed-search-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -500,6 +520,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - Check the Searcher dashboard for indications it might be unhealthy. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-internal-unindexed-search-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -523,6 +544,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - May not be a substantial issue, check the `frontend` logs for potential causes. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -545,6 +567,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-99th-percentile-gitserver-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -567,6 +590,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-gitserver-error-responses). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -590,6 +614,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - This alert is triggered via the `triggerObservabilityTestAlert` GraphQL endpoint, and will automatically resolve itself. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-observability-test-alert-warning). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -613,6 +638,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - This alert is triggered via the `triggerObservabilityTestAlert` GraphQL endpoint, and will automatically resolve itself. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-observability-test-alert-critical). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -638,6 +664,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Increase SRC_PGSQL_MAX_OPEN together with giving more memory to the database if needed - Scale up Postgres memory / cpus [See our scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf) +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-mean-blocked-seconds-per-conn-request). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -663,6 +690,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -687,6 +715,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -711,6 +740,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the (frontend|sourcegraph-frontend) service. 
- **Docker Compose:** Consider increasing `cpus:` of the (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -735,6 +765,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the (frontend|sourcegraph-frontend) service. - **Docker Compose:** Consider increasing `memory:` of the (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -759,6 +790,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -783,6 +815,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of (frontend|sourcegraph-frontend) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -805,6 +838,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#frontend-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -813,8 +847,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#frontend-go-goroutines). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
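For reference, every "Silence this alert" step above adds an entry to the `observability.silenceAlerts` array in the site configuration. A minimal sketch of what a complete entry looks like, assuming the standard site configuration format; the alert identifier below is only an example, so copy the exact identifier from the JSON snippet shown with the alert you are silencing:

```json
{
  "observability.silenceAlerts": [
    // Example identifier only: use the one shown in the alert's own snippet.
    "warning: frontend: 99th_percentile_search_request_duration"
  ]
}
```

Add the property alongside your existing site configuration settings rather than replacing the whole document; the site configuration allows comments, so the annotation above can be kept or dropped.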
@@ -829,6 +861,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -851,6 +884,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -877,6 +911,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Look at the breakdown by query to determine if a specific query type is being affected - Check for high CPU usage on zoekt-webserver - Check Honeycomb for unusual activity +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-mean-successful-sentinel-duration-5m). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -904,6 +939,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Look at the breakdown by query to determine if a specific query type is being affected - Check for high CPU usage on zoekt-webserver - Check Honeycomb for unusual activity +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-mean-sentinel-stream-latency-5m). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -931,6 +967,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Look at the breakdown by query to determine if a specific query type is being affected - Check for high CPU usage on zoekt-webserver - Check Honeycomb for unusual activity +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-90th-percentile-successful-sentinel-duration-5m). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -958,6 +995,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Look at the breakdown by query to determine if a specific query type is being affected - Check for high CPU usage on zoekt-webserver - Check Honeycomb for unusual activity +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-90th-percentile-sentinel-stream-latency-5m). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -983,6 +1021,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - **Provision more disk space:** Sourcegraph will begin deleting least-used repository clones at 10% disk space remaining which may result in decreased performance, users having to wait for repositories to clone, etc. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-disk-space-remaining). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1010,6 +1049,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check if the problem may be an intermittent and temporary peak** using the "Container monitoring" section at the bottom of the Git Server dashboard. - **Single container deployments:** Consider upgrading to a [Docker Compose deployment](../install/docker-compose/migrate.md) which offers better scalability and resource isolation. - **Kubernetes and Docker Compose:** Check that you are running a similar number of git server replicas and that their CPU/memory limits are allocated according to what is shown in the [Sourcegraph resource estimator](../install/resource_estimator.md). +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#gitserver-running-git-commands). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1019,8 +1059,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#gitserver-running-git-commands). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1037,6 +1075,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **If you just added several repositories**, the warning may be expected. - **Check which repositories need cloning**, by visiting e.g. https://sourcegraph.example.com/site-admin/repositories?filter=not-cloned +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-repository-clone-queue-size). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1062,6 +1101,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Check the code host status indicator for errors:** on the Sourcegraph app homepage, when signed in as an admin click the cloud icon in the top right corner of the page. - **Check if the issue continues to happen after 30 minutes**, it may be temporary. - **Check the gitserver logs for more information.** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-repository-existence-check-queue-size). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1091,6 +1131,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs gitserver` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1116,6 +1157,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Increase SRC_PGSQL_MAX_OPEN together with giving more memory to the database if needed - Scale up Postgres memory / cpus [See our scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf) +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-mean-blocked-seconds-per-conn-request). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1141,6 +1183,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the gitserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1165,6 +1208,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. 
- **Docker Compose:** Consider increasing `memory:` of gitserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1189,6 +1233,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the gitserver service. - **Docker Compose:** Consider increasing `cpus:` of the gitserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1213,6 +1258,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the gitserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1235,6 +1281,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#gitserver-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1243,8 +1290,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#gitserver-go-goroutines). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
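The Kubernetes guidance repeated in the sections above (raise CPU or memory limits in the relevant `Deployment.yaml`) generally amounts to editing the `resources` block of the affected container. A minimal sketch of that block, written here as the JSON equivalent of the YAML fragment; the container name and values are illustrative and should be sized against the [Sourcegraph resource estimator](../install/resource_estimator.md):

```json
{
  "name": "gitserver",
  "resources": {
    "requests": { "cpu": "4", "memory": "8Gi" },
    "limits": { "cpu": "8", "memory": "16Gi" }
  }
}
```

The equivalent Docker Compose change is the `cpus:` and `memory:` keys on the same container in `docker-compose.yml`, as the bullets above already note.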
@@ -1259,6 +1304,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1281,6 +1327,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1305,6 +1352,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - - **Check github-proxy logs for network connection issues. - **Check github status. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-github-proxy-waiting-requests). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1329,6 +1377,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1353,6 +1402,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1377,6 +1427,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the github-proxy service. - **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1401,6 +1452,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the github-proxy service. 
- **Docker Compose:** Consider increasing `memory:` of the github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1425,6 +1477,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1449,6 +1502,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of github-proxy container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1471,6 +1525,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#github-proxy-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1479,8 +1534,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#github-proxy-go-goroutines). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1495,6 +1548,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1517,6 +1571,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#github-proxy-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1539,6 +1594,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-connections). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1562,6 +1618,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-transaction-durations). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1585,6 +1642,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-postgres-up). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1593,8 +1651,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-postgres-up). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1610,6 +1666,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - Drop and re-create the invalid trigger - please contact Sourcegraph to supply the trigger definition. +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-invalid-indexes). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1618,8 +1675,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-invalid-indexes). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1635,6 +1690,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - Ensure the Postgres exporter can access the Postgres database. Also, check the Postgres exporter logs for errors. +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-pg-exporter-err). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1643,8 +1699,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-pg-exporter-err). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1660,6 +1714,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** - The database migration has been in progress for 5 or more minutes - please contact Sourcegraph if this persists. +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-migration-in-progress). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1668,8 +1723,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#postgres-migration-in-progress). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1686,6 +1739,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the (pgsql|codeintel-db) service. - **Docker Compose:** Consider increasing `cpus:` of the (pgsql|codeintel-db) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1710,6 +1764,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the (pgsql|codeintel-db) service. - **Docker Compose:** Consider increasing `memory:` of the (pgsql|codeintel-db) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1734,6 +1789,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the (pgsql|codeintel-db) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1758,6 +1814,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of (pgsql|codeintel-db) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1780,6 +1837,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#postgres-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1809,6 +1867,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs precise-code-intel-worker` for logs indicating request failures to `frontend` or `frontend-internal`. 
+- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1817,7 +1876,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -1834,6 +1893,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Increase SRC_PGSQL_MAX_OPEN together with giving more memory to the database if needed - Scale up Postgres memory / cpus [See our scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf) +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-mean-blocked-seconds-per-conn-request). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1859,6 +1919,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1867,7 +1928,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
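`SRC_PGSQL_MAX_OPEN`, suggested above when connection requests are blocking, is an environment variable on the service's container that caps how many Postgres connections it may hold open. A minimal sketch of raising it, shown as a container-spec fragment in JSON; the value is illustrative and should be increased together with the database's own memory and connection limits, per the linked [scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf):

```json
{
  "name": "precise-code-intel-worker",
  "env": [
    { "name": "SRC_PGSQL_MAX_OPEN", "value": "60" }
  ]
}
```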
@@ -1883,6 +1944,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1891,7 +1953,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -1907,6 +1969,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the precise-code-intel-worker service. - **Docker Compose:** Consider increasing `cpus:` of the precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1915,7 +1978,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -1931,6 +1994,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the precise-code-intel-worker service. - **Docker Compose:** Consider increasing `memory:` of the precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1939,7 +2003,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -1955,6 +2019,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1963,7 +2028,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -1979,6 +2044,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of precise-code-intel-worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -1987,7 +2053,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2001,6 +2067,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#precise-code-intel-worker-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2009,9 +2076,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#precise-code-intel-worker-go-goroutines). - -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2025,6 +2090,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2033,7 +2099,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2047,6 +2113,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#precise-code-intel-worker-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2055,7 +2122,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2076,6 +2143,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs query-runner` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2100,6 +2168,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the query-runner container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2124,6 +2193,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of query-runner container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2148,6 +2218,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the query-runner service. - **Docker Compose:** Consider increasing `cpus:` of the query-runner container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2172,6 +2243,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the query-runner service. - **Docker Compose:** Consider increasing `memory:` of the query-runner container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2196,6 +2268,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the query-runner container in `docker-compose.yml`. 
+- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2220,6 +2293,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of query-runner container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2242,6 +2316,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#query-runner-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2250,8 +2325,6 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#query-runner-go-goroutines). - *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*
@@ -2266,6 +2339,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2288,6 +2362,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#query-runner-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2315,6 +2390,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - `WORKER_JOB_ALLOWLIST` contains "codeintel-janitor" (or "all"), and - `WORKER_JOB_BLOCKLIST` does not contain "codeintel-janitor" - Ensure that such a container is not failing to start or stay active +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-worker-job-codeintel-janitor-count). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2324,7 +2400,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2343,6 +2419,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - `WORKER_JOB_ALLOWLIST` contains "codeintel-commitgraph" (or "all"), and - `WORKER_JOB_BLOCKLIST` does not contain "codeintel-commitgraph" - Ensure that such a container is not failing to start or stay active +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-worker-job-codeintel-commitgraph-count). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2352,7 +2429,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2371,6 +2448,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - `WORKER_JOB_ALLOWLIST` contains "codeintel-auto-indexing" (or "all"), and - `WORKER_JOB_BLOCKLIST` does not contain "codeintel-auto-indexing" - Ensure that such a container is not failing to start or stay active +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-worker-job-codeintel-auto-indexing-count). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2380,7 +2458,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2401,6 +2479,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs worker` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2409,7 +2488,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2426,6 +2505,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - Increase SRC_PGSQL_MAX_OPEN together with giving more memory to the database if needed - Scale up Postgres memory / cpus [See our scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf) +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-mean-blocked-seconds-per-conn-request). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2451,6 +2531,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2459,7 +2540,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
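The mean-blocked-seconds entry above suggests raising `SRC_PGSQL_MAX_OPEN` alongside database memory. It is an environment variable on the worker container, so it can be set with the same patch shape as the previous sketch; only the environment entry differs (the value is an illustrative assumption, not a sizing recommendation):

```json
{ "name": "SRC_PGSQL_MAX_OPEN", "value": "30" }
```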
@@ -2475,6 +2556,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2483,7 +2565,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2499,6 +2581,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the worker service. - **Docker Compose:** Consider increasing `cpus:` of the worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2507,7 +2590,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2523,6 +2606,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the worker service. - **Docker Compose:** Consider increasing `memory:` of the worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2531,7 +2615,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2547,6 +2631,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2555,7 +2640,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2571,6 +2656,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of worker container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2579,7 +2665,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2593,6 +2679,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#worker-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2601,9 +2688,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#worker-go-goroutines). - -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2617,6 +2702,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2625,7 +2711,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2639,6 +2725,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2647,7 +2734,7 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -2668,6 +2755,7 @@ with your code hosts connections or networking issues affecting communication wi - Check the repo-updater logs for errors about syncing. - Confirm that outbound network connections are allowed where repo-updater is deployed. - Check back in an hour to see if the issue has resolved itself. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-src-repoupdater-max-sync-backoff). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2698,6 +2786,7 @@ with your code hosts connections or networking issues affecting communication wi - Check the repo-updater logs for errors about syncing. - Confirm that outbound network connections are allowed where repo-updater is deployed. - Check back in an hour to see if the issue has resolved itself. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-src-repoupdater-syncer-sync-errors-total). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2722,6 +2811,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs for errors. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-syncer-sync-start). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2745,6 +2835,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-syncer-sync-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2768,6 +2859,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-source-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2791,6 +2883,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check network connectivity to code hosts +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-syncer-synced-repos). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2814,6 +2907,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check network connectivity to code hosts +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sourced-repos). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2837,6 +2931,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check for unusual spikes in user added repos. Each user is only allowed to add 2000 +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-user-added-repos). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2860,6 +2955,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater`s connectivity with gitserver and gitserver logs +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-purge-failed). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2883,6 +2979,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs. This is expected to fire if there are no user added code hosts +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sched-auto-fetch). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2906,6 +3003,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs. This is expected to fire if there are no user added code hosts +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sched-known-repos). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2929,6 +3027,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs for indications that the queue is not being processed. The queue length should trend downwards over time as items are sent to GitServer +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sched-update-queue-length). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2952,6 +3051,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs for errors. This is expected to fire if there are no user added code hosts +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sched-loops). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2975,6 +3075,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs for errors +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-sched-error). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -2998,6 +3099,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Increase the API rate limit to [GitHub](https://docs.sourcegraph.com/admin/external_service/github#github-com-rate-limits), [GitLab](https://docs.sourcegraph.com/admin/external_service/gitlab#internal-rate-limits) or [Bitbucket Server](https://docs.sourcegraph.com/admin/external_service/bitbucket_server#internal-rate-limits). +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-perms). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3021,6 +3123,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Increase the API rate limit to [GitHub](https://docs.sourcegraph.com/admin/external_service/github#github-com-rate-limits), [GitLab](https://docs.sourcegraph.com/admin/external_service/gitlab#internal-rate-limits) or [Bitbucket Server](https://docs.sourcegraph.com/admin/external_service/bitbucket_server#internal-rate-limits). +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-stale-perms). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3045,6 +3148,7 @@ with your code hosts connections or networking issues affecting communication wi - **Enabled permissions for the first time:** Wait for few minutes and see if the number goes down. - **Otherwise:** Increase the API rate limit to [GitHub](https://docs.sourcegraph.com/admin/external_service/github#github-com-rate-limits), [GitLab](https://docs.sourcegraph.com/admin/external_service/gitlab#internal-rate-limits) or [Bitbucket Server](https://docs.sourcegraph.com/admin/external_service/bitbucket_server#internal-rate-limits). +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-no-perms). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3068,6 +3172,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-sync-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3092,6 +3197,7 @@ with your code hosts connections or networking issues affecting communication wi - **Enabled permissions for the first time:** Wait for few minutes and see if the number goes down. - **Otherwise:** Increase the API rate limit to [GitHub](https://docs.sourcegraph.com/admin/external_service/github#github-com-rate-limits), [GitLab](https://docs.sourcegraph.com/admin/external_service/gitlab#internal-rate-limits) or [Bitbucket Server](https://docs.sourcegraph.com/admin/external_service/bitbucket_server#internal-rate-limits). +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-queue-size). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3116,6 +3222,7 @@ with your code hosts connections or networking issues affecting communication wi - Check the network connectivity the Sourcegraph and the code host. - Check if API rate limit quota is exhausted on the code host. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-perms-syncer-sync-errors). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3139,6 +3246,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check for spikes in external services, could be abuse +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-src-repoupdater-external-services-total). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3162,6 +3270,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check for spikes in external services, could be abuse +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-src-repoupdater-user-external-services-total). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3186,6 +3295,7 @@ with your code hosts connections or networking issues affecting communication wi - **Check if jobs are failing to sync:** "SELECT * FROM external_service_sync_jobs WHERE state = `errored`"; - **Increase the number of workers** using the `repoConcurrentExternalServiceSyncers` site config. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-repoupdater-queued-sync-jobs-total). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3209,6 +3319,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs. Jobs older than 1 day should have been removed. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-repoupdater-completed-sync-jobs-total). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3232,6 +3343,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check repo-updater logs. Check code host connectivity +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-repoupdater-errored-sync-jobs-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3255,6 +3367,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Try restarting the pod to get a different public IP. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-github-graphql-rate-limit-remaining). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3278,6 +3391,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Try restarting the pod to get a different public IP. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-github-rest-rate-limit-remaining). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3301,6 +3415,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Try restarting the pod to get a different public IP. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-github-search-rate-limit-remaining). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3324,6 +3439,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Try restarting the pod to get a different public IP. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-gitlab-rest-rate-limit-remaining). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3353,6 +3469,7 @@ with your code hosts connections or networking issues affecting communication wi - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs repo-updater` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3378,6 +3495,7 @@ with your code hosts connections or networking issues affecting communication wi - Increase SRC_PGSQL_MAX_OPEN together with giving more memory to the database if needed - Scale up Postgres memory / cpus [See our scaling guide](https://docs.sourcegraph.com/admin/config/postgres-conf) +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-mean-blocked-seconds-per-conn-request). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3403,6 +3521,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3427,6 +3546,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3451,6 +3571,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the repo-updater service. 
- **Docker Compose:** Consider increasing `cpus:` of the repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3475,6 +3596,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the repo-updater service. - **Docker Compose:** Consider increasing `memory:` of the repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3499,6 +3621,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3523,6 +3646,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of repo-updater container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3545,6 +3669,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#repo-updater-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3553,8 +3678,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#repo-updater-go-goroutines). - *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
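A few entries back, the queued-sync-jobs alert points at the `repoConcurrentExternalServiceSyncers` site configuration setting for increasing the number of external-service sync workers. In the same site-configuration snippet style used throughout this reference, a minimal sketch (the value is an illustrative assumption):

```json
"repoConcurrentExternalServiceSyncers": 6
```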
@@ -3569,6 +3692,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3591,6 +3715,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3613,6 +3738,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-unindexed-search-request-errors). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3635,6 +3761,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-replica-traffic). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3664,6 +3791,7 @@ with your code hosts connections or networking issues affecting communication wi - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs searcher` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3688,6 +3816,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3712,6 +3841,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-container-memory-usage). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3736,6 +3866,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the searcher service. - **Docker Compose:** Consider increasing `cpus:` of the searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3760,6 +3891,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the searcher service. - **Docker Compose:** Consider increasing `memory:` of the searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3784,6 +3916,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3808,6 +3941,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of searcher container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3830,6 +3964,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#searcher-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3838,8 +3973,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#searcher-go-goroutines). 
- *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*
@@ -3854,6 +3987,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3876,6 +4010,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3898,6 +4033,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-store-fetch-failures). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3906,7 +4042,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -3920,6 +4056,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-current-fetch-queue-size). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3928,7 +4065,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -3949,6 +4086,7 @@ with your code hosts connections or networking issues affecting communication wi - **Docker Compose:** - Confirm that `docker ps` shows the `frontend-internal` container is healthy. - Check `docker logs symbols` for logs indicating request failures to `frontend` or `frontend-internal`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-frontend-internal-api-error-responses). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3957,7 +4095,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -3973,6 +4111,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -3981,7 +4120,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -3997,6 +4136,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4005,7 +4145,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4021,6 +4161,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the symbols service. - **Docker Compose:** Consider increasing `cpus:` of the symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4029,7 +4170,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4045,6 +4186,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the symbols service. - **Docker Compose:** Consider increasing `memory:` of the symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4053,7 +4195,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4069,6 +4211,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4077,7 +4220,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4093,6 +4236,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of symbols container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4101,7 +4245,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4115,6 +4259,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#symbols-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4123,9 +4268,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#symbols-go-goroutines). - -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4139,6 +4282,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4147,7 +4291,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4161,6 +4305,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4169,7 +4314,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -4185,6 +4330,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the syntect-server container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4209,6 +4355,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of syntect-server container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4233,6 +4380,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the syntect-server service. - **Docker Compose:** Consider increasing `cpus:` of the syntect-server container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4257,6 +4405,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the syntect-server service. - **Docker Compose:** Consider increasing `memory:` of the syntect-server container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4281,6 +4430,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the syntect-server container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4305,6 +4455,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of syntect-server container in `docker-compose.yml`. 
+- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4327,6 +4478,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#syntect-server-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4350,6 +4502,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-average-resolve-revision-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4375,6 +4528,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-indexserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4399,6 +4553,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of zoekt-indexserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4423,6 +4578,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the zoekt-indexserver service. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-indexserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4447,6 +4603,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the zoekt-indexserver service. - **Docker Compose:** Consider increasing `memory:` of the zoekt-indexserver container in `docker-compose.yml`. 
+- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4471,6 +4628,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-indexserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4495,6 +4653,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of zoekt-indexserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4517,6 +4676,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-indexserver-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4539,6 +4699,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-indexed-search-request-errors). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4563,6 +4724,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4587,6 +4749,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of zoekt-webserver container in `docker-compose.yml`. 
+- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4611,6 +4774,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the zoekt-webserver service. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4635,6 +4799,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the zoekt-webserver service. - **Docker Compose:** Consider increasing `memory:` of the zoekt-webserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4659,6 +4824,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the zoekt-webserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4683,6 +4849,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of zoekt-webserver container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#zoekt-webserver-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4708,6 +4875,7 @@ with your code hosts connections or networking issues affecting communication wi - Check the Container monitoring (not available on server) panels and try increasing resources for Prometheus if necessary. - If the rule group taking a long time to evaluate belongs to `/sg_prometheus_addons`, try reducing the complexity of any custom Prometheus rules provided. 
- If the rule group taking a long time to evaluate belongs to `/sg_config_prometheus`, please [open an issue](https://github.com/sourcegraph/sourcegraph/issues/new?assignees=&labels=&template=bug_report.md&title=). +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-rule-eval-duration). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4716,8 +4884,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-rule-eval-duration). - *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).*
@@ -4735,6 +4901,7 @@ with your code hosts connections or networking issues affecting communication wi - Check Prometheus logs for messages related to rule group evaluation (generally with log field `component="rule manager"`). - If the rule group failing to evaluate belongs to `/sg_prometheus_addons`, ensure any custom Prometheus configuration provided is valid. - If the rule group taking a long time to evaluate belongs to `/sg_config_prometheus`, please [open an issue](https://github.com/sourcegraph/sourcegraph/issues/new?assignees=&labels=&template=bug_report.md&title=). +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-rule-eval-failures). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4743,8 +4910,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-rule-eval-failures). - *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).*
@@ -4762,6 +4927,7 @@ with your code hosts connections or networking issues affecting communication wi - Check the Container monitoring (not available on server) panels and try increasing resources for Prometheus if necessary. - Ensure that your [`observability.alerts` configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid. - Check if the relevant alert integration service is experiencing downtime or issues. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-alertmanager-notification-latency). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4786,6 +4952,7 @@ with your code hosts connections or networking issues affecting communication wi - Ensure that your [`observability.alerts` configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid. - Check if the relevant alert integration service is experiencing downtime or issues. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-alertmanager-notification-failures). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4810,6 +4977,7 @@ with your code hosts connections or networking issues affecting communication wi - Check Prometheus logs for messages related to configuration loading. - Ensure any [custom configuration you have provided Prometheus](https://docs.sourcegraph.com/admin/observability/metrics#prometheus-configuration) is valid. +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-config-status). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4818,8 +4986,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-prometheus-config-status). - *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).*
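The Alertmanager entries above point at the `observability.alerts` site configuration. A rough sketch of a single notifier entry might look like the following — the Slack webhook URL is a placeholder, and the full schema (levels, notifier types, and their fields) is described in the linked alerting documentation, so verify field names there before use.

```json
{
  "observability.alerts": [
    {
      "level": "critical",
      "notifier": {
        "type": "slack",
        // Placeholder incoming-webhook URL.
        "url": "https://hooks.slack.com/services/XXX/YYY/ZZZ"
      }
    }
  ]
}
```

If notification latency or failures persist even with a valid configuration, the entries above suggest checking whether the receiving integration itself is degraded.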
@@ -4835,6 +5001,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Ensure that your [`observability.alerts` configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid. +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-alertmanager-config-status). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4843,8 +5010,6 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#prometheus-alertmanager-config-status). - *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).*
@@ -4860,6 +5025,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check Prometheus logs for messages related to the failing operation. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-prometheus-tsdb-op-failure). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4883,6 +5049,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check Prometheus logs for messages related to target scrape failures. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-prometheus-target-sample-exceeded). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4906,6 +5073,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** - Check Prometheus logs for messages related to target scrape failures. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-prometheus-target-sample-duplicate). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4930,6 +5098,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-container-cpu-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4954,6 +5123,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -4978,6 +5148,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the prometheus service. - **Docker Compose:** Consider increasing `cpus:` of the prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-provisioning-container-cpu-usage-long-term). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5002,6 +5173,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the prometheus service. - **Docker Compose:** Consider increasing `memory:` of the prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5026,6 +5198,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5050,6 +5223,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of prometheus container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5072,6 +5246,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#prometheus-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5096,6 +5271,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-container-cpu-usage). 
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5104,7 +5280,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5120,6 +5296,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-container-memory-usage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5128,7 +5305,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5144,6 +5321,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the (executor|sourcegraph-code-intel-indexers|executor-batches) service. - **Docker Compose:** Consider increasing `cpus:` of the (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-provisioning-container-cpu-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5152,7 +5330,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5168,6 +5346,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the (executor|sourcegraph-code-intel-indexers|executor-batches) service. - **Docker Compose:** Consider increasing `memory:` of the (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-provisioning-container-memory-usage-long-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5176,7 +5355,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5192,6 +5371,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing CPU limits in the the relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `cpus:` of the (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-provisioning-container-cpu-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5200,7 +5380,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5216,6 +5396,7 @@ with your code hosts connections or networking issues affecting communication wi - **Kubernetes:** Consider increasing memory limit in relevant `Deployment.yaml`. - **Docker Compose:** Consider increasing `memory:` of (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`. +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-provisioning-container-memory-usage-short-term). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5224,7 +5405,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5238,6 +5419,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#executor-go-goroutines). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5246,9 +5428,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -> NOTE: More help interpreting this metric is available in the [dashboards reference](./dashboards.md#executor-go-goroutines). - -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5262,6 +5442,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-go-gc-duration-seconds). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5270,7 +5451,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
@@ -5284,6 +5465,7 @@ with your code hosts connections or networking issues affecting communication wi **Possible solutions** +- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#executor-pods-available-percentage). - **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: ```json @@ -5292,7 +5474,7 @@ with your code hosts connections or networking issues affecting communication wi ] ``` -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md index d7df1605883..7bfb244db05 100644 --- a/doc/admin/observability/dashboards.md +++ b/doc/admin/observability/dashboards.md @@ -10,265 +10,580 @@ To learn more about Sourcegraph's metrics and how to view these dashboards, see

Serves all end-user browser and API requests.

+To see this dashboard, visit `/-/debug/grafana/d/frontend/frontend` on your Sourcegraph instance. + ### Frontend: Search at a glance #### frontend: 99th_percentile_search_request_duration -This panel indicates 99th percentile successful search request duration over 5m. +

99th percentile successful search request duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-request-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-request-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="browser",request_name!="CodeIntelSearch"}[5m])))` + +
+
#### frontend: 90th_percentile_search_request_duration -This panel indicates 90th percentile successful search request duration over 5m. +

90th percentile successful search request duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-request-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-request-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.90, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="browser",request_name!="CodeIntelSearch"}[5m])))` + +
+
#### frontend: hard_timeout_search_responses -This panel indicates hard timeout search responses every 5m. +

Hard timeout search responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-hard-timeout-search-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-hard-timeout-search-responses) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100010` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `(sum(increase(src_graphql_search_response{status="timeout",source="browser",request_name!="CodeIntelSearch"}[5m])) + sum(increase(src_graphql_search_response{status="alert",alert_type="timed_out",source="browser",request_name!="CodeIntelSearch"}[5m]))) / sum(increase(src_graphql_search_response{source="browser",request_name!="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: hard_error_search_responses -This panel indicates hard error search responses every 5m. +

Hard error search responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-responses) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100011` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (status)(increase(src_graphql_search_response{status=~"error",source="browser",request_name!="CodeIntelSearch"}[5m])) / ignoring(status) group_left sum(increase(src_graphql_search_response{source="browser",request_name!="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: partial_timeout_search_responses -This panel indicates partial timeout search responses every 5m. +

Partial timeout search responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100012` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (status)(increase(src_graphql_search_response{status="partial_timeout",source="browser",request_name!="CodeIntelSearch"}[5m])) / ignoring(status) group_left sum(increase(src_graphql_search_response{source="browser",request_name!="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: search_alert_user_suggestions -This panel indicates search alert user suggestions shown every 5m. +

Search alert user suggestions shown every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-search-alert-user-suggestions). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-search-alert-user-suggestions) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100013` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (alert_type)(increase(src_graphql_search_response{status="alert",alert_type!~"timed_out|no_results__suggest_quotes",source="browser",request_name!="CodeIntelSearch"}[5m])) / ignoring(alert_type) group_left sum(increase(src_graphql_search_response{source="browser",request_name!="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: page_load_latency -This panel indicates 90th percentile page load latency over all routes over 10m. +

90th percentile page load latency over all routes over 10m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-page-load-latency). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-page-load-latency) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100020` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.9, sum by(le) (rate(src_http_request_duration_seconds_bucket{route!="raw",route!="blob",route!~"graphql.*"}[10m])))` + +
+
#### frontend: blob_load_latency -This panel indicates 90th percentile blob load latency over 10m. +

90th percentile blob load latency over 10m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-blob-load-latency). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-blob-load-latency) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100021` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.9, sum by(le) (rate(src_http_request_duration_seconds_bucket{route="blob"}[10m])))` + +
+
### Frontend: Search-based code intelligence at a glance #### frontend: 99th_percentile_search_codeintel_request_duration -This panel indicates 99th percentile code-intel successful search request duration over 5m. +

99th percentile code-intel successful search request duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-codeintel-request-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-codeintel-request-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100100` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="browser",request_name="CodeIntelSearch"}[5m])))` + +
+
#### frontend: 90th_percentile_search_codeintel_request_duration -This panel indicates 90th percentile code-intel successful search request duration over 5m. +

90th percentile code-intel successful search request duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-codeintel-request-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-codeintel-request-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.90, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="browser",request_name="CodeIntelSearch"}[5m])))` + +
+
#### frontend: hard_timeout_search_codeintel_responses -This panel indicates hard timeout search code-intel responses every 5m. +

Hard timeout search code-intel responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-hard-timeout-search-codeintel-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-hard-timeout-search-codeintel-responses) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100110` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `(sum(increase(src_graphql_search_response{status="timeout",source="browser",request_name="CodeIntelSearch"}[5m])) + sum(increase(src_graphql_search_response{status="alert",alert_type="timed_out",source="browser",request_name="CodeIntelSearch"}[5m]))) / sum(increase(src_graphql_search_response{source="browser",request_name="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: hard_error_search_codeintel_responses -This panel indicates hard error search code-intel responses every 5m. +

Hard error search code-intel responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-codeintel-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-codeintel-responses) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100111` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (status)(increase(src_graphql_search_response{status=~"error",source="browser",request_name="CodeIntelSearch"}[5m])) / ignoring(status) group_left sum(increase(src_graphql_search_response{source="browser",request_name="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: partial_timeout_search_codeintel_responses -This panel indicates partial timeout search code-intel responses every 5m. +

Partial timeout search code-intel responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-codeintel-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-codeintel-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100112` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (status)(increase(src_graphql_search_response{status="partial_timeout",source="browser",request_name="CodeIntelSearch"}[5m])) / ignoring(status) group_left sum(increase(src_graphql_search_response{status="partial_timeout",source="browser",request_name="CodeIntelSearch"}[5m])) * 100` + +
+
#### frontend: search_codeintel_alert_user_suggestions -This panel indicates search code-intel alert user suggestions shown every 5m. +

Search code-intel alert user suggestions shown every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-search-codeintel-alert-user-suggestions). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-search-codeintel-alert-user-suggestions) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100113` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (alert_type)(increase(src_graphql_search_response{status="alert",alert_type!~"timed_out",source="browser",request_name="CodeIntelSearch"}[5m])) / ignoring(alert_type) group_left sum(increase(src_graphql_search_response{source="browser",request_name="CodeIntelSearch"}[5m])) * 100` + +
+
### Frontend: Search API usage at a glance #### frontend: 99th_percentile_search_api_request_duration -This panel indicates 99th percentile successful search API request duration over 5m. +

99th percentile successful search API request duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-api-request-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-search-api-request-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="other"}[5m])))` + +
+
#### frontend: 90th_percentile_search_api_request_duration

-This panel indicates 90th percentile successful search API request duration over 5m.
+

90th percentile successful search API request duration over 5m

-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-api-request-duration).
+
+Refer to the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-search-api-request-duration) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100201` on your Sourcegraph instance.

*Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*

+Technical details
+
+Query: `histogram_quantile(0.90, sum by (le)(rate(src_graphql_field_seconds_bucket{type="Search",field="results",error="false",source="other"}[5m])))`
+
#### frontend: hard_error_search_api_responses

-This panel indicates hard error search API responses every 5m.
+

Hard error search API responses every 5m

-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-api-responses).
+
+Refer to the [alert solutions reference](./alert_solutions.md#frontend-hard-error-search-api-responses) for 2 alerts related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100210` on your Sourcegraph instance.

*Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*

+Technical details
+
+Query: `sum by (status)(increase(src_graphql_search_response{status=~"error",source="other"}[5m])) / ignoring(status) group_left sum(increase(src_graphql_search_response{source="other"}[5m]))`
+
#### frontend: partial_timeout_search_api_responses

-This panel indicates partial timeout search API responses every 5m.
+

Partial timeout search API responses every 5m

-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-api-responses).
+
+Refer to the [alert solutions reference](./alert_solutions.md#frontend-partial-timeout-search-api-responses) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100211` on your Sourcegraph instance.

*Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*

+Technical details
+
+Query: `sum(increase(src_graphql_search_response{status="partial_timeout",source="other"}[5m])) / sum(increase(src_graphql_search_response{source="other"}[5m]))`
+
#### frontend: search_api_alert_user_suggestions

-This panel indicates search API alert user suggestions shown every 5m.
+

Search API alert user suggestions shown every 5m

-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-search-api-alert-user-suggestions).
+
+Refer to the [alert solutions reference](./alert_solutions.md#frontend-search-api-alert-user-suggestions) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100212` on your Sourcegraph instance.

*Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*

+Technical details
+
+Query: `sum by (alert_type)(increase(src_graphql_search_response{status="alert",alert_type!~"timed_out|no_results__suggest_quotes",source="other"}[5m])) / ignoring(alert_type) group_left sum(increase(src_graphql_search_response{status="alert",source="other"}[5m]))`
+
### Frontend: Codeintel: Precise code intelligence usage at a glance

#### frontend: codeintel_resolvers_total

-This panel indicates aggregate graphql operations every 5m.
+

Aggregate graphql operations every 5m

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100300` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_resolvers_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))`
+
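Every entry in this reference links to its panel with the same URL shape: `<externalURL>/-/debug/grafana/d/<dashboard>/<dashboard>?viewPanel=<id>`. If you build such links yourself (for runbooks, for example), a small helper along these lines may be convenient; the function name and the example URL are hypothetical, only the path shape is taken from the entries above.

```go
package main

import "fmt"

// grafanaPanelURL builds the in-app Grafana link used throughout this reference.
// dashboard is the dashboard slug (for example "frontend") and panelID is the
// numeric viewPanel identifier shown in each entry.
func grafanaPanelURL(externalURL, dashboard string, panelID int) string {
	return fmt.Sprintf("%s/-/debug/grafana/d/%s/%s?viewPanel=%d", externalURL, dashboard, dashboard, panelID)
}

func main() {
	// Hypothetical instance URL; substitute your own Sourcegraph external URL.
	fmt.Println(grafanaPanelURL("https://sourcegraph.example.com", "frontend", 100300))
}
```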

#### frontend: codeintel_resolvers_99th_percentile_duration

-This panel indicates 99th percentile successful aggregate graphql operation duration over 5m.
+

99th percentile successful aggregate graphql operation duration over 5m

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100301` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_resolvers_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))`
+

#### frontend: codeintel_resolvers_errors_total

-This panel indicates aggregate graphql operation errors every 5m.
+

Aggregate graphql operation errors every 5m

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100302` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))`
+

#### frontend: codeintel_resolvers_error_rate

-This panel indicates aggregate graphql operation error rate over 5m.
+

Aggregate graphql operation error rate over 5m

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100303` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_resolvers_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100`
+
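All of the `*_error_rate` panels in this reference follow the same arithmetic: errors divided by (operations + errors), expressed as a percentage. A toy illustration with made-up counts:

```go
package main

import "fmt"

func main() {
	// Hypothetical counts over one 5m window: 95 increases of the operations
	// counter and 5 increases of the errors counter.
	ops, errs := 95.0, 5.0
	fmt.Printf("error rate = %.1f%%\n", errs/(ops+errs)*100) // error rate = 5.0%
}
```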

#### frontend: codeintel_resolvers_total -This panel indicates graphql operations every 5m. +

Graphql operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_resolvers_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_resolvers_99th_percentile_duration -This panel indicates 99th percentile successful graphql operation duration over 5m. +

99th percentile successful graphql operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_resolvers_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_resolvers_errors_total -This panel indicates graphql operation errors every 5m. +

Graphql operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100312` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_resolvers_error_rate -This panel indicates graphql operation error rate over 5m. +

Graphql operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100313` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_resolvers_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_resolvers_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -276,65 +591,169 @@ This panel indicates graphql operation error rate over 5m.

#### frontend: codeintel_autoindex_enqueuer_total

-This panel indicates aggregate enqueuer operations every 5m.
+

Aggregate enqueuer operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_autoindex_enqueuer_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_autoindex_enqueuer_99th_percentile_duration -This panel indicates 99th percentile successful aggregate enqueuer operation duration over 5m. +

99th percentile successful aggregate enqueuer operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_autoindex_enqueuer_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_autoindex_enqueuer_errors_total -This panel indicates aggregate enqueuer operation errors every 5m. +

Aggregate enqueuer operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_autoindex_enqueuer_error_rate -This panel indicates aggregate enqueuer operation error rate over 5m. +

Aggregate enqueuer operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100403` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_autoindex_enqueuer_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

#### frontend: codeintel_autoindex_enqueuer_total -This panel indicates enqueuer operations every 5m. +

Enqueuer operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100410` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_autoindex_enqueuer_99th_percentile_duration -This panel indicates 99th percentile successful enqueuer operation duration over 5m. +

99th percentile successful enqueuer operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100411` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_autoindex_enqueuer_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_autoindex_enqueuer_errors_total -This panel indicates enqueuer operation errors every 5m. +

Enqueuer operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100412` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_autoindex_enqueuer_error_rate -This panel indicates enqueuer operation error rate over 5m. +

Enqueuer operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100413` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_autoindex_enqueuer_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -342,65 +761,169 @@ This panel indicates enqueuer operation error rate over 5m.

#### frontend: codeintel_dbstore_total

-This panel indicates aggregate store operations every 5m.
+

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100501` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_dbstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100502` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_dbstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100503` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

#### frontend: codeintel_dbstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100510` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100511` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_dbstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100512` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_dbstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100513` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -408,33 +931,85 @@ This panel indicates store operation error rate over 5m.

#### frontend: workerutil_dbworker_store_codeintel_index_total

-This panel indicates store operations every 5m.
+

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100600` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_index_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: workerutil_dbworker_store_codeintel_index_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100601` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_workerutil_dbworker_store_codeintel_index_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: workerutil_dbworker_store_codeintel_index_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100602` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_index_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: workerutil_dbworker_store_codeintel_index_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100603` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_index_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_workerutil_dbworker_store_codeintel_index_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_workerutil_dbworker_store_codeintel_index_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -442,65 +1017,169 @@ This panel indicates store operation error rate over 5m.

#### frontend: codeintel_lsifstore_total

-This panel indicates aggregate store operations every 5m.
+

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100700` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100701` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_lsifstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100702` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_lsifstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100703` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_lsifstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

#### frontend: codeintel_lsifstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100710` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100711` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_lsifstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100712` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_lsifstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100713` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -508,65 +1187,169 @@ This panel indicates store operation error rate over 5m.

#### frontend: codeintel_gitserver_total

-This panel indicates aggregate client operations every 5m.
+

Aggregate client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100800` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful aggregate client operation duration over 5m. +

99th percentile successful aggregate client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100801` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_gitserver_errors_total -This panel indicates aggregate client operation errors every 5m. +

Aggregate client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100802` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_gitserver_error_rate -This panel indicates aggregate client operation error rate over 5m. +

Aggregate client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100803` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_gitserver_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

#### frontend: codeintel_gitserver_total -This panel indicates client operations every 5m. +

Client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100810` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful client operation duration over 5m. +

99th percentile successful client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100811` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_gitserver_errors_total -This panel indicates client operation errors every 5m. +

Client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100812` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_gitserver_error_rate -This panel indicates client operation error rate over 5m. +

Client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100813` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_gitserver_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -574,65 +1357,169 @@ This panel indicates client operation error rate over 5m.

#### frontend: codeintel_uploadstore_total

-This panel indicates aggregate store operations every 5m.
+

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100900` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_uploadstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100901` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_uploadstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_uploadstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100902` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_uploadstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100903` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_codeintel_uploadstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

#### frontend: codeintel_uploadstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100910` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_uploadstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100911` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_uploadstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: codeintel_uploadstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100912` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: codeintel_uploadstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=100913` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_codeintel_uploadstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -640,99 +1527,255 @@ This panel indicates store operation error rate over 5m.

#### frontend: batches_dbstore_total

-This panel indicates aggregate store operations every 5m.
+

Aggregate store operations every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101000` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +
+
#### frontend: batches_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101001` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_batches_dbstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +
+
#### frontend: batches_dbstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101002` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +
+
#### frontend: batches_dbstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101003` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_batches_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +
+
#### frontend: batches_dbstore_total -This panel indicates store operations every 5m. +

Store operations every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101010` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +
+
#### frontend: batches_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101011` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_batches_dbstore_duration_seconds_bucket{job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +
+
#### frontend: batches_dbstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101012` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +
+
#### frontend: batches_dbstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101013` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum by (op)(increase(src_batches_dbstore_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +
+
### Frontend: Out-of-band migrations: up migration invocation (one batch processed)

#### frontend: oobmigration_total

-This panel indicates migration handler operations every 5m.
+

Migration handler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_total{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: oobmigration_99th_percentile_duration -This panel indicates 99th percentile successful migration handler operation duration over 5m. +

99th percentile successful migration handler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101101` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_oobmigration_duration_seconds_bucket{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: oobmigration_errors_total -This panel indicates migration handler operation errors every 5m. +

Migration handler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101102` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_errors_total{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: oobmigration_error_rate -This panel indicates migration handler operation error rate over 5m. +

Migration handler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101103` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_errors_total{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_oobmigration_total{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_oobmigration_errors_total{op="up",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +
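The up-migration panels above and the down-migration panels that follow share the same four expressions; only the `op` label selector changes. A small sketch of how the two variants relate, abbreviated to the errors counter for brevity:

```go
package main

import "fmt"

func main() {
	// Base expression shared by both directions, parameterized by the op label.
	const base = `sum(increase(src_oobmigration_errors_total{op=%q,job=~"^(frontend|sourcegraph-frontend).*"}[5m]))`
	up := fmt.Sprintf(base, "up")     // used by the up-migration panels above
	down := fmt.Sprintf(base, "down") // used by the op="down" panels that follow
	fmt.Println(up)
	fmt.Println(down)
}
```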

@@ -740,33 +1783,85 @@ This panel indicates migration handler operation error rate over 5m.

#### frontend: oobmigration_total

-This panel indicates migration handler operations every 5m.
+

Migration handler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_total{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: oobmigration_99th_percentile_duration -This panel indicates 99th percentile successful migration handler operation duration over 5m. +

99th percentile successful migration handler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_oobmigration_duration_seconds_bucket{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m])))` + +

#### frontend: oobmigration_errors_total -This panel indicates migration handler operation errors every 5m. +

Migration handler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_errors_total{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))` + +

#### frontend: oobmigration_error_rate -This panel indicates migration handler operation error rate over 5m. +

Migration handler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_oobmigration_errors_total{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m])) / (sum(increase(src_oobmigration_total{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m])) + sum(increase(src_oobmigration_errors_total{op="down",job=~"^(frontend|sourcegraph-frontend).*"}[5m]))) * 100` + +

@@ -774,147 +1869,328 @@ This panel indicates migration handler operation error rate over 5m.

#### frontend: internal_indexed_search_error_responses

-This panel indicates internal indexed search error responses every 5m.
+

Internal indexed search error responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-internal-indexed-search-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-internal-indexed-search-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101300` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(code) (increase(src_zoekt_request_duration_seconds_count{code!~"2.."}[5m])) / ignoring(code) group_left sum(increase(src_zoekt_request_duration_seconds_count[5m])) * 100` + +
+
#### frontend: internal_unindexed_search_error_responses -This panel indicates internal unindexed search error responses every 5m. +

Internal unindexed search error responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-internal-unindexed-search-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-internal-unindexed-search-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101301` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(code) (increase(searcher_service_request_total{code!~"2.."}[5m])) / ignoring(code) group_left sum(increase(searcher_service_request_total[5m])) * 100` + +
+
#### frontend: internal_api_error_responses -This panel indicates internal API error responses every 5m by route. +

Internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-internal-api-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101302` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(category) (increase(src_frontend_internal_request_duration_seconds_count{code!~"2.."}[5m])) / ignoring(code) group_left sum(increase(src_frontend_internal_request_duration_seconds_count[5m])) * 100` + +
+
#### frontend: 99th_percentile_gitserver_duration -This panel indicates 99th percentile successful gitserver query duration over 5m. +

99th percentile successful gitserver query duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-gitserver-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-99th-percentile-gitserver-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101310` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,category)(rate(src_gitserver_request_duration_seconds_bucket{job=~"(sourcegraph-)?frontend"}[5m])))` + +
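The duration panels use `histogram_quantile` over cumulative `_bucket` series. The following is a deliberately simplified Go sketch of the linear interpolation it performs; it ignores the `+Inf` bucket and the other edge cases PromQL handles, and assumes the buckets are non-empty and sorted by upper bound.

```go
package main

import "fmt"

// bucket is one cumulative histogram bucket: count holds the number of
// observations with a value <= le (the bucket's upper bound).
type bucket struct {
	le    float64
	count float64
}

// quantile approximates the q-th quantile (0 < q < 1) by finding the bucket
// the target rank falls into and interpolating linearly inside it.
func quantile(q float64, buckets []bucket) float64 {
	total := buckets[len(buckets)-1].count
	rank := q * total

	prevLE, prevCount := 0.0, 0.0
	for _, b := range buckets {
		if b.count >= rank {
			if b.count == prevCount {
				return b.le
			}
			return prevLE + (b.le-prevLE)*(rank-prevCount)/(b.count-prevCount)
		}
		prevLE, prevCount = b.le, b.count
	}
	return buckets[len(buckets)-1].le
}

func main() {
	// Cumulative counts for upper bounds of 0.1s, 0.5s, 1s, and 5s.
	buckets := []bucket{{0.1, 40}, {0.5, 90}, {1, 98}, {5, 100}}
	fmt.Printf("p99 ~ %.2fs\n", quantile(0.99, buckets)) // p99 ~ 3.00s
}
```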
+
#### frontend: gitserver_error_responses -This panel indicates gitserver error responses every 5m. +

Gitserver error responses every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-gitserver-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-gitserver-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101311` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (category)(increase(src_gitserver_request_duration_seconds_count{job=~"(sourcegraph-)?frontend",code!~"2.."}[5m])) / ignoring(code) group_left sum by (category)(increase(src_gitserver_request_duration_seconds_count{job=~"(sourcegraph-)?frontend"}[5m])) * 100` + +
+
#### frontend: observability_test_alert_warning -This panel indicates warning test alert metric. +

Warning test alert metric -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-observability-test-alert-warning). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-observability-test-alert-warning) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101320` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `max by(owner) (observability_test_metric_warning)` + +
+
#### frontend: observability_test_alert_critical -This panel indicates critical test alert metric. +

Critical test alert metric -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-observability-test-alert-critical). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-observability-test-alert-critical) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101321` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `max by(owner) (observability_test_metric_critical)` + +
+
### Frontend: Database connections #### frontend: max_open_conns -This panel indicates maximum open. +

Maximum open + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101400` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="frontend"})` + +
+
#### frontend: open_conns -This panel indicates established. +

Established + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101401` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_open{app_name="frontend"})` + +
+
#### frontend: in_use -This panel indicates used. +

Used + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101410` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="frontend"})` + +
+
#### frontend: idle -This panel indicates idle. +

Idle + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101411` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="frontend"})` + +
+
#### frontend: mean_blocked_seconds_per_conn_request -This panel indicates mean blocked seconds per conn request. +

Mean blocked seconds per conn request -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-mean-blocked-seconds-per-conn-request). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101420` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="frontend"}[5m])) / sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name="frontend"}[5m]))` + +
+
#### frontend: closed_max_idle -This panel indicates closed by SetMaxIdleConns. +

Closed by SetMaxIdleConns + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101430` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="frontend"}[5m]))` + +
+
#### frontend: closed_max_lifetime -This panel indicates closed by SetConnMaxLifetime. +

Closed by SetConnMaxLifetime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101431` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name="frontend"}[5m]))` + +
+
#### frontend: closed_max_idle_time -This panel indicates closed by SetConnMaxIdleTime. +

Closed by SetConnMaxIdleTime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101432` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name="frontend"}[5m]))` + +
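The `closed_max_idle`, `closed_max_lifetime`, and `closed_max_idle_time` panels above are named after the standard `database/sql` pool limits. For orientation, here is a generic sketch of where those limits are set in a Go program; the connection string and the numeric values are placeholders, not Sourcegraph defaults.

```go
package main

import (
	"database/sql"
	"time"

	_ "github.com/lib/pq" // Postgres driver; any database/sql driver works the same way.
)

func main() {
	db, err := sql.Open("postgres", "postgres://user:pass@localhost:5432/db?sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Connections closed because of these limits surface in the
	// closed_max_idle / closed_max_lifetime / closed_max_idle_time panels.
	db.SetMaxOpenConns(30)                  // upper bound on open connections (max_open_conns panel)
	db.SetMaxIdleConns(10)                  // excess idle connections are closed (SetMaxIdleConns)
	db.SetConnMaxLifetime(30 * time.Minute) // connections older than this are closed (SetConnMaxLifetime)
	db.SetConnMaxIdleTime(5 * time.Minute)  // idle connections older than this are closed (SetConnMaxIdleTime)
}
```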
+
### Frontend: Container monitoring (not available on server) #### frontend: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -926,350 +2202,747 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' (frontend|sourcegraph-frontend)` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the (frontend|sourcegraph-frontend) container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs (frontend|sourcegraph-frontend)` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101500` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^(frontend|sourcegraph-frontend).*"}) > 60)` + +
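The `container_missing` troubleshooting steps above suggest checking whether a container was OOM killed via `docker inspect`. If you want to script that check, one possible wrapper looks like the following; the container name is only an example, and the struct mirrors just the two fields the check needs.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os/exec"
)

func main() {
	// Same check as in the troubleshooting steps above:
	// docker inspect -f '{{json .State}}' sourcegraph-frontend
	out, err := exec.Command("docker", "inspect", "-f", "{{json .State}}", "sourcegraph-frontend").Output()
	if err != nil {
		panic(err)
	}

	var state struct {
		OOMKilled bool   `json:"OOMKilled"`
		Status    string `json:"Status"`
	}
	if err := json.Unmarshal(out, &state); err != nil {
		panic(err)
	}
	fmt.Printf("status=%s oomKilled=%v\n", state.Status, state.OOMKilled)
}
```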
+
#### frontend: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101501` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}` + +
+
#### frontend: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101502` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}` + +
+
#### frontend: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with {{CONTAINER_NAME}} issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101503` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^(frontend|sourcegraph-frontend).*"}[1h]) + rate(container_fs_writes_total{name=~"^(frontend|sourcegraph-frontend).*"}[1h]))` + +
+
### Frontend: Provisioning indicators (not available on server) #### frontend: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101600` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}[1d])` + +
+
#### frontend: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101601` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}[1d])` + +
+
#### frontend: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101610` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}[5m])` + +
+
#### frontend: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101611` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(frontend|sourcegraph-frontend).*"}[5m])` + +
+
### Frontend: Golang runtime monitoring #### frontend: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#frontend-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101700` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*(frontend|sourcegraph-frontend)"})` + +
+
#### frontend: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101701` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*(frontend|sourcegraph-frontend)"})` + +
+
### Frontend: Kubernetes monitoring (only available on Kubernetes) #### frontend: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101800` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*(frontend|sourcegraph-frontend)"}) / count by (app) (up{app=~".*(frontend|sourcegraph-frontend)"}) * 100` + +
+
### Frontend: Sentinel queries (only on sourcegraph.com) #### frontend: mean_successful_sentinel_duration_5m -This panel indicates mean successful sentinel search duration over 5m. +

Mean successful sentinel search duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-mean-successful-sentinel-duration-5m). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-mean-successful-sentinel-duration-5m) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101900` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(src_search_response_latency_seconds_sum{source=~"searchblitz.*", status="success"}[5m])) / sum(rate(src_search_response_latency_seconds_count{source=~"searchblitz.*", status="success"}[5m]))` + +
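The mean-latency panels divide the rate of a histogram's `_sum` series by the rate of its `_count` series. As a rough illustration of why that ratio is a mean (all numbers below are invented):

```go
package main

import "fmt"

// meanSeconds computes the average observation over a window from two
// cumulative counters, mirroring rate(_sum[5m]) / rate(_count[5m]).
func meanSeconds(sumStart, sumEnd, countStart, countEnd float64) float64 {
	requests := countEnd - countStart
	if requests == 0 {
		return 0 // no searches in the window
	}
	return (sumEnd - sumStart) / requests
}

func main() {
	// 120 searches took a combined 54 seconds during the window.
	fmt.Printf("mean latency: %.3fs\n", meanSeconds(1000, 1054, 5000, 5120)) // 0.450s
}
```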
+
#### frontend: mean_sentinel_stream_latency_5m -This panel indicates mean sentinel stream latency over 5m. +

Mean sentinel stream latency over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-mean-sentinel-stream-latency-5m). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-mean-sentinel-stream-latency-5m) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101901` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(src_search_streaming_latency_seconds_sum{source=~"searchblitz.*"}[5m])) / sum(rate(src_search_streaming_latency_seconds_count{source=~"searchblitz.*"}[5m]))` + +
+
#### frontend: 90th_percentile_successful_sentinel_duration_5m -This panel indicates 90th percentile successful sentinel search duration over 5m. +

90th percentile successful sentinel search duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-successful-sentinel-duration-5m). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-successful-sentinel-duration-5m) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101910` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.90, sum by (le)(label_replace(rate(src_search_response_latency_seconds_bucket{source=~"searchblitz.*", status="success"}[5m]), "source", "$1", "source", "searchblitz_(.*)")))` + +
+
#### frontend: 90th_percentile_sentinel_stream_latency_5m -This panel indicates 90th percentile sentinel stream latency over 5m. +

90th percentile sentinel stream latency over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-sentinel-stream-latency-5m). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#frontend-90th-percentile-sentinel-stream-latency-5m) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101911` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `histogram_quantile(0.90, sum by (le)(label_replace(rate(src_search_streaming_latency_seconds_bucket{source=~"searchblitz.*"}[5m]), "source", "$1", "source", "searchblitz_(.*)")))` + +
+
#### frontend: mean_successful_sentinel_duration_by_query_5m -This panel indicates mean successful sentinel search duration by query over 5m. +

Mean successful sentinel search duration by query over 5m + +

- The mean search duration for sentinel queries, broken down by query. Useful for debugging whether a slowdown is limited to a specific type of query. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101920` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(src_search_response_latency_seconds_sum{source=~"searchblitz.*", status="success"}[5m])) by (source) / sum(rate(src_search_response_latency_seconds_count{source=~"searchblitz.*", status="success"}[5m])) by (source)` + +
+
#### frontend: mean_sentinel_stream_latency_by_query_5m -This panel indicates mean sentinel stream latency by query over 5m. +

Mean sentinel stream latency by query over 5m + +

- The mean streaming search latency for sentinel queries, broken down by query. Useful for debugging whether a slowdown is limited to a specific type of query. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101921` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(src_search_streaming_latency_seconds_sum{source=~"searchblitz.*"}[5m])) by (source) / sum(rate(src_search_streaming_latency_seconds_count{source=~"searchblitz.*"}[5m])) by (source)` + +
+
#### frontend: unsuccessful_status_rate_5m -This panel indicates unsuccessful status rate per 5m. +

Unsuccessful status rate per 5m + +

- The rate of unsuccessful sentinel queries, broken down by failure type. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/frontend/frontend?viewPanel=101930` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(src_graphql_search_response{source=~"searchblitz.*", status!="success"}[5m])) by (status)` + +
+
## Git Server

Stores, manages, and operates Git repositories.

+To see this dashboard, visit `/-/debug/grafana/d/gitserver/gitserver` on your Sourcegraph instance. + #### gitserver: memory_working_set -This panel indicates memory working set. +

Memory working set + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100000` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (container_memory_working_set_bytes{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"})` + +
+
#### gitserver: go_routines -This panel indicates go routines. +

Go routines + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100001` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `go_goroutines{app="gitserver", instance=~"${shard:regex}"}` + +
+
#### gitserver: cpu_throttling_time -This panel indicates container CPU throttling time %. +

Container CPU throttling time % + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100010` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) ((rate(container_cpu_cfs_throttled_periods_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]) / rate(container_cpu_cfs_periods_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m])) * 100)` + +
+
#### gitserver: cpu_usage_seconds -This panel indicates cpu usage seconds. +

Cpu usage seconds + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100011` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_cpu_usage_seconds_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]))` + +
+
#### gitserver: disk_space_remaining -This panel indicates disk space remaining by instance. +

Disk space remaining by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-disk-space-remaining). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-disk-space-remaining) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100020` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `(src_gitserver_disk_space_available / src_gitserver_disk_space_total) * 100` + +
+
#### gitserver: io_reads_total -This panel indicates i/o reads total. +

I/O reads total + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100030` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_container_name) (rate(container_fs_reads_total{container_label_io_kubernetes_container_name="gitserver"}[5m]))` + +
+
#### gitserver: io_writes_total -This panel indicates i/o writes total. +

I/O writes total + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100031` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_container_name) (rate(container_fs_writes_total{container_label_io_kubernetes_container_name="gitserver"}[5m]))` + +
+
#### gitserver: io_reads -This panel indicates i/o reads. +

I/O reads + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100040` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_fs_reads_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]))` + +
+
#### gitserver: io_writes -This panel indicates i/o writes. +

I/O writes + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100041` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_fs_writes_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]))` + +
+
#### gitserver: io_read_througput -This panel indicates i/o read throughput. +

I/O read throughput + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100050` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_fs_reads_bytes_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]))` + +
+
#### gitserver: io_write_throughput -This panel indicates i/o write throughput. +

I/O write throughput + +

+This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100051` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_fs_writes_bytes_total{container_label_io_kubernetes_container_name="gitserver", container_label_io_kubernetes_pod_name=~"${shard:regex}"}[5m]))` + +
+
#### gitserver: running_git_commands -This panel indicates git commands running on each gitserver instance. +

Git commands running on each gitserver instance + +

A high value signals load. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-running-git-commands). +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-running-git-commands) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100060` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (instance, cmd) (src_gitserver_exec_running{instance=~"${shard:regex}"})` + +
+
#### gitserver: git_commands_received -This panel indicates rate of git commands received across all instances. +

Rate of git commands received across all instances + +

Per-second rate per command across all instances +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100061` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (cmd) (rate(src_gitserver_exec_duration_seconds_count[5m]))` + +
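`rate(...[5m])` in the query above is the per-second increase of a cumulative counter over the window. Ignoring counter resets and Prometheus' extrapolation, the underlying arithmetic is just a delta divided by elapsed seconds, for example:

```go
package main

import (
	"fmt"
	"time"
)

// perSecondRate approximates rate(counter[window]) from two samples of a
// cumulative counter, ignoring resets and extrapolation.
func perSecondRate(earlier, later float64, window time.Duration) float64 {
	return (later - earlier) / window.Seconds()
}

func main() {
	// gitserver executed 900 additional git commands over a 5 minute window.
	fmt.Printf("%.1f commands/sec\n", perSecondRate(12100, 13000, 5*time.Minute)) // 3.0 commands/sec
}
```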
+
#### gitserver: repository_clone_queue_size -This panel indicates repository clone queue size. +

Repository clone queue size -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-repository-clone-queue-size). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-repository-clone-queue-size) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100070` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(src_gitserver_clone_queue)` + +
+
#### gitserver: repository_existence_check_queue_size -This panel indicates repository existence check queue size. +

Repository existence check queue size -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-repository-existence-check-queue-size). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-repository-existence-check-queue-size) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100071` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(src_gitserver_lsremote_queue)` + +
+
#### gitserver: echo_command_duration_test -This panel indicates echo test command duration. +

Echo test command duration + +

A high value here likely indicates a problem, especially if consistently high. You can query for individual commands using `sum by (cmd)(src_gitserver_exec_running)` in Grafana (`/-/debug/grafana`) to see if a specific Git Server command might be spiking in frequency. @@ -1279,115 +2952,280 @@ If this value is consistently high, consider the following: - **Single container deployments:** Upgrade to a [Docker Compose deployment](../install/docker-compose/migrate.md) which offers better scalability and resource isolation. - **Kubernetes and Docker Compose:** Check that you are running a similar number of git server replicas and that their CPU/memory limits are allocated according to what is shown in the [Sourcegraph resource estimator](../install/resource_estimator.md). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100080` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_gitserver_echo_duration_seconds)` + +
+
#### gitserver: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-frontend-internal-api-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100081` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="gitserver",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="gitserver"}[5m]))` + +
+
### Git Server: Gitserver cleanup jobs #### gitserver: janitor_running -This panel indicates if the janitor process is running. +

If the janitor process is running + +

1 if the janitor process is currently running +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100100` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (instance) (src_gitserver_janitor_running)` + +
+
#### gitserver: janitor_job_duration -This panel indicates 95th percentile job run duration. +

95th percentile job run duration + +

95th percentile job run duration +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100110` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.95, sum(rate(src_gitserver_janitor_job_duration_seconds_bucket[5m])) by (le, job_name))` + +
+
#### gitserver: repos_removed -This panel indicates repositories removed due to disk pressure. +

Repositories removed due to disk pressure + +

Repositories removed due to disk pressure +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100120` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (instance) (rate(src_gitserver_repos_removed_disk_pressure[5m]))` + +
+
### Git Server: Codeintel: Coursier invocation stats #### gitserver: codeintel_coursier_total -This panel indicates aggregate invocations operations every 5m. +

Aggregate invocations operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))` + +

#### gitserver: codeintel_coursier_99th_percentile_duration -This panel indicates 99th percentile successful aggregate invocations operation duration over 5m. +

99th percentile successful aggregate invocations operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_coursier_duration_seconds_bucket{op!="RunCommand",job=~"^gitserver.*"}[5m])))` + +

#### gitserver: codeintel_coursier_errors_total -This panel indicates aggregate invocations operation errors every 5m. +

Aggregate invocations operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))` + +

#### gitserver: codeintel_coursier_error_rate -This panel indicates aggregate invocations operation error rate over 5m. +

Aggregate invocations operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m])) / (sum(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^gitserver.*"}[5m])) + sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))) * 100` + +

#### gitserver: codeintel_coursier_total -This panel indicates invocations operations every 5m. +

Invocations operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100210` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))` + +

#### gitserver: codeintel_coursier_99th_percentile_duration -This panel indicates 99th percentile successful invocations operation duration over 5m. +

99th percentile successful invocations operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100211` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_coursier_duration_seconds_bucket{op!="RunCommand",job=~"^gitserver.*"}[5m])))` + +

#### gitserver: codeintel_coursier_errors_total -This panel indicates invocations operation errors every 5m. +

Invocations operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100212` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))` + +

#### gitserver: codeintel_coursier_error_rate -This panel indicates invocations operation error rate over 5m. +

Invocations operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100213` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m])) / (sum by (op)(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^gitserver.*"}[5m])) + sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^gitserver.*"}[5m]))) * 100` + +

@@ -1395,75 +3233,179 @@ This panel indicates invocations operation error rate over 5m. #### gitserver: max_open_conns -This panel indicates maximum open. +

Maximum open + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="gitserver"})` + +
+
#### gitserver: open_conns -This panel indicates established. +

Established + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_open{app_name="gitserver"})` + +
+
#### gitserver: in_use -This panel indicates used. +

Used + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100310` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="gitserver"})` + +
+
#### gitserver: idle -This panel indicates idle. +

Idle + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100311` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="gitserver"})` + +
+
#### gitserver: mean_blocked_seconds_per_conn_request -This panel indicates mean blocked seconds per conn request. +

Mean blocked seconds per conn request -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-mean-blocked-seconds-per-conn-request). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100320` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="gitserver"}[5m])) / sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name="gitserver"}[5m]))` + +
+
#### gitserver: closed_max_idle -This panel indicates closed by SetMaxIdleConns. +

Closed by SetMaxIdleConns + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100330` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="gitserver"}[5m]))` + +
+
#### gitserver: closed_max_lifetime -This panel indicates closed by SetConnMaxLifetime. +

Closed by SetConnMaxLifetime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100331` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name="gitserver"}[5m]))` + +
+
#### gitserver: closed_max_idle_time -This panel indicates closed by SetConnMaxIdleTime. +

Closed by SetConnMaxIdleTime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100332` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name="gitserver"}[5m]))` + +
+
### Git Server: Container monitoring (not available on server) #### gitserver: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -1475,140 +3417,282 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' gitserver` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the gitserver container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs gitserver` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100400` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^gitserver.*"}) > 60)` + +
+
#### gitserver: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100401` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^gitserver.*"}` + +
+
#### gitserver: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100402` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^gitserver.*"}` + +
+
#### gitserver: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with {{CONTAINER_NAME}} issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100403` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^gitserver.*"}[1h]) + rate(container_fs_writes_total{name=~"^gitserver.*"}[1h]))` + +
+
### Git Server: Provisioning indicators (not available on server) #### gitserver: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100500` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^gitserver.*"}[1d])` + +
+
#### gitserver: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance + +

Git Server is expected to use up all the memory it is provided. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100501` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^gitserver.*"}[1d])` + +
+
#### gitserver: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100510` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^gitserver.*"}[5m])` + +
+
#### gitserver: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance + +

Git Server is expected to use up all the memory it is provided. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100511` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^gitserver.*"}[5m])` + +
+
### Git Server: Golang runtime monitoring #### gitserver: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100600` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*gitserver"})` + +
+
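To check whether the goroutine count is merely high or actually climbing (the usual sign of a leak), a quick comparison against an earlier value can help. This is a sketch only, reusing the metric and job matcher from the query above; the one-hour offset is an arbitrary choice, not a recommended value:

```promql
# Positive values mean the goroutine count grew over the past hour
max by(instance) (go_goroutines{job=~".*gitserver"})
  - max by(instance) (go_goroutines{job=~".*gitserver"} offset 1h)
```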
#### gitserver: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100601` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*gitserver"})` + +
+
### Git Server: Kubernetes monitoring (only available on Kubernetes) #### gitserver: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#gitserver-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/gitserver/gitserver?viewPanel=100700` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*gitserver"}) / count by (app) (up{app=~".*gitserver"}) * 100` + +
+
## GitHub Proxy

Proxies all requests to github.com, keeping track of and managing rate limits.

+To see this dashboard, visit `/-/debug/grafana/d/github-proxy/github-proxy` on your Sourcegraph instance. + ### GitHub Proxy: GitHub API monitoring #### github-proxy: github_proxy_waiting_requests -This panel indicates number of requests waiting on the global mutex. +

Number of requests waiting on the global mutex -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-github-proxy-waiting-requests). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-github-proxy-waiting-requests) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(github_proxy_waiting_requests)` + +
+
### GitHub Proxy: Container monitoring (not available on server) #### github-proxy: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -1620,306 +3704,621 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' github-proxy` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the github-proxy container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs github-proxy` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100100` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^github-proxy.*"}) > 60)` + +
+
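When investigating, it can be useful to see how long ago each container was last observed, rather than only the count of disappearances. A minimal sketch, assuming the same `container_last_seen` metric and name matcher as the query above:

```promql
# Seconds since each github-proxy container was last seen by cAdvisor
time() - container_last_seen{name=~"^github-proxy.*"}
```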
#### github-proxy: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}` + +
+
#### github-proxy: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}` + +
+
#### github-proxy: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with {{CONTAINER_NAME}} issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100103` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^github-proxy.*"}[1h]) + rate(container_fs_writes_total{name=~"^github-proxy.*"}[1h]))` + +
+
### GitHub Proxy: Provisioning indicators (not available on server) #### github-proxy: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[1d])` + +
+
#### github-proxy: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[1d])` + +
+
#### github-proxy: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^github-proxy.*"}[5m])` + +
+
#### github-proxy: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^github-proxy.*"}[5m])` + +
+
### GitHub Proxy: Golang runtime monitoring #### github-proxy: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*github-proxy"})` + +
+
#### github-proxy: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*github-proxy"})` + +
+
### GitHub Proxy: Kubernetes monitoring (only available on Kubernetes) #### github-proxy: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#github-proxy-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#github-proxy-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/github-proxy/github-proxy?viewPanel=100400` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*github-proxy"}) / count by (app) (up{app=~".*github-proxy"}) * 100` + +
+
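If the percentage drops, listing the pods that are currently down is usually the next step. A sketch based on the same `up` series used in the query above:

```promql
# Pods currently reported as down (value 0) for this service
up{app=~".*github-proxy"} == 0
```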
## Postgres

Postgres metrics, exported from postgres_exporter (only available on Kubernetes).

+To see this dashboard, visit `/-/debug/grafana/d/postgres/postgres` on your Sourcegraph instance. + #### postgres: connections -This panel indicates active connections. +

Active connections -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-connections). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-connections) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (job) (pg_stat_activity_count{datname!~"template.*|postgres|cloudsqladmin"})` + +
+
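To see which database is holding the connections, the same series can be grouped by database name instead of job. A sketch reusing the metric and exclusion filter from the query above:

```promql
# Active connections broken down by database
sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres|cloudsqladmin"})
```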
#### postgres: transaction_durations -This panel indicates maximum transaction durations. +

Maximum transaction durations -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-transaction-durations). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-transaction-durations) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (datname) (pg_stat_activity_max_tx_duration{datname!~"template.*|postgres|cloudsqladmin"})` + +
+
### Postgres: Database and collector status #### postgres: postgres_up -This panel indicates database availability. +

Database availability + +

A non-zero value indicates the database is online. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-postgres-up). +Refer to the [alert solutions reference](./alert_solutions.md#postgres-postgres-up) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100100` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `pg_up` + +
+
#### postgres: invalid_indexes -This panel indicates invalid indexes (unusable by the query planner). +

Invalid indexes (unusable by the query planner) + +

A non-zero value indicates that Postgres failed to build an index. Expect degraded performance until the index is manually rebuilt. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-invalid-indexes). +Refer to the [alert solutions reference](./alert_solutions.md#postgres-invalid-indexes) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (relname)(pg_invalid_index_count)` + +
+
#### postgres: pg_exporter_err -This panel indicates errors scraping postgres exporter. +

Errors scraping postgres exporter + +

This value indicates issues retrieving metrics from postgres_exporter. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-pg-exporter-err). +Refer to the [alert solutions reference](./alert_solutions.md#postgres-pg-exporter-err) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100110` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `pg_exporter_last_scrape_error` + +
+
#### postgres: migration_in_progress -This panel indicates active schema migration. +

Active schema migration + +

A 0 value indicates that no migration is in progress. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-migration-in-progress). +Refer to the [alert solutions reference](./alert_solutions.md#postgres-migration-in-progress) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100111` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `pg_sg_migration_status` + +
+
### Postgres: Object size and bloat #### postgres: pg_table_size -This panel indicates table size. +

Table size + +

Total size of this table +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100200` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (relname)(pg_table_bloat_size)` + +
+
#### postgres: pg_table_bloat_ratio -This panel indicates table bloat ratio. +

Table bloat ratio + +

Estimated bloat ratio of this table (high bloat = high overhead) +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100201` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (relname)(pg_table_bloat_ratio) * 100` + +
+
#### postgres: pg_index_size -This panel indicates index size. +

Index size + +

Total size of this index +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100210` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (relname)(pg_index_bloat_size)` + +
+
#### postgres: pg_index_bloat_ratio -This panel indicates index bloat ratio. +

Index bloat ratio + +

Estimated bloat ratio of this index (high bloat = high overhead) +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100211` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (relname)(pg_index_bloat_ratio) * 100` + +
+
### Postgres: Provisioning indicators (not available on server) #### postgres: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^(pgsql|codeintel-db).*"}[1d])` + +
+
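When deciding whether to re-provision, it can help to filter for instances whose long-term usage sits above a chosen level. This is a sketch only, based on the query above; the 80% threshold is an arbitrary example, not a Sourcegraph-recommended value:

```promql
# Database containers whose 90th percentile CPU usage over 1d exceeds 80%
quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^(pgsql|codeintel-db).*"}[1d]) > 80
```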
#### postgres: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(pgsql|codeintel-db).*"}[1d])` + +
+
#### postgres: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100310` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(pgsql|codeintel-db).*"}[5m])` + +
+
#### postgres: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100311` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(pgsql|codeintel-db).*"}[5m])` + +
+
### Postgres: Kubernetes monitoring (only available on Kubernetes) #### postgres: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#postgres-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#postgres-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/postgres/postgres?viewPanel=100400` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*(pgsql|codeintel-db)"}) / count by (app) (up{app=~".*(pgsql|codeintel-db)"}) * 100` + +
+
## Precise Code Intel Worker

Handles conversion of uploaded precise code intelligence bundles.

+To see this dashboard, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker` on your Sourcegraph instance. + ### Precise Code Intel Worker: Codeintel: LSIF uploads #### precise-code-intel-worker: codeintel_upload_queue_size -This panel indicates unprocessed upload record queue size. +

Unprocessed upload record queue size -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100000` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_upload_total{job=~"^precise-code-intel-worker.*"})` + +

#### precise-code-intel-worker: codeintel_upload_queue_growth_rate -This panel indicates unprocessed upload record queue growth rate over 30m. +

Unprocessed upload record queue growth rate over 30m + +

This value compares the rate of enqueues against the rate of finished jobs. @@ -1927,7 +4326,18 @@ This value compares the rate of enqueues against the rate of finished jobs. - A value less than 1 indicates that process rate > enqueue rate - A value equal to 1 indicates that process rate = enqueue rate - A value greater than 1 indicates that process rate < enqueue rate (see the example queries below for a way to inspect each side of this ratio) -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100001` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_upload_total{job=~"^precise-code-intel-worker.*"}[30m])) / sum(increase(src_codeintel_upload_processor_total{job=~"^precise-code-intel-worker.*"}[30m]))` + +
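To see which side of the ratio is driving the trend, each half of the query above can be run on its own. This is a sketch reusing the same metrics and 30m window as the panel query; run each expression separately:

```promql
# Uploads enqueued over the last 30m
sum(increase(src_codeintel_upload_total{job=~"^precise-code-intel-worker.*"}[30m]))

# Uploads processed over the last 30m
sum(increase(src_codeintel_upload_processor_total{job=~"^precise-code-intel-worker.*"}[30m]))
```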

@@ -1935,41 +4345,106 @@ This value compares the rate of enqueues against the rate of finished jobs. #### precise-code-intel-worker: codeintel_upload_handlers -This panel indicates handler active handlers. +

Handler active handlers -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(src_codeintel_upload_processor_handlers{job=~"^precise-code-intel-worker.*"})` + +

#### precise-code-intel-worker: codeintel_upload_processor_total -This panel indicates handler operations every 5m. +

Handler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100110` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_upload_processor_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_upload_processor_99th_percentile_duration -This panel indicates 99th percentile successful handler operation duration over 5m. +

99th percentile successful handler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100111` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_upload_processor_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_upload_processor_errors_total -This panel indicates handler operation errors every 5m. +

Handler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100112` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_upload_processor_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_upload_processor_error_rate -This panel indicates handler operation error rate over 5m. +

Handler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100113` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_upload_processor_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_codeintel_upload_processor_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_codeintel_upload_processor_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -1977,65 +4452,169 @@ This panel indicates handler operation error rate over 5m. #### precise-code-intel-worker: codeintel_dbstore_total -This panel indicates aggregate store operations every 5m. +

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_dbstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_dbstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_codeintel_dbstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

#### precise-code-intel-worker: codeintel_dbstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100210` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100211` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_dbstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100212` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_dbstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100213` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_dbstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -2043,65 +4622,169 @@ This panel indicates store operation error rate over 5m. #### precise-code-intel-worker: codeintel_lsifstore_total -This panel indicates aggregate store operations every 5m. +

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_lsifstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100302` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_lsifstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100303` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_codeintel_lsifstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

#### precise-code-intel-worker: codeintel_lsifstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_lsifstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100312` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_lsifstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100313` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -2109,33 +4792,85 @@ This panel indicates store operation error rate over 5m. #### precise-code-intel-worker: workerutil_dbworker_store_codeintel_upload_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_upload_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: workerutil_dbworker_store_codeintel_upload_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_workerutil_dbworker_store_codeintel_upload_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: workerutil_dbworker_store_codeintel_upload_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_upload_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: workerutil_dbworker_store_codeintel_upload_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100403` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_upload_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_workerutil_dbworker_store_codeintel_upload_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_workerutil_dbworker_store_codeintel_upload_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -2143,65 +4878,169 @@ This panel indicates store operation error rate over 5m. #### precise-code-intel-worker: codeintel_gitserver_total -This panel indicates aggregate client operations every 5m. +

Aggregate client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful aggregate client operation duration over 5m. +

99th percentile successful aggregate client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100501` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_gitserver_errors_total -This panel indicates aggregate client operation errors every 5m. +

Aggregate client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100502` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_gitserver_error_rate -This panel indicates aggregate client operation error rate over 5m. +

Aggregate client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100503` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_codeintel_gitserver_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

#### precise-code-intel-worker: codeintel_gitserver_total -This panel indicates client operations every 5m. +

Client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100510` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful client operation duration over 5m. +

99th percentile successful client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100511` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_gitserver_errors_total -This panel indicates client operation errors every 5m. +

Client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100512` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_gitserver_error_rate -This panel indicates client operation error rate over 5m. +

Client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100513` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_gitserver_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -2209,65 +5048,169 @@ This panel indicates client operation error rate over 5m. #### precise-code-intel-worker: codeintel_uploadstore_total -This panel indicates aggregate store operations every 5m. +

Aggregate store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100600` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_uploadstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100601` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_uploadstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_uploadstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100602` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_uploadstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100603` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum(increase(src_codeintel_uploadstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

#### precise-code-intel-worker: codeintel_uploadstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100610` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_uploadstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100611` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_uploadstore_duration_seconds_bucket{job=~"^precise-code-intel-worker.*"}[5m])))` + +

#### precise-code-intel-worker: codeintel_uploadstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100612` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))` + +

#### precise-code-intel-worker: codeintel_uploadstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100613` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_uploadstore_total{job=~"^precise-code-intel-worker.*"}[5m])) + sum by (op)(increase(src_codeintel_uploadstore_errors_total{job=~"^precise-code-intel-worker.*"}[5m]))) * 100` + +

@@ -2275,11 +5218,22 @@ This panel indicates store operation error rate over 5m. #### precise-code-intel-worker: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-frontend-internal-api-error-responses). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100700` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="precise-code-intel-worker",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="precise-code-intel-worker"}[5m]))` + +
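+
+The panel query divides each route category's non-2xx request count by the total request count across all categories; `ignoring(category) group_left` is what lets the per-category series match the single aggregate denominator. As a slightly different cut, a sketch of the error share within a single category (the `category` value below is hypothetical; substitute one reported by your instance):
+
+```promql
+# Sketch: fraction of requests in one category that returned a non-2xx code
+# (category="example" is a placeholder; use a real category label value).
+sum(increase(src_frontend_internal_request_duration_seconds_count{job="precise-code-intel-worker",category="example",code!~"2.."}[5m]))
+/
+sum(increase(src_frontend_internal_request_duration_seconds_count{job="precise-code-intel-worker",category="example"}[5m]))
+```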

@@ -2287,75 +5241,179 @@ This panel indicates frontend-internal API error responses every 5m by route. #### precise-code-intel-worker: max_open_conns -This panel indicates maximum open. +

Maximum open + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100800` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="precise-code-intel-worker"})` + +
+
#### precise-code-intel-worker: open_conns -This panel indicates established. +

Established + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100801` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_open{app_name="precise-code-intel-worker"})` + +
+
#### precise-code-intel-worker: in_use -This panel indicates used. +

Used + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100810` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="precise-code-intel-worker"})` + +
+
#### precise-code-intel-worker: idle -This panel indicates idle. +

Idle + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100811` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="precise-code-intel-worker"})` + +
+
#### precise-code-intel-worker: mean_blocked_seconds_per_conn_request -This panel indicates mean blocked seconds per conn request. +

Mean blocked seconds per conn request -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-mean-blocked-seconds-per-conn-request). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100820` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="precise-code-intel-worker"}[5m])) / sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name="precise-code-intel-worker"}[5m]))` + +
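+
+The panel divides the total time spent blocked waiting for a connection by the number of waits recorded (`src_pgsql_conns_waited_for`), giving a mean wait per connection request. As a sketch, inspecting the raw numerator alone over a longer window can help when the mean looks suspicious:
+
+```promql
+# Sketch: total seconds spent blocked waiting for a pgsql connection over the
+# last hour, per database, for this service.
+sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="precise-code-intel-worker"}[1h]))
+```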
+
#### precise-code-intel-worker: closed_max_idle -This panel indicates closed by SetMaxIdleConns. +

Closed by SetMaxIdleConns + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100830` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="precise-code-intel-worker"}[5m]))` + +
+
#### precise-code-intel-worker: closed_max_lifetime -This panel indicates closed by SetConnMaxLifetime. +

Closed by SetConnMaxLifetime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100831` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name="precise-code-intel-worker"}[5m]))` + +
+
#### precise-code-intel-worker: closed_max_idle_time -This panel indicates closed by SetConnMaxIdleTime. +

Closed by SetConnMaxIdleTime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100832` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name="precise-code-intel-worker"}[5m]))` + +
+
### Precise Code Intel Worker: Container monitoring (not available on server) #### precise-code-intel-worker: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -2367,80 +5425,170 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' precise-code-intel-worker` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the precise-code-intel-worker container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs precise-code-intel-worker` (note this will include logs from the previous and currently running container). -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100900` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^precise-code-intel-worker.*"}) > 60)` + +
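+
+The panel counts containers whose `container_last_seen` timestamp is more than a minute old. A sketch for inspecting the raw staleness directly:
+
+```promql
+# Sketch: seconds since each precise-code-intel-worker container was last seen;
+# values that stay above 60 are what the panel above counts.
+time() - container_last_seen{name=~"^precise-code-intel-worker.*"}
+```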

#### precise-code-intel-worker: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-container-cpu-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100901` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^precise-code-intel-worker.*"}` + +

#### precise-code-intel-worker: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-container-memory-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100902` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^precise-code-intel-worker.*"}` + +

#### precise-code-intel-worker: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with precise-code-intel-worker issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=100903` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^precise-code-intel-worker.*"}[1h]) + rate(container_fs_writes_total{name=~"^precise-code-intel-worker.*"}[1h]))` + +
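+
+The panel sums read and write rates together. When it spikes, splitting the two sides can show which dominates; a sketch (run each expression as its own query):
+
+```promql
+# Sketch: filesystem read rate by container over 1h.
+sum by(name) (rate(container_fs_reads_total{name=~"^precise-code-intel-worker.*"}[1h]))
+
+# Sketch: filesystem write rate by container over 1h.
+sum by(name) (rate(container_fs_writes_total{name=~"^precise-code-intel-worker.*"}[1h]))
+```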
+
### Precise Code Intel Worker: Provisioning indicators (not available on server) #### precise-code-intel-worker: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-cpu-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101000` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^precise-code-intel-worker.*"}[1d])` + +

#### precise-code-intel-worker: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-memory-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101001` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^precise-code-intel-worker.*"}[1d])` + +

#### precise-code-intel-worker: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-cpu-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101010` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^precise-code-intel-worker.*"}[5m])` + +

#### precise-code-intel-worker: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-memory-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101011` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^precise-code-intel-worker.*"}[5m])` + +

@@ -2448,23 +5596,45 @@ This panel indicates container memory usage (5m maximum) by instance. #### precise-code-intel-worker: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-go-goroutines) for 1 alert related to this panel. -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*precise-code-intel-worker"})` + +
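+
+Because a leak shows up as steady growth rather than a single high reading, a sketch that looks at the change in goroutine count over time can help confirm one:
+
+```promql
+# Sketch: change in goroutine count per instance over the last 30 minutes;
+# a consistently positive value is the classic signature of a goroutine leak.
+max by(instance) (delta(go_goroutines{job=~".*precise-code-intel-worker"}[30m]))
+```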

#### precise-code-intel-worker: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-go-gc-duration-seconds). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101101` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*precise-code-intel-worker"})` + +

@@ -2472,11 +5642,22 @@ This panel indicates maximum go garbage collection duration. #### precise-code-intel-worker: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-pods-available-percentage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/precise-code-intel-worker/precise-code-intel-worker?viewPanel=101200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by(app) (up{app=~".*precise-code-intel-worker"}) / count by (app) (up{app=~".*precise-code-intel-worker"}) * 100` + +

@@ -2484,23 +5665,38 @@ This panel indicates percentage pods available.

Periodically runs saved searches and instructs the frontend to send out notifications.

+To see this dashboard, visit `/-/debug/grafana/d/query-runner/query-runner` on your Sourcegraph instance. + ### Query Runner: Internal service requests #### query-runner: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-frontend-internal-api-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="query-runner",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="query-runner"}[5m]))` + +
+
### Query Runner: Container monitoring (not available on server) #### query-runner: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -2512,162 +5708,333 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' query-runner` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the query-runner container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs query-runner` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100100` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^query-runner.*"}) > 60)` + +
+
#### query-runner: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^query-runner.*"}` + +
+
#### query-runner: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^query-runner.*"}` + +
+
#### query-runner: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with query-runner issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100103` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^query-runner.*"}[1h]) + rate(container_fs_writes_total{name=~"^query-runner.*"}[1h]))` + +
+
### Query Runner: Provisioning indicators (not available on server) #### query-runner: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^query-runner.*"}[1d])` + +
+
#### query-runner: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^query-runner.*"}[1d])` + +
+
#### query-runner: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^query-runner.*"}[5m])` + +
+
#### query-runner: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^query-runner.*"}[5m])` + +
+
### Query Runner: Golang runtime monitoring #### query-runner: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*query-runner"})` + +
+
#### query-runner: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*query-runner"})` + +
+
### Query Runner: Kubernetes monitoring (only available on Kubernetes) #### query-runner: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#query-runner-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#query-runner-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/query-runner/query-runner?viewPanel=100400` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(app) (up{app=~".*query-runner"}) / count by (app) (up{app=~".*query-runner"}) * 100` + +
+
## Worker

Manages background processes.

+To see this dashboard, visit `/-/debug/grafana/d/worker/worker` on your Sourcegraph instance. + ### Worker: Active jobs #### worker: worker_job_count -This panel indicates number of worker instances running each job. +

Number of worker instances running each job + +

The number of worker instances running each job type. It is necessary for each job type to be managed by at least one worker instance. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100000` on your Sourcegraph instance. + + +
+Technical details + +Query: `sum by (job_name) (src_worker_jobs{job="worker"})` + +
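+
+Since every job type must be handled by at least one worker instance, a quick sketch for listing job types that currently have none:
+
+```promql
+# Sketch: job types with fewer than one running worker instance; an empty
+# result means every job type is covered.
+sum by (job_name) (src_worker_jobs{job="worker"}) < 1
+```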

#### worker: worker_job_codeintel-janitor_count -This panel indicates number of worker instances running the codeintel-janitor job. +

Number of worker instances running the codeintel-janitor job -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-janitor-count). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-janitor-count) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100010` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum (src_worker_jobs{job="worker", job_name="codeintel-janitor"})` + +

#### worker: worker_job_codeintel-commitgraph_count -This panel indicates number of worker instances running the codeintel-commitgraph job. +

Number of worker instances running the codeintel-commitgraph job -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-commitgraph-count). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-commitgraph-count) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100011` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum (src_worker_jobs{job="worker", job_name="codeintel-commitgraph"})` + +

#### worker: worker_job_codeintel-auto-indexing_count -This panel indicates number of worker instances running the codeintel-auto-indexing job. +

Number of worker instances running the codeintel-auto-indexing job -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-auto-indexing-count). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-worker-job-codeintel-auto-indexing-count) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100012` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum (src_worker_jobs{job="worker", job_name="codeintel-auto-indexing"})` + +

@@ -2675,15 +6042,30 @@ This panel indicates number of worker instances running the codeintel-auto-index #### worker: codeintel_commit_graph_queue_size -This panel indicates repository queue size. +

Repository queue size -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_commit_graph_total{job=~"^worker.*"})` + +

#### worker: codeintel_commit_graph_queue_growth_rate -This panel indicates repository queue growth rate over 30m. +

Repository queue growth rate over 30m + +

This value compares the rate of enqueues against the rate of finished jobs. @@ -2691,7 +6073,18 @@ This value compares the rate of enqueues against the rate of finished jobs. - A value = than 1 indicates that process rate = enqueue rate - A value > than 1 indicates that process rate < enqueue rate -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100101` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_commit_graph_total{job=~"^worker.*"}[30m])) / sum(increase(src_codeintel_commit_graph_processor_total{job=~"^worker.*"}[30m]))` + +
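+
+As a worked example: if roughly 200 commit-graph updates were enqueued over the last 30 minutes but only 100 were processed, this ratio reads 2 and the queue is growing. A sketch of the same ratio over a shorter window, for lining a spike up against a narrower time range:
+
+```promql
+# Sketch: enqueue rate vs. processing rate over 5m instead of 30m.
+sum(increase(src_codeintel_commit_graph_total{job=~"^worker.*"}[5m]))
+/
+sum(increase(src_codeintel_commit_graph_processor_total{job=~"^worker.*"}[5m]))
+```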

@@ -2699,33 +6092,85 @@ This value compares the rate of enqueues against the rate of finished jobs. #### worker: codeintel_commit_graph_processor_total -This panel indicates update operations every 5m. +

Update operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_commit_graph_processor_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_commit_graph_processor_99th_percentile_duration -This panel indicates 99th percentile successful update operation duration over 5m. +

99th percentile successful update operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_commit_graph_processor_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_commit_graph_processor_errors_total -This panel indicates update operation errors every 5m. +

Update operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_commit_graph_processor_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_commit_graph_processor_error_rate -This panel indicates update operation error rate over 5m. +

Update operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_commit_graph_processor_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_commit_graph_processor_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_commit_graph_processor_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -2733,15 +6178,30 @@ This panel indicates update operation error rate over 5m. #### worker: codeintel_dependency_index_queue_size -This panel indicates dependency index job queue size. +

Dependency index job queue size -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_dependency_index_total{job=~"^worker.*"})` + +

#### worker: codeintel_dependency_index_queue_growth_rate -This panel indicates dependency index job queue growth rate over 30m. +

Dependency index job queue growth rate over 30m + +

This value compares the rate of enqueues against the rate of finished jobs. @@ -2749,7 +6209,18 @@ This value compares the rate of enqueues against the rate of finished jobs. - A value = than 1 indicates that process rate = enqueue rate - A value > than 1 indicates that process rate < enqueue rate -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_index_total{job=~"^worker.*"}[30m])) / sum(increase(src_codeintel_dependency_index_processor_total{job=~"^worker.*"}[30m]))` + +

@@ -2757,41 +6228,106 @@ This value compares the rate of enqueues against the rate of finished jobs. #### worker: codeintel_dependency_index_handlers -This panel indicates handler active handlers. +

Handler active handlers -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(src_codeintel_dependency_index_processor_handlers{job=~"^worker.*"})` + +

#### worker: codeintel_dependency_index_processor_total -This panel indicates handler operations every 5m. +

Handler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100410` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_index_processor_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dependency_index_processor_99th_percentile_duration -This panel indicates 99th percentile successful handler operation duration over 5m. +

99th percentile successful handler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100411` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_dependency_index_processor_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_dependency_index_processor_errors_total -This panel indicates handler operation errors every 5m. +

Handler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100412` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_index_processor_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dependency_index_processor_error_rate -This panel indicates handler operation error rate over 5m. +

Handler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100413` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_index_processor_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_dependency_index_processor_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_dependency_index_processor_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -2799,41 +6335,93 @@ This panel indicates handler operation error rate over 5m. #### worker: codeintel_background_upload_records_removed_total -This panel indicates lsif_upload records deleted every 5m. +

Lsif upload records deleted every 5m + +

Number of LSIF upload records deleted due to expiration or unreachability every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_upload_records_removed_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_index_records_removed_total -This panel indicates lsif_index records deleted every 5m. +

Lsif index records deleted every 5m + +

Number of LSIF index records deleted due to expiration or unreachability every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100501` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_index_records_removed_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_uploads_purged_total -This panel indicates lsif_upload data bundles deleted every 5m. +

Lsif upload data bundles deleted every 5m + +

Number of LSIF upload data bundles purged from the codeintel-db database every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100502` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_uploads_purged_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_errors_total -This panel indicates janitor operation errors every 5m. +

Janitor operation errors every 5m + +

Number of code intelligence janitor errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100503` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_errors_total{job=~"^worker.*"}[5m]))` + +

@@ -2841,65 +6429,169 @@ Number of code intelligence janitor errors every 5m #### worker: codeintel_index_scheduler_total -This panel indicates aggregate scheduler operations every 5m. +

Aggregate scheduler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100600` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_index_scheduler_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_index_scheduler_99th_percentile_duration -This panel indicates 99th percentile successful aggregate scheduler operation duration over 5m. +

99th percentile successful aggregate scheduler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100601` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_index_scheduler_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_index_scheduler_errors_total -This panel indicates aggregate scheduler operation errors every 5m. +

Aggregate scheduler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100602` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_index_scheduler_error_rate -This panel indicates aggregate scheduler operation error rate over 5m. +

Aggregate scheduler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100603` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_index_scheduler_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: codeintel_index_scheduler_total -This panel indicates scheduler operations every 5m. +

Scheduler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100610` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_index_scheduler_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_index_scheduler_99th_percentile_duration -This panel indicates 99th percentile successful scheduler operation duration over 5m. +

99th percentile successful scheduler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100611` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_index_scheduler_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_index_scheduler_errors_total -This panel indicates scheduler operation errors every 5m. +

Scheduler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100612` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_index_scheduler_error_rate -This panel indicates scheduler operation error rate over 5m. +

Scheduler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100613` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_index_scheduler_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_codeintel_index_scheduler_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -2907,65 +6599,169 @@ This panel indicates scheduler operation error rate over 5m.
 #### worker: codeintel_autoindex_enqueuer_total
 
-This panel indicates aggregate enqueuer operations every 5m.
+Aggregate enqueuer operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100700` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_autoindex_enqueuer_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_autoindex_enqueuer_99th_percentile_duration -This panel indicates 99th percentile successful aggregate enqueuer operation duration over 5m. +

99th percentile successful aggregate enqueuer operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100701` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_autoindex_enqueuer_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_autoindex_enqueuer_errors_total -This panel indicates aggregate enqueuer operation errors every 5m. +

Aggregate enqueuer operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100702` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_autoindex_enqueuer_error_rate -This panel indicates aggregate enqueuer operation error rate over 5m. +

Aggregate enqueuer operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100703` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_autoindex_enqueuer_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m]))) * 100` + +
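
Each `Query:` above is standalone PromQL, so the same expressions can be run outside Grafana, for example when scripting a check. A hedged sketch using the Prometheus Go client (`github.com/prometheus/client_golang`); the Prometheus address, and being able to reach it directly at all, is an assumption about your deployment, and on a Sourcegraph instance these queries are normally viewed through the linked Grafana panels instead.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

func main() {
	// Assumed address; adjust to wherever your Prometheus is reachable.
	client, err := api.NewClient(api.Config{Address: "http://prometheus:9090"})
	if err != nil {
		panic(err)
	}
	promAPI := v1.NewAPI(client)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// One of the queries listed above, evaluated at the current instant.
	const query = `sum(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m]))`
	result, warnings, err := promAPI.Query(ctx, query, time.Now())
	if err != nil {
		panic(err)
	}
	if len(warnings) > 0 {
		fmt.Println("warnings:", warnings)
	}
	fmt.Println(result)
}
```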

#### worker: codeintel_autoindex_enqueuer_total -This panel indicates enqueuer operations every 5m. +

Enqueuer operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100710` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_autoindex_enqueuer_99th_percentile_duration -This panel indicates 99th percentile successful enqueuer operation duration over 5m. +

99th percentile successful enqueuer operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100711` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_autoindex_enqueuer_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_autoindex_enqueuer_errors_total -This panel indicates enqueuer operation errors every 5m. +

Enqueuer operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100712` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_autoindex_enqueuer_error_rate -This panel indicates enqueuer operation error rate over 5m. +

Enqueuer operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100713` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_autoindex_enqueuer_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_codeintel_autoindex_enqueuer_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -2973,65 +6769,169 @@ This panel indicates enqueuer operation error rate over 5m.
 #### worker: codeintel_dbstore_total
 
-This panel indicates aggregate store operations every 5m.
+Aggregate store operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100800` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_dbstore_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100801` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_dbstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100802` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dbstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100803` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_dbstore_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: codeintel_dbstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100810` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100811` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_dbstore_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_dbstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100812` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dbstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100813` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_dbstore_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_codeintel_dbstore_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3039,65 +6939,169 @@ This panel indicates store operation error rate over 5m.
 #### worker: codeintel_lsifstore_total
 
-This panel indicates aggregate store operations every 5m.
+Aggregate store operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100900` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_lsifstore_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100901` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_lsifstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100902` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_lsifstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100903` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_lsifstore_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: codeintel_lsifstore_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100910` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_lsifstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100911` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_lsifstore_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_lsifstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100912` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_lsifstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=100913` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_lsifstore_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_codeintel_lsifstore_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3105,33 +7109,85 @@ This panel indicates store operation error rate over 5m.
 #### worker: workerutil_dbworker_store_codeintel_dependency_index_total
 
-This panel indicates store operations every 5m.
+Store operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101000` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_workerutil_dbworker_store_codeintel_dependency_index_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: workerutil_dbworker_store_codeintel_dependency_index_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101001` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_workerutil_dbworker_store_codeintel_dependency_index_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: workerutil_dbworker_store_codeintel_dependency_index_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101002` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_dependency_index_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: workerutil_dbworker_store_codeintel_dependency_index_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101003` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_codeintel_dependency_index_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_workerutil_dbworker_store_codeintel_dependency_index_total{job=~"^worker.*"}[5m])) + sum(increase(src_workerutil_dbworker_store_codeintel_dependency_index_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3139,65 +7195,169 @@ This panel indicates store operation error rate over 5m.
 #### worker: codeintel_gitserver_total
 
-This panel indicates aggregate client operations every 5m.
+Aggregate client operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101100` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_gitserver_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful aggregate client operation duration over 5m. +

99th percentile successful aggregate client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101101` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_gitserver_errors_total -This panel indicates aggregate client operation errors every 5m. +

Aggregate client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101102` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_gitserver_error_rate -This panel indicates aggregate client operation error rate over 5m. +

Aggregate client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101103` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_gitserver_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: codeintel_gitserver_total -This panel indicates client operations every 5m. +

Client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101110` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_gitserver_99th_percentile_duration -This panel indicates 99th percentile successful client operation duration over 5m. +

99th percentile successful client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101111` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_gitserver_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_gitserver_errors_total -This panel indicates client operation errors every 5m. +

Client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101112` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_gitserver_error_rate -This panel indicates client operation error rate over 5m. +

Client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101113` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_codeintel_gitserver_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_codeintel_gitserver_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3205,65 +7365,169 @@ This panel indicates client operation error rate over 5m.
 #### worker: codeintel_dependency_repos_total
 
-This panel indicates aggregate insert operations every 5m.
+Aggregate insert operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101200` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_dependency_repos_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_dependency_repos_99th_percentile_duration -This panel indicates 99th percentile successful aggregate insert operation duration over 5m. +

99th percentile successful aggregate insert operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_dependency_repos_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_dependency_repos_errors_total -This panel indicates aggregate insert operation errors every 5m. +

Aggregate insert operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dependency_repos_error_rate -This panel indicates aggregate insert operation error rate over 5m. +

Aggregate insert operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_codeintel_dependency_repos_total{job=~"^worker.*"}[5m])) + sum(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: codeintel_dependency_repos_total -This panel indicates insert operations every 5m. +

Insert operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101210` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (scheme,new)(increase(src_codeintel_dependency_repos_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dependency_repos_99th_percentile_duration -This panel indicates 99th percentile successful insert operation duration over 5m. +

99th percentile successful insert operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101211` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,scheme,new)(rate(src_codeintel_dependency_repos_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: codeintel_dependency_repos_errors_total -This panel indicates insert operation errors every 5m. +

Insert operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101212` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (scheme,new)(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_dependency_repos_error_rate -This panel indicates insert operation error rate over 5m. +

Insert operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101213` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (scheme,new)(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m])) / (sum by (scheme,new)(increase(src_codeintel_dependency_repos_total{job=~"^worker.*"}[5m])) + sum by (scheme,new)(increase(src_codeintel_dependency_repos_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3271,25 +7535,64 @@ This panel indicates insert operation error rate over 5m.
 #### worker: codeintel_background_upload_record_resets_total
 
-This panel indicates lsif_upload records reset to queued state every 5m.
+Lsif upload records reset to queued state every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101300` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_background_upload_record_resets_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_background_upload_record_reset_failures_total -This panel indicates lsif_upload records reset to errored state every 5m. +

Lsif upload records reset to errored state every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_upload_record_reset_failures_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_upload_record_reset_errors_total -This panel indicates lsif_upload operation errors every 5m. +

Lsif upload operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101302` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_upload_record_reset_errors_total{job=~"^worker.*"}[5m]))` + +

@@ -3297,25 +7600,64 @@ This panel indicates lsif_upload operation errors every 5m.
 #### worker: codeintel_background_index_record_resets_total
 
-This panel indicates lsif_index records reset to queued state every 5m.
+Lsif index records reset to queued state every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101400` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_background_index_record_resets_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_background_index_record_reset_failures_total -This panel indicates lsif_index records reset to errored state every 5m. +

Lsif index records reset to errored state every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_index_record_reset_failures_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_index_record_reset_errors_total -This panel indicates lsif_index operation errors every 5m. +

Lsif index operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_index_record_reset_errors_total{job=~"^worker.*"}[5m]))` + +

@@ -3323,25 +7665,64 @@ This panel indicates lsif_index operation errors every 5m.
 #### worker: codeintel_background_dependency_index_record_resets_total
 
-This panel indicates lsif_dependency_index records reset to queued state every 5m.
+Lsif dependency index records reset to queued state every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101500` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum(increase(src_codeintel_background_dependency_index_record_resets_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: codeintel_background_dependency_index_record_reset_failures_total -This panel indicates lsif_dependency_index records reset to errored state every 5m. +

Lsif dependency index records reset to errored state every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101501` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_dependency_index_record_reset_failures_total{job=~"^worker.*"}[5m]))` + +

#### worker: codeintel_background_dependency_index_record_reset_errors_total -This panel indicates lsif_dependency_index operation errors every 5m. +

Lsif dependency index operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101502` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_background_dependency_index_record_reset_errors_total{job=~"^worker.*"}[5m]))` + +

@@ -3349,15 +7730,30 @@ This panel indicates lsif_dependency_index operation errors every 5m.
 #### worker: insights_search_queue_queue_size
 
-This panel indicates code insights search queue queue size.
+Code insights search queue queue size
 
-*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101600` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).*
+
+Technical details
+
+Query: `max(src_insights_search_queue_total{job=~"^worker.*"})`
+
 
 #### worker: insights_search_queue_queue_growth_rate
 
-This panel indicates code insights search queue queue growth rate over 30m.
+Code insights search queue queue growth rate over 30m
+
 
 This value compares the rate of enqueues against the rate of finished jobs.
 
@@ -3365,7 +7761,18 @@ This value compares the rate of enqueues against the rate of finished jobs.
 - A value = than 1 indicates that process rate = enqueue rate
 - A value > than 1 indicates that process rate < enqueue rate
 
-*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).*
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101601` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).*
+
+Technical details
+
+Query: `sum(increase(src_insights_search_queue_total{job=~"^worker.*"}[30m])) / sum(increase(src_insights_search_queue_processor_total{job=~"^worker.*"}[30m]))`
+
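
The growth-rate query above is a ratio of enqueued to finished jobs over the same 30m window, so its interpretation is purely arithmetic. A small illustrative Go snippet (the counter values are invented):

```go
package main

import "fmt"

// growthRate mirrors the panel's ratio: jobs enqueued vs. jobs finished in the window.
// A result above 1 means the queue is growing (enqueue rate exceeds process rate).
func growthRate(enqueued, processed float64) float64 {
	return enqueued / processed
}

func main() {
	// Hypothetical 30m increases of the two counters shown in the query above.
	r := growthRate(1200, 1000)
	switch {
	case r > 1:
		fmt.Printf("ratio %.2f: queue is growing, process rate < enqueue rate\n", r)
	case r < 1:
		fmt.Printf("ratio %.2f: queue is draining, process rate > enqueue rate\n", r)
	default:
		fmt.Printf("ratio %.2f: process rate matches enqueue rate\n", r)
	}
}
```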

@@ -3373,41 +7780,106 @@ This value compares the rate of enqueues against the rate of finished jobs.
 #### worker: insights_search_queue_handlers
 
-This panel indicates handler active handlers.
+Handler active handlers
 
-*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101700` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).*
+
+Technical details
+
+Query: `sum(src_insights_search_queue_processor_handlers{job=~"^worker.*"})`
+
 

#### worker: insights_search_queue_processor_total -This panel indicates handler operations every 5m. +

Handler operations every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101710` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_insights_search_queue_processor_total{job=~"^worker.*"}[5m]))` + +

#### worker: insights_search_queue_processor_99th_percentile_duration -This panel indicates 99th percentile successful handler operation duration over 5m. +

99th percentile successful handler operation duration over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101711` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_insights_search_queue_processor_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: insights_search_queue_processor_errors_total -This panel indicates handler operation errors every 5m. +

Handler operation errors every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101712` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_insights_search_queue_processor_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: insights_search_queue_processor_error_rate -This panel indicates handler operation error rate over 5m. +

Handler operation error rate over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101713` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_insights_search_queue_processor_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_insights_search_queue_processor_total{job=~"^worker.*"}[5m])) + sum(increase(src_insights_search_queue_processor_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3415,25 +7887,64 @@ This panel indicates handler operation error rate over 5m.
 #### worker: insights_search_queue_record_resets_total
 
-This panel indicates insights_search_queue records reset to queued state every 5m.
+Insights search queue records reset to queued state every 5m
 
-*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101800` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).*
+
+Technical details
+
+Query: `sum(increase(src_insights_search_queue_record_resets_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: insights_search_queue_record_reset_failures_total -This panel indicates insights_search_queue records reset to errored state every 5m. +

Insights search queue records reset to errored state every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101801` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_insights_search_queue_record_reset_failures_total{job=~"^worker.*"}[5m]))` + +

#### worker: insights_search_queue_record_reset_errors_total -This panel indicates insights_search_queue operation errors every 5m. +

Insights search queue operation errors every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101802` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_insights_search_queue_record_reset_errors_total{job=~"^worker.*"}[5m]))` + +

@@ -3441,65 +7952,169 @@ This panel indicates insights_search_queue operation errors every 5m.
 #### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_total
 
-This panel indicates aggregate store operations every 5m.
+Aggregate store operations every 5m
 
-*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).*
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101900` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).*
+
+Technical details
+
+Query: `sum(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_total{job=~"^worker.*"}[5m]))`
+
 

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101901` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_workerutil_dbworker_store_insights_query_runner_jobs_store_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101902` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101903` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m])) / (sum(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_total{job=~"^worker.*"}[5m])) + sum(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_total -This panel indicates store operations every 5m. +

Store operations every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101910` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum by (op)(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_total{job=~"^worker.*"}[5m]))` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101911` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_workerutil_dbworker_store_insights_query_runner_jobs_store_duration_seconds_bucket{job=~"^worker.*"}[5m])))` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101912` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum by (op)(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m]))` + +

#### worker: workerutil_dbworker_store_insights_query_runner_jobs_store_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m -*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/code-insights).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=101913` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-insights team](https://about.sourcegraph.com/handbook/engineering/developer-insights/code-insights).* + +
+Technical details + +Query: `sum by (op)(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m])) / (sum by (op)(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_total{job=~"^worker.*"}[5m])) + sum by (op)(increase(src_workerutil_dbworker_store_insights_query_runner_jobs_store_errors_total{job=~"^worker.*"}[5m]))) * 100` + +

@@ -3507,11 +8122,22 @@ This panel indicates store operation error rate over 5m.
 #### worker: frontend_internal_api_error_responses
 
-This panel indicates frontend-internal API error responses every 5m by route.
+Frontend-internal API error responses every 5m by route
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-frontend-internal-api-error-responses).
+
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#worker-frontend-internal-api-error-responses) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102000` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details
+
+Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="worker",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="worker"}[5m]))`
+
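
In the query above, `ignoring(category) group_left` lets every per-`category` error series divide by the single aggregate request count. The proportion itself is simple; a sketch in Go with invented 5m increases:

```go
package main

import "fmt"

// The panel divides per-category error counts by the overall request count.
// The category names and counts below are made up for illustration.
func main() {
	errorsByCategory := map[string]float64{"gitserver": 12, "repos": 3}
	totalRequests := 4500.0

	for category, errs := range errorsByCategory {
		fmt.Printf("category=%s error ratio: %.4f\n", category, errs/totalRequests)
	}
}
```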

@@ -3519,75 +8145,179 @@ This panel indicates frontend-internal API error responses every 5m by route.
 #### worker: max_open_conns
 
-This panel indicates maximum open.
+Maximum open
+
 
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102100` on your Sourcegraph instance.
 
 *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
 
+Technical details
+
+Query: `sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="worker"})`
+
+
#### worker: open_conns -This panel indicates established. +

Established + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102101` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_open{app_name="worker"})` + +
+
#### worker: in_use -This panel indicates used. +

Used + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102110` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="worker"})` + +
+
#### worker: idle -This panel indicates idle. +

Idle + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102111` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="worker"})` + +
+
#### worker: mean_blocked_seconds_per_conn_request -This panel indicates mean blocked seconds per conn request. +

Mean blocked seconds per conn request -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-mean-blocked-seconds-per-conn-request). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#worker-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102120` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="worker"}[5m])) / sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name="worker"}[5m]))` + +
+
#### worker: closed_max_idle -This panel indicates closed by SetMaxIdleConns. +

Closed by SetMaxIdleConns + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102130` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="worker"}[5m]))` + +
+
#### worker: closed_max_lifetime -This panel indicates closed by SetConnMaxLifetime. +

Closed by SetConnMaxLifetime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102131` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name="worker"}[5m]))` + +
+
#### worker: closed_max_idle_time -This panel indicates closed by SetConnMaxIdleTime. +

Closed by SetConnMaxIdleTime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102132` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name="worker"}[5m]))` + +
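The `closed_max_idle`, `closed_max_lifetime` and `closed_max_idle_time` panels count connections closed by Go's standard `database/sql` pool settings. As a hedged sketch only (illustrative DSN, driver and values, not Sourcegraph's actual configuration), those knobs look like this:

```go
package main

import (
	"database/sql"
	"time"

	_ "github.com/lib/pq" // any database/sql driver works; pq is just an example
)

func main() {
	db, err := sql.Open("postgres", "postgres://sourcegraph@localhost:5432/sourcegraph?sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	db.SetMaxOpenConns(30)                  // ceiling reported by the max_open_conns panel
	db.SetMaxIdleConns(30)                  // connections over this are closed (closed_max_idle)
	db.SetConnMaxLifetime(time.Hour)        // age-based closes (closed_max_lifetime)
	db.SetConnMaxIdleTime(10 * time.Minute) // idle-time closes (closed_max_idle_time)
}
```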
+
### Worker: Container monitoring (not available on server) #### worker: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -3599,80 +8329,170 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' worker` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the worker container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs worker` (note this will include logs from the previous and currently running container). -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^worker.*"}) > 60)` + +

#### worker: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-container-cpu-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^worker.*"}` + +

#### worker: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-container-memory-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^worker.*"}` + +

#### worker: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with worker issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102203` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^worker.*"}[1h]) + rate(container_fs_writes_total{name=~"^worker.*"}[1h]))` + +
+
### Worker: Provisioning indicators (not available on server) #### worker: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-cpu-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^worker.*"}[1d])` + +

#### worker: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-memory-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^worker.*"}[1d])` + +

#### worker: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-cpu-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^worker.*"}[5m])` + +

#### worker: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-memory-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^worker.*"}[5m])` + +

@@ -3680,23 +8500,45 @@ This panel indicates container memory usage (5m maximum) by instance. #### worker: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#worker-go-goroutines) for 1 alert related to this panel. -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*worker"})` + +
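If this panel points at a goroutine leak, a goroutine dump is usually the fastest way to see what is stuck. A minimal sketch, assuming the worker's Go pprof endpoint is reachable somewhere (the `localhost:6060` address below is a placeholder for however your deployment exposes it):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// debug=2 returns full stack traces for every goroutine, grouped by state.
	resp, err := http.Get("http://localhost:6060/debug/pprof/goroutine?debug=2")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	dump, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(dump))
}
```

Repeated dumps a few minutes apart that show the same ever-growing group of goroutines are a strong sign of a leak.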

#### worker: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-go-gc-duration-seconds). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*worker"})` + +

@@ -3704,11 +8546,22 @@ This panel indicates maximum go garbage collection duration. #### worker: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-pods-available-percentage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#worker-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/worker/worker?viewPanel=102500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by(app) (up{app=~".*worker"}) / count by (app) (up{app=~".*worker"}) * 100` + +

@@ -3716,493 +8569,1100 @@ This panel indicates percentage pods available.

Manages interaction with code hosts, instructs Gitserver to update repositories.

+To see this dashboard, visit `/-/debug/grafana/d/repo-updater/repo-updater` on your Sourcegraph instance. + ### Repo Updater: Repositories #### repo-updater: syncer_sync_last_time -This panel indicates time since last sync. +

Time since last sync + +

A high value here indicates issues synchronizing repo metadata. If the value is persistently high, make sure all external services have valid tokens. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100000` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(timestamp(vector(time()))) - max(src_repoupdater_syncer_sync_last_time)` + +
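All of the queries listed under Technical details are plain PromQL, so you can also run them outside Grafana against the standard Prometheus HTTP API. A rough sketch, assuming you have exposed the bundled Prometheus locally (for example via port-forwarding) at `http://localhost:9090`:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Paste any query from a "Technical details" section here.
	query := "max(timestamp(vector(time()))) - max(src_repoupdater_syncer_sync_last_time)"

	resp, err := http.Get("http://localhost:9090/api/v1/query?query=" + url.QueryEscape(query))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // JSON of the form {"status":"success","data":{"result":[...]}}
}
```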
+
#### repo-updater: src_repoupdater_max_sync_backoff -This panel indicates time since oldest sync. +

Time since oldest sync -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-max-sync-backoff). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-max-sync-backoff) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_max_sync_backoff)` + +
+
#### repo-updater: src_repoupdater_syncer_sync_errors_total -This panel indicates site level external service sync error rate. +

Site level external service sync error rate -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-syncer-sync-errors-total). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-syncer-sync-errors-total) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100002` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (family) (rate(src_repoupdater_syncer_sync_errors_total{owner!="user"}[5m]))` + +
+
#### repo-updater: syncer_sync_start -This panel indicates repo metadata sync was started. +

Repo metadata sync was started -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-sync-start). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-sync-start) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100010` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (family) (rate(src_repoupdater_syncer_start_sync{family="Syncer.SyncExternalService"}[9h0m0s]))` + +
+
#### repo-updater: syncer_sync_duration -This panel indicates 95th repositories sync duration. +

95th repositories sync duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-sync-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-sync-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100011` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.95, max by (le, family, success) (rate(src_repoupdater_syncer_sync_duration_seconds_bucket[1m])))` + +
+
#### repo-updater: source_duration -This panel indicates 95th repositories source duration. +

95th repositories source duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-source-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-source-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100012` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.95, max by (le) (rate(src_repoupdater_source_duration_seconds_bucket[1m])))` + +
+
#### repo-updater: syncer_synced_repos -This panel indicates repositories synced. +

Repositories synced -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-synced-repos). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-syncer-synced-repos) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100020` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (state) (rate(src_repoupdater_syncer_synced_repos_total[1m]))` + +
+
#### repo-updater: sourced_repos -This panel indicates repositories sourced. +

Repositories sourced -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sourced-repos). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sourced-repos) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100021` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_source_repos_total[1m]))` + +
+
#### repo-updater: user_added_repos -This panel indicates total number of user added repos. +

Total number of user added repos -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-user-added-repos). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-user-added-repos) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100022` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_user_repos_total)` + +
+
#### repo-updater: purge_failed -This panel indicates repositories purge failed. +

Repositories purge failed -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-purge-failed). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-purge-failed) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100030` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_purge_failed[1m]))` + +
+
#### repo-updater: sched_auto_fetch -This panel indicates repositories scheduled due to hitting a deadline. +

Repositories scheduled due to hitting a deadline -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sched-auto-fetch). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sched-auto-fetch) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100040` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_sched_auto_fetch[1m]))` + +
+
#### repo-updater: sched_manual_fetch -This panel indicates repositories scheduled due to user traffic. +

Repositories scheduled due to user traffic + +

Check repo-updater logs if this value is persistently high. This does not indicate anything if there are no user added code hosts. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100041` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_sched_manual_fetch[1m]))` + +
+
#### repo-updater: sched_known_repos -This panel indicates repositories managed by the scheduler. +

Repositories managed by the scheduler -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sched-known-repos). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sched-known-repos) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100050` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_sched_known_repos)` + +
+
#### repo-updater: sched_update_queue_length -This panel indicates rate of growth of update queue length over 5 minutes. +

Rate of growth of update queue length over 5 minutes -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sched-update-queue-length). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sched-update-queue-length) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100051` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(deriv(src_repoupdater_sched_update_queue_length[5m]))` + +
+
#### repo-updater: sched_loops -This panel indicates scheduler loops. +

Scheduler loops -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sched-loops). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sched-loops) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100052` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_sched_loops[1m]))` + +
+
#### repo-updater: sched_error -This panel indicates repositories schedule error rate. +

Repositories schedule error rate -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-sched-error). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-sched-error) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100060` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(rate(src_repoupdater_sched_error[1m]))` + +
+
### Repo Updater: Permissions #### repo-updater: perms_syncer_perms -This panel indicates time gap between least and most up to date permissions. +

Time gap between least and most up to date permissions -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-perms). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-perms) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100100` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (type) (src_repoupdater_perms_syncer_perms_gap_seconds)` + +
+
#### repo-updater: perms_syncer_stale_perms -This panel indicates number of entities with stale permissions. +

Number of entities with stale permissions -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-stale-perms). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-stale-perms) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (type) (src_repoupdater_perms_syncer_stale_perms)` + +
+
#### repo-updater: perms_syncer_no_perms -This panel indicates number of entities with no permissions. +

Number of entities with no permissions -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-no-perms). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-no-perms) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (type) (src_repoupdater_perms_syncer_no_perms)` + +
+
#### repo-updater: perms_syncer_sync_duration -This panel indicates 95th permissions sync duration. +

95th permissions sync duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-sync-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-sync-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100110` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `histogram_quantile(0.95, max by (le, type) (rate(src_repoupdater_perms_syncer_sync_duration_seconds_bucket[1m])))` + +
+
#### repo-updater: perms_syncer_queue_size -This panel indicates permissions sync queued items. +

Permissions sync queued items -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-queue-size). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-queue-size) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100111` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_perms_syncer_queue_size)` + +
+
#### repo-updater: perms_syncer_sync_errors -This panel indicates permissions sync error rate. +

Permissions sync error rate -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-sync-errors). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-perms-syncer-sync-errors) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100120` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (type) (ceil(rate(src_repoupdater_perms_syncer_sync_errors_total[1m])))` + +
+
### Repo Updater: External services #### repo-updater: src_repoupdater_external_services_total -This panel indicates the total number of external services. +

The total number of external services -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-external-services-total). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-external-services-total) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_external_services_total)` + +
+
#### repo-updater: src_repoupdater_user_external_services_total -This panel indicates the total number of user added external services. +

The total number of user added external services -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-user-external-services-total). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-src-repoupdater-user-external-services-total) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_user_external_services_total)` + +
+
#### repo-updater: repoupdater_queued_sync_jobs_total -This panel indicates the total number of queued sync jobs. +

The total number of queued sync jobs -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-queued-sync-jobs-total). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-queued-sync-jobs-total) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_queued_sync_jobs_total)` + +
+
#### repo-updater: repoupdater_completed_sync_jobs_total -This panel indicates the total number of completed sync jobs. +

The total number of completed sync jobs -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-completed-sync-jobs-total). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-completed-sync-jobs-total) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_completed_sync_jobs_total)` + +
+
#### repo-updater: repoupdater_errored_sync_jobs_percentage -This panel indicates the percentage of external services that have failed their most recent sync. +

The percentage of external services that have failed their most recent sync -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-errored-sync-jobs-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-repoupdater-errored-sync-jobs-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100212` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max(src_repoupdater_errored_sync_jobs_percentage)` + +
+
#### repo-updater: github_graphql_rate_limit_remaining -This panel indicates remaining calls to GitHub graphql API before hitting the rate limit. +

Remaining calls to GitHub graphql API before hitting the rate limit -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-github-graphql-rate-limit-remaining). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-github-graphql-rate-limit-remaining) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100220` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (name) (src_github_rate_limit_remaining_v2{resource="graphql"})` + +
+
#### repo-updater: github_rest_rate_limit_remaining -This panel indicates remaining calls to GitHub rest API before hitting the rate limit. +

Remaining calls to GitHub rest API before hitting the rate limit -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-github-rest-rate-limit-remaining). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-github-rest-rate-limit-remaining) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100221` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (name) (src_github_rate_limit_remaining_v2{resource="rest"})` + +
+
#### repo-updater: github_search_rate_limit_remaining -This panel indicates remaining calls to GitHub search API before hitting the rate limit. +

Remaining calls to GitHub search API before hitting the rate limit -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-github-search-rate-limit-remaining). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-github-search-rate-limit-remaining) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100222` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (name) (src_github_rate_limit_remaining_v2{resource="search"})` + +
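When the GitHub rate-limit panels above trend toward zero, it can help to check the remaining quota directly against GitHub's `GET /rate_limit` endpoint using the same token your external service is configured with. A small sketch (the `GITHUB_TOKEN` environment variable is just an example name):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	req, err := http.NewRequest("GET", "https://api.github.com/rate_limit", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "token "+os.Getenv("GITHUB_TOKEN"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	// The response breaks out "core" (rest), "search" and "graphql" quotas,
	// matching the three rate-limit panels documented above.
	fmt.Println(string(body))
}
```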
+
#### repo-updater: github_graphql_rate_limit_wait_duration -This panel indicates time spent waiting for the GitHub graphql API rate limiter. +

Time spent waiting for the GitHub graphql API rate limiter + +

Indicates how long we're waiting on the rate limit once it has been exceeded +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100230` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="graphql"}[5m]))` + +
+
#### repo-updater: github_rest_rate_limit_wait_duration -This panel indicates time spent waiting for the GitHub rest API rate limiter. +

Time spent waiting for the GitHub rest API rate limiter + +

Indicates how long we're waiting on the rate limit once it has been exceeded +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100231` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="rest"}[5m]))` + +
+
#### repo-updater: github_search_rate_limit_wait_duration -This panel indicates time spent waiting for the GitHub search API rate limiter. +

Time spent waiting for the GitHub search API rate limiter + +

Indicates how long we're waiting on the rate limit once it has been exceeded +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100232` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="search"}[5m]))` + +
+
#### repo-updater: gitlab_rest_rate_limit_remaining -This panel indicates remaining calls to GitLab rest API before hitting the rate limit. +

Remaining calls to GitLab rest API before hitting the rate limit -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-gitlab-rest-rate-limit-remaining). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-gitlab-rest-rate-limit-remaining) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100240` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by (name) (src_gitlab_rate_limit_remaining{resource="rest"})` + +
+
#### repo-updater: gitlab_rest_rate_limit_wait_duration -This panel indicates time spent waiting for the GitLab rest API rate limiter. +

Time spent waiting for the GitLab rest API rate limiter + +

Indicates how long we're waiting on the rate limit once it has been exceeded +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100241` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(name) (rate(src_gitlab_rate_limit_wait_duration_seconds{resource="rest"}[5m]))` + +
+
### Repo Updater: Batches: dbstore stats #### repo-updater: batches_dbstore_total -This panel indicates aggregate store operations every 5m. +

Aggregate store operations every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_total{job=~"^repo-updater.*"}[5m]))` + +
+
#### repo-updater: batches_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful aggregate store operation duration over 5m. +

99th percentile successful aggregate store operation duration over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_batches_dbstore_duration_seconds_bucket{job=~"^repo-updater.*"}[5m])))` + +
+
#### repo-updater: batches_dbstore_errors_total -This panel indicates aggregate store operation errors every 5m. +

Aggregate store operation errors every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100302` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m]))` + +
+
#### repo-updater: batches_dbstore_error_rate -This panel indicates aggregate store operation error rate over 5m. +

Aggregate store operation error rate over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100303` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m])) / (sum(increase(src_batches_dbstore_total{job=~"^repo-updater.*"}[5m])) + sum(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m]))) * 100` + +
+
#### repo-updater: batches_dbstore_total -This panel indicates store operations every 5m. +

Store operations every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100310` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_total{job=~"^repo-updater.*"}[5m]))` + +
+
#### repo-updater: batches_dbstore_99th_percentile_duration -This panel indicates 99th percentile successful store operation duration over 5m. +

99th percentile successful store operation duration over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100311` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_batches_dbstore_duration_seconds_bucket{job=~"^repo-updater.*"}[5m])))` + +
+
#### repo-updater: batches_dbstore_errors_total -This panel indicates store operation errors every 5m. +

Store operation errors every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100312` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m]))` + +
+
#### repo-updater: batches_dbstore_error_rate -This panel indicates store operation error rate over 5m. +

Store operation error rate over 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100313` on your Sourcegraph instance. *Managed by the [Sourcegraph Batches team](https://about.sourcegraph.com/handbook/engineering/batches).* +
+Technical details + +Query: `sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m])) / (sum by (op)(increase(src_batches_dbstore_total{job=~"^repo-updater.*"}[5m])) + sum by (op)(increase(src_batches_dbstore_errors_total{job=~"^repo-updater.*"}[5m]))) * 100` + +
+
### Repo Updater: Codeintel: Coursier invocation stats #### repo-updater: codeintel_coursier_total -This panel indicates aggregate invocations operations every 5m. +

Aggregate invocations operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))` + +

#### repo-updater: codeintel_coursier_99th_percentile_duration -This panel indicates 99th percentile successful aggregate invocations operation duration over 5m. +

99th percentile successful aggregate invocations operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_codeintel_coursier_duration_seconds_bucket{op!="RunCommand",job=~"^repo-updater.*"}[5m])))` + +

#### repo-updater: codeintel_coursier_errors_total -This panel indicates aggregate invocations operation errors every 5m. +

Aggregate invocations operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))` + +

#### repo-updater: codeintel_coursier_error_rate -This panel indicates aggregate invocations operation error rate over 5m. +

Aggregate invocations operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100403` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m])) / (sum(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^repo-updater.*"}[5m])) + sum(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))) * 100` + +

#### repo-updater: codeintel_coursier_total -This panel indicates invocations operations every 5m. +

Invocations operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100410` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))` + +

#### repo-updater: codeintel_coursier_99th_percentile_duration -This panel indicates 99th percentile successful invocations operation duration over 5m. +

99th percentile successful invocations operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100411` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_coursier_duration_seconds_bucket{op!="RunCommand",job=~"^repo-updater.*"}[5m])))` + +

#### repo-updater: codeintel_coursier_errors_total -This panel indicates invocations operation errors every 5m. +

Invocations operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100412` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))` + +

#### repo-updater: codeintel_coursier_error_rate -This panel indicates invocations operation error rate over 5m. +

Invocations operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100413` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m])) / (sum by (op)(increase(src_codeintel_coursier_total{op!="RunCommand",job=~"^repo-updater.*"}[5m])) + sum by (op)(increase(src_codeintel_coursier_errors_total{op!="RunCommand",job=~"^repo-updater.*"}[5m]))) * 100` + +

@@ -4210,87 +9670,202 @@ This panel indicates invocations operation error rate over 5m. #### repo-updater: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-frontend-internal-api-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100500` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="repo-updater",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="repo-updater"}[5m]))` + +
+
### Repo Updater: Database connections #### repo-updater: max_open_conns -This panel indicates maximum open. +

Maximum open + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100600` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name="repo-updater"})` + +
+
#### repo-updater: open_conns -This panel indicates established. +

Established + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100601` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_open{app_name="repo-updater"})` + +
+
#### repo-updater: in_use -This panel indicates used. +

Used + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100610` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name="repo-updater"})` + +
+
#### repo-updater: idle -This panel indicates idle. +

Idle + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100611` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (src_pgsql_conns_idle{app_name="repo-updater"})` + +
+
#### repo-updater: mean_blocked_seconds_per_conn_request -This panel indicates mean blocked seconds per conn request. +

Mean blocked seconds per conn request -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-mean-blocked-seconds-per-conn-request). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-mean-blocked-seconds-per-conn-request) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100620` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name="repo-updater"}[5m])) / sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name="repo-updater"}[5m]))` + +
+
#### repo-updater: closed_max_idle -This panel indicates closed by SetMaxIdleConns. +

Closed by SetMaxIdleConns + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100630` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name="repo-updater"}[5m]))` + +
+
#### repo-updater: closed_max_lifetime -This panel indicates closed by SetConnMaxLifetime. +

Closed by SetConnMaxLifetime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100631` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name="repo-updater"}[5m]))` + +
+
#### repo-updater: closed_max_idle_time -This panel indicates closed by SetConnMaxIdleTime. +

Closed by SetConnMaxIdleTime + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100632` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name="repo-updater"}[5m]))` + +
+
### Repo Updater: Container monitoring (not available on server) #### repo-updater: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -4302,160 +9877,320 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' repo-updater` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the repo-updater container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs repo-updater` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100700` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^repo-updater.*"}) > 60)` + +
+
#### repo-updater: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100701` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^repo-updater.*"}` + +
+
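+
+When many repo-updater containers are running, it can help to narrow this panel's query to the busiest instances. A hypothetical variant (not part of the dashboard) using `topk`:
+
+```promql
+# Sketch: the three repo-updater containers with the highest 1m-average CPU usage.
+topk(3, cadvisor_container_cpu_usage_percentage_total{name=~"^repo-updater.*"})
+```
+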
#### repo-updater: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100702` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^repo-updater.*"}` + +
+
#### repo-updater: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with repo-updater issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100703` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^repo-updater.*"}[1h]) + rate(container_fs_writes_total{name=~"^repo-updater.*"}[1h]))` + +
+
### Repo Updater: Provisioning indicators (not available on server) #### repo-updater: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100800` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^repo-updater.*"}[1d])` + +
+
#### repo-updater: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100801` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^repo-updater.*"}[1d])` + +
+
#### repo-updater: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100810` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^repo-updater.*"}[5m])` + +
+
#### repo-updater: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100811` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^repo-updater.*"}[5m])` + +
+
### Repo Updater: Golang runtime monitoring #### repo-updater: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100900` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*repo-updater"})` + +
+
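+
+Because a leak shows up as steady growth rather than a single high reading, it can be useful to compare the current goroutine count against an earlier one. A sketch (not part of the dashboard) using an `offset` modifier:
+
+```promql
+# Sketch: goroutine growth per repo-updater instance over the past 30 minutes;
+# sustained positive growth suggests a leak.
+max by(instance) (go_goroutines{job=~".*repo-updater"}) -
+  max by(instance) (go_goroutines{job=~".*repo-updater"} offset 30m)
+```
+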
#### repo-updater: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=100901` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*repo-updater"})` + +
+
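+
+`go_gc_duration_seconds` is a summary metric, so the panel's `max` covers every exported quantile. Assuming the standard Go client quantile labels (`0`, `0.25`, `0.5`, `0.75`, `1`), a sketch that tracks a single quantile instead:
+
+```promql
+# Sketch: 75th-percentile GC pause duration per repo-updater instance.
+max by(instance) (go_gc_duration_seconds{job=~".*repo-updater",quantile="0.75"})
+```
+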
### Repo Updater: Kubernetes monitoring (only available on Kubernetes) #### repo-updater: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#repo-updater-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/repo-updater/repo-updater?viewPanel=101000` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*repo-updater"}) / count by (app) (up{app=~".*repo-updater"}) * 100` + +
+
## Searcher

Performs unindexed searches (diff and commit search, text search for unindexed branches).

+To see this dashboard, visit `/-/debug/grafana/d/searcher/searcher` on your Sourcegraph instance. + #### searcher: unindexed_search_request_errors -This panel indicates unindexed search request errors every 5m by code. +

Unindexed search request errors every 5m by code -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-unindexed-search-request-errors). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-unindexed-search-request-errors) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (code)(increase(searcher_service_request_total{code!="200",code!="canceled"}[5m])) / ignoring(code) group_left sum(increase(searcher_service_request_total[5m])) * 100` + +
+
#### searcher: replica_traffic -This panel indicates requests per second over 10m. +

Requests per second over 10m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-replica-traffic). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-replica-traffic) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(instance) (rate(searcher_service_request_total[10m]))` + +
+
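+
+The per-instance breakdown is mainly useful for spotting unevenly loaded replicas. As a rough sketch (not part of the dashboard), the ratio of the busiest replica's request rate to the average across replicas makes skew explicit; values well above 1 suggest traffic is concentrating on a few instances:
+
+```promql
+# Sketch: hottest searcher replica's request rate relative to the mean replica.
+max(sum by(instance) (rate(searcher_service_request_total[10m]))) /
+  avg(sum by(instance) (rate(searcher_service_request_total[10m])))
+```
+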
### Searcher: Internal service requests #### searcher: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-frontend-internal-api-error-responses). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100100` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="searcher",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="searcher"}[5m]))` + +
+
### Searcher: Container monitoring (not available on server) #### searcher: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -4467,140 +10202,287 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' searcher` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the searcher container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs searcher` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100200` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^searcher.*"}) > 60)` + +
+
#### searcher: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^searcher.*"}` + +
+
#### searcher: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100202` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^searcher.*"}` + +
+
#### searcher: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with searcher issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100203` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^searcher.*"}[1h]) + rate(container_fs_writes_total{name=~"^searcher.*"}[1h]))` + +
+
### Searcher: Provisioning indicators (not available on server) #### searcher: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^searcher.*"}[1d])` + +
+
#### searcher: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^searcher.*"}[1d])` + +
+
#### searcher: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100310` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^searcher.*"}[5m])` + +
+
#### searcher: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100311` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^searcher.*"}[5m])` + +
+
### Searcher: Golang runtime monitoring #### searcher: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#searcher-go-goroutines) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100400` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*searcher"})` + +
+
#### searcher: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-go-gc-duration-seconds). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100401` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*searcher"})` + +
+
### Searcher: Kubernetes monitoring (only available on Kubernetes) #### searcher: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#searcher-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#searcher-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100500` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(app) (up{app=~".*searcher"}) / count by (app) (up{app=~".*searcher"}) * 100` + +
+
## Symbols

Handles symbol searches for unindexed branches.

+To see this dashboard, visit `/-/debug/grafana/d/symbols/symbols` on your Sourcegraph instance. + #### symbols: store_fetch_failures -This panel indicates store fetch failures every 5m. +

Store fetch failures every 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-store-fetch-failures). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-store-fetch-failures) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100000` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(symbols_store_fetch_failed[5m]))` + +

#### symbols: current_fetch_queue_size -This panel indicates current fetch queue size. +

Current fetch queue size -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-current-fetch-queue-size). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-current-fetch-queue-size) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100001` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(symbols_store_fetch_queue_size)` + +

@@ -4608,11 +10490,22 @@ This panel indicates current fetch queue size. #### symbols: frontend_internal_api_error_responses -This panel indicates frontend-internal API error responses every 5m by route. +

Frontend-internal API error responses every 5m by route -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-frontend-internal-api-error-responses). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-frontend-internal-api-error-responses) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (category)(increase(src_frontend_internal_request_duration_seconds_count{job="symbols",code!~"2.."}[5m])) / ignoring(category) group_left sum(increase(src_frontend_internal_request_duration_seconds_count{job="symbols"}[5m]))` + +

@@ -4620,7 +10513,9 @@ This panel indicates frontend-internal API error responses every 5m by route. #### symbols: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -4632,80 +10527,170 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' symbols` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the symbols container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs symbols` (note this will include logs from the previous and currently running container). -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^symbols.*"}) > 60)` + +

#### symbols: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-container-cpu-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^symbols.*"}` + +

#### symbols: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-container-memory-usage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^symbols.*"}` + +

#### symbols: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with symbols issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100203` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^symbols.*"}[1h]) + rate(container_fs_writes_total{name=~"^symbols.*"}[1h]))` + +
+
### Symbols: Provisioning indicators (not available on server) #### symbols: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^symbols.*"}[1d])` + +

#### symbols: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-long-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^symbols.*"}[1d])` + +

#### symbols: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^symbols.*"}[5m])` + +

#### symbols: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-short-term). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^symbols.*"}[5m])` + +

@@ -4713,23 +10698,45 @@ This panel indicates container memory usage (5m maximum) by instance. #### symbols: go_goroutines -This panel indicates maximum active goroutines. +

Maximum active goroutines + +

A high value here indicates a possible goroutine leak. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-go-goroutines). +Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-goroutines) for 1 alert related to this panel. -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_goroutines{job=~".*symbols"})` + +

#### symbols: go_gc_duration_seconds -This panel indicates maximum go garbage collection duration. +

Maximum go garbage collection duration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-go-gc-duration-seconds). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-gc-duration-seconds) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by(instance) (go_gc_duration_seconds{job=~".*symbols"})` + +

@@ -4737,11 +10744,22 @@ This panel indicates maximum go garbage collection duration. #### symbols: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#symbols-pods-available-percentage). +

-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +Refer to the [alert solutions reference](./alert_solutions.md#symbols-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by(app) (up{app=~".*symbols"}) / count by (app) (up{app=~".*symbols"}) * 100` + +

@@ -4749,43 +10767,99 @@ This panel indicates percentage pods available.

Handles syntax highlighting for code files.

+To see this dashboard, visit `/-/debug/grafana/d/syntect-server/syntect-server` on your Sourcegraph instance. + #### syntect-server: syntax_highlighting_errors -This panel indicates syntax highlighting errors every 5m. +

Syntax highlighting errors every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(increase(src_syntax_highlighting_requests{status="error"}[5m])) / sum(increase(src_syntax_highlighting_requests[5m])) * 100` + +
+
#### syntect-server: syntax_highlighting_timeouts -This panel indicates syntax highlighting timeouts every 5m. +

Syntax highlighting timeouts every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(increase(src_syntax_highlighting_requests{status="timeout"}[5m])) / sum(increase(src_syntax_highlighting_requests[5m])) * 100` + +
+
#### syntect-server: syntax_highlighting_panics -This panel indicates syntax highlighting panics every 5m. +

Syntax highlighting panics every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100010` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(increase(src_syntax_highlighting_requests{status="panic"}[5m]))` + +
+
#### syntect-server: syntax_highlighting_worker_deaths -This panel indicates syntax highlighter worker deaths every 5m. +

Syntax highlighter worker deaths every 5m + +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100011` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum(increase(src_syntax_highlighting_requests{status="hss_worker_timeout"}[5m]))` + +
+
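+
+The four syntect-server panels above slice the same `src_syntax_highlighting_requests` counter by `status` (`error`, `timeout`, `panic`, `hss_worker_timeout`). A single breakdown query (a sketch, not part of the dashboard) shows every status at once, which can be handy when comparing error classes:
+
+```promql
+# Sketch: syntax highlighting request counts by status over the last 5 minutes.
+sum by (status) (increase(src_syntax_highlighting_requests[5m]))
+```
+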
### Syntect Server: Container monitoring (not available on server) #### syntect-server: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -4797,154 +10871,322 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' syntect-server` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the syntect-server container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs syntect-server` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100100` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^syntect-server.*"}) > 60)` + +
+
#### syntect-server: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^syntect-server.*"}` + +
+
#### syntect-server: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^syntect-server.*"}` + +
+
#### syntect-server: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with syntect-server issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100103` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^syntect-server.*"}[1h]) + rate(container_fs_writes_total{name=~"^syntect-server.*"}[1h]))` + +
+
### Syntect Server: Provisioning indicators (not available on server) #### syntect-server: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^syntect-server.*"}[1d])` + +
+
#### syntect-server: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^syntect-server.*"}[1d])` + +
+
#### syntect-server: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^syntect-server.*"}[5m])` + +
+
#### syntect-server: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^syntect-server.*"}[5m])` + +
+
### Syntect Server: Kubernetes monitoring (only available on Kubernetes) #### syntect-server: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#syntect-server-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#syntect-server-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/syntect-server/syntect-server?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(app) (up{app=~".*syntect-server"}) / count by (app) (up{app=~".*syntect-server"}) * 100` + +
+
## Zoekt Index Server

Indexes repositories and populates the search index.

+To see this dashboard, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver` on your Sourcegraph instance. + #### zoekt-indexserver: repos_assigned -This panel indicates total number of repos. +

Total number of repos + +

Sudden changes in this value are expected only when the indexing configuration changes. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100000` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(index_num_assigned)` + +
+
#### zoekt-indexserver: repo_index_state -This panel indicates indexing results over 5m (noop=no changes, empty=no branches to index). +

Indexing results over 5m (noop=no changes, empty=no branches to index) + +

A persistent failing state indicates that some repositories cannot be indexed, for example because they are too large or indexing times out. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100010` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (state) (increase(index_repo_seconds_count[5m]))` + +
+
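+
+If the share of failing indexing attempts matters more than their absolute count, the same counter can be turned into a ratio. A sketch (not part of the dashboard):
+
+```promql
+# Sketch: fraction of indexing attempts over the last hour that ended in failure.
+sum(increase(index_repo_seconds_count{state="fail"}[1h])) /
+  sum(increase(index_repo_seconds_count[1h]))
+```
+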
#### zoekt-indexserver: repo_index_success_speed -This panel indicates successful indexing durations. +

Successful indexing durations + +

Latency increases can indicate bottlenecks in the indexserver. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100011` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (le, state) (increase(index_repo_seconds_bucket{state="success"}[$__rate_interval]))` + +
+
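+
+The panel plots the raw histogram buckets as a heatmap. To reduce that to a single latency figure, the usual `histogram_quantile` pattern applies; a sketch (not part of the dashboard) for an approximate 90th percentile:
+
+```promql
+# Sketch: approximate p90 successful indexing duration over the last hour.
+histogram_quantile(
+  0.90,
+  sum by (le) (increase(index_repo_seconds_bucket{state="success"}[1h]))
+)
+```
+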
#### zoekt-indexserver: repo_index_fail_speed -This panel indicates failed indexing durations. +

Failed indexing durations + +

Failures happening after a long time typically indicate timeouts. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100012` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (le, state) (increase(index_repo_seconds_bucket{state="fail"}[$__rate_interval]))` + +
+
#### zoekt-indexserver: average_resolve_revision_duration -This panel indicates average resolve revision duration over 5m. +

Average resolve revision duration over 5m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-average-resolve-revision-duration). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-average-resolve-revision-duration) for 2 alerts related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100020` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum(rate(resolve_revision_seconds_sum[5m])) / sum(rate(resolve_revision_seconds_count[5m]))` + +
+
### Zoekt Index Server: Container monitoring (not available on server) #### zoekt-indexserver: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons. @@ -4956,114 +11198,230 @@ value change independent of deployment events (such as an upgrade), it could ind - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' zoekt-indexserver` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the zoekt-indexserver container in `docker-compose.yml`. - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs zoekt-indexserver` (note this will include logs from the previous and currently running container). +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100100` on your Sourcegraph instance. + *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^zoekt-indexserver.*"}) > 60)` + +
+
#### zoekt-indexserver: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-indexserver.*"}` + +
+
#### zoekt-indexserver: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-indexserver.*"}` + +
+
#### zoekt-indexserver: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with zoekt-indexserver issues. +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100103` on your Sourcegraph instance. + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* +
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^zoekt-indexserver.*"}[1h]) + rate(container_fs_writes_total{name=~"^zoekt-indexserver.*"}[1h]))` + +
+
### Zoekt Index Server: Provisioning indicators (not available on server) #### zoekt-indexserver: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-indexserver.*"}[1d])` + +
+
#### zoekt-indexserver: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-indexserver.*"}[1d])` + +
+
#### zoekt-indexserver: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-indexserver.*"}[5m])` + +
+
#### zoekt-indexserver: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-indexserver.*"}[5m])` + +
+
### Zoekt Index Server: Kubernetes monitoring (only available on Kubernetes) #### zoekt-indexserver: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-indexserver-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-indexserver/zoekt-indexserver?viewPanel=100300` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by(app) (up{app=~".*indexed-search"}) / count by (app) (up{app=~".*indexed-search"}) * 100` + +
+
## Zoekt Web Server

Serves indexed search requests using the search index.

+To see this dashboard, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver` on your Sourcegraph instance.
+
 #### zoekt-webserver: indexed_search_request_errors

-This panel indicates indexed search request errors every 5m by code.
+

Indexed search request errors every 5m by code -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-indexed-search-request-errors). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-indexed-search-request-errors) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `sum by (code)(increase(src_zoekt_request_duration_seconds_count{code!~"2.."}[5m])) / ignoring(code) group_left sum(increase(src_zoekt_request_duration_seconds_count[5m])) * 100` + +
+
### Zoekt Web Server: Container monitoring (not available on server) #### zoekt-webserver: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this
value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reason.

@@ -5075,199 +11433,392 @@ value change independent of deployment events (such as an upgrade), it could ind

- Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' zoekt-webserver` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the zoekt-webserver container in `docker-compose.yml`.
- Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs zoekt-webserver` (note this will include logs from the previous and currently running container).
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100100` on your Sourcegraph instance.
+
 *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).*
+
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^zoekt-webserver.*"}) > 60)` + +
+
#### zoekt-webserver: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-webserver.*"}` + +
+
#### zoekt-webserver: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100102` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-webserver.*"}` + +
+
#### zoekt-webserver: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with zoekt-webserver issues.
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100103` on your Sourcegraph instance.
+
 *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
+
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^zoekt-webserver.*"}[1h]) + rate(container_fs_writes_total{name=~"^zoekt-webserver.*"}[1h]))` + +
+
### Zoekt Web Server: Provisioning indicators (not available on server) #### zoekt-webserver: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-webserver.*"}[1d])` + +
+
#### zoekt-webserver: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-webserver.*"}[1d])` + +
+
#### zoekt-webserver: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-webserver.*"}[5m])` + +
+
#### zoekt-webserver: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#zoekt-webserver-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/zoekt-webserver/zoekt-webserver?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Search team](https://about.sourcegraph.com/handbook/engineering/search).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^zoekt-webserver.*"}[5m])` + +
+
## Prometheus

Sourcegraph's all-in-one Prometheus and Alertmanager service.

+To see this dashboard, visit `/-/debug/grafana/d/prometheus/prometheus` on your Sourcegraph instance.
+
 ### Prometheus: Metrics

 #### prometheus: prometheus_rule_eval_duration

-This panel indicates average prometheus rule group evaluation duration over 10m by rule group.
+

Average prometheus rule group evaluation duration over 10m by rule group + +

A high value here indicates Prometheus rule evaluation is taking longer than expected. It might indicate that certain rule groups are taking too long to evaluate, or Prometheus is underprovisioned. Rules that Sourcegraph ships with are grouped under `/sg_config_prometheus`. [Custom rules are grouped under `/sg_prometheus_addons`](https://docs.sourcegraph.com/admin/observability/metrics#prometheus-configuration). -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-rule-eval-duration). +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-rule-eval-duration) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100000` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `sum by(rule_group) (avg_over_time(prometheus_rule_group_last_duration_seconds[10m]))` + +
+
#### prometheus: prometheus_rule_eval_failures -This panel indicates failed prometheus rule evaluations over 5m by rule group. +

Failed prometheus rule evaluations over 5m by rule group + +

Rules that Sourcegraph ships with are grouped under `/sg_config_prometheus`. [Custom rules are grouped under `/sg_prometheus_addons`](https://docs.sourcegraph.com/admin/observability/metrics#prometheus-configuration). -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-rule-eval-failures). +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-rule-eval-failures) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100001` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `sum by(rule_group) (rate(prometheus_rule_evaluation_failures_total[5m]))` + +
+
### Prometheus: Alerts #### prometheus: alertmanager_notification_latency -This panel indicates alertmanager notification latency over 1m by integration. +

Alertmanager notification latency over 1m by integration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-notification-latency). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-notification-latency) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100100` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `sum by(integration) (rate(alertmanager_notification_latency_seconds_sum[1m]))` + +
+
#### prometheus: alertmanager_notification_failures -This panel indicates failed alertmanager notifications over 1m by integration. +

Failed alertmanager notifications over 1m by integration -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-notification-failures). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-notification-failures) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100101` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `sum by(integration) (rate(alertmanager_notifications_failed_total[1m]))` + +
+
### Prometheus: Internals #### prometheus: prometheus_config_status -This panel indicates prometheus configuration reload status. +

Prometheus configuration reload status + +

A `1` indicates Prometheus reloaded its configuration successfully. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-config-status). +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-config-status) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100200` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `prometheus_config_last_reload_successful` + +
+
#### prometheus: alertmanager_config_status -This panel indicates alertmanager configuration reload status. +

Alertmanager configuration reload status + +

A `1` indicates Alertmanager reloaded its configuration successfully. -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-config-status). +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-alertmanager-config-status) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100201` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `alertmanager_config_last_reload_successful` + +
+
#### prometheus: prometheus_tsdb_op_failure -This panel indicates prometheus tsdb failures by operation over 1m by operation. +

Prometheus tsdb failures by operation over 1m by operation -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-tsdb-op-failure). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-tsdb-op-failure) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100210` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `increase(label_replace({__name__=~"prometheus_tsdb_(.*)_failed_total"}, "operation", "$1", "__name__", "(.+)s_failed_total")[5m:1m])` + +
+
#### prometheus: prometheus_target_sample_exceeded -This panel indicates prometheus scrapes that exceed the sample limit over 10m. +

Prometheus scrapes that exceed the sample limit over 10m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-target-sample-exceeded). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-target-sample-exceeded) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100211` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m])` + +
+
#### prometheus: prometheus_target_sample_duplicate -This panel indicates prometheus scrapes rejected due to duplicate timestamps over 10m. +

Prometheus scrapes rejected due to duplicate timestamps over 10m -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-target-sample-duplicate). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-prometheus-target-sample-duplicate) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100212` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[10m])` + +
+
### Prometheus: Container monitoring (not available on server) #### prometheus: container_missing -This panel indicates container missing. +

Container missing + +

This value is the number of times a container has not been seen for more than one minute. If you observe this
value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reason.

@@ -5279,112 +11830,230 @@ value change independent of deployment events (such as an upgrade), it could ind

- Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' prometheus` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the prometheus container in `docker-compose.yml`.
- Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs prometheus` (note this will include logs from the previous and currently running container).
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100300` on your Sourcegraph instance.
+
 *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).*
+
+Technical details + +Query: `count by(name) ((time() - container_last_seen{name=~"^prometheus.*"}) > 60)` + +
+
#### prometheus: container_cpu_usage -This panel indicates container cpu usage total (1m average) across all cores by instance. +

Container cpu usage total (1m average) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-container-cpu-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-container-cpu-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100301` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^prometheus.*"}` + +
+
#### prometheus: container_memory_usage -This panel indicates container memory usage by instance. +

Container memory usage by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-container-memory-usage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-container-memory-usage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100302` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `cadvisor_container_memory_usage_percentage_total{name=~"^prometheus.*"}` + +
+
#### prometheus: fs_io_operations -This panel indicates filesystem reads and writes rate by instance over 1h. +

Filesystem reads and writes rate by instance over 1h + +

This value indicates the number of filesystem read and write operations by containers of this service. When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself, especially if high values or spikes correlate with prometheus issues.
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100303` on your Sourcegraph instance.
+
 *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
+
+Technical details + +Query: `sum by(name) (rate(container_fs_reads_total{name=~"^prometheus.*"}[1h]) + rate(container_fs_writes_total{name=~"^prometheus.*"}[1h]))` + +
+
### Prometheus: Provisioning indicators (not available on server) #### prometheus: provisioning_container_cpu_usage_long_term -This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance. +

Container cpu usage total (90th percentile over 1d) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-cpu-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100400` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^prometheus.*"}[1d])` + +
+
#### prometheus: provisioning_container_memory_usage_long_term -This panel indicates container memory usage (1d maximum) by instance. +

Container memory usage (1d maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-memory-usage-long-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100401` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^prometheus.*"}[1d])` + +
+
#### prometheus: provisioning_container_cpu_usage_short_term -This panel indicates container cpu usage total (5m maximum) across all cores by instance. +

Container cpu usage total (5m maximum) across all cores by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-cpu-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100410` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^prometheus.*"}[5m])` + +
+
#### prometheus: provisioning_container_memory_usage_short_term -This panel indicates container memory usage (5m maximum) by instance. +

Container memory usage (5m maximum) by instance -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-memory-usage-short-term). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100411` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^prometheus.*"}[5m])` + +
+
### Prometheus: Kubernetes monitoring (only available on Kubernetes) #### prometheus: pods_available_percentage -This panel indicates percentage pods available. +

Percentage pods available -> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#prometheus-pods-available-percentage). +

+ +Refer to the [alert solutions reference](./alert_solutions.md#prometheus-pods-available-percentage) for 1 alert related to this panel. + +To see this panel, visit `/-/debug/grafana/d/prometheus/prometheus?viewPanel=100500` on your Sourcegraph instance. *Managed by the [Sourcegraph Distribution team](https://about.sourcegraph.com/handbook/engineering/distribution).* +
+Technical details + +Query: `sum by(app) (up{app=~".*prometheus"}) / count by (app) (up{app=~".*prometheus"}) * 100` + +
+
## Executor

Executes jobs in an isolated environment.

+To see this dashboard, visit `/-/debug/grafana/d/executor/executor` on your Sourcegraph instance.
+
 ### Executor: Executor: Executor jobs

 #### executor: executor_queue_size

-This panel indicates unprocessed executor job queue size.
+

Unprocessed executor job queue size -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100000` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `max by (queue)(src_executor_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches|frontend|sourcegraph-frontend).*"})` + +

#### executor: executor_queue_growth_rate -This panel indicates unprocessed executor job queue growth rate over 30m. +

Unprocessed executor job queue growth rate over 30m + +

This value compares the rate of enqueues against the rate of finished jobs for the selected queue.

@@ -5392,7 +12061,18 @@ This value compares the rate of enqueues against the rate of finished jobs for t

- A value of 1 indicates that the process rate equals the enqueue rate
- A value greater than 1 indicates that the process rate is lower than the enqueue rate
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100001` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+Technical details + +Query: `sum by (queue)(increase(src_executor_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches|frontend|sourcegraph-frontend).*"}[30m])) / sum by (queue)(increase(src_executor_processor_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches|frontend|sourcegraph-frontend).*"}[30m]))` + +

@@ -5400,41 +12080,106 @@ This value compares the rate of enqueues against the rate of finished jobs for t #### executor: executor_handlers -This panel indicates handler active handlers. +

Handler active handlers -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(src_executor_processor_handlers{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"})` + +

#### executor: executor_processor_total -This panel indicates handler operations every 5m. +

Handler operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100110` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_executor_processor_total{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: executor_processor_99th_percentile_duration -This panel indicates 99th percentile successful handler operation duration over 5m. +

99th percentile successful handler operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100111` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_executor_processor_duration_seconds_bucket{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: executor_processor_errors_total -This panel indicates handler operation errors every 5m. +

Handler operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100112` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: executor_processor_error_rate -This panel indicates handler operation error rate over 5m. +

Handler operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100113` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum(increase(src_executor_processor_total{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum(increase(src_executor_processor_errors_total{queue=~"${queue:regex}",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

@@ -5442,65 +12187,169 @@ This panel indicates handler operation error rate over 5m. #### executor: apiworker_apiclient_total -This panel indicates aggregate client operations every 5m. +

Aggregate client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_apiclient_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_apiclient_99th_percentile_duration -This panel indicates 99th percentile successful aggregate client operation duration over 5m. +

99th percentile successful aggregate client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_apiworker_apiclient_duration_seconds_bucket{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_apiclient_errors_total -This panel indicates aggregate client operation errors every 5m. +

Aggregate client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_apiclient_error_rate -This panel indicates aggregate client operation error rate over 5m. +

Aggregate client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100203` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum(increase(src_apiworker_apiclient_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

#### executor: apiworker_apiclient_total -This panel indicates client operations every 5m. +

Client operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100210` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_apiclient_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_apiclient_99th_percentile_duration -This panel indicates 99th percentile successful client operation duration over 5m. +

99th percentile successful client operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100211` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_apiclient_duration_seconds_bucket{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_apiclient_errors_total -This panel indicates client operation errors every 5m. +

Client operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100212` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_apiclient_error_rate -This panel indicates client operation error rate over 5m. +

Client operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100213` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum by (op)(increase(src_apiworker_apiclient_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum by (op)(increase(src_apiworker_apiclient_errors_total{job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

@@ -5508,65 +12357,169 @@ This panel indicates client operation error rate over 5m. #### executor: apiworker_command_total -This panel indicates aggregate command operations every 5m. +

Aggregate command operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_99th_percentile_duration -This panel indicates 99th percentile successful aggregate command operation duration over 5m. +

99th percentile successful aggregate command operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_command_errors_total -This panel indicates aggregate command operation errors every 5m. +

Aggregate command operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100302` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_error_rate -This panel indicates aggregate command operation error rate over 5m. +

Aggregate command operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100303` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

#### executor: apiworker_command_total -This panel indicates command operations every 5m. +

Command operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_99th_percentile_duration -This panel indicates 99th percentile successful command operation duration over 5m. +

99th percentile successful command operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_command_errors_total -This panel indicates command operation errors every 5m. +

Command operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100312` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_error_rate -This panel indicates command operation error rate over 5m. +

Command operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100313` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"setup.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

@@ -5574,65 +12527,169 @@ This panel indicates command operation error rate over 5m. #### executor: apiworker_command_total -This panel indicates aggregate command operations every 5m. +

Aggregate command operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_99th_percentile_duration -This panel indicates 99th percentile successful aggregate command operation duration over 5m. +

99th percentile successful aggregate command operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_command_errors_total -This panel indicates aggregate command operation errors every 5m. +

Aggregate command operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_error_rate -This panel indicates aggregate command operation error rate over 5m. +

Aggregate command operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100403` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

#### executor: apiworker_command_total -This panel indicates command operations every 5m. +

Command operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100410` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_99th_percentile_duration -This panel indicates 99th percentile successful command operation duration over 5m. +

99th percentile successful command operation duration over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100411` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))` + +

#### executor: apiworker_command_errors_total -This panel indicates command operation errors every 5m. +

Command operation errors every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100412` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

#### executor: apiworker_command_error_rate -This panel indicates command operation error rate over 5m. +

Command operation error rate over 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100413` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"exec.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100` + +

@@ -5640,65 +12697,169 @@ This panel indicates command operation error rate over 5m. #### executor: apiworker_command_total -This panel indicates aggregate command operations every 5m. +

Aggregate command operations every 5m -*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* +

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100500` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))` + +

 
 #### executor: apiworker_command_99th_percentile_duration
 
-This panel indicates 99th percentile successful aggregate command operation duration over 5m.
+<p class="subtitle">99th percentile successful aggregate command operation duration over 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100501` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `histogram_quantile(0.99, sum by (le)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))`
+
+</details>
 
 <br />
 
 #### executor: apiworker_command_errors_total
 
-This panel indicates aggregate command operation errors every 5m.
+<p class="subtitle">Aggregate command operation errors every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100502` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))`
+
+</details>
 
 <br />
 
 #### executor: apiworker_command_error_rate
 
-This panel indicates aggregate command operation error rate over 5m.
+<p class="subtitle">Aggregate command operation error rate over 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100503` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100`
+
+</details>
 
 <br />
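The error-rate panels above all share one formula: errors divided by all recorded operations (the `_total` counter plus the `_errors_total` counter), scaled to a percentage over the same 5m window. A minimal Go sketch of that arithmetic for sanity-checking a panel value by hand; the function and the sample numbers are illustrative only and not part of the generator:

```go
package main

import "fmt"

// errorRatePercent mirrors the PromQL used by the *_error_rate panels:
// errors / (total + errors) * 100.
func errorRatePercent(total, errors float64) float64 {
	if total+errors == 0 {
		return 0 // no operations in the window; avoid a NaN result
	}
	return errors / (total + errors) * 100
}

func main() {
	// e.g. 97 operations counted by the total counter and 3 errors in the last 5m
	fmt.Printf("%.1f%%\n", errorRatePercent(97, 3)) // prints 3.0%
}
```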
 
 #### executor: apiworker_command_total
 
-This panel indicates command operations every 5m.
+<p class="subtitle">Command operations every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100510` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))`
+
+</details>
 
 <br />
 
 #### executor: apiworker_command_99th_percentile_duration
 
-This panel indicates 99th percentile successful command operation duration over 5m.
+<p class="subtitle">99th percentile successful command operation duration over 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100511` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_apiworker_command_duration_seconds_bucket{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])))`
+
+</details>
 
 <br />
 
 #### executor: apiworker_command_errors_total
 
-This panel indicates command operation errors every 5m.
+<p class="subtitle">Command operation errors every 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100512` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))`
+
+</details>
 
 <br />
 
 #### executor: apiworker_command_error_rate
 
-This panel indicates command operation error rate over 5m.
+<p class="subtitle">Command operation error rate over 5m
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+</p>
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100513` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) / (sum by (op)(increase(src_apiworker_command_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])) + sum by (op)(increase(src_apiworker_command_errors_total{op=~"teardown.*",job=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m]))) * 100`
+
+</details>
 
 <br />
@@ -5706,7 +12867,9 @@ This panel indicates command operation error rate over 5m.
 
 #### executor: container_missing
 
-This panel indicates container missing.
+<p class="subtitle">Container missing
+
+</p>
 
 This value is the number of times a container has not been seen for more than one minute. If you observe this
@@ -5718,80 +12881,170 @@ value change independent of deployment events (such as an upgrade), it could ind
 - Determine if the pod was OOM killed using `docker inspect -f '{{json .State}}' (executor|sourcegraph-code-intel-indexers|executor-batches)` (look for `"OOMKilled":true`) and, if so, consider increasing the memory limit of the (executor|sourcegraph-code-intel-indexers|executor-batches) container in `docker-compose.yml`.
 - Check the logs before the container restarted to see if there are `panic:` messages or similar using `docker logs (executor|sourcegraph-code-intel-indexers|executor-batches)` (note this will include logs from the previous and currently running container).
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100600` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `count by(name) ((time() - container_last_seen{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}) > 60)`
+
+</details>
 
 <br />
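The container_missing guidance above points at `docker inspect -f '{{json .State}}' <container>` for spotting OOM kills. Purely as an illustration (this helper is not part of the patch, and the container name is a placeholder), the same check wrapped in Go:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os/exec"
)

// state holds the one field we care about from `docker inspect -f '{{json .State}}'`.
type state struct {
	OOMKilled bool `json:"OOMKilled"`
}

func main() {
	// Placeholder name: substitute executor, sourcegraph-code-intel-indexers,
	// or executor-batches depending on your deployment.
	out, err := exec.Command("docker", "inspect", "-f", "{{json .State}}", "executor").Output()
	if err != nil {
		log.Fatal(err)
	}
	var s state
	if err := json.Unmarshal(out, &s); err != nil {
		log.Fatal(err)
	}
	fmt.Println("OOMKilled:", s.OOMKilled)
}
```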
 
 #### executor: container_cpu_usage
 
-This panel indicates container cpu usage total (1m average) across all cores by instance.
+<p class="subtitle">Container cpu usage total (1m average) across all cores by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-container-cpu-usage).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-container-cpu-usage) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100601` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}`
+
+</details>
 
 <br />
 
 #### executor: container_memory_usage
 
-This panel indicates container memory usage by instance.
+<p class="subtitle">Container memory usage by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-container-memory-usage).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-container-memory-usage) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100602` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `cadvisor_container_memory_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}`
+
+</details>
 
 <br />
 
 #### executor: fs_io_operations
 
-This panel indicates filesystem reads and writes rate by instance over 1h.
+<p class="subtitle">Filesystem reads and writes rate by instance over 1h
+
+</p>
 
 This value indicates the number of filesystem read and write operations by containers of this service.
 When extremely high, this can indicate a resource usage problem, or can cause problems with the service itself,
 especially if high values or spikes correlate with {{CONTAINER_NAME}} issues.
 
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100603` on your Sourcegraph instance.
+
 *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
 
+<details>
+<summary>Technical details</summary>
+
+Query: `sum by(name) (rate(container_fs_reads_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[1h]) + rate(container_fs_writes_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[1h]))`
+
+</details>
+
 <br />
 
 ### Executor: Provisioning indicators (not available on server)
 
 #### executor: provisioning_container_cpu_usage_long_term
 
-This panel indicates container cpu usage total (90th percentile over 1d) across all cores by instance.
+<p class="subtitle">Container cpu usage total (90th percentile over 1d) across all cores by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-cpu-usage-long-term).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100700` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[1d])`
+
+</details>
 
 <br />
 
 #### executor: provisioning_container_memory_usage_long_term
 
-This panel indicates container memory usage (1d maximum) by instance.
+<p class="subtitle">Container memory usage (1d maximum) by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-memory-usage-long-term).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100701` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[1d])`
+
+</details>
 
 <br />
 
 #### executor: provisioning_container_cpu_usage_short_term
 
-This panel indicates container cpu usage total (5m maximum) across all cores by instance.
+<p class="subtitle">Container cpu usage total (5m maximum) across all cores by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-cpu-usage-short-term).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100710` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])`
+
+</details>
 
 <br />
 
 #### executor: provisioning_container_memory_usage_short_term
 
-This panel indicates container memory usage (5m maximum) by instance.
+<p class="subtitle">Container memory usage (5m maximum) by instance
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-memory-usage-short-term).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100711` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^(executor|sourcegraph-code-intel-indexers|executor-batches).*"}[5m])`
+
+</details>
 
 <br />
@@ -5799,23 +13052,45 @@ This panel indicates container memory usage (5m maximum) by instance.
 
 #### executor: go_goroutines
 
-This panel indicates maximum active goroutines.
+<p class="subtitle">Maximum active goroutines
+
+</p>
 
 A high value here indicates a possible goroutine leak.
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-go-goroutines).
+Refer to the [alert solutions reference](./alert_solutions.md#executor-go-goroutines) for 1 alert related to this panel.
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100800` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `max by(instance) (go_goroutines{job=~".*(executor|sourcegraph-code-intel-indexers|executor-batches)"})`
+
+</details>
 
 <br />
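`go_goroutines` (and `go_gc_duration_seconds` below) are standard Go runtime metrics rather than Sourcegraph-specific ones. A minimal sketch of how a Go service typically exposes them, assuming the stock `prometheus/client_golang` defaults, where the default registry already includes the Go runtime collector:

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// promhttp.Handler() serves the default registry, which ships with the Go
	// collector, so /metrics includes go_goroutines and go_gc_duration_seconds.
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":2112", nil))
}
```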
 
 #### executor: go_gc_duration_seconds
 
-This panel indicates maximum go garbage collection duration.
+<p class="subtitle">Maximum go garbage collection duration
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-go-gc-duration-seconds).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-go-gc-duration-seconds) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100801` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `max by(instance) (go_gc_duration_seconds{job=~".*(executor|sourcegraph-code-intel-indexers|executor-batches)"})`
+
+</details>
 
 <br />
@@ -5823,11 +13098,22 @@ This panel indicates maximum go garbage collection duration.
 
 #### executor: pods_available_percentage
 
-This panel indicates percentage pods available.
+<p class="subtitle">Percentage pods available
 
-> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-pods-available-percentage).
+</p>
 
-*Managed by the [Sourcegraph Code-intelligence team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+Refer to the [alert solutions reference](./alert_solutions.md#executor-pods-available-percentage) for 1 alert related to this panel.
+
+To see this panel, visit `/-/debug/grafana/d/executor/executor?viewPanel=100900` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Code-intel team](https://about.sourcegraph.com/handbook/engineering/code-intelligence).*
+
+<details>
+<summary>Technical details</summary>
+
+Query: `sum by(app) (up{app=~".*(executor|sourcegraph-code-intel-indexers|executor-batches)"}) / count by (app) (up{app=~".*(executor|sourcegraph-code-intel-indexers|executor-batches)"}) * 100`
+
+</details>
 
 <br />
diff --git a/monitoring/definitions/shared/codeinsights.go b/monitoring/definitions/shared/codeinsights.go
index 876ad638652..6e90655e26e 100644
--- a/monitoring/definitions/shared/codeinsights.go
+++ b/monitoring/definitions/shared/codeinsights.go
@@ -75,7 +75,7 @@ func (codeInsights) NewInsightsQueryRunnerResetterGroup(containerName string) mo
 
 			ObservableConstructorOptions: ObservableConstructorOptions{
 				MetricNameRoot:        "insights_search_queue",
-				MetricDescriptionRoot: "insights_search_queue",
+				MetricDescriptionRoot: "insights search queue",
 			},
 		},
 
diff --git a/monitoring/definitions/shared/codeintel.go b/monitoring/definitions/shared/codeintel.go
index 6fde93d162a..f526b5004c9 100644
--- a/monitoring/definitions/shared/codeintel.go
+++ b/monitoring/definitions/shared/codeintel.go
@@ -663,21 +663,21 @@ func (codeIntelligence) NewJanitorGroup(containerName string) monitoring.Group {
 		{
 			Standard.Count("records deleted")(ObservableConstructorOptions{
 				MetricNameRoot:        "codeintel_background_upload_records_removed",
-				MetricDescriptionRoot: "lsif_upload",
+				MetricDescriptionRoot: "lsif upload",
 			})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
 				Number of LSIF upload records deleted due to expiration or unreachability every 5m
 			`).Observable(),
 
 			Standard.Count("records deleted")(ObservableConstructorOptions{
 				MetricNameRoot:        "codeintel_background_index_records_removed",
-				MetricDescriptionRoot: "lsif_index",
+				MetricDescriptionRoot: "lsif index",
 			})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
 				Number of LSIF index records deleted due to expiration or unreachability every 5m
 			`).Observable(),
 
 			Standard.Count("data bundles deleted")(ObservableConstructorOptions{
 				MetricNameRoot:        "codeintel_background_uploads_purged",
-				MetricDescriptionRoot: "lsif_upload",
+				MetricDescriptionRoot: "lsif upload",
 			})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
 				Number of LSIF upload data bundles purged from the codeintel-db database every 5m
 			`).Observable(),
diff --git a/monitoring/definitions/worker.go b/monitoring/definitions/worker.go
index b2907acb5bd..f9a16f1350d 100644
--- a/monitoring/definitions/worker.go
+++ b/monitoring/definitions/worker.go
@@ -112,7 +112,7 @@ func Worker() *monitoring.Container {
 
 				ObservableConstructorOptions: shared.ObservableConstructorOptions{
 					MetricNameRoot:        "codeintel_background_upload",
-					MetricDescriptionRoot: "lsif_upload",
+					MetricDescriptionRoot: "lsif upload",
 				},
 			},
 
@@ -132,7 +132,7 @@ func Worker() *monitoring.Container {
 
 				ObservableConstructorOptions: shared.ObservableConstructorOptions{
 					MetricNameRoot:        "codeintel_background_index",
-					MetricDescriptionRoot: "lsif_index",
+					MetricDescriptionRoot: "lsif index",
 				},
 			},
 
@@ -152,7 +152,7 @@ func Worker() *monitoring.Container {
 
 				ObservableConstructorOptions: shared.ObservableConstructorOptions{
 					MetricNameRoot:        "codeintel_background_dependency_index",
-					MetricDescriptionRoot: "lsif_dependency_index",
+					MetricDescriptionRoot: "lsif dependency index",
 				},
 			},
 
diff --git a/monitoring/monitoring/documentation.go b/monitoring/monitoring/documentation.go
index 113d8d40c58..44b558737c1 100644
--- a/monitoring/monitoring/documentation.go
+++ b/monitoring/monitoring/documentation.go
@@ -78,20 +78,21 @@ func renderDocumentation(containers []*Container) (*documentation, error) {
 	for _, c := range containers {
 		fmt.Fprintf(&docs.dashboards, "## %s\n\n", c.Title)
 		fprintSubtitle(&docs.dashboards, c.Description)
+		fmt.Fprintf(&docs.dashboards, "To see this dashboard, visit `/-/debug/grafana/d/%[1]s/%[1]s` on your Sourcegraph instance.\n\n", c.Name)
 
-		for _, g := range c.Groups {
+		for gIndex, g := range c.Groups {
 			// the "General" group is top-level
 			if g.Title != "General" {
 				fmt.Fprintf(&docs.dashboards, "### %s: %s\n\n", c.Title, g.Title)
 			}
 
-			for _, r := range g.Rows {
-				for _, o := range r {
+			for rIndex, r := range g.Rows {
+				for oIndex, o := range r {
 					if err := docs.renderAlertSolutionEntry(c, o); err != nil {
 						return nil, errors.Errorf("error rendering alert solution entry %q %q: %w", c.Name, o.Name, err)
 					}
-					if err := docs.renderDashboardPanelEntry(c, o); err != nil {
+					if err := docs.renderDashboardPanelEntry(c, g, o, observablePanelID(gIndex, rIndex, oIndex)); err != nil {
 						return nil, errors.Errorf("error rendering dashboard panel entry %q %q: %w", c.Name, o.Name, err)
 					}
 
@@ -140,16 +141,20 @@ func (d *documentation) renderAlertSolutionEntry(c *Container, o Observable) err
 		possibleSolutions, _ := toMarkdown(o.PossibleSolutions, true)
 		fmt.Fprintf(&d.alertSolutions, "%s\n", possibleSolutions)
 	}
+	if o.Interpretation != "" && o.Interpretation != "none" {
+		// indicate help is available in dashboards reference
+		fmt.Fprintf(&d.alertSolutions, "- More help interpreting this metric is available in the [dashboards reference](./%s#%s).\n",
+			dashboardsDocsFile, observableDocAnchor(c, o))
+	} else {
+		// just show the panel reference
+		fmt.Fprintf(&d.alertSolutions, "- Learn more about the related dashboard panel in the [dashboards reference](./%s#%s).\n",
+			dashboardsDocsFile, observableDocAnchor(c, o))
+	}
 	// add silencing configuration as another solution
 	fmt.Fprintf(&d.alertSolutions, "- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:\n\n")
 	fmt.Fprintf(&d.alertSolutions, "```json\n%s\n```\n\n", fmt.Sprintf(`"observability.silenceAlerts": [
   %s
 ]`, strings.Join(prometheusAlertNames, ",\n")))
-	// add link to panel information IF there are additional details available
-	if o.Interpretation != "" && o.Interpretation != "none" {
-		fmt.Fprintf(&d.alertSolutions, "> NOTE: More help interpreting this metric is available in the [dashboards reference](./%s#%s).\n\n",
-			dashboardsDocsFile, observableDocAnchor(c, o))
-	}
 	if o.Owner != "" {
 		// add owner
 		fprintOwnedBy(&d.alertSolutions, o.Owner)
@@ -159,23 +164,42 @@ func (d *documentation) renderAlertSolutionEntry(c *Container, o Observable) err
 	return nil
 }
 
-func (d *documentation) renderDashboardPanelEntry(c *Container, o Observable) error {
+func (d *documentation) renderDashboardPanelEntry(c *Container, g Group, o Observable, panelID uint) error {
 	fprintObservableHeader(&d.dashboards, c, &o, 4)
-	fmt.Fprintf(&d.dashboards, "This panel indicates %s.\n\n", o.Description)
+	fprintSubtitle(&d.dashboards, fmt.Sprintf("%s\n\n", upperFirst(o.Description)))
+
+	// render interpretation reference if available
 	if o.Interpretation != "" && o.Interpretation != "none" {
 		interpretation, _ := toMarkdown(o.Interpretation, false)
 		fmt.Fprintf(&d.dashboards, "%s\n\n", interpretation)
 	}
+
+	// add link to alert solutions IF there is an alert attached
 	if !o.NoAlert {
-		fmt.Fprintf(&d.dashboards, "> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./%s#%s).\n\n",
-			alertSolutionsFile, observableDocAnchor(c, o))
+		fmt.Fprintf(&d.dashboards, "Refer to the [alert solutions reference](./%s#%s) for %s related to this panel.\n\n",
+			alertSolutionsFile, observableDocAnchor(c, o), pluralize("alert", o.alertsCount()))
+	} else {
+		fmt.Fprintf(&d.dashboards, "This panel has no related alerts.\n\n")
 	}
+
+	// how to get to this panel
+	fmt.Fprintf(&d.dashboards, "To see this panel, visit `/-/debug/grafana/d/%[1]s/%[1]s?viewPanel=%[2]d` on your Sourcegraph instance.\n\n",
+		c.Name, panelID)
+
 	if o.Owner != "" {
 		// add owner
 		fprintOwnedBy(&d.dashboards, o.Owner)
 	}
+
+	fmt.Fprintf(&d.dashboards, `
+<details>
+<summary>Technical details</summary>
+
+Query: %s
+
+</details>
+`, fmt.Sprintf("`%s`", o.Query))
+
 	// render break for readability
 	fmt.Fprint(&d.dashboards, "\n<br />\n\n")
 	return nil
diff --git a/monitoring/monitoring/monitoring.go b/monitoring/monitoring/monitoring.go
index b02d6b3dd5e..757dc8b73a6 100644
--- a/monitoring/monitoring/monitoring.go
+++ b/monitoring/monitoring/monitoring.go
@@ -422,19 +422,21 @@ const (
 
 // toMarkdown returns a Markdown string that also links to the owner's team page
 func (o ObservableOwner) toMarkdown() string {
-	var teamName string
+	var slug string
 	// special cases for differences in how a team is named in ObservableOwner and how
 	// they are named in the handbook.
 	// see https://about.sourcegraph.com/company/team/org_chart#engineering
 	switch o {
 	case ObservableOwnerCodeIntel:
-		teamName = "code-intelligence"
+		slug = "code-intelligence"
+	case ObservableOwnerCodeInsights:
+		slug = "developer-insights/code-insights"
 	default:
-		teamName = string(o)
+		slug = strings.ReplaceAll(string(o), " ", "-")
 	}
-	slug := strings.ReplaceAll(teamName, " ", "-")
 
-	return fmt.Sprintf("[Sourcegraph %s team](https://about.sourcegraph.com/handbook/engineering/%s)", upperFirst(teamName), slug)
+	return fmt.Sprintf("[Sourcegraph %s team](https://about.sourcegraph.com/handbook/engineering/%s)",
+		upperFirst(string(o)), slug)
 }
 
 // Observable describes a metric about a container that can be observed. For example, memory usage.
@@ -574,7 +576,7 @@ func (o Observable) validate() error {
 		return errors.New(`Panel.panelType must be "graph" or "heatmap"`)
 	}
 
-	allAlertsEmpty := o.Warning.isEmpty() && o.Critical.isEmpty()
+	allAlertsEmpty := o.alertsCount() == 0
 	if allAlertsEmpty || o.NoAlert {
 		// Ensure lack of alerts is intentional
 		if allAlertsEmpty && !o.NoAlert {
@@ -619,6 +621,16 @@ func (o Observable) validate() error {
 	return nil
 }
 
+func (o Observable) alertsCount() (count int) {
+	if !o.Warning.isEmpty() {
+		count++
+	}
+	if !o.Critical.isEmpty() {
+		count++
+	}
+	return
+}
+
 // Alert provides a builder for defining alerting on an Observable.
 func Alert() *ObservableAlertDefinition {
 	return &ObservableAlertDefinition{}
diff --git a/monitoring/monitoring/util.go b/monitoring/monitoring/util.go
index 66ffeb356b0..5b7f5758255 100644
--- a/monitoring/monitoring/util.go
+++ b/monitoring/monitoring/util.go
@@ -20,6 +20,13 @@ func withPeriod(s string) string {
 	return s
 }
 
+func pluralize(noun string, count int) string {
+	if count != 1 {
+		noun += "s"
+	}
+	return fmt.Sprintf("%d %s", count, noun)
+}
+
 // StringPtr converts a string value to a pointer, useful for setting fields in some APIs.
 func StringPtr(s string) *string { return &s }
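`renderDashboardPanelEntry` now receives a `panelID` from `observablePanelID(gIndex, rIndex, oIndex)`, which is not shown in this diff. A hedged sketch of the ID scheme implied by the `viewPanel` values in the generated reference above (100411, 100500, 100603, and so on), assuming `100000 + group*100 + row*10 + observable`:

```go
package main

import "fmt"

// observablePanelID here is an assumption inferred from the generated docs, not
// the actual implementation: group 4 / row 1 / observable 1 yields viewPanel=100411.
func observablePanelID(groupIndex, rowIndex, observableIndex int) uint {
	return uint(100000 + groupIndex*100 + rowIndex*10 + observableIndex)
}

func main() {
	fmt.Println(observablePanelID(4, 1, 1)) // 100411
	fmt.Println(observablePanelID(6, 0, 3)) // 100603 (fs_io_operations)
}
```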