diff --git a/doc/admin/observability/alert_solutions.md b/doc/admin/observability/alert_solutions.md index 00cccd5155d..67ce684dd6e 100644 --- a/doc/admin/observability/alert_solutions.md +++ b/doc/admin/observability/alert_solutions.md @@ -842,6 +842,30 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al
+## frontend: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning frontend: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical frontend: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_frontend_mean_blocked_seconds_per_conn_request", + "critical_frontend_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## frontend: internal_indexed_search_error_responses

internal indexed search error responses every 5m

@@ -1453,6 +1477,30 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al
+## gitserver: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning gitserver: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical gitserver: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_gitserver_mean_blocked_seconds_per_conn_request", + "critical_gitserver_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## gitserver: container_cpu_usage

container cpu usage total (1m average) across all cores by instance

@@ -2402,6 +2450,30 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al
+## precise-code-intel-worker: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning precise-code-intel-worker: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical precise-code-intel-worker: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_precise-code-intel-worker_mean_blocked_seconds_per_conn_request", + "critical_precise-code-intel-worker_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## precise-code-intel-worker: frontend_internal_api_error_responses

frontend-internal API error responses every 5m by route

@@ -3210,6 +3282,30 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al
+## worker: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning worker: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical worker: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_worker_mean_blocked_seconds_per_conn_request", + "critical_worker_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## worker: frontend_internal_api_error_responses

frontend-internal API error responses every 5m by route

@@ -4163,6 +4259,30 @@ with your code hosts connections or networking issues affecting communication wi
+## repo-updater: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning repo-updater: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical repo-updater: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_repo-updater_mean_blocked_seconds_per_conn_request", + "critical_repo-updater_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## repo-updater: container_cpu_usage

container cpu usage total (1m average) across all cores by instance

@@ -5966,6 +6086,30 @@ with your code hosts connections or networking issues affecting communication wi
+## executor-queue: mean_blocked_seconds_per_conn_request + +

mean blocked seconds per conn request

+ +**Descriptions** + +- warning executor-queue: 0.05s+ mean blocked seconds per conn request for 5m0s +- critical executor-queue: 0.1s+ mean blocked seconds per conn request for 10m0s + +**Possible solutions** + +- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: + +```json +"observability.silenceAlerts": [ + "warning_executor-queue_mean_blocked_seconds_per_conn_request", + "critical_executor-queue_mean_blocked_seconds_per_conn_request" +] +``` + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ## executor-queue: frontend_internal_api_error_responses

frontend-internal API error responses every 5m by route

diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md index e998f6e45df..8365ba7658c 100644 --- a/doc/admin/observability/dashboards.md +++ b/doc/admin/observability/dashboards.md @@ -436,6 +436,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#frontend-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -978,6 +980,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#gitserver-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -1641,6 +1645,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#precise-code-intel-worker-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -2171,6 +2177,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#worker-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -2739,6 +2747,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#repo-updater-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
@@ -3912,6 +3922,8 @@ This panel indicates idle. This panel indicates mean blocked seconds per conn request. +> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./alert_solutions.md#executor-queue-mean-blocked-seconds-per-conn-request). + *Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*
diff --git a/monitoring/definitions/shared/dbconns.go b/monitoring/definitions/shared/dbconns.go index d29e2f3a70b..60637bd7f76 100644 --- a/monitoring/definitions/shared/dbconns.go +++ b/monitoring/definitions/shared/dbconns.go @@ -2,6 +2,7 @@ package shared import ( "fmt" + "time" "github.com/sourcegraph/sourcegraph/monitoring/monitoring" ) @@ -61,10 +62,11 @@ func DatabaseConnectionsMonitoring(app string) []monitoring.Row { Description: "mean blocked seconds per conn request", Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name=%q}[5m])) / `+ `sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name=%q}[5m]))`, app, app), - Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}").Unit(monitoring.Seconds), - NoAlert: true, - Owner: monitoring.ObservableOwnerCoreApplication, - Interpretation: "none", + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}").Unit(monitoring.Seconds), + Warning: monitoring.Alert().GreaterOrEqual(0.05, nil).For(5 * time.Minute), + Critical: monitoring.Alert().GreaterOrEqual(0.10, nil).For(10 * time.Minute), + Owner: monitoring.ObservableOwnerCoreApplication, + PossibleSolutions: "none", }, }, { diff --git a/monitoring/monitoring/monitoring.go b/monitoring/monitoring/monitoring.go index ad21a9e1be3..49637cd042f 100644 --- a/monitoring/monitoring/monitoring.go +++ b/monitoring/monitoring/monitoring.go @@ -50,7 +50,7 @@ func (c *Container) validate() error { return errors.Errorf("Title must be in Title Case; found \"%s\" want \"%s\"", c.Title, strings.Title(c.Title)) } if c.Description != withPeriod(c.Description) || c.Description != upperFirst(c.Description) { - return errors.Errorf("Description must be sentence starting with an uppercas eletter and ending with period; found \"%s\"", c.Description) + return errors.Errorf("Description must be sentence starting with an uppercase letter and ending with period; found \"%s\"", c.Description) } for i, g := range c.Groups { if err := g.validate(); err != nil {