monitoring: test owners for valid Opsgenie teams and handbook pages (#59251)

In INC-264, certain alerts - such as [zoekt: less than 90% percentage pods available for 10m0s](https://opsg.in/a/i/sourcegraph/178a626f-0f28-4295-bee9-84da988bb473-1703759057681) - don't seem to end up going anywhere because the ObservableOwner is defunct. This change adds _opt-in_ testing to report:

1. How many owners have valid Opsgenie teams
2. How many owners have valid handbook pages

In addition, we collect ObservableOwners that pass the test and use them to generate configuration for `site.json` in Sourcegraph.com: https://github.com/sourcegraph/deploy-sourcegraph-cloud/pull/18338 - this helps ensure the list is valid and not deceptively high-coverage.

The results are not great, but **enforcing** that owners are valid isn't currently in scope:

```
6/10 ObservableOwners do not have valid Opsgenie teams
3/10 ObservableOwners do not point to valid handbook pages
```

I also removed some defunct/unused functionality/owners.

## Test plan

To run these tests:

```
export OPSGENIE_API_KEY="..."
go test -timeout 30s github.com/sourcegraph/sourcegraph/monitoring/monitoring -update -online
```
This commit is contained in:
Robert Lin 2023-12-29 14:07:35 -08:00 committed by GitHub
parent b1f7ee0547
commit 55825e9939
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 753 additions and 439 deletions

View File

@ -4834,8 +4834,8 @@ def go_dependencies():
name = "com_github_opsgenie_opsgenie_go_sdk_v2",
build_file_proto_mode = "disable_global",
importpath = "github.com/opsgenie/opsgenie-go-sdk-v2",
sum = "h1:nV98dkBpqaYbDnhefmOQ+Rn4hE+jD6AtjYHXaU5WyJI=",
version = "v1.2.13",
sum = "h1:0h+YoXSyipf6XQGyIaDg6z5jwRik1JSm+sQetnD7vGY=",
version = "v1.2.22",
)
go_repository(
name = "com_github_oschwald_maxminddb_golang",

View File

@ -633,7 +633,7 @@ Generated query for warning alert: `max((sum by (alert_type) (increase(src_graph
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -817,7 +817,7 @@ Generated query for warning alert: `max((sum by (category) (increase(src_gitserv
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -848,7 +848,7 @@ Generated query for warning alert: `max((max by (owner) (observability_test_metr
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -917,7 +917,7 @@ Generated query for critical alert: `max((sum(increase(src_cloudkms_cryptographi
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -951,7 +951,7 @@ Generated query for critical alert: `max((sum by (app_name, db_name) (increase(s
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -983,7 +983,7 @@ Generated query for warning alert: `max((cadvisor_container_cpu_usage_percentage
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1015,7 +1015,7 @@ Generated query for warning alert: `max((cadvisor_container_memory_usage_percent
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1047,7 +1047,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1079,7 +1079,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1111,7 +1111,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1143,7 +1143,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1173,7 +1173,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1203,7 +1203,7 @@ Generated query for warning alert: `max((max by (instance) (go_goroutines{job=~"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1235,7 +1235,7 @@ Generated query for warning alert: `max((max by (instance) (go_gc_duration_secon
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1271,7 +1271,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*(frontend|sou
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1628,7 +1628,7 @@ Generated query for warning alert: `max((sum by (category) (increase(src_fronten
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1946,7 +1946,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*gitserver"})
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -1979,7 +1979,7 @@ Generated query for warning alert: `min((sum by (job) (pg_stat_activity_count{da
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2011,7 +2011,7 @@ Generated query for critical alert: `max((sum by (job) (pg_stat_activity_count)
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2051,7 +2051,7 @@ Generated query for warning alert: `max((sum by (job) (pg_stat_activity_max_tx_d
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2082,7 +2082,7 @@ Generated query for critical alert: `min((pg_up) <= 0)`
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2113,7 +2113,7 @@ Generated query for critical alert: `sum((max by (relname) (pg_invalid_index_cou
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2144,7 +2144,7 @@ Generated query for warning alert: `max((pg_exporter_last_scrape_error) >= 1)`
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2176,7 +2176,7 @@ Generated query for critical alert: `max((pg_sg_migration_status) >= 1)`
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2208,7 +2208,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2240,7 +2240,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2272,7 +2272,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2304,7 +2304,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2336,7 +2336,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2440,7 +2440,7 @@ Generated query for warning alert: `max((sum by (category) (increase(src_fronten
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2789,7 +2789,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*precise-code-
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2820,7 +2820,7 @@ Generated query for critical alert: `min((redis_up{app="redis-store"}) < 1)`
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2852,7 +2852,7 @@ Generated query for critical alert: `min((redis_up{app="redis-cache"}) < 1)`
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2884,7 +2884,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2916,7 +2916,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2948,7 +2948,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -2980,7 +2980,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3012,7 +3012,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3044,7 +3044,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3076,7 +3076,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3108,7 +3108,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3140,7 +3140,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3172,7 +3172,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3204,7 +3204,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*redis-cache"}
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3452,7 +3452,7 @@ Generated query for warning alert: `max((sum by (category) (increase(src_fronten
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -3802,7 +3802,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*worker"}) / c
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -4629,7 +4629,7 @@ Generated query for critical alert: `min((max by (name) (src_gitlab_rate_limit_r
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5110,7 +5110,7 @@ Generated query for warning alert: `max((sum by (code) (increase(searcher_servic
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5144,7 +5144,7 @@ Generated query for critical alert: `max((max(max_over_time(src_conf_client_time
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5531,7 +5531,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*searcher"}) /
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5565,7 +5565,7 @@ Generated query for critical alert: `max((max(max_over_time(src_conf_client_time
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5952,7 +5952,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*symbols"}) /
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -5984,7 +5984,7 @@ Generated query for warning alert: `max((cadvisor_container_cpu_usage_percentage
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6016,7 +6016,7 @@ Generated query for warning alert: `max((cadvisor_container_memory_usage_percent
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6048,7 +6048,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6080,7 +6080,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6112,7 +6112,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6144,7 +6144,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6176,7 +6176,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6833,7 +6833,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*indexed-searc
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6866,7 +6866,7 @@ Generated query for warning alert: `max((sum by (rule_group) (avg_over_time(prom
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6899,7 +6899,7 @@ Generated query for warning alert: `max((sum by (rule_group) (rate(prometheus_ru
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6931,7 +6931,7 @@ Generated query for warning alert: `max((sum by (integration) (rate(alertmanager
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6963,7 +6963,7 @@ Generated query for warning alert: `max((sum by (integration) (rate(alertmanager
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -6994,7 +6994,7 @@ Generated query for warning alert: `min((prometheus_config_last_reload_successfu
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7025,7 +7025,7 @@ Generated query for warning alert: `min((alertmanager_config_last_reload_success
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7056,7 +7056,7 @@ Generated query for warning alert: `max((increase(label_replace({__name__=~"prom
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7087,7 +7087,7 @@ Generated query for warning alert: `max((increase(prometheus_target_scrapes_exce
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7119,7 +7119,7 @@ Generated query for warning alert: `max((increase(prometheus_target_scrapes_samp
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7151,7 +7151,7 @@ Generated query for warning alert: `max((cadvisor_container_cpu_usage_percentage
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7183,7 +7183,7 @@ Generated query for warning alert: `max((cadvisor_container_memory_usage_percent
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7215,7 +7215,7 @@ Generated query for warning alert: `max((quantile_over_time(0.9, cadvisor_contai
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7247,7 +7247,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7279,7 +7279,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_cpu_us
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7311,7 +7311,7 @@ Generated query for warning alert: `max((max_over_time(cadvisor_container_memory
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7343,7 +7343,7 @@ Generated query for warning alert: `max((max by (name) (container_oom_events_tot
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7729,7 +7729,7 @@ Generated query for warning alert: `max((rate(src_telemetry_job_total{op="SendEv
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7760,7 +7760,7 @@ Generated query for warning alert: `max((sum by (receiver) (rate(otelcol_receive
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7791,7 +7791,7 @@ Generated query for warning alert: `max((sum by (exporter) (rate(otelcol_exporte
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7822,7 +7822,7 @@ Generated query for warning alert: `max((sum by (exporter) (rate(otelcol_exporte
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7854,7 +7854,7 @@ Generated query for warning alert: `max((sum by (processor) (rate(otelcol_proces
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7886,7 +7886,7 @@ Generated query for warning alert: `max((cadvisor_container_cpu_usage_percentage
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7918,7 +7918,7 @@ Generated query for warning alert: `max((cadvisor_container_memory_usage_percent
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>
@ -7950,7 +7950,7 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*otel-collecto
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<sub>*Managed by the [Sourcegraph Infrastructure Org team](https://handbook.sourcegraph.com/departments/engineering/infrastructure).*</sub>
<details>
<summary>Technical details</summary>

File diff suppressed because it is too large Load Diff

2
go.mod
View File

@ -542,7 +542,7 @@ require (
github.com/nu7hatch/gouuid v0.0.0-20131221200532-179d4d0c4d8d // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.13
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22
github.com/pandatix/go-cvss v0.5.2
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pkg/errors v0.9.1 // indirect

4
go.sum
View File

@ -1436,8 +1436,8 @@ github.com/opencontainers/image-spec v1.1.0-rc3/go.mod h1:X4pATf0uXsnn3g5aiGIsVn
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.13 h1:nV98dkBpqaYbDnhefmOQ+Rn4hE+jD6AtjYHXaU5WyJI=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.13/go.mod h1:4OjcxgwdXzezqytxN534MooNmrxRD50geWZxTD7845s=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22 h1:0h+YoXSyipf6XQGyIaDg6z5jwRik1JSm+sQetnD7vGY=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22/go.mod h1:4OjcxgwdXzezqytxN534MooNmrxRD50geWZxTD7845s=
github.com/oschwald/maxminddb-golang v1.12.0 h1:9FnTOD0YOhP7DGxGsq4glzpGy5+w7pq50AS6wALUMYs=
github.com/oschwald/maxminddb-golang v1.12.0/go.mod h1:q0Nob5lTCqyQ8WT6FYgS1L7PXKVVbgiymefNwIjPzgY=
github.com/pandatix/go-cvss v0.5.2 h1:9441i+Sn/P/TP9kNBl3kI7mwYtNYFr1eN8JdsiybiMM=

View File

@ -45,7 +45,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`cadvisor_container_memory_usage_percentage_total{%s}`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
This value indicates the memory usage of all containers.
`,
@ -58,7 +58,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`cadvisor_container_cpu_usage_percentage_total{%s}`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
This value indicates the CPU usage of all containers.
`,
@ -77,7 +77,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`max_over_time(cadvisor_container_memory_usage_percentage_total{%s}[5m]) >= 80`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
Containers that exceed 80% memory limit. The value indicates potential underprovisioned resources.
`,
@ -90,7 +90,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`max_over_time(cadvisor_container_cpu_usage_percentage_total{%s}[5m]) >= 80`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
Containers that exceed 80% CPU limit. The value indicates potential underprovisioned resources.
`,
@ -103,7 +103,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`max by (name) (container_oom_events_total{%s}) >= 1`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
This value indicates the total number of times the container main process or child processes were terminated by OOM killer.
When it occurs frequently, it is an indicator of underprovisioning.
@ -118,7 +118,7 @@ func Containers() *monitoring.Dashboard {
Query: fmt.Sprintf(`count by(name) ((time() - container_last_seen{%s}) > 60)`, containerNameQuery),
NoAlert: true,
Panel: monitoring.Panel().With(monitoring.PanelOptions.LegendOnRight()).LegendFormat("{{name}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: `
This value is the number of times a container has not been seen for more than one minute. If you observe this
value change independent of deployment events (such as an upgrade), it could indicate pods are being OOM killed or terminated for some other reasons.

View File

@ -28,7 +28,7 @@ func Embeddings() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "embeddings",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerCody),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCody, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCody, nil),
@ -42,7 +42,6 @@ func Embeddings() *monitoring.Dashboard {
{
Name: "hit_ratio",
Description: "hit ratio of the embeddings cache",
Owner: monitoring.ObservableOwner{},
Query: "rate(src_embeddings_cache_hit_count[30m]) / (rate(src_embeddings_cache_hit_count[30m]) + rate(src_embeddings_cache_miss_count[30m]))",
NoAlert: true,
Interpretation: "A low hit rate indicates your cache is not well utilized. Consider increasing the cache size.",
@ -51,7 +50,6 @@ func Embeddings() *monitoring.Dashboard {
{
Name: "missed_bytes",
Description: "bytes fetched due to a cache miss",
Owner: monitoring.ObservableOwner{},
Query: "rate(src_embeddings_cache_miss_bytes[10m])",
NoAlert: true,
Interpretation: "A high volume of misses indicates that many searches are not hitting the cache. Consider increasing the cache size.",

View File

@ -336,7 +336,7 @@ func Frontend() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "frontend",
InstanceFilterRegex: `${internalInstance:regex}`,
}, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.CodeIntelligence.NewResolversGroup(containerName),
shared.CodeIntelligence.NewAutoIndexEnqueuerGroup(containerName),
@ -507,7 +507,7 @@ func Frontend() *monitoring.Dashboard {
Query: `max by(owner) (observability_test_metric_warning)`,
Warning: monitoring.Alert().GreaterOrEqual(1),
Panel: monitoring.Panel().Max(1),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: "This alert is triggered via the `triggerObservabilityTestAlert` GraphQL endpoint, and will automatically resolve itself.",
},
{
@ -516,7 +516,7 @@ func Frontend() *monitoring.Dashboard {
Query: `max by(owner) (observability_test_metric_critical)`,
Critical: monitoring.Alert().GreaterOrEqual(1),
Panel: monitoring.Panel().Max(1),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: "This alert is triggered via the `triggerObservabilityTestAlert` GraphQL endpoint, and will automatically resolve itself.",
},
},
@ -694,11 +694,11 @@ func Frontend() *monitoring.Dashboard {
},
// Resource monitoring
shared.NewDatabaseConnectionsMonitoringGroup("frontend", monitoring.ObservableOwnerDevOps),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewDatabaseConnectionsMonitoringGroup("frontend", monitoring.ObservableOwnerInfraOrg),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
{
Title: "Search: Ranking",
Hidden: true,
@ -789,7 +789,7 @@ func Frontend() *monitoring.Dashboard {
Warning: monitoring.Alert().Greater(0),
Critical: monitoring.Alert().GreaterOrEqual(10),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: `
- Check your SMTP configuration in site configuration.
- Check 'sourcegraph-frontend' logs for more detailed error messages.
@ -805,7 +805,7 @@ func Frontend() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("emails"),
NoAlert: true, // this is a purely informational panel
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: "Total emails successfully delivered.",
// use to observe behaviour of email usage across instances
@ -819,7 +819,7 @@ func Frontend() *monitoring.Dashboard {
With(monitoring.PanelOptions.LegendOnRight()),
NoAlert: true, // this is a purely informational panel
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: "Emails successfully delivered by source, i.e. product feature.",
// use to observe behaviour of email usage across instances.

View File

@ -560,7 +560,7 @@ func GitServer() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "gitserver",
InstanceFilterRegex: `${shard:regex}`,
}, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.CodeIntelligence.NewCoursierGroup(containerName),
shared.CodeIntelligence.NewNpmGroup(containerName),

View File

@ -24,7 +24,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_span_receive_rate",
Description: "spans received per receiver per minute",
Panel: monitoring.Panel().Unit(monitoring.Number).LegendFormat("receiver: {{receiver}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (receiver) (rate(otelcol_receiver_accepted_spans[1m]))",
NoAlert: true,
Interpretation: `
@ -44,7 +44,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_span_refused",
Description: "spans refused per receiver",
Panel: monitoring.Panel().Unit(monitoring.Number).LegendFormat("receiver: {{receiver}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (receiver) (rate(otelcol_receiver_refused_spans[1m]))",
Warning: monitoring.Alert().Greater(1).For(5 * time.Minute),
NextSteps: "Check logs of the collector and configuration of the receiver",
@ -69,7 +69,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_span_export_rate",
Description: "spans exported per exporter per minute",
Panel: monitoring.Panel().Unit(monitoring.Number).LegendFormat("exporter: {{exporter}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (exporter) (rate(otelcol_exporter_sent_spans[1m]))",
NoAlert: true,
Interpretation: `
@ -85,7 +85,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_span_export_failures",
Description: "span export failures by exporter",
Panel: monitoring.Panel().Unit(monitoring.Number).LegendFormat("exporter: {{exporter}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (exporter) (rate(otelcol_exporter_send_failed_spans[1m]))",
Warning: monitoring.Alert().Greater(1).For(5 * time.Minute),
NextSteps: "Check the configuration of the exporter and if the service being exported is up",
@ -107,7 +107,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otelcol_exporter_queue_capacity",
Description: "exporter queue capacity",
Panel: monitoring.Panel().LegendFormat("exporter: {{exporter}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (exporter) (rate(otelcol_exporter_queue_capacity{job=~\"^.*\"}[1m]))",
NoAlert: true,
Interpretation: `Shows the capacity of the retry queue (in batches).`,
@ -116,7 +116,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otelcol_exporter_queue_size",
Description: "exporter queue size",
Panel: monitoring.Panel().LegendFormat("exporter: {{exporter}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (exporter) (rate(otelcol_exporter_queue_size{job=~\"^.*\"}[1m]))",
NoAlert: true,
Interpretation: `Shows the current size of retry queue`,
@ -125,7 +125,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otelcol_exporter_enqueue_failed_spans",
Description: "exporter enqueue failed spans",
Panel: monitoring.Panel().LegendFormat("exporter: {{exporter}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (exporter) (rate(otelcol_exporter_enqueue_failed_spans{job=~\"^.*\"}[1m]))",
Warning: monitoring.Alert().Greater(0).For(5 * time.Minute),
NextSteps: "Check the configuration of the exporter and if the service being exported is up. This may be caused by a queue full of unsettled elements, so you may need to decrease your sending rate or horizontally scale collectors.",
@ -143,7 +143,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otelcol_processor_dropped_spans",
Description: "spans dropped per processor per minute",
Panel: monitoring.Panel().Unit(monitoring.Number).LegendFormat("processor: {{processor}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (processor) (rate(otelcol_processor_dropped_spans[1m]))",
Warning: monitoring.Alert().Greater(0).For(5 * time.Minute),
NextSteps: "Check the configuration of the processor",
@ -162,7 +162,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_cpu_usage",
Description: "cpu usage of the collector",
Panel: monitoring.Panel().Unit(monitoring.Seconds).LegendFormat("{{job}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (job) (rate(otelcol_process_cpu_seconds{job=~\"^.*\"}[1m]))",
NoAlert: true,
Interpretation: `Shows CPU usage as reported by the OpenTelemetry collector.`,
@ -171,7 +171,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_memory_resident_set_size",
Description: "memory allocated to the otel collector",
Panel: monitoring.Panel().Unit(monitoring.Bytes).LegendFormat("{{job}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (job) (rate(otelcol_process_memory_rss{job=~\"^.*\"}[1m]))",
NoAlert: true,
Interpretation: `Shows the allocated memory Resident Set Size (RSS) as reported by the OpenTelemetry collector.`,
@ -180,7 +180,7 @@ func OtelCollector() *monitoring.Dashboard {
Name: "otel_memory_usage",
Description: "memory used by the collector",
Panel: monitoring.Panel().Unit(monitoring.Bytes).LegendFormat("{{job}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: "sum by (job) (rate(otelcol_process_runtime_total_alloc_bytes{job=~\"^.*\"}[1m]))",
NoAlert: true,
Interpretation: `
@ -196,8 +196,8 @@ func OtelCollector() *monitoring.Dashboard {
},
},
},
shared.NewContainerMonitoringGroup("otel-collector", monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup("otel-collector", monitoring.ObservableOwnerDevOps, nil),
shared.NewContainerMonitoringGroup("otel-collector", monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup("otel-collector", monitoring.ObservableOwnerInfraOrg, nil),
},
}
}

View File

@ -29,7 +29,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "connections",
Description: "active connections",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
Query: `sum by (job) (pg_stat_activity_count{datname!~"template.*|postgres|cloudsqladmin"}) OR sum by (job) (pg_stat_activity_count{job="codeinsights-db", datname!~"template.*|cloudsqladmin"})`,
Panel: monitoring.Panel().LegendFormat("{{datname}}"),
@ -39,7 +39,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "usage_connections_percentage",
Description: "connection in use",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false,
Query: `sum(pg_stat_activity_count) by (job) / (sum(pg_settings_max_connections) by (job) - sum(pg_settings_superuser_reserved_connections) by (job)) * 100`,
Panel: monitoring.Panel().LegendFormat("{{job}}").Unit(monitoring.Percentage).Max(100).Min(0),
@ -52,7 +52,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "transaction_durations",
Description: "maximum transaction durations",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
// Ignore in codeintel-db because Rockskip processing involves long transactions
// during normal operation.
@ -72,7 +72,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "postgres_up",
Description: "database availability",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
Query: "pg_up",
Panel: monitoring.Panel().LegendFormat("{{app}}"),
@ -95,7 +95,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "invalid_indexes",
Description: "invalid indexes (unusable by the query planner)",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
Query: "max by (relname)(pg_invalid_index_count)",
Panel: monitoring.Panel().LegendFormat("{{relname}}"),
@ -110,7 +110,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "pg_exporter_err",
Description: "errors scraping postgres exporter",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
Query: "pg_exporter_last_scrape_error",
Panel: monitoring.Panel().LegendFormat("{{app}}"),
@ -124,7 +124,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "migration_in_progress",
Description: "active schema migration",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
DataMustExist: false, // not deployed on docker-compose
Query: "pg_sg_migration_status",
Panel: monitoring.Panel().LegendFormat("{{app}}"),
@ -157,7 +157,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "pg_table_size",
Description: "table size",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `max by (relname)(pg_table_bloat_size)`,
Panel: monitoring.Panel().LegendFormat("{{relname}}").Unit(monitoring.Bytes),
NoAlert: true,
@ -166,7 +166,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "pg_table_bloat_ratio",
Description: "table bloat ratio",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `max by (relname)(pg_table_bloat_ratio) * 100`,
Panel: monitoring.Panel().LegendFormat("{{relname}}").Unit(monitoring.Percentage),
NoAlert: true,
@ -177,7 +177,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "pg_index_size",
Description: "index size",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `max by (relname)(pg_index_bloat_size)`,
Panel: monitoring.Panel().LegendFormat("{{relname}}").Unit(monitoring.Bytes),
NoAlert: true,
@ -186,7 +186,7 @@ func Postgres() *monitoring.Dashboard {
monitoring.Observable{
Name: "pg_index_bloat_ratio",
Description: "index bloat ratio",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `max by (relname)(pg_index_bloat_ratio) * 100`,
Panel: monitoring.Panel().LegendFormat("{{relname}}").Unit(monitoring.Percentage),
NoAlert: true,
@ -196,8 +196,8 @@ func Postgres() *monitoring.Dashboard {
},
},
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
},
}
}

View File

@ -23,7 +23,7 @@ func PreciseCodeIntelWorker() *monitoring.Dashboard {
// Resource monitoring
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),

View File

@ -30,7 +30,7 @@ func Prometheus() *monitoring.Dashboard {
Description: "metrics with highest cardinalities",
Query: `topk(10, count by (__name__, job)({__name__!=""}))`,
Panel: monitoring.Panel().LegendFormat("{{__name__}} ({{job}})"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NoAlert: true,
Interpretation: "The 10 highest-cardinality metrics collected by this Prometheus instance.",
},
@ -39,7 +39,7 @@ func Prometheus() *monitoring.Dashboard {
Description: "samples scraped by job",
Query: `sum by(job) (scrape_samples_post_metric_relabeling{job!=""})`,
Panel: monitoring.Panel().LegendFormat("{{job}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NoAlert: true,
Interpretation: "The number of samples scraped after metric relabeling was applied by this Prometheus instance.",
},
@ -51,7 +51,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `sum by(rule_group) (avg_over_time(prometheus_rule_group_last_duration_seconds[10m]))`,
Warning: monitoring.Alert().GreaterOrEqual(30), // standard prometheus_rule_group_interval_seconds
Panel: monitoring.Panel().Unit(monitoring.Seconds).MinAuto().LegendFormat("{{rule_group}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: fmt.Sprintf(`
A high value here indicates Prometheus rule evaluation is taking longer than expected.
It might indicate that certain rule groups are taking too long to evaluate, or Prometheus is underprovisioned.
@ -70,7 +70,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `sum by(rule_group) (rate(prometheus_rule_evaluation_failures_total[5m]))`,
Warning: monitoring.Alert().Greater(0),
Panel: monitoring.Panel().LegendFormat("{{rule_group}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: ruleGroupInterpretation,
NextSteps: `
- Check Prometheus logs for messages related to rule group evaluation (generally with log field 'component="rule manager"').
@ -91,7 +91,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `sum by(integration) (rate(alertmanager_notification_latency_seconds_sum[1m]))`,
Warning: monitoring.Alert().GreaterOrEqual(1),
Panel: monitoring.Panel().Unit(monitoring.Seconds).LegendFormat("{{integration}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: fmt.Sprintf(`
- Check the %s panels and try increasing resources for Prometheus if necessary.
- Ensure that your ['observability.alerts' configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid.
@ -104,7 +104,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `sum by(integration) (rate(alertmanager_notifications_failed_total[1m]))`,
Warning: monitoring.Alert().Greater(0),
Panel: monitoring.Panel().LegendFormat("{{integration}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: `
- Ensure that your ['observability.alerts' configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid.
- Check if the relevant alert integration service is experiencing downtime or issues.
@ -124,7 +124,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `prometheus_config_last_reload_successful`,
Warning: monitoring.Alert().Less(1),
Panel: monitoring.Panel().LegendFormat("reload success").Max(1),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: "A '1' indicates Prometheus reloaded its configuration successfully.",
NextSteps: `
- Check Prometheus logs for messages related to configuration loading.
@ -137,7 +137,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `alertmanager_config_last_reload_successful`,
Warning: monitoring.Alert().Less(1),
Panel: monitoring.Panel().LegendFormat("reload success").Max(1),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Interpretation: "A '1' indicates Alertmanager reloaded its configuration successfully.",
NextSteps: "Ensure that your [`observability.alerts` configuration](https://docs.sourcegraph.com/admin/observability/alerting#setting-up-alerting) (in site configuration) is valid.",
},
@ -149,7 +149,7 @@ func Prometheus() *monitoring.Dashboard {
Query: `increase(label_replace({__name__=~"prometheus_tsdb_(.*)_failed_total"}, "operation", "$1", "__name__", "(.+)s_failed_total")[5m:1m])`,
Warning: monitoring.Alert().Greater(0),
Panel: monitoring.Panel().LegendFormat("{{operation}}"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: "Check Prometheus logs for messages related to the failing operation.",
},
{
@ -158,7 +158,7 @@ func Prometheus() *monitoring.Dashboard {
Query: "increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m])",
Warning: monitoring.Alert().Greater(0),
Panel: monitoring.Panel().LegendFormat("rejected scrapes"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: "Check Prometheus logs for messages related to target scrape failures.",
},
{
@ -167,16 +167,16 @@ func Prometheus() *monitoring.Dashboard {
Query: "increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[10m])",
Warning: monitoring.Alert().Greater(0),
Panel: monitoring.Panel().LegendFormat("rejected scrapes"),
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
NextSteps: "Check Prometheus logs for messages related to target scrape failures.",
},
},
},
},
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
},
}
}

View File

@ -27,7 +27,7 @@ func Redis() *monitoring.Dashboard {
{
Name: "redis-store_up",
Description: "redis-store availability",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `redis_up{app="redis-store"}`,
Panel: monitoring.Panel().LegendFormat("{{app}}"),
DataMustExist: false, // not deployed on docker-compose
@ -48,7 +48,7 @@ func Redis() *monitoring.Dashboard {
{
Name: "redis-cache_up",
Description: "redis-cache availability",
Owner: monitoring.ObservableOwnerDevOps,
Owner: monitoring.ObservableOwnerInfraOrg,
Query: `redis_up{app="redis-cache"}`,
Panel: monitoring.Panel().LegendFormat("{{app}}"),
DataMustExist: false, // not deployed on docker-compose
@ -62,10 +62,10 @@ func Redis() *monitoring.Dashboard {
},
},
},
shared.NewProvisioningIndicatorsGroup(redisCache, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(redisStore, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(redisCache, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(redisStore, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(redisCache, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewProvisioningIndicatorsGroup(redisStore, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(redisCache, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(redisStore, monitoring.ObservableOwnerInfraOrg, nil),
},
}
}

View File

@ -599,7 +599,7 @@ func RepoUpdater() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "repo_updater",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.HTTP.NewHandlersGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerSource),

View File

@ -243,8 +243,8 @@ regularly above 0 it is a sign for further investigation.`,
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "searcher",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerSearchCore, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerSearchCore, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerSearchCore, nil),

View File

@ -61,8 +61,8 @@ func Symbols() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "symbols",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),

View File

@ -60,9 +60,9 @@ func SyntectServer() *monitoring.Dashboard {
},
},
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg, nil),
},
}
}

View File

@ -254,7 +254,7 @@ func Worker() *monitoring.Dashboard {
// Resource monitoring
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName, monitoring.ObservableOwnerInfraOrg),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
@ -269,7 +269,7 @@ func Worker() *monitoring.Dashboard {
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "worker",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
}, monitoring.ObservableOwnerInfraOrg),
},
}
}

View File

@ -6,13 +6,16 @@ require (
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db
github.com/hashicorp/hcl v1.0.0
github.com/hexops/autogold/v2 v2.0.3
github.com/iancoleman/strcase v0.3.0
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22
github.com/prometheus/common v0.37.0
github.com/prometheus/prometheus v0.40.5
github.com/sourcegraph/log v0.0.0-20231018134238-fbadff7458bb
github.com/sourcegraph/sourcegraph/lib v0.0.0-20230613175844-f031949c72f5
github.com/stretchr/testify v1.8.4
github.com/urfave/cli/v2 v2.23.7
golang.org/x/exp v0.0.0-20230425010034-47ecfdc1ba53
golang.org/x/text v0.14.0
gopkg.in/yaml.v2 v2.4.0
)
@ -45,6 +48,10 @@ require (
github.com/google/uuid v1.4.0 // indirect
github.com/gosimple/slug v1.12.0 // indirect
github.com/gosimple/unidecode v1.0.1 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.7.1 // indirect
github.com/hexops/gotextdiff v1.0.3 // indirect
github.com/hexops/valast v1.4.3 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgconn v1.14.0 // indirect
github.com/jackc/pgio v1.0.0 // indirect
@ -56,6 +63,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/nightlyone/lockfile v1.0.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
@ -63,14 +71,17 @@ require (
github.com/prometheus/procfs v0.8.0 // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sirupsen/logrus v1.9.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/goleak v1.2.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.24.0 // indirect
golang.org/x/crypto v0.15.0 // indirect
golang.org/x/exp v0.0.0-20230425010034-47ecfdc1ba53 // indirect
golang.org/x/mod v0.11.0 // indirect
golang.org/x/sys v0.14.0 // indirect
golang.org/x/tools v0.10.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
mvdan.cc/gofumpt v0.4.0 // indirect
)

View File

@ -77,8 +77,11 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/getsentry/sentry-go v0.25.0 h1:q6Eo+hS+yoJlTO3uu/azhQadsD8V+jQn2D8VvX1eOyI=
github.com/getsentry/sentry-go v0.25.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
@ -141,6 +144,9 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@ -167,10 +173,27 @@ github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc h1:PXZQA2WCxe85T
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc/go.mod h1:AHHlOEv1+GGQ3ktHMlhuTUwo3zljV3QJbC0+8o2kn+4=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-hclog v0.14.1 h1:nQcJDQwIAGnmoUWp8ubocEX40cCml/17YkF6csQLReU=
github.com/hashicorp/go-retryablehttp v0.5.1/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ=
github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hexops/autogold v0.8.1/go.mod h1:97HLDXyG23akzAoRYJh/2OBs3kd80eHyKPvZw0S5ZBY=
github.com/hexops/autogold v1.3.1 h1:YgxF9OHWbEIUjhDbpnLhgVsjUDsiHDTyDfy2lrfdlzo=
github.com/hexops/autogold/v2 v2.0.3 h1:zyrfTlNfyxLpX/zuk8wjTeTYP5AXaFeeRYFEZfHPwao=
github.com/hexops/autogold/v2 v2.0.3/go.mod h1:cYVc0tJn6v9Uf9xMOHvmH6scuTxsVJSxGcKR/yOVPzY=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/hexops/valast v1.4.3 h1:oBoGERMJh6UZdRc6cduE1CTPK+VAdXA59Y1HFgu3sm0=
github.com/hexops/valast v1.4.3/go.mod h1:Iqx2kLj3Jn47wuXpj3wX40xn6F93QNFBHuiKBerkTGA=
github.com/iancoleman/strcase v0.3.0 h1:nTXanmYxhfFAMjZL34Ov6gkzEsSJZ5DbhxWjvSASxEI=
github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
@ -226,6 +249,7 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@ -239,10 +263,13 @@ github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
@ -253,7 +280,11 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAmxBiA=
github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22 h1:0h+YoXSyipf6XQGyIaDg6z5jwRik1JSm+sQetnD7vGY=
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22/go.mod h1:4OjcxgwdXzezqytxN534MooNmrxRD50geWZxTD7845s=
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@ -278,6 +309,7 @@ github.com/prometheus/prometheus v0.40.5 h1:wmk5yNrQlkQ2OvZucMhUB4k78AVfG34szb1U
github.com/prometheus/prometheus v0.40.5/go.mod h1:bxgdmtoSNLmmIVPGmeTJ3OiP67VmuY4yalE4ZP6L/j8=
github.com/rainycape/unidecode v0.0.0-20150907023854-cb7f23ec59be/go.mod h1:MIDFMn7db1kT65GmV94GzpX9Qdi7N/pQlwb+AN8wh+Q=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
@ -290,6 +322,8 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sourcegraph/log v0.0.0-20231018134238-fbadff7458bb h1:tHKdC+bXxxGJ0cy/R06kg6Z0zqwVGOWMx8uWsIwsaoY=
github.com/sourcegraph/log v0.0.0-20231018134238-fbadff7458bb/go.mod h1:IDp09QkoqS8Z3CyN2RW6vXjgABkNpDbyjLIHNQwQ8P8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@ -383,6 +417,9 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU=
golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -411,9 +448,11 @@ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@ -434,6 +473,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -454,6 +495,7 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -476,8 +518,11 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
@ -485,6 +530,7 @@ golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@ -494,6 +540,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
@ -545,7 +592,9 @@ golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg=
golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -649,6 +698,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
mvdan.cc/gofumpt v0.4.0 h1:JVf4NN1mIpHogBj7ABpgOyZc65/UUOkKQFkoURsz4MM=
mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

View File

@ -11,6 +11,7 @@ go_library(
"generator.go",
"monitoring.go",
"multi_instance.go",
"owners.go",
"panel.go",
"panel_options.go",
"prometheus.go",
@ -42,13 +43,23 @@ go_test(
timeout = "short",
srcs = [
"generator_test.go",
"owners_test.go",
"variables_test.go",
],
embed = [":monitoring"],
tags = [
# We validate that external resources are valid
"requires-network",
],
deps = [
"//monitoring/definitions",
"@com_github_hexops_autogold_v2//:autogold",
"@com_github_opsgenie_opsgenie_go_sdk_v2//client",
"@com_github_opsgenie_opsgenie_go_sdk_v2//team",
"@com_github_prometheus_prometheus//model/labels",
"@com_github_sourcegraph_log//logtest",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
"@org_golang_x_exp//maps",
],
)

View File

@ -168,7 +168,7 @@ func (d *documentation) renderAlertSolutionEntry(c *Dashboard, o Observable) err
fmt.Fprintf(&d.alertDocs, "```json\n%s\n```\n\n", fmt.Sprintf(`"observability.silenceAlerts": [
%s
]`, strings.Join(prometheusAlertNames, ",\n")))
if o.Owner.identifier != "" {
if o.Owner.opsgenieTeam != "" {
// add owner
fprintOwnedBy(&d.alertDocs, o.Owner)
}
@ -211,7 +211,7 @@ func (d *documentation) renderDashboardPanelEntry(c *Dashboard, o Observable, pa
fmt.Fprintf(&d.dashboards, "To see this panel, visit `/-/debug/grafana/d/%[1]s/%[1]s?viewPanel=%[2]d` on your Sourcegraph instance.\n\n",
c.Name, panelID)
if o.Owner.identifier != "" {
if o.Owner.opsgenieTeam != "" {
// add owner
fprintOwnedBy(&d.dashboards, o.Owner)
}

View File

@ -133,13 +133,13 @@ func Generate(logger log.Logger, opts GenerateOptions, dashboards ...*Dashboard)
// Set up disk directories
if opts.GrafanaDir != "" {
os.MkdirAll(opts.GrafanaDir, os.ModePerm)
_ = os.MkdirAll(opts.GrafanaDir, os.ModePerm)
}
if opts.PrometheusDir != "" {
os.MkdirAll(opts.PrometheusDir, os.ModePerm)
_ = os.MkdirAll(opts.PrometheusDir, os.ModePerm)
}
if opts.DocsDir != "" {
os.MkdirAll(opts.DocsDir, os.ModePerm)
_ = os.MkdirAll(opts.DocsDir, os.ModePerm)
}
// Generate the goods

View File

@ -2,13 +2,11 @@ package monitoring
import (
"fmt"
"path"
"strconv"
"strings"
"time"
"github.com/grafana-tools/sdk"
"github.com/grafana/regexp"
"github.com/prometheus/prometheus/model/labels"
"github.com/sourcegraph/sourcegraph/lib/errors"
@ -373,7 +371,7 @@ func (c *Dashboard) RenderPrometheusRules(injectLabelMatchers []*labels.Matcher)
"level": level,
"service_name": c.Name,
"description": description,
"owner": o.Owner.identifier,
"owner": o.Owner.opsgenieTeam,
// in the corresponding dashboard, this label should indicate
// the panel associated with this rule
@ -448,108 +446,6 @@ func (r Row) validate(variables []ContainerVariable) error {
return errs
}
// ObservableOwner denotes a team that owns an Observable. The current teams are described in
// the handbook: https://handbook.sourcegraph.com/departments/engineering/
//
// All fields are unexported; use one of the predefined ObservableOwner* values
// rather than constructing an owner outside this package.
type ObservableOwner struct {
// identifier is the team's name on OpsGenie and is used for routing alerts.
// It is also the value emitted as the "owner" label on generated Prometheus
// rules, and it must match identifierPattern when non-empty.
identifier string
// teamName is the human-friendly name for this team, as rendered in
// generated documentation by toMarkdown.
teamName string
// handbookSlug is the path of this team's handbook page, relative to
// handbookBasePath (or its default). toMarkdown joins the two to build the link.
handbookSlug string
// handbookBasePath is optional - when empty, toMarkdown falls back to
// /departments/engineering/teams
handbookBasePath string
}
// identifierPattern matches valid ObservableOwner identifiers. An identifier
// must be all lowercase ASCII letters, optionally split into hyphen-separated
// segments.
//
// Some examples of valid identifiers:
// foo
// foo-bar
// foo-bar-baz
//
// Some examples of invalid identifiers:
// Foo
// FOO
// Foo-Bar
// foo_bar
var identifierPattern = regexp.MustCompile("^([a-z]+)(-[a-z]+)*?$")
// Predefined owners that can be assigned to an Observable. Each entry lists
// its fields in the same order: the OpsGenie identifier, the human-friendly
// team name, the handbook slug, and (only when it differs from the default)
// the handbook base path.
var (
	ObservableOwnerSearch = ObservableOwner{
		identifier:   "search",
		teamName:     "Search",
		handbookSlug: "search/product",
	}
	ObservableOwnerSearchCore = ObservableOwner{
		identifier:   "search-core",
		teamName:     "Search Core",
		handbookSlug: "search/core",
	}
	ObservableOwnerBatches = ObservableOwner{
		identifier:   "batch-changes",
		teamName:     "Batch Changes",
		handbookSlug: "batch-changes",
	}
	ObservableOwnerCodeIntel = ObservableOwner{
		identifier:   "code-intel",
		teamName:     "Code intelligence",
		handbookSlug: "code-intelligence",
	}
	ObservableOwnerSecurity = ObservableOwner{
		identifier:   "security",
		teamName:     "Security",
		handbookSlug: "security",
	}
	ObservableOwnerSource = ObservableOwner{
		identifier:   "source",
		teamName:     "Source",
		handbookSlug: "source",
	}
	ObservableOwnerCodeInsights = ObservableOwner{
		identifier:   "code-insights",
		teamName:     "Code Insights",
		handbookSlug: "code-insights",
	}
	ObservableOwnerDevOps = ObservableOwner{
		identifier:   "devops",
		teamName:     "Cloud DevOps",
		handbookSlug: "devops",
	}
	ObservableOwnerDataAnalytics = ObservableOwner{
		identifier:   "data-analytics",
		teamName:     "Data & Analytics",
		handbookSlug: "data-analytics",
	}
	// Cloud is the only owner here whose handbook page lives outside the
	// default engineering teams path.
	ObservableOwnerCloud = ObservableOwner{
		identifier:       "cloud",
		teamName:         "Cloud",
		handbookSlug:     "cloud",
		handbookBasePath: "/departments",
	}
	ObservableOwnerCody = ObservableOwner{
		identifier:   "cody",
		teamName:     "Cody",
		handbookSlug: "cody",
	}
	ObservableOwnerOwn = ObservableOwner{
		identifier:   "own",
		teamName:     "own",
		handbookSlug: "own",
	}
)
// toMarkdown renders the owner as a Markdown link whose text is
// "Sourcegraph <teamName> team" and whose target is the team's page on
// handbook.sourcegraph.com.
//
// The page path is handbookBasePath joined with handbookSlug; when
// handbookBasePath is empty, /departments/engineering/teams is used instead.
func (o ObservableOwner) toMarkdown() string {
	handbookBase := o.handbookBasePath
	if handbookBase == "" {
		// No override configured: fall back to the engineering teams section.
		handbookBase = "/departments/engineering/teams"
	}
	teamPage := path.Join("handbook.sourcegraph.com", handbookBase, o.handbookSlug)
	return fmt.Sprintf("[Sourcegraph %s team](https://%s)", o.teamName, teamPage)
}
// Observable describes a metric about a container that can be observed. For example, memory usage.
//
// These correspond to Grafana graphs.
@ -703,13 +599,12 @@ func (o Observable) validate(variables []ContainerVariable) error {
if first, second := string([]rune(o.Description)[0]), string([]rune(o.Description)[1]); first != strings.ToLower(first) && second == strings.ToLower(second) {
return errors.Errorf("Description must be lowercase except for acronyms; found \"%s\"", o.Description)
}
if o.Owner.identifier == "" && !o.NoAlert {
return errors.New("Owner.identifier must be defined for observables with alerts")
}
// In some cases, the identifier is an empty string. We don't want to run it through the regex.
if o.Owner.identifier != "" && !identifierPattern.Match([]byte(o.Owner.identifier)) {
return errors.Errorf(`Owner.identifier has invalid format: "%v"`, []byte(o.Owner.identifier))
// If there is an alert, the given owner must be valid
if !o.NoAlert {
if err := o.Owner.validate(); err != nil {
return err
}
}
if !o.Panel.panelType.validate() {

View File

@ -0,0 +1,137 @@
package monitoring
import (
"fmt"
"path"
"github.com/grafana/regexp"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
// ObservableOwner denotes a team that owns an Observable. The current teams are described in
// the handbook: https://handbook.sourcegraph.com/departments/engineering/
//
// The zero value is not a valid owner; see validate.
type ObservableOwner struct {
// opsgenieTeam is the team's name on OpsGenie and is used for routing alerts.
// It must match opsgenieTeamPattern.
opsgenieTeam string
// teamName is the human-friendly name for this team. It is also the key
// under which the owner is stored in allKnownOwners.
teamName string
// handbookSlug is the path of this team's handbook page, relative to
// handbookBasePath.
handbookSlug string
// handbookBasePath is optional - defaults to /departments/engineering/teams
handbookBasePath string
}
// Teams that can be assigned as the owner of an Observable. Every owner is
// created through registerObservableOwner.
var (
	// ObservableOwnerInfraOrg represents the shared infra-org rotation which
	// currently manages Sourcegraph.com.
	ObservableOwnerInfraOrg = registerObservableOwner(ObservableOwner{
		opsgenieTeam:     "infra-support",
		teamName:         "Infrastructure Org",
		handbookSlug:     "infrastructure",
		handbookBasePath: "/departments/engineering",
	})
	// ObservableOwnerSearch is the Search team.
	ObservableOwnerSearch = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "search",
		teamName:     "Search",
		handbookSlug: "search/product",
	})
	// ObservableOwnerSearchCore is the Search Core team.
	ObservableOwnerSearchCore = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "search-core",
		teamName:     "Search Core",
		handbookSlug: "search/core",
	})
	// ObservableOwnerBatches is the Batch Changes team.
	ObservableOwnerBatches = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "batch-changes",
		teamName:     "Batch Changes",
		handbookSlug: "batch-changes",
	})
	// ObservableOwnerCodeIntel is the Code intelligence team.
	ObservableOwnerCodeIntel = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "code-intel",
		teamName:     "Code intelligence",
		handbookSlug: "code-intelligence",
	})
	// ObservableOwnerSource is the Source team.
	ObservableOwnerSource = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "source",
		teamName:     "Source",
		handbookSlug: "source",
	})
	// ObservableOwnerCodeInsights is the Code Insights team.
	ObservableOwnerCodeInsights = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "code-insights",
		teamName:     "Code Insights",
		handbookSlug: "code-insights",
	})
	// ObservableOwnerDataAnalytics is the Data & Analytics team.
	ObservableOwnerDataAnalytics = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "data-analytics",
		teamName:     "Data & Analytics",
		handbookSlug: "data-analytics",
	})
	// ObservableOwnerCody is the Cody team.
	ObservableOwnerCody = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "cody",
		teamName:     "Cody",
		handbookSlug: "cody",
	})
	// ObservableOwnerOwn is the own team.
	ObservableOwnerOwn = registerObservableOwner(ObservableOwner{
		opsgenieTeam: "own",
		teamName:     "own",
		handbookSlug: "own",
	})
)
// opsgenieTeamPattern matches a well-formed Opsgenie team name: one or more
// groups of lowercase ASCII letters, joined by single hyphens.
//
// Accepted:
//
//	foo
//	foo-bar
//	foo-bar-baz
//
// Rejected:
//
//	Foo
//	FOO
//	Foo-Bar
//	foo_bar
var opsgenieTeamPattern = regexp.MustCompile(`^([a-z]+)(-[a-z]+)*?$`)
// validate does a simple offline validation that this owner is not a zero value
// and that the opsgenie team name matches the expected pattern.
//
// It returns an error describing the first problem found, or nil if the owner
// passes both checks. It does not check that the Opsgenie team or the handbook
// page actually exist.
func (o ObservableOwner) validate() error {
	if o == (ObservableOwner{}) {
		return errors.New("Owner must be set")
	}
	if !opsgenieTeamPattern.MatchString(o.opsgenieTeam) {
		// Format the name as a string: passing a []byte to %v prints the
		// numeric byte values, which makes the error unreadable.
		return errors.Errorf(`Owner.opsgenieteam has invalid format: "%v"`, o.opsgenieTeam)
	}
	return nil
}
// toMarkdown renders the owner as a Markdown link: the link text is the team
// name and the target is the team's handbook page.
func (o ObservableOwner) toMarkdown() string {
	page := o.getHandbookPageURL()
	return fmt.Sprintf("[Sourcegraph %s team](%s)", o.teamName, page)
}
// getHandbookPageURL returns the https URL of the owner's team page in the
// handbook. The page is resolved under handbookBasePath when it is set, and
// under /departments/engineering/teams otherwise.
func (o ObservableOwner) getHandbookPageURL() string {
	const defaultBasePath = "/departments/engineering/teams"

	base := o.handbookBasePath
	if base == "" {
		base = defaultBasePath
	}
	// path.Join also collapses the duplicate slash produced by the leading "/"
	// of the base path.
	page := path.Join("handbook.sourcegraph.com", base, o.handbookSlug)
	return "https://" + page
}
// allKnownOwners holds every owner passed through registerObservableOwner,
// keyed by team name.
var allKnownOwners = map[string]ObservableOwner{}

// registerObservableOwner validates o, records it in allKnownOwners under its
// team name, and returns it unchanged. It panics if o fails validation or if an
// owner with the same team name has already been registered.
func registerObservableOwner(o ObservableOwner) ObservableOwner {
	err := o.validate()
	if err != nil {
		panic(err)
	}

	_, duplicate := allKnownOwners[o.teamName]
	if duplicate {
		panic(errors.Newf("duplicate ObservableOwner %+v", o))
	}

	allKnownOwners[o.teamName] = o
	return o
}

View File

@ -0,0 +1,211 @@
package monitoring
import (
"bytes"
"context"
"encoding/json"
"flag"
"net/http"
"net/url"
"os"
"slices"
"testing"
"github.com/hexops/autogold/v2"
opsgenie "github.com/opsgenie/opsgenie-go-sdk-v2/client"
opsgenieteam "github.com/opsgenie/opsgenie-go-sdk-v2/team"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/maps"
)
// onlineCheck is the -online test flag. It defaults to false, in which case the
// tests in this file that contact external services skip themselves.
var onlineCheck = flag.Bool("online", false, "Run online checks")
// opsgenieResponderConfig is the JSON shape of a single responder entry in an
// Opsgenie notifier. The trailing comments show the values this test writes.
type opsgenieResponderConfig struct {
Type string `json:"type"` // "team"
Name string `json:"name"` // owner.opsgenieTeam
}
// opsgenieNotifierConfig is the JSON shape of an Opsgenie notifier and the
// responders it routes to.
type opsgenieNotifierConfig struct {
Type string `json:"type"` // "opsgenie"
Responders []opsgenieResponderConfig `json:"responders"`
}
// notifierConfig is the JSON shape of one alert-routing entry generated by
// TestOwnersOpsgenieTeam for the 'observability.alerts' site configuration.
type notifierConfig struct {
Level string `json:"level"` // "critical"
Notifier opsgenieNotifierConfig `json:"notifier"`
Owners []string `json:"owners"` // owner.opsgenieTeam
}
// TestOwnersOpsgenieTeam checks Opsgenie team details of each owner.
//
// The test only runs when the -online flag is passed, and then requires
// OPSGENIE_API_KEY to be set. Each owner is looked up by its Opsgenie team name
// in its own subtest; owners whose lookup succeeds are collected into the
// notifier configuration that is compared against the golden value below.
func TestOwnersOpsgenieTeam(t *testing.T) {
	if !*onlineCheck {
		t.Skip("-online flag not set, skipping online checks")
	}

	opsgenieKey := os.Getenv("OPSGENIE_API_KEY")
	if opsgenieKey == "" {
		// Online checks were explicitly requested, so a missing key is a
		// failure rather than a skip.
		t.Fatal("OPSGENIE_API_KEY must be set to run online Opsgenie checks")
	}
	client, err := opsgenieteam.NewClient(&opsgenie.Config{
		ApiKey: opsgenieKey,
	})
	require.NoError(t, err)

	ctx := context.Background()

	// As part of this test, we also build notifier config of all valid owners
	// so that they can be included in Sourcegraph.com's 'observability.alerts'
	// configuration. Configuration with invalid targets means that alerts might
	// end up not going to _any_ team, so we want to make sure to skip those
	// owners. If a team is kind enough to set up a real owner and opsgenie team
	// this helps make sure they are included as well.
	var observabilityAlertsConfig []notifierConfig

	// Count failed subtests for a convenience summary at the end of the test
	var failed int

	// Range over stable sort of owners for test stability
	owners := maps.Keys(allKnownOwners)
	slices.Sort(owners)
	for _, key := range owners {
		owner := allKnownOwners[key]
		if t.Run(owner.teamName, func(t *testing.T) {
			team, err := client.Get(ctx, &opsgenieteam.GetTeamRequest{
				IdentifierType:  opsgenieteam.Name,
				IdentifierValue: owner.opsgenieTeam,
			})
			assert.NoError(t, err)
			if assert.NotNil(t, team) {
				t.Logf("TeamMeta: %+v", team.TeamMeta)
				t.Logf("Description: %q", team.Description)
				t.Logf("Members: %d", len(team.Members))
			}
		}) {
			observabilityAlertsConfig = append(observabilityAlertsConfig, notifierConfig{
				Level: "critical",
				Notifier: opsgenieNotifierConfig{
					Type: "opsgenie",
					Responders: []opsgenieResponderConfig{{
						Type: "team",
						Name: owner.opsgenieTeam,
					}},
				},
				Owners: []string{owner.opsgenieTeam},
			})
		} else {
			failed++
		}
	}

	var data bytes.Buffer
	enc := json.NewEncoder(&data)
	enc.SetIndent(" ", " ")
	assert.NoError(t, enc.Encode(observabilityAlertsConfig))
	// The below can be copy-pasted into
	// https://sourcegraph.sourcegraph.com/search?q=context:global+repo:github.com/sourcegraph/deploy-sourcegraph-cloud+file:overlays/prod/frontend/files/site.json+%22observability.alerts%22:+%5B...%5D&patternType=structural&sm=1&groupBy=repo
	autogold.Expect(`[
{
"level": "critical",
"notifier": {
"type": "opsgenie",
"responders": [
{
"type": "team",
"name": "code-insights"
}
]
},
"owners": [
"code-insights"
]
},
{
"level": "critical",
"notifier": {
"type": "opsgenie",
"responders": [
{
"type": "team",
"name": "code-intel"
}
]
},
"owners": [
"code-intel"
]
},
{
"level": "critical",
"notifier": {
"type": "opsgenie",
"responders": [
{
"type": "team",
"name": "infra-support"
}
]
},
"owners": [
"infra-support"
]
},
{
"level": "critical",
"notifier": {
"type": "opsgenie",
"responders": [
{
"type": "team",
"name": "source"
}
]
},
"owners": [
"source"
]
}
]
`).Equal(t, data.String())

	if failed > 0 {
		t.Errorf("%d/%d ObservableOwners do not have valid Opsgenie teams",
			failed, len(allKnownOwners))
	}
}
// TestOwnersHandbookPages checks that the handbook page URL of each owner is
// valid and exists.
//
// The test only runs when the -online flag is passed. Each owner gets its own
// subtest, which fetches the owner's handbook page and expects HTTP 200.
func TestOwnersHandbookPages(t *testing.T) {
	if !*onlineCheck {
		t.Skip("-online flag not set, skipping online checks")
	}

	// Range over a stable sort of owners so that subtests run in a
	// deterministic order rather than in random map iteration order.
	owners := maps.Keys(allKnownOwners)
	slices.Sort(owners)

	var failed int
	for _, key := range owners {
		owner := allKnownOwners[key]
		ok := t.Run(owner.teamName, func(t *testing.T) {
			page, err := url.Parse(owner.getHandbookPageURL())
			if !assert.NoError(t, err) {
				return
			}
			resp, err := http.DefaultClient.Do(&http.Request{
				Method: http.MethodGet,
				URL:    page,
			})
			if !assert.NoError(t, err) {
				return
			}
			// Close the body so the underlying connection is released.
			defer resp.Body.Close()
			assert.Equal(t, http.StatusOK, resp.StatusCode)
		})
		if !ok {
			failed++
		}
	}

	if failed > 0 {
		t.Errorf("%d/%d ObservableOwners do not point to valid handbook pages",
			failed, len(allKnownOwners))
	}
}