mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 15:51:43 +00:00
Add Redis monitoring dashboard (#26969)
Co-authored-by: Robert Lin <robert@bobheadxi.dev>
This commit is contained in:
parent
418d2d076a
commit
bdb298d25e
@ -19,6 +19,7 @@ All notable changes to Sourcegraph are documented in this file.
|
||||
- Added site config variable `cloneProgressLog` to optionally enable logging of clone progress to temporary files for debugging. Disabled by default. [#26568](https://github.com/sourcegraph/sourcegraph/pull/26568)
|
||||
- GNU's `wget` has been added to all `sourcegraph/*` Docker images that use `sourcegraph/alpine` as its base [#26823](https://github.com/sourcegraph/sourcegraph/pull/26823)
|
||||
- Added the "no results page", a help page shown if a search doesn't return any results [#26154](https://github.com/sourcegraph/sourcegraph/pull/26154)
|
||||
- Added monitoring page for Redis databases [#26967](https://github.com/sourcegraph/sourcegraph/issues/26967)
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@ -2376,6 +2376,300 @@ To learn more about Sourcegraph's alerting and how to set up alerts, see [our al
|
||||
|
||||
<br />
|
||||
|
||||
## redis: redis-store_up
|
||||
|
||||
<p class="subtitle">redis-store availability</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-critical">critical</span> redis: less than 1 redis-store availability for 10s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- Ensure redis-store is running
|
||||
- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#redis-redis-store-up).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"critical_redis_redis-store_up"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: redis-cache_up
|
||||
|
||||
<p class="subtitle">redis-cache availability</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-critical">critical</span> redis: less than 1 redis-cache availability for 10s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- Ensure redis-cache is running
|
||||
- More help interpreting this metric is available in the [dashboards reference](./dashboards.md#redis-redis-cache-up).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"critical_redis_redis-cache_up"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_cpu_usage_long_term
|
||||
|
||||
<p class="subtitle">container cpu usage total (90th percentile over 1d) across all cores by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 80%+ container cpu usage total (90th percentile over 1d) across all cores by instance for 336h0m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the redis-cache service.
|
||||
- **Docker Compose:** Consider increasing `cpus:` of the redis-cache container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-cpu-usage-long-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_cpu_usage_long_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_memory_usage_long_term
|
||||
|
||||
<p class="subtitle">container memory usage (1d maximum) by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 80%+ container memory usage (1d maximum) by instance for 336h0m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the redis-cache service.
|
||||
- **Docker Compose:** Consider increasing `memory:` of the redis-cache container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-memory-usage-long-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_memory_usage_long_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_cpu_usage_short_term
|
||||
|
||||
<p class="subtitle">container cpu usage total (5m maximum) across all cores by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 90%+ container cpu usage total (5m maximum) across all cores by instance for 30m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing CPU limits in the relevant `Deployment.yaml`.
|
||||
- **Docker Compose:** Consider increasing `cpus:` of the redis-cache container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-cpu-usage-short-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_cpu_usage_short_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_memory_usage_short_term
|
||||
|
||||
<p class="subtitle">container memory usage (5m maximum) by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 90%+ container memory usage (5m maximum) by instance
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing the memory limit in the relevant `Deployment.yaml`.
|
||||
- **Docker Compose:** Consider increasing `memory:` of the redis-cache container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-memory-usage-short-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_memory_usage_short_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_cpu_usage_long_term
|
||||
|
||||
<p class="subtitle">container cpu usage total (90th percentile over 1d) across all cores by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 80%+ container cpu usage total (90th percentile over 1d) across all cores by instance for 336h0m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing CPU limits in the `Deployment.yaml` for the redis-store service.
|
||||
- **Docker Compose:** Consider increasing `cpus:` of the redis-store container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-cpu-usage-long-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_cpu_usage_long_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_memory_usage_long_term
|
||||
|
||||
<p class="subtitle">container memory usage (1d maximum) by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 80%+ container memory usage (1d maximum) by instance for 336h0m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing memory limits in the `Deployment.yaml` for the redis-store service.
|
||||
- **Docker Compose:** Consider increasing `memory:` of the redis-store container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-memory-usage-long-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_memory_usage_long_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_cpu_usage_short_term
|
||||
|
||||
<p class="subtitle">container cpu usage total (5m maximum) across all cores by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 90%+ container cpu usage total (5m maximum) across all cores by instance for 30m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing CPU limits in the relevant `Deployment.yaml`.
|
||||
- **Docker Compose:** Consider increasing `cpus:` of the redis-store container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-cpu-usage-short-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_cpu_usage_short_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: provisioning_container_memory_usage_short_term
|
||||
|
||||
<p class="subtitle">container memory usage (5m maximum) by instance</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-warning">warning</span> redis: 90%+ container memory usage (5m maximum) by instance
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- **Kubernetes:** Consider increasing the memory limit in the relevant `Deployment.yaml`.
|
||||
- **Docker Compose:** Consider increasing `memory:` of the redis-store container in `docker-compose.yml`.
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-provisioning-container-memory-usage-short-term).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_redis_provisioning_container_memory_usage_short_term"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: pods_available_percentage
|
||||
|
||||
<p class="subtitle">percentage pods available</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-critical">critical</span> redis: less than 90% percentage pods available for 10m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-pods-available-percentage).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"critical_redis_pods_available_percentage"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## redis: pods_available_percentage
|
||||
|
||||
<p class="subtitle">percentage pods available</p>
|
||||
|
||||
**Descriptions**
|
||||
|
||||
- <span class="badge badge-critical">critical</span> redis: less than 90% percentage pods available for 10m0s
|
||||
|
||||
**Possible solutions**
|
||||
|
||||
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#redis-pods-available-percentage).
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"critical_redis_pods_available_percentage"
|
||||
]
|
||||
```
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<br />
|
||||
|
||||
## worker: worker_job_codeintel-janitor_count
|
||||
|
||||
<p class="subtitle">number of worker instances running the codeintel-janitor job</p>
|
||||
|
||||
@ -5646,6 +5646,256 @@ Query: `sum by(app) (up{app=~".*query-runner"}) / count by (app) (up{app=~".*que
|
||||
|
||||
<br />
|
||||
|
||||
## Redis
|
||||
|
||||
<p class="subtitle">Metrics from both redis databases.</p>
|
||||
|
||||
To see this dashboard, visit `/-/debug/grafana/d/redis/redis` on your Sourcegraph instance.
|
||||
|
||||
### Redis: Redis Store
|
||||
|
||||
#### redis: redis-store_up
|
||||
|
||||
<p class="subtitle">Redis-store availability</p>
|
||||
|
||||
A value of 1 indicates the service is currently running
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-redis-store-up) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100000` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `redis_up{app="redis-store"}`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
### Redis: Redis Cache
|
||||
|
||||
#### redis: redis-cache_up
|
||||
|
||||
<p class="subtitle">Redis-cache availability</p>
|
||||
|
||||
A value of 1 indicates the service is currently running
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-redis-cache-up) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100100` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `redis_up{app="redis-cache"}`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
### Redis: Provisioning indicators (not available on server)
|
||||
|
||||
#### redis: provisioning_container_cpu_usage_long_term
|
||||
|
||||
<p class="subtitle">Container cpu usage total (90th percentile over 1d) across all cores by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100200` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^redis-cache.*"}[1d])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_memory_usage_long_term
|
||||
|
||||
<p class="subtitle">Container memory usage (1d maximum) by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100201` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^redis-cache.*"}[1d])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_cpu_usage_short_term
|
||||
|
||||
<p class="subtitle">Container cpu usage total (5m maximum) across all cores by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100210` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^redis-cache.*"}[5m])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_memory_usage_short_term
|
||||
|
||||
<p class="subtitle">Container memory usage (5m maximum) by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100211` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^redis-cache.*"}[5m])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
### Redis: Provisioning indicators (not available on server)
|
||||
|
||||
#### redis: provisioning_container_cpu_usage_long_term
|
||||
|
||||
<p class="subtitle">Container cpu usage total (90th percentile over 1d) across all cores by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100300` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{name=~"^redis-store.*"}[1d])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_memory_usage_long_term
|
||||
|
||||
<p class="subtitle">Container memory usage (1d maximum) by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100301` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^redis-store.*"}[1d])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_cpu_usage_short_term
|
||||
|
||||
<p class="subtitle">Container cpu usage total (5m maximum) across all cores by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100310` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^redis-store.*"}[5m])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
#### redis: provisioning_container_memory_usage_short_term
|
||||
|
||||
<p class="subtitle">Container memory usage (5m maximum) by instance</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100311` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^redis-store.*"}[5m])`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
### Redis: Kubernetes monitoring (only available on Kubernetes)
|
||||
|
||||
#### redis: pods_available_percentage
|
||||
|
||||
<p class="subtitle">Percentage pods available</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-pods-available-percentage) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100400` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `sum by(app) (up{app=~".*redis-cache"}) / count by (app) (up{app=~".*redis-cache"}) * 100`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
### Redis: Kubernetes monitoring (only available on Kubernetes)
|
||||
|
||||
#### redis: pods_available_percentage
|
||||
|
||||
<p class="subtitle">Percentage pods available</p>
|
||||
|
||||
Refer to the [alert solutions reference](./alert_solutions.md#redis-pods-available-percentage) for 1 alert related to this panel.
|
||||
|
||||
To see this panel, visit `/-/debug/grafana/d/redis/redis?viewPanel=100500` on your Sourcegraph instance.
|
||||
|
||||
<sub>*Managed by the [Sourcegraph Devops team](https://handbook.sourcegraph.com/engineering/devops).*</sub>
|
||||
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: `sum by(app) (up{app=~".*redis-store"}) / count by (app) (up{app=~".*redis-store"}) * 100`
|
||||
|
||||
</details>
|
||||
|
||||
<br />
|
||||
|
||||
## Worker
|
||||
|
||||
<p class="subtitle">Manages background processes.</p>
|
||||
|
||||
70
monitoring/definitions/redis.go
Normal file
70
monitoring/definitions/redis.go
Normal file
@ -0,0 +1,70 @@
|
||||
package definitions
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/monitoring/definitions/shared"
|
||||
"github.com/sourcegraph/sourcegraph/monitoring/monitoring"
|
||||
)
|
||||
|
||||
func Redis() *monitoring.Container {
|
||||
const (
|
||||
redisCache = "redis-cache"
|
||||
redisStore = "redis-store"
|
||||
)
|
||||
|
||||
return &monitoring.Container{
|
||||
Name: "redis",
|
||||
Title: "Redis",
|
||||
Description: "Metrics from both redis databases.",
|
||||
NoSourcegraphDebugServer: true, // This is third-party service
|
||||
Groups: []monitoring.Group{
|
||||
{
|
||||
Title: "Redis Store",
|
||||
Hidden: false,
|
||||
Rows: []monitoring.Row{
|
||||
{
|
||||
{
|
||||
Name: "redis-store_up",
|
||||
Description: "redis-store availability",
|
||||
Owner: monitoring.ObservableOwnerDevOps,
|
||||
Query: `redis_up{app="redis-store"}`,
|
||||
DataMustExist: true,
|
||||
Panel: monitoring.Panel().LegendFormat("{{app}}"),
|
||||
Critical: monitoring.Alert().LessOrEqual(1, nil).For(10 * time.Second),
|
||||
PossibleSolutions: `
|
||||
- Ensure redis-store is running
|
||||
`,
|
||||
Interpretation: "A value of 1 indicates the service is currently running",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Title: "Redis Cache",
|
||||
Hidden: false,
|
||||
Rows: []monitoring.Row{
|
||||
{
|
||||
{
|
||||
Name: "redis-cache_up",
|
||||
Description: "redis-cache availability",
|
||||
Owner: monitoring.ObservableOwnerDevOps,
|
||||
Query: `redis_up{app="redis-cache"}`,
|
||||
Panel: monitoring.Panel().LegendFormat("{{app}}"),
|
||||
DataMustExist: true,
|
||||
Critical: monitoring.Alert().LessOrEqual(1, nil).For(10 * time.Second),
|
||||
PossibleSolutions: `
|
||||
- Ensure redis-cache is running
|
||||
`,
|
||||
Interpretation: "A value of 1 indicates the service is currently running",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
shared.NewProvisioningIndicatorsGroup(redisCache, monitoring.ObservableOwnerDevOps, nil),
|
||||
shared.NewProvisioningIndicatorsGroup(redisStore, monitoring.ObservableOwnerDevOps, nil),
|
||||
shared.NewKubernetesMonitoringGroup(redisCache, monitoring.ObservableOwnerDevOps, nil),
|
||||
shared.NewKubernetesMonitoringGroup(redisStore, monitoring.ObservableOwnerDevOps, nil),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -38,6 +38,7 @@ func main() {
|
||||
definitions.Postgres(),
|
||||
definitions.PreciseCodeIntelWorker(),
|
||||
definitions.QueryRunner(),
|
||||
definitions.Redis(),
|
||||
definitions.Worker(),
|
||||
definitions.RepoUpdater(),
|
||||
definitions.Searcher(),
|
||||
|
||||
@ -393,6 +393,7 @@ const (
|
||||
ObservableOwnerWeb ObservableOwner = "web"
|
||||
ObservableOwnerCoreApplication ObservableOwner = "core application"
|
||||
ObservableOwnerCodeInsights ObservableOwner = "code-insights"
|
||||
ObservableOwnerDevOps ObservableOwner = "devops"
|
||||
)
|
||||
|
||||
// toMarkdown returns a Markdown string that also links to the owner's team page
|
||||
|
||||
@ -613,6 +613,7 @@ commands:
|
||||
# We want to capture that output, but because it's fairly noisy, don't want to
|
||||
# display it in the normal case.
|
||||
GRAFANA_LOG_FILE: $HOME/.sourcegraph-dev/logs/grafana/grafana.log
|
||||
IMAGE: sourcegraph/grafana:dev
|
||||
CONTAINER: grafana
|
||||
PORT: 3370
|
||||
# docker containers must access things via docker host on non-linux platforms
|
||||
|
||||
Loading…
Reference in New Issue
Block a user