mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 17:51:57 +00:00
repo-updater: Add critical alerts (#14530)
* repo-updater: Add critical alerts The number of user added repos is greater than 90% of our hard limit of 200k. Number of external services added is greater than 20k. No external services have synced in more than 8 hours. By default we back off for up to 8 hours, so anything higher than this indicates a problem. * syncer_sync_start should be a rate Alert when we've performed <= 1 syncs for 8 hours. Ideally we want this to be <= 0 but our monitoring package doesn't allow this. Therefore we need to make this a warning as it'll fire for instances that only have 1 external service defined. * Fill in possible solutions and remove unnecessary panel * Adjust rate * Update monitoring/repo_updater.go Co-authored-by: Tomás Senart <tomas@sourcegraph.com> * Update monitoring/repo_updater.go Co-authored-by: Robert Lin <robert@bobheadxi.dev> * Update possible error descriptions * Fix alert * Switch alert to critical Co-authored-by: Tomás Senart <tomas@sourcegraph.com> Co-authored-by: Robert Lin <robert@bobheadxi.dev>
This commit is contained in:
parent
61143bdffa
commit
9214e6656f
@ -836,6 +836,8 @@ func (s *Syncer) observe(ctx context.Context, family, title string) (context.Con
|
||||
tr, ctx := trace.New(ctx, family, title)
|
||||
|
||||
return ctx, func(d *Diff, err *error) {
|
||||
syncStarted.WithLabelValues(family).Inc()
|
||||
|
||||
now := s.Now()
|
||||
took := s.Now().Sub(began).Seconds()
|
||||
|
||||
@ -862,7 +864,6 @@ func (s *Syncer) observe(ctx context.Context, family, title string) (context.Con
|
||||
tr.LogFields(fields...)
|
||||
|
||||
lastSync.WithLabelValues(family).Set(float64(now.Unix()))
|
||||
syncStarted.WithLabelValues(family).Inc()
|
||||
|
||||
success := err == nil || *err == nil
|
||||
syncDuration.WithLabelValues(strconv.FormatBool(success), family).Observe(took)
|
||||
|
||||
@ -2896,7 +2896,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 28800s+ time since oldest sync for 5m0s_
|
||||
- _repo-updater: 28800s+ time since oldest sync for 10m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
@ -2905,7 +2905,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_repo-updater_src_repoupdater_max_sync_backoff"
|
||||
"critical_repo-updater_src_repoupdater_max_sync_backoff"
|
||||
]
|
||||
```
|
||||
|
||||
@ -2913,11 +2913,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 100+ sync was started for 5m0s_
|
||||
- _repo-updater: less than 1 sync was started for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- None
|
||||
- Check repo-updater logs for errors. Ignore this alert if only one code host connection is defined
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -2934,7 +2934,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host.
|
||||
- Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -2951,7 +2951,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host.
|
||||
- Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -2964,10 +2964,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 1000+ repositories synced for 5m0s_
|
||||
- _repo-updater: less than 1 repositories synced for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check network connectivity to code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -2980,10 +2981,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 1000+ repositories sourced for 5m0s_
|
||||
- _repo-updater: less than 1 repositories sourced for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check network connectivity to code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -2992,6 +2994,23 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
]
|
||||
```
|
||||
|
||||
## repo-updater: user_added_repos
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 180000+ total number of user added repos for 5m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check for unusual spikes in user added repos. Each user is only allowed to add 2000
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"critical_repo-updater_user_added_repos"
|
||||
]
|
||||
```
|
||||
|
||||
## repo-updater: purge_failed
|
||||
|
||||
**Descriptions:**
|
||||
@ -3000,6 +3019,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater's connectivity with gitserver and gitserver logs
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3008,30 +3028,15 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
]
|
||||
```
|
||||
|
||||
## repo-updater: purge_success
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 10+ repositories purge succeeded for 5m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_repo-updater_purge_success"
|
||||
]
|
||||
```
|
||||
|
||||
## repo-updater: sched_auto_fetch
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 1000+ repositories scheduled due to hitting a deadline for 5m0s_
|
||||
- _repo-updater: less than 1 repositories scheduled due to hitting a deadline for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs. This is expected to fire if there are no user added code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3044,10 +3049,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 1000+ repositories scheduled due to user traffic for 5m0s_
|
||||
- _repo-updater: less than 1 repositories scheduled due to user traffic for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs. This is expected to fire if there are no user added code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3060,10 +3066,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 1000+ repositories managed by the scheduler for 5m0s_
|
||||
- _repo-updater: less than 1 repositories managed by the scheduler for 10m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs. This is expected to fire if there are no user added code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3080,11 +3087,12 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs. The queue should drop as items are sent to GitServer
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_repo-updater_sched_update_queue_length"
|
||||
"critical_repo-updater_sched_update_queue_length"
|
||||
]
|
||||
```
|
||||
|
||||
@ -3092,10 +3100,11 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Descriptions:**
|
||||
|
||||
- _repo-updater: 10+ scheduler loops for 5m0s_
|
||||
- _repo-updater: less than 1 scheduler loops for 8h0m0s_
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs for errors. This is expected to fire if there are no user added code hosts
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3112,6 +3121,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- Check repo-updater logs for errors
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3250,12 +3260,12 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- None
|
||||
- Check for spikes in external services, could be abuse
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
"observability.silenceAlerts": [
|
||||
"warning_repo-updater_src_repoupdater_external_services_total"
|
||||
"critical_repo-updater_src_repoupdater_external_services_total"
|
||||
]
|
||||
```
|
||||
|
||||
@ -3267,7 +3277,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- None
|
||||
- Check for spikes in external services, could be abuse
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3302,7 +3312,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- None
|
||||
- Check repo-updater logs. Jobs older than 1 day should have been removed.
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
@ -3319,7 +3329,7 @@ To learn more about Sourcegraph's alerting, see [our alerting documentation](htt
|
||||
|
||||
**Possible solutions:**
|
||||
|
||||
- None
|
||||
- Check repo-updater logs. Check code host connectivity
|
||||
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
|
||||
|
||||
```json
|
||||
|
||||
@ -37,7 +37,7 @@ func RepoUpdater() *Container {
|
||||
Description: "time since oldest sync",
|
||||
Query: `src_repoupdater_max_sync_backoff`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 8 * time.Hour.Seconds(), For: 5 * time.Minute},
|
||||
Critical: Alert{GreaterOrEqual: 8 * time.Hour.Seconds(), For: 10 * time.Minute},
|
||||
PanelOptions: PanelOptions().LegendFormat("seconds").Unit(Seconds),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "Make sure there are external services added with valid tokens",
|
||||
@ -47,12 +47,12 @@ func RepoUpdater() *Container {
|
||||
Observable{
|
||||
Name: "syncer_sync_start",
|
||||
Description: "sync was started",
|
||||
Query: `src_repoupdater_syncer_start_sync`,
|
||||
Query: `rate(src_repoupdater_syncer_start_sync[5m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 100, For: 5 * time.Minute},
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{family}}-{{external_service_id}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "None",
|
||||
PossibleSolutions: "Check repo-updater logs for errors. Ignore this alert if only one code host connection is defined",
|
||||
},
|
||||
Observable{
|
||||
Name: "syncer_sync_duration",
|
||||
@ -62,7 +62,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 30, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().LegendFormat("seconds").Unit(Seconds),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host.",
|
||||
PossibleSolutions: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host",
|
||||
},
|
||||
Observable{
|
||||
Name: "source_duration",
|
||||
@ -72,7 +72,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 30, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().LegendFormat("seconds").Unit(Seconds),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host.",
|
||||
PossibleSolutions: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -81,20 +81,31 @@ func RepoUpdater() *Container {
|
||||
Description: "repositories synced",
|
||||
Query: `rate(src_repoupdater_syncer_synced_repos_total[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{state}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check network connectivity to code hosts",
|
||||
},
|
||||
Observable{
|
||||
Name: "sourced_repos",
|
||||
Description: "repositories sourced",
|
||||
Query: `rate(src_repoupdater_source_repos_total[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check network connectivity to code hosts",
|
||||
},
|
||||
Observable{
|
||||
Name: "user_added_repos",
|
||||
Description: "total number of user added repos",
|
||||
Query: `src_repoupdater_user_repos_total`,
|
||||
DataMayNotExist: true,
|
||||
// 90% of our enforced limit
|
||||
Critical: Alert{GreaterOrEqual: 200000 * 0.9, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "Check for unusual spikes in user added repos. Each user is only allowed to add 2000",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -106,17 +117,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 1, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
},
|
||||
Observable{
|
||||
Name: "purge_success",
|
||||
Description: "repositories purge succeeded",
|
||||
Query: `rate(src_repoupdater_purge_success[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 10, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater's connectivity with gitserver and gitserver logs",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -125,20 +126,20 @@ func RepoUpdater() *Container {
|
||||
Description: "repositories scheduled due to hitting a deadline",
|
||||
Query: `rate(src_repoupdater_sched_auto_fetch[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs. This is expected to fire if there are no user added code hosts",
|
||||
},
|
||||
Observable{
|
||||
Name: "sched_manual_fetch",
|
||||
Description: "repositories scheduled due to user traffic",
|
||||
Query: `rate(src_repoupdater_sched_manual_fetch[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs. This is expected to fire if there are no user added code hosts",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -147,30 +148,30 @@ func RepoUpdater() *Container {
|
||||
Description: "repositories managed by the scheduler",
|
||||
Query: `src_repoupdater_sched_known_repos`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 10 * time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs. This is expected to fire if there are no user added code hosts",
|
||||
},
|
||||
Observable{
|
||||
Name: "sched_update_queue_length",
|
||||
Description: "repositories queued for update",
|
||||
Query: `src_repoupdater_sched_update_queue_length`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Critical: Alert{GreaterOrEqual: 1000, For: 5 * time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs. The queue should drop as items are sent to GitServer",
|
||||
},
|
||||
Observable{
|
||||
Name: "sched_loops",
|
||||
Description: "scheduler loops",
|
||||
Query: `rate(src_repoupdater_sched_loops[1m])`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 10, For: 5 * time.Minute}, // NOTE: There is really no point to have such warning
|
||||
Warning: Alert{LessOrEqual: 1, For: 8 * time.Hour},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs for errors. This is expected to fire if there are no user added code hosts",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -182,7 +183,7 @@ func RepoUpdater() *Container {
|
||||
Critical: Alert{GreaterOrEqual: 1, For: time.Minute},
|
||||
PanelOptions: PanelOptions().Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "none",
|
||||
PossibleSolutions: "Check repo-updater logs for errors",
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -288,10 +289,10 @@ func RepoUpdater() *Container {
|
||||
Description: "the total number of external services",
|
||||
Query: `src_repoupdater_external_services_total`,
|
||||
DataMayNotExist: true,
|
||||
Warning: Alert{GreaterOrEqual: 20000, For: 1 * time.Hour},
|
||||
Critical: Alert{GreaterOrEqual: 20000, For: 1 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{type}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "None",
|
||||
PossibleSolutions: "Check for spikes in external services, could be abuse",
|
||||
},
|
||||
Observable{
|
||||
Name: "src_repoupdater_user_external_services_total",
|
||||
@ -301,7 +302,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 20000, For: 1 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{type}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "None",
|
||||
PossibleSolutions: "Check for spikes in external services, could be abuse",
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -326,7 +327,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 100000, For: 1 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{type}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "None",
|
||||
PossibleSolutions: "Check repo-updater logs. Jobs older than 1 day should have been removed.",
|
||||
},
|
||||
Observable{
|
||||
Name: "repoupdater_errored_sync_jobs_total",
|
||||
@ -336,7 +337,7 @@ func RepoUpdater() *Container {
|
||||
Warning: Alert{GreaterOrEqual: 100, For: 1 * time.Hour},
|
||||
PanelOptions: PanelOptions().LegendFormat("{{type}}").Unit(Number),
|
||||
Owner: ObservableOwnerCloud,
|
||||
PossibleSolutions: "None",
|
||||
PossibleSolutions: "Check repo-updater logs. Check code host connectivity",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
Loading…
Reference in New Issue
Block a user