diff --git a/doc/admin/observability/alert_solutions.md b/doc/admin/observability/alert_solutions.md index 3ce0073be1d..3e95745d687 100644 --- a/doc/admin/observability/alert_solutions.md +++ b/doc/admin/observability/alert_solutions.md @@ -2710,7 +2710,8 @@ with your code hosts connections or networking issues affecting communication wi **Descriptions** -- critical repo-updater: 0+ site level external service sync error rate for 10m0s +- warning repo-updater: 0.5+ site level external service sync error rate for 10m0s +- critical repo-updater: 1+ site level external service sync error rate for 10m0s **Possible solutions** @@ -2725,6 +2726,7 @@ with your code hosts connections or networking issues affecting communication wi ```json "observability.silenceAlerts": [ + "warning_repo-updater_src_repoupdater_syncer_sync_errors_total", "critical_repo-updater_src_repoupdater_syncer_sync_errors_total" ] ``` diff --git a/monitoring/definitions/repo_updater.go b/monitoring/definitions/repo_updater.go index 9f65bf42cfd..b2061c1ec6c 100644 --- a/monitoring/definitions/repo_updater.go +++ b/monitoring/definitions/repo_updater.go @@ -64,7 +64,8 @@ func RepoUpdater() *monitoring.Container { Name: "src_repoupdater_syncer_sync_errors_total", Description: "site level external service sync error rate", Query: `max by (family) (rate(src_repoupdater_syncer_sync_errors_total{owner!="user"}[5m]))`, - Critical: monitoring.Alert().Greater(0, nil).For(10 * time.Minute), + Warning: monitoring.Alert().Greater(0.5, nil).For(10 * time.Minute), + Critical: monitoring.Alert().Greater(1, nil).For(10 * time.Minute), Panel: monitoring.Panel().Unit(monitoring.Number).With(monitoring.PanelOptions.ZeroIfNoData()), Owner: monitoring.ObservableOwnerCoreApplication, PossibleSolutions: `