conf: add metric and associated alert if clients fail to update site configuration within 5 minutes (#57682)

Geoffrey Gilmore 2023-10-18 16:53:55 -07:00 committed by GitHub
parent 75cbd196f3
commit 9d34a48425
14 changed files with 1196 additions and 517 deletions


@@ -612,6 +612,38 @@ Generated query for warning alert: `max((sum by (alert_type) (increase(src_graph
<br />
## frontend: frontend_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "frontend" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> frontend: 300s+ maximum duration since last successful site configuration update (all "frontend" instances)
**Next steps**
- This indicates that one or more "frontend" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "frontend" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#frontend-frontend-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_frontend_frontend_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## frontend: internal_indexed_search_error_responses
<p class="subtitle">internal indexed search error responses every 5m</p>
@@ -1575,6 +1607,38 @@ Generated query for warning alert: `max((sum by (category) (increase(src_fronten
<br />
## gitserver: gitserver_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "gitserver" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> gitserver: 300s+ maximum duration since last successful site configuration update (all "gitserver" instances)
**Next steps**
- This indicates that one or more "gitserver" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "gitserver" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#gitserver-gitserver-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_gitserver_gitserver_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## gitserver: mean_blocked_seconds_per_conn_request
<p class="subtitle">mean blocked seconds per conn request</p>
@@ -3814,6 +3878,38 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*worker"}) / c
<br />
## worker: worker_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "worker" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> worker: 300s+ maximum duration since last successful site configuration update (all "worker" instances)
**Next steps**
- This indicates that one or more "worker" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "worker" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#worker-worker-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_worker_worker_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## repo-updater: src_repoupdater_max_sync_backoff
<p class="subtitle">time since oldest sync</p>
@@ -4609,6 +4705,38 @@ Generated query for critical alert: `min((max by (name) (src_gitlab_rate_limit_r
<br />
## repo-updater: repo_updater_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "repo_updater" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> repo-updater: 300s+ maximum duration since last successful site configuration update (all "repo_updater" instances)
**Next steps**
- This indicates that one or more "repo_updater" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "repo_updater" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#repo-updater-repo-updater-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_repo-updater_repo_updater_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## repo-updater: frontend_internal_api_error_responses
<p class="subtitle">frontend-internal API error responses every 5m by route</p>
@@ -5058,6 +5186,38 @@ Generated query for warning alert: `max((sum by (code) (increase(searcher_servic
<br />
## searcher: searcher_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "searcher" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> searcher: 300s+ maximum duration since last successful site configuration update (all "searcher" instances)
**Next steps**
- This indicates that one or more "searcher" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "searcher" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#searcher-searcher-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_searcher_searcher_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## searcher: mean_blocked_seconds_per_conn_request
<p class="subtitle">mean blocked seconds per conn request</p>
@@ -5447,6 +5607,38 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*searcher"}) /
<br />
## symbols: symbols_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "symbols" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> symbols: 300s+ maximum duration since last successful site configuration update (all "symbols" instances)
**Next steps**
- This indicates that one or more "symbols" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "symbols" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-symbols-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_symbols_symbols_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## symbols: mean_blocked_seconds_per_conn_request
<p class="subtitle">mean blocked seconds per conn request</p>
@@ -7831,6 +8023,38 @@ Generated query for critical alert: `min((sum by (app) (up{app=~".*otel-collecto
<br />
## embeddings: embeddings_site_configuration_duration_since_last_successful_update_by_instance
<p class="subtitle">maximum duration since last successful site configuration update (all "embeddings" instances)</p>
**Descriptions**
- <span class="badge badge-critical">critical</span> embeddings: 300s+ maximum duration since last successful site configuration update (all "embeddings" instances)
**Next steps**
- This indicates that one or more "embeddings" instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the "embeddings" logs, as well as frontend`s logs.
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#embeddings-embeddings-site-configuration-duration-since-last-successful-update-by-instance).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"critical_embeddings_embeddings_site_configuration_duration_since_last_successful_update_by_instance"
]
```
<sub>*Managed by the [Sourcegraph Cloud DevOps team](https://handbook.sourcegraph.com/departments/engineering/teams/devops).*</sub>
<details>
<summary>Technical details</summary>
Generated query for critical alert: `max((max(max_over_time(src_conf_client_time_since_last_successful_update_seconds[1m]))) >= 300)`
</details>
<br />
## embeddings: mean_blocked_seconds_per_conn_request
<p class="subtitle">mean blocked seconds per conn request</p>

File diff suppressed because it is too large.


@@ -43,6 +43,8 @@ go_library(
"@com_github_getsentry_sentry_go//:sentry-go",
"@com_github_grafana_regexp//:regexp",
"@com_github_hashicorp_cronexpr//:cronexpr",
"@com_github_prometheus_client_golang//prometheus",
"@com_github_prometheus_client_golang//prometheus/promauto",
"@com_github_sourcegraph_jsonx//:jsonx",
"@com_github_sourcegraph_log//:log",
"@com_github_xeipuuv_gojsonschema//:gojsonschema",


@@ -8,6 +8,7 @@ import (
"sync/atomic"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/api/internalapi"
"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
@@ -28,6 +29,10 @@ type client struct {
// should be closed when future queries to the client returns the most up to date
// configuration.
sourceUpdates <-chan chan struct{}
// metricDurationSinceLastSuccessfulUpdateSeconds measures the duration in seconds since the client's
// last successful update from the configuration source
metricDurationSinceLastSuccessfulUpdateSeconds prometheus.Gauge
}
var _ conftypes.UnifiedQuerier = &client{}
@@ -47,7 +52,12 @@ func DefaultClient() *client {
// MockClient returns a client in the same basic configuration as the DefaultClient, but is not limited to a global singleton.
// This is useful to mock configuration in tests without race conditions modifying values when running tests in parallel.
func MockClient() *client {
return &client{store: newStore()}
return &client{
store: newStore(),
metricDurationSinceLastSuccessfulUpdateSeconds: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "src_mock_conf_client_time_since_last_successful_update_seconds",
Help: "Time since the last successful update of the configuration by the mock conf client"}),
}
}
// Raw returns a copy of the raw configuration.
@@ -277,8 +287,12 @@ func (c *client) continuouslyUpdate(optOnlySetByTests *continuousUpdateOptions)
// error on this initial attempt.
_ = c.fetchAndUpdate(opts.logger)
start := time.Now()
lastSuccessfulUpdate := time.Now()
for {
if c.metricDurationSinceLastSuccessfulUpdateSeconds != nil { // Update configuration latency at the top of the loop
c.metricDurationSinceLastSuccessfulUpdateSeconds.Set(time.Since(lastSuccessfulUpdate).Seconds())
}
logger := opts.logger
// signalDoneReading, if set, indicates that we were prompted to update because
@@ -293,19 +307,27 @@ func (c *client) continuouslyUpdate(optOnlySetByTests *continuousUpdateOptions)
logger = logger.With(log.String("triggered_by", "waitForSleep"))
}
if c.metricDurationSinceLastSuccessfulUpdateSeconds != nil { // Update configuration latency after sleeping
c.metricDurationSinceLastSuccessfulUpdateSeconds.Set(time.Since(lastSuccessfulUpdate).Seconds())
}
logger.Debug("checking for updates")
err := c.fetchAndUpdate(logger)
if err != nil {
// Suppress log messages for errors caused by the frontend being unreachable until we've
// given the frontend enough time to initialize (in case other services start up before
// the frontend), to reduce log spam.
if time.Since(start) > opts.delayBeforeUnreachableLog || !isFrontendUnreachableError(err) {
if time.Since(lastSuccessfulUpdate) > opts.delayBeforeUnreachableLog || !isFrontendUnreachableError(err) {
logger.Error("received error during background config update", log.Error(err))
}
} else {
// We successfully fetched the config, we reset the timer to give
// frontend time if it needs to restart
start = time.Now()
lastSuccessfulUpdate = time.Now()
}
if c.metricDurationSinceLastSuccessfulUpdateSeconds != nil { // Record the update latency after the fetch
c.metricDurationSinceLastSuccessfulUpdateSeconds.Set(time.Since(lastSuccessfulUpdate).Seconds())
}
// Indicate that we are done reading, if we were prompted to update by the updates
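Note on the metric updates above: the loop refreshes the gauge at three points (the top of each iteration, after waking from sleep, and after each fetch attempt), so the exported value keeps growing while fetches fail and drops back to roughly zero once `lastSuccessfulUpdate` is reset. A minimal, self-contained sketch of how that behaviour could be checked with `prometheus/testutil`; the test and metric names here are hypothetical, not part of this commit:

```go
package conf_test

import (
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestGaugeTracksTimeSinceLastSuccess(t *testing.T) {
	gauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "test_time_since_last_successful_update_seconds", // hypothetical test-only name
		Help: "Seconds since the last successful update.",
	})

	// Pretend the last successful config fetch happened 90 seconds ago,
	// then update the gauge the same way the client loop does.
	lastSuccessfulUpdate := time.Now().Add(-90 * time.Second)
	gauge.Set(time.Since(lastSuccessfulUpdate).Seconds())

	// testutil.ToFloat64 reads the current value of a single-metric collector.
	if got := testutil.ToFloat64(gauge); got < 89 || got > 120 {
		t.Fatalf("expected roughly 90s since last successful update, got %v", got)
	}
}
```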


@@ -10,6 +10,8 @@ import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sourcegraph/jsonx"
sglog "github.com/sourcegraph/log"
@@ -106,7 +108,13 @@ func getModeUncached() configurationMode {
var configurationServerFrontendOnlyInitialized = make(chan struct{})
func initDefaultClient() *client {
defaultClient := &client{store: newStore()}
defaultClient := &client{
store: newStore(),
metricDurationSinceLastSuccessfulUpdateSeconds: promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_conf_client_time_since_last_successful_update_seconds",
Help: "Time since the last successful update of the configuration by the conf client",
}),
}
mode := getMode()
// Don't kickoff the background updaters for the client/server
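One detail worth noting: initDefaultClient registers its gauge through promauto, which adds it to the process-wide default registry, while MockClient above builds an unregistered gauge with prometheus.NewGauge under a different metric name, presumably so parallel tests can each construct independent clients without duplicate-registration panics. A small sketch of that distinction; the demo_* metric names are hypothetical:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func main() {
	// promauto.NewGauge registers the gauge with prometheus.DefaultRegisterer
	// and panics if a collector with the same name is already registered,
	// which suits the single process-wide default configuration client.
	registered := promauto.NewGauge(prometheus.GaugeOpts{
		Name: "demo_time_since_last_successful_update_seconds", // hypothetical name for this sketch
		Help: "Seconds since the last successful update.",
	})
	registered.Set(42)

	// prometheus.NewGauge returns an unregistered gauge: many independent
	// instances can coexist, which is convenient for mock clients created
	// by tests running in parallel.
	unregistered := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "demo_mock_time_since_last_successful_update_seconds",
		Help: "Seconds since the last successful update (mock).",
	})
	unregistered.Set(7)

	// The promauto gauge appears in the default registry's output (alongside
	// the default Go and process collectors); the plain gauge does not.
	families, err := prometheus.DefaultGatherer.Gather()
	if err != nil {
		panic(err)
	}
	for _, f := range families {
		fmt.Println(f.GetName())
	}
}
```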


@@ -12,7 +12,23 @@ func Embeddings() *monitoring.Dashboard {
Name: "embeddings",
Title: "Embeddings",
Description: "Handles embeddings searches.",
Variables: []monitoring.ContainerVariable{
{
Label: "instance",
Name: "instance",
OptionsLabelValues: monitoring.ContainerVariableOptionsLabelValues{
Query: "src_embeddings_cache_hit_count",
LabelName: "instance",
ExampleOption: "embeddings:6099",
},
Multi: true,
},
},
Groups: []monitoring.Group{
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "embeddings",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCody, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCody, nil),


@@ -333,6 +333,11 @@ func Frontend() *monitoring.Dashboard {
},
},
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "frontend",
InstanceFilterRegex: `${internalInstance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.CodeIntelligence.NewResolversGroup(containerName),
shared.CodeIntelligence.NewAutoIndexEnqueuerGroup(containerName),
shared.CodeIntelligence.NewDBStoreGroup(containerName),


@@ -554,6 +554,11 @@ func GitServer() *monitoring.Dashboard {
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSearchCore),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "gitserver",
InstanceFilterRegex: `${shard:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.CodeIntelligence.NewCoursierGroup(containerName),
shared.CodeIntelligence.NewNpmGroup(containerName),


@@ -594,6 +594,10 @@ func RepoUpdater() *monitoring.Dashboard {
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSource),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "repo_updater",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.HTTP.NewHandlersGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),


@@ -240,6 +240,10 @@ regularly above 0 it is a sign for further investigation.`,
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSearchCore),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "searcher",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerSearchCore, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerSearchCore, nil),


@@ -28,6 +28,7 @@ go_library(
"provisioning.go",
"queues.go",
"shared.go",
"site_configuration.go",
"standard.go",
"usage_data_pipeline.go",
"workerutil.go",


@@ -0,0 +1,73 @@
package shared
import (
"fmt"
"strings"
"time"
"github.com/iancoleman/strcase"
"github.com/sourcegraph/sourcegraph/monitoring/monitoring"
)
type SiteConfigurationMetricsOptions struct {
// HumanServiceName is the short, lowercase, snake_case, human-readable name of the service that we're gathering metrics for.
//
// Example: "gitserver"
HumanServiceName string
// InstanceFilterRegex is the PromQL regex that's used to filter the
// site configuration client metrics to only those emitted by the instance(s) that we're interested in.
//
// Example: (gitserver-0 | gitserver-1)
InstanceFilterRegex string
}
// NewSiteConfigurationClientMetricsGroup creates a group containing site configuration fetching latency statistics for the service
// specified in the given options.
func NewSiteConfigurationClientMetricsGroup(opts SiteConfigurationMetricsOptions, owner monitoring.ObservableOwner) monitoring.Group {
opts.HumanServiceName = strcase.ToSnake(opts.HumanServiceName)
metric := func(base string, labelFilters ...string) string {
metric := base
instanceLabelFilter := fmt.Sprintf("instance=~`%s`", opts.InstanceFilterRegex)
labelFilters = append(labelFilters, instanceLabelFilter)
if len(labelFilters) > 0 {
metric = fmt.Sprintf("%s{%s}", metric, strings.Join(labelFilters, ","))
}
return metric
}
return monitoring.Group{
Title: "Site configuration client update latency",
Hidden: true,
Rows: []monitoring.Row{
{
{
Name: fmt.Sprintf("%s_site_configuration_duration_since_last_successful_update_by_instance", opts.HumanServiceName),
Description: "duration since last successful site configuration update (by instance)",
Query: metric("src_conf_client_time_since_last_successful_update_seconds"),
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.Seconds),
Owner: owner,
NoAlert: true,
Interpretation: fmt.Sprintf("The duration since the configuration client used by the %q service last successfully updated its site configuration. Long durations could indicate issues updating the site configuration.", opts.HumanServiceName),
},
{
Name: fmt.Sprintf("%s_site_configuration_duration_since_last_successful_update_by_instance", opts.HumanServiceName),
Description: fmt.Sprintf("maximum duration since last successful site configuration update (all %q instances)", opts.HumanServiceName),
Query: fmt.Sprintf("max(max_over_time(%s[1m]))", metric("src_conf_client_time_since_last_successful_update_seconds")),
Panel: monitoring.Panel().Unit(monitoring.Seconds),
Owner: owner,
Critical: monitoring.Alert().GreaterOrEqual((5 * time.Minute).Seconds()),
NextSteps: fmt.Sprintf(`
- This indicates that one or more %q instances have not successfully updated the site configuration in over 5 minutes. This could be due to networking issues between services or problems with the site configuration service itself.
- Check for relevant errors in the %q logs, as well as frontend's logs.
`, opts.HumanServiceName, opts.HumanServiceName),
},
},
},
}
}
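For reference, the metric helper in this file always appends the instance label filter, so the per-instance panel queries the raw gauge filtered by instance and the aggregate panel wraps that query in `max(max_over_time(...[1m]))`. A standalone sketch of the same string construction, trimmed from the helper above:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	instanceFilterRegex := "${instance:regex}" // Grafana variable, interpolated at dashboard render time

	// Mirrors the metric() closure: append the instance filter and wrap the
	// base metric name with the label selector.
	metric := func(base string, labelFilters ...string) string {
		labelFilters = append(labelFilters, fmt.Sprintf("instance=~`%s`", instanceFilterRegex))
		return fmt.Sprintf("%s{%s}", base, strings.Join(labelFilters, ","))
	}

	perInstance := metric("src_conf_client_time_since_last_successful_update_seconds")
	aggregate := fmt.Sprintf("max(max_over_time(%s[1m]))", perInstance)

	fmt.Println(perInstance)
	// src_conf_client_time_since_last_successful_update_seconds{instance=~`${instance:regex}`}
	fmt.Println(aggregate)
	// max(max_over_time(src_conf_client_time_since_last_successful_update_seconds{instance=~`${instance:regex}`}[1m]))
}
```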


@@ -58,6 +58,10 @@ func Symbols() *monitoring.Dashboard {
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerCodeIntel),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "symbols",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),


@@ -127,6 +127,18 @@ func Worker() *monitoring.Dashboard {
Name: "worker",
Title: "Worker",
Description: "Manages background processes.",
Variables: []monitoring.ContainerVariable{
{
Label: "Instance",
Name: "instance",
OptionsLabelValues: monitoring.ContainerVariableOptionsLabelValues{
Query: "src_worker_jobs",
LabelName: "instance",
ExampleOption: "worker:6089",
},
Multi: true,
},
},
Groups: []monitoring.Group{
// src_worker_jobs
activeJobsGroup,
@@ -251,6 +263,11 @@ func Worker() *monitoring.Dashboard {
shared.SourcegraphOwn.NewOwnRepoIndexerWorkerGroup(containerName),
shared.SourcegraphOwn.NewOwnRepoIndexerResetterGroup(containerName),
shared.SourcegraphOwn.NewOwnRepoIndexerSchedulerGroup(containerName),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{
HumanServiceName: "worker",
InstanceFilterRegex: `${instance:regex}`,
}, monitoring.ObservableOwnerDevOps),
},
}
}