gitserver: Add observability for repo service (#63026)

Since we split out this service, we lost a few metrics on call counts and latencies.
This PR adds them back.

Closes #62785

Test plan:

Ran the dashboards locally and they return data. These dashboards are a 1:1 replica of the git service observability.
This commit is contained in:
Erik Seliger 2024-06-03 16:37:20 +02:00 committed by GitHub
parent a18904b6fc
commit 6d142c833f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1356 additions and 238 deletions

View File

@ -5,6 +5,7 @@ go_library(
name = "gitserver",
srcs = [
"mocks_temp.go",
"observability.go",
"repositoryservice.go",
],
importpath = "github.com/sourcegraph/sourcegraph/cmd/repo-updater/internal/gitserver",
@ -15,6 +16,10 @@ go_library(
"//internal/gitserver/connection",
"//internal/gitserver/v1:gitserver",
"//internal/grpc/defaults",
"//internal/metrics",
"//internal/observation",
"@com_github_sourcegraph_log//:log",
"@io_opentelemetry_go_otel//attribute",
],
)

View File

@ -0,0 +1,55 @@
package gitserver
import (
"fmt"
"sync"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// operations bundles the observation.Operation values used to instrument the
// repository service client. There is one operation per client method
// (DeleteRepository and FetchRepository).
type operations struct {
	deleteRepository, fetchRepository *observation.Operation
}
// newOperations constructs the operations for the repository service client.
//
// All operations share a single set of RED metrics registered on
// observationCtx.Registerer under the name "gitserver_repositoryservice_client"
// with the labels "op" and "scope". Each operation supplies one label value
// here (the method name); the remaining label value is presumably provided by
// the caller when the operation is invoked.
func newOperations(observationCtx *observation.Context) *operations {
	red := metrics.NewREDMetrics(
		observationCtx.Registerer,
		"gitserver_repositoryservice_client",
		metrics.WithLabels("op", "scope"),
		metrics.WithCountHelp("Total number of method invocations."),
	)

	// The same filter is used for every operation: regardless of the error
	// value it always selects observation.EmitForAllExceptLogs.
	errorFilter := func(error) observation.ErrorFilterBehaviour {
		return observation.EmitForAllExceptLogs
	}

	// build creates the operation for a single client method.
	build := func(method string) *observation.Operation {
		spec := observation.Op{
			Name:              fmt.Sprintf("reposervice.client.%s", method),
			MetricLabelValues: []string{method},
			Metrics:           red,
			ErrorFilter:       errorFilter,
		}
		return observationCtx.Operation(spec)
	}

	ops := new(operations)
	ops.deleteRepository = build("DeleteRepository")
	ops.fetchRepository = build("FetchRepository")
	return ops
}
// operationsInst is the package-wide operations instance. It is assigned only
// inside the function run by operationsInstOnce.
var operationsInst *operations

// operationsInstOnce guards the one-time construction of operationsInst.
var operationsInstOnce sync.Once

// getOperations returns the shared operations instance, building it on the
// first call with an observation context whose logger is scoped to
// "reposervice.client". Later calls return the same pointer.
//
// NOTE(review): the singleton presumably exists so the metrics are registered
// only once per process — confirm against metrics.NewREDMetrics.
func getOperations() *operations {
	operationsInstOnce.Do(func() {
		logger := log.Scoped("reposervice.client")
		operationsInst = newOperations(observation.NewContext(logger))
	})

	return operationsInst
}

View File

@ -4,10 +4,13 @@ import (
"context"
"time"
"go.opentelemetry.io/otel/attribute"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/gitserver/connection"
proto "github.com/sourcegraph/sourcegraph/internal/gitserver/v1"
"github.com/sourcegraph/sourcegraph/internal/grpc/defaults"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
type RepositoryServiceClient interface {
@ -15,34 +18,66 @@ type RepositoryServiceClient interface {
FetchRepository(context.Context, api.RepoName) (lastFetched, lastChanged time.Time, err error)
}
func NewRepositoryServiceClient() RepositoryServiceClient {
return &repositoryServiceClient{}
// NewRepositoryServiceClient returns a RepositoryServiceClient backed by
// repositoryServiceClient. The client uses the operations returned by
// getOperations and stores scope, which its methods pass along as a metric
// label value when recording observations.
func NewRepositoryServiceClient(scope string) RepositoryServiceClient {
return &repositoryServiceClient{
operations: getOperations(),
scope: scope,
}
}
type repositoryServiceClient struct{}
// repositoryServiceClient is the implementation returned by
// NewRepositoryServiceClient.
type repositoryServiceClient struct {
// operations holds the observation operations used to instrument each method.
operations *operations
// scope is supplied as a metric label value on every observation.
scope string
}
// DeleteRepository asks the gitserver client obtained for repo to delete the
// repository, using defaults.RetryPolicy for the call. The call is wrapped in
// the deleteRepository observation, tagged with the repo attribute and the
// client's scope as a metric label value.
func (c *repositoryServiceClient) DeleteRepository(ctx context.Context, repo api.RepoName) (err error) {
// err is a named result passed by pointer, presumably so the observation can
// read the final error when endObservation runs.
ctx, _, endObservation := c.operations.deleteRepository.With(ctx,
&err,
observation.Args{
Attrs: []attribute.KeyValue{
repo.Attr(),
},
MetricLabelValues: []string{c.scope},
},
)
defer endObservation(1, observation.Args{})
// NOTE(review): the next line is the pre-change signature as rendered by the
// diff view; it is superseded by the signature above.
func (c *repositoryServiceClient) DeleteRepository(ctx context.Context, repo api.RepoName) error {
// Obtain the client to use for this repository.
cc, err := c.clientForRepo(ctx, repo)
if err != nil {
return err
}
// Only the error matters; the response value is discarded.
_, err = cc.DeleteRepository(ctx, &proto.DeleteRepositoryRequest{
RepoName: string(repo),
}, defaults.RetryPolicy...)
return err
}
// FetchRepository asks the gitserver client obtained for repo to fetch the
// repository, using defaults.RetryPolicy for the call, and returns the
// last-fetched and last-changed timestamps from the response.
//
// The call is wrapped in the fetchRepository observation, tagged with the repo
// attribute and the client's scope as a metric label value. On failure both
// timestamps are the zero time.Time and err is non-nil.
func (c *repositoryServiceClient) FetchRepository(ctx context.Context, repo api.RepoName) (lastFetched, lastChanged time.Time, err error) {
	// err is the named result and is handed over by pointer, presumably so
	// the observation can read the final error when endObservation runs.
	obsArgs := observation.Args{
		Attrs:             []attribute.KeyValue{repo.Attr()},
		MetricLabelValues: []string{c.scope},
	}
	ctx, _, endObservation := c.operations.fetchRepository.With(ctx, &err, obsArgs)
	defer endObservation(1, observation.Args{})

	// Obtain the client to use for this repository.
	cc, err := c.clientForRepo(ctx, repo)
	if err != nil {
		return time.Time{}, time.Time{}, err
	}

	req := &proto.FetchRepositoryRequest{RepoName: string(repo)}
	resp, err := cc.FetchRepository(ctx, req, defaults.RetryPolicy...)
	if err != nil {
		return time.Time{}, time.Time{}, err
	}

	lastFetched = resp.GetLastFetched().AsTime()
	lastChanged = resp.GetLastChanged().AsTime()
	return lastFetched, lastChanged, nil
}

View File

@ -105,7 +105,7 @@ func PurgeOldestRepos(logger log.Logger, db database.DB, limit int, perSecond fl
// purge purges the repos matched by options. It returns only an error; the counts it tracks (e.g. total, success) are not returned to the caller.
func purge(ctx context.Context, logger log.Logger, db database.DB, options database.ListPurgableReposOptions) error {
start := time.Now()
gitserverClient := gitserver.NewRepositoryServiceClient()
gitserverClient := gitserver.NewRepositoryServiceClient("repoupdater.purgeworker")
var (
total int
success int

View File

@ -146,7 +146,7 @@ func (s *Server) RecloneRepository(ctx context.Context, req *proto.RecloneReposi
repo := rs[0]
svc := gitserver.NewRepositoryServiceClient()
svc := gitserver.NewRepositoryServiceClient("repoupdater.reclone")
if err := svc.DeleteRepository(ctx, repoName); err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to delete repository %q: %s", repoName, err))

View File

@ -109,7 +109,7 @@ func Main(ctx context.Context, observationCtx *observation.Context, ready servic
repos.ObservedSource(sourcerLogger, sourceMetrics),
)
syncer := repos.NewSyncer(observationCtx, store, src)
updateScheduler := scheduler.NewUpdateScheduler(logger, db, repogitserver.NewRepositoryServiceClient())
updateScheduler := scheduler.NewUpdateScheduler(logger, db, repogitserver.NewRepositoryServiceClient("repoupdater.scheduler"))
server := &repoupdater.Server{
Logger: logger,
Store: store,

File diff suppressed because it is too large Load Diff

View File

@ -14,8 +14,9 @@ import (
func GitServer() *monitoring.Dashboard {
const (
containerName = "gitserver"
grpcServiceName = "gitserver.v1.GitserverService"
containerName = "gitserver"
grpcGitServiceName = "gitserver.v1.GitserverService"
grpcRepositoryServiceName = "gitserver.v1.GitserverRepositoryService"
)
scrapeJobRegex := fmt.Sprintf(".*%s", containerName)
@ -31,7 +32,8 @@ func GitServer() *monitoring.Dashboard {
vcsSyncerVariableName := "vcsSyncerType"
grpcMethodVariable := shared.GRPCMethodVariable("gitserver", grpcServiceName)
grpcGitServiceMethodVariable := shared.GRPCMethodVariable("Git Service", grpcGitServiceName)
grpcRepositoryServiceMethodVariable := shared.GRPCMethodVariable("Repository Service", grpcRepositoryServiceName)
titleCaser := cases.Title(language.English)
@ -120,7 +122,8 @@ func GitServer() *monitoring.Dashboard {
},
Multi: true,
},
grpcMethodVariable,
grpcGitServiceMethodVariable,
grpcRepositoryServiceMethodVariable,
{
Label: "VCS Syncer Kind",
Name: vcsSyncerVariableName,
@ -588,6 +591,7 @@ func GitServer() *monitoring.Dashboard {
shared.GitServer.NewBackendGroup(containerName, true),
shared.GitServer.NewClientGroup("*"),
shared.GitServer.NewRepoClientGroup("*"),
shared.NewDiskMetricsGroup(
shared.DiskMetricsGroupOptions{
@ -602,32 +606,62 @@ func GitServer() *monitoring.Dashboard {
monitoring.ObservableOwnerSource,
),
// GitService
shared.NewGRPCServerMetricsGroup(
shared.GRPCServerMetricsOptions{
HumanServiceName: "gitserver",
RawGRPCServiceName: grpcServiceName,
HumanServiceName: "Git Service",
RawGRPCServiceName: grpcGitServiceName,
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcGitServiceMethodVariable.Name),
InstanceFilterRegex: `${shard:regex}`,
MessageSizeNamespace: "src",
}, monitoring.ObservableOwnerSource),
shared.NewGRPCInternalErrorMetricsGroup(
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "gitserver",
RawGRPCServiceName: grpcServiceName,
HumanServiceName: "Git Service",
RawGRPCServiceName: grpcGitServiceName,
Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcGitServiceMethodVariable.Name),
}, monitoring.ObservableOwnerSource),
shared.NewGRPCRetryMetricsGroup(
shared.GRPCRetryMetricsOptions{
HumanServiceName: "gitserver",
RawGRPCServiceName: grpcServiceName,
HumanServiceName: "Git Service",
RawGRPCServiceName: grpcGitServiceName,
Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcGitServiceMethodVariable.Name),
}, monitoring.ObservableOwnerSource),
// RepositoryService
shared.NewGRPCServerMetricsGroup(
shared.GRPCServerMetricsOptions{
HumanServiceName: "Repository Service",
RawGRPCServiceName: grpcRepositoryServiceName,
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcRepositoryServiceMethodVariable.Name),
InstanceFilterRegex: `${shard:regex}`,
MessageSizeNamespace: "src",
}, monitoring.ObservableOwnerSource),
shared.NewGRPCInternalErrorMetricsGroup(
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "Repository Service",
RawGRPCServiceName: grpcRepositoryServiceName,
Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcRepositoryServiceMethodVariable.Name),
}, monitoring.ObservableOwnerSource),
shared.NewGRPCRetryMetricsGroup(
shared.GRPCRetryMetricsOptions{
HumanServiceName: "Repository Service",
RawGRPCServiceName: grpcRepositoryServiceName,
Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcRepositoryServiceMethodVariable.Name),
}, monitoring.ObservableOwnerSource),
shared.NewSiteConfigurationClientMetricsGroup(shared.SiteConfigurationMetricsOptions{

View File

@ -396,6 +396,7 @@ func RepoUpdater() *monitoring.Dashboard {
},
shared.GitServer.NewClientGroup(containerName),
shared.GitServer.NewRepoClientGroup(containerName),
shared.Batches.NewDBStoreGroup(containerName),
shared.Batches.NewServiceGroup(containerName),

View File

@ -88,3 +88,35 @@ func (gitServer) NewClientGroup(containerName string) monitoring.Group {
},
})
}
// NewRepoClientGroup builds the observation group for the gitserver
// repository service client metrics of the given container:
//
//   - src_gitserver_repositoryservice_client_total
//   - src_gitserver_repositoryservice_client_duration_seconds_bucket
//   - src_gitserver_repositoryservice_client_errors_total
//
// The group is hidden, owned by monitoring.ObservableOwnerSource, and its
// observables are broken down by the "op" and "scope" labels. Both the
// per-label and the aggregate total, duration, errors and error-rate
// observables are configured with NoAlertsOption("none").
func (gitServer) NewRepoClientGroup(containerName string) monitoring.Group {
	observable := ObservableConstructorOptions{
		MetricNameRoot:        "gitserver_repositoryservice_client",
		MetricDescriptionRoot: "client",
		By:                    []string{"op", "scope"},
	}

	group := GroupConstructorOptions{
		Namespace:                    "gitserver",
		DescriptionRoot:              "Gitserver Repository Service Client",
		Hidden:                       true,
		ObservableConstructorOptions: observable,
	}

	// Observables broken down by the labels listed in observable.By.
	perLabel := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}

	// The same set of observables for the aggregate view.
	aggregate := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}

	options := ObservationGroupOptions{
		GroupConstructorOptions:       group,
		SharedObservationGroupOptions: perLabel,
		Aggregate:                     &aggregate,
	}
	return Observation.NewGroup(containerName, monitoring.ObservableOwnerSource, options)
}