mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 16:51:55 +00:00
Closes https://linear.app/sourcegraph/issue/SRC-369/finalize-memory-tracking-in-gitserver-on-linux This PR adds a memory tracking feature to all gitserver command, using the utilities introduced in https://app.graphite.dev/github/pr/sourcegraph/sourcegraph/62803. When the GITSERVER_MEMORY_OBSERVATION_ENABLED env var is set, an observer will be spawned that keeps track of the memory usage of the git invocation. This information is available in traces. Additionally, we'll print WARN logs if the commands uses more than 100 MB. ## Test plan Run sg start, and manually tweak the high memory usage threshold down from 500 MB to 1MB. ```diff diff --git a/cmd/gitserver/internal/git/gitcli/command.go b/cmd/gitserver/internal/git/gitcli/command.go index 393fa3c91e6..cf2630fc830 100644 --- a/cmd/gitserver/internal/git/gitcli/command.go +++ b/cmd/gitserver/internal/git/gitcli/command.go @@ -297,7 +297,7 @@ func (rc *cmdReader) waitCmd() error { return rc.err } -const highMemoryUsageThreshold = 500 * 1024 * 1024 // 500 MiB +const highMemoryUsageThreshold = 1 * 1024 * 1024 // 500 MiB func (rc *cmdReader) trace() { duration := time.Since(rc.cmdStart) ``` See log lines similar to the following when I run `sg start` (note the memory usage information in the log fields): ``` [ gitserver-1] WARN gitserver gitcli/command.go:363 High memory usage exec request {"TraceId": "5881f40e3bbbe4d26ec0f32b7f64f535", "SpanId": "a68818dc9f1f9ee2", "ev.Fields": {"cmd": "config", "cmd_ru_maxrss_kib": "5056", "cmd_ru_maxrss_human_readable": "5.2 MB", "traceID": "5881f40e3bbbe4d26ec0f32b7f64f535", "cmd_ru_inblock": "0", "args": "[git config --unset-all gc.auto]", "actor": "0", "cmd_duration_ms": "6", "user_time_ms": "2", "system_time_ms": "2", "repo": "github.com/sgtest/typescript-deps", "error": "git command [git config --unset-all gc.auto] failed with status code 5 (output: \"\")", "cmd_ru_majflt": "10", "cmd_ru_oublock": "0", "trace": "https://sourcegraph.test:3443/-/debug/jaeger/trace/5881f40e3bbbe4d26ec0f32b7f64f535", "exit_status": "5", "cmd_ru_minflt": "494"}} [ gitserver-1] WARN gitserver gitcli/command.go:363 High memory usage exec request {"TraceId": "93cb7e9b3a42cfc085f570b8ad4a2ded", "SpanId": "c35ecddb6ef89e24", "ev.Fields": {"cmd_duration_ms": "6", "system_time_ms": "2", "cmd_ru_maxrss_kib": "5072", "cmd_ru_maxrss_human_readable": "5.2 MB", "cmd_ru_minflt": "495", "cmd_ru_majflt": "10", "actor": "0", "exit_status": "5", "user_time_ms": "2", "cmd_ru_oublock": "0", "traceID": "93cb7e9b3a42cfc085f570b8ad4a2ded", "trace": "https://sourcegraph.test:3443/-/debug/jaeger/trace/93cb7e9b3a42cfc085f570b8ad4a2ded", "repo": "github.com/sgtest/update-ids-test-base", "cmd": "config", "args": "[git config --unset-all gc.auto]", "error": "git command [git config --unset-all gc.auto] failed with status code 5 (output: \"\")", "cmd_ru_inblock": "0"}} [ gitserver-1] WARN gitserver gitcli/command.go:363 High memory usage exec request {"TraceId": "346f1d04dc869f069b04bcabadec0665", "SpanId": "ec43228229e5f531", "ev.Fields": {"actor": "0", "error": "git command [git config --unset-all gc.auto] failed with status code 5 (output: \"\")", "cmd_ru_maxrss_kib": "4960", "trace": "https://sourcegraph.test:3443/-/debug/jaeger/trace/346f1d04dc869f069b04bcabadec0665", "args": "[git config --unset-all gc.auto]", "exit_status": "5", "cmd_duration_ms": "7", "system_time_ms": "2", "repo": "github.com/sgtest/utf8_test", "user_time_ms": "2", "cmd_ru_maxrss_human_readable": "5.1 MB", "cmd_ru_minflt": "487", "cmd_ru_inblock": "0", "cmd_ru_oublock": "0", "traceID": "346f1d04dc869f069b04bcabadec0665", "cmd": "config", "cmd_ru_majflt": "10"}} [ gitserver-1] WARN gitserver gitcli/command.go:363 High memory usage exec request {"TraceId": "1b6a65835e3f75e7e83c8fe7355baeb2", "SpanId": "d525ac1f8c077184", "ev.Fields": {"cmd_ru_oublock": "0", "repo": "github.com/sourcegraph/about", "args": "[git config --unset-all gc.auto]", "cmd_duration_ms": "7", "system_time_ms": "2", "cmd_ru_inblock": "0", "exit_status": "5", "error": "git command [git config --unset-all gc.auto] failed with status code 5 (output: \"\")", "cmd_ru_maxrss_kib": "4912", "cmd_ru_maxrss_human_readable": "5.0 MB", "cmd_ru_minflt": "483", "cmd": "config", "user_time_ms": "2", "trace": "https://sourcegraph.test:3443/-/debug/jaeger/trace/1b6a65835e3f75e7e83c8fe7355baeb2", "actor": "0", "cmd_ru_majflt": "10", "traceID": "1b6a65835e3f75e7e83c8fe7355baeb2"}} [ gitserver-1] WARN gitserver gitcli/command.go:363 High memory usage exec request {"TraceId": "39bbc0cc8b99292e6d3b6dfc067d4049", "SpanId": "60e4d69a25a3074b", "ev.Fields": {"args": "[git config --unset-all gc.auto]", "cmd_duration_ms": "7", "cmd_ru_maxrss_kib": "5056", "cmd_ru_minflt": "492", "trace": "https://sourcegraph.test:3443/-/debug/jaeger/trace/39bbc0cc8b99292e6d3b6dfc067d4049", "actor": "0", "system_time_ms": "2", "cmd_ru_inblock": "0", "cmd": "config", "exit_status": "5", "error": "git command [git config --unset-all gc.auto] failed with status code 5 (output: \"\")", "user_time_ms": "2", "cmd_ru_maxrss_human_readable": "5.2 MB", "cmd_ru_oublock": "0", "repo": "github.com/sourcegraph-testing/etcd", "cmd_ru_majflt": "10", "traceID": "39bbc0cc8b99292e6d3b6dfc067d4049"}} [ ``` Note that I have not tested this e2e in a linux system, but I think it's fine to test it on sourcegraph.com since: 1) we have the benchmarks / integration tests from the previous PR 2) it's behind a environment variable that is off by default ## Changelog Adds a new experimental feature to enable track of `git` command memory invocations when the `GITSERVER_MEMORY_OBSERVATION_ENABLED` environment variable is true (off by default).
551 lines
17 KiB
Go
551 lines
17 KiB
Go
package gitcli
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/dustin/go-humanize"
|
|
|
|
"github.com/sourcegraph/sourcegraph/internal/bytesize"
|
|
"github.com/sourcegraph/sourcegraph/internal/env"
|
|
"github.com/sourcegraph/sourcegraph/internal/memcmd"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
"github.com/sourcegraph/log"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
|
|
"github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/common"
|
|
"github.com/sourcegraph/sourcegraph/internal/actor"
|
|
"github.com/sourcegraph/sourcegraph/internal/api"
|
|
"github.com/sourcegraph/sourcegraph/internal/honey"
|
|
"github.com/sourcegraph/sourcegraph/internal/lazyregexp"
|
|
"github.com/sourcegraph/sourcegraph/internal/trace"
|
|
"github.com/sourcegraph/sourcegraph/internal/wrexec"
|
|
"github.com/sourcegraph/sourcegraph/lib/errors"
|
|
)
|
|
|
|
var (
|
|
execRunning = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "src_gitserver_exec_running",
|
|
Help: "number of gitserver.GitCommand running concurrently.",
|
|
}, []string{"cmd"})
|
|
execDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "src_gitserver_exec_duration_seconds",
|
|
Help: "gitserver.GitCommand latencies in seconds.",
|
|
Buckets: prometheus.ExponentialBucketsRange(0.01, 60.0, 12),
|
|
}, []string{"cmd"})
|
|
highMemoryCounter = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "src_gitserver_exec_high_memory_usage_count",
|
|
Help: "gitcli.GitCommand high memory usage by subcommand",
|
|
}, []string{"cmd"})
|
|
|
|
memoryObservationEnabled = env.MustGetBool("GITSERVER_MEMORY_OBSERVATION_ENABLED", false, "enable memory observation for gitserver commands")
|
|
)
|
|
|
|
type commandOpts struct {
|
|
arguments []string
|
|
|
|
stdin io.Reader
|
|
}
|
|
|
|
func optsFromFuncs(optFns ...CommandOptionFunc) commandOpts {
|
|
var opts commandOpts
|
|
for _, optFn := range optFns {
|
|
optFn(&opts)
|
|
}
|
|
return opts
|
|
}
|
|
|
|
type CommandOptionFunc func(*commandOpts)
|
|
|
|
// WithArguments sets the given arguments to the command arguments.
|
|
func WithArguments(args ...string) CommandOptionFunc {
|
|
return func(o *commandOpts) {
|
|
o.arguments = args
|
|
}
|
|
}
|
|
|
|
// WithStdin specifies the reader to use for the command's stdin input.
|
|
func WithStdin(stdin io.Reader) CommandOptionFunc {
|
|
return func(o *commandOpts) {
|
|
o.stdin = stdin
|
|
}
|
|
}
|
|
|
|
const gitCommandDefaultTimeout = time.Minute
|
|
|
|
func (g *gitCLIBackend) NewCommand(ctx context.Context, optFns ...CommandOptionFunc) (_ io.ReadCloser, err error) {
|
|
opts := optsFromFuncs(optFns...)
|
|
|
|
tr, ctx := trace.New(ctx, "gitcli.NewCommand",
|
|
attribute.StringSlice("args", opts.arguments),
|
|
attribute.String("dir", g.dir.Path()),
|
|
)
|
|
defer func() {
|
|
if err != nil {
|
|
tr.EndWithErr(&err)
|
|
}
|
|
}()
|
|
|
|
logger := g.logger.WithTrace(trace.Context(ctx))
|
|
|
|
if !IsAllowedGitCmd(logger, opts.arguments) {
|
|
blockedCommandExecutedCounter.Inc()
|
|
return nil, errBadGitCommand
|
|
}
|
|
|
|
if len(opts.arguments) == 0 {
|
|
// Technically can't happen because IsAllowedGitCmd catches this, but
|
|
// if someone ever touches that logic, let's be safe before we access
|
|
// args[0].
|
|
return nil, errors.New("provide arguments")
|
|
}
|
|
|
|
subCmd := opts.arguments[0]
|
|
|
|
// If no deadline is set, use the default git command timeout.
|
|
cancel := func() {}
|
|
if _, ok := ctx.Deadline(); !ok {
|
|
ctx, cancel = context.WithTimeout(ctx, gitCommandDefaultTimeout)
|
|
}
|
|
|
|
cmd := exec.CommandContext(ctx, "git", opts.arguments...)
|
|
cmd.Cancel = func() error {
|
|
// Send SIGKILL to the process group instead of just the process
|
|
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
|
}
|
|
g.dir.Set(cmd)
|
|
|
|
if cmd.SysProcAttr == nil {
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
|
}
|
|
// We use setpgid here so that child processes live in their own process groups.
|
|
// This is helpful for two things:
|
|
// - We can kill a process group to make sure that any subprocesses git might spawn
|
|
// will also receive the termination signal. The standard go implementation sends
|
|
// a SIGKILL only to the process itself. By using process groups, we can tell all
|
|
// children to shut down as well.
|
|
// - We want to track maxRSS for tracking purposes to identify memory usage by command
|
|
// and linux tracks the maxRSS as "the maximum resident set size used (in kilobytes)"
|
|
// of the process in the process group that had the highest maximum resident set size.
|
|
// Read: If we don't use a separate process group here, we usually get the maxRSS from
|
|
// the process with the biggest memory usage in the process group, which is gitserver.
|
|
// So we cannot track the memory well. This is leaky, as it only tracks the largest sub-
|
|
// process, but it gives us a good indication of the general resource consumption.
|
|
cmd.SysProcAttr.Setpgid = true
|
|
|
|
stderr, stderrBuf := stderrBuffer()
|
|
cmd.Stderr = stderr
|
|
|
|
wrappedCmd := g.rcf.WrapWithRepoName(ctx, logger, g.repoName, cmd)
|
|
|
|
stdout, err := wrappedCmd.StdoutPipe()
|
|
if err != nil {
|
|
cancel()
|
|
return nil, errors.Wrap(err, "failed to create stdout pipe")
|
|
}
|
|
|
|
if opts.stdin != nil {
|
|
cmd.Stdin = opts.stdin
|
|
}
|
|
|
|
cmdStart := time.Now()
|
|
|
|
if err := wrappedCmd.Start(); err != nil {
|
|
cancel()
|
|
return nil, errors.Wrap(err, "failed to start git process")
|
|
}
|
|
|
|
observer := memcmd.NewNoOpObserver()
|
|
|
|
if memoryObservationEnabled {
|
|
maybeObs, err := memcmd.NewDefaultObserver(ctx, cmd)
|
|
if err != nil {
|
|
logger.Warn("failed to create memory observer, defaulting to no-op", log.Error(err))
|
|
}
|
|
|
|
observer = maybeObs
|
|
}
|
|
observer.Start()
|
|
defer func() {
|
|
if err != nil {
|
|
observer.Stop() // stop the observer if we return an error
|
|
}
|
|
}()
|
|
|
|
execRunning.WithLabelValues(subCmd).Inc()
|
|
|
|
cr := &cmdReader{
|
|
ctx: ctx,
|
|
subCmd: subCmd,
|
|
ctxCancel: cancel,
|
|
cmdStart: cmdStart,
|
|
stdout: stdout,
|
|
cmd: wrappedCmd,
|
|
stderr: stderrBuf,
|
|
repoName: g.repoName,
|
|
logger: logger,
|
|
gitDir: g.dir,
|
|
tr: tr,
|
|
memoryObserver: observer,
|
|
}
|
|
|
|
return cr, nil
|
|
}
|
|
|
|
// errBadGitCommand is returned from the git CLI backend if the arguments provided
|
|
// are not allowed.
|
|
var errBadGitCommand = errors.New("bad git command, not allowed")
|
|
|
|
func newCommandFailedError(ctx context.Context, err error, cmd wrexec.Cmder, stderr []byte) error {
|
|
if ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
|
|
return &commandFailedError{
|
|
Inner: err,
|
|
args: cmd.Unwrap().Args,
|
|
Stderr: stderr,
|
|
ExitStatus: cmd.Unwrap().ProcessState.ExitCode(),
|
|
}
|
|
}
|
|
|
|
type commandFailedError struct {
|
|
Stderr []byte
|
|
ExitStatus int
|
|
Inner error
|
|
args []string
|
|
}
|
|
|
|
func (e *commandFailedError) Unwrap() error {
|
|
return e.Inner
|
|
}
|
|
|
|
func (e *commandFailedError) Error() string {
|
|
return fmt.Sprintf("git command %v failed with status code %d (output: %q)", e.args, e.ExitStatus, e.Stderr)
|
|
}
|
|
|
|
type cmdReader struct {
|
|
stdout io.Reader
|
|
ctx context.Context
|
|
ctxCancel context.CancelFunc
|
|
subCmd string
|
|
cmdStart time.Time
|
|
cmd wrexec.Cmder
|
|
stderr *bytes.Buffer
|
|
logger log.Logger
|
|
gitDir common.GitDir
|
|
repoName api.RepoName
|
|
mu sync.Mutex
|
|
tr trace.Trace
|
|
err error
|
|
waitOnce sync.Once
|
|
memoryObserver memcmd.Observer
|
|
}
|
|
|
|
func (rc *cmdReader) Read(p []byte) (n int, err error) {
|
|
rc.mu.Lock()
|
|
defer rc.mu.Unlock()
|
|
|
|
n, err = rc.stdout.Read(p)
|
|
// If the command has finished, we close the stdout pipe and wait on the command
|
|
// to free any leftover resources. If it errored, this will return the command
|
|
// error from Read.
|
|
if err == io.EOF {
|
|
if err := rc.waitCmd(); err != nil {
|
|
return n, err
|
|
}
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
func (rc *cmdReader) Close() error {
|
|
rc.mu.Lock()
|
|
defer rc.mu.Unlock()
|
|
|
|
return rc.waitCmd()
|
|
}
|
|
|
|
func (rc *cmdReader) waitCmd() error {
|
|
// Waiting on a command should only happen once, so
|
|
// we synchronize all potential calls to Read and Close
|
|
// here, and memoize the error.
|
|
rc.waitOnce.Do(func() {
|
|
rc.err = rc.cmd.Wait()
|
|
rc.memoryObserver.Stop()
|
|
|
|
if rc.err != nil {
|
|
if checkMaybeCorruptRepo(rc.logger, rc.gitDir, rc.repoName, rc.stderr.String()) {
|
|
rc.err = common.ErrRepoCorrupted{Reason: rc.stderr.String()}
|
|
} else {
|
|
rc.err = newCommandFailedError(rc.ctx, rc.err, rc.cmd, rc.stderr.Bytes())
|
|
}
|
|
}
|
|
|
|
rc.trace()
|
|
rc.tr.EndWithErr(&rc.err)
|
|
rc.ctxCancel()
|
|
})
|
|
|
|
return rc.err
|
|
}
|
|
|
|
const highMemoryUsageThreshold = 500 * bytesize.MiB
|
|
|
|
func (rc *cmdReader) trace() {
|
|
duration := time.Since(rc.cmdStart)
|
|
|
|
execRunning.WithLabelValues(rc.subCmd).Dec()
|
|
execDuration.WithLabelValues(rc.subCmd).Observe(duration.Seconds())
|
|
|
|
processState := rc.cmd.Unwrap().ProcessState
|
|
var sysUsage syscall.Rusage
|
|
s, ok := processState.SysUsage().(*syscall.Rusage)
|
|
if ok {
|
|
sysUsage = *s
|
|
}
|
|
|
|
memUsage, err := rc.memoryObserver.MaxMemoryUsage()
|
|
if err != nil {
|
|
rc.logger.Warn("failed to get max memory usage", log.Error(err))
|
|
}
|
|
|
|
isSlow := duration > shortGitCommandSlow(rc.cmd.Unwrap().Args)
|
|
|
|
isHighMem := memUsage > highMemoryUsageThreshold
|
|
|
|
if isHighMem {
|
|
highMemoryCounter.WithLabelValues(rc.subCmd).Inc()
|
|
}
|
|
|
|
if honey.Enabled() || isSlow || isHighMem {
|
|
act := actor.FromContext(rc.ctx)
|
|
ev := honey.NewEvent("gitserver-exec")
|
|
ev.SetSampleRate(HoneySampleRate(rc.subCmd, act))
|
|
ev.AddField("repo", rc.repoName)
|
|
ev.AddField("cmd", rc.subCmd)
|
|
ev.AddField("args", rc.cmd.Unwrap().Args)
|
|
ev.AddField("actor", act.UIDString())
|
|
ev.AddField("exit_status", processState.ExitCode())
|
|
if rc.err != nil {
|
|
ev.AddField("error", rc.err.Error())
|
|
}
|
|
ev.AddField("cmd_duration_ms", duration.Milliseconds())
|
|
ev.AddField("user_time_ms", processState.UserTime().Milliseconds())
|
|
ev.AddField("system_time_ms", processState.SystemTime().Milliseconds())
|
|
ev.AddField("cmd_ru_maxrss_kib", memUsage>>10)
|
|
ev.AddField("cmd_ru_maxrss_human_readable", humanize.Bytes(uint64(memUsage)))
|
|
ev.AddField("cmd_ru_minflt", sysUsage.Minflt)
|
|
ev.AddField("cmd_ru_majflt", sysUsage.Majflt)
|
|
ev.AddField("cmd_ru_inblock", sysUsage.Inblock)
|
|
ev.AddField("cmd_ru_oublock", sysUsage.Oublock)
|
|
|
|
if traceID := trace.ID(rc.ctx); traceID != "" {
|
|
ev.AddField("traceID", traceID)
|
|
ev.AddField("trace", trace.URL(traceID))
|
|
}
|
|
|
|
if honey.Enabled() {
|
|
_ = ev.Send()
|
|
}
|
|
|
|
if isSlow {
|
|
rc.logger.Warn("Long exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
|
|
}
|
|
if isHighMem {
|
|
rc.logger.Warn("High memory usage exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
|
|
}
|
|
}
|
|
|
|
rc.tr.SetAttributes(attribute.Int("exit_code", processState.ExitCode()))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_duration_ms", duration.Milliseconds()))
|
|
rc.tr.SetAttributes(attribute.Int64("user_time_ms", processState.UserTime().Milliseconds()))
|
|
rc.tr.SetAttributes(attribute.Int64("system_time_ms", processState.SystemTime().Milliseconds()))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_ru_maxrss_kib", int64(memUsage>>10)))
|
|
rc.tr.SetAttributes(attribute.String("cmd_ru_maxrss_human_readable", humanize.Bytes(uint64(memUsage))))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_ru_minflt", sysUsage.Minflt))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_ru_majflt", sysUsage.Majflt))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_ru_inblock", sysUsage.Inblock))
|
|
rc.tr.SetAttributes(attribute.Int64("cmd_ru_oublock", sysUsage.Oublock))
|
|
}
|
|
|
|
const maxStderrCapture = 1024
|
|
|
|
// stderrBuffer sets up a limited buffer to capture stderr for error reporting.
|
|
func stderrBuffer() (io.Writer, *bytes.Buffer) {
|
|
stderrBuf := bytes.NewBuffer(make([]byte, 0, maxStderrCapture))
|
|
stderr := &limitWriter{W: stderrBuf, N: maxStderrCapture}
|
|
return stderr, stderrBuf
|
|
}
|
|
|
|
// limitWriter is a io.Writer that writes to an W but discards after N bytes.
|
|
type limitWriter struct {
|
|
W io.Writer // underling writer
|
|
N int // max bytes remaining
|
|
}
|
|
|
|
func (l *limitWriter) Write(p []byte) (int, error) {
|
|
if l.N <= 0 {
|
|
return len(p), nil
|
|
}
|
|
origLen := len(p)
|
|
if len(p) > l.N {
|
|
p = p[:l.N]
|
|
}
|
|
n, err := l.W.Write(p)
|
|
l.N -= n
|
|
if l.N <= 0 {
|
|
// If we have written limit bytes, then we can include the discarded
|
|
// part of p in the count.
|
|
n = origLen
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
func checkMaybeCorruptRepo(logger log.Logger, girDir common.GitDir, repo api.RepoName, stderr string) bool {
|
|
if !stdErrIndicatesCorruption(stderr) {
|
|
return false
|
|
}
|
|
|
|
logger = logger.With(log.String("repo", string(repo)), log.String("repo", string(repo)))
|
|
logger.Warn("marking repo for re-cloning due to stderr output indicating repo corruption", log.String("stderr", stderr))
|
|
|
|
// We set a flag in the config for the cleanup janitor job to fix. The janitor
|
|
// runs every minute.
|
|
// We use a background context here to record corruption events even when the
|
|
// context has since been cancelled.
|
|
err := markRepoMaybeCorrupt(girDir)
|
|
if err != nil {
|
|
logger.Error("failed to set maybeCorruptRepo flag", log.Error(err))
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// markRepoMaybeCorrupt ensures a file called sourcegraph.maybeCorruptRepo exists
|
|
// in the repo root, and makes sure the files mtime set to the current time.
|
|
func markRepoMaybeCorrupt(gitDir common.GitDir) error {
|
|
p := gitDir.Path(RepoMaybeCorruptFlagFilepath)
|
|
|
|
f, err := os.Create(p)
|
|
if err != nil && !os.IsExist(err) {
|
|
return err
|
|
}
|
|
_ = f.Close()
|
|
return os.Chtimes(p, time.Time{}, time.Now())
|
|
}
|
|
|
|
// RepoMaybeCorruptFlagFilepath is a magic file we add to the root of a git repo
|
|
// to signal that a repo may be corrupt on disk, when the git stderr output indicates
|
|
// potential corruption.
|
|
const RepoMaybeCorruptFlagFilepath = ".sourcegraph-maybe-corrupt-repo"
|
|
|
|
var (
|
|
// objectOrPackFileCorruptionRegex matches stderr lines from git which indicate
|
|
// that a repository's packfiles or commit objects might be corrupted.
|
|
//
|
|
// See https://github.com/sourcegraph/sourcegraph/issues/6676 for more
|
|
// context.
|
|
objectOrPackFileCorruptionRegex = lazyregexp.NewPOSIX(`^error: (Could not read|packfile) `)
|
|
|
|
// objectOrPackFileCorruptionRegex matches stderr lines from git which indicate that
|
|
// git's supplemental commit-graph might be corrupted.
|
|
//
|
|
// See https://github.com/sourcegraph/sourcegraph/issues/37872 for more
|
|
// context.
|
|
commitGraphCorruptionRegex = lazyregexp.NewPOSIX(`^fatal: commit-graph requires overflow generation data but has none`)
|
|
)
|
|
|
|
// stdErrIndicatesCorruption returns true if the provided stderr output from a git command indicates
|
|
// that there might be repository corruption.
|
|
func stdErrIndicatesCorruption(stderr string) bool {
|
|
return objectOrPackFileCorruptionRegex.MatchString(stderr) || commitGraphCorruptionRegex.MatchString(stderr)
|
|
}
|
|
|
|
// shortGitCommandSlow returns the threshold for regarding an git command as
|
|
// slow. Some commands such as "git archive" are inherently slower than "git
|
|
// rev-parse", so this will return an appropriate threshold given the command.
|
|
func shortGitCommandSlow(args []string) time.Duration {
|
|
if len(args) < 1 {
|
|
return time.Second
|
|
}
|
|
switch args[0] {
|
|
case "archive":
|
|
return 1 * time.Minute
|
|
|
|
case "blame", "ls-tree", "log", "show":
|
|
return 5 * time.Second
|
|
|
|
case "fetch":
|
|
return 10 * time.Second
|
|
|
|
default:
|
|
return 2500 * time.Millisecond
|
|
}
|
|
}
|
|
|
|
// mapToLoggerField translates a map to log context fields.
|
|
func mapToLoggerField(m map[string]any) []log.Field {
|
|
LogFields := []log.Field{}
|
|
|
|
for i, v := range m {
|
|
|
|
LogFields = append(LogFields, log.String(i, fmt.Sprint(v)))
|
|
}
|
|
|
|
return LogFields
|
|
}
|
|
|
|
// Send 1 in 16 events to honeycomb. This is hardcoded since we only use this
|
|
// for Sourcegraph.com.
|
|
//
|
|
// 2020-05-29 1 in 4. We are currently at the top tier for honeycomb (before
|
|
// enterprise) and using double our quota. This gives us room to grow. If you
|
|
// find we keep bumping this / missing data we care about we can look into
|
|
// more dynamic ways to sample in our application code.
|
|
//
|
|
// 2020-07-20 1 in 16. Again hitting very high usage. Likely due to recent
|
|
// scaling up of the indexed search cluster. Will require more investigation,
|
|
// but we should probably segment user request path traffic vs internal batch
|
|
// traffic.
|
|
//
|
|
// 2020-11-02 Dynamically sample. Again hitting very high usage. Same root
|
|
// cause as before, scaling out indexed search cluster. We update our sampling
|
|
// to instead be dynamic, since "rev-parse" is 12 times more likely than the
|
|
// next most common command.
|
|
//
|
|
// 2021-08-20 over two hours we did 128 * 128 * 1e6 rev-parse requests
|
|
// internally. So we update our sampling to heavily downsample internal
|
|
// rev-parse, while upping our sampling for non-internal.
|
|
// https://ui.honeycomb.io/sourcegraph/datasets/gitserver-exec/result/67e4bLvUddg
|
|
//
|
|
// 2024-02-23 we are now capturing all execs done in honeycomb, including
|
|
// internal stuff like config and janitor jobs. In particular "config" is now
|
|
// running as often as rev-parse. rev-list is also higher than most so we
|
|
// include it in the big sample rate.
|
|
func HoneySampleRate(cmd string, actor *actor.Actor) uint {
|
|
// HACK(keegan) 2022-11-02 IsInternal on sourcegraph.com is always
|
|
// returning false. For now I am also marking it internal if UID is not
|
|
// set to work around us hammering honeycomb.
|
|
internal := actor.IsInternal() || actor.UID == 0
|
|
switch {
|
|
case (cmd == "rev-parse" || cmd == "rev-list" || cmd == "config") && internal:
|
|
return 1 << 14 // 16384
|
|
|
|
case internal:
|
|
// we care more about user requests, so downsample internal more.
|
|
return 16
|
|
|
|
default:
|
|
return 8
|
|
}
|
|
}
|