sourcegraph/cmd/gitserver/server/server.go

// Package server implements the gitserver service.
package server
import (
"bufio"
"bytes"
"container/list"
"context"
"crypto/sha256"
"encoding/gob"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/opentracing/opentracing-go/ext"
otlog "github.com/opentracing/opentracing-go/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel/attribute"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
"golang.org/x/time/rate"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/cmd/gitserver/server/internal/accesslog"
"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/fileutil"
"github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/gitserver/adapters"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
"github.com/sourcegraph/sourcegraph/internal/gitserver/protocol"
"github.com/sourcegraph/sourcegraph/internal/gitserver/search"
"github.com/sourcegraph/sourcegraph/internal/honey"
"github.com/sourcegraph/sourcegraph/internal/lazyregexp"
"github.com/sourcegraph/sourcegraph/internal/mutablelimiter"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/ratelimit"
streamhttp "github.com/sourcegraph/sourcegraph/internal/search/streaming/http"
"github.com/sourcegraph/sourcegraph/internal/syncx"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"github.com/sourcegraph/sourcegraph/internal/types"
"github.com/sourcegraph/sourcegraph/internal/vcs"
"github.com/sourcegraph/sourcegraph/internal/wrexec"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
// tempDirName is the name used for the temporary directory under ReposDir.
const tempDirName = ".tmp"
// P4HomeName is the name used for the directory that git p4 will use as $HOME
// and where it will store cache data.
const P4HomeName = ".p4home"
// traceLogs is controlled via the env SRC_GITSERVER_TRACE. If true, we trace
// logs to stderr.
var traceLogs bool
var (
lastCheckAt = make(map[api.RepoName]time.Time)
lastCheckMutex sync.Mutex
)
// debounce() provides some filtering to prevent spammy requests for the same
// repository. If the last check of the repository was within the given
// duration, it returns false; otherwise it returns true and updates the
// last-check timestamp.
func debounce(name api.RepoName, since time.Duration) bool {
lastCheckMutex.Lock()
defer lastCheckMutex.Unlock()
if t, ok := lastCheckAt[name]; ok && time.Now().Before(t.Add(since)) {
return false
}
lastCheckAt[name] = time.Now()
return true
}
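// A minimal usage sketch (illustrative; the repo name and the update call are
// hypothetical):
//
//	if debounce("github.com/foo/bar", 10*time.Second) {
//		_ = doRepoUpdate() // hypothetical; skipped if checked within the window
//	}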
func init() {
traceLogs, _ = strconv.ParseBool(env.Get("SRC_GITSERVER_TRACE", "false", "Toggles trace logging to stderr"))
}
// runCommandMock is set by tests. When non-nil it is run instead of
// runCommand
var runCommandMock func(context.Context, *exec.Cmd) (int, error)
// runCommand runs the command and returns the exit status. All clients of this function should set the context
// in cmd themselves, but we have to pass the context separately here for the sake of tracing.
func runCommand(ctx context.Context, cmd wrexec.Cmder) (exitCode int, err error) {
if runCommandMock != nil {
return runCommandMock(ctx, cmd.Unwrap())
}
span, _ := ot.StartSpanFromContext(ctx, "runCommand") //nolint:staticcheck // OT is deprecated
span.SetTag("path", cmd.Unwrap().Path)
span.SetTag("args", cmd.Unwrap().Args)
span.SetTag("dir", cmd.Unwrap().Dir)
defer func() {
if err != nil {
ext.Error.Set(span, true)
span.SetTag("err", err.Error())
span.SetTag("exitCode", exitCode)
}
span.Finish()
}()
err = cmd.Run()
exitStatus := -10810 // sentinel value to indicate not set
if cmd.Unwrap().ProcessState != nil { // is nil if process failed to start
exitStatus = cmd.Unwrap().ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
}
return exitStatus, err
}
// runCommandGraceful runs the command and returns the exit status. If the
// supplied context is cancelled we attempt to send SIGINT to the command to
// allow it to gracefully shutdown. All clients of this function should pass in a
// command *without* a context.
func runCommandGraceful(ctx context.Context, logger log.Logger, cmd wrexec.Cmder) (exitCode int, err error) {
span, _ := ot.StartSpanFromContext(ctx, "runCommandGraceful") //nolint:staticcheck // OT is deprecated
c := cmd.Unwrap()
span.SetTag("path", c.Path)
span.SetTag("args", c.Args)
span.SetTag("dir", c.Dir)
defer func() {
if err != nil {
ext.Error.Set(span, true)
span.SetTag("err", err.Error())
span.SetTag("exitCode", exitCode)
}
span.Finish()
}()
exitCode = -10810 // sentinel value to indicate not set
err = cmd.Start()
if err != nil {
return exitCode, err
}
done := make(chan struct{})
go func() {
defer close(done)
err = cmd.Wait()
if err != nil {
logger.Error("running command", log.Error(err))
}
}()
// Wait for command to exit or context to be done
select {
case <-ctx.Done():
logger.Debug("context cancelled, sending SIGINT")
// Attempt to send SIGINT
if err := cmd.Unwrap().Process.Signal(syscall.SIGINT); err != nil {
logger.Warn("Sending SIGINT to command", log.Error(err))
if err := cmd.Unwrap().Process.Kill(); err != nil {
logger.Warn("killing process", log.Error(err))
}
return exitCode, err
}
// Now, continue waiting for command for up to two seconds before killing it
timer := time.NewTimer(2 * time.Second)
select {
case <-done:
logger.Debug("process exited after SIGINT sent")
timer.Stop()
if err == nil {
exitCode = 0
}
case <-timer.C:
logger.Debug("timed out, killing process")
if err := cmd.Unwrap().Process.Kill(); err != nil {
logger.Warn("killing process", log.Error(err))
}
logger.Debug("process killed, waiting for done")
// Wait again to ensure we can access cmd.ProcessState below
<-done
}
if exitError, ok := err.(*exec.ExitError); ok {
exitCode = exitError.ExitCode()
}
err = ctx.Err()
return exitCode, err
case <-done:
// Happy path, command exits
}
if exitError, ok := err.(*exec.ExitError); ok {
exitCode = exitError.ExitCode()
}
if err == nil {
exitCode = 0
}
return exitCode, err
}
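// Illustrative caller sketch (assumes a Server s with its recording command
// factory initialized, mirroring the factory usage elsewhere in this file;
// the git arguments are arbitrary):
//
//	cmd := s.recordingCommandFactory.Command(ctx, s.Logger, "git", "ls-remote", "origin")
//	exitCode, err := runCommandGraceful(ctx, s.Logger, cmd)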
// cloneJob abstracts away a repo and necessary metadata to clone it. In the future it may be
// possible to simplify this, but to do that, doClone will need to do a lot less than it does at the
// moment.
type cloneJob struct {
repo api.RepoName
dir GitDir
syncer VCSSyncer
// TODO: cloneJobConsumer should acquire a new lock. We are trying to keep the changes simple
// for the time being. When we start using the new approach of using long lived goroutines for
// cloning we will refactor doClone to acquire a new lock.
lock *RepositoryLock
remoteURL *vcs.URL
options *cloneOptions
}
// cloneQueue is a threadsafe list.List of cloneJobs that functions as a queue in practice.
type cloneQueue struct {
mu sync.Mutex
jobs *list.List
cmu sync.Mutex
cond *sync.Cond
}
// push will queue the cloneJob to the end of the queue.
func (c *cloneQueue) push(cj *cloneJob) {
c.mu.Lock()
defer c.mu.Unlock()
c.jobs.PushBack(cj)
c.cond.Signal()
}
// pop will return the next cloneJob. If there's no next job available, it returns nil.
func (c *cloneQueue) pop() *cloneJob {
c.mu.Lock()
defer c.mu.Unlock()
next := c.jobs.Front()
if next == nil {
return nil
}
return c.jobs.Remove(next).(*cloneJob)
}
func (c *cloneQueue) empty() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.jobs.Len() == 0
}
// NewCloneQueue initializes a new cloneQueue.
func NewCloneQueue(jobs *list.List) *cloneQueue {
cq := cloneQueue{jobs: jobs}
cq.cond = sync.NewCond(&cq.cmu)
return &cq
}
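// A minimal wiring sketch (illustrative; mirrors how cloneJobProducer below
// drains the queue; the repo name is hypothetical):
//
//	q := NewCloneQueue(list.New())
//	q.push(&cloneJob{repo: api.RepoName("github.com/foo/bar")})
//	for job := q.pop(); job != nil; job = q.pop() {
//		// process job
//	}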
// Server is a gitserver server.
type Server struct {
// Logger should be used for all logging and logger creation.
Logger log.Logger
// ObservationCtx is used to initialize an operations struct
// with the appropriate metrics register etc.
ObservationCtx *observation.Context
// ReposDir is the path to the base directory for gitserver storage.
ReposDir string
// DesiredPercentFree is the desired percentage of disk space to keep free.
DesiredPercentFree int
// DiskSizer tells how much disk is free and how large the disk is.
DiskSizer DiskSizer
// GetRemoteURLFunc is a function which returns the remote URL for a
// repository. This is used when cloning or fetching a repository. In
// production this will speak to the database to look up the clone URL. In
// tests this is usually set to clone a local repository or intentionally
// error.
//
// Note: internal uses should call getRemoteURL which will handle
// GetRemoteURLFunc being nil.
GetRemoteURLFunc func(context.Context, api.RepoName) (string, error)
// GetVCSSyncer is a function which returns the VCS syncer for a repository.
// This is used when cloning or fetching a repository. In production this will
// speak to the database to determine the code host type. In tests this is
// usually set to return a GitRepoSyncer.
GetVCSSyncer func(context.Context, api.RepoName) (VCSSyncer, error)
// Hostname is how we identify this instance of gitserver. Generally it is the
// actual hostname but can also be overridden by the HOSTNAME environment variable.
Hostname string
// DB is the shared database handle.
DB database.DB
// CloneQueue is a threadsafe queue used by the clone pipeline (see
// StartClonePipeline) to process incoming clone requests asynchronously.
CloneQueue *cloneQueue
// skipCloneForTests is set by tests to avoid clones.
skipCloneForTests bool
// ctx is the context we use for all background jobs. It is done when the
// server is stopped. Do not directly call this, rather call
// Server.context()
ctx context.Context
cancel context.CancelFunc // used to shutdown background jobs
cancelMu sync.Mutex // protects canceled
canceled bool
wg sync.WaitGroup // tracks running background jobs
locker *RepositoryLocker
// cloneLimiter and cloneableLimiter limit the number of concurrent
// clones and ls-remotes respectively. Use s.acquireCloneLimiter() and
// s.acquireCloneableLimiter() instead of using these directly.
cloneLimiter *mutablelimiter.Limiter
cloneableLimiter *mutablelimiter.Limiter
// rpsLimiter limits the remote code host git operations done per second
// per gitserver instance
rpsLimiter *ratelimit.InstrumentedLimiter
repoUpdateLocksMu sync.Mutex // protects the map below and also updates to locks.once
repoUpdateLocks map[api.RepoName]*locks
// GlobalBatchLogSemaphore is a semaphore shared between all requests to ensure that a
// maximum number of Git subprocesses are active for all /batch-log requests combined.
GlobalBatchLogSemaphore *semaphore.Weighted
// operations provide uniform observability via internal/observation. This value is
// set by RegisterMetrics when compiled as part of the gitserver binary. The server
// method ensureOperations should be used in all references to avoid a nil pointer
// dereference.
operations *operations
// recordingCommandFactory is a factory that creates recordable commands by wrapping os/exec.Commands.
// The factory creates recordable commands with a set predicate, which is used to determine whether a
// particular command should be recorded or not.
recordingCommandFactory *wrexec.RecordingCommandFactory
}
type locks struct {
once *sync.Once // consolidates multiple waiting updates
mu *sync.Mutex // prevents updates running in parallel
}
// shortGitCommandTimeout returns the timeout for git commands that should not
// take a long time. Some commands such as "git archive" are allowed more time
// than "git rev-parse", so this will return an appropriate timeout given the
// command.
func shortGitCommandTimeout(args []string) time.Duration {
if len(args) < 1 {
return time.Minute
}
switch args[0] {
case "archive":
// This is a long time, but this never blocks a user request for this
// long. Even repos that are not that large can take a long time, for
// example a search over all repos in an organization may have several
// large repos. All of those repos will be competing for IO => we need
// a larger timeout.
return conf.GitLongCommandTimeout()
case "ls-remote":
return 30 * time.Second
default:
return time.Minute
}
}
// shortGitCommandSlow returns the threshold for regarding a git command as
// slow. Some commands such as "git archive" are inherently slower than "git
// rev-parse", so this will return an appropriate threshold given the command.
func shortGitCommandSlow(args []string) time.Duration {
if len(args) < 1 {
return time.Second
}
switch args[0] {
case "archive":
return 1 * time.Minute
case "blame", "ls-tree", "log", "show":
return 5 * time.Second
default:
return 2500 * time.Millisecond
}
}
// 🚨 SECURITY: headerXRequestedWithMiddleware will ensure that the X-Requested-With
// header contains the correct value. See "What does X-Requested-With do, anyway?" in
// https://github.com/sourcegraph/sourcegraph/pull/27931.
func headerXRequestedWithMiddleware(next http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := log.Scoped("gitserver", "headerXRequestedWithMiddleware")
// Do not apply the middleware to /ping and /git endpoints.
//
// 1. /ping is used by health check services that most likely don't set this header
// at all.
//
// 2. /git may be used to run "git fetch" from another gitserver instance over
// HTTP and the fetchCommand does not set this header yet.
if strings.HasPrefix(r.URL.Path, "/ping") || strings.HasPrefix(r.URL.Path, "/git") {
next.ServeHTTP(w, r)
return
}
if value := r.Header.Get("X-Requested-With"); value != "Sourcegraph" {
l.Error("header X-Requested-With is not set or is invalid", log.String("path", r.URL.Path))
http.Error(w, "header X-Requested-With is not set or is invalid", http.StatusBadRequest)
return
}
next.ServeHTTP(w, r)
}
}
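// An illustrative request that passes this middleware (host, port, and body
// are hypothetical):
//
//	req, _ := http.NewRequest("POST", "http://gitserver-0:3178/exec", body)
//	req.Header.Set("X-Requested-With", "Sourcegraph")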
// recordCommandsOnRepos returns a ShouldRecordFunc which determines whether the given command should be recorded
// for a particular repository.
func recordCommandsOnRepos(repos []string) wrexec.ShouldRecordFunc {
// If repos is empty, we should never record, since there is nothing to match on.
if len(repos) == 0 {
return func(ctx context.Context, c *exec.Cmd) bool {
return false
}
}
// We won't record invocations of these git subcommands since they are considered non-destructive.
ignoredGitCommands := map[string]struct{}{
"show": {},
"rev-parse": {},
"log": {},
"diff": {},
"ls-tree": {},
}
return func(ctx context.Context, cmd *exec.Cmd) bool {
base := filepath.Base(cmd.Path)
if base != "git" {
return false
}
repoMatch := false
for _, repo := range repos {
if strings.Contains(cmd.Dir, repo) {
repoMatch = true
break
}
}
// If the repo doesn't match, no use in checking if it is a command we should record.
if !repoMatch {
return false
}
// we have to scan the Args, since it isn't guaranteed that the Arg at index 1 is the git command:
// git -c "protocol.version=2" remote show
for _, arg := range cmd.Args {
if _, ok := ignoredGitCommands[arg]; ok {
return false
}
}
return true
}
}
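// Usage sketch (illustrative): Handler below wires this predicate into the
// recording command factory via conf.Watch; the repo name and buffer size
// here are hypothetical.
//
//	shouldRecord := recordCommandsOnRepos([]string{"github.com/foo/bar"})
//	s.recordingCommandFactory.Update(shouldRecord, 100)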
// Handler returns the http.Handler that should be used to serve requests.
func (s *Server) Handler() http.Handler {
s.ctx, s.cancel = context.WithCancel(context.Background())
s.locker = &RepositoryLocker{}
s.repoUpdateLocks = make(map[api.RepoName]*locks)
s.recordingCommandFactory = wrexec.NewRecordingCommandFactory(nil, 0)
conf.Watch(func() {
// We update the factory with a predicate func. Each subsequent recordable command will use this predicate
// to determine whether a command should be recorded or not.
recordingConf := conf.Get().SiteConfig().GitRecorder
if recordingConf == nil {
return
}
s.recordingCommandFactory.Update(recordCommandsOnRepos(recordingConf.Repos), recordingConf.Size)
})
// GitMaxConcurrentClones controls the maximum number of clones that
// can happen at once on a single gitserver.
// Used to prevent throttle limits from a code host. Defaults to 5.
//
// The new repo-updater scheduler enforces the rate limit across all gitserver instances,
// so ideally this logic could be removed here; however, ensureRevision can also
// cause an update to happen and it is called on every exec command.
maxConcurrentClones := conf.GitMaxConcurrentClones()
s.cloneLimiter = mutablelimiter.New(maxConcurrentClones)
s.cloneableLimiter = mutablelimiter.New(maxConcurrentClones)
conf.Watch(func() {
limit := conf.GitMaxConcurrentClones()
s.cloneLimiter.SetLimit(limit)
s.cloneableLimiter.SetLimit(limit)
})
s.rpsLimiter = ratelimit.NewInstrumentedLimiter("RpsLimiter", rate.NewLimiter(rate.Inf, 10))
setRPSLimiter := func() {
if maxRequestsPerSecond := conf.GitMaxCodehostRequestsPerSecond(); maxRequestsPerSecond == -1 {
// As a special case, -1 means no limiting
s.rpsLimiter.SetLimit(rate.Inf)
s.rpsLimiter.SetBurst(10)
} else if maxRequestsPerSecond == 0 {
// A limiter with a zero limit but a non-zero burst does not reject all events
// because the bucket is initially full with N tokens and refilled N tokens
// every second, where N is the burst size. See
// https://github.com/golang/go/issues/18763 for details.
s.rpsLimiter.SetLimit(0)
s.rpsLimiter.SetBurst(0)
} else {
s.rpsLimiter.SetLimit(rate.Limit(maxRequestsPerSecond))
s.rpsLimiter.SetBurst(10)
}
}
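// To illustrate the zero-limit subtlety noted above: per the comment, a
// limiter built as rate.NewLimiter(0, 10) starts with a full bucket and
// would still admit events, which is why both the limit and the burst are
// zeroed to reject everything.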
conf.Watch(func() {
setRPSLimiter()
})
mux := http.NewServeMux()
mux.HandleFunc("/archive", trace.WithRouteName("archive", accesslog.HTTPMiddleware(
s.Logger.Scoped("archive.accesslog", "archive endpoint access log"),
conf.DefaultClient(),
s.handleArchive,
)))
mux.HandleFunc("/exec", trace.WithRouteName("exec", accesslog.HTTPMiddleware(
s.Logger.Scoped("exec.accesslog", "exec endpoint access log"),
conf.DefaultClient(),
s.handleExec,
)))
mux.HandleFunc("/search", trace.WithRouteName("search", s.handleSearch))
mux.HandleFunc("/batch-log", trace.WithRouteName("batch-log", s.handleBatchLog))
mux.HandleFunc("/p4-exec", trace.WithRouteName("p4-exec", accesslog.HTTPMiddleware(
s.Logger.Scoped("p4-exec.accesslog", "p4-exec endpoint access log"),
conf.DefaultClient(),
s.handleP4Exec,
)))
mux.HandleFunc("/list-gitolite", trace.WithRouteName("list-gitolite", s.handleListGitolite))
mux.HandleFunc("/is-repo-cloneable", trace.WithRouteName("is-repo-cloneable", s.handleIsRepoCloneable))
mux.HandleFunc("/repos-stats", trace.WithRouteName("repos-stats", s.handleReposStats))
mux.HandleFunc("/repo-clone-progress", trace.WithRouteName("repo-clone-progress", s.handleRepoCloneProgress))
mux.HandleFunc("/delete", trace.WithRouteName("delete", s.handleRepoDelete))
mux.HandleFunc("/repo-update", trace.WithRouteName("repo-update", s.handleRepoUpdate))
mux.HandleFunc("/repo-clone", trace.WithRouteName("repo-clone", s.handleRepoClone))
mux.HandleFunc("/create-commit-from-patch-binary", trace.WithRouteName("create-commit-from-patch-binary", s.handleCreateCommitFromPatchBinary))
mux.HandleFunc("/create-commit-from-patch", trace.WithRouteName("create-commit-from-patch", s.handleCreateCommitFromPatch))
mux.HandleFunc("/ping", trace.WithRouteName("ping", func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// This endpoint allows us to expose gitserver itself as a "git service"
// (ETOOMANYGITS!) that allows other services to run commands like "git fetch"
// directly against a gitserver replica and treat it as a git remote.
//
// An example use case is a repo migration from one replica to another during
// scaling events: the new destination gitserver replica can directly clone from
// the gitserver replica that currently hosts the repository.
mux.HandleFunc("/git/", trace.WithRouteName("git", accesslog.HTTPMiddleware(
s.Logger.Scoped("git.accesslog", "git endpoint access log"),
conf.DefaultClient(),
func(rw http.ResponseWriter, r *http.Request) {
http.StripPrefix("/git", s.gitServiceHandler()).ServeHTTP(rw, r)
},
)))
// Migration to hexagonal architecture starting here:
gitAdapter := &adapters.Git{
ReposDir: s.ReposDir,
}
getObjectService := gitdomain.GetObjectService{
RevParse: gitAdapter.RevParse,
GetObjectType: gitAdapter.GetObjectType,
}
getObjectFunc := gitdomain.GetObjectFunc(func(ctx context.Context, repo api.RepoName, objectName string) (*gitdomain.GitObject, error) {
// Tracing is a server concern, so add it here. Once generics land we should be
// able to create some simple wrappers
span, ctx := ot.StartSpanFromContext(ctx, "Git: GetObject") //nolint:staticcheck // OT is deprecated
span.SetTag("objectName", objectName)
defer span.Finish()
return getObjectService.GetObject(ctx, repo, objectName)
})
mux.HandleFunc("/commands/get-object", trace.WithRouteName("commands/get-object",
accesslog.HTTPMiddleware(
s.Logger.Scoped("commands/get-object.accesslog", "commands/get-object endpoint access log"),
conf.DefaultClient(),
handleGetObject(s.Logger.Scoped("commands/get-object", "handles get object"), getObjectFunc),
)))
// 🚨 SECURITY: This must be wrapped in headerXRequestedWithMiddleware.
return headerXRequestedWithMiddleware(mux)
}
// Janitor does clean up tasks over s.ReposDir and is expected to run in a
// background goroutine.
func (s *Server) Janitor(ctx context.Context, interval time.Duration) {
for {
gitserverAddrs := currentGitserverAddresses()
s.cleanupRepos(actor.WithInternalActor(ctx), gitserverAddrs)
time.Sleep(interval)
}
}
// SyncRepoState syncs state on disk to the database for all repos and is
// expected to run in a background goroutine. We perform a full sync if the known
// gitserver addresses have changed since the last run. Otherwise, we only sync
// repos that have not yet been assigned a shard.
func (s *Server) SyncRepoState(interval time.Duration, batchSize, perSecond int) {
var previousAddrs string
var previousPinned string
for {
gitServerAddrs := currentGitserverAddresses()
addrs := gitServerAddrs.Addresses
// We turn addrs into a string here for easy comparison and storage of previous
// addresses since we'd need to take a copy of the slice anyway.
currentAddrs := strings.Join(addrs, ",")
fullSync := currentAddrs != previousAddrs
previousAddrs = currentAddrs
// We turn PinnedServers into a string here for easy comparison and storage
// of previous pins.
pinnedServerPairs := make([]string, 0, len(gitServerAddrs.PinnedServers))
for k, v := range gitServerAddrs.PinnedServers {
pinnedServerPairs = append(pinnedServerPairs, fmt.Sprintf("%s=%s", k, v))
}
sort.Strings(pinnedServerPairs)
currentPinned := strings.Join(pinnedServerPairs, ",")
fullSync = fullSync || currentPinned != previousPinned
previousPinned = currentPinned
if err := s.syncRepoState(gitServerAddrs, batchSize, perSecond, fullSync); err != nil {
s.Logger.Error("Syncing repo state", log.Error(err))
}
time.Sleep(interval)
}
}
func (s *Server) addrForRepo(ctx context.Context, repoName api.RepoName, gitServerAddrs gitserver.GitServerAddresses) (string, error) {
return gitserver.AddrForRepo(ctx, filepath.Base(os.Args[0]), repoName, gitServerAddrs)
}
func currentGitserverAddresses() gitserver.GitServerAddresses {
cfg := conf.Get()
gitServerAddrs := gitserver.GitServerAddresses{
Addresses: cfg.ServiceConnectionConfig.GitServers,
}
if cfg.ExperimentalFeatures != nil {
gitServerAddrs.PinnedServers = cfg.ExperimentalFeatures.GitServerPinnedRepos
}
return gitServerAddrs
}
// StartClonePipeline clones repos asynchronously. It creates a producer-consumer
// pipeline.
func (s *Server) StartClonePipeline(ctx context.Context) {
jobs := make(chan *cloneJob)
go s.cloneJobConsumer(ctx, jobs)
go s.cloneJobProducer(ctx, jobs)
}
func (s *Server) cloneJobProducer(ctx context.Context, jobs chan<- *cloneJob) {
defer close(jobs)
for {
// Acquire the cond mutex lock and wait for a signal if the queue is empty.
s.CloneQueue.cmu.Lock()
if s.CloneQueue.empty() {
s.CloneQueue.cond.Wait()
}
// The queue is not empty and we have a job to process! But don't forget to unlock the cond
// mutex here as we don't need to hold the lock beyond this point for now.
s.CloneQueue.cmu.Unlock()
// Keep popping from the queue until the queue is empty again, in which case we start all
// over again from the top.
for {
job := s.CloneQueue.pop()
if job == nil {
break
}
select {
case jobs <- job:
case <-ctx.Done():
s.Logger.Error("cloneJobProducer: ", log.Error(ctx.Err()))
return
}
}
}
}
func (s *Server) cloneJobConsumer(ctx context.Context, jobs <-chan *cloneJob) {
logger := s.Logger.Scoped("cloneJobConsumer", "process clone jobs")
for j := range jobs {
logger := logger.With(log.String("job.repo", string(j.repo)))
select {
case <-ctx.Done():
logger.Error("context done", log.Error(ctx.Err()))
return
default:
}
ctx, cancel, err := s.acquireCloneLimiter(ctx)
if err != nil {
logger.Error("acquireCloneLimiter", log.Error(err))
continue
}
go func(job *cloneJob) {
defer cancel()
err := s.doClone(ctx, job.repo, job.dir, job.syncer, job.lock, job.remoteURL, job.options)
if err != nil {
logger.Error("failed to clone repo", log.Error(err))
}
// Use a different context in case we failed because the original context failed.
s.setLastErrorNonFatal(s.ctx, job.repo, err)
}(j)
}
}
// hostnameMatch checks whether the hostname matches the given address.
// If there is no exact match, we accept addr if it starts with s.Hostname
// followed by '.' or ':'.
func (s *Server) hostnameMatch(addr string) bool {
if !strings.HasPrefix(addr, s.Hostname) {
return false
}
if addr == s.Hostname {
return true
}
// We know that s.Hostname is shorter than addr so we can safely check the next
// char
next := addr[len(s.Hostname)]
return next == '.' || next == ':'
}
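// For example, with s.Hostname == "gitserver-0" (hypothetical values):
//
//	s.hostnameMatch("gitserver-0")         == true  (exact match)
//	s.hostnameMatch("gitserver-0.ns:3178") == true  (prefix followed by '.')
//	s.hostnameMatch("gitserver-01")        == false (next char is '1')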
var (
repoSyncStateCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "src_repo_sync_state_counter",
Help: "Incremented each time we check the state of repo",
}, []string{"type"})
repoStateUpsertCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "src_repo_sync_state_upsert_counter",
Help: "Incremented each time we upsert repo state in the database",
}, []string{"success"})
wrongShardReposTotal = promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_gitserver_repo_wrong_shard",
Help: "The number of repos that are on disk on the wrong shard",
})
wrongShardReposSizeTotalBytes = promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_gitserver_repo_wrong_shard_bytes",
Help: "Size (in bytes) of repos that are on disk on the wrong shard",
})
wrongShardReposDeletedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "src_gitserver_repo_wrong_shard_deleted",
Help: "The number of repos on the wrong shard that we deleted",
})
)
func (s *Server) syncRepoState(gitServerAddrs gitserver.GitServerAddresses, batchSize, perSecond int, fullSync bool) error {
s.Logger.Debug("starting syncRepoState", log.Bool("fullSync", fullSync))
addrs := gitServerAddrs.Addresses
// When fullSync is true we'll scan all repos in the database and ensure we set
// their clone state and assign any that belong to this shard with the correct
// shard_id.
//
// When fullSync is false, we assume that we only need to check repos that have
// not yet had their shard_id allocated.
// Sanity check our host exists in addrs before starting any work
var found bool
for _, a := range addrs {
if s.hostnameMatch(a) {
found = true
break
}
}
if !found {
return errors.Errorf("gitserver hostname, %q, not found in list", s.Hostname)
}
ctx := s.ctx
store := s.DB.GitserverRepos()
// The rate limit should be enforced across all instances
perSecond = perSecond / len(addrs)
if perSecond < 1 {
perSecond = 1
}
limiter := ratelimit.NewInstrumentedLimiter("SyncRepoState", rate.NewLimiter(rate.Limit(perSecond), perSecond))
// The rate limiter doesn't allow writes that are larger than the burst size
// which we've set to perSecond.
if batchSize > perSecond {
batchSize = perSecond
}
batch := make([]*types.GitserverRepo, 0)
writeBatch := func() {
if len(batch) == 0 {
return
}
// We always clear the batch
defer func() {
batch = batch[0:0]
}()
err := limiter.WaitN(ctx, len(batch))
if err != nil {
s.Logger.Error("Waiting for rate limiter", log.Error(err))
return
}
if err := store.Update(ctx, batch...); err != nil {
repoStateUpsertCounter.WithLabelValues("false").Add(float64(len(batch)))
s.Logger.Error("Updating GitserverRepos", log.Error(err))
return
}
repoStateUpsertCounter.WithLabelValues("true").Add(float64(len(batch)))
}
// Make sure we fetch at least a good chunk of records, assuming that most
// would not need an update anyway. Don't fetch too many though to keep the
// DB load at a reasonable level and constrain memory usage.
iteratePageSize := batchSize * 2
if iteratePageSize < 500 {
iteratePageSize = 500
}
options := database.IterateRepoGitserverStatusOptions{
// We also want to include deleted repos as they may still be cloned on disk
IncludeDeleted: true,
BatchSize: iteratePageSize,
}
if !fullSync {
options.OnlyWithoutShard = true
}
for {
repos, nextRepo, err := store.IterateRepoGitserverStatus(ctx, options)
if err != nil {
return err
}
for _, repo := range repos {
repoSyncStateCounter.WithLabelValues("check").Inc()
// We may have a deleted repo, we need to extract the original name both to
// ensure that the shard check is correct and also so that we can find the
// directory.
repo.Name = api.UndeletedRepoName(repo.Name)
// Ensure we're only dealing with repos we are responsible for.
addr, err := s.addrForRepo(ctx, repo.Name, gitServerAddrs)
if err != nil {
return err
}
if !s.hostnameMatch(addr) {
repoSyncStateCounter.WithLabelValues("other_shard").Inc()
continue
}
repoSyncStateCounter.WithLabelValues("this_shard").Inc()
dir := s.dir(repo.Name)
cloned := repoCloned(dir)
_, cloning := s.locker.Status(dir)
var shouldUpdate bool
if repo.ShardID != s.Hostname {
repo.ShardID = s.Hostname
shouldUpdate = true
}
cloneStatus := cloneStatus(cloned, cloning)
if repo.CloneStatus != cloneStatus {
repo.CloneStatus = cloneStatus
// Since the repo has been recloned or is being cloned
// we can reset the corruption
repo.CorruptedAt = time.Time{}
shouldUpdate = true
}
if !shouldUpdate {
continue
}
batch = append(batch, repo.GitserverRepo)
if len(batch) >= batchSize {
writeBatch()
}
}
if nextRepo == 0 {
break
}
options.NextCursor = nextRepo
}
// Attempt final write
writeBatch()
return nil
}
// Stop cancels the running background jobs and returns when done.
func (s *Server) Stop() {
// idempotent so we can just always set and cancel
s.cancel()
s.cancelMu.Lock()
s.canceled = true
s.cancelMu.Unlock()
s.wg.Wait()
}
// serverContext returns a child context tied to the lifecycle of the server.
func (s *Server) serverContext() (context.Context, context.CancelFunc) {
// If we are already canceled, don't increment our WaitGroup. This is to
// prevent a loop somewhere preventing us from ever finishing the
// WaitGroup, even though all calls fail instantly due to the canceled
// context.
s.cancelMu.Lock()
if s.canceled {
s.cancelMu.Unlock()
return s.ctx, func() {}
}
s.wg.Add(1)
s.cancelMu.Unlock()
ctx, cancel := context.WithCancel(s.ctx)
// we need to track if we have called cancel, since we are only allowed to
// call wg.Done() once, but CancelFuncs can be called any number of times.
var canceled int32
return ctx, func() {
ok := atomic.CompareAndSwapInt32(&canceled, 0, 1)
if ok {
cancel()
s.wg.Done()
}
}
}
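// Usage sketch (illustrative): background work derives its context here so
// that Stop can wait for it via s.wg; the returned cancel is safe to call
// more than once.
//
//	ctx, cancel := s.serverContext()
//	defer cancel()
//	s.doBackgroundWork(ctx) // hypothetical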
func (s *Server) getRemoteURL(ctx context.Context, name api.RepoName) (*vcs.URL, error) {
if s.GetRemoteURLFunc == nil {
return nil, errors.New("gitserver GetRemoteURLFunc is unset")
}
remoteURL, err := s.GetRemoteURLFunc(ctx, name)
if err != nil {
return nil, errors.Wrap(err, "GetRemoteURLFunc")
}
return vcs.ParseURL(remoteURL)
}
// acquireCloneLimiter() acquires a cancellable context associated with the
// clone limiter.
func (s *Server) acquireCloneLimiter(ctx context.Context) (context.Context, context.CancelFunc, error) {
pendingClones.Inc()
defer pendingClones.Dec()
return s.cloneLimiter.Acquire(ctx)
}
func (s *Server) acquireCloneableLimiter(ctx context.Context) (context.Context, context.CancelFunc, error) {
lsRemoteQueue.Inc()
defer lsRemoteQueue.Dec()
return s.cloneableLimiter.Acquire(ctx)
}
// tempDir is a wrapper around os.MkdirTemp, but using the server's
// temporary directory filepath.Join(s.ReposDir, tempDirName).
//
// This directory is cleaned up by gitserver and will be ignored by repository
// listing operations.
func (s *Server) tempDir(prefix string) (name string, err error) {
dir := filepath.Join(s.ReposDir, tempDirName)
// Create the tmp directory if it doesn't exist yet.
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
return "", err
}
return os.MkdirTemp(dir, prefix)
}
func (s *Server) ignorePath(path string) bool {
// We ignore any direct child of ReposDir whose base name starts with .tmp or .p4home.
if filepath.Dir(path) != s.ReposDir {
return false
}
base := filepath.Base(path)
return strings.HasPrefix(base, tempDirName) || strings.HasPrefix(base, P4HomeName)
}
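// For example, with s.ReposDir == "/data/repos" (hypothetical paths):
//
//	s.ignorePath("/data/repos/.tmp")           == true
//	s.ignorePath("/data/repos/.p4home")        == true
//	s.ignorePath("/data/repos/.tmp/foo")       == false (not a direct child of ReposDir)
//	s.ignorePath("/data/repos/github.com/x/y") == false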
func (s *Server) handleIsRepoCloneable(w http.ResponseWriter, r *http.Request) {
var req protocol.IsRepoCloneableRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if req.Repo == "" {
http.Error(w, "no Repo given", http.StatusBadRequest)
return
}
var syncer VCSSyncer
// We use an internal actor here as the repo may be private. It is safe since all
// we return is a bool indicating whether the repo is cloneable or not. Perhaps
// the only thing that could leak here is whether a private repo exists, although
// the endpoint is only available internally so it's low risk.
remoteURL, err := s.getRemoteURL(actor.WithInternalActor(r.Context()), req.Repo)
if err != nil {
// We use this endpoint to verify if a repo exists without consuming
// API rate limit, since many users visit private or bogus repos,
// so we deduce the unauthenticated clone URL from the repo name.
remoteURL, _ = vcs.ParseURL("https://" + string(req.Repo) + ".git")
// At this point we are assuming it's a git repo
syncer = &GitRepoSyncer{}
} else {
syncer, err = s.GetVCSSyncer(r.Context(), req.Repo)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
resp := protocol.IsRepoCloneableResponse{
Cloned: repoCloned(s.dir(req.Repo)),
}
if err := syncer.IsCloneable(r.Context(), remoteURL); err == nil {
resp.Cloneable = true
} else {
resp.Reason = err.Error()
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
// handleRepoUpdate is a synchronous (waits for update to complete or
// time out) method so it can yield errors. Updates are not
// unconditional; we debounce them based on the provided
// interval, to avoid spam.
func (s *Server) handleRepoUpdate(w http.ResponseWriter, r *http.Request) {
logger := s.Logger.Scoped("handleRepoUpdate", "synchronous http handler for repo updates")
var req protocol.RepoUpdateRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var resp protocol.RepoUpdateResponse
req.Repo = protocol.NormalizeRepo(req.Repo)
dir := s.dir(req.Repo)
// despite the existence of a context on the request, we don't want to
// cancel the git commands partway through if the request terminates.
ctx, cancel1 := s.serverContext()
defer cancel1()
ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
defer cancel2()
if !repoCloned(dir) && !s.skipCloneForTests {
// We do not need to check if req.CloneFromShard is non-zero here since that has no effect on
// the code path at this point. Since the repo is already not cloned at this point, either
// this request was received for a repo migration or a regular clone - for both of which we
// want to go ahead and clone the repo. The responsibility of figuring out where to clone
// the repo from (upstream URL of the external service or the gitserver instance) lies with
// the implementation details of cloneRepo.
_, err := s.cloneRepo(ctx, req.Repo, &cloneOptions{Block: true, CloneFromShard: req.CloneFromShard})
if err != nil {
logger.Warn("error cloning repo", log.String("repo", string(req.Repo)), log.Error(err))
resp.Error = err.Error()
}
} else {
var statusErr, updateErr error
if debounce(req.Repo, req.Since) {
updateErr = s.doRepoUpdate(ctx, req.Repo, "")
}
// attempts to acquire these values are not contingent on the success of
// the update.
lastFetched, err := repoLastFetched(dir)
if err != nil {
statusErr = err
} else {
resp.LastFetched = &lastFetched
}
lastChanged, err := repoLastChanged(dir)
if err != nil {
statusErr = err
} else {
resp.LastChanged = &lastChanged
}
if statusErr != nil {
logger.Error("failed to get status of repo", log.String("repo", string(req.Repo)), log.Error(statusErr))
// report this error in-band, but still produce a valid response with the
// other information.
resp.Error = statusErr.Error()
}
// If an error occurred during update, report it but don't actually make
// it into an http error; we want the client to get the information cleanly.
// An update error "wins" over a status error.
if updateErr != nil {
resp.Error = updateErr.Error()
}
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
// handleRepoClone is an asynchronous (does not wait for the clone to complete or
// time out) call to clone a repository.
// Asynchronous errors will have to be checked in the gitserver_repos table under last_error.
func (s *Server) handleRepoClone(w http.ResponseWriter, r *http.Request) {
logger := s.Logger.Scoped("handleRepoClone", "asynchronous http handler for repo clones")
var req protocol.RepoCloneRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var resp protocol.RepoCloneResponse
req.Repo = protocol.NormalizeRepo(req.Repo)
_, err := s.cloneRepo(context.Background(), req.Repo, &cloneOptions{Block: false})
if err != nil {
logger.Warn("error cloning repo", log.String("repo", string(req.Repo)), log.Error(err))
resp.Error = err.Error()
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}
func (s *Server) handleArchive(w http.ResponseWriter, r *http.Request) {
var (
logger = s.Logger.Scoped("handleArchive", "http handler for repo archive")
q = r.URL.Query()
treeish = q.Get("treeish")
repo = q.Get("repo")
format = q.Get("format")
pathspecs = q["path"]
)
// Log which actor is accessing the repo.
accesslog.Record(r.Context(), repo,
log.String("treeish", treeish),
log.String("format", format),
log.Strings("path", pathspecs),
)
if err := checkSpecArgSafety(treeish); err != nil {
w.WriteHeader(http.StatusBadRequest)
s.Logger.Error("gitserver.archive.CheckSpecArgSafety", log.Error(err))
return
}
if repo == "" || format == "" {
w.WriteHeader(http.StatusBadRequest)
logger.Error("gitserver.archive", log.String("error", "empty repo or format"))
return
}
req := &protocol.ExecRequest{
Repo: api.RepoName(repo),
Args: []string{
"archive",
// Suppresses fatal error when the repo contains paths matching **/.git/** and instead
// includes those files (to allow archiving such invalid repos). This is unexpected
// behavior; the --worktree-attributes flag should merely let us specify a gitattributes
// file that contains `**/.git/** export-ignore`, but it actually makes everything work as
// desired. Tested by the "repo with .git dir" test case.
"--worktree-attributes",
"--format=" + format,
},
}
if format == string(gitserver.ArchiveFormatZip) {
// Compression level of 0 (no compression) seems to perform the
// best overall on fast network links, but this has not been tuned
// thoroughly.
req.Args = append(req.Args, "-0")
}
req.Args = append(req.Args, treeish, "--")
req.Args = append(req.Args, pathspecs...)
s.exec(w, r, req)
}
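// For example, a request with format=zip, treeish=HEAD, and no pathspecs
// (illustrative values) results in the invocation:
//
//	git archive --worktree-attributes --format=zip -0 HEAD --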
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
logger := s.Logger.Scoped("handleSearch", "http handler for search")
tr, ctx := trace.New(r.Context(), "search", "")
defer tr.Finish()
// Decode the request
protocol.RegisterGob()
var args protocol.SearchRequest
if err := gob.NewDecoder(r.Body).Decode(&args); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
tr.SetAttributes(
attribute.String("repo", string(args.Repo)),
attribute.Bool("include_diff", args.IncludeDiff),
attribute.String("query", args.Query.String()),
attribute.Int("limit", args.Limit),
attribute.Bool("include_modified_files", args.IncludeModifiedFiles),
)
searchStart := time.Now()
searchRunning.Inc()
defer searchRunning.Dec()
observeLatency := syncx.OnceFunc(func() {
searchLatency.Observe(time.Since(searchStart).Seconds())
})
eventWriter, err := streamhttp.NewWriter(w)
if err != nil {
tr.SetError(err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
matchesBuf := streamhttp.NewJSONArrayBuf(8*1024, func(data []byte) error {
tr.AddEvent("flushing data", attribute.Int("data.len", len(data)))
observeLatency()
return eventWriter.EventBytes("matches", data)
})
// Run the search
limitHit, searchErr := s.search(ctx, &args, matchesBuf)
if writeErr := eventWriter.Event("done", protocol.NewSearchEventDone(limitHit, searchErr)); writeErr != nil {
if !errors.Is(writeErr, syscall.EPIPE) {
logger.Error("failed to send done event", log.Error(writeErr))
}
}
tr.AddEvent("done", attribute.Bool("limit_hit", limitHit))
tr.SetError(searchErr)
searchDuration.
WithLabelValues(strconv.FormatBool(searchErr != nil)).
Observe(time.Since(searchStart).Seconds())
if honey.Enabled() || traceLogs {
act := actor.FromContext(ctx)
ev := honey.NewEvent("gitserver-search")
ev.SetSampleRate(honeySampleRate("", act))
ev.AddField("repo", args.Repo)
ev.AddField("revisions", args.Revisions)
ev.AddField("include_diff", args.IncludeDiff)
ev.AddField("include_modified_files", args.IncludeModifiedFiles)
ev.AddField("actor", act.UIDString())
ev.AddField("query", args.Query.String())
ev.AddField("limit", args.Limit)
ev.AddField("duration_ms", time.Since(searchStart).Milliseconds())
if searchErr != nil {
ev.AddField("error", searchErr.Error())
}
if traceID := trace.ID(ctx); traceID != "" {
ev.AddField("traceID", traceID)
ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
}
if honey.Enabled() {
_ = ev.Send()
}
if traceLogs {
logger.Debug("TRACE gitserver search", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
}
}
// search handles the core logic of the search. It is passed a matchesBuf so it doesn't need to
// concern itself with event types, and all instrumentation is handled in the calling function.
func (s *Server) search(ctx context.Context, args *protocol.SearchRequest, matchesBuf *streamhttp.JSONArrayBuf) (limitHit bool, err error) {
args.Repo = protocol.NormalizeRepo(args.Repo)
if args.Limit == 0 {
args.Limit = math.MaxInt32
}
dir := s.dir(args.Repo)
if !repoCloned(dir) {
if conf.Get().DisableAutoGitUpdates {
s.Logger.Debug("not cloning on demand as DisableAutoGitUpdates is set")
return false, &gitdomain.RepoNotExistError{
Repo: args.Repo,
}
}
cloneProgress, cloneInProgress := s.locker.Status(dir)
if cloneInProgress {
return false, &gitdomain.RepoNotExistError{
Repo: args.Repo,
CloneInProgress: true,
CloneProgress: cloneProgress,
}
}
cloneProgress, err := s.cloneRepo(ctx, args.Repo, nil)
if err != nil {
s.Logger.Debug("error starting repo clone", log.String("repo", string(args.Repo)), log.Error(err))
return false, &gitdomain.RepoNotExistError{
Repo: args.Repo,
CloneInProgress: false,
}
}
return false, &gitdomain.RepoNotExistError{
Repo: args.Repo,
CloneInProgress: true,
CloneProgress: cloneProgress,
}
}
for _, rev := range args.Revisions {
// TODO add result to trace
if rev.RevSpec != "" {
_ = s.ensureRevision(ctx, args.Repo, rev.RevSpec, dir)
} else if rev.RefGlob != "" {
_ = s.ensureRevision(ctx, args.Repo, rev.RefGlob, dir)
}
}
g, ctx := errgroup.WithContext(ctx)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Search all commits, sending matching commits down resultChan
resultChan := make(chan *protocol.CommitMatch, 128)
g.Go(func() error {
defer close(resultChan)
done := ctx.Done()
mt, err := search.ToMatchTree(args.Query)
if err != nil {
return err
}
// Ensure that we populate ModifiedFiles when we have a DiffModifiesFile filter.
// --name-status is not zero cost, so we don't do it on every search.
hasDiffModifiesFile := false
search.Visit(mt, func(mt search.MatchTree) {
switch mt.(type) {
case *search.DiffModifiesFile:
hasDiffModifiesFile = true
}
})
searcher := &search.CommitSearcher{
Logger: s.Logger,
RepoName: args.Repo,
RepoDir: dir.Path(),
Revisions: args.Revisions,
Query: mt,
IncludeDiff: args.IncludeDiff,
IncludeModifiedFiles: args.IncludeModifiedFiles || hasDiffModifiesFile,
}
return searcher.Search(ctx, func(match *protocol.CommitMatch) {
select {
case <-done:
case resultChan <- match:
}
})
})
// Write matching commits to the stream, flushing occasionally
g.Go(func() error {
defer cancel()
defer matchesBuf.Flush()
flushTicker := time.NewTicker(50 * time.Millisecond)
defer flushTicker.Stop()
sentCount := 0
firstMatch := true
for {
select {
case result, ok := <-resultChan:
if !ok {
return nil
}
if sentCount >= args.Limit {
limitHit = true
return nil
}
sentCount += matchCount(result)
_ = matchesBuf.Append(result) // EOF only
// Send immediately if this is the first result we've seen
if firstMatch {
_ = matchesBuf.Flush() // EOF only
firstMatch = false
}
case <-flushTicker.C:
_ = matchesBuf.Flush() // EOF only
}
}
})
return limitHit, g.Wait()
}
// matchCount returns either:
// 1) the number of diff matches if there are any
// 2) the number of message matches if there are any
// 3) one, to represent matching the commit, but nothing inside it
func matchCount(cm *protocol.CommitMatch) int {
if len(cm.Diff.MatchedRanges) > 0 {
return len(cm.Diff.MatchedRanges)
}
if len(cm.Message.MatchedRanges) > 0 {
return len(cm.Message.MatchedRanges)
}
return 1
}
func (s *Server) handleBatchLog(w http.ResponseWriter, r *http.Request) {
// 🚨 SECURITY: Only allow POST requests.
if strings.ToUpper(r.Method) != http.MethodPost {
http.Error(w, "", http.StatusMethodNotAllowed)
return
}
operations := s.ensureOperations()
// Run git log for a single repository.
// Invoked multiple times from the handler defined below.
performGitLogCommand := func(ctx context.Context, repoCommit api.RepoCommit, format string) (output string, isRepoCloned bool, err error) {
ctx, _, endObservation := operations.batchLogSingle.With(ctx, &err, observation.Args{
LogFields: append(
[]otlog.Field{
otlog.String("format", format),
},
repoCommit.LogFields()...,
),
})
defer func() {
endObservation(1, observation.Args{LogFields: []otlog.Field{
otlog.Bool("isRepoCloned", isRepoCloned),
}})
}()
dir := s.dir(repoCommit.Repo)
if !repoCloned(dir) {
return "", false, nil
}
var buf bytes.Buffer
commitId := string(repoCommit.CommitID)
// make sure CommitID is not an arg
if commitId[0] == '-' {
return "", true, errors.New("commit ID starting with - is not allowed")
}
cmd := s.recordingCommandFactory.Command(ctx, s.Logger, "git", "log", "-n", "1", "--name-only", format, commitId)
dir.Set(cmd.Unwrap())
cmd.Unwrap().Stdout = &buf
if _, err := runCommand(ctx, cmd); err != nil {
return "", true, err
}
return buf.String(), true, nil
}
// Handles the /batch-log route
instrumentedHandler := func(ctx context.Context) (statusCodeOnError int, err error) {
ctx, logger, endObservation := operations.batchLog.With(ctx, &err, observation.Args{})
defer func() {
endObservation(1, observation.Args{LogFields: []otlog.Field{
otlog.Int("statusCodeOnError", statusCodeOnError),
}})
}()
// Read request body
var req protocol.BatchLogRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
return http.StatusBadRequest, err
}
logger.AddEvent("read request.body", req.SpanAttributes()...)
// Validate request parameters
if len(req.RepoCommits) == 0 {
// Early exit: implicitly writes 200 OK
_ = json.NewEncoder(w).Encode(protocol.BatchLogResponse{Results: []protocol.BatchLogResult{}})
return 0, nil
}
if !strings.HasPrefix(req.Format, "--format=") {
return http.StatusUnprocessableEntity, errors.New("format parameter expected to be of the form `--format=<git log format>`")
}
// Perform requests in each repository in the input batch. We perform these commands
// concurrently, but only allow for so many commands to be in-flight at a time so that
// we don't overwhelm a shard with either a large request or too many concurrent batch
// requests.
g, ctx := errgroup.WithContext(ctx)
results := make([]protocol.BatchLogResult, len(req.RepoCommits))
if s.GlobalBatchLogSemaphore == nil {
return http.StatusInternalServerError, errors.New("s.GlobalBatchLogSemaphore not initialized")
}
for i, repoCommit := range req.RepoCommits {
// Avoid capture of loop variables
i, repoCommit := i, repoCommit
start := time.Now()
if err := s.GlobalBatchLogSemaphore.Acquire(ctx, 1); err != nil {
return http.StatusInternalServerError, err
}
s.operations.batchLogSemaphoreWait.Observe(time.Since(start).Seconds())
g.Go(func() error {
defer s.GlobalBatchLogSemaphore.Release(1)
output, isRepoCloned, err := performGitLogCommand(ctx, repoCommit, req.Format)
if err == nil && !isRepoCloned {
err = errors.Newf("repo not found")
}
var errMessage string
if err != nil {
errMessage = err.Error()
}
// Concurrently write results to shared slice. This slice is already properly
// sized, and each goroutine writes to a unique index exactly once. There should
// be no data races here.
results[i] = protocol.BatchLogResult{
RepoCommit: repoCommit,
CommandOutput: output,
CommandError: errMessage,
}
return nil
})
}
if err := g.Wait(); err != nil {
return http.StatusInternalServerError, err
}
// Write payload to client: implicitly writes 200 OK
_ = json.NewEncoder(w).Encode(protocol.BatchLogResponse{Results: results})
return 0, nil
}
// Handle unexpected error conditions. We expect the instrumented handler to not
// have written the status code or any of the body if this error value is non-nil.
if statusCodeOnError, err := instrumentedHandler(r.Context()); err != nil {
http.Error(w, err.Error(), statusCodeOnError)
return
}
}
// ensureOperations returns the non-nil operations value supplied to this server
// via RegisterMetrics (when constructed as part of the gitserver binary), or
// constructs and memoizes a no-op operations value (for use in tests).
func (s *Server) ensureOperations() *operations {
if s.operations == nil {
s.operations = newOperations(s.ObservationCtx)
}
return s.operations
}
func (s *Server) handleExec(w http.ResponseWriter, r *http.Request) {
// 🚨 SECURITY: Only allow POST requests.
// See https://github.com/sourcegraph/security-issues/issues/213.
if strings.ToUpper(r.Method) != http.MethodPost {
http.Error(w, "", http.StatusMethodNotAllowed)
return
}
var req protocol.ExecRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
// Log which actor is accessing the repo.
args := req.Args
cmd := ""
if len(req.Args) > 0 {
cmd = req.Args[0]
args = args[1:]
}
accesslog.Record(r.Context(), string(req.Repo),
log.String("cmd", cmd),
log.Strings("args", args),
)
s.exec(w, r, &req)
}
var blockedCommandExecutedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "src_gitserver_exec_blocked_command_received",
Help: "Incremented each time a command not in the allowlist for gitserver is executed",
})
func (s *Server) exec(w http.ResponseWriter, r *http.Request, req *protocol.ExecRequest) {
logger := s.Logger.Scoped("exec", "").With(log.Strings("req.Args", req.Args))
// Flush writes more aggressively than standard net/http so that clients
// with a context deadline see as much partial response body as possible.
if fw := newFlushingResponseWriter(logger, w); fw != nil {
w = fw
defer fw.Close()
}
// 🚨 SECURITY: Ensure that only commands in the allowed list are executed.
// See https://github.com/sourcegraph/security-issues/issues/213.
if !gitdomain.IsAllowedGitCmd(logger, req.Args) {
blockedCommandExecutedCounter.Inc()
logger.Warn("exec: bad command", log.String("RemoteAddr", r.RemoteAddr))
w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte("invalid command"))
return
}
ctx := r.Context()
if !req.NoTimeout {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, shortGitCommandTimeout(req.Args))
defer cancel()
}
start := time.Now()
var cmdStart time.Time // set once we have ensured commit
exitStatus := -10810 // sentinel value to indicate not set
var stdoutN, stderrN int64
var status string
var execErr error
ensureRevisionStatus := "noop"
req.Repo = protocol.NormalizeRepo(req.Repo)
// Instrumentation
{
cmd := ""
if len(req.Args) > 0 {
cmd = req.Args[0]
}
args := strings.Join(req.Args, " ")
var tr *trace.Trace
tr, ctx = trace.New(ctx, "exec."+cmd, string(req.Repo))
tr.SetAttributes(
attribute.String("args", args),
attribute.String("ensure_revision", req.EnsureRevision),
)
logger = logger.WithTrace(trace.Context(ctx))
execRunning.WithLabelValues(cmd).Inc()
defer func() {
tr.AddEvent(
"done",
attribute.String("status", status),
attribute.Int64("stdout", stdoutN),
attribute.Int64("stderr", stderrN),
attribute.String("ensure_revision_status", ensureRevisionStatus),
)
tr.SetError(execErr)
tr.Finish()
duration := time.Since(start)
execRunning.WithLabelValues(cmd).Dec()
execDuration.WithLabelValues(cmd, status).Observe(duration.Seconds())
var cmdDuration time.Duration
var fetchDuration time.Duration
if !cmdStart.IsZero() {
cmdDuration = time.Since(cmdStart)
fetchDuration = cmdStart.Sub(start)
}
isSlow := cmdDuration > shortGitCommandSlow(req.Args)
isSlowFetch := fetchDuration > 10*time.Second
if honey.Enabled() || traceLogs || isSlow || isSlowFetch {
act := actor.FromContext(ctx)
ev := honey.NewEvent("gitserver-exec")
ev.SetSampleRate(honeySampleRate(cmd, act))
ev.AddField("repo", req.Repo)
ev.AddField("cmd", cmd)
ev.AddField("args", args)
ev.AddField("actor", act.UIDString())
ev.AddField("ensure_revision", req.EnsureRevision)
ev.AddField("ensure_revision_status", ensureRevisionStatus)
ev.AddField("client", r.UserAgent())
ev.AddField("duration_ms", duration.Milliseconds())
ev.AddField("stdin_size", len(req.Stdin))
ev.AddField("stdout_size", stdoutN)
ev.AddField("stderr_size", stderrN)
ev.AddField("exit_status", exitStatus)
ev.AddField("status", status)
if execErr != nil {
ev.AddField("error", execErr.Error())
}
if !cmdStart.IsZero() {
ev.AddField("cmd_duration_ms", cmdDuration.Milliseconds())
ev.AddField("fetch_duration_ms", fetchDuration.Milliseconds())
}
if traceID := trace.ID(ctx); traceID != "" {
ev.AddField("traceID", traceID)
ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
}
if honey.Enabled() {
_ = ev.Send()
}
if traceLogs {
logger.Debug("TRACE gitserver exec", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
if isSlow {
logger.Warn("Long exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
if isSlowFetch {
logger.Warn("Slow fetch/clone for exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
}
}()
}
if notFoundPayload, cloned := s.maybeStartClone(ctx, logger, req.Repo); !cloned {
if notFoundPayload.CloneInProgress {
status = "clone-in-progress"
} else {
status = "repo-not-found"
}
w.WriteHeader(http.StatusNotFound)
_ = json.NewEncoder(w).Encode(notFoundPayload)
return
}
dir := s.dir(req.Repo)
if s.ensureRevision(ctx, req.Repo, req.EnsureRevision, dir) {
ensureRevisionStatus = "fetched"
}
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Cache-Control", "no-cache")
w.Header().Set("Trailer", "X-Exec-Error")
w.Header().Add("Trailer", "X-Exec-Exit-Status")
w.Header().Add("Trailer", "X-Exec-Stderr")
w.WriteHeader(http.StatusOK)
// Special-case `git rev-parse HEAD` requests. These are invoked by search queries for every repo in scope.
// For searches over large repo sets (> 1k), this leads to too many child process execs, which can lead
// to a persistent failure mode where every exec takes > 10s, which is disastrous for gitserver performance.
if len(req.Args) == 2 && req.Args[0] == "rev-parse" && req.Args[1] == "HEAD" {
if resolved, err := quickRevParseHead(dir); err == nil && isAbsoluteRevision(resolved) {
_, _ = w.Write([]byte(resolved))
w.Header().Set("X-Exec-Error", "")
w.Header().Set("X-Exec-Exit-Status", "0")
w.Header().Set("X-Exec-Stderr", "")
return
}
}
// Special-case `git symbolic-ref HEAD` requests. These are invoked by resolvers determining the default branch of a repo.
// For searches over large repo sets (> 1k), this leads to too many child process execs, which can lead
// to a persistent failure mode where every exec takes > 10s, which is disastrous for gitserver performance.
if len(req.Args) == 2 && req.Args[0] == "symbolic-ref" && req.Args[1] == "HEAD" {
if resolved, err := quickSymbolicRefHead(dir); err == nil {
_, _ = w.Write([]byte(resolved))
w.Header().Set("X-Exec-Error", "")
w.Header().Set("X-Exec-Exit-Status", "0")
w.Header().Set("X-Exec-Stderr", "")
return
}
}
var stderrBuf bytes.Buffer
stdoutW := &writeCounter{w: w}
stderrW := &writeCounter{w: &limitWriter{W: &stderrBuf, N: 1024}}
cmdStart = time.Now()
cmd := s.recordingCommandFactory.Command(ctx, s.Logger, "git", req.Args...)
dir.Set(cmd.Unwrap())
cmd.Unwrap().Stdout = stdoutW
cmd.Unwrap().Stderr = stderrW
cmd.Unwrap().Stdin = bytes.NewReader(req.Stdin)
exitStatus, execErr = runCommand(ctx, cmd)
status = strconv.Itoa(exitStatus)
stdoutN = stdoutW.n
stderrN = stderrW.n
stderr := stderrBuf.String()
s.logIfCorrupt(ctx, req.Repo, dir, stderr)
// write trailer
w.Header().Set("X-Exec-Error", errorString(execErr))
w.Header().Set("X-Exec-Exit-Status", status)
w.Header().Set("X-Exec-Stderr", stderr)
}
func (s *Server) handleP4Exec(w http.ResponseWriter, r *http.Request) {
var req protocol.P4ExecRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if len(req.Args) < 1 {
http.Error(w, "args must be greater than or equal to 1", http.StatusBadRequest)
return
}
// Make sure the subcommand is explicitly allowed
allowlist := []string{"protects", "groups", "users", "group"}
allowed := false
for _, arg := range allowlist {
if req.Args[0] == arg {
allowed = true
break
}
}
if !allowed {
http.Error(w, fmt.Sprintf("subcommand %q is not allowed", req.Args[0]), http.StatusBadRequest)
return
}
// Log which actor is accessing p4-exec.
//
// p4-exec is currently only used for fetching user-based permissions
// information, so we don't have a repo name.
accesslog.Record(r.Context(), "<no-repo>",
log.String("p4user", req.P4User),
log.String("p4port", req.P4Port),
log.Strings("args", req.Args),
)
// Make sure the credentials are valid before running the heavier operation
err := p4pingWithTrust(r.Context(), req.P4Port, req.P4User, req.P4Passwd)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
s.p4exec(w, r, &req)
}
func (s *Server) p4exec(w http.ResponseWriter, r *http.Request, req *protocol.P4ExecRequest) {
logger := s.Logger.Scoped("p4exec", "")
// Flush writes more aggressively than standard net/http so that clients
// with a context deadline see as much partial response body as possible.
if fw := newFlushingResponseWriter(logger, w); fw != nil {
w = fw
defer fw.Close()
}
ctx, cancel := context.WithTimeout(r.Context(), time.Minute)
defer cancel()
start := time.Now()
var cmdStart time.Time // set once we have ensured commit
exitStatus := -10810 // sentinel value to indicate not set
var stdoutN, stderrN int64
var status string
var execErr error
// Instrumentation
{
cmd := ""
if len(req.Args) > 0 {
cmd = req.Args[0]
}
args := strings.Join(req.Args, " ")
var tr *trace.Trace
tr, ctx = trace.New(ctx, "p4exec."+cmd, req.P4Port)
tr.SetAttributes(attribute.String("args", args))
logger = logger.WithTrace(trace.Context(ctx))
execRunning.WithLabelValues(cmd).Inc()
defer func() {
tr.AddEvent("done",
attribute.String("status", status),
attribute.Int64("stdout", stdoutN),
attribute.Int64("stderr", stderrN),
)
tr.SetError(execErr)
tr.Finish()
duration := time.Since(start)
execRunning.WithLabelValues(cmd).Dec()
execDuration.WithLabelValues(cmd, status).Observe(duration.Seconds())
var cmdDuration time.Duration
if !cmdStart.IsZero() {
cmdDuration = time.Since(cmdStart)
}
isSlow := cmdDuration > 30*time.Second
if honey.Enabled() || traceLogs || isSlow {
act := actor.FromContext(ctx)
ev := honey.NewEvent("gitserver-p4exec")
ev.SetSampleRate(honeySampleRate(cmd, act))
ev.AddField("p4port", req.P4Port)
ev.AddField("cmd", cmd)
ev.AddField("args", args)
ev.AddField("actor", act.UIDString())
ev.AddField("client", r.UserAgent())
ev.AddField("duration_ms", duration.Milliseconds())
ev.AddField("stdout_size", stdoutN)
ev.AddField("stderr_size", stderrN)
ev.AddField("exit_status", exitStatus)
ev.AddField("status", status)
if execErr != nil {
ev.AddField("error", execErr.Error())
}
if !cmdStart.IsZero() {
ev.AddField("cmd_duration_ms", cmdDuration.Milliseconds())
}
if traceID := trace.ID(ctx); traceID != "" {
ev.AddField("traceID", traceID)
ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
}
_ = ev.Send()
if traceLogs {
logger.Debug("TRACE gitserver p4exec", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
if isSlow {
logger.Warn("Long p4exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
}
}
}()
}
w.Header().Set("Trailer", "X-Exec-Error")
w.Header().Add("Trailer", "X-Exec-Exit-Status")
w.Header().Add("Trailer", "X-Exec-Stderr")
w.WriteHeader(http.StatusOK)
var stderrBuf bytes.Buffer
stdoutW := &writeCounter{w: w}
stderrW := &writeCounter{w: &limitWriter{W: &stderrBuf, N: 1024}}
cmdStart = time.Now()
cmd := exec.CommandContext(ctx, "p4", req.Args...)
cmd.Env = append(os.Environ(),
"P4PORT="+req.P4Port,
"P4USER="+req.P4User,
"P4PASSWD="+req.P4Passwd,
)
cmd.Stdout = stdoutW
cmd.Stderr = stderrW
exitStatus, execErr = runCommand(ctx, s.recordingCommandFactory.Wrap(ctx, s.Logger, cmd))
status = strconv.Itoa(exitStatus)
stdoutN = stdoutW.n
stderrN = stderrW.n
stderr := stderrBuf.String()
// write trailer
w.Header().Set("X-Exec-Error", errorString(execErr))
w.Header().Set("X-Exec-Exit-Status", status)
w.Header().Set("X-Exec-Stderr", stderr)
}
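// For illustration (hypothetical client, sketch only): exec and p4exec both
// stream stdout as the response body and report the outcome via HTTP
// trailers, which net/http exposes only after the body is fully read:
//
//	resp, _ := http.Post(url, "application/json", reqBody)
//	stdout, _ := io.ReadAll(resp.Body) // drain the body first
//	resp.Body.Close()
//	exitStatus := resp.Trailer.Get("X-Exec-Exit-Status")
//	execErr := resp.Trailer.Get("X-Exec-Error")
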
func (s *Server) setLastFetched(ctx context.Context, name api.RepoName) error {
dir := s.dir(name)
lastFetched, err := repoLastFetched(dir)
if err != nil {
return errors.Wrapf(err, "failed to get last fetched for %s", name)
}
lastChanged, err := repoLastChanged(dir)
if err != nil {
return errors.Wrapf(err, "failed to get last changed for %s", name)
}
return s.DB.GitserverRepos().SetLastFetched(ctx, name, database.GitserverFetchData{
LastFetched: lastFetched,
LastChanged: lastChanged,
ShardID: s.Hostname,
})
}
// setLastErrorNonFatal will set the last_error column for the repo in the gitserver table.
func (s *Server) setLastErrorNonFatal(ctx context.Context, name api.RepoName, err error) {
var errString string
if err != nil {
errString = err.Error()
}
if err := s.DB.GitserverRepos().SetLastError(ctx, name, errString, s.Hostname); err != nil {
s.Logger.Warn("Setting last error in DB", log.Error(err))
}
}
func (s *Server) setCloneStatus(ctx context.Context, name api.RepoName, status types.CloneStatus) (err error) {
return s.DB.GitserverRepos().SetCloneStatus(ctx, name, status, s.Hostname)
}
// setCloneStatusNonFatal is the same as setCloneStatus but only logs errors
func (s *Server) setCloneStatusNonFatal(ctx context.Context, name api.RepoName, status types.CloneStatus) {
if err := s.setCloneStatus(ctx, name, status); err != nil {
s.Logger.Warn("Setting clone status in DB", log.Error(err))
}
}
// setRepoSize calculates the size of the repo and stores it in the database.
func (s *Server) setRepoSize(ctx context.Context, name api.RepoName) error {
return s.DB.GitserverRepos().SetRepoSize(ctx, name, dirSize(s.dir(name).Path(".")), s.Hostname)
}
func (s *Server) logIfCorrupt(ctx context.Context, repo api.RepoName, dir GitDir, stderr string) {
if checkMaybeCorruptRepo(s.Logger, repo, dir, stderr) {
reason := stderr
if err := s.DB.GitserverRepos().LogCorruption(ctx, repo, reason, s.Hostname); err != nil {
s.Logger.Warn("failed to log repo corruption", log.String("repo", string(repo)), log.Error(err))
}
}
}
// setGitAttributes writes our global gitattributes to
// gitDir/info/attributes. This will override .gitattributes inside of
// repositories. It is used to unset attributes such as export-ignore.
func setGitAttributes(dir GitDir) error {
infoDir := dir.Path("info")
if err := os.Mkdir(infoDir, os.ModePerm); err != nil && !os.IsExist(err) {
return errors.Wrap(err, "failed to set git attributes")
}
_, err := fileutil.UpdateFileIfDifferent(
filepath.Join(infoDir, "attributes"),
[]byte(`# Managed by Sourcegraph gitserver.
# We want every file to be present in git archive.
* -export-ignore
`))
if err != nil {
return errors.Wrap(err, "failed to set git attributes")
}
return nil
}
// testRepoCorrupter is used by tests to disrupt a cloned repository (e.g. deleting
// HEAD, zeroing it out, etc.)
var testRepoCorrupter func(ctx context.Context, tmpDir GitDir)
// cloneOptions specify optional behaviour for the cloneRepo function.
type cloneOptions struct {
// Block will wait for the clone to finish before returning. If the clone
// fails, the error will be returned. The passed-in context is respected.
// When not blocking, the clone is done with a server background context.
Block bool
// Overwrite will overwrite the existing clone.
Overwrite bool
// CloneFromShard is the hostname of the gitserver instance which is the current owner of the
// repository. If this is a non-zero string, then gitserver will attempt to clone the repo from
// that gitserver instance instead of the upstream repo URL of the external service.
CloneFromShard string
}
// cloneRepo performs a clone operation for the given repository. It is
// non-blocking by default.
func (s *Server) cloneRepo(ctx context.Context, repo api.RepoName, opts *cloneOptions) (cloneProgress string, err error) {
if isAlwaysCloningTest(repo) {
return "This will never finish cloning", nil
}
// We always want to store whether there was an error cloning the repo
defer func() {
// Use a different context in case we failed because the original context failed.
s.setLastErrorNonFatal(s.ctx, repo, err)
}()
dir := s.dir(repo)
// PERF: Before doing the network request to check whether the repo is
// cloneable, let's ensure we are not already cloning.
if progress, cloneInProgress := s.locker.Status(dir); cloneInProgress {
return progress, nil
}
syncer, err := s.GetVCSSyncer(ctx, repo)
if err != nil {
return "", errors.Wrap(err, "get VCS syncer")
}
var remoteURL *vcs.URL
if opts != nil && opts.CloneFromShard != "" {
// are we cloning from the same gitserver instance?
if s.hostnameMatch(strings.TrimPrefix(opts.CloneFromShard, "http://")) {
return "", errors.Errorf("cannot clone from the same gitserver instance")
}
remoteURL, err = vcs.ParseURL(opts.CloneFromShard)
if err != nil {
return "", err
}
remoteURL = remoteURL.JoinPath("git", string(repo))
} else {
// We may be attempting to clone a private repo so we need an internal actor.
remoteURL, err = s.getRemoteURL(actor.WithInternalActor(ctx), repo)
if err != nil {
return "", err
}
}
// isCloneable causes a network request, so we limit the number that can
// run at one time. We use a semaphore separate from the cloning limiter,
// since these checks being blocked by a few slow clones would lead to poor
// feedback for users. We can defer since the rest of the function does not
// block this goroutine.
ctx, cancel, err := s.acquireCloneableLimiter(ctx)
if err != nil {
return "", err // err will be a context error
}
defer cancel()
if err = s.rpsLimiter.Wait(ctx); err != nil {
return "", err
}
if err := syncer.IsCloneable(ctx, remoteURL); err != nil {
redactedErr := newURLRedactor(remoteURL).redact(err.Error())
return "", errors.Errorf("error cloning repo: repo %s not cloneable: %s", repo, redactedErr)
}
// Mark this repo as currently being cloned. We have to check again whether
// someone else is already cloning, since we released the lock. We released
// the lock because isCloneable is a potentially slow operation.
lock, ok := s.locker.TryAcquire(dir, "starting clone")
if !ok {
// Someone else beat us to it
status, _ := s.locker.Status(dir)
return status, nil
}
if s.skipCloneForTests {
lock.Release()
return "", nil
}
// We clone to a temporary location first to avoid having incomplete
// clones in the repo tree. This also avoids leaving behind corrupt clones
// if the clone is interrupted.
if opts != nil && opts.Block {
ctx, cancel, err := s.acquireCloneLimiter(ctx)
if err != nil {
return "", err
}
defer cancel()
// We are blocking, so use the passed in context.
err = s.doClone(ctx, repo, dir, syncer, lock, remoteURL, opts)
err = errors.Wrapf(err, "failed to clone %s", repo)
return "", err
}
// We push the cloneJob to a queue and let the producer-consumer pipeline take over from this
// point. See definitions of cloneJobProducer and cloneJobConsumer to understand how these jobs
// are processed.
s.CloneQueue.push(&cloneJob{
repo: repo,
dir: dir,
syncer: syncer,
lock: lock,
remoteURL: remoteURL,
options: opts,
})
return "", nil
}
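// For illustration (hypothetical caller): a blocking clone waits for doClone
// and returns its error, while the default non-blocking call only enqueues a
// cloneJob and reports progress via the lock status:
//
//	_, err := s.cloneRepo(ctx, repo, &cloneOptions{Block: true, Overwrite: true})
//
//	progress, err := s.cloneRepo(ctx, repo, nil) // returns immediately
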
func (s *Server) doClone(ctx context.Context, repo api.RepoName, dir GitDir, syncer VCSSyncer, lock *RepositoryLock, remoteURL *vcs.URL, opts *cloneOptions) (err error) {
logger := s.Logger.Scoped("doClone", "").With(log.String("repo", string(repo)))
defer lock.Release()
defer func() {
if err != nil {
repoCloneFailedCounter.Inc()
}
}()
if err := s.rpsLimiter.Wait(ctx); err != nil {
return err
}
ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
defer cancel2()
dstPath := string(dir)
overwrite := opts != nil && opts.Overwrite
if !overwrite {
// We clone to a temporary directory first, so avoid wasting resources
// if the destination path already exists.
if _, err := os.Stat(dstPath); err == nil {
return &os.PathError{
Op: "cloneRepo",
Path: dstPath,
Err: os.ErrExist,
}
}
}
tmpPath, err := s.tempDir("clone-")
if err != nil {
return err
}
defer os.RemoveAll(tmpPath)
tmpPath = filepath.Join(tmpPath, ".git")
tmp := GitDir(tmpPath)
// It may already be cloned
if !repoCloned(dir) {
s.setCloneStatusNonFatal(ctx, repo, types.CloneStatusCloning)
}
defer func() {
// Use a background context to ensure we still update the DB even if we time out
s.setCloneStatusNonFatal(context.Background(), repo, cloneStatus(repoCloned(dir), false))
}()
cmd, err := syncer.CloneCommand(ctx, remoteURL, tmpPath)
if err != nil {
return errors.Wrap(err, "get clone command")
}
if cmd.Env == nil {
cmd.Env = os.Environ()
}
// see issue #7322: skip LFS content in repositories with Git LFS configured
cmd.Env = append(cmd.Env, "GIT_LFS_SKIP_SMUDGE=1")
logger.Info("cloning repo", log.String("tmp", tmpPath), log.String("dst", dstPath))
pr, pw := io.Pipe()
defer pw.Close()
go readCloneProgress(logger, newURLRedactor(remoteURL), lock, pr, repo)
if output, err := runWith(ctx, s.recordingCommandFactory.Wrap(ctx, s.Logger, cmd), true, pw); err != nil {
return errors.Wrapf(err, "clone failed. Output: %s", string(output))
}
if testRepoCorrupter != nil {
testRepoCorrupter(ctx, tmp)
}
removeBadRefs(ctx, tmp)
if err := setHEAD(ctx, logger, s.recordingCommandFactory, tmp, syncer, remoteURL); err != nil {
logger.Warn("Failed to ensure HEAD exists", log.Error(err))
return errors.Wrap(err, "failed to ensure HEAD exists")
}
if err := setRepositoryType(tmp, syncer.Type()); err != nil {
return errors.Wrap(err, `git config set "sourcegraph.type"`)
}
// Update the last-changed stamp.
if err := setLastChanged(logger, tmp); err != nil {
return errors.Wrapf(err, "failed to update last changed time")
}
// Set gitattributes
if err := setGitAttributes(tmp); err != nil {
return err
}
// Set gc.auto depending on gitGCMode.
if err := gitSetAutoGC(tmp); err != nil {
return err
}
if overwrite {
// remove the current repo by putting it into our temporary directory
err := fileutil.RenameAndSync(dstPath, filepath.Join(filepath.Dir(tmpPath), "old"))
if err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "failed to remove old clone")
}
}
if err := os.MkdirAll(filepath.Dir(dstPath), os.ModePerm); err != nil {
return err
}
if err := fileutil.RenameAndSync(tmpPath, dstPath); err != nil {
return err
}
// Successfully updated, best-effort updating of db fetch state based on
// disk state.
if err := s.setLastFetched(ctx, repo); err != nil {
logger.Warn("failed setting last fetch in DB", log.Error(err))
}
// Successfully updated, best-effort calculation of the repo size.
if err := s.setRepoSize(ctx, repo); err != nil {
logger.Warn("failed setting repo size", log.Error(err))
}
logger.Info("repo cloned")
repoClonedCounter.Inc()
return nil
}
// readCloneProgress scans the reader and saves the most recent line of output
// as the lock status.
func readCloneProgress(logger log.Logger, redactor *urlRedactor, lock *RepositoryLock, pr io.Reader, repo api.RepoName) {
var logFile *os.File
var err error
if conf.Get().CloneProgressLog {
logFile, err = os.CreateTemp("", "")
if err != nil {
logger.Warn("failed to create temporary clone log file", log.Error(err), log.String("repo", string(repo)))
} else {
logger.Info("logging clone output", log.String("file", logFile.Name()), log.String("repo", string(repo)))
defer logFile.Close()
}
}
scan := bufio.NewScanner(pr)
scan.Split(scanCRLF)
for scan.Scan() {
progress := scan.Text()
// 🚨 SECURITY: The output could include the clone URL, which may contain a
// sensitive token. Redact the full URL and any found HTTP credentials to be
// safe.
//
// e.g.
// $ git clone http://token@github.com/foo/bar
// Cloning into 'nick'...
// fatal: repository 'http://token@github.com/foo/bar/' not found
redactedProgress := redactor.redact(progress)
lock.SetStatus(redactedProgress)
if logFile != nil {
// Failing to write here is non-fatal and we don't want to spam our logs if there
// are issues
_, _ = fmt.Fprintln(logFile, progress)
}
}
if err := scan.Err(); err != nil {
logger.Error("error reporting progress", log.Error(err))
}
}
// urlRedactor redacts all sensitive strings from a message.
type urlRedactor struct {
// sensitive are sensitive strings to be redacted.
// The strings should not be empty.
sensitive []string
}
// newURLRedactor returns a new urlRedactor that redacts
// credentials found in rawurl, and the rawurl itself.
func newURLRedactor(parsedURL *vcs.URL) *urlRedactor {
var sensitive []string
pw, _ := parsedURL.User.Password()
u := parsedURL.User.Username()
if pw != "" && u != "" {
// Only redact the password if we have both, since we can
// assume the username isn't sensitive in this case
sensitive = append(sensitive, pw)
} else {
if pw != "" {
sensitive = append(sensitive, pw)
}
if u != "" {
sensitive = append(sensitive, u)
}
}
sensitive = append(sensitive, parsedURL.String())
return &urlRedactor{sensitive: sensitive}
}
// redact returns a redacted version of message.
// Sensitive strings are replaced with "<redacted>".
func (r *urlRedactor) redact(message string) string {
for _, s := range r.sensitive {
message = strings.ReplaceAll(message, s, "<redacted>")
}
return message
}
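// For illustration (hypothetical URL and message): given a clone URL carrying
// a token as the username, redact masks the token wherever it appears:
//
//	u, _ := vcs.ParseURL("http://token@example.com/foo/bar")
//	r := newURLRedactor(u)
//	out := r.redact("fatal: repository 'http://token@example.com/foo/bar/' not found")
//	// out == "fatal: repository 'http://<redacted>@example.com/foo/bar/' not found"
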
// scanCRLF is similar to bufio.ScanLines except it splits on both '\r' and '\n'
// and it does not return tokens that contain only whitespace.
func scanCRLF(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
trim := func(data []byte) []byte {
data = bytes.TrimSpace(data)
if len(data) == 0 {
// Don't pass back a token that is all whitespace.
return nil
}
return data
}
if i := bytes.IndexAny(data, "\r\n"); i >= 0 {
// We have a full newline-terminated line.
return i + 1, trim(data[:i]), nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), trim(data), nil
}
// Request more data.
return 0, nil, nil
}
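// For illustration (hypothetical input): scanCRLF treats '\r' and '\n' alike
// and drops whitespace-only tokens, so git progress output that rewrites a
// line in place with '\r' yields each intermediate line:
//
//	s := bufio.NewScanner(strings.NewReader("objects: 50%\robjects: 100%\n\n"))
//	s.Split(scanCRLF)
//	for s.Scan() {
//		fmt.Println(s.Text()) // "objects: 50%", then "objects: 100%"
//	}
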
// testGitRepoExists is a test fixture that overrides the return value for
// GitRepoSyncer.IsCloneable when it is set.
var testGitRepoExists func(ctx context.Context, remoteURL *vcs.URL) error
var (
execRunning = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "src_gitserver_exec_running",
Help: "number of gitserver.GitCommand running concurrently.",
}, []string{"cmd"})
execDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "src_gitserver_exec_duration_seconds",
Help: "gitserver.GitCommand latencies in seconds.",
Buckets: trace.UserLatencyBuckets,
}, []string{"cmd", "status"})
searchRunning = promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_gitserver_search_running",
Help: "number of gitserver.Search running concurrently.",
})
searchDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "src_gitserver_search_duration_seconds",
Help: "gitserver.Search duration in seconds.",
Buckets: []float64{0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
}, []string{"error"})
searchLatency = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "src_gitserver_search_latency_seconds",
Help: "gitserver.Search latency (time until first result is sent) in seconds.",
Buckets: []float64{0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
})
pendingClones = promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_gitserver_clone_queue",
Help: "number of repos waiting to be cloned.",
})
lsRemoteQueue = promauto.NewGauge(prometheus.GaugeOpts{
Name: "src_gitserver_lsremote_queue",
Help: "number of repos waiting to check existence on remote code host (git ls-remote).",
})
repoClonedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "src_gitserver_repo_cloned",
Help: "number of successful git clones run",
})
repoCloneFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "src_gitserver_repo_cloned_failed",
Help: "number of failed git clones",
})
)
// honeySampleRate returns the rate at which to sample honeycomb events,
// meaning "send 1 in N". The rates are hardcoded since we only use this for
// Sourcegraph.com.
//
// 2020-05-29 1 in 4. We are currently at the top tier for honeycomb (before
// enterprise) and using double our quota. This gives us room to grow. If you
// find we keep bumping this / missing data we care about we can look into
// more dynamic ways to sample in our application code.
//
// 2020-07-20 1 in 16. Again hitting very high usage. Likely due to recent
// scaling up of the indexed search cluster. Will require more investigation,
// but we should probably segment user request path traffic vs internal batch
// traffic.
//
// 2020-11-02 Dynamically sample. Again hitting very high usage. Same root
// cause as before, scaling out indexed search cluster. We update our sampling
// to instead be dynamic, since "rev-parse" is 12 times more likely than the
// next most common command.
//
// 2021-08-20 over two hours we did 128 * 128 * 1e6 rev-parse requests
// internally. So we update our sampling to heavily downsample internal
// rev-parse, while upping our sampling for non-internal.
// https://ui.honeycomb.io/sourcegraph/datasets/gitserver-exec/result/67e4bLvUddg
func honeySampleRate(cmd string, actor *actor.Actor) uint {
// HACK(keegan) 2022-11-02 IsInternal on sourcegraph.com is always
// returning false. For now I am also marking it internal if UID is not
// set to work around us hammering honeycomb.
internal := actor.IsInternal() || actor.UID == 0
switch {
case cmd == "rev-parse" && internal:
return 1 << 14 // 16384
case internal:
// we care more about user requests, so downsample internal more.
return 16
default:
return 8
}
}
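// For illustration (internalActor and userActor are hypothetical
// *actor.Actor values), the resulting rates mean "send 1 in N events":
//
//	honeySampleRate("rev-parse", internalActor) // 16384
//	honeySampleRate("fetch", internalActor)     // 16
//	honeySampleRate("rev-parse", userActor)     // 8
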
var headBranchPattern = lazyregexp.New(`HEAD branch: (.+?)\n`)
func (s *Server) doRepoUpdate(ctx context.Context, repo api.RepoName, revspec string) error {
span, ctx := ot.StartSpanFromContext(ctx, "Server.doRepoUpdate") //nolint:staticcheck // OT is deprecated
span.SetTag("repo", repo)
defer span.Finish()
if msg, ok := isPaused(filepath.Join(s.ReposDir, string(protocol.NormalizeRepo(repo)))); ok {
s.Logger.Warn("doRepoUpdate paused", log.String("repo", string(repo)), log.String("reason", msg))
return nil
}
s.repoUpdateLocksMu.Lock()
l, ok := s.repoUpdateLocks[repo]
if !ok {
l = &locks{
once: new(sync.Once),
mu: new(sync.Mutex),
}
s.repoUpdateLocks[repo] = l
}
once := l.once
mu := l.mu
s.repoUpdateLocksMu.Unlock()
// doBackgroundRepoUpdate can block longer than our context deadline. done
// will close when it's done. We can return when either done is closed or
// our deadline has passed.
done := make(chan struct{})
err := errors.New("another operation is already in progress")
go func() {
defer close(done)
once.Do(func() {
mu.Lock() // Prevent multiple updates in parallel. It works fine, but it wastes resources.
defer mu.Unlock()
s.repoUpdateLocksMu.Lock()
l.once = new(sync.Once) // Make new requests wait for next update.
s.repoUpdateLocksMu.Unlock()
err = s.doBackgroundRepoUpdate(repo, revspec)
if err != nil {
// We don't want to spam our logs when the rate limiter has been set to block all
// updates
if !errors.Is(err, ratelimit.ErrBlockAll) {
s.Logger.Error("performing background repo update", log.Error(err))
}
// The repo update might have failed due to the repo being corrupt
var gitErr *GitCommandError
if errors.As(err, &gitErr) {
s.logIfCorrupt(ctx, repo, s.dir(repo), gitErr.Output)
}
}
s.setLastErrorNonFatal(s.ctx, repo, err)
})
}()
select {
case <-done:
return errors.Wrapf(err, "repo %s:", repo)
case <-ctx.Done():
span.LogFields(otlog.String("event", "context canceled"))
return ctx.Err()
}
}
var doBackgroundRepoUpdateMock func(api.RepoName) error
func (s *Server) doBackgroundRepoUpdate(repo api.RepoName, revspec string) error {
logger := s.Logger.Scoped("backgroundRepoUpdate", "").With(log.String("repo", string(repo)))
if doBackgroundRepoUpdateMock != nil {
return doBackgroundRepoUpdateMock(repo)
}
// Use the server's background context, since this update outlives the caller's request.
ctx, cancel1 := s.serverContext()
defer cancel1()
// ensure the background update doesn't hang forever
ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
defer cancel2()
// This background process should use our internal actor
ctx = actor.WithInternalActor(ctx)
ctx, cancel2, err := s.acquireCloneLimiter(ctx)
if err != nil {
return err
}
defer cancel2()
if err = s.rpsLimiter.Wait(ctx); err != nil {
return err
}
repo = protocol.NormalizeRepo(repo)
dir := s.dir(repo)
remoteURL, err := s.getRemoteURL(ctx, repo)
if err != nil {
return errors.Wrap(err, "failed to determine Git remote URL")
}
syncer, err := s.GetVCSSyncer(ctx, repo)
if err != nil {
return errors.Wrap(err, "get VCS syncer")
}
// Drop temporary pack files after a fetch. This function won't return
// until the fetch has completed or definitely failed; either way the files
// can't still be in use. We don't care exactly when the cleanup happens,
// just that it does.
defer s.cleanTmpFiles(dir)
err = syncer.Fetch(ctx, remoteURL, dir, revspec)
if err != nil {
return errors.Wrapf(err, "failed to fetch repo %q", repo)
}
removeBadRefs(ctx, dir)
if err := setHEAD(ctx, logger, s.recordingCommandFactory, dir, syncer, remoteURL); err != nil {
return errors.Wrapf(err, "failed to ensure HEAD exists for repo %q", repo)
}
if err := setRepositoryType(dir, syncer.Type()); err != nil {
return errors.Wrapf(err, "failed to set repository type for repo %q", repo)
}
// Update the last-changed stamp on disk.
if err := setLastChanged(logger, dir); err != nil {
logger.Warn("failed to update last changed time", log.Error(err))
}
// Successfully updated, best-effort updating of db fetch state based on
// disk state.
if err := s.setLastFetched(ctx, repo); err != nil {
logger.Warn("failed to set last_fetched in DB", log.Error(err))
}
// Successfully updated, best-effort calculation of the repo size.
if err := s.setRepoSize(ctx, repo); err != nil {
logger.Warn("failed to set repo size", log.Error(err))
}
return nil
}
// Older versions of git do not remove tags case-insensitively, so we
// generate every possible casing of HEAD (2^4 = 16 variants).
var badRefs = syncx.OnceValue(func() []string {
refs := make([]string, 0, 1<<4)
for bits := uint8(0); bits < (1 << 4); bits++ {
s := []byte("HEAD")
for i, c := range s {
// lowercase if the i'th bit of bits is 1
if bits&(1<<i) != 0 {
s[i] = c - 'A' + 'a'
}
}
refs = append(refs, string(s))
}
return refs
})
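// For illustration, badRefs() yields the 16 casings of "HEAD", starting with:
//
//	badRefs() // ["HEAD", "hEAD", "HeAD", "heAD", "HEaD", ...]
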
// removeBadRefs removes bad refs and tags from the git repo at dir. This
// should be run after a clone or fetch. If your repository contains a ref or
// tag called HEAD (case insensitive), most commands will output a warning
// from git:
//
// warning: refname 'HEAD' is ambiguous.
//
// Instead we just remove this ref.
func removeBadRefs(ctx context.Context, dir GitDir) {
args := append([]string{"branch", "-D"}, badRefs()...)
cmd := exec.CommandContext(ctx, "git", args...)
dir.Set(cmd)
_ = cmd.Run()
args = append([]string{"tag", "-d"}, badRefs()...)
cmd = exec.CommandContext(ctx, "git", args...)
dir.Set(cmd)
_ = cmd.Run()
}
// ensureHEAD verifies that there is a HEAD file within the repo and that it
// is of non-zero length. If either check fails, we configure a best-effort
// default.
func ensureHEAD(dir GitDir) {
head, err := os.Stat(dir.Path("HEAD"))
// Guard head.Size() behind err == nil: if Stat fails with an error other
// than NotExist, head is nil and dereferencing it would panic.
if os.IsNotExist(err) || (err == nil && head.Size() == 0) {
// Best-effort default; ignore the write error.
_ = os.WriteFile(dir.Path("HEAD"), []byte("ref: refs/heads/master"), 0o600)
}
}
// setHEAD configures git repo defaults (such as what HEAD is) which are
// needed for git commands to work.
func setHEAD(ctx context.Context, logger log.Logger, rf *wrexec.RecordingCommandFactory, dir GitDir, syncer VCSSyncer, remoteURL *vcs.URL) error {
// Verify that there is a HEAD file within the repo, and that it is of
// non-zero length.
ensureHEAD(dir)
// Fallback to git's default branch name if git remote show fails.
headBranch := "master"
// try to fetch HEAD from origin
cmd, err := syncer.RemoteShowCommand(ctx, remoteURL)
if err != nil {
return errors.Wrap(err, "get remote show command")
}
dir.Set(cmd)
output, err := runWith(ctx, rf.Wrap(ctx, logger, cmd), true, nil)
if err != nil {
logger.Error("Failed to fetch remote info", log.Error(err), log.String("output", string(output)))
return errors.Wrap(err, "failed to fetch remote info")
}
submatches := headBranchPattern.FindSubmatch(output)
if len(submatches) == 2 {
submatch := string(submatches[1])
if submatch != "(unknown)" {
headBranch = submatch
}
}
// check if branch pointed to by HEAD exists
cmd = exec.CommandContext(ctx, "git", "rev-parse", headBranch, "--")
dir.Set(cmd)
if err := cmd.Run(); err != nil {
// branch does not exist, pick first branch
cmd := exec.CommandContext(ctx, "git", "branch")
dir.Set(cmd)
output, err := cmd.Output()
if err != nil {
logger.Error("Failed to list branches", log.Error(err), log.String("output", string(output)))
return errors.Wrap(err, "failed to list branches")
}
lines := strings.Split(string(output), "\n")
branch := strings.TrimPrefix(strings.TrimPrefix(lines[0], "* "), " ")
if branch != "" {
headBranch = branch
}
}
// set HEAD
cmd = exec.CommandContext(ctx, "git", "symbolic-ref", "HEAD", "refs/heads/"+headBranch)
dir.Set(cmd)
if output, err := cmd.CombinedOutput(); err != nil {
logger.Error("Failed to set HEAD", log.Error(err), log.String("output", string(output)))
return errors.Wrap(err, "Failed to set HEAD")
}
return nil
}
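// For illustration (hypothetical `git remote show` output): headBranchPattern
// extracts the default branch from lines like
//
//	HEAD branch: main
//
// giving headBranch = "main"; if the output has no such line or reports
// "(unknown)", setHEAD falls back to "master" or the first local branch.
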
// setLastChanged discerns an approximate last-changed timestamp for a
// repository. This can be approximate; it's used to determine how often we
// should run `git fetch`, but is not relied on strongly. The basic plan
// is as follows: If a repository has never had a timestamp before, we
// guess that the right stamp is *probably* the timestamp of the most
// chronologically-recent commit. If there are no commits, we just use the
// current time because that's probably usually a temporary state.
//
// If a timestamp already exists, we want to update it if and only if
// the set of references (as determined by `git show-ref`) has changed.
//
// To accomplish this, we assert that the file `sg_refhash` in the git
// directory should, if it exists, contain a hash of the output of
// `git show-ref`, and have a timestamp of "the last time this changed",
// except that if we're creating that file for the first time, we set
// it to the timestamp of the top commit. We then compute the hash of
// the show-ref output, and store it in the file if and only if it's
// different from the current contents.
//
// If show-ref fails, we use rev-list to determine whether that's just
// an empty repository (not an error) or some kind of actual error
// that is possibly causing our data to be incorrect, which should
// be reported.
func setLastChanged(logger log.Logger, dir GitDir) error {
hashFile := dir.Path("sg_refhash")
hash, err := computeRefHash(dir)
if err != nil {
return errors.Wrapf(err, "computeRefHash failed for %s", dir)
}
var stamp time.Time
if _, err := os.Stat(hashFile); os.IsNotExist(err) {
// This is the first time we are calculating the hash. Give a more
// appropriate timestamp for sg_refhash than the current time.
stamp = computeLatestCommitTimestamp(logger, dir)
}
_, err = fileutil.UpdateFileIfDifferent(hashFile, hash)
if err != nil {
return errors.Wrapf(err, "failed to update %s", hashFile)
}
// If stamp is non-zero we have a more appropriate mtime.
if !stamp.IsZero() {
err = os.Chtimes(hashFile, stamp, stamp)
if err != nil {
return errors.Wrapf(err, "failed to set mtime to the lastest commit timestamp for %s", dir)
}
}
return nil
}
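// For illustration: on the first run sg_refhash is created with the latest
// commit's timestamp as its mtime; later runs rewrite the file (and bump its
// mtime) only when the show-ref hash differs, so the mtime approximates
// "last time the refs changed":
//
//	stat, _ := os.Stat(dir.Path("sg_refhash"))
//	lastChanged := stat.ModTime()
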
// computeLatestCommitTimestamp returns the timestamp of the most recent
// commit, if any. If there are no commits, the latest commit is in the
// future, or any error occurs, time.Now is returned.
func computeLatestCommitTimestamp(logger log.Logger, dir GitDir) time.Time {
logger = logger.Scoped("computeLatestCommitTimestamp", "compute the timestamp of the most recent commit").
With(log.String("repo", string(dir)))
now := time.Now() // return current time if we don't find a more accurate time
cmd := exec.Command("git", "rev-list", "--all", "--timestamp", "-n", "1")
dir.Set(cmd)
output, err := cmd.Output()
// If we don't have a more specific stamp, we'll return the current time,
// and possibly an error.
if err != nil {
logger.Warn("failed to execute, defaulting to time.Now", log.Error(err))
return now
}
words := bytes.Split(output, []byte(" "))
// An empty rev-list output, without an error, is okay.
if len(words) < 2 {
return now
}
// We should have a timestamp and a commit hash; format is
// 1521316105 ff03fac223b7f16627b301e03bf604e7808989be
epoch, err := strconv.ParseInt(string(words[0]), 10, 64)
if err != nil {
logger.Warn("ignoring corrupted timestamp, defaulting to time.Now", log.String("timestamp", string(words[0])))
return now
}
stamp := time.Unix(epoch, 0)
if stamp.After(now) {
return now
}
return stamp
}
// computeRefHash returns a hash of the refs for dir. The hash should only
// change if the set of refs and the commits they point to change.
func computeRefHash(dir GitDir) ([]byte, error) {
// Do not use CommandContext since this is a fast operation we do not want
// to interrupt.
cmd := exec.Command("git", "show-ref")
dir.Set(cmd)
output, err := cmd.Output()
if err != nil {
// Ignore the failure for an empty repository: show-ref fails with
// empty output and an exit code of 1
var e *exec.ExitError
if !errors.As(err, &e) || len(output) != 0 || len(e.Stderr) != 0 || e.Sys().(syscall.WaitStatus).ExitStatus() != 1 {
return nil, err
}
}
lines := bytes.Split(output, []byte("\n"))
sort.Slice(lines, func(i, j int) bool {
return bytes.Compare(lines[i], lines[j]) < 0
})
hasher := sha256.New()
for _, b := range lines {
_, _ = hasher.Write(b)
_, _ = hasher.Write([]byte("\n"))
}
hash := make([]byte, hex.EncodedLen(hasher.Size()))
hex.Encode(hash, hasher.Sum(nil))
return hash, nil
}
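// For illustration: sorting the show-ref lines before hashing makes the
// result independent of output order, so only the set of (sha, ref) pairs
// matters:
//
//	h1, _ := computeRefHash(dir)
//	h2, _ := computeRefHash(dir) // no ref changes in between
//	// bytes.Equal(h1, h2) == true
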
func (s *Server) ensureRevision(ctx context.Context, repo api.RepoName, rev string, repoDir GitDir) (didUpdate bool) {
if rev == "" || rev == "HEAD" {
return false
}
if conf.Get().DisableAutoGitUpdates {
// ensureRevision may kick off a git fetch operation which we don't want if we've
// configured DisableAutoGitUpdates.
return false
}
// rev-parse on an OID does not check whether the commit actually exists,
// so it always succeeds. We append ^0 to force the existence check.
if isAbsoluteRevision(rev) {
rev = rev + "^0"
}
cmd := exec.Command("git", "rev-parse", rev, "--")
repoDir.Set(cmd)
if err := cmd.Run(); err == nil {
return false
}
// Revision not found, update before returning.
err := s.doRepoUpdate(ctx, repo, rev)
if err != nil {
s.Logger.Warn("failed to perform background repo update", log.Error(err), log.String("repo", string(repo)), log.String("rev", rev))
}
return true
}
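// For illustration (hypothetical revision): for an absolute SHA the check
// becomes `git rev-parse <sha>^0 --`, which fails unless the object exists
// and is a commit; on failure ensureRevision kicks off doRepoUpdate and
// returns true:
//
//	didUpdate := s.ensureRevision(ctx, repo, "e83c5163316f89bfbde7d9ab23ca2e25604af290", dir)
//	// true if the SHA was missing locally and a fetch was attempted
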
const headFileRefPrefix = "ref: "
// quickSymbolicRefHead best-effort mimics the execution of `git symbolic-ref HEAD`, but doesn't exec a child process.
// It just reads the .git/HEAD file from the bare git repository directory.
func quickSymbolicRefHead(dir GitDir) (string, error) {
// See if HEAD contains a commit hash and fail if so.
head, err := os.ReadFile(dir.Path("HEAD"))
if err != nil {
return "", err
}
head = bytes.TrimSpace(head)
if isAbsoluteRevision(string(head)) {
return "", errors.New("ref HEAD is not a symbolic ref")
}
// HEAD doesn't contain a commit hash. It contains something like "ref: refs/heads/master".
if !bytes.HasPrefix(head, []byte(headFileRefPrefix)) {
return "", errors.New("unrecognized HEAD file format")
}
headRef := bytes.TrimPrefix(head, []byte(headFileRefPrefix))
return string(headRef), nil
}
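// For illustration (hypothetical HEAD contents):
//
//	// HEAD contains "ref: refs/heads/main\n"
//	ref, _ := quickSymbolicRefHead(dir) // "refs/heads/main"
//
//	// HEAD contains a 40-char commit SHA (detached)
//	_, err := quickSymbolicRefHead(dir) // error: ref HEAD is not a symbolic ref
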
// quickRevParseHead best-effort mimics the execution of `git rev-parse HEAD`, but doesn't exec a child process.
// It just reads the relevant files from the bare git repository directory.
func quickRevParseHead(dir GitDir) (string, error) {
// See if HEAD contains a commit hash and return it if so.
head, err := os.ReadFile(dir.Path("HEAD"))
if err != nil {
return "", err
}
head = bytes.TrimSpace(head)
if h := string(head); isAbsoluteRevision(h) {
return h, nil
}
// HEAD doesn't contain a commit hash. It contains something like "ref: refs/heads/master".
if !bytes.HasPrefix(head, []byte(headFileRefPrefix)) {
return "", errors.New("unrecognized HEAD file format")
}
// Look for the file in refs/heads. If it exists, it contains the commit hash.
headRef := bytes.TrimPrefix(head, []byte(headFileRefPrefix))
if bytes.HasPrefix(headRef, []byte("../")) || bytes.Contains(headRef, []byte("/../")) || bytes.HasSuffix(headRef, []byte("/..")) {
// 🚨 SECURITY: prevent leakage of file contents outside repo dir
return "", errors.Errorf("invalid ref format: %s", headRef)
}
headRefFile := dir.Path(filepath.FromSlash(string(headRef)))
if refs, err := os.ReadFile(headRefFile); err == nil {
return string(bytes.TrimSpace(refs)), nil
}
// File didn't exist in refs/heads. Look for it in packed-refs.
f, err := os.Open(dir.Path("packed-refs"))
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
fields := bytes.Fields(scanner.Bytes())
if len(fields) != 2 {
continue
}
commit, ref := fields[0], fields[1]
if bytes.Equal(ref, headRef) {
return string(commit), nil
}
}
if err := scanner.Err(); err != nil {
return "", err
}
// Didn't find refs/heads/$HEAD_BRANCH in packed-refs.
return "", errors.New("could not compute `git rev-parse HEAD` in-process; try running the `git` process instead")
}
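// For illustration (hypothetical repository state): with HEAD containing
// "ref: refs/heads/main", quickRevParseHead first reads the loose ref file
// refs/heads/main and then falls back to scanning packed-refs lines of the
// form "<commit-sha> <ref-name>":
//
//	// packed-refs:
//	//   e83c5163316f89bfbde7d9ab23ca2e25604af290 refs/heads/main
//	sha, _ := quickRevParseHead(dir) // "e83c5163316f89bfbde7d9ab23ca2e25604af290"
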
// errorString returns the error string. If err is nil it returns the empty
// string.
func errorString(err error) string {
if err == nil {
return ""
}
return err.Error()
}
// isAbsoluteRevision checks if the revision is a git OID SHA string.
//
// Note: This doesn't mean the SHA exists in a repository, nor does it mean it
// isn't a ref. Git allows 40-char hexadecimal strings to be references.
//
// copied from internal/vcs/git to avoid cyclic import
func isAbsoluteRevision(s string) bool {
if len(s) != 40 {
return false
}
for _, r := range s {
if !(('0' <= r && r <= '9') ||
('a' <= r && r <= 'f') ||
('A' <= r && r <= 'F')) {
return false
}
}
return true
}
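// For illustration:
//
//	isAbsoluteRevision("e83c5163316f89bfbde7d9ab23ca2e25604af290") // true
//	isAbsoluteRevision("HEAD")    // false: not hexadecimal
//	isAbsoluteRevision("e83c516") // false: abbreviated SHAs are not 40 chars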