repos: log corruption events per repo (#45667)

* return a bool for checkMaybeCorrupt * store plumbing for SetCorruptedAt * debug * move to markIfCorrupted + test * add additional checks for repo corruption * add test cases for server repo corruption detection * add corrupted_at field for sql mapping * fix err not being returned * ensure corrupted at gets mapped * test cases to ensure corrupted at gets cleared * add migration * update types for CorrruptedAt * fix sql query and remove field for now * run go generate * Update internal/database/gitserver_repos_test.go Co-authored-by: Erik Seliger <erikseliger@me.com> * Update internal/database/gitserver_repos_test.go Co-authored-by: Erik Seliger <erikseliger@me.com> * Update cmd/gitserver/server/cleanup.go Co-authored-by: Erik Seliger <erikseliger@me.com> * review comments * review feedback - check for err - remove reason - set default timestamp to 0 value * tests pass 🎉 * reset stiched migration graph * go gen magic * move docstring * check for err in corruption check * fix tests 🤞 * add corruption_lod column and rename migration * fix migration scripts * go gen * change corruption_log column type to jsonb * update db mocks * add logCorruption method - log repo corruption into a JSON array - limit repo corruption log to 10 elements - db tests * fix wording and remove test * add CorruptionLog to ignored fields * add more ignores * remove comment * review feedback * log git corrupt stderr * cap corruption reason to 1mb * rename CorruptionLog -> CorruptionLogs * rename corruption_log -> corruption_logs * Update internal/database/gitserver_repos_test.go Co-authored-by: Thorsten Ball <mrnugget@gmail.com> * Fixes Fix rename in cmp IgnoreFields Fix malformed SQL * fix test * add comment to trigger sonar cloud analysis * review comments append to json array in postgres * add comment about json query * comments and fixes * Update cmd/gitserver/server/server_test.go Co-authored-by: Thorsten Ball <mrnugget@gmail.com> * Fix indentation * review feedback - Make corrupted_at column nullable - Check err returned from LogCorruption call - Add condition to make sure we only log more repo corruptions if there repo isn't currently corrupt. - Run sg generate * Fix test - update corrupted_at also when using GitserverRepo store update - clear corrupted_at status during batch sync update * Update cmd/gitserver/server/server_test.go Co-authored-by: Thorsten Ball <mrnugget@gmail.com> * store plumbing for SetCorruptedAt * debug * move to markIfCorrupted + test * add additional checks for repo corruption * add test cases for server repo corruption detection * add migration * update types for CorrruptedAt * fix sql query and remove field for now * review comments * review feedback - check for err - remove reason - set default timestamp to 0 value * reset stiched migration graph * add corruption_lod column and rename migration * go gen * add logCorruption method - log repo corruption into a JSON array - limit repo corruption log to 10 elements - db tests * fix wording and remove test * Fix more indentation 🤦🏻 * review feedback: update test * run sg generate again * test fixes - warn instead of return an error during cleanup - delete repos so that the name can be reused * fix bug - repo was corrupt but did not set the reason so the repo never got cleaned up * review feedback - sql query indentation * review feedback - use assert module * do not clear corruption status on err * add test for LastError and corruptedAt - fix SQL issue on LastError Co-authored-by: Erik Seliger <erikseliger@me.com> Co-authored-by: Thorsten Ball <mrnugget@gmail.com>
2026-02-06 17:51:57 +00:00 · 2023-01-09 17:24:32 +02:00 · 2023-01-09 17:24:32 +02:00 · 7dbfb87a11
commit 7dbfb87a11
parent 4ddf243b16
14 changed files with 581 additions and 57 deletions
--- a/cmd/gitserver/server/cleanup.go
+++ b/cmd/gitserver/server/cleanup.go
@ -255,28 +255,15 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS
 	}

 	maybeRemoveCorrupt := func(dir GitDir) (done bool, _ error) {
-		var reason string
-
-		// We treat repositories missing HEAD to be corrupt. Both our cloning
-		// and fetching ensure there is a HEAD file.
-		if _, err := os.Stat(dir.Path("HEAD")); os.IsNotExist(err) {
-			reason = "missing-head"
-		} else if err != nil {
+		corrupt, reason, err := checkRepoDirCorrupt(dir)
+		if !corrupt || err != nil {
 			return false, err
 		}

-		// We have seen repository corruption fail in such a way that the git
-		// config is missing the bare repo option but everything else looks
-		// like it works. This leads to failing fetches, so treat non-bare
-		// repos as corrupt. Since we often fetch with ensureRevision, this
-		// leads to most commands failing against the repository. It is safer
-		// to remove now than try a safe reclone.
-		if reason == "" && gitIsNonBareBestEffort(dir) {
-			reason = "non-bare"
-		}
-
-		if reason == "" {
-			return false, nil
+		err = s.DB.GitserverRepos().LogCorruption(ctx, s.name(dir), fmt.Sprintf("sourcegraph detected corrupt repo: %s", reason))
+		if err != nil {
+			repoName := string(s.name(dir))
+			logger.Warn("failed to log repo corruption", log.String("repo", repoName), log.Error(err))
 		}

 		s.Logger.Info("removing corrupt repo", log.String("repo", string(dir)), log.String("reason", reason))
@ -336,7 +323,13 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS
 		var reason string
 		const maybeCorrupt = "maybeCorrupt"
 		if maybeCorrupt, _ := gitConfigGet(dir, gitConfigMaybeCorrupt); maybeCorrupt != "" {
+			// Set the reason so that the repo cleaned up
 			reason = maybeCorrupt
+			// We don't log the corruption here, since the corruption *should* have already been
+			// logged when this config setting was set in the repo.
+			// When the repo is recloned, the corrupted_at status should be cleared, which means
+			// the repo is not considered corrupted anymore.
+			//
 			// unset flag to stop constantly re-cloning if it fails.
 			_ = gitConfigUnset(dir, gitConfigMaybeCorrupt)
 		}
@ -571,6 +564,28 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS
 	}
 }

+func checkRepoDirCorrupt(dir GitDir) (bool, string, error) {
+	// We treat repositories missing HEAD to be corrupt. Both our cloning
+	// and fetching ensure there is a HEAD file.
+	if _, err := os.Stat(dir.Path("HEAD")); os.IsNotExist(err) {
+		return true, "missing-head", nil
+	} else if err != nil {
+		return false, "", err
+	}
+
+	// We have seen repository corruption fail in such a way that the git
+	// config is missing the bare repo option but everything else looks
+	// like it works. This leads to failing fetches, so treat non-bare
+	// repos as corrupt. Since we often fetch with ensureRevision, this
+	// leads to most commands failing against the repository. It is safer
+	// to remove now than try a safe reclone.
+	if gitIsNonBareBestEffort(dir) {
+		return true, "non-bare", nil
+	}
+
+	return false, "", nil
+}
+
 // setRepoSizes uses calculated sizes of repos to update database entries of repos
 // with actual sizes, but only up to 10,000 in one run.
 func (s *Server) setRepoSizes(ctx context.Context, repoToSize map[api.RepoName]int64) error {
@ -1027,13 +1042,12 @@ func getRecloneTime(dir GitDir) (time.Time, error) {
 	return time.Unix(sec, 0), nil
 }

-func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, stderr string) {
+func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, stderr string) bool {
 	if !stdErrIndicatesCorruption(stderr) {
-		return
+		return false
 	}

 	logger = logger.With(log.String("repo", string(repo)), log.String("dir", string(dir)))
-
 	logger.Warn("marking repo for re-cloning due to stderr output indicating repo corruption",
 		log.String("stderr", stderr))

@ -1043,6 +1057,8 @@ func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, std
 	if err != nil {
 		logger.Error("failed to set maybeCorruptRepo config", log.Error(err))
 	}
+
+	return true
 }

 // stdErrIndicatesCorruption returns true if the provided stderr output from a git command indicates
--- a/cmd/gitserver/server/server.go
+++ b/cmd/gitserver/server/server.go
@ -839,6 +839,9 @@ func (s *Server) syncRepoState(gitServerAddrs gitserver.GitServerAddresses, batc
 			cloneStatus := cloneStatus(cloned, cloning)
 			if repo.CloneStatus != cloneStatus {
 				repo.CloneStatus = cloneStatus
+				// Since the repo has been recloned or is being cloned
+				// we can reset the corruption
+				repo.CorruptedAt = time.Time{}
 				shouldUpdate = true
 			}

@ -1771,7 +1774,7 @@ func (s *Server) exec(w http.ResponseWriter, r *http.Request, req *protocol.Exec
 	stderrN = stderrW.n

 	stderr := stderrBuf.String()
-	checkMaybeCorruptRepo(s.Logger, req.Repo, dir, stderr)
+	s.logIfCorrupt(ctx, req.Repo, dir, stderr)

 	// write trailer
 	w.Header().Set("X-Exec-Error", errorString(execErr))
@ -1997,6 +2000,15 @@ func (s *Server) setRepoSize(ctx context.Context, name api.RepoName) error {
 	return s.DB.GitserverRepos().SetRepoSize(ctx, name, dirSize(s.dir(name).Path(".")), s.Hostname)
 }

+func (s *Server) logIfCorrupt(ctx context.Context, repo api.RepoName, dir GitDir, stderr string) {
+	if checkMaybeCorruptRepo(s.Logger, repo, dir, stderr) {
+		reason := stderr
+		if err := s.DB.GitserverRepos().LogCorruption(ctx, repo, reason); err != nil {
+			s.Logger.Warn("failed to log repo corruption", log.String("repo", string(repo)), log.Error(err))
+		}
+	}
+}
+
 // setGitAttributes writes our global gitattributes to
 // gitDir/info/attributes. This will override .gitattributes inside of
 // repositories. It is used to unset attributes such as export-ignore.
--- a/cmd/gitserver/server/server_test.go
+++ b/cmd/gitserver/server/server_test.go
@ -815,7 +815,7 @@ func testHandleRepoDelete(t *testing.T, deletedInDB bool) {
 		t.Fatal(err)
 	}

-	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt")
+	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs")

 	// We don't expect an error
 	if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" {
@ -864,7 +864,7 @@ func testHandleRepoDelete(t *testing.T, deletedInDB bool) {
 		t.Fatal(err)
 	}

-	cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt")
+	cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs")

 	// We don't expect an error
 	if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" {
@ -938,7 +938,7 @@ func TestHandleRepoUpdate(t *testing.T) {
 	}

 	// We don't care exactly what the error is here
-	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "LastError")
+	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "LastError", "CorruptionLogs")
 	// But we do care that it exists
 	if fromDB.LastError == "" {
 		t.Errorf("Expected an error when trying to clone from an invalid URL")
@ -967,7 +967,7 @@ func TestHandleRepoUpdate(t *testing.T) {
 		t.Fatal(err)
 	}

-	cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt")
+	cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs")

 	// We don't expect an error
 	if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" {
@ -1109,7 +1109,7 @@ func TestHandleRepoUpdateFromShard(t *testing.T) {
 		t.Fatal(err)
 	}

-	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt")
+	cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs")

 	// We don't expect an error
 	if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" {
@ -1272,6 +1272,7 @@ func TestCloneRepo_EnsureValidity(t *testing.T) {
 			t.Fatalf("expected no error, got %v", err)
 		}

+		// wait for repo to be cloned
 		dst := s.dir("example.com/foo/bar")
 		for i := 0; i < 1000; i++ {
 			_, cloning := s.locker.Status(dst)
@ -1733,6 +1734,73 @@ func TestHeaderXRequestedWithMiddleware(t *testing.T) {
 	})
 }

+func TestLogIfCorrupt(t *testing.T) {
+	logger := logtest.Scoped(t)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	db := database.NewDB(logger, dbtest.NewDB(logger, t))
+	remoteDir := t.TempDir()
+
+	reposDir := t.TempDir()
+	hostname := "test"
+
+	repoName := api.RepoName("example.com/bar/foo")
+	s := makeTestServer(ctx, t, reposDir, remoteDir, db)
+	s.Hostname = hostname
+
+	t.Run("git corruption output creates corruption log", func(t *testing.T) {
+		dbRepo := &types.Repo{
+			Name:        repoName,
+			URI:         string(repoName),
+			Description: "Test",
+		}
+
+		// Insert the repo into our database
+		err := db.Repos().Create(ctx, dbRepo)
+		if err != nil {
+			t.Fatal(err)
+		}
+		t.Cleanup(func() {
+			db.Repos().Delete(ctx, dbRepo.ID)
+		})
+
+		stdErr := "error: packfile .git/objects/pack/pack-e26c1fc0add58b7649a95f3e901e30f29395e174.pack does not match index"
+
+		s.logIfCorrupt(ctx, repoName, s.dir(repoName), stdErr)
+
+		fromDB, err := s.DB.GitserverRepos().GetByName(ctx, repoName)
+		assert.NoError(t, err)
+		assert.Len(t, fromDB.CorruptionLogs, 1)
+		assert.Contains(t, fromDB.CorruptionLogs[0].Reason, stdErr)
+	})
+
+	t.Run("non corruption output does not create corruption log", func(t *testing.T) {
+		dbRepo := &types.Repo{
+			Name:        repoName,
+			URI:         string(repoName),
+			Description: "Test",
+		}
+
+		// Insert the repo into our database
+		err := db.Repos().Create(ctx, dbRepo)
+		if err != nil {
+			t.Fatal(err)
+		}
+		t.Cleanup(func() {
+			db.Repos().Delete(ctx, dbRepo.ID)
+		})
+
+		stdErr := "Brought to you by Horsegraph"
+
+		s.logIfCorrupt(ctx, repoName, s.dir(repoName), stdErr)
+
+		fromDB, err := s.DB.GitserverRepos().GetByName(ctx, repoName)
+		assert.NoError(t, err)
+		assert.Len(t, fromDB.CorruptionLogs, 0)
+	})
+}
+
 func mustEncodeJSONResponse(value any) string {
 	encoded, _ := json.Marshal(value)
 	return strings.TrimSpace(string(encoded))
--- a/internal/database/gitserver_repos.go
+++ b/internal/database/gitserver_repos.go
@ -3,6 +3,7 @@ package database
 import (
 	"context"
 	"database/sql"
+	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@ -33,6 +34,9 @@ type GitserverRepoStore interface {
 	GetByID(ctx context.Context, id api.RepoID) (*types.GitserverRepo, error)
 	GetByName(ctx context.Context, name api.RepoName) (*types.GitserverRepo, error)
 	GetByNames(ctx context.Context, names ...api.RepoName) (map[api.RepoName]*types.GitserverRepo, error)
+	// LogCorruption sets the corrupted at value and logs the corruption reason. Reason will be truncated if it exceeds
+	// MaxReasonSizeInMB
+	LogCorruption(ctx context.Context, name api.RepoName, reason string) error
 	// SetCloneStatus will attempt to update ONLY the clone status of a
 	// GitServerRepo. If a matching row does not yet exist a new one will be created.
 	// If the status value hasn't changed, the row will not be updated.
@ -64,6 +68,9 @@ type GitserverRepoStore interface {

 var _ GitserverRepoStore = (*gitserverRepoStore)(nil)

+// Max reason size megabyte - 1 MB
+const MaxReasonSizeInMB = 1 << 20
+
 // gitserverRepoStore is responsible for data stored in the gitserver_repos table.
 type gitserverRepoStore struct {
 	*basestore.Store
@ -87,13 +94,14 @@ func (s *gitserverRepoStore) Transact(ctx context.Context) (GitserverRepoStore,
 func (s *gitserverRepoStore) Update(ctx context.Context, repos ...*types.GitserverRepo) error {
 	values := make([]*sqlf.Query, 0, len(repos))
 	for _, gr := range repos {
-		values = append(values, sqlf.Sprintf("(%s::integer, %s::text, %s::text, %s::text, %s::timestamp with time zone, %s::timestamp with time zone, %s::bigint, NOW())",
+		values = append(values, sqlf.Sprintf("(%s::integer, %s::text, %s::text, %s::text, %s::timestamp with time zone, %s::timestamp with time zone, %s::timestamp with time zone, %s::bigint, NOW())",
 			gr.RepoID,
 			gr.CloneStatus,
 			gr.ShardID,
 			dbutil.NewNullString(sanitizeToUTF8(gr.LastError)),
 			gr.LastFetched,
 			gr.LastChanged,
+			dbutil.NullTimeColumn(gr.CorruptedAt),
 			&dbutil.NullInt64{N: &gr.RepoSizeBytes},
 		))
 	}
@ -111,12 +119,13 @@ SET
 	last_error = tmp.last_error,
 	last_fetched = tmp.last_fetched,
 	last_changed = tmp.last_changed,
+	corrupted_at = tmp.corrupted_at,
 	repo_size_bytes = tmp.repo_size_bytes,
 	updated_at = NOW()
 FROM (VALUES
-	-- (<repo_id>, <clone_status>, <shard_id>, <last_error>, <last_fetched>, <last_changed>, <repo_size_bytes>),
+	-- (<repo_id>, <clone_status>, <shard_id>, <last_error>, <last_fetched>, <last_changed>, <corrupted_at>, <repo_size_bytes>),
 		%s
-	) AS tmp(repo_id, clone_status, shard_id, last_error, last_fetched, last_changed, repo_size_bytes)
+	) AS tmp(repo_id, clone_status, shard_id, last_error, last_fetched, last_changed, corrupted_at, repo_size_bytes)
 	WHERE
 		tmp.repo_id = gr.repo_id
 `
@ -309,7 +318,9 @@ SELECT
 	gr.last_fetched,
 	gr.last_changed,
 	gr.repo_size_bytes,
-	gr.updated_at
+	gr.updated_at,
+	gr.corrupted_at,
+	gr.corruption_logs
 FROM gitserver_repos gr
 JOIN repo ON gr.repo_id = repo.id
 WHERE %s
@ -339,7 +350,9 @@ SELECT
 	last_fetched,
 	last_changed,
 	repo_size_bytes,
-	updated_at
+	updated_at,
+	corrupted_at,
+	corruption_logs
 FROM gitserver_repos
 WHERE repo_id = %s
 `
@ -350,6 +363,7 @@ func (s *gitserverRepoStore) GetByName(ctx context.Context, name api.RepoName) (
 		if err == sql.ErrNoRows {
 			return nil, &errGitserverRepoNotFound{}
 		}
+		return nil, err
 	}
 	return repo, nil
 }
@ -365,7 +379,9 @@ SELECT
 	gr.last_fetched,
 	gr.last_changed,
 	gr.repo_size_bytes,
-	gr.updated_at
+	gr.updated_at,
+	gr.corrupted_at,
+	gr.corruption_logs
 FROM gitserver_repos gr
 JOIN repo r ON r.id = gr.repo_id
 WHERE r.name = %s
@ -386,7 +402,9 @@ SELECT
 	gr.last_fetched,
 	gr.last_changed,
 	gr.repo_size_bytes,
-	gr.updated_at
+	gr.updated_at,
+	gr.corrupted_at,
+	gr.corruption_logs
 FROM gitserver_repos gr
 JOIN repo r on r.id = gr.repo_id
 WHERE r.name = ANY (%s)
@ -414,6 +432,7 @@ func (s *gitserverRepoStore) GetByNames(ctx context.Context, names ...api.RepoNa

 func scanGitserverRepo(scanner dbutil.Scanner) (*types.GitserverRepo, api.RepoName, error) {
 	var gr types.GitserverRepo
+	var rawLogs []byte
 	var cloneStatus string
 	var repoName api.RepoName
 	err := scanner.Scan(
@ -426,12 +445,18 @@ func scanGitserverRepo(scanner dbutil.Scanner) (*types.GitserverRepo, api.RepoNa
 		&gr.LastChanged,
 		&dbutil.NullInt64{N: &gr.RepoSizeBytes},
 		&gr.UpdatedAt,
+		&dbutil.NullTime{Time: &gr.CorruptedAt},
+		&rawLogs,
 	)
 	if err != nil {
 		return nil, "", errors.Wrap(err, "scanning GitserverRepo")
 	}
 	gr.CloneStatus = types.ParseCloneStatus(cloneStatus)

+	err = json.Unmarshal(rawLogs, &gr.CorruptionLogs)
+	if err != nil {
+		return nil, repoName, errors.Wrap(err, "unmarshal of corruption_logs failed")
+	}
 	return &gr, repoName, nil
 }

@ -439,6 +464,7 @@ func (s *gitserverRepoStore) SetCloneStatus(ctx context.Context, name api.RepoNa
 	err := s.Exec(ctx, sqlf.Sprintf(`
 UPDATE gitserver_repos
 SET
+	corrupted_at = NULL,
 	clone_status = %s,
 	shard_id = %s,
 	updated_at = NOW()
@ -495,6 +521,46 @@ WHERE
 	return nil
 }

+func (s *gitserverRepoStore) LogCorruption(ctx context.Context, name api.RepoName, reason string) error {
+	// trim reason to 1 MB so that we don't store huge reasons and run into trouble when it gets too large
+	if len(reason) > MaxReasonSizeInMB {
+		reason = reason[:MaxReasonSizeInMB]
+	}
+
+	log := types.RepoCorruptionLog{
+		Timestamp: time.Now(),
+		Reason:    reason,
+	}
+	var rawLog []byte
+	if data, err := json.Marshal(log); err != nil {
+		return errors.Wrap(err, "could not marshal corruption_logs")
+	} else {
+		rawLog = data
+	}
+
+	res, err := s.ExecResult(ctx, sqlf.Sprintf(`
+UPDATE gitserver_repos as gtr
+SET
+	corrupted_at = NOW(),
+	-- prepend the json and then ensure we only keep 10 items in the resulting json array
+	corruption_logs = (SELECT jsonb_path_query_array(%s||gtr.corruption_logs, '$[0 to 9]')),
+	updated_at = NOW()
+WHERE
+	repo_id = (SELECT id FROM repo WHERE name = %s)
+AND
+	corrupted_at IS NULL`, rawLog, name))
+	if err != nil {
+		return errors.Wrapf(err, "logging repo corruption")
+	}
+
+	if nrows, err := res.RowsAffected(); err != nil {
+		return errors.Wrapf(err, "getting rows affected")
+	} else if nrows != 1 {
+		return errors.New("repo not found or already corrupt")
+	}
+	return nil
+}
+
 // GitserverFetchData is the metadata associated with a fetch operation on
 // gitserver.
 type GitserverFetchData struct {
@ -510,6 +576,7 @@ func (s *gitserverRepoStore) SetLastFetched(ctx context.Context, name api.RepoNa
 	res, err := s.ExecResult(ctx, sqlf.Sprintf(`
 UPDATE gitserver_repos
 SET
+	corrupted_at = NULL,
 	last_fetched = %s,
 	last_changed = %s,
 	shard_id = %s,
--- a/internal/database/gitserver_repos_test.go
+++ b/internal/database/gitserver_repos_test.go
@ -375,7 +375,7 @@ func TestGitserverReposGetByID(t *testing.T) {
 		t.Fatal(err)
 	}

-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}
 }
@ -401,7 +401,7 @@ func TestGitserverReposGetByName(t *testing.T) {
 		t.Fatal(err)
 	}

-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}
 }
@ -441,7 +441,7 @@ func TestGitserverReposGetByNames(t *testing.T) {
 		sort.Slice(haveRepos, func(i, j int) bool {
 			return haveRepos[i].RepoID < haveRepos[j].RepoID
 		})
-		if diff := cmp.Diff(gitserverRepos[:i+1], haveRepos, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+		if diff := cmp.Diff(gitserverRepos[:i+1], haveRepos, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 			t.Fatal(diff)
 		}
 	}
@ -473,7 +473,7 @@ func TestSetCloneStatus(t *testing.T) {

 	gitserverRepo.CloneStatus = types.CloneStatusCloned
 	gitserverRepo.ShardID = shardID
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -500,7 +500,7 @@ func TestSetCloneStatus(t *testing.T) {
 		ShardID:     shardID,
 		CloneStatus: types.CloneStatusCloned,
 	}
-	if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -515,6 +515,160 @@ func TestSetCloneStatus(t *testing.T) {
 	}
 }

+func TestLogCorruption(t *testing.T) {
+	if testing.Short() {
+		t.Skip()
+	}
+
+	logger := logtest.Scoped(t)
+	db := NewDB(logger, dbtest.NewDB(logger, t))
+	ctx := context.Background()
+
+	t.Run("log repo corruption sets corrupted_at time", func(t *testing.T) {
+		repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo1",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+		logRepoCorruption(t, db, repo.Name, "test")
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to get repo by id: %s", err)
+		}
+
+		if fromDB.CorruptedAt.IsZero() {
+			t.Errorf("Expected corruptedAt time to be set. Got zero value for time %q", fromDB.CorruptedAt)
+		}
+		// We should have one corruption log entry
+		if len(fromDB.CorruptionLogs) != 1 {
+			t.Errorf("Wanted 1 Corruption log entries,  got %d entries", len(fromDB.CorruptionLogs))
+		}
+		if fromDB.CorruptionLogs[0].Timestamp.IsZero() {
+			t.Errorf("Corruption Log entry expected to have non zero timestamp. Got %q", fromDB.CorruptionLogs[0])
+		}
+		if fromDB.CorruptionLogs[0].Reason != "test" {
+			t.Errorf("Wanted Corruption Log reason %q got %q", "test", fromDB.CorruptionLogs[0].Reason)
+		}
+	})
+	t.Run("setting clone status clears corruptedAt time", func(t *testing.T) {
+		repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo2",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+		logRepoCorruption(t, db, repo.Name, "test 2")
+
+		setGitserverRepoCloneStatus(t, db, repo.Name, types.CloneStatusCloned)
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to get repo by id: %s", err)
+		}
+		if !fromDB.CorruptedAt.IsZero() {
+			t.Errorf("Setting clone status should set corrupt_at value to zero time value. Got non zero value for time %q", fromDB.CorruptedAt)
+		}
+	})
+	t.Run("setting last error does not clear corruptedAt time", func(t *testing.T) {
+		repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo3",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+		logRepoCorruption(t, db, repo.Name, "test 3")
+
+		setGitserverRepoLastChanged(t, db, repo.Name, time.Now())
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to get repo by id: %s", err)
+		}
+		if !fromDB.CorruptedAt.IsZero() {
+			t.Errorf("Setting Last Changed should set corrupted at value to zero time value. Got non zero value for time %q", fromDB.CorruptedAt)
+		}
+	})
+	t.Run("setting clone status clears corruptedAt time", func(t *testing.T) {
+		repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo4",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+		logRepoCorruption(t, db, repo.Name, "test 3")
+
+		setGitserverRepoLastError(t, db, repo.Name, "This is a TEST ERAWR")
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to get repo by id: %s", err)
+		}
+		if fromDB.CorruptedAt.IsZero() {
+			t.Errorf("Setting Last Error should not clear the corruptedAt value")
+		}
+	})
+	t.Run("consecutive corruption logs appends", func(t *testing.T) {
+		repo, gitserverRepo := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo5",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+		for i := 0; i < 12; i++ {
+			logRepoCorruption(t, db, repo.Name, fmt.Sprintf("test %d", i))
+			// We set the Clone status so that the 'corrupted_at' time gets cleared
+			// otherwise we cannot log corruption for a repo that is already corrupt
+			gitserverRepo.CloneStatus = types.CloneStatusCloned
+			gitserverRepo.CorruptedAt = time.Time{}
+			if err := db.GitserverRepos().Update(ctx, gitserverRepo); err != nil {
+				t.Fatal(err)
+			}
+
+		}
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to retrieve repo from db: %s", err)
+		}
+
+		// We added 12 entries but we only keep 10
+		if len(fromDB.CorruptionLogs) != 10 {
+			t.Errorf("expected 10 corruption log entries but got %d", len(fromDB.CorruptionLogs))
+		}
+
+		// A log entry gets prepended to the json array, so:
+		// first entry = most recent log entry
+		// last entry = oldest log entry
+		// Our most recent log entry (idx 0!) should have "test 11" as the reason ie. the last element the loop
+		// that we added
+		wanted := "test 11"
+		if fromDB.CorruptionLogs[0].Reason != wanted {
+			t.Errorf("Wanted %q for last corruption log entry but got %q", wanted, fromDB.CorruptionLogs[9].Reason)
+		}
+
+	})
+	t.Run("large reason gets truncated", func(t *testing.T) {
+		repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{
+			Name:          "github.com/sourcegraph/repo6",
+			RepoSizeBytes: 100,
+			CloneStatus:   types.CloneStatusNotCloned,
+		})
+
+		largeReason := make([]byte, MaxReasonSizeInMB*2)
+		for i := 0; i < len(largeReason); i++ {
+			largeReason[i] = 'a'
+		}
+
+		logRepoCorruption(t, db, repo.Name, string(largeReason))
+
+		fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID)
+		if err != nil {
+			t.Fatalf("failed to retrieve repo from db: %s", err)
+		}
+
+		if len(fromDB.CorruptionLogs[0].Reason) == len(largeReason) {
+			t.Errorf("expected reason to be truncated - got length=%d, wanted=%d", len(fromDB.CorruptionLogs[0].Reason), MaxReasonSizeInMB)
+		}
+	})
+}
+
 func TestSetLastError(t *testing.T) {
 	if testing.Short() {
 		t.Skip()
@ -546,7 +700,7 @@ func TestSetLastError(t *testing.T) {
 	}

 	gitserverRepo.LastError = "oops"
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -563,7 +717,7 @@ func TestSetLastError(t *testing.T) {
 	}

 	gitserverRepo.LastError = emptyErr
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -623,7 +777,7 @@ func TestSetRepoSize(t *testing.T) {
 	gitserverRepo.RepoSizeBytes = 200
 	// If we have size, we can assume it's cloned
 	gitserverRepo.CloneStatus = types.CloneStatusCloned
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -654,7 +808,7 @@ func TestSetRepoSize(t *testing.T) {
 		// If we have size, we can assume it's cloned
 		CloneStatus: types.CloneStatusCloned,
 	}
-	if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CloneStatus")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CloneStatus", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -696,7 +850,7 @@ func TestGitserverRepo_Update(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -717,7 +871,7 @@ func TestGitserverRepo_Update(t *testing.T) {
 	// Set LastError to the expected error string but without the null character, because we expect
 	// our code to work and strip it before writing to the DB.
 	gitserverRepo.LastError = "Oops"
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -730,7 +884,7 @@ func TestGitserverRepo_Update(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}
 }
@ -769,7 +923,7 @@ func TestGitserverRepoUpdateMany(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		if diff := cmp.Diff(gitserverRepo1, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+		if diff := cmp.Diff(gitserverRepo1, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 			t.Fatal(diff)
 		}
 	})
@ -778,7 +932,7 @@ func TestGitserverRepoUpdateMany(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+		if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 			t.Fatal(diff)
 		}
 	})
@ -826,7 +980,7 @@ func TestGitserverRepoListReposWithoutSize(t *testing.T) {
 		t.Fatal(err)
 	}

-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

@ -851,7 +1005,7 @@ func TestGitserverRepoListReposWithoutSize(t *testing.T) {
 	}

 	// Check that nothing except UpdatedAt and RepoSizeBytes has been changed
-	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "RepoSizeBytes")); diff != "" {
+	if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "RepoSizeBytes", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}
 }
@ -907,7 +1061,7 @@ func TestGitserverUpdateRepoSizes(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		if diff := cmp.Diff(repo, reloaded, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" {
+		if diff := cmp.Diff(repo, reloaded, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" {
 			t.Fatal(diff)
 		}
 		// Separately make sure UpdatedAt has changed, though
@ -974,10 +1128,11 @@ func createTestRepo(ctx context.Context, t *testing.T, db DB, payload *createTes
 	}

 	want := &types.GitserverRepo{
-		RepoID:      repo.ID,
-		CloneStatus: types.CloneStatusNotCloned,
+		RepoID:         repo.ID,
+		CloneStatus:    types.CloneStatusNotCloned,
+		CorruptionLogs: []types.RepoCorruptionLog{},
 	}
-	if diff := cmp.Diff(want, gitserverRepo, cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "UpdatedAt")); diff != "" {
+	if diff := cmp.Diff(want, gitserverRepo, cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "UpdatedAt", "CorruptionLogs")); diff != "" {
 		t.Fatal(diff)
 	}

--- a/internal/database/mocks_temp.go
+++ b/internal/database/mocks_temp.go
@ -23132,6 +23132,9 @@ type MockGitserverRepoStore struct {
 	// ListReposWithoutSizeFunc is an instance of a mock function object
 	// controlling the behavior of the method ListReposWithoutSize.
 	ListReposWithoutSizeFunc *GitserverRepoStoreListReposWithoutSizeFunc
+	// LogCorruptionFunc is an instance of a mock function object
+	// controlling the behavior of the method LogCorruption.
+	LogCorruptionFunc *GitserverRepoStoreLogCorruptionFunc
 	// SetCloneStatusFunc is an instance of a mock function object
 	// controlling the behavior of the method SetCloneStatus.
 	SetCloneStatusFunc *GitserverRepoStoreSetCloneStatusFunc
@ -23204,6 +23207,11 @@ func NewMockGitserverRepoStore() *MockGitserverRepoStore {
 				return
 			},
 		},
+		LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{
+			defaultHook: func(context.Context, api.RepoName, string) (r0 error) {
+				return
+			},
+		},
 		SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{
 			defaultHook: func(context.Context, api.RepoName, types.CloneStatus, string) (r0 error) {
 				return
@ -23292,6 +23300,11 @@ func NewStrictMockGitserverRepoStore() *MockGitserverRepoStore {
 				panic("unexpected invocation of MockGitserverRepoStore.ListReposWithoutSize")
 			},
 		},
+		LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{
+			defaultHook: func(context.Context, api.RepoName, string) error {
+				panic("unexpected invocation of MockGitserverRepoStore.LogCorruption")
+			},
+		},
 		SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{
 			defaultHook: func(context.Context, api.RepoName, types.CloneStatus, string) error {
 				panic("unexpected invocation of MockGitserverRepoStore.SetCloneStatus")
@ -23364,6 +23377,9 @@ func NewMockGitserverRepoStoreFrom(i GitserverRepoStore) *MockGitserverRepoStore
 		ListReposWithoutSizeFunc: &GitserverRepoStoreListReposWithoutSizeFunc{
 			defaultHook: i.ListReposWithoutSize,
 		},
+		LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{
+			defaultHook: i.LogCorruption,
+		},
 		SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{
 			defaultHook: i.SetCloneStatus,
 		},
@ -24267,6 +24283,117 @@ func (c GitserverRepoStoreListReposWithoutSizeFuncCall) Results() []interface{}
 	return []interface{}{c.Result0, c.Result1}
 }

+// GitserverRepoStoreLogCorruptionFunc describes the behavior when the
+// LogCorruption method of the parent MockGitserverRepoStore instance is
+// invoked.
+type GitserverRepoStoreLogCorruptionFunc struct {
+	defaultHook func(context.Context, api.RepoName, string) error
+	hooks       []func(context.Context, api.RepoName, string) error
+	history     []GitserverRepoStoreLogCorruptionFuncCall
+	mutex       sync.Mutex
+}
+
+// LogCorruption delegates to the next hook function in the queue and stores
+// the parameter and result values of this invocation.
+func (m *MockGitserverRepoStore) LogCorruption(v0 context.Context, v1 api.RepoName, v2 string) error {
+	r0 := m.LogCorruptionFunc.nextHook()(v0, v1, v2)
+	m.LogCorruptionFunc.appendCall(GitserverRepoStoreLogCorruptionFuncCall{v0, v1, v2, r0})
+	return r0
+}
+
+// SetDefaultHook sets function that is called when the LogCorruption method
+// of the parent MockGitserverRepoStore instance is invoked and the hook
+// queue is empty.
+func (f *GitserverRepoStoreLogCorruptionFunc) SetDefaultHook(hook func(context.Context, api.RepoName, string) error) {
+	f.defaultHook = hook
+}
+
+// PushHook adds a function to the end of hook queue. Each invocation of the
+// LogCorruption method of the parent MockGitserverRepoStore instance
+// invokes the hook at the front of the queue and discards it. After the
+// queue is empty, the default hook function is invoked for any future
+// action.
+func (f *GitserverRepoStoreLogCorruptionFunc) PushHook(hook func(context.Context, api.RepoName, string) error) {
+	f.mutex.Lock()
+	f.hooks = append(f.hooks, hook)
+	f.mutex.Unlock()
+}
+
+// SetDefaultReturn calls SetDefaultHook with a function that returns the
+// given values.
+func (f *GitserverRepoStoreLogCorruptionFunc) SetDefaultReturn(r0 error) {
+	f.SetDefaultHook(func(context.Context, api.RepoName, string) error {
+		return r0
+	})
+}
+
+// PushReturn calls PushHook with a function that returns the given values.
+func (f *GitserverRepoStoreLogCorruptionFunc) PushReturn(r0 error) {
+	f.PushHook(func(context.Context, api.RepoName, string) error {
+		return r0
+	})
+}
+
+func (f *GitserverRepoStoreLogCorruptionFunc) nextHook() func(context.Context, api.RepoName, string) error {
+	f.mutex.Lock()
+	defer f.mutex.Unlock()
+
+	if len(f.hooks) == 0 {
+		return f.defaultHook
+	}
+
+	hook := f.hooks[0]
+	f.hooks = f.hooks[1:]
+	return hook
+}
+
+func (f *GitserverRepoStoreLogCorruptionFunc) appendCall(r0 GitserverRepoStoreLogCorruptionFuncCall) {
+	f.mutex.Lock()
+	f.history = append(f.history, r0)
+	f.mutex.Unlock()
+}
+
+// History returns a sequence of GitserverRepoStoreLogCorruptionFuncCall
+// objects describing the invocations of this function.
+func (f *GitserverRepoStoreLogCorruptionFunc) History() []GitserverRepoStoreLogCorruptionFuncCall {
+	f.mutex.Lock()
+	history := make([]GitserverRepoStoreLogCorruptionFuncCall, len(f.history))
+	copy(history, f.history)
+	f.mutex.Unlock()
+
+	return history
+}
+
+// GitserverRepoStoreLogCorruptionFuncCall is an object that describes an
+// invocation of method LogCorruption on an instance of
+// MockGitserverRepoStore.
+type GitserverRepoStoreLogCorruptionFuncCall struct {
+	// Arg0 is the value of the 1st argument passed to this method
+	// invocation.
+	Arg0 context.Context
+	// Arg1 is the value of the 2nd argument passed to this method
+	// invocation.
+	Arg1 api.RepoName
+	// Arg2 is the value of the 3rd argument passed to this method
+	// invocation.
+	Arg2 string
+	// Result0 is the value of the 1st result returned from this method
+	// invocation.
+	Result0 error
+}
+
+// Args returns an interface slice containing the arguments of this
+// invocation.
+func (c GitserverRepoStoreLogCorruptionFuncCall) Args() []interface{} {
+	return []interface{}{c.Arg0, c.Arg1, c.Arg2}
+}
+
+// Results returns an interface slice containing the results of this
+// invocation.
+func (c GitserverRepoStoreLogCorruptionFuncCall) Results() []interface{} {
+	return []interface{}{c.Result0}
+}
+
 // GitserverRepoStoreSetCloneStatusFunc describes the behavior when the
 // SetCloneStatus method of the parent MockGitserverRepoStore instance is
 // invoked.
--- a/internal/database/repos_test.go
+++ b/internal/database/repos_test.go
@ -119,6 +119,15 @@ func setGitserverRepoLastError(t *testing.T, db DB, name api.RepoName, msg strin
 	}
 }

+func logRepoCorruption(t *testing.T, db DB, name api.RepoName, logOutput string) {
+	t.Helper()
+
+	err := db.GitserverRepos().LogCorruption(context.Background(), name, logOutput)
+	if err != nil {
+		t.Fatalf("failed to log repo corruption: %s", err)
+	}
+}
+
 func setZoektIndexed(t *testing.T, db DB, name api.RepoName) {
 	t.Helper()
 	ctx := context.Background()
--- a/internal/database/schema.json
+++ b/internal/database/schema.json
@ -10686,6 +10686,32 @@
          "GenerationExpression": "",
          "Comment": ""
        },
+        {
+          "Name": "corrupted_at",
+          "Index": 10,
+          "TypeName": "timestamp with time zone",
+          "IsNullable": true,
+          "Default": "",
+          "CharacterMaximumLength": 0,
+          "IsIdentity": false,
+          "IdentityGeneration": "",
+          "IsGenerated": "NEVER",
+          "GenerationExpression": "",
+          "Comment": "Timestamp of when repo corruption was detected"
+        },
+        {
+          "Name": "corruption_logs",
+          "Index": 11,
+          "TypeName": "jsonb",
+          "IsNullable": false,
+          "Default": "'[]'::jsonb",
+          "CharacterMaximumLength": 0,
+          "IsIdentity": false,
+          "IdentityGeneration": "",
+          "IsGenerated": "NEVER",
+          "GenerationExpression": "",
+          "Comment": "Log output of repo corruptions that have been detected - encoded as json"
+        },
        {
          "Name": "last_changed",
          "Index": 7,
--- a/internal/database/schema.md
+++ b/internal/database/schema.md
@ -1477,6 +1477,8 @@ Indexes:
 last_fetched    | timestamp with time zone |           | not null | now()
 last_changed    | timestamp with time zone |           | not null | now()
 repo_size_bytes | bigint                   |           |          | 
+ corrupted_at    | timestamp with time zone |           |          | 
+ corruption_logs | jsonb                    |           | not null | '[]'::jsonb
 Indexes:
    "gitserver_repos_pkey" PRIMARY KEY, btree (repo_id)
    "gitserver_repo_size_bytes" btree (repo_size_bytes)
@ -1496,6 +1498,10 @@ Triggers:

 ```

+**corrupted_at**: Timestamp of when repo corruption was detected
+
+**corruption_logs**: Log output of repo corruptions that have been detected - encoded as json
+
 # Table "public.gitserver_repos_statistics"
 ```
    Column    |  Type  | Collation | Nullable | Default 
--- a/internal/types/types.go
+++ b/internal/types/types.go
@ -4,6 +4,7 @@ package types
 import (
 	"context"
 	"database/sql"
+	"encoding/json"
 	"fmt"
 	"reflect"
 	"sort"
@ -557,7 +558,26 @@ type GitserverRepo struct {
 	LastChanged time.Time
 	// Size of the repository in bytes.
 	RepoSizeBytes int64
-	UpdatedAt     time.Time
+	// Time when corruption of repo was detected
+	CorruptedAt time.Time
+	UpdatedAt   time.Time
+	// A log of the different types of corruption that was detected on this repo. The order of the log entries are
+	// stored from most recent to least recent and capped at 10 entries. See LogCorruption on Gitserverrepo store.
+	CorruptionLogs []RepoCorruptionLog
+}
+
+// RepoCorruptionLog represents a corruption event that has been detected on a repo.
+type RepoCorruptionLog struct {
+	// When the corruption event was detected
+	Timestamp time.Time `json:"time"`
+	// Why the repo is considered to be corrupt. Can be git output stderr output or a short reason like "missing head"
+	Reason string `json:"reason"`
+}
+
+func UnmarshalCorruptionLog(data []byte) ([]RepoCorruptionLog, error) {
+	var logs []RepoCorruptionLog
+	err := json.Unmarshal(data, &logs)
+	return logs, err
 }

 // ExternalService is a connection to an external service.
--- a/migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql
+++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql
@ -0,0 +1,4 @@
+ALTER TABLE gitserver_repos
+    DROP COLUMN IF EXISTS corrupted_at,
+    DROP COLUMN IF EXISTS corrupted_logs;
+
--- a/migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml
+++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml
@ -0,0 +1,2 @@
+name: add_gitserver_corruption_columns
+parents: [1670350006, 1670543231]
--- a/migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql
+++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql
@ -0,0 +1,6 @@
+ALTER TABLE gitserver_repos
+    ADD COLUMN IF NOT EXISTS corrupted_at TIMESTAMP WITH TIME ZONE,
+    ADD COLUMN IF NOT EXISTS corruption_logs JSONB NOT NULL DEFAULT '[]';
+
+COMMENT ON COLUMN gitserver_repos.corrupted_at IS 'Timestamp of when repo corruption was detected';
+COMMENT ON COLUMN gitserver_repos.corruption_logs IS 'Log output of repo corruptions that have been detected - encoded as json';
--- a/migrations/frontend/squashed.sql
+++ b/migrations/frontend/squashed.sql
@ -2178,9 +2178,15 @@ CREATE TABLE gitserver_repos (
    updated_at timestamp with time zone DEFAULT now() NOT NULL,
    last_fetched timestamp with time zone DEFAULT now() NOT NULL,
    last_changed timestamp with time zone DEFAULT now() NOT NULL,
-    repo_size_bytes bigint
+    repo_size_bytes bigint,
+    corrupted_at timestamp with time zone,
+    corruption_logs jsonb DEFAULT '[]'::jsonb NOT NULL
 );

+COMMENT ON COLUMN gitserver_repos.corrupted_at IS 'Timestamp of when repo corruption was detected';
+
+COMMENT ON COLUMN gitserver_repos.corruption_logs IS 'Log output of repo corruptions that have been detected - encoded as json';
+
 CREATE TABLE gitserver_repos_statistics (
    shard_id text NOT NULL,
    total bigint DEFAULT 0 NOT NULL,