From 7dbfb87a11d89c01dcb2a5c28aedd7872851809e Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Mon, 9 Jan 2023 17:24:32 +0200 Subject: [PATCH] repos: log corruption events per repo (#45667) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * return a bool for checkMaybeCorrupt * store plumbing for SetCorruptedAt * debug * move to markIfCorrupted + test * add additional checks for repo corruption * add test cases for server repo corruption detection * add corrupted_at field for sql mapping * fix err not being returned * ensure corrupted at gets mapped * test cases to ensure corrupted at gets cleared * add migration * update types for CorrruptedAt * fix sql query and remove field for now * run go generate * Update internal/database/gitserver_repos_test.go Co-authored-by: Erik Seliger * Update internal/database/gitserver_repos_test.go Co-authored-by: Erik Seliger * Update cmd/gitserver/server/cleanup.go Co-authored-by: Erik Seliger * review comments * review feedback - check for err - remove reason - set default timestamp to 0 value * tests pass 🎉 * reset stiched migration graph * go gen magic * move docstring * check for err in corruption check * fix tests 🤞 * add corruption_lod column and rename migration * fix migration scripts * go gen * change corruption_log column type to jsonb * update db mocks * add logCorruption method - log repo corruption into a JSON array - limit repo corruption log to 10 elements - db tests * fix wording and remove test * add CorruptionLog to ignored fields * add more ignores * remove comment * review feedback * log git corrupt stderr * cap corruption reason to 1mb * rename CorruptionLog -> CorruptionLogs * rename corruption_log -> corruption_logs * Update internal/database/gitserver_repos_test.go Co-authored-by: Thorsten Ball * Fixes Fix rename in cmp IgnoreFields Fix malformed SQL * fix test * add comment to trigger sonar cloud analysis * review comments append to json array in postgres * add comment about json query * comments and fixes * Update cmd/gitserver/server/server_test.go Co-authored-by: Thorsten Ball * Fix indentation * review feedback - Make corrupted_at column nullable - Check err returned from LogCorruption call - Add condition to make sure we only log more repo corruptions if there repo isn't currently corrupt. - Run sg generate * Fix test - update corrupted_at also when using GitserverRepo store update - clear corrupted_at status during batch sync update * Update cmd/gitserver/server/server_test.go Co-authored-by: Thorsten Ball * store plumbing for SetCorruptedAt * debug * move to markIfCorrupted + test * add additional checks for repo corruption * add test cases for server repo corruption detection * add migration * update types for CorrruptedAt * fix sql query and remove field for now * review comments * review feedback - check for err - remove reason - set default timestamp to 0 value * reset stiched migration graph * add corruption_lod column and rename migration * go gen * add logCorruption method - log repo corruption into a JSON array - limit repo corruption log to 10 elements - db tests * fix wording and remove test * Fix more indentation 🤦🏻 * review feedback: update test * run sg generate again * test fixes - warn instead of return an error during cleanup - delete repos so that the name can be reused * fix bug - repo was corrupt but did not set the reason so the repo never got cleaned up * review feedback - sql query indentation * review feedback - use assert module * do not clear corruption status on err * add test for LastError and corruptedAt - fix SQL issue on LastError Co-authored-by: Erik Seliger Co-authored-by: Thorsten Ball --- cmd/gitserver/server/cleanup.go | 60 ++++-- cmd/gitserver/server/server.go | 14 +- cmd/gitserver/server/server_test.go | 78 ++++++- internal/database/gitserver_repos.go | 81 +++++++- internal/database/gitserver_repos_test.go | 195 ++++++++++++++++-- internal/database/mocks_temp.go | 127 ++++++++++++ internal/database/repos_test.go | 9 + internal/database/schema.json | 26 +++ internal/database/schema.md | 6 + internal/types/types.go | 22 +- .../down.sql | 4 + .../metadata.yaml | 2 + .../up.sql | 6 + migrations/frontend/squashed.sql | 8 +- 14 files changed, 581 insertions(+), 57 deletions(-) create mode 100644 migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql create mode 100644 migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml create mode 100644 migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql diff --git a/cmd/gitserver/server/cleanup.go b/cmd/gitserver/server/cleanup.go index 48f421b7ef0..08253f8df23 100644 --- a/cmd/gitserver/server/cleanup.go +++ b/cmd/gitserver/server/cleanup.go @@ -255,28 +255,15 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS } maybeRemoveCorrupt := func(dir GitDir) (done bool, _ error) { - var reason string - - // We treat repositories missing HEAD to be corrupt. Both our cloning - // and fetching ensure there is a HEAD file. - if _, err := os.Stat(dir.Path("HEAD")); os.IsNotExist(err) { - reason = "missing-head" - } else if err != nil { + corrupt, reason, err := checkRepoDirCorrupt(dir) + if !corrupt || err != nil { return false, err } - // We have seen repository corruption fail in such a way that the git - // config is missing the bare repo option but everything else looks - // like it works. This leads to failing fetches, so treat non-bare - // repos as corrupt. Since we often fetch with ensureRevision, this - // leads to most commands failing against the repository. It is safer - // to remove now than try a safe reclone. - if reason == "" && gitIsNonBareBestEffort(dir) { - reason = "non-bare" - } - - if reason == "" { - return false, nil + err = s.DB.GitserverRepos().LogCorruption(ctx, s.name(dir), fmt.Sprintf("sourcegraph detected corrupt repo: %s", reason)) + if err != nil { + repoName := string(s.name(dir)) + logger.Warn("failed to log repo corruption", log.String("repo", repoName), log.Error(err)) } s.Logger.Info("removing corrupt repo", log.String("repo", string(dir)), log.String("reason", reason)) @@ -336,7 +323,13 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS var reason string const maybeCorrupt = "maybeCorrupt" if maybeCorrupt, _ := gitConfigGet(dir, gitConfigMaybeCorrupt); maybeCorrupt != "" { + // Set the reason so that the repo cleaned up reason = maybeCorrupt + // We don't log the corruption here, since the corruption *should* have already been + // logged when this config setting was set in the repo. + // When the repo is recloned, the corrupted_at status should be cleared, which means + // the repo is not considered corrupted anymore. + // // unset flag to stop constantly re-cloning if it fails. _ = gitConfigUnset(dir, gitConfigMaybeCorrupt) } @@ -571,6 +564,28 @@ func (s *Server) cleanupRepos(ctx context.Context, gitServerAddrs gitserver.GitS } } +func checkRepoDirCorrupt(dir GitDir) (bool, string, error) { + // We treat repositories missing HEAD to be corrupt. Both our cloning + // and fetching ensure there is a HEAD file. + if _, err := os.Stat(dir.Path("HEAD")); os.IsNotExist(err) { + return true, "missing-head", nil + } else if err != nil { + return false, "", err + } + + // We have seen repository corruption fail in such a way that the git + // config is missing the bare repo option but everything else looks + // like it works. This leads to failing fetches, so treat non-bare + // repos as corrupt. Since we often fetch with ensureRevision, this + // leads to most commands failing against the repository. It is safer + // to remove now than try a safe reclone. + if gitIsNonBareBestEffort(dir) { + return true, "non-bare", nil + } + + return false, "", nil +} + // setRepoSizes uses calculated sizes of repos to update database entries of repos // with actual sizes, but only up to 10,000 in one run. func (s *Server) setRepoSizes(ctx context.Context, repoToSize map[api.RepoName]int64) error { @@ -1027,13 +1042,12 @@ func getRecloneTime(dir GitDir) (time.Time, error) { return time.Unix(sec, 0), nil } -func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, stderr string) { +func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, stderr string) bool { if !stdErrIndicatesCorruption(stderr) { - return + return false } logger = logger.With(log.String("repo", string(repo)), log.String("dir", string(dir))) - logger.Warn("marking repo for re-cloning due to stderr output indicating repo corruption", log.String("stderr", stderr)) @@ -1043,6 +1057,8 @@ func checkMaybeCorruptRepo(logger log.Logger, repo api.RepoName, dir GitDir, std if err != nil { logger.Error("failed to set maybeCorruptRepo config", log.Error(err)) } + + return true } // stdErrIndicatesCorruption returns true if the provided stderr output from a git command indicates diff --git a/cmd/gitserver/server/server.go b/cmd/gitserver/server/server.go index 3e7dedae58d..5de038cadd7 100644 --- a/cmd/gitserver/server/server.go +++ b/cmd/gitserver/server/server.go @@ -839,6 +839,9 @@ func (s *Server) syncRepoState(gitServerAddrs gitserver.GitServerAddresses, batc cloneStatus := cloneStatus(cloned, cloning) if repo.CloneStatus != cloneStatus { repo.CloneStatus = cloneStatus + // Since the repo has been recloned or is being cloned + // we can reset the corruption + repo.CorruptedAt = time.Time{} shouldUpdate = true } @@ -1771,7 +1774,7 @@ func (s *Server) exec(w http.ResponseWriter, r *http.Request, req *protocol.Exec stderrN = stderrW.n stderr := stderrBuf.String() - checkMaybeCorruptRepo(s.Logger, req.Repo, dir, stderr) + s.logIfCorrupt(ctx, req.Repo, dir, stderr) // write trailer w.Header().Set("X-Exec-Error", errorString(execErr)) @@ -1997,6 +2000,15 @@ func (s *Server) setRepoSize(ctx context.Context, name api.RepoName) error { return s.DB.GitserverRepos().SetRepoSize(ctx, name, dirSize(s.dir(name).Path(".")), s.Hostname) } +func (s *Server) logIfCorrupt(ctx context.Context, repo api.RepoName, dir GitDir, stderr string) { + if checkMaybeCorruptRepo(s.Logger, repo, dir, stderr) { + reason := stderr + if err := s.DB.GitserverRepos().LogCorruption(ctx, repo, reason); err != nil { + s.Logger.Warn("failed to log repo corruption", log.String("repo", string(repo)), log.Error(err)) + } + } +} + // setGitAttributes writes our global gitattributes to // gitDir/info/attributes. This will override .gitattributes inside of // repositories. It is used to unset attributes such as export-ignore. diff --git a/cmd/gitserver/server/server_test.go b/cmd/gitserver/server/server_test.go index 456a76ef138..460656026ed 100644 --- a/cmd/gitserver/server/server_test.go +++ b/cmd/gitserver/server/server_test.go @@ -815,7 +815,7 @@ func testHandleRepoDelete(t *testing.T, deletedInDB bool) { t.Fatal(err) } - cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt") + cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs") // We don't expect an error if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" { @@ -864,7 +864,7 @@ func testHandleRepoDelete(t *testing.T, deletedInDB bool) { t.Fatal(err) } - cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt") + cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs") // We don't expect an error if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" { @@ -938,7 +938,7 @@ func TestHandleRepoUpdate(t *testing.T) { } // We don't care exactly what the error is here - cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "LastError") + cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "LastError", "CorruptionLogs") // But we do care that it exists if fromDB.LastError == "" { t.Errorf("Expected an error when trying to clone from an invalid URL") @@ -967,7 +967,7 @@ func TestHandleRepoUpdate(t *testing.T) { t.Fatal(err) } - cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt") + cmpIgnored = cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs") // We don't expect an error if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" { @@ -1109,7 +1109,7 @@ func TestHandleRepoUpdateFromShard(t *testing.T) { t.Fatal(err) } - cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt") + cmpIgnored := cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "RepoSizeBytes", "UpdatedAt", "CorruptionLogs") // We don't expect an error if diff := cmp.Diff(want, fromDB, cmpIgnored); diff != "" { @@ -1272,6 +1272,7 @@ func TestCloneRepo_EnsureValidity(t *testing.T) { t.Fatalf("expected no error, got %v", err) } + // wait for repo to be cloned dst := s.dir("example.com/foo/bar") for i := 0; i < 1000; i++ { _, cloning := s.locker.Status(dst) @@ -1733,6 +1734,73 @@ func TestHeaderXRequestedWithMiddleware(t *testing.T) { }) } +func TestLogIfCorrupt(t *testing.T) { + logger := logtest.Scoped(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + db := database.NewDB(logger, dbtest.NewDB(logger, t)) + remoteDir := t.TempDir() + + reposDir := t.TempDir() + hostname := "test" + + repoName := api.RepoName("example.com/bar/foo") + s := makeTestServer(ctx, t, reposDir, remoteDir, db) + s.Hostname = hostname + + t.Run("git corruption output creates corruption log", func(t *testing.T) { + dbRepo := &types.Repo{ + Name: repoName, + URI: string(repoName), + Description: "Test", + } + + // Insert the repo into our database + err := db.Repos().Create(ctx, dbRepo) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + db.Repos().Delete(ctx, dbRepo.ID) + }) + + stdErr := "error: packfile .git/objects/pack/pack-e26c1fc0add58b7649a95f3e901e30f29395e174.pack does not match index" + + s.logIfCorrupt(ctx, repoName, s.dir(repoName), stdErr) + + fromDB, err := s.DB.GitserverRepos().GetByName(ctx, repoName) + assert.NoError(t, err) + assert.Len(t, fromDB.CorruptionLogs, 1) + assert.Contains(t, fromDB.CorruptionLogs[0].Reason, stdErr) + }) + + t.Run("non corruption output does not create corruption log", func(t *testing.T) { + dbRepo := &types.Repo{ + Name: repoName, + URI: string(repoName), + Description: "Test", + } + + // Insert the repo into our database + err := db.Repos().Create(ctx, dbRepo) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + db.Repos().Delete(ctx, dbRepo.ID) + }) + + stdErr := "Brought to you by Horsegraph" + + s.logIfCorrupt(ctx, repoName, s.dir(repoName), stdErr) + + fromDB, err := s.DB.GitserverRepos().GetByName(ctx, repoName) + assert.NoError(t, err) + assert.Len(t, fromDB.CorruptionLogs, 0) + }) +} + func mustEncodeJSONResponse(value any) string { encoded, _ := json.Marshal(value) return strings.TrimSpace(string(encoded)) diff --git a/internal/database/gitserver_repos.go b/internal/database/gitserver_repos.go index 8b689feba62..8ce134198b5 100644 --- a/internal/database/gitserver_repos.go +++ b/internal/database/gitserver_repos.go @@ -3,6 +3,7 @@ package database import ( "context" "database/sql" + "encoding/json" "fmt" "strings" "time" @@ -33,6 +34,9 @@ type GitserverRepoStore interface { GetByID(ctx context.Context, id api.RepoID) (*types.GitserverRepo, error) GetByName(ctx context.Context, name api.RepoName) (*types.GitserverRepo, error) GetByNames(ctx context.Context, names ...api.RepoName) (map[api.RepoName]*types.GitserverRepo, error) + // LogCorruption sets the corrupted at value and logs the corruption reason. Reason will be truncated if it exceeds + // MaxReasonSizeInMB + LogCorruption(ctx context.Context, name api.RepoName, reason string) error // SetCloneStatus will attempt to update ONLY the clone status of a // GitServerRepo. If a matching row does not yet exist a new one will be created. // If the status value hasn't changed, the row will not be updated. @@ -64,6 +68,9 @@ type GitserverRepoStore interface { var _ GitserverRepoStore = (*gitserverRepoStore)(nil) +// Max reason size megabyte - 1 MB +const MaxReasonSizeInMB = 1 << 20 + // gitserverRepoStore is responsible for data stored in the gitserver_repos table. type gitserverRepoStore struct { *basestore.Store @@ -87,13 +94,14 @@ func (s *gitserverRepoStore) Transact(ctx context.Context) (GitserverRepoStore, func (s *gitserverRepoStore) Update(ctx context.Context, repos ...*types.GitserverRepo) error { values := make([]*sqlf.Query, 0, len(repos)) for _, gr := range repos { - values = append(values, sqlf.Sprintf("(%s::integer, %s::text, %s::text, %s::text, %s::timestamp with time zone, %s::timestamp with time zone, %s::bigint, NOW())", + values = append(values, sqlf.Sprintf("(%s::integer, %s::text, %s::text, %s::text, %s::timestamp with time zone, %s::timestamp with time zone, %s::timestamp with time zone, %s::bigint, NOW())", gr.RepoID, gr.CloneStatus, gr.ShardID, dbutil.NewNullString(sanitizeToUTF8(gr.LastError)), gr.LastFetched, gr.LastChanged, + dbutil.NullTimeColumn(gr.CorruptedAt), &dbutil.NullInt64{N: &gr.RepoSizeBytes}, )) } @@ -111,12 +119,13 @@ SET last_error = tmp.last_error, last_fetched = tmp.last_fetched, last_changed = tmp.last_changed, + corrupted_at = tmp.corrupted_at, repo_size_bytes = tmp.repo_size_bytes, updated_at = NOW() FROM (VALUES - -- (, , , , , , ), + -- (, , , , , , , ), %s - ) AS tmp(repo_id, clone_status, shard_id, last_error, last_fetched, last_changed, repo_size_bytes) + ) AS tmp(repo_id, clone_status, shard_id, last_error, last_fetched, last_changed, corrupted_at, repo_size_bytes) WHERE tmp.repo_id = gr.repo_id ` @@ -309,7 +318,9 @@ SELECT gr.last_fetched, gr.last_changed, gr.repo_size_bytes, - gr.updated_at + gr.updated_at, + gr.corrupted_at, + gr.corruption_logs FROM gitserver_repos gr JOIN repo ON gr.repo_id = repo.id WHERE %s @@ -339,7 +350,9 @@ SELECT last_fetched, last_changed, repo_size_bytes, - updated_at + updated_at, + corrupted_at, + corruption_logs FROM gitserver_repos WHERE repo_id = %s ` @@ -350,6 +363,7 @@ func (s *gitserverRepoStore) GetByName(ctx context.Context, name api.RepoName) ( if err == sql.ErrNoRows { return nil, &errGitserverRepoNotFound{} } + return nil, err } return repo, nil } @@ -365,7 +379,9 @@ SELECT gr.last_fetched, gr.last_changed, gr.repo_size_bytes, - gr.updated_at + gr.updated_at, + gr.corrupted_at, + gr.corruption_logs FROM gitserver_repos gr JOIN repo r ON r.id = gr.repo_id WHERE r.name = %s @@ -386,7 +402,9 @@ SELECT gr.last_fetched, gr.last_changed, gr.repo_size_bytes, - gr.updated_at + gr.updated_at, + gr.corrupted_at, + gr.corruption_logs FROM gitserver_repos gr JOIN repo r on r.id = gr.repo_id WHERE r.name = ANY (%s) @@ -414,6 +432,7 @@ func (s *gitserverRepoStore) GetByNames(ctx context.Context, names ...api.RepoNa func scanGitserverRepo(scanner dbutil.Scanner) (*types.GitserverRepo, api.RepoName, error) { var gr types.GitserverRepo + var rawLogs []byte var cloneStatus string var repoName api.RepoName err := scanner.Scan( @@ -426,12 +445,18 @@ func scanGitserverRepo(scanner dbutil.Scanner) (*types.GitserverRepo, api.RepoNa &gr.LastChanged, &dbutil.NullInt64{N: &gr.RepoSizeBytes}, &gr.UpdatedAt, + &dbutil.NullTime{Time: &gr.CorruptedAt}, + &rawLogs, ) if err != nil { return nil, "", errors.Wrap(err, "scanning GitserverRepo") } gr.CloneStatus = types.ParseCloneStatus(cloneStatus) + err = json.Unmarshal(rawLogs, &gr.CorruptionLogs) + if err != nil { + return nil, repoName, errors.Wrap(err, "unmarshal of corruption_logs failed") + } return &gr, repoName, nil } @@ -439,6 +464,7 @@ func (s *gitserverRepoStore) SetCloneStatus(ctx context.Context, name api.RepoNa err := s.Exec(ctx, sqlf.Sprintf(` UPDATE gitserver_repos SET + corrupted_at = NULL, clone_status = %s, shard_id = %s, updated_at = NOW() @@ -495,6 +521,46 @@ WHERE return nil } +func (s *gitserverRepoStore) LogCorruption(ctx context.Context, name api.RepoName, reason string) error { + // trim reason to 1 MB so that we don't store huge reasons and run into trouble when it gets too large + if len(reason) > MaxReasonSizeInMB { + reason = reason[:MaxReasonSizeInMB] + } + + log := types.RepoCorruptionLog{ + Timestamp: time.Now(), + Reason: reason, + } + var rawLog []byte + if data, err := json.Marshal(log); err != nil { + return errors.Wrap(err, "could not marshal corruption_logs") + } else { + rawLog = data + } + + res, err := s.ExecResult(ctx, sqlf.Sprintf(` +UPDATE gitserver_repos as gtr +SET + corrupted_at = NOW(), + -- prepend the json and then ensure we only keep 10 items in the resulting json array + corruption_logs = (SELECT jsonb_path_query_array(%s||gtr.corruption_logs, '$[0 to 9]')), + updated_at = NOW() +WHERE + repo_id = (SELECT id FROM repo WHERE name = %s) +AND + corrupted_at IS NULL`, rawLog, name)) + if err != nil { + return errors.Wrapf(err, "logging repo corruption") + } + + if nrows, err := res.RowsAffected(); err != nil { + return errors.Wrapf(err, "getting rows affected") + } else if nrows != 1 { + return errors.New("repo not found or already corrupt") + } + return nil +} + // GitserverFetchData is the metadata associated with a fetch operation on // gitserver. type GitserverFetchData struct { @@ -510,6 +576,7 @@ func (s *gitserverRepoStore) SetLastFetched(ctx context.Context, name api.RepoNa res, err := s.ExecResult(ctx, sqlf.Sprintf(` UPDATE gitserver_repos SET + corrupted_at = NULL, last_fetched = %s, last_changed = %s, shard_id = %s, diff --git a/internal/database/gitserver_repos_test.go b/internal/database/gitserver_repos_test.go index 07bb731487b..148c4f31893 100644 --- a/internal/database/gitserver_repos_test.go +++ b/internal/database/gitserver_repos_test.go @@ -375,7 +375,7 @@ func TestGitserverReposGetByID(t *testing.T) { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } } @@ -401,7 +401,7 @@ func TestGitserverReposGetByName(t *testing.T) { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } } @@ -441,7 +441,7 @@ func TestGitserverReposGetByNames(t *testing.T) { sort.Slice(haveRepos, func(i, j int) bool { return haveRepos[i].RepoID < haveRepos[j].RepoID }) - if diff := cmp.Diff(gitserverRepos[:i+1], haveRepos, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepos[:i+1], haveRepos, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } } @@ -473,7 +473,7 @@ func TestSetCloneStatus(t *testing.T) { gitserverRepo.CloneStatus = types.CloneStatusCloned gitserverRepo.ShardID = shardID - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -500,7 +500,7 @@ func TestSetCloneStatus(t *testing.T) { ShardID: shardID, CloneStatus: types.CloneStatusCloned, } - if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged")); diff != "" { + if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -515,6 +515,160 @@ func TestSetCloneStatus(t *testing.T) { } } +func TestLogCorruption(t *testing.T) { + if testing.Short() { + t.Skip() + } + + logger := logtest.Scoped(t) + db := NewDB(logger, dbtest.NewDB(logger, t)) + ctx := context.Background() + + t.Run("log repo corruption sets corrupted_at time", func(t *testing.T) { + repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo1", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + logRepoCorruption(t, db, repo.Name, "test") + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to get repo by id: %s", err) + } + + if fromDB.CorruptedAt.IsZero() { + t.Errorf("Expected corruptedAt time to be set. Got zero value for time %q", fromDB.CorruptedAt) + } + // We should have one corruption log entry + if len(fromDB.CorruptionLogs) != 1 { + t.Errorf("Wanted 1 Corruption log entries, got %d entries", len(fromDB.CorruptionLogs)) + } + if fromDB.CorruptionLogs[0].Timestamp.IsZero() { + t.Errorf("Corruption Log entry expected to have non zero timestamp. Got %q", fromDB.CorruptionLogs[0]) + } + if fromDB.CorruptionLogs[0].Reason != "test" { + t.Errorf("Wanted Corruption Log reason %q got %q", "test", fromDB.CorruptionLogs[0].Reason) + } + }) + t.Run("setting clone status clears corruptedAt time", func(t *testing.T) { + repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo2", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + logRepoCorruption(t, db, repo.Name, "test 2") + + setGitserverRepoCloneStatus(t, db, repo.Name, types.CloneStatusCloned) + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to get repo by id: %s", err) + } + if !fromDB.CorruptedAt.IsZero() { + t.Errorf("Setting clone status should set corrupt_at value to zero time value. Got non zero value for time %q", fromDB.CorruptedAt) + } + }) + t.Run("setting last error does not clear corruptedAt time", func(t *testing.T) { + repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo3", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + logRepoCorruption(t, db, repo.Name, "test 3") + + setGitserverRepoLastChanged(t, db, repo.Name, time.Now()) + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to get repo by id: %s", err) + } + if !fromDB.CorruptedAt.IsZero() { + t.Errorf("Setting Last Changed should set corrupted at value to zero time value. Got non zero value for time %q", fromDB.CorruptedAt) + } + }) + t.Run("setting clone status clears corruptedAt time", func(t *testing.T) { + repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo4", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + logRepoCorruption(t, db, repo.Name, "test 3") + + setGitserverRepoLastError(t, db, repo.Name, "This is a TEST ERAWR") + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to get repo by id: %s", err) + } + if fromDB.CorruptedAt.IsZero() { + t.Errorf("Setting Last Error should not clear the corruptedAt value") + } + }) + t.Run("consecutive corruption logs appends", func(t *testing.T) { + repo, gitserverRepo := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo5", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + for i := 0; i < 12; i++ { + logRepoCorruption(t, db, repo.Name, fmt.Sprintf("test %d", i)) + // We set the Clone status so that the 'corrupted_at' time gets cleared + // otherwise we cannot log corruption for a repo that is already corrupt + gitserverRepo.CloneStatus = types.CloneStatusCloned + gitserverRepo.CorruptedAt = time.Time{} + if err := db.GitserverRepos().Update(ctx, gitserverRepo); err != nil { + t.Fatal(err) + } + + } + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to retrieve repo from db: %s", err) + } + + // We added 12 entries but we only keep 10 + if len(fromDB.CorruptionLogs) != 10 { + t.Errorf("expected 10 corruption log entries but got %d", len(fromDB.CorruptionLogs)) + } + + // A log entry gets prepended to the json array, so: + // first entry = most recent log entry + // last entry = oldest log entry + // Our most recent log entry (idx 0!) should have "test 11" as the reason ie. the last element the loop + // that we added + wanted := "test 11" + if fromDB.CorruptionLogs[0].Reason != wanted { + t.Errorf("Wanted %q for last corruption log entry but got %q", wanted, fromDB.CorruptionLogs[9].Reason) + } + + }) + t.Run("large reason gets truncated", func(t *testing.T) { + repo, _ := createTestRepo(ctx, t, db, &createTestRepoPayload{ + Name: "github.com/sourcegraph/repo6", + RepoSizeBytes: 100, + CloneStatus: types.CloneStatusNotCloned, + }) + + largeReason := make([]byte, MaxReasonSizeInMB*2) + for i := 0; i < len(largeReason); i++ { + largeReason[i] = 'a' + } + + logRepoCorruption(t, db, repo.Name, string(largeReason)) + + fromDB, err := db.GitserverRepos().GetByID(ctx, repo.ID) + if err != nil { + t.Fatalf("failed to retrieve repo from db: %s", err) + } + + if len(fromDB.CorruptionLogs[0].Reason) == len(largeReason) { + t.Errorf("expected reason to be truncated - got length=%d, wanted=%d", len(fromDB.CorruptionLogs[0].Reason), MaxReasonSizeInMB) + } + }) +} + func TestSetLastError(t *testing.T) { if testing.Short() { t.Skip() @@ -546,7 +700,7 @@ func TestSetLastError(t *testing.T) { } gitserverRepo.LastError = "oops" - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -563,7 +717,7 @@ func TestSetLastError(t *testing.T) { } gitserverRepo.LastError = emptyErr - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -623,7 +777,7 @@ func TestSetRepoSize(t *testing.T) { gitserverRepo.RepoSizeBytes = 200 // If we have size, we can assume it's cloned gitserverRepo.CloneStatus = types.CloneStatusCloned - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -654,7 +808,7 @@ func TestSetRepoSize(t *testing.T) { // If we have size, we can assume it's cloned CloneStatus: types.CloneStatusCloned, } - if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CloneStatus")); diff != "" { + if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "LastFetched", "LastChanged", "CloneStatus", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -696,7 +850,7 @@ func TestGitserverRepo_Update(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -717,7 +871,7 @@ func TestGitserverRepo_Update(t *testing.T) { // Set LastError to the expected error string but without the null character, because we expect // our code to work and strip it before writing to the DB. gitserverRepo.LastError = "Oops" - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -730,7 +884,7 @@ func TestGitserverRepo_Update(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } } @@ -769,7 +923,7 @@ func TestGitserverRepoUpdateMany(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo1, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo1, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } }) @@ -778,7 +932,7 @@ func TestGitserverRepoUpdateMany(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo2, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } }) @@ -826,7 +980,7 @@ func TestGitserverRepoListReposWithoutSize(t *testing.T) { t.Fatal(err) } - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } @@ -851,7 +1005,7 @@ func TestGitserverRepoListReposWithoutSize(t *testing.T) { } // Check that nothing except UpdatedAt and RepoSizeBytes has been changed - if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "RepoSizeBytes")); diff != "" { + if diff := cmp.Diff(gitserverRepo, fromDB, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "RepoSizeBytes", "CorruptionLogs")); diff != "" { t.Fatal(diff) } } @@ -907,7 +1061,7 @@ func TestGitserverUpdateRepoSizes(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(repo, reloaded, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt")); diff != "" { + if diff := cmp.Diff(repo, reloaded, cmpopts.IgnoreFields(types.GitserverRepo{}, "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } // Separately make sure UpdatedAt has changed, though @@ -974,10 +1128,11 @@ func createTestRepo(ctx context.Context, t *testing.T, db DB, payload *createTes } want := &types.GitserverRepo{ - RepoID: repo.ID, - CloneStatus: types.CloneStatusNotCloned, + RepoID: repo.ID, + CloneStatus: types.CloneStatusNotCloned, + CorruptionLogs: []types.RepoCorruptionLog{}, } - if diff := cmp.Diff(want, gitserverRepo, cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "UpdatedAt")); diff != "" { + if diff := cmp.Diff(want, gitserverRepo, cmpopts.IgnoreFields(types.GitserverRepo{}, "LastFetched", "LastChanged", "UpdatedAt", "CorruptionLogs")); diff != "" { t.Fatal(diff) } diff --git a/internal/database/mocks_temp.go b/internal/database/mocks_temp.go index 060df09650e..bebaa670697 100644 --- a/internal/database/mocks_temp.go +++ b/internal/database/mocks_temp.go @@ -23132,6 +23132,9 @@ type MockGitserverRepoStore struct { // ListReposWithoutSizeFunc is an instance of a mock function object // controlling the behavior of the method ListReposWithoutSize. ListReposWithoutSizeFunc *GitserverRepoStoreListReposWithoutSizeFunc + // LogCorruptionFunc is an instance of a mock function object + // controlling the behavior of the method LogCorruption. + LogCorruptionFunc *GitserverRepoStoreLogCorruptionFunc // SetCloneStatusFunc is an instance of a mock function object // controlling the behavior of the method SetCloneStatus. SetCloneStatusFunc *GitserverRepoStoreSetCloneStatusFunc @@ -23204,6 +23207,11 @@ func NewMockGitserverRepoStore() *MockGitserverRepoStore { return }, }, + LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{ + defaultHook: func(context.Context, api.RepoName, string) (r0 error) { + return + }, + }, SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{ defaultHook: func(context.Context, api.RepoName, types.CloneStatus, string) (r0 error) { return @@ -23292,6 +23300,11 @@ func NewStrictMockGitserverRepoStore() *MockGitserverRepoStore { panic("unexpected invocation of MockGitserverRepoStore.ListReposWithoutSize") }, }, + LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{ + defaultHook: func(context.Context, api.RepoName, string) error { + panic("unexpected invocation of MockGitserverRepoStore.LogCorruption") + }, + }, SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{ defaultHook: func(context.Context, api.RepoName, types.CloneStatus, string) error { panic("unexpected invocation of MockGitserverRepoStore.SetCloneStatus") @@ -23364,6 +23377,9 @@ func NewMockGitserverRepoStoreFrom(i GitserverRepoStore) *MockGitserverRepoStore ListReposWithoutSizeFunc: &GitserverRepoStoreListReposWithoutSizeFunc{ defaultHook: i.ListReposWithoutSize, }, + LogCorruptionFunc: &GitserverRepoStoreLogCorruptionFunc{ + defaultHook: i.LogCorruption, + }, SetCloneStatusFunc: &GitserverRepoStoreSetCloneStatusFunc{ defaultHook: i.SetCloneStatus, }, @@ -24267,6 +24283,117 @@ func (c GitserverRepoStoreListReposWithoutSizeFuncCall) Results() []interface{} return []interface{}{c.Result0, c.Result1} } +// GitserverRepoStoreLogCorruptionFunc describes the behavior when the +// LogCorruption method of the parent MockGitserverRepoStore instance is +// invoked. +type GitserverRepoStoreLogCorruptionFunc struct { + defaultHook func(context.Context, api.RepoName, string) error + hooks []func(context.Context, api.RepoName, string) error + history []GitserverRepoStoreLogCorruptionFuncCall + mutex sync.Mutex +} + +// LogCorruption delegates to the next hook function in the queue and stores +// the parameter and result values of this invocation. +func (m *MockGitserverRepoStore) LogCorruption(v0 context.Context, v1 api.RepoName, v2 string) error { + r0 := m.LogCorruptionFunc.nextHook()(v0, v1, v2) + m.LogCorruptionFunc.appendCall(GitserverRepoStoreLogCorruptionFuncCall{v0, v1, v2, r0}) + return r0 +} + +// SetDefaultHook sets function that is called when the LogCorruption method +// of the parent MockGitserverRepoStore instance is invoked and the hook +// queue is empty. +func (f *GitserverRepoStoreLogCorruptionFunc) SetDefaultHook(hook func(context.Context, api.RepoName, string) error) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// LogCorruption method of the parent MockGitserverRepoStore instance +// invokes the hook at the front of the queue and discards it. After the +// queue is empty, the default hook function is invoked for any future +// action. +func (f *GitserverRepoStoreLogCorruptionFunc) PushHook(hook func(context.Context, api.RepoName, string) error) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultHook with a function that returns the +// given values. +func (f *GitserverRepoStoreLogCorruptionFunc) SetDefaultReturn(r0 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, string) error { + return r0 + }) +} + +// PushReturn calls PushHook with a function that returns the given values. +func (f *GitserverRepoStoreLogCorruptionFunc) PushReturn(r0 error) { + f.PushHook(func(context.Context, api.RepoName, string) error { + return r0 + }) +} + +func (f *GitserverRepoStoreLogCorruptionFunc) nextHook() func(context.Context, api.RepoName, string) error { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *GitserverRepoStoreLogCorruptionFunc) appendCall(r0 GitserverRepoStoreLogCorruptionFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of GitserverRepoStoreLogCorruptionFuncCall +// objects describing the invocations of this function. +func (f *GitserverRepoStoreLogCorruptionFunc) History() []GitserverRepoStoreLogCorruptionFuncCall { + f.mutex.Lock() + history := make([]GitserverRepoStoreLogCorruptionFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// GitserverRepoStoreLogCorruptionFuncCall is an object that describes an +// invocation of method LogCorruption on an instance of +// MockGitserverRepoStore. +type GitserverRepoStoreLogCorruptionFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. + Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 api.RepoName + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 string + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. +func (c GitserverRepoStoreLogCorruptionFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c GitserverRepoStoreLogCorruptionFuncCall) Results() []interface{} { + return []interface{}{c.Result0} +} + // GitserverRepoStoreSetCloneStatusFunc describes the behavior when the // SetCloneStatus method of the parent MockGitserverRepoStore instance is // invoked. diff --git a/internal/database/repos_test.go b/internal/database/repos_test.go index 7efc55fb675..99391a7e9a6 100644 --- a/internal/database/repos_test.go +++ b/internal/database/repos_test.go @@ -119,6 +119,15 @@ func setGitserverRepoLastError(t *testing.T, db DB, name api.RepoName, msg strin } } +func logRepoCorruption(t *testing.T, db DB, name api.RepoName, logOutput string) { + t.Helper() + + err := db.GitserverRepos().LogCorruption(context.Background(), name, logOutput) + if err != nil { + t.Fatalf("failed to log repo corruption: %s", err) + } +} + func setZoektIndexed(t *testing.T, db DB, name api.RepoName) { t.Helper() ctx := context.Background() diff --git a/internal/database/schema.json b/internal/database/schema.json index 2a4e94ef984..4973a5969ea 100755 --- a/internal/database/schema.json +++ b/internal/database/schema.json @@ -10686,6 +10686,32 @@ "GenerationExpression": "", "Comment": "" }, + { + "Name": "corrupted_at", + "Index": 10, + "TypeName": "timestamp with time zone", + "IsNullable": true, + "Default": "", + "CharacterMaximumLength": 0, + "IsIdentity": false, + "IdentityGeneration": "", + "IsGenerated": "NEVER", + "GenerationExpression": "", + "Comment": "Timestamp of when repo corruption was detected" + }, + { + "Name": "corruption_logs", + "Index": 11, + "TypeName": "jsonb", + "IsNullable": false, + "Default": "'[]'::jsonb", + "CharacterMaximumLength": 0, + "IsIdentity": false, + "IdentityGeneration": "", + "IsGenerated": "NEVER", + "GenerationExpression": "", + "Comment": "Log output of repo corruptions that have been detected - encoded as json" + }, { "Name": "last_changed", "Index": 7, diff --git a/internal/database/schema.md b/internal/database/schema.md index 0ea031c6457..02d967034c3 100755 --- a/internal/database/schema.md +++ b/internal/database/schema.md @@ -1477,6 +1477,8 @@ Indexes: last_fetched | timestamp with time zone | | not null | now() last_changed | timestamp with time zone | | not null | now() repo_size_bytes | bigint | | | + corrupted_at | timestamp with time zone | | | + corruption_logs | jsonb | | not null | '[]'::jsonb Indexes: "gitserver_repos_pkey" PRIMARY KEY, btree (repo_id) "gitserver_repo_size_bytes" btree (repo_size_bytes) @@ -1496,6 +1498,10 @@ Triggers: ``` +**corrupted_at**: Timestamp of when repo corruption was detected + +**corruption_logs**: Log output of repo corruptions that have been detected - encoded as json + # Table "public.gitserver_repos_statistics" ``` Column | Type | Collation | Nullable | Default diff --git a/internal/types/types.go b/internal/types/types.go index bf2ab7f3b58..e53ef69f46d 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -4,6 +4,7 @@ package types import ( "context" "database/sql" + "encoding/json" "fmt" "reflect" "sort" @@ -557,7 +558,26 @@ type GitserverRepo struct { LastChanged time.Time // Size of the repository in bytes. RepoSizeBytes int64 - UpdatedAt time.Time + // Time when corruption of repo was detected + CorruptedAt time.Time + UpdatedAt time.Time + // A log of the different types of corruption that was detected on this repo. The order of the log entries are + // stored from most recent to least recent and capped at 10 entries. See LogCorruption on Gitserverrepo store. + CorruptionLogs []RepoCorruptionLog +} + +// RepoCorruptionLog represents a corruption event that has been detected on a repo. +type RepoCorruptionLog struct { + // When the corruption event was detected + Timestamp time.Time `json:"time"` + // Why the repo is considered to be corrupt. Can be git output stderr output or a short reason like "missing head" + Reason string `json:"reason"` +} + +func UnmarshalCorruptionLog(data []byte) ([]RepoCorruptionLog, error) { + var logs []RepoCorruptionLog + err := json.Unmarshal(data, &logs) + return logs, err } // ExternalService is a connection to an external service. diff --git a/migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql b/migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql new file mode 100644 index 00000000000..df26d5e9ee9 --- /dev/null +++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/down.sql @@ -0,0 +1,4 @@ +ALTER TABLE gitserver_repos + DROP COLUMN IF EXISTS corrupted_at, + DROP COLUMN IF EXISTS corrupted_logs; + diff --git a/migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml b/migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml new file mode 100644 index 00000000000..65979d7d5c5 --- /dev/null +++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/metadata.yaml @@ -0,0 +1,2 @@ +name: add_gitserver_corruption_columns +parents: [1670350006, 1670543231] diff --git a/migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql b/migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql new file mode 100644 index 00000000000..a62f217b511 --- /dev/null +++ b/migrations/frontend/1670934184_add_gitserver_corruption_columns/up.sql @@ -0,0 +1,6 @@ +ALTER TABLE gitserver_repos + ADD COLUMN IF NOT EXISTS corrupted_at TIMESTAMP WITH TIME ZONE, + ADD COLUMN IF NOT EXISTS corruption_logs JSONB NOT NULL DEFAULT '[]'; + +COMMENT ON COLUMN gitserver_repos.corrupted_at IS 'Timestamp of when repo corruption was detected'; +COMMENT ON COLUMN gitserver_repos.corruption_logs IS 'Log output of repo corruptions that have been detected - encoded as json'; diff --git a/migrations/frontend/squashed.sql b/migrations/frontend/squashed.sql index 8d690c30237..8584d399d78 100755 --- a/migrations/frontend/squashed.sql +++ b/migrations/frontend/squashed.sql @@ -2178,9 +2178,15 @@ CREATE TABLE gitserver_repos ( updated_at timestamp with time zone DEFAULT now() NOT NULL, last_fetched timestamp with time zone DEFAULT now() NOT NULL, last_changed timestamp with time zone DEFAULT now() NOT NULL, - repo_size_bytes bigint + repo_size_bytes bigint, + corrupted_at timestamp with time zone, + corruption_logs jsonb DEFAULT '[]'::jsonb NOT NULL ); +COMMENT ON COLUMN gitserver_repos.corrupted_at IS 'Timestamp of when repo corruption was detected'; + +COMMENT ON COLUMN gitserver_repos.corruption_logs IS 'Log output of repo corruptions that have been detected - encoded as json'; + CREATE TABLE gitserver_repos_statistics ( shard_id text NOT NULL, total bigint DEFAULT 0 NOT NULL,