repos: Prevent individual cloud default external service syncing (#30032)

This commit makes it so we don't enqueue external service sync jobs for
cloud default external services also when there are no rows in the
external_service_sync_jobs table.

This case was unfortunately missed in #24412, which was a follow up from
a very similar incident to the one we are now handling, so it did not
prevent it from happening again :sadpanda:

Additionally, we prevent any cloud default service from syncing in the
Syncer.SyncExternalService method by early exiting with an error
(defense in depth).
This commit is contained in:
Tomás Senart 2022-01-24 13:23:13 +01:00 committed by GitHub
parent e136905669
commit 9e815ed41e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 80 additions and 3 deletions

View File

@ -40,6 +40,7 @@ func TestIntegration(t *testing.T) {
{"Syncer/Run", testSyncRun},
{"Syncer/MultipleServices", testSyncerMultipleServices},
{"Syncer/OrphanedRepos", testOrphanedRepo},
{"Syncer/CloudDefaultExternalServicesDontSync", testCloudDefaultExternalServicesDontSync},
{"Syncer/DeleteExternalService", testDeleteExternalService},
{"Syncer/AbortSyncWhenThereIsRepoLimitError", testAbortSyncWhenThereIsRepoLimitError},
{"Syncer/UserAndOrgReposAreCountedCorrectly", testUserAndOrgReposAreCountedCorrectly},

View File

@ -581,9 +581,9 @@ INSERT INTO external_service_sync_jobs (external_service_id)
SELECT %s
WHERE NOT EXISTS (
SELECT
FROM external_service_sync_jobs j
JOIN external_services es ON es.id = j.external_service_id
WHERE j.external_service_id = %s
FROM external_services es
LEFT JOIN external_service_sync_jobs j ON es.id = j.external_service_id
WHERE es.id = %s
AND (
j.state IN ('queued', 'processing')
OR es.cloud_default

View File

@ -275,6 +275,17 @@ func testStoreEnqueueSingleSyncJob(store *repos.Store) func(*testing.T) {
t.Fatal(err)
}
assertCount(t, 2)
// Test that cloud default external services don't get jobs enqueued also when there are no job rows.
if err = store.Exec(ctx, sqlf.Sprintf("DELETE FROM external_service_sync_jobs")); err != nil {
t.Fatal(err)
}
err = store.EnqueueSingleSyncJob(ctx, service.ID)
if err != nil {
t.Fatal(err)
}
assertCount(t, 0)
}
}

View File

@ -441,6 +441,13 @@ func (s *Syncer) notifyDeleted(ctx context.Context, deleted ...api.RepoID) {
}
}
// ErrCloudDefaultSync is returned by SyncExternalService if an attempt to
// sync a cloud default external service is done. We can't sync these external services
// because their repos are added via the lazy-syncing mechanism on sourcegraph.com
// instead of config (which is empty), so attempting to sync them would delete all of
// the lazy-added repos.
var ErrCloudDefaultSync = errors.New("cloud default external services can't be synced")
// SyncExternalService syncs repos using the supplied external service in a streaming fashion, rather than batch.
// This allows very large sync jobs (i.e. that source potentially millions of repos) to incrementally persist changes.
// Deletes of repositories that were not sourced are done at the end.
@ -462,6 +469,15 @@ func (s *Syncer) SyncExternalService(
return errors.Wrap(err, "fetching external services")
}
// We have fail-safes in place to prevent enqueuing sync jobs for cloud default
// external services, but in case those fail to prevent a sync for any reason,
// we have this additional check here. Cloud default external services have their
// repos added via the lazy-syncing mechanism on sourcegraph.com instead of config
// (which is empty), so attempting to sync them would delete all of the lazy-added repos.
if svc.CloudDefault {
return ErrCloudDefaultSync
}
// Unless our site config explicitly allows private code or the user has the
// "AllowUserExternalServicePrivate" tag, user added external services should
// only sync public code.

View File

@ -1089,6 +1089,55 @@ func testOrphanedRepo(store *repos.Store) func(*testing.T) {
}
}
func testCloudDefaultExternalServicesDontSync(store *repos.Store) func(*testing.T) {
return func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
now := time.Now()
svc1 := &types.ExternalService{
Kind: extsvc.KindGitHub,
DisplayName: "Github - Test1",
Config: `{"url": "https://github.com"}`,
CloudDefault: true,
CreatedAt: now,
UpdatedAt: now,
}
// setup services
if err := store.ExternalServiceStore.Upsert(ctx, svc1); err != nil {
t.Fatal(err)
}
githubRepo := &types.Repo{
Name: "github.com/org/foo",
Metadata: &github.Repository{},
ExternalRepo: api.ExternalRepoSpec{
ID: "foo-external-12345",
ServiceID: "https://github.com/",
ServiceType: extsvc.TypeGitHub,
},
}
syncer := &repos.Syncer{
Sourcer: func(service *types.ExternalService) (repos.Source, error) {
s := repos.NewFakeSource(svc1, nil, githubRepo)
return s, nil
},
Store: store,
Now: time.Now,
}
have := syncer.SyncExternalService(ctx, svc1.ID, 10*time.Second)
want := repos.ErrCloudDefaultSync
if have != want {
t.Fatalf("have err: %v, want %v", have, want)
}
}
}
func testConflictingSyncers(store *repos.Store) func(*testing.T) {
return func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())