bazel: move schema migrations fetching from GCS to bazel repository (#59879)

Does what it says on the tin

Caveat:
As this doesn't use the built-in downloaders, this probably can't make use of the repository cache. While it won't refetch it every single time (there is _some_ degree of caching), I'm not sure what will cause it to invalidate the cached copy and refresh it. It's a very fast operation, though.
See https://github.com/bazelbuild/bazel/issues/19267

## Test plan

`bazel build //internal/database/migration/shared:generate_stitched_migration_graph`
This commit is contained in:
Noah S-C 2024-02-14 17:40:39 +00:00 committed by GitHub
parent 2de86221d0
commit ba9d2e0ca2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 56 additions and 12 deletions

View File

@ -468,10 +468,15 @@ exports_files(["bundle"])
filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//visibility:public"]
)
""",
integrity = "sha256-Spx8LyM7k+dsGOlZ4TdAq+CNk5EzvYB/oxnY4zGpqPg=",
strip_prefix = "sourcegraph-extensions-bundles-5.0.1",
url = "https://github.com/sourcegraph/sourcegraph-extensions-bundles/archive/v5.0.1.zip",
)
load("//dev:schema_migrations.bzl", "schema_migrations")
schema_migrations(
name = "schemas_migrations",
)

42
dev/schema_migrations.bzl Normal file
View File

@ -0,0 +1,42 @@
def _schema_migrations(rctx):
    """Downloads database schema migration archives from GCS into this repository.

    Resolves the platform-specific gsutil binary from the matching
    @gcloud-<os>-<arch> external repository, then copies every object under
    gs://schemas-migrations/migrations/ into an `archives` directory that the
    generated BUILD file exposes to the rest of the build.
    """

    # Map Bazel's reported host OS/arch onto the naming used by the
    # @gcloud-{os}-{arch} repositories.
    os_name = {
        "mac os x": "darwin",
        "linux": "linux",
    }[rctx.os.name]
    arch = {
        "aarch64": "arm64",
        "arm64": "arm64",
        "amd64": "amd64",
        "x86_64": "amd64",
        "x86": "amd64",
    }[rctx.os.arch]
    gsutil_path = rctx.path(Label("@gcloud-{}-{}//:gsutil".format(os_name, arch)))

    rctx.file("BUILD.bazel", content = """
package(default_visibility = ["//visibility:public"])
exports_files(["archives"])
filegroup(
    name = "srcs",
    srcs = glob(["**"]),
)
""")

    # gsutil requires the destination directory to exist. Fail loudly here if
    # it cannot be created, rather than letting the copy below fail with a
    # confusing gsutil error.
    mkdir_result = rctx.execute(["mkdir", "archives"])
    if mkdir_result.return_code != 0:
        fail("Failed to create archives directory: {}".format(mkdir_result.stderr))

    rctx.report_progress("Downloading schema migrations from GCS")

    # "-m" enables parallel transfers, which is significantly faster even
    # when the individual files are small.
    result = rctx.execute(
        [
            gsutil_path,
            "-m",
            "cp",
            "gs://schemas-migrations/migrations/*",
            "archives",
        ],
        timeout = 60,
        environment = {
            "CLOUDSDK_CORE_PROJECT": "sourcegraph-ci",
        },
    )
    if result.return_code != 0:
        fail("Failed to download schema migrations from GCS: {}".format(result.stderr))

schema_migrations = repository_rule(
    implementation = _schema_migrations,
    doc = "Fetches database schema migration archives from GCS into an external repository.",
)

View File

@ -17,7 +17,11 @@ filegroup(
"""
GCLOUD_VERSION = "456.0.0"
GCLOUD_BUILDFILE = """package(default_visibility = ["//visibility:public"])\nexports_files(["gcloud", "gsutil", "bq", "git-credential-gcloud"])"""
GCLOUD_BUILDFILE = """
package(default_visibility = ["//visibility:public"])
exports_files(["gcloud", "gsutil", "bq", "git-credential-gcloud"])
"""
GCLOUD_PATCH_CMDS = [
"ln -s google-cloud-sdk/bin/gcloud gcloud",
"ln -s google-cloud-sdk/bin/gsutil gsutil",

View File

@ -24,22 +24,15 @@ genrule(
srcs = [],
outs = ["stitched-migration-graph.json"],
cmd = """\
mkdir -p _migration_archives
# "-m" flag enables concurrent mode, it's significantly faster even if files are small.
CLOUDSDK_CORE_PROJECT="sourcegraph-ci"
$(location //dev/tools:gsutil) -m cp "gs://schemas-migrations/migrations/*" _migration_archives/
$(location //internal/database/migration/shared/data/cmd/generator) \
-output=$@ \
-frozen-output=. \
-archive=_migration_archives
-archive=$(location @schemas_migrations//:archives)
""",
tags = [
"no-sandbox", # gsutil doesn't work sandboxed.
"requires-network", # we're fetching files from a GCP bucket.
],
tools = [
"//dev/tools:gsutil",
"//internal/database/migration/shared/data/cmd/generator",
"@schemas_migrations//:archives",
],
visibility = ["//visibility:public"],
)

View File

@ -172,7 +172,7 @@ func (s *LocalMigrationsReader) load(path string) error {
}
s.m[s.currentVersion] = contents
} else {
fmt.Printf("WARNING: a tarball for %s already exists, constant is out of date\n.", s.currentVersion)
fmt.Printf("WARNING: a tarball for %s already exists, constant is out of date\n", s.currentVersion)
}
return nil