bazel: move schema migrations fetching from GCS to bazel repository (#59879)

Does what it says on the tin

Caveat:
As this doesn't use the built-in downloaders, this probably can't make use of the repository cache. While it won't refetch it every single time (there is _some_ degree of caching), I'm not sure what will cause it to invalidate the cached copy and refresh it. It's a very fast operation, though.
See https://github.com/bazelbuild/bazel/issues/19267

## Test plan

`bazel build //internal/database/migration/shared:generate_stitched_migration_graph`
This commit is contained in:
Noah S-C 2024-02-14 17:40:39 +00:00 committed by GitHub
parent 2de86221d0
commit ba9d2e0ca2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 56 additions and 12 deletions

View File

@ -468,10 +468,15 @@ exports_files(["bundle"])
filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//visibility:public"]
)
""",
integrity = "sha256-Spx8LyM7k+dsGOlZ4TdAq+CNk5EzvYB/oxnY4zGpqPg=",
strip_prefix = "sourcegraph-extensions-bundles-5.0.1",
url = "https://github.com/sourcegraph/sourcegraph-extensions-bundles/archive/v5.0.1.zip",
)
load("//dev:schema_migrations.bzl", "schema_migrations")
schema_migrations(
name = "schemas_migrations",
)

42
dev/schema_migrations.bzl Normal file
View File

@ -0,0 +1,42 @@
def _schema_migrations(rctx):
    """Downloads database schema migration archives from GCS into this repository.

    Resolves the platform-specific gsutil binary from the matching
    @gcloud-<os>-<arch> external repository, then copies every object under
    gs://schemas-migrations/migrations/ into an `archives` directory that the
    generated BUILD file exposes to the rest of the build.
    """

    # Map Bazel's reported host OS/arch onto the naming used by the
    # @gcloud-{os}-{arch} repositories.
    os_name = {
        "mac os x": "darwin",
        "linux": "linux",
    }[rctx.os.name]
    arch = {
        "aarch64": "arm64",
        "arm64": "arm64",
        "amd64": "amd64",
        "x86_64": "amd64",
        "x86": "amd64",
    }[rctx.os.arch]
    gsutil_path = rctx.path(Label("@gcloud-{}-{}//:gsutil".format(os_name, arch)))

    rctx.file("BUILD.bazel", content = """
package(default_visibility = ["//visibility:public"])
exports_files(["archives"])
filegroup(
    name = "srcs",
    srcs = glob(["**"]),
)
""")

    # gsutil requires the destination directory to exist. Fail loudly here if
    # it cannot be created, rather than letting the copy below fail with a
    # confusing gsutil error.
    mkdir_result = rctx.execute(["mkdir", "archives"])
    if mkdir_result.return_code != 0:
        fail("Failed to create archives directory: {}".format(mkdir_result.stderr))

    rctx.report_progress("Downloading schema migrations from GCS")

    # "-m" enables parallel transfers, which is significantly faster even
    # when the individual files are small.
    result = rctx.execute(
        [
            gsutil_path,
            "-m",
            "cp",
            "gs://schemas-migrations/migrations/*",
            "archives",
        ],
        timeout = 60,
        environment = {
            "CLOUDSDK_CORE_PROJECT": "sourcegraph-ci",
        },
    )
    if result.return_code != 0:
        fail("Failed to download schema migrations from GCS: {}".format(result.stderr))

schema_migrations = repository_rule(
    implementation = _schema_migrations,
    doc = "Fetches database schema migration archives from GCS into an external repository.",
)

View File

@ -17,7 +17,11 @@ filegroup(
"""
GCLOUD_VERSION = "456.0.0"
GCLOUD_BUILDFILE = """package(default_visibility = ["//visibility:public"])\nexports_files(["gcloud", "gsutil", "bq", "git-credential-gcloud"])"""
GCLOUD_BUILDFILE = """
package(default_visibility = ["//visibility:public"])
exports_files(["gcloud", "gsutil", "bq", "git-credential-gcloud"])
"""
GCLOUD_PATCH_CMDS = [
"ln -s google-cloud-sdk/bin/gcloud gcloud",
"ln -s google-cloud-sdk/bin/gsutil gsutil",

View File

@ -24,22 +24,15 @@ genrule(
srcs = [],
outs = ["stitched-migration-graph.json"],
cmd = """\
mkdir -p _migration_archives
# "-m" flag enables concurrent mode, it's significantly faster even if files are small.
CLOUDSDK_CORE_PROJECT="sourcegraph-ci"
$(location //dev/tools:gsutil) -m cp "gs://schemas-migrations/migrations/*" _migration_archives/
$(location //internal/database/migration/shared/data/cmd/generator) \
-output=$@ \
-frozen-output=. \
-archive=_migration_archives
-archive=$(location @schemas_migrations//:archives)
""",
tags = [
"no-sandbox", # gsutil doesn't work sandboxed.
"requires-network", # we're fetching files from a GCP bucket.
],
tools = [
"//dev/tools:gsutil",
"//internal/database/migration/shared/data/cmd/generator",
"@schemas_migrations//:archives",
],
visibility = ["//visibility:public"],
)

View File

@ -172,7 +172,7 @@ func (s *LocalMigrationsReader) load(path string) error {
}
s.m[s.currentVersion] = contents
} else {
fmt.Printf("WARNING: a tarball for %s already exists, constant is out of date\n.", s.currentVersion)
fmt.Printf("WARNING: a tarball for %s already exists, constant is out of date\n", s.currentVersion)
}
return nil