sourcegraph/dev/schema_migrations.bzl

"""
Provide a custom repository_rule to fetch database migrations from previous versions from
a GCS bucket.
The "updated_at" attribute allows to manually invalidate the cache, because the rule itself
cannot know when to do so, as it will simply skip listing the bucket otherwise.
"""
def _schema_migrations(rctx):
    """
    This repository downloads the schema migrations from GCS.

    We use the GCS JSON API directly instead of gsutil or gcloud because:
    - gsutil may spend up to ~1m20s trying to contact metadata.google.internal,
      and we have not found a way to disable that
    - gcloud disallows unauthenticated access even to a public bucket
    """
jq_path = rctx.path(Label("@jq//:jq"))
rctx.file("BUILD.bazel", content = """
package(default_visibility = ["//visibility:public"])
exports_files(["archives"])
filegroup(
name = "srcs",
srcs = glob(["**"]),
)
""")
rctx.report_progress("Listing GCS bucket contents")
rctx.download("https://storage.googleapis.com/storage/v1/b/schemas-migrations/o?prefix=migrations/migrations-", "bucket_contents.json")
result = rctx.execute([
jq_path,
".items | map({name, mediaLink, generation})",
"bucket_contents.json",
])
if result.return_code != 0:
fail("Failed to extract bucket data from GCS API: {}".format(result.stderr))
rctx.delete("bucket_contents.json")
output = json.decode(result.stdout)
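
    # Illustrative shape of `output` after the jq projection; the values below
    # are made up, but the field names match the GCS JSON API object resource:
    #
    #   [
    #       {
    #           "name": "migrations/migrations-v5.2.0.tar.gz",
    #           "mediaLink": "https://storage.googleapis.com/download/storage/v1/b/schemas-migrations/o/...",
    #           "generation": "1700000000000000",
    #       },
    #   ]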
rctx.execute(["mkdir", "archives"])
rctx.report_progress("Downloading schema migrations from GCS")
download_tokens = []
for file in output:
download_tokens.append(rctx.download(
file["mediaLink"],
"archives/" + file["name"].split("/")[-1],
canonical_id = file["generation"],
block = False,
))
for token in download_tokens:
token.wait()
schema_migrations = repository_rule(
    implementation = _schema_migrations,
    attrs = {
        "updated_at": attr.string(mandatory = True),
    },
)
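
# A minimal usage sketch, assuming this file is loaded from a WORKSPACE (the
# repository name and date below are illustrative):
#
#   load("//dev:schema_migrations.bzl", "schema_migrations")
#
#   schema_migrations(
#       name = "schema_migrations",
#       updated_at = "2024-01-01",  # bump to force a fresh bucket listing
#   )
#
# Other targets can then consume the downloaded archives through the generated
# BUILD file, e.g. via "@schema_migrations//:srcs".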