"""
Provide a custom repository_rule to fetch database migrations from previous versions from
a GCS bucket.

The "updated_at" attribute allows to manually invalidate the cache, because the rule itself
cannot know when to do so, as it will simply skip listing the bucket otherwise.
"""

def _schema_migrations(rctx):
    """
    This repository is used to download the schema migrations from GCS.

    We use the GCS JSON API directly instead of gsutil or gcloud because:
        - gsutil may spend up to a ~1m20s trying to contact metadata.google.internal
            without a discovered way to disable that
        - gcloud disallows unauthed access to an even public bucket
    """
    jq_path = rctx.path(Label("@jq//:jq"))

    rctx.file("BUILD.bazel", content = """
package(default_visibility = ["//visibility:public"])

exports_files(["archives"])

filegroup(
    name = "srcs",
    srcs = glob(["**"]),
)
""")

    rctx.report_progress("Listing GCS bucket contents")

    rctx.download("https://storage.googleapis.com/storage/v1/b/schemas-migrations/o?prefix=migrations/migrations-", "bucket_contents.json")

    result = rctx.execute([
        jq_path,
        ".items | map({name, mediaLink, generation})",
        "bucket_contents.json",
    ])
    if result.return_code != 0:
        fail("Failed to extract bucket data from GCS API: {}".format(result.stderr))

    rctx.delete("bucket_contents.json")

    output = json.decode(result.stdout)

    rctx.execute(["mkdir", "archives"])

    rctx.report_progress("Downloading schema migrations from GCS")

    download_tokens = []
    for file in output:
        download_tokens.append(rctx.download(
            file["mediaLink"],
            "archives/" + file["name"].split("/")[-1],
            canonical_id = file["generation"],
            block = False,
        ))

    for token in download_tokens:
        token.wait()

schema_migrations = repository_rule(
    implementation = _schema_migrations,
    attrs = {
        "updated_at": attr.string(mandatory = True),
    },
)