mirror of
https://github.com/FlipsideCrypto/flow-models.git
synced 2026-02-06 18:16:54 +00:00
* add: model to backfill * fix: node's string format * add tags * upd collection model * mainnet21 hardcoded collections history model * del tag * mainnet 21 getblock * tag for 21 * realtime models * alias num as height * realtime tags * add missing tag and newlines * backfiller * backfiller * move script to folder (renamed) python, upd test accordingly w dir name ch * upd script to accept model input, update jobs per method call * error w use_dev arg * add: silver mdoels * limit backfill job in python script * rename silver dbt models to streamline_ and move into silver/core * explicit casting to silver streamline models * add documentation to silver streamline models * run only current mainnet and history mainnet 22 first * activate schedule for gha * del hardcoded mainnet models * move history modes out of subdirs into history dir * fix GHA vars * del upstream 1+ from history step * del tag --------- Co-authored-by: Jack Forgash <jmfxyz@pm.me>
58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
# run_dbt_for_seed.py
|
|
import csv
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
|
|
def run_dbt_for_model(model_name, node_url, root_height, end_height, use_dev=False):
    """Run the dbt history model for a single block range.

    Invokes ``dbt run`` with 8 threads, selecting
    ``1+streamline__get_<model_name>_history`` and passing the node URL and
    block range through ``--vars``.

    Args:
        model_name: Suffix used to build the dbt selector.
        node_url: Inserted (double-quoted) as the ``node_url`` var.
        root_height: Inserted unquoted as the ``start_block`` var.
        end_height: Inserted unquoted as the ``end_block`` var.
        use_dev: Inserted unquoted as the
            ``STREAMLINE_USE_DEV_FOR_EXTERNAL_TABLES`` var; the text rendered
            is ``str(use_dev)``.

    Returns:
        The ``subprocess.CompletedProcess`` of the dbt invocation.
    """
    # The vars payload is interpolated as text, not serialized, so the values
    # must already be in the form the dbt CLI expects.
    dbt_vars = f'{{"node_url":"{node_url}", "start_block":{root_height}, "end_block":{end_height},"STREAMLINE_INVOKE_STREAMS":True, "STREAMLINE_USE_DEV_FOR_EXTERNAL_TABLES":{use_dev}}}'

    cmd = [
        "dbt",
        "run",
        "--threads",
        "8",
        "--vars",
        dbt_vars,
        "-s",
        f"1+streamline__get_{model_name}_history",
    ]

    result = subprocess.run(cmd)

    # Do not abort the caller on failure, but make a failed run visible
    # instead of silently discarding the exit status.
    if result.returncode != 0:
        print(
            f"dbt run for model '{model_name}' (node_url={node_url}, "
            f"blocks {root_height}-{end_height}) exited with code "
            f"{result.returncode}",
            file=sys.stderr,
        )

    return result
|
|
|
|
|
|
def main(model_name, use_dev=False, start_index=25):
    """Run the history backfill for rows of the network-version seed file.

    Reads ``./data/seeds__network_version.csv`` and, for every row whose
    0-based position is ``start_index`` or greater, calls
    ``run_dbt_for_model`` with that row's ``node_url``, ``root_height`` and
    ``end_height`` columns.

    Args:
        model_name: Passed through to ``run_dbt_for_model``.
        use_dev: Passed through to ``run_dbt_for_model``.
        start_index: 0-based index of the first CSV data row to process;
            earlier rows are skipped. Defaults to 25.
    """
    seed_file = "./data/seeds__network_version.csv"

    # The csv module expects files to be opened with newline="" so that it
    # can handle line endings inside quoted fields itself.
    with open(seed_file, "r", newline="") as file:
        reader = csv.DictReader(file)

        for i, row in enumerate(reader):
            # segment the backfill into batches of 5 networks at a time, starting with the most recent 5
            # source CSV contains 29 networks, but the first 3 (candidates 3-6) are inaccessible
            # so, valid rows are 4-29, or 25 rows
            # NOTE(review): `i` is 0-based, so the default threshold of 25
            # processes the rows at index 25 and later - confirm this matches
            # the batch described above.
            if i < start_index:
                continue

            run_dbt_for_model(
                model_name,
                row["node_url"],
                row["root_height"],
                row["end_height"],
                use_dev,
            )
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI: <script> <model_name> [use_dev]
    #   model_name - required; one of the names listed below
    #   use_dev    - optional; the literal text True or False (default False)
    valid_models = ("blocks", "collections", "transactions", "transaction_results")

    # Fail with a usage hint rather than a bare IndexError when the required
    # argument is missing.
    if len(sys.argv) < 2:
        raise ValueError(
            f"usage: {sys.argv[0]} <model_name> [True|False]; "
            "model_name must be one of the following: blocks, collections, transactions, transaction_results"
        )

    # accept model name as cli argument and pass to main
    model_name = sys.argv[1]
    # acceptable model names: blocks, collections, transactions, transaction_results
    if model_name not in valid_models:
        raise ValueError("model_name must be one of the following: blocks, collections, transactions, transaction_results")

    # use_dev is optional cli argument that accepts only True or False
    use_dev = False
    if len(sys.argv) > 2:
        use_dev_arg = sys.argv[2]
        if use_dev_arg not in ("True", "False"):
            raise ValueError("use_dev must be True or False")
        # Convert to a real bool so the value has the same type as the
        # default; str(True)/str(False) match the accepted argument text.
        use_dev = use_dev_arg == "True"

    main(model_name, use_dev)
|