diff --git a/.gitignore b/.gitignore index b6e4761..691edab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,129 +1,16 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder target/ +dbt_modules/ +# newer versions of dbt use this directory instead of dbt_modules for test dependencies +dbt_packages/ +logs/ -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv +.venv/ .python-version -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +# Visual Studio Code files +*/.vscode +*.code-workspace +.history/ +**/.DS_Store +.vscode/ \ No newline at end of file diff --git a/README.md b/README.md index 0872829..46a52ab 100644 --- a/README.md +++ b/README.md @@ -1 +1,30 @@ -# flow-models \ No newline at end of file +## Profile Set Up + +#### Use the following within profiles.yml +---- + +```yml +flow: + target: dev + outputs: + dev: + type: snowflake + account: + role: + user: + password: + region: + database: FLOW_DEV + warehouse: + schema: silver + threads: 4 + client_session_keep_alive: False + query_tag: +``` + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices \ No newline at end of file diff --git a/analysis/.gitkeep b/analysis/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt_project.yml b/dbt_project.yml new file mode 100644 index 0000000..3b35b73 --- /dev/null +++ b/dbt_project.yml @@ -0,0 +1,45 @@ +# Name your project! Project names should contain only lowercase characters +# and underscores. 
A good package name should reflect your organization's +# name or the intended use of these models +name: "flow_models" +version: "1.0.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: "flow" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] +docs-paths: ["docs"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + - "dbt_packages" + +on-run-start: + - '{{create_sps()}}' + - '{{create_tasks()}}' + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# The settings under `models:` apply to every model in this project: `copy_grants` +# and `persist_docs` for relations and columns. They can be overridden in the +# individual model files using the `{{ config(...) }}` macro. 
+models: + +copy_grants: true + +persist_docs: + relation: true + columns: true + +vars: + "dbt_date:time_zone": GMT diff --git a/macros/.gitkeep b/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/macros/create_sps.sql b/macros/create_sps.sql new file mode 100644 index 0000000..d22bf36 --- /dev/null +++ b/macros/create_sps.sql @@ -0,0 +1,6 @@ +{% macro create_sps() %} + {% if target.database | upper == 'FLOW' %} {# Only deploy the procedure when targeting the FLOW database; compared case-insensitively so a lower-case profile value still matches. #} + CREATE SCHEMA IF NOT EXISTS _internal; + {{ sp_create_prod_clone('_internal') }}; + {% endif %} +{% endmacro %} \ No newline at end of file diff --git a/macros/create_tasks.sql b/macros/create_tasks.sql new file mode 100644 index 0000000..1dcdbe4 --- /dev/null +++ b/macros/create_tasks.sql @@ -0,0 +1,7 @@ +{% macro create_tasks() %} + {% if target.database | upper == 'FLOW' %} {# Only deploy the task when targeting the FLOW database; compared case-insensitively so a lower-case profile value still matches. #} + CREATE SCHEMA IF NOT EXISTS _internal; + {{ task_run_sp_create_prod_clone('_internal') }}; + {% endif %} + +{% endmacro %} \ No newline at end of file diff --git a/macros/custom_naming_macros.sql b/macros/custom_naming_macros.sql new file mode 100644 index 0000000..b8a3071 --- /dev/null +++ b/macros/custom_naming_macros.sql @@ -0,0 +1,17 @@ +{% macro generate_schema_name( + custom_schema_name = none, + node = none + ) -%} + {% set node_name = node.name %} {# Nodes are named <schema>__<alias>: the segment before the first '__' is the schema. custom_schema_name is not used. #} + {% set split_name = node_name.split('__') %} {# Names without a '__' separator fall back to target.schema. #} + {{ (split_name[0] if split_name | length > 1 else target.schema) | trim }} +{%- endmacro %} + +{% macro generate_alias_name( + custom_alias_name = none, + node = none + ) -%} + {% set node_name = node.name %} {# Nodes are named <schema>__<alias>: the second '__'-separated segment is the relation alias. custom_alias_name is not used. #} + {% set split_name = node_name.split('__') %} {# Names without a '__' separator use the full node name as the alias. #} + {{ (split_name[1] if split_name | length > 1 else node_name) | trim }} +{%- endmacro %} diff --git a/macros/sp_create_prod_clone.sql b/macros/sp_create_prod_clone.sql new file mode 100644 index 0000000..f63149a --- /dev/null +++ b/macros/sp_create_prod_clone.sql @@ -0,0 +1,46 @@ +{% macro sp_create_prod_clone(target_schema) -%} + +create or replace procedure {{ target_schema }}.create_prod_clone(source_db_name string, destination_db_name string, role_name string) +returns boolean +language javascript +execute 
as caller +as +$$ + snowflake.execute({sqlText: `BEGIN TRANSACTION;`}); /* NOTE: Snowflake auto-commits DDL, so the ROLLBACK below cannot undo the drop or the clone */ + try { /* arguments are declared lower-case in the signature and referenced upper-case inside this JavaScript body */ + snowflake.execute({sqlText: `DROP DATABASE IF EXISTS ${DESTINATION_DB_NAME}`}); + snowflake.execute({sqlText: `CREATE DATABASE ${DESTINATION_DB_NAME} CLONE ${SOURCE_DB_NAME}`}); + snowflake.execute({sqlText: `DROP SCHEMA IF EXISTS ${DESTINATION_DB_NAME}._INTERNAL`}); /* this only needs to be in prod */ + /* Hand ownership of every schema that has explicit table grants to ROLE_NAME, keeping current grants. Names are double-quoted so mixed-case identifiers resolve. */ + var existing_schemas = snowflake.execute({sqlText: `SELECT table_schema + FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES + WHERE grantor IS NOT NULL + GROUP BY 1;`}); + + while (existing_schemas.next()) { + var schema = existing_schemas.getColumnValue(1); + snowflake.execute({sqlText: `GRANT OWNERSHIP ON SCHEMA ${DESTINATION_DB_NAME}."${schema}" TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`}); + } + /* Same for every table listed in TABLE_PRIVILEGES with a non-null grantor. */ + var existing_tables = snowflake.execute({sqlText: `SELECT table_schema, table_name + FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES + WHERE grantor IS NOT NULL + GROUP BY 1,2;`}); + + while (existing_tables.next()) { + var table_schema = existing_tables.getColumnValue(1); + var table_name = existing_tables.getColumnValue(2); + snowflake.execute({sqlText: `GRANT OWNERSHIP ON TABLE ${DESTINATION_DB_NAME}."${table_schema}"."${table_name}" TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`}); + } + + snowflake.execute({sqlText: `GRANT OWNERSHIP ON DATABASE ${DESTINATION_DB_NAME} TO ROLE ${ROLE_NAME};`}); + snowflake.execute({sqlText: `COMMIT;`}); + } catch (err) { + snowflake.execute({sqlText: `ROLLBACK;`}); + throw(err); + } + + return true; +$$ + +{%- endmacro %} \ No newline at end of file diff --git a/macros/task_run_sp_create_prod_clone.sql b/macros/task_run_sp_create_prod_clone.sql new file mode 100644 index 0000000..01fc81b --- /dev/null +++ b/macros/task_run_sp_create_prod_clone.sql @@ -0,0 +1,9 @@ +{% macro task_run_sp_create_prod_clone(target_schema) -%} + create or replace task {{target_schema}}.run_sp_create_prod_clone + warehouse = dbt_cloud + schedule = 'USING CRON 15 10 * * * UTC' + 
as + call {{ target_schema }}.create_prod_clone('flow', 'flow_dev', 'internal_dev'); + {# Resume the task after (re)creating it so that its schedule is active. #} + alter task {{ target_schema }}.run_sp_create_prod_clone resume +{%- endmacro %} \ No newline at end of file diff --git a/models/bronze/bronze__blocks.sql b/models/bronze/bronze__blocks.sql new file mode 100644 index 0000000..b1338b1 --- /dev/null +++ b/models/bronze/bronze__blocks.sql @@ -0,0 +1,19 @@ +{{ config ( + materialized = 'view' +) }} +{# View that selects the listed columns from the prod.flow_blocks source without transforming them. #} +SELECT + record_id, + offset_id, + block_id, + block_timestamp, + network, + chain_id, + tx_count, + header, + ingested_at +FROM + {{ source( + 'prod', + 'flow_blocks' + ) }} \ No newline at end of file diff --git a/models/sources.yml b/models/sources.yml new file mode 100644 index 0000000..0385c6c --- /dev/null +++ b/models/sources.yml @@ -0,0 +1,9 @@ +version: 2 +# Tables in the chainwalkers database, prod schema, exposed to models as the `prod` source. +sources: + - name: prod + database: chainwalkers + schema: prod + tables: + - name: flow_blocks + - name: flow_txs diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..fda2166 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: calogica/dbt_expectations + version: [">=0.4.0", "<0.9.0"] \ No newline at end of file diff --git a/snapshots/.gitkeep b/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/.gitkeep b/tests/.gitkeep new file mode 100644 index 0000000..e69de29