initial setup files

2026-02-06 11:47:00 +00:00 · 2022-04-14 11:34:12 -07:00 · 2022-04-14 11:34:12 -07:00 · fdc4e6cf12
commit fdc4e6cf12
parent d388b58a01
16 changed files with 202 additions and 125 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,129 +1,16 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class

-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
 target/
+dbt_modules/
+# newer versions of dbt use this directory instead of dbt_modules for test dependencies
+dbt_packages/
+logs/

-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
+.venv/
 .python-version

-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
+# Visual Studio Code files
+*/.vscode
+*.code-workspace
+.history/
+**/.DS_Store
+.vscode/
--- a/README.md
+++ b/README.md
@ -1 +1,30 @@
-# flow-models
+## Profile Setup
+
+#### Add the following to your `profiles.yml`
+----
+
+```yml
+flow:
+  target: dev
+  outputs:
+    dev:
+      type: snowflake
+      account: <ACCOUNT>
+      role: <ROLE>
+      user: <USERNAME>
+      password: <PASSWORD>
+      region: <REGION>
+      database: FLOW_DEV
+      warehouse: <WAREHOUSE>
+      schema: silver
+      threads: 4
+      client_session_keep_alive: False
+      query_tag: <TAG>
+```
+
+### Resources:
+- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
+- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
+- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
+- Find [dbt events](https://events.getdbt.com) near you
+- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
--- a/analysis/.gitkeep
+++ b/analysis/.gitkeep
--- a/data/.gitkeep
+++ b/data/.gitkeep
--- a/dbt_project.yml
+++ b/dbt_project.yml
@ -0,0 +1,45 @@
+# Name your project! Project names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: "flow_models"
+version: "1.0.0"
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project.
+profile: "flow"
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that models in this project can be
+# found in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+seed-paths: ["data"]
+macro-paths: ["macros"]
+snapshot-paths: ["snapshots"]
+docs-paths: ["docs"]
+
+target-path: "target" # directory which will store compiled SQL files
+clean-targets: # directories to be removed by `dbt clean`
+  - "target"
+  - "dbt_modules"
+  - "dbt_packages"
+
+on-run-start:
+  - '{{create_sps()}}'
+  - '{{create_tasks()}}'
+
+# Configuring models
+# Full documentation: https://docs.getdbt.com/docs/configuring-models
+
+# Project-wide model defaults: copy existing grants when a relation is rebuilt
+# and persist relation and column descriptions to the warehouse. These settings
+# can be overridden in the individual model files using the `{{ config(...) }}` macro.
+models:
+  +copy_grants: true
+  +persist_docs:
+    relation: true
+    columns: true
+
+vars:
+  "dbt_date:time_zone": GMT
--- a/macros/.gitkeep
+++ b/macros/.gitkeep
--- a/macros/create_sps.sql
+++ b/macros/create_sps.sql
@ -0,0 +1,6 @@
+{#
+    create_sps: run as an `on-run-start` hook (see dbt_project.yml).
+
+    When the active target database is FLOW, ensures the `_internal` schema
+    exists and (re)creates the `create_prod_clone` stored procedure inside it.
+    For any other target database the macro renders no SQL statements.
+#}
+{% macro create_sps() %}
+    {% set internal_schema = '_internal' %}
+    {% if target.database == 'FLOW' %}
+        CREATE SCHEMA IF NOT EXISTS {{ internal_schema }};
+        {{ sp_create_prod_clone(internal_schema) }};
+    {% endif %}
+{% endmacro %}
--- a/macros/create_tasks.sql
+++ b/macros/create_tasks.sql
@ -0,0 +1,7 @@
+{#
+    create_tasks: run as an `on-run-start` hook (see dbt_project.yml).
+
+    When the active target database is FLOW, ensures the `_internal` schema
+    exists and (re)creates the scheduled task that calls `create_prod_clone`.
+    For any other target database the macro renders no SQL statements.
+#}
+{% macro create_tasks() %}
+    {% set internal_schema = '_internal' %}
+    {% if target.database == 'FLOW' %}
+        CREATE SCHEMA IF NOT EXISTS {{ internal_schema }};
+        {{ task_run_sp_create_prod_clone(internal_schema) }};
+    {% endif %}
+{% endmacro %}
--- a/macros/custom_naming_macros.sql
+++ b/macros/custom_naming_macros.sql
@ -0,0 +1,17 @@
+{#
+    generate_schema_name: derives the schema from the node name.
+
+    The node name is split on "__" and the first segment (trimmed) is
+    returned, e.g. `bronze__blocks` -> `bronze`.
+
+    Args:
+        custom_schema_name: accepted for signature compatibility; not used.
+        node: the node whose `name` is inspected.
+#}
+{% macro generate_schema_name(
+        custom_schema_name = none,
+        node = none
+    ) -%}
+    {% set name_parts = node.name.split('__') %}
+    {{ name_parts | first | trim }}
+{%- endmacro %}
+
+{#
+    generate_alias_name: derives the relation alias from the node name.
+
+    The node name is split on "__" and the second segment (trimmed) is
+    returned, e.g. `bronze__blocks` -> `blocks`.
+
+    When the name contains no "__" separator, or nothing follows it, the
+    full node name is returned instead, so the alias is never rendered as an
+    empty string.
+
+    Args:
+        custom_alias_name: accepted for signature compatibility; not used.
+        node: the node whose `name` is inspected.
+#}
+{% macro generate_alias_name(
+        custom_alias_name = none,
+        node = none
+    ) -%}
+    {% set node_name = node.name %}
+    {% set split_name = node_name.split('__') %}
+    {% if split_name | length > 1 and split_name[1] | trim %}
+        {{ split_name[1] | trim }}
+    {% else %}
+        {# No usable segment after "__": fall back to the whole node name. #}
+        {{ node_name | trim }}
+    {% endif %}
+{%- endmacro %}
--- a/macros/sp_create_prod_clone.sql
+++ b/macros/sp_create_prod_clone.sql
@ -0,0 +1,46 @@
+{#
+    sp_create_prod_clone: renders the DDL that creates the JavaScript stored
+    procedure `<target_schema>.create_prod_clone`.
+
+    Macro args:
+        target_schema: schema in which the procedure is created.
+
+    Procedure args (referenced in upper case inside the JavaScript body):
+        source_db_name:      database to clone from.
+        destination_db_name: database that is dropped and re-created as a clone.
+        role_name:           role that receives ownership of the clone.
+
+    Procedure steps:
+        1. Drop the destination database if it exists, then clone the source.
+        2. Drop the `_INTERNAL` schema from the clone, if present.
+        3. Grant ownership of every schema and table listed in the clone's
+           INFORMATION_SCHEMA.TABLE_PRIVILEGES (rows with a non-null grantor)
+           to the role, keeping current grants.
+        4. Grant ownership of the destination database to the role.
+
+    Returns true on success. On any error it issues ROLLBACK and rethrows.
+
+    NOTE(review): Snowflake DDL statements are expected to auto-commit, so the
+    surrounding transaction probably cannot undo the DROP/CLONE steps.
+    Confirm before relying on the rollback.
+#}
+{% macro sp_create_prod_clone(target_schema) -%}
+
+create or replace procedure {{ target_schema }}.create_prod_clone(source_db_name string, destination_db_name string, role_name string)
+returns boolean
+language javascript
+execute as caller
+as
+$$
+    /* Wrap a name read from INFORMATION_SCHEMA in double quotes, doubling any
+       embedded quote, so mixed-case or special-character identifiers resolve
+       exactly as stored. */
+    function quoteIdentifier(name) {
+        var escaped = String(name).split(`"`).join(`""`);
+        return `"${escaped}"`;
+    }
+
+    snowflake.execute({sqlText: `BEGIN TRANSACTION;`});
+    try {
+        snowflake.execute({sqlText: `DROP DATABASE IF EXISTS ${DESTINATION_DB_NAME}`});
+        snowflake.execute({sqlText: `CREATE DATABASE ${DESTINATION_DB_NAME} CLONE ${SOURCE_DB_NAME}`});
+        /* _INTERNAL only needs to be in prod. IF EXISTS keeps the run from
+           failing when the source database has no such schema. */
+        snowflake.execute({sqlText: `DROP SCHEMA IF EXISTS ${DESTINATION_DB_NAME}._INTERNAL`});
+
+        var existing_schemas = snowflake.execute({sqlText: `SELECT table_schema
+            FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES
+            WHERE grantor IS NOT NULL
+            GROUP BY table_schema;`});
+
+        while (existing_schemas.next()) {
+            var schema_ident = quoteIdentifier(existing_schemas.getColumnValue(1));
+            snowflake.execute({sqlText: `GRANT OWNERSHIP ON SCHEMA ${DESTINATION_DB_NAME}.${schema_ident} TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`});
+        }
+
+        var existing_tables = snowflake.execute({sqlText: `SELECT table_schema, table_name
+            FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES
+            WHERE grantor IS NOT NULL
+            GROUP BY table_schema, table_name;`});
+
+        while (existing_tables.next()) {
+            var table_schema_ident = quoteIdentifier(existing_tables.getColumnValue(1));
+            var table_ident = quoteIdentifier(existing_tables.getColumnValue(2));
+            snowflake.execute({sqlText: `GRANT OWNERSHIP ON TABLE ${DESTINATION_DB_NAME}.${table_schema_ident}.${table_ident} TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`});
+        }
+
+        snowflake.execute({sqlText: `GRANT OWNERSHIP ON DATABASE ${DESTINATION_DB_NAME} TO ROLE ${ROLE_NAME};`});
+        snowflake.execute({sqlText: `COMMIT;`});
+    } catch (err) {
+        snowflake.execute({sqlText: `ROLLBACK;`});
+        throw err;
+    }
+
+    return true;
+$$
+
+{%- endmacro %}
--- a/macros/task_run_sp_create_prod_clone.sql
+++ b/macros/task_run_sp_create_prod_clone.sql
@ -0,0 +1,9 @@
+{#
+    task_run_sp_create_prod_clone: renders the DDL for the scheduled task
+    `<target_schema>.run_sp_create_prod_clone` and the statement that resumes it.
+
+    The task runs on the `dbt_cloud` warehouse on the cron schedule
+    `15 10 * * *` (UTC) and calls
+    `<target_schema>.create_prod_clone('flow', 'flow_dev', 'internal_dev')`.
+
+    Args:
+        target_schema: schema holding both the task and the procedure it calls.
+#}
+{% macro task_run_sp_create_prod_clone(target_schema) -%}
+    {% set task_name = target_schema ~ '.run_sp_create_prod_clone' %}
+    CREATE OR REPLACE TASK {{ task_name }}
+        WAREHOUSE = dbt_cloud
+        SCHEDULE = 'USING CRON 15 10 * * * UTC'
+    AS
+        CALL {{ target_schema }}.create_prod_clone('flow', 'flow_dev', 'internal_dev');
+
+    ALTER TASK {{ task_name }} RESUME
+{%- endmacro %}
--- a/models/bronze/bronze__blocks.sql
+++ b/models/bronze/bronze__blocks.sql
@ -0,0 +1,19 @@
+{# Pass-through view over the `flow_blocks` table of the `prod` source, selecting an explicit column list. #}
+{{ config(materialized = 'view') }}
+
+SELECT
+    record_id,
+    offset_id,
+    block_id,
+    block_timestamp,
+    network,
+    chain_id,
+    tx_count,
+    header,
+    ingested_at
+FROM
+    {{ source('prod', 'flow_blocks') }}
--- a/models/sources.yml
+++ b/models/sources.yml
@ -0,0 +1,9 @@
+version: 2
+
+sources:
+  - name: prod
+    database: chainwalkers
+    schema: prod
+    tables:
+      - name: flow_blocks
+      - name: flow_txs
--- a/packages.yml
+++ b/packages.yml
@ -0,0 +1,3 @@
+packages:
+  - package: calogica/dbt_expectations
+    version: [">=0.4.0", "<0.9.0"]
--- a/snapshots/.gitkeep
+++ b/snapshots/.gitkeep
--- a/tests/.gitkeep
+++ b/tests/.gitkeep