initial setup files

This commit is contained in:
Desmond Hui 2022-04-14 11:34:12 -07:00
parent d388b58a01
commit fdc4e6cf12
16 changed files with 202 additions and 125 deletions

135
.gitignore vendored
View File

@ -1,129 +1,16 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
dbt_modules/
# newer versions of dbt use this directory instead of dbt_modules for test dependencies
dbt_packages/
logs/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.venv/
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Visual Studio Code files
*/.vscode
*.code-workspace
.history/
**/.DS_Store
.vscode/

View File

@ -1 +1,30 @@
# flow-models
## Profile Set Up
#### Use the following within profiles.yml
----
```yml
flow:
target: dev
outputs:
dev:
type: snowflake
account: <ACCOUNT>
role: <ROLE>
user: <USERNAME>
password: <PASSWORD>
region: <REGION>
database: FLOW_DEV
warehouse: <WAREHOUSE>
schema: silver
threads: 4
client_session_keep_alive: False
query_tag: <TAG>
```
### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices

0
analysis/.gitkeep Normal file
View File

0
data/.gitkeep Normal file
View File

45
dbt_project.yml Normal file
View File

@ -0,0 +1,45 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "flow_models"
version: "1.0.0"
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: "flow"
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
docs-paths: ["docs"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_modules"
- "dbt_packages"
on-run-start:
- '{{create_sps()}}'
- '{{create_tasks()}}'
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
# In this example config, we tell dbt to build all models in the example/ directory
# as tables. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
+copy_grants: true
+persist_docs:
relation: true
columns: true
vars:
"dbt_date:time_zone": GMT

0
macros/.gitkeep Normal file
View File

6
macros/create_sps.sql Normal file
View File

@ -0,0 +1,6 @@
{% macro create_sps() %}
{% if target.database == 'FLOW' %}
CREATE SCHEMA IF NOT EXISTS _internal;
{{ sp_create_prod_clone('_internal') }};
{% endif %}
{% endmacro %}

7
macros/create_tasks.sql Normal file
View File

@ -0,0 +1,7 @@
{% macro create_tasks() %}
{% if target.database == 'FLOW' %}
CREATE SCHEMA IF NOT EXISTS _internal;
{{ task_run_sp_create_prod_clone('_internal') }};
{% endif %}
{% endmacro %}

View File

@ -0,0 +1,17 @@
{% macro generate_schema_name(
custom_schema_name = none,
node = none
) -%}
{% set node_name = node.name %}
{% set split_name = node_name.split('__') %}
{{ split_name [0] | trim }}
{%- endmacro %}
{% macro generate_alias_name(
custom_alias_name = none,
node = none
) -%}
{% set node_name = node.name %}
{% set split_name = node_name.split('__') %}
{{ split_name [1] | trim }}
{%- endmacro %}

View File

@ -0,0 +1,46 @@
{% macro sp_create_prod_clone(target_schema) -%}
create or replace procedure {{ target_schema }}.create_prod_clone(source_db_name string, destination_db_name string, role_name string)
returns boolean
language javascript
execute as caller
as
$$
snowflake.execute({sqlText: `BEGIN TRANSACTION;`});
try {
snowflake.execute({sqlText: `DROP DATABASE IF EXISTS ${DESTINATION_DB_NAME}`});
snowflake.execute({sqlText: `CREATE DATABASE ${DESTINATION_DB_NAME} CLONE ${SOURCE_DB_NAME}`});
snowflake.execute({sqlText: `DROP SCHEMA ${DESTINATION_DB_NAME}._INTERNAL`}); /* this only needs to be in prod */
var existing_schemas = snowflake.execute({sqlText: `SELECT table_schema
FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES
WHERE grantor IS NOT NULL
GROUP BY 1;`});
while (existing_schemas.next()) {
var schema = existing_schemas.getColumnValue(1)
snowflake.execute({sqlText: `GRANT OWNERSHIP ON SCHEMA ${DESTINATION_DB_NAME}.${schema} TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`});
}
var existing_tables = snowflake.execute({sqlText: `SELECT table_schema, table_name
FROM ${DESTINATION_DB_NAME}.INFORMATION_SCHEMA.TABLE_PRIVILEGES
WHERE grantor IS NOT NULL
GROUP BY 1,2;`});
while (existing_tables.next()) {
var schema = existing_tables.getColumnValue(1)
var table_name = existing_tables.getColumnValue(2)
snowflake.execute({sqlText: `GRANT OWNERSHIP ON TABLE ${DESTINATION_DB_NAME}.${schema}.${table_name} TO ROLE ${ROLE_NAME} COPY CURRENT GRANTS;`});
}
snowflake.execute({sqlText: `GRANT OWNERSHIP ON DATABASE ${DESTINATION_DB_NAME} TO ROLE ${ROLE_NAME};`})
snowflake.execute({sqlText: `COMMIT;`});
} catch (err) {
snowflake.execute({sqlText: `ROLLBACK;`});
throw(err);
}
return true
$$
{%- endmacro %}

View File

@ -0,0 +1,9 @@
{% macro task_run_sp_create_prod_clone(target_schema) -%}
create or replace task {{target_schema}}.run_sp_create_prod_clone
warehouse = dbt_cloud
schedule = 'USING CRON 15 10 * * * UTC'
as
call {{ target_schema }}.create_prod_clone('flow', 'flow_dev', 'internal_dev');
alter task {{ target_schema }}.run_sp_create_prod_clone resume
{%- endmacro %}

View File

@ -0,0 +1,19 @@
{{ config (
materialized = 'view'
) }}
SELECT
record_id,
offset_id,
block_id,
block_timestamp,
network,
chain_id,
tx_count,
header,
ingested_at
FROM
{{ source(
'prod',
'flow_blocks'
) }}

9
models/sources.yml Normal file
View File

@ -0,0 +1,9 @@
version: 2
sources:
- name: prod
database: chainwalkers
schema: prod
tables:
- name: flow_blocks
- name: flow_txs

3
packages.yml Normal file
View File

@ -0,0 +1,3 @@
packages:
- package: calogica/dbt_expectations
version: [">=0.4.0", "<0.9.0"]

0
snapshots/.gitkeep Normal file
View File

0
tests/.gitkeep Normal file
View File