From f370fc4804cccf9c45f0fa3b60e15721f0300b46 Mon Sep 17 00:00:00 2001
From: yulike
Date: Thu, 6 Oct 2022 16:52:26 -0400
Subject: [PATCH] add the initial project

---
 .env                                      |  8 ++++
 .github/workflows/docs_update.yml         | 37 ++++++++++++++
 README.md                                 | 41 ++++++++++++++++
 dbt_project.yml                           | 25 +++++-----
 macros/create_sps.sql                     |  6 +++
 macros/python/hex_to_int_udf.sql          | 61 ++++++++++++++++++++++++
 macros/streamline/api_integrations.sql    | 11 +++++
 macros/streamline/streamline_udfs.sql     |  7 +++
 macros/tests/sequence_gaps.sql            | 34 +++++++++++++
 macros/tests/tx_gaps.sql                  | 33 +++++++++++++
 models/bronze/bronze__test.sql            | 12 +++++
 models/example/my_first_dbt_model.sql     | 27 -----------
 models/example/my_second_dbt_model.sql    |  6 ---
 models/example/schema.yml                 | 21 --------
 models/sources.yml                        |  9 ++++
 models/streamline/streamline__blocks.sql  | 12 +++++
 packages.yml                              |  3 ++
 profiles.yml                              | 19 ++++++++
 seeds/.gitkeep                            |  0
 19 files changed, 307 insertions(+), 65 deletions(-)
 create mode 100644 .env
 create mode 100644 .github/workflows/docs_update.yml
 create mode 100644 macros/create_sps.sql
 create mode 100644 macros/python/hex_to_int_udf.sql
 create mode 100644 macros/streamline/api_integrations.sql
 create mode 100644 macros/streamline/streamline_udfs.sql
 create mode 100644 macros/tests/sequence_gaps.sql
 create mode 100644 macros/tests/tx_gaps.sql
 create mode 100644 models/bronze/bronze__test.sql
 delete mode 100644 models/example/my_first_dbt_model.sql
 delete mode 100644 models/example/my_second_dbt_model.sql
 delete mode 100644 models/example/schema.yml
 create mode 100644 models/sources.yml
 create mode 100644 models/streamline/streamline__blocks.sql
 create mode 100644 packages.yml
 create mode 100644 profiles.yml
 delete mode 100644 seeds/.gitkeep

diff --git a/.env b/.env
new file mode 100644
index 0000000..cdecbde
--- /dev/null
+++ b/.env
@@ -0,0 +1,8 @@
+SF_ACCOUNT=vna27887.us-east-1
+SF_USERNAME=aws_lambda_axelar_api
+SF_PASSWORD=exYndogg*JdEkAm8Lion
+SF_REGION=us-east-1
+SF_DATABASE=axelar_dev
+SF_WAREHOUSE=DBT_CLOUD
+SF_ROLE=aws_lambda_axelar_api
+SF_SCHEMA=streamline
\ No newline at end of file
diff --git a/.github/workflows/docs_update.yml b/.github/workflows/docs_update.yml
new file mode 100644
index 0000000..11e6103
--- /dev/null
+++ b/.github/workflows/docs_update.yml
@@ -0,0 +1,37 @@
+name: docs_update
+
+on:
+  push:
+    branches:
+      - "main"
+
+env:
+  ACCOUNT: "${{ secrets.ACCOUNT }}"
+  ROLE: "${{ secrets.ROLE }}"
+  USER: "${{ secrets.USER }}"
+  PASSWORD: "${{ secrets.PASSWORD }}"
+  REGION: "${{ secrets.REGION }}"
+  DATABASE_DEV: "${{ secrets.DATABASE_DEV }}"
+  DATABASE_PROD: "${{ secrets.DATABASE_PROD }}"
+  WAREHOUSE_DEV: "${{ secrets.WAREHOUSE_DEV }}"
+  WAREHOUSE_PROD: "${{ secrets.WAREHOUSE_PROD }}"
+
+jobs:
+  scheduled_run:
+    name: docs_update
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v1
+        with:
+          python-version: "3.7.x"
+
+      - name: install dependencies
+        run: |
+          pip install dbt-snowflake
+          dbt deps
+
+      - name: Run dbt models
+        run: dbt run
\ No newline at end of file
diff --git a/README.md b/README.md
index 7874ac8..f545a14 100644
--- a/README.md
+++ b/README.md
@@ -13,3 +13,44 @@ Try running the following commands:
 - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
 - Find [dbt events](https://events.getdbt.com) near you
 - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
+
+
+
+Steps:
+
+Step 1: update the `profiles.yml`:
+
+```yml
+axelar:
+  target: dev
+  outputs:
+    dev:
+      type: snowflake
+      account:
+      role:
+      user:
+      password:
+      region:
+      database: AXELAR_DEV
+      warehouse:
+      schema: silver
+      threads: 12
+      client_session_keep_alive: False
+      query_tag:
+    prod:
+      type: snowflake
+      account:
+      role:
+      user:
+      password:
+      region:
+      database: AXELAR
+      warehouse:
+      schema: silver
+      threads: 12
+      client_session_keep_alive: False
+      query_tag:
+```
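+
+With the profile filled in, a quick smoke test (assuming dbt and the
+dbt-snowflake adapter are installed locally) looks like:
+
+```bash
+dbt debug --target dev   # verify the profile resolves and Snowflake is reachable
+dbt deps                 # install packages from packages.yml
+dbt run --target dev     # build the models
+```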
+
+
+Step 2: set up the AWS Lambda API integration (see `macros/streamline/api_integrations.sql`).
\ No newline at end of file
diff --git a/dbt_project.yml b/dbt_project.yml
index 2096f39..51f46fe 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -22,17 +22,20 @@ snapshot-paths: ["snapshots"]
 target-path: "target" # directory which will store compiled SQL files
 clean-targets: # directories to be removed by `dbt clean`
   - "target"
+  - "dbt_modules"
   - "dbt_packages"
 
-
-# Configuring models
-# Full documentation: https://docs.getdbt.com/docs/configuring-models
-
-# In this example config, we tell dbt to build all models in the example/ directory
-# as tables. These settings can be overridden in the individual model files
-# using the `{{ config(...) }}` macro.
 models:
-  axelar:
-    # Config indicated by + and applies to all files under models/example/
-    example:
-      +materialized: view
+  +copy_grants: true
+  +on_schema_change: sync_all_columns
+
+tests:
+  +store_failures: true # all tests
+
+on-run-start:
+  - "{{ create_sps() }}"
+
+vars:
+  "dbt_date:time_zone": GMT
+  STREAMLINE_INVOKE_STREAMS: False
+  STREAMLINE_USE_DEV_FOR_EXTERNAL_TABLES: False
\ No newline at end of file
diff --git a/macros/create_sps.sql b/macros/create_sps.sql
new file mode 100644
index 0000000..32d99ff
--- /dev/null
+++ b/macros/create_sps.sql
@@ -0,0 +1,6 @@
+{% macro create_sps() %}
+    {% if target.database == 'AXELAR' %}
+        CREATE SCHEMA IF NOT EXISTS _internal;
+        {{ sp_create_prod_clone('_internal') }};
+    {% endif %}
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/python/hex_to_int_udf.sql b/macros/python/hex_to_int_udf.sql
new file mode 100644
index 0000000..d52114f
--- /dev/null
+++ b/macros/python/hex_to_int_udf.sql
@@ -0,0 +1,61 @@
+{% macro create_udf_hex_to_int(schema) %}
+create or replace function {{ schema }}.udf_hex_to_int(hex string)
+returns string
+language python
+runtime_version = '3.8'
+handler = 'hex_to_int'
+as
+$$
+def hex_to_int(hex) -> str:
+    """
+    Converts hex (of any size) to int (as a string). Snowflake and JavaScript can only handle up to 64-bit integers natively (38 digits of precision in Snowflake), hence the string return type.
+    select hex_to_int('200000000000000000000000000000211');
+    >> 680564733841876926926749214863536423441
+    select hex_to_int('0x200000000000000000000000000000211');
+    >> 680564733841876926926749214863536423441
+    select hex_to_int(NULL);
+    >> NULL
+    """
+    return (str(int(hex, 16)) if hex and hex != "0x" else None)
+$$;
+
+{% endmacro %}
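+
+{# Illustrative calls, assuming the macro was run with schema set to
+   "streamline":
+     select streamline.udf_hex_to_int('0x2a');  -- returns '42'
+     select streamline.udf_hex_to_int(NULL);    -- returns NULL
+#}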
+
+
+{% macro create_udf_hex_to_int_with_inputs(schema) %}
+create or replace function {{ schema }}.udf_hex_to_int(encoding string, hex string)
+returns string
+language python
+runtime_version = '3.8'
+handler = 'hex_to_int'
+as
+$$
+def hex_to_int(encoding, hex) -> str:
+    """
+    Converts hex (of any size) to int (as a string). Snowflake and JavaScript can only handle up to 64-bit integers natively (38 digits of precision in Snowflake), hence the string return type.
+    select hex_to_int('hex', '200000000000000000000000000000211');
+    >> 680564733841876926926749214863536423441
+    select hex_to_int('hex', '0x200000000000000000000000000000211');
+    >> 680564733841876926926749214863536423441
+    select hex_to_int('hex', NULL);
+    >> NULL
+    select hex_to_int('s2c', 'ffffffffffffffffffffffffffffffffffffffffffffffffffffffffe5b83acf');
+    >> -440911153
+    """
+    if not hex:
+        return None
+    if encoding.lower() == 's2c':
+        if hex[0:2].lower() != '0x':
+            hex = f'0x{hex}'
+
+        bits = len(hex[2:]) * 4
+        value = int(hex, 0)
+        if value & (1 << (bits - 1)):
+            value -= 1 << bits
+        return str(value)
+    else:
+        return str(int(hex, 16))
+
+$$;
+
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/streamline/api_integrations.sql b/macros/streamline/api_integrations.sql
new file mode 100644
index 0000000..f6d2d38
--- /dev/null
+++ b/macros/streamline/api_integrations.sql
@@ -0,0 +1,11 @@
+{% macro create_aws_ethereum_api() %}
+    {% if target.name == "prod" %}
+        {% set sql %}
+        CREATE api integration IF NOT EXISTS aws_ethereum_api api_provider = aws_api_gateway api_aws_role_arn = 'arn:aws:iam::661245089684:role/snowflake-api-ethereum' api_allowed_prefixes = (
+            'https://e03pt6v501.execute-api.us-east-1.amazonaws.com/prod/',
+            'https://mryeusnrob.execute-api.us-east-1.amazonaws.com/dev/'
+        ) enabled = TRUE;
+{% endset %}
+        {% do run_query(sql) %}
+    {% endif %}
+{% endmacro %}
diff --git a/macros/streamline/streamline_udfs.sql b/macros/streamline/streamline_udfs.sql
new file mode 100644
index 0000000..bc9abb5
--- /dev/null
+++ b/macros/streamline/streamline_udfs.sql
@@ -0,0 +1,7 @@
+{% macro create_udf_get_chainhead() %}
+    CREATE EXTERNAL FUNCTION IF NOT EXISTS streamline.udf_get_chainhead() returns variant api_integration = aws_axelar_api AS {% if target.name == "prod" %}
+        'https://e03pt6v501.execute-api.us-east-1.amazonaws.com/prod/get_chainhead'
+    {% else %}
+        'https://mryeusnrob.execute-api.us-east-1.amazonaws.com/dev/get_chainhead'
+    {%- endif %};
+{% endmacro %}
\ No newline at end of file
diff --git a/macros/tests/sequence_gaps.sql b/macros/tests/sequence_gaps.sql
new file mode 100644
index 0000000..9425003
--- /dev/null
+++ b/macros/tests/sequence_gaps.sql
@@ -0,0 +1,34 @@
+{% test sequence_gaps(
+    model,
+    partition_by,
+    column_name
+) %}
+{%- set partition_sql = partition_by | join(", ") -%}
+{%- set previous_column = "prev_" ~ column_name -%}
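+{# Example of wiring this generic test up in a schema.yml entry; the model
+   and column names below are illustrative:
+     models:
+       - name: some_blocks_model
+         tests:
+           - sequence_gaps:
+               partition_by: []
+               column_name: block_number
+#}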
+WITH source AS (
+    SELECT
+        {{ partition_sql + "," if partition_sql }}
+        {{ column_name }},
+        LAG(
+            {{ column_name }},
+            1
+        ) over (
+            {{ "PARTITION BY " ~ partition_sql if partition_sql }}
+            ORDER BY
+                {{ column_name }} ASC
+        ) AS {{ previous_column }}
+    FROM
+        {{ model }}
+)
+SELECT
+    {{ partition_sql + "," if partition_sql }}
+    {{ previous_column }},
+    {{ column_name }},
+    {{ column_name }} - {{ previous_column }}
+    - 1 AS gap
+FROM
+    source
+WHERE
+    {{ column_name }} - {{ previous_column }} <> 1
+ORDER BY
+    gap DESC {% endtest %}
diff --git a/macros/tests/tx_gaps.sql b/macros/tests/tx_gaps.sql
new file mode 100644
index 0000000..a9d93df
--- /dev/null
+++ b/macros/tests/tx_gaps.sql
@@ -0,0 +1,33 @@
+{% macro tx_gaps(
+        model
+    ) %}
+    WITH block_base AS (
+        SELECT
+            block_number,
+            tx_count
+        FROM
+            {{ ref('silver__blocks') }}
+    ),
+    model_name AS (
+        SELECT
+            block_number,
+            COUNT(
+                DISTINCT tx_hash
+            ) AS model_tx_count
+        FROM
+            {{ model }}
+        GROUP BY
+            block_number
+    )
+SELECT
+    block_base.block_number,
+    tx_count,
+    model_name.block_number AS model_block_number,
+    model_tx_count
+FROM
+    block_base
+    LEFT JOIN model_name
+    ON block_base.block_number = model_name.block_number
+WHERE
+    tx_count <> model_tx_count
+{% endmacro %}
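+
+{# Sketch of an ad-hoc call from a model or analysis file; the model name is
+   illustrative, and a silver__blocks model must exist before this compiles:
+     {{ tx_gaps(ref('some_transactions_model')) }}
+#}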
database: "{{ env_var('SF_DATABASE') }}" + warehouse: "{{ env_var('SF_WAREHOUSE') }}" + threads: 4 + client_session_keep_alive: False + query_tag: axelar_curator + config: + send_anonymous_usage_stats: False \ No newline at end of file diff --git a/seeds/.gitkeep b/seeds/.gitkeep deleted file mode 100644 index e69de29..0000000