commit ee65e3bdab11d74f1b9101d1f15b998b88059e26 Author: Jim Myers Date: Wed Jan 12 14:15:26 2022 -0500 First commit diff --git a/.env.sample b/.env.sample new file mode 100644 index 0000000..7c4c509 --- /dev/null +++ b/.env.sample @@ -0,0 +1,8 @@ +SF_ACCOUNT=zsniary-metricsdao +SF_USERNAME= +SF_PASSWORD= +SF_REGION=us-east-1 +SF_DATABASE=NEAR +SF_WAREHOUSE=DEFAULT +SF_ROLE=PUBLIC +SF_SCHEMA=DEV \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f24862a --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +target/ +dbt_modules/ +logs/ +.env +.notes +dbt_docs.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7eac971 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM fishtownanalytics/dbt:0.21.1 +WORKDIR /support +RUN mkdir /root/.dbt +COPY profiles.yml /root/.dbt +RUN mkdir /root/near +WORKDIR /near +COPY . . +EXPOSE 8080 +ENTRYPOINT [ "bash"] \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b3ab6da --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +SHELL := /bin/bash + +dbt-console: + docker-compose run dbt_console + +dbt-docs: + docker-compose run --service-ports dbt_docs + +.PHONY: dbt-console \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..08aa66b --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# Near DBT Project + +Curated SQL Views and Metrics for the Near Blockchain. + +What's Near? Learn more [here](https://www.near.one/) + +## Setup + +1. [PREREQUISITE] Download [Docker for Desktop](https://www.docker.com/products/docker-desktop). +2. Create a `.env` file with the following contents (note `.env` will not be commited to source): + +``` +SF_ACCOUNT=zsniary-metricsdao +SF_USERNAME= +SF_PASSWORD= +SF_REGION=us-east-1 +SF_DATABASE=NEAR +SF_WAREHOUSE=DEFAULT +SF_ROLE=PUBLIC +SF_SCHEMA=DEV +``` + +3. New to DBT? It's pretty dope. Read up on it [here](https://www.getdbt.com/docs/) + +## Getting Started Commands + +Run the follow commands from inside the Near directory (**you must complete the Getting Started steps above^^**) + +### DBT Environment + +`make dbt-console` +This will mount your local near directory into a dbt console where dbt is installed. + +### DBT Project Docs + +`make dbt-docs` +This will compile your dbt documentation and launch a web-server at http://localhost:8080 + +## Project Overview + +`/models` - this directory contains SQL files as Jinja templates. DBT will compile these templates and wrap them into create table statements. This means all you have to do is define SQL select statements, while DBT handles the rest. The snowflake table name will match the name of the sql model file. + +`/macros` - these are helper functions defined as Jinja that can be injected into your SQL models. + +`/tests` - custom SQL tests that can be attached to tables. + +## Background on Data + +`CHAINWALKERS.PROD.NEAR_BLOCKS` - Near blocks +`CHAINWALKERS.PROD.NEAR_TXS` - Near txs + +Blocks and transactions are fed into the above two Near tables utilizing the Chainwalkers Framework. Details on the data: + +1. This is near-real time. Blocks land in this table within 3-5 minutes of being minted. +2. The table is a read-only data share in the Metrics DAO Snowflake account under the database `FLIPSIDE`. +3. The table is append-only, meaning that duplicates can exist if blocks are re-processed. The injested_at timestamp should be used to retrieve only the most recent block. Macros exist `macros/dedupe_utils.sql` to handle this. See `models/core/blocks.sql` or `/models/core/txs.sql` for an example. +4. Tx logs are decoded where an ABI exists. + +### Table Structures: + +`CHAINWALKERS.PROD.NEAR_BLOCKS` - Near Blocks + +| Column | Type | Description | +| --------------- | ------------ | ---------------------------------------------------------------- | +| record_id | VARCHAR | A unique id for the record generated by Chainwalkers | +| offset_id | NUMBER(38,0) | Synonmous with block_id for Near | +| block_id | NUMBER(38,0) | The height of the chain this block corresponds with | +| block_timestamp | TIMESTAMP | The time the block was minted | +| network | VARCHAR | The blockchain network (i.e. mainnet, testnet, etc.) | +| chain_id | VARCHAR | Synonmous with blockchain name for Near | +| tx_count | NUMBER(38,0) | The number of transactions in the block | +| header | json variant | A json queryable column containing the blocks header information | +| ingested_at | TIMESTAMP | The time this data was ingested into the table by Snowflake | + +`CHAINWALKERS.PROD.NEAR_TXS` - Near Transactions + +| Column | Type | Description | +| --------------- | ------------ | ---------------------------------------------------------------------- | +| record_id | VARCHAR | A unique id for the record generated by Chainwalkers | +| tx_id | VARCHAR | A unique on chain identifier for the transaction | +| tx_block_index | NUMBER(38,0) | The index of the transaction within the block. Starts at 0. | +| offset_id | NUMBER(38,0) | Synonmous with block_id for Near | +| block_id | NUMBER(38,0) | The height of the chain this block corresponds with | +| block_timestamp | TIMESTAMP | The time the block was minted | +| network | VARCHAR | The blockchain network (i.e. mainnet, testnet, etc.) | +| chain_id | VARCHAR | Synonmous with blockchain name for Near | +| tx_count | NUMBER(38,0) | The number of transactions in the block | +| header | json variant | A json queryable column containing the blocks header information | +| tx | array | An array of json queryable objects containing each tx and decoded logs | +| ingested_at | TIMESTAMP | The time this data was ingested into the table by Snowflake | + +## Target Database, Schemas and Tables + +Data in this DBT project is written to the `NEAR` database in MetricsDAO. + +This database has 2 schemas, one for `DEV` and one for `PROD`. As a contributer you have full permission to write to the `DEV` schema. However the `PROD` schema can only be written to by Metric DAO's DBT Cloud account. The DBT Cloud account controls running / scheduling models against the `PROD` schema. + +## Branching / PRs + +When conducting work please branch off of main with a description branch name and generate a pull request. At least one other individual must review the PR before it can be merged into main. Once merged into main DBT Cloud will run the new models and output the results into the `PROD` schema. + +When creating a PR please include the following details in the PR description: + +1. List of Tables Created or Modified +2. Description of changes. +3. Implication of changes (if any). + +## More DBT Resources: + +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/analysis/.gitkeep b/analysis/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt_project.yml b/dbt_project.yml new file mode 100644 index 0000000..6e71d97 --- /dev/null +++ b/dbt_project.yml @@ -0,0 +1,36 @@ +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "near" +version: "1.0.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: "default" + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ directory +# as tables. These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + near: + # # Config indicated by + and applies to all files under models/example/ + # example: + # +materialized: view diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8b59627 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,21 @@ +version: "3.4" + +services: + dbt_console: + build: . + volumes: + - .:/near + env_file: + - .env + + dbt_docs: + build: . + volumes: + - .:/near + ports: + - 8080:8080 + expose: + - 8080 + command: "dbt_docs.sh" + env_file: + - .env diff --git a/macros/.gitkeep b/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/macros/dedupe_utils.sql b/macros/dedupe_utils.sql new file mode 100644 index 0000000..a21ec5d --- /dev/null +++ b/macros/dedupe_utils.sql @@ -0,0 +1,17 @@ +{% macro deduped_blocks(table_name) -%} + ( + select + * + from {{ source("chainwalkers", table_name) }} + qualify row_number() over (partition by block_id order by ingested_at desc) = 1 + ) +{%- endmacro %} + +{% macro deduped_txs(table_name ) -%} + ( + select + * + from {{ source("chainwalkers", table_name) }} + qualify row_number() over (partition by tx_id order by ingested_at desc) = 1 + ) +{%- endmacro %} diff --git a/macros/incremental_utils.sql b/macros/incremental_utils.sql new file mode 100644 index 0000000..704951d --- /dev/null +++ b/macros/incremental_utils.sql @@ -0,0 +1,8 @@ +{% macro incremental_load_filter(time_col) -%} + -- dbt makes it easy to query your target table by using the "{{ this }}" variable. + {% if is_incremental() %} + {{ time_col }} > (select max({{ time_col }}) from {{ this }}) + {%- else -%} + true + {% endif %} +{%- endmacro %} diff --git a/models/core/blocks.sql b/models/core/blocks.sql new file mode 100644 index 0000000..2485856 --- /dev/null +++ b/models/core/blocks.sql @@ -0,0 +1,37 @@ +{{ + config( + materialized='incremental', + unique_key='block_id', + tags=['core'], + cluster_by=['block_timestamp'] + ) +}} + +with base_blocks as ( + + select * from {{ deduped_blocks("near_blocks") }} + +), + +final as ( + + select + + block_id, + block_timestamp, + header:hash::string as block_hash, + header:parent_hash::string as block_parent_hash, + header:gas_limit as gas_limit, + header:gas_used as gas_used, + header:miner::string as miner, + header:nonce::string as nonce, + header:size as size, + tx_count, + header:state_root::string as state_root, + header:receipts_root::string as receipts_root + + from base_blocks + where {{ incremental_load_filter("block_timestamp") }} +) + +select * from final diff --git a/models/core/blocks.yml b/models/core/blocks.yml new file mode 100644 index 0000000..84623c3 --- /dev/null +++ b/models/core/blocks.yml @@ -0,0 +1,70 @@ +version: 2 + +models: + - name: blocks + description: |- + This table records all the blocks of Near blockchain. + + columns: + - name: block_id + description: The block number. + tests: + - unique + - not_null + + - name: block_timestamp + description: The timestamp for when the block was collated. + tests: + - not_null + + - name: block_hash + description: Hash of the block (32 Bytes). + tests: + - unique + - not_null + + - name: block_parent_hash + description: Hash of the parent block (32 Bytes). + tests: + - unique + - not_null + + - name: gas_limit + description: The maximum gas allowed in this block. + tests: + - not_null + + - name: gas_used + description: The total used gas by all transactions in this block. + tests: + - not_null + + - name: miner + description: The address of the beneficiary to whom the mining rewards were given. + tests: + - not_null + + - name: nonce + description: Hash of the generated proof-of-work (8 Bytes). + tests: + - not_null + + - name: size + description: Integer the size of this block in bytes. + tests: + - not_null + + - name: tx_count + description: The number of transactions in the given block. + tests: + - not_null + + - name: state_root + description: The root hash that stores the entire state of the system - account balances, contract storage, contract code and account nonces are inside + tests: + - not_null + + - name: receipts_root + description: The root hash that stores receipts of all transactions included in the block. + tests: + - not_null diff --git a/models/core/txs.sql b/models/core/txs.sql new file mode 100644 index 0000000..354bebb --- /dev/null +++ b/models/core/txs.sql @@ -0,0 +1,40 @@ +{{ + config( + materialized = 'incremental', + unique_key = 'tx_hash', + tags = ['core'], + cluster_by = ['block_timestamp'] + ) +}} + +with base_txs as ( + + select * from {{ deduped_txs("near_txs") }} + +), + +final as ( + + select + + block_timestamp, + tx:nonce::string as nonce, + tx_block_index as index, + tx:bech32_from::string as native_from_address, + tx:bech32_to::string as native_to_address, + tx:from::string as from_address, + tx:to::string as to_address, + tx:value as value, + tx:block_number as block_number, + tx:block_hash::string as block_hash, + tx:gas_price as gas_price, + tx:gas as gas, + tx_id as tx_hash, + tx:input::string as data, + tx:receipt:status::string = '0x1' as status + + from base_txs +where {{ incremental_load_filter("block_timestamp") }} +) + +select * from final diff --git a/models/core/txs.yml b/models/core/txs.yml new file mode 100644 index 0000000..369feec --- /dev/null +++ b/models/core/txs.yml @@ -0,0 +1,77 @@ +version: 2 + +models: + - name: txs + description: |- + This table records all the transactions of the Near blockchain. + + columns: + - name: block_timestamp + description: The time when the block was mined. + tests: + - not_null + + - name: nonce + description: The number of transactions made by the sender prior to this one. + tests: + - not_null + + - name: index + description: Integer of the transactions index position in the block. `null` when its pending. + + - name: native_from_address + description: Native Near address of the sender. + tests: + - not_null + + - name: native_to_address + description: Native Near address of the receiver. + tests: + - not_null + + - name: from_address + description: The Ethereum address of the sender. + tests: + - not_null + + - name: to_address + description: The Ethereum address of the receiver. + tests: + - not_null + + - name: value + description: The value transfered in ATTO format (a factor of 10^18). + tests: + - not_null + + - name: block_number + description: Block number where this transaction was in. `null` when it's pending. + + - name: block_hash + description: Hash of the block where this transaction was in. `null` when it's pending. + + - name: gas_price + description: Gas price provided by the sender. + tests: + - not_null + + - name: gas + description: Gas provided by the sender. + tests: + - not_null + + - name: tx_hash + description: The hash of the transaction, this is the primary key for this table. + tests: + - not_null + - unique + + - name: data + description: Bytecode of the data sent along with the transaction. + tests: + - not_null + + - name: status + description: Boolean representing whether the transaction is successful or not. + tests: + - not_null diff --git a/models/metrics/metric_miner_count.sql b/models/metrics/metric_miner_count.sql new file mode 100644 index 0000000..77c14f3 --- /dev/null +++ b/models/metrics/metric_miner_count.sql @@ -0,0 +1,26 @@ +{{ config(materialized='table', tags=['metrics']) }} + +SELECT + date_trunc('day', block_timestamp) as metric_date, + 'daily' as metric_period, + count(distinct miner) as miner_count +FROM {{ ref("blocks") }} +GROUP BY 1 + +UNION ALL + +SELECT + date_trunc('hour', block_timestamp) as metric_date, + 'hourly' as metric_period, + count(distinct miner) as miner_count +FROM {{ ref("blocks") }} +GROUP BY 1 + +UNION ALL + +SELECT + date_trunc('minute', block_timestamp) as metric_date, + 'minute' as metric_period, + count(distinct miner) as miner_count +FROM {{ ref("blocks") }} +GROUP BY 1 diff --git a/models/metrics/schema.yml b/models/metrics/schema.yml new file mode 100644 index 0000000..47d9762 --- /dev/null +++ b/models/metrics/schema.yml @@ -0,0 +1,18 @@ +version: 2 + +models: + - name: metric_miner_count + description: "The distinct number of miners within a given period" + columns: + - name: metric_date + description: The date the metric occurred + tests: + - not_null + - name: metric_period + description: i.e. Hourly, Daily, etc. + tests: + - not_null + - name: miner_count + description: The distinct count of miners active in that period. + tests: + - not_null \ No newline at end of file diff --git a/models/sources.yml b/models/sources.yml new file mode 100644 index 0000000..8c70f23 --- /dev/null +++ b/models/sources.yml @@ -0,0 +1,55 @@ +version: 2 + +sources: + - name: chainwalkers + schema: prod + database: chainwalkers + tables: + - name: near_blocks + description: "Blocks from the Near chain. Note always use the latest ingested at timestamp as duplicates can exist in this table!" + columns: + - name: record_id + description: A unique id for the record generated by Chainwalkers + - name: offset_id + description: Synonmous with block_id for Near + - name: block_id + description: The height of the chain this block corresponds with + - name: block_timestamp + description: The time the block was minted + - name: network + description: The blockchain network (i.e. mainnet, testnet, etc.) + - name: chain_id + description: Synonmous with blockchain name for Near + - name: tx_count + description: The number of transactions in the block + - name: header + description: A json column containing the blocks header information + - name: ingested_at + description: The time this row was ingested by Chainwalkers + - name: near_txs + description: "Txs from the Near chain. Note always use the latest ingested at timestamp as duplicates can exist in this table!" + columns: + - name: record_id + description: A unique id for the record generated by Chainwalkers + - name: tx_id + description: Synonmous with transaction hash, a unique on chain identifier for the transaction + - name: tx_block_index + description: The index of the transaction within the block. Starts at 0. + - name: offset_id + description: Synonmous with block_id for Near + - name: block_id + description: The height of the chain this block corresponds with + - name: block_timestamp + description: The time the block was minted + - name: network + description: The blockchain network (i.e. mainnet, testnet, etc.) + - name: chain_id + description: Synonmous with blockchain name for Near + - name: tx_count + description: The number of transactions in the block + - name: header + description: A json column containing the blocks header information + - name: tx + description: A json object containing the tx and any decoded logs + - name: ingested_at + description: The time this row was ingested by Chainwalkers diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 0000000..250ef84 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,19 @@ +default: + target: dev + outputs: + dev: + type: snowflake + account: "{{ env_var('SF_ACCOUNT') }}" + # User/password auth + user: "{{ env_var('SF_USERNAME') }}" + password: "{{ env_var('SF_PASSWORD') }}" + role: "{{ env_var('SF_ROLE') }}" + schema: "{{ env_var('SF_SCHEMA') }}" + region: "{{ env_var('SF_REGION') }}" + database: "{{ env_var('SF_DATABASE') }}" + warehouse: "{{ env_var('SF_WAREHOUSE') }}" + threads: 4 + client_session_keep_alive: False + query_tag: near + config: + send_anonymous_usage_stats: False diff --git a/snapshots/.gitkeep b/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/.gitkeep b/tests/.gitkeep new file mode 100644 index 0000000..e69de29