From e7615a0cc6352bdc3a5cc9165ca2cc01f398eb4b Mon Sep 17 00:00:00 2001 From: WHYTEWYLL <49686519+WHYTEWYLL@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:44:59 -0300 Subject: [PATCH] Update labels (#262) * a * labels source updated * add core view * unique id * delete coalesces --- dbt_project.yml | 4 +-- models/bronze/bronze__labels.sql | 8 +++--- models/descriptions/label.md | 5 ++++ models/descriptions/table_dim_labels.md | 5 ++++ models/gold/core/core__dim_labels.sql | 18 +++++++++++++ models/gold/core/core__dim_labels.yml | 34 +++++++++++++++++++++++++ models/silver/labels/silver__labels.sql | 24 ++++++++++++----- models/sources.yml | 8 +++--- 8 files changed, 90 insertions(+), 16 deletions(-) create mode 100644 models/descriptions/label.md create mode 100644 models/descriptions/table_dim_labels.md create mode 100644 models/gold/core/core__dim_labels.sql create mode 100644 models/gold/core/core__dim_labels.yml diff --git a/dbt_project.yml b/dbt_project.yml index 78bda0c..e03185c 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -31,8 +31,7 @@ on-run-start: - "{{create_udfs()}}" on-run-end: - - '{{ apply_meta_as_tags(results) }}' - + - "{{ apply_meta_as_tags(results) }}" # Configuring models # Full documentation: https://docs.getdbt.com/docs/configuring-models @@ -74,7 +73,6 @@ dispatch: - flow-models - dbt_snowflake_query_tags - dbt - # query-comment: # comment: '{{ dbt_snowflake_query_tags.get_query_comment(node) }}' # append: true # Snowflake removes prefixed comments. 
diff --git a/models/bronze/bronze__labels.sql b/models/bronze/bronze__labels.sql index 06fa634..db0e93a 100644 --- a/models/bronze/bronze__labels.sql +++ b/models/bronze/bronze__labels.sql @@ -14,11 +14,13 @@ WITH labels AS ( label_type, label_subtype, address_name, - project_name + project_name, + _is_deleted, + labels_combined_id FROM {{ source( - 'crosschain_core', - 'dim_labels' + 'crosschain_silver', + 'labels_combined' ) }} WHERE blockchain = 'flow' diff --git a/models/descriptions/label.md b/models/descriptions/label.md new file mode 100644 index 0000000..623a063 --- /dev/null +++ b/models/descriptions/label.md @@ -0,0 +1,5 @@ +{% docs label %} + +The label for this address. + +{% enddocs %} diff --git a/models/descriptions/table_dim_labels.md b/models/descriptions/table_dim_labels.md new file mode 100644 index 0000000..ac3266c --- /dev/null +++ b/models/descriptions/table_dim_labels.md @@ -0,0 +1,5 @@ +{% docs table_dim_labels %} + +The labels table is a store of one-to-one address identifiers, or address names. Labels are broken out into a "type" (such as cex, dex, dapp, games, etc.) and a "subtype" (e.g. contract_deployer, hot_wallet, token_contract, etc.) in order to help classify each address name into similar groups. Our labels are sourced from many different places, but can primarily be grouped into two categories: automatic and manual. Automatic labels are applied continuously based on certain criteria, such as a known contract deploying another contract, behavior-based algorithms for finding deposit wallets, and consistent data pulls of custom protocol APIs. Manual labels are added periodically for addresses that cannot be found programmatically, such as new protocol addresses, centralized exchange hot wallets, or trending addresses. 
Labels can also be added by our community using our add-a-label tool (https://science.flipsidecrypto.xyz/add-a-label/) or on-chain with NEAR (https://near.social/lord1.near/widget/Form) and are reviewed by our labels team. A label can be removed by our labels team if it is found to be incorrect or no longer relevant; this generally happens only for mislabeled deposit wallets. + +{% enddocs %} diff --git a/models/gold/core/core__dim_labels.sql b/models/gold/core/core__dim_labels.sql new file mode 100644 index 0000000..0db2f75 --- /dev/null +++ b/models/gold/core/core__dim_labels.sql @@ -0,0 +1,18 @@ +{{ config( + materialized = 'view', + tags = ['scheduled'] +) }} + +SELECT + blockchain, + creator, + address, + address_name, + label_type, + label_subtype, + project_name AS label, + labels_id AS dim_labels_id, + inserted_timestamp, + modified_timestamp +FROM + {{ ref('silver__labels') }} diff --git a/models/gold/core/core__dim_labels.yml b/models/gold/core/core__dim_labels.yml new file mode 100644 index 0000000..b97fdb9 --- /dev/null +++ b/models/gold/core/core__dim_labels.yml @@ -0,0 +1,34 @@ +version: 2 +models: + - name: core__dim_labels + description: '{{ doc("table_dim_labels") }}' + columns: + - name: BLOCKCHAIN + description: '{{ doc("blockchain") }}' + + - name: CREATOR + description: '{{ doc("creator") }}' + + - name: ADDRESS + description: '{{ doc("address") }}' + + - name: ADDRESS_NAME + description: '{{ doc("address_name") }}' + + - name: LABEL_TYPE + description: '{{ doc("label_type") }}' + + - name: LABEL_SUBTYPE + description: '{{ doc("label_subtype") }}' + + - name: LABEL + description: '{{ doc("label") }}' + + - name: DIM_LABELS_ID + description: "{{doc('id')}}" + + - name: INSERTED_TIMESTAMP + description: "{{doc('inserted_timestamp')}}" + + - name: MODIFIED_TIMESTAMP + description: "{{doc('modified_timestamp')}}" diff --git a/models/silver/labels/silver__labels.sql b/models/silver/labels/silver__labels.sql index 019f112..1ad7f4f 100644 --- 
a/models/silver/labels/silver__labels.sql +++ b/models/silver/labels/silver__labels.sql @@ -1,7 +1,10 @@ {{ config( - materialized = 'table', - cluster_by = ['address'], - unique_key = 'event_id', + materialized = 'incremental', + unique_key = 'labels_id', + cluster_by = 'modified_timestamp::DATE', + incremental_strategy = 'merge', + merge_exclude_columns = ["inserted_timestamp"], + post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION ON EQUALITY(address); DELETE FROM {{ this }} WHERE _is_deleted = TRUE;", tags = ['scheduled', 'streamline_scheduled', 'scheduled_non_core'] ) }} @@ -16,14 +19,23 @@ WITH labels AS ( label_subtype, address_name, project_name, - {{ dbt_utils.generate_surrogate_key( - ['blockchain','label_type','label_subtype'] - ) }} AS labels_id, + labels_combined_id AS labels_id, SYSDATE() AS inserted_timestamp, SYSDATE() AS modified_timestamp, '{{ invocation_id }}' AS _invocation_id FROM {{ ref('bronze__labels') }} + + {% if is_incremental() %} {# The FROM above has no WHERE, so this incremental filter must open the clause itself. NOTE(review): confirm bronze__labels exposes modified_timestamp; otherwise the name may resolve to the SYSDATE() alias selected above. #} + WHERE modified_timestamp >= ( + SELECT + MAX( + modified_timestamp + ) + FROM + {{ this }} + ) + {% endif %} ) SELECT * diff --git a/models/sources.yml b/models/sources.yml index 7d1c132..08f2cfa 100644 --- a/models/sources.yml +++ b/models/sources.yml @@ -8,11 +8,11 @@ sources: - name: flow_blocks - name: flow_txs - - name: crosschain_core + - name: crosschain_silver database: crosschain - schema: core + schema: silver tables: - - name: dim_labels + - name: labels_combined - name: bronze_streamline database: streamline @@ -140,7 +140,7 @@ sources: bronze_api tables: - name: allday_metadata - + - name: flow_bronze database: | {{ "FLOW" if target.database == 'FLOW' else "FLOW_DEV" }}