Update labels (#262)

* a

* labels source updated

* add core view

* unique id

* delete coalesces
This commit is contained in:
WHYTEWYLL 2024-01-31 13:44:59 -03:00 committed by GitHub
parent 1a6a72f5f2
commit e7615a0cc6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 90 additions and 16 deletions

View File

@ -31,8 +31,7 @@ on-run-start:
- "{{create_udfs()}}"
on-run-end:
- '{{ apply_meta_as_tags(results) }}'
- "{{ apply_meta_as_tags(results) }}"
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
@ -74,7 +73,6 @@ dispatch:
- flow-models
- dbt_snowflake_query_tags
- dbt
# query-comment:
# comment: '{{ dbt_snowflake_query_tags.get_query_comment(node) }}'
# append: true # Snowflake removes prefixed comments.

View File

@ -14,11 +14,13 @@ WITH labels AS (
label_type,
label_subtype,
address_name,
project_name
project_name,
_is_deleted,
labels_combined_id
FROM
{{ source(
'crosschain_core',
'dim_labels'
'crosschain_silver',
'labels_combined'
) }}
WHERE
blockchain = 'flow'

View File

@ -0,0 +1,5 @@
{% docs label %}
The label for this address.
{% enddocs %}

View File

@ -0,0 +1,5 @@
{% docs table_dim_labels %}
The labels table is a store of one-to-one address identifiers, or address names. Labels are broken out into a "type" (such as cex, dex, dapp, games, etc.) and a "subtype" (e.g. contract_deployer, hot_wallet, token_contract, etc.) in order to help classify each address name into similar groups. Our labels are sourced from many different places, but can primarily be grouped into two categories: automatic and manual. Automatic labels are applied continuously based on certain criteria, such as a known contract deploying another contract, behavior-based algorithms for finding deposit wallets, and consistent data pulls of custom protocol APIs. Manual labels are added periodically to cover addresses that cannot be found programmatically, such as new protocol addresses, centralized exchange hot wallets, or trending addresses. Labels can also be added by our community using our add-a-label tool (https://science.flipsidecrypto.xyz/add-a-label/) or on-chain with NEAR (https://near.social/lord1.near/widget/Form) and are reviewed by our labels team. A label can be removed by our labels team if it is found to be incorrect or no longer relevant; this generally will only happen for mislabeled deposit wallets.
{% enddocs %}

View File

@ -0,0 +1,18 @@
{#
    core__dim_labels

    View over the silver__labels model. Passes the label attributes through
    unchanged and renames two columns for the core layer:
      - project_name -> label
      - labels_id    -> dim_labels_id
#}
{{ config(
    materialized = 'view',
    tags = ['scheduled']
) }}

WITH silver_labels AS (

    SELECT
        blockchain,
        creator,
        address,
        address_name,
        label_type,
        label_subtype,
        project_name,
        labels_id,
        inserted_timestamp,
        modified_timestamp
    FROM
        {{ ref('silver__labels') }}
)
SELECT
    blockchain,
    creator,
    address,
    address_name,
    label_type,
    label_subtype,
    project_name AS label,          -- exposed to consumers as "label"
    labels_id AS dim_labels_id,     -- identifier column of this dimension
    inserted_timestamp,
    modified_timestamp
FROM
    silver_labels

View File

@ -0,0 +1,34 @@
# dbt schema file for the core__dim_labels view.
# Each column description is resolved from a {% docs %} block via doc().
version: 2

models:
  - name: core__dim_labels
    description: '{{ doc("table_dim_labels") }}'

    columns:
      # Label attributes
      - name: BLOCKCHAIN
        description: '{{ doc("blockchain") }}'

      - name: CREATOR
        description: '{{ doc("creator") }}'

      - name: ADDRESS
        description: '{{ doc("address") }}'

      - name: ADDRESS_NAME
        description: '{{ doc("address_name") }}'

      - name: LABEL_TYPE
        description: '{{ doc("label_type") }}'

      - name: LABEL_SUBTYPE
        description: '{{ doc("label_subtype") }}'

      - name: LABEL
        description: '{{ doc("label") }}'

      # Identifier and audit columns
      - name: DIM_LABELS_ID
        description: "{{doc('id')}}"

      - name: INSERTED_TIMESTAMP
        description: "{{doc('inserted_timestamp')}}"

      - name: MODIFIED_TIMESTAMP
        description: "{{doc('modified_timestamp')}}"

View File

@ -1,7 +1,10 @@
{{ config(
materialized = 'table',
cluster_by = ['address'],
unique_key = 'event_id',
materialized = 'incremental',
unique_key = 'labels_id',
cluster_by = 'modified_timestamp::DATE',
incremental_strategy = 'merge',
merge_exclude_columns = ["inserted_timestamp"],
post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION ON EQUALITY(address); DELETE FROM {{ this }} WHERE _is_deleted = TRUE;",
tags = ['scheduled', 'streamline_scheduled', 'scheduled_non_core']
) }}
@ -16,14 +19,23 @@ WITH labels AS (
label_subtype,
address_name,
project_name,
{{ dbt_utils.generate_surrogate_key(
['blockchain','label_type','label_subtype']
) }} AS labels_id,
labels_combined_id AS labels_id,
SYSDATE() AS inserted_timestamp,
SYSDATE() AS modified_timestamp,
'{{ invocation_id }}' AS _invocation_id
FROM
{{ ref('bronze__labels') }}
{% if is_incremental() %}
{#
    Incremental runs only: keep rows whose modified_timestamp is at or after
    the latest modified_timestamp already stored in this model.

    The preceding FROM has no WHERE clause, so this predicate must open with
    WHERE; opening with AND compiles to invalid SQL ("FROM ... AND ...").

    NOTE(review): the enclosing SELECT also aliases SYSDATE() AS
    modified_timestamp. Confirm bronze__labels exposes a source
    modified_timestamp column so this filter compares source values rather
    than the alias.
#}
WHERE
    modified_timestamp >= (
        SELECT
            MAX(modified_timestamp)
        FROM
            {{ this }}
    )
{% endif %}
)
SELECT
*

View File

@ -8,11 +8,11 @@ sources:
- name: flow_blocks
- name: flow_txs
- name: crosschain_core
- name: crosschain_silver
database: crosschain
schema: core
schema: silver
tables:
- name: dim_labels
- name: labels_combined
- name: bronze_streamline
database: streamline
@ -140,7 +140,7 @@ sources:
bronze_api
tables:
- name: allday_metadata
- name: flow_bronze
database: |
{{ "FLOW" if target.database == 'FLOW' else "FLOW_DEV" }}