legacy prices deprecation (#572)

This commit is contained in:
tarikceric 2024-06-11 11:24:09 -07:00 committed by GitHub
parent 7b748f014c
commit 0188acc217
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 9 additions and 866 deletions

View File

@ -1,34 +0,0 @@
{{ config(
    tags = ['scheduled_non_core'],
    materialized = 'view'
) }}
-- Exposes silver__token_metadata as a view, one output row per input row.
-- Fallbacks applied on the way out:
--   * dim_tokens_id      -> labels_id, or a surrogate key built from token_address when labels_id is NULL
--   * inserted_timestamp -> '2000-01-01' when NULL
--   * modified_timestamp -> '2000-01-01' when NULL
SELECT
    tm.token_address,
    tm.token_name,
    tm.symbol,
    tm.decimals,
    tm.coin_gecko_id,
    tm.coin_market_cap_id,
    tm.tags,
    tm.logo,
    tm.twitter,
    tm.website,
    tm.description,
    tm._INSERTED_TIMESTAMP,
    COALESCE(
        tm.labels_id,
        {{ dbt_utils.generate_surrogate_key(['token_address']) }}
    ) AS dim_tokens_id,
    COALESCE(tm.inserted_timestamp, '2000-01-01') AS inserted_timestamp,
    COALESCE(tm.modified_timestamp, '2000-01-01') AS modified_timestamp
FROM
    {{ ref('silver__token_metadata') }} AS tm

View File

@ -1,4 +0,0 @@
# dbt properties entry for the core__dim_tokens model.
# The description marks the model as deprecated and names the price-schema replacements.
version: 2
models:
- name: core__dim_tokens
description: Deprecating soon! Please use price.ez_asset_metadata or price.dim_asset_metadata instead.

View File

@ -5,19 +5,15 @@
) }}
SELECT
A.token_address,
asset_id AS id, -- id column pending deprecation
token_address,
asset_id,
A.symbol,
A.name,
C.decimals, -- decimals column pending deprecation
symbol,
name,
platform AS blockchain,
platform_id AS blockchain_id,
provider,
A.inserted_timestamp,
A.modified_timestamp,
A.complete_provider_asset_metadata_id AS dim_asset_metadata_id
inserted_timestamp,
modified_timestamp,
complete_provider_asset_metadata_id AS dim_asset_metadata_id
FROM
{{ ref('silver__complete_provider_asset_metadata') }} A
LEFT JOIN {{ ref('core__dim_tokens') }} C --remove this join alongside decimal column deprecation
ON C.token_address = A.token_address
{{ ref('silver__complete_provider_asset_metadata') }}

View File

@ -1,7 +1,7 @@
version: 2
models:
- name: price__dim_asset_metadata
description: Deprecating soon! Note, only the ID and DECIMALS columns are being removed. The dim_asset_metadata table itself is not being deprecated. '{{ doc("prices_dim_asset_metadata_table_doc") }}'
description: '{{ doc("prices_dim_asset_metadata_table_doc") }}'
columns:
- name: PROVIDER
@ -18,8 +18,6 @@ models:
description: '{{ doc("prices_blockchain") }}'
- name: BLOCKCHAIN_ID
description: '{{ doc("prices_blockchain_id") }}'
- name: DECIMALS
description: '{{ doc("prices_decimals") }}'
- name: DIM_ASSET_METADATA_ID
description: '{{ doc("pk") }}'
- name: INSERTED_TIMESTAMP

View File

@ -6,7 +6,6 @@
SELECT
token_address,
asset_id AS id, -- id column pending deprecation
asset_id,
symbol,
NAME,
@ -22,7 +21,6 @@ FROM
UNION ALL
SELECT
NULL AS token_address,
asset_id AS id, -- id column pending deprecation
asset_id,
symbol,
NAME,

View File

@ -1,11 +1,9 @@
version: 2
models:
- name: price__ez_asset_metadata
description: Deprecating soon! Note, only the ID column is changing. The ez_asset_metadata table itself is not being deprecated. '{{ doc("prices_ez_asset_metadata_table_doc") }}'
description: '{{ doc("prices_ez_asset_metadata_table_doc") }}'
columns:
- name: ID
description: '{{ doc("prices_id_deprecation")}}'
- name: ASSET_ID
description: '{{ doc("prices_asset_id") }}'
- name: NAME

View File

@ -1,49 +0,0 @@
{{ config(
    materialized = 'view',
    meta = { 'database_tags': { 'table': { 'PURPOSE': 'PRICES' } } },
    tags = ['scheduled_non_core']
) }}
-- Hourly price per token. Every token in silver__token_metadata is paired with every
-- hour in silver__date_hours, then matched to the CoinGecko and CoinMarketCap hourly
-- models; a pairing is kept only when at least one provider has a row for that hour.
--
-- Provider precedence for CLOSE and is_imputed:
--   1. CoinGecko, observed       2. CoinMarketCap, observed
--   3. CoinGecko, imputed        4. CoinMarketCap, imputed
SELECT
    b.date_hour AS recorded_hour,
    meta.token_address,
    meta.token_name,
    meta.symbol,
    CASE
        WHEN NOT cg.imputed THEN cg.close
        WHEN NOT cmc.imputed THEN cmc.close
        WHEN cg.imputed THEN cg.close
        WHEN cmc.imputed THEN cmc.close
    END AS CLOSE,
    CASE
        -- any observed price wins, so the flag is FALSE as soon as one provider is not imputed
        WHEN NOT cg.imputed OR NOT cmc.imputed THEN FALSE
        WHEN cg.imputed OR cmc.imputed THEN TRUE
    END AS is_imputed,
    COALESCE(
        cg.token_prices_coin_gecko_hourly_id,
        cmc.token_prices_coin_market_cap_hourly_id,
        {{ dbt_utils.generate_surrogate_key(['b.date_hour','token_address']) }}
    ) AS ez_token_prices_hourly_id,
    GREATEST(
        COALESCE(cg.inserted_timestamp, '2000-01-01'),
        COALESCE(cmc.inserted_timestamp, '2000-01-01')
    ) AS inserted_timestamp,
    GREATEST(
        COALESCE(cg.modified_timestamp, '2000-01-01'),
        COALESCE(cmc.modified_timestamp, '2000-01-01')
    ) AS modified_timestamp
FROM
    {{ ref('silver__token_metadata') }} AS meta
    CROSS JOIN {{ ref('silver__date_hours') }} AS b
    LEFT JOIN {{ ref('silver__token_prices_coin_gecko_hourly') }} AS cg
        ON meta.coin_gecko_id = cg.id
        AND b.date_hour = cg.recorded_hour
    LEFT JOIN {{ ref('silver__token_prices_coin_market_cap_hourly') }} AS cmc
        ON meta.coin_market_cap_id = cmc.id
        AND b.date_hour = cmc.recorded_hour
WHERE
    -- drop token/hour pairings that neither provider priced
    cg.imputed IS NOT NULL
    OR cmc.imputed IS NOT NULL

View File

@ -1,4 +0,0 @@
# dbt properties entry for the price__ez_token_prices_hourly model.
# The description marks the model as deprecated and names its replacements.
version: 2
models:
- name: price__ez_token_prices_hourly
description: Deprecating soon! Please use price.ez_prices_hourly or price.fact_prices_ohlc_hourly instead.

View File

@ -1,53 +0,0 @@
{{ config(
    materialized = 'view',
    meta = { 'database_tags': { 'table': { 'PURPOSE': 'PRICES' } } },
    tags = ['scheduled_non_core']
) }}
-- Stacks the CoinMarketCap and CoinGecko hourly price models into one fact view,
-- tagging each row with its provider.
--
-- Output columns: provider, recorded_hour, id (string), symbol (upper-cased), CLOSE,
-- imputed, fact_token_prices_hourly_id, inserted_timestamp, modified_timestamp.
--
-- UNION ALL is used rather than UNION: the two branches carry different provider
-- literals, so no row can appear in both, and a de-duplicating UNION would only add
-- a sort/hash step. NOTE(review): this relies on each silver model holding one row
-- per recorded_hour + id (as their uniqueness tests assert) - confirm before relying
-- on it elsewhere.
SELECT
    'coinmarketcap' AS provider,
    recorded_hour,
    -- CoinMarketCap ids are cast to STRING so both branches share one column type
    id :: STRING AS id,
    UPPER(symbol) AS symbol,
    CLOSE,
    imputed,
    -- fall back to a computed key when the stored surrogate key is NULL
    COALESCE(
        token_prices_coin_market_cap_hourly_id,
        {{ dbt_utils.generate_surrogate_key(['recorded_hour', 'id']) }}
    ) AS fact_token_prices_hourly_id,
    COALESCE(inserted_timestamp, '2000-01-01') AS inserted_timestamp,
    COALESCE(modified_timestamp, '2000-01-01') AS modified_timestamp
FROM
    {{ ref('silver__token_prices_coin_market_cap_hourly') }}
UNION ALL
SELECT
    'coingecko' AS provider,
    recorded_hour,
    id,
    UPPER(symbol) AS symbol,
    CLOSE,
    imputed,
    -- fall back to a computed key when the stored surrogate key is NULL
    COALESCE(
        token_prices_coin_gecko_hourly_id,
        {{ dbt_utils.generate_surrogate_key(['recorded_hour', 'id']) }}
    ) AS fact_token_prices_hourly_id,
    COALESCE(inserted_timestamp, '2000-01-01') AS inserted_timestamp,
    COALESCE(modified_timestamp, '2000-01-01') AS modified_timestamp
FROM
    {{ ref('silver__token_prices_coin_gecko_hourly') }}

View File

@ -1,4 +0,0 @@
# dbt properties entry for the price__fact_token_prices_hourly model.
# The description marks the model as deprecated and names its replacements.
version: 2
models:
- name: price__fact_token_prices_hourly
description: Deprecating soon! Please use price.ez_prices_hourly or price.fact_prices_ohlc_hourly instead.

View File

@ -1,206 +0,0 @@
-- Incremental token metadata model, merged on token_address.
-- Combines three inputs (Solscan tokens, CoinGecko asset metadata, CoinMarketCap
-- cryptocurrency info) into one row of descriptive attributes per token address.
{{ config(
materialized = 'incremental',
unique_key = "token_address",
incremental_strategy = 'merge',
tags = ['scheduled_non_core']
) }}
-- NOTE(review): open question left by the author - can this model be deprecated now,
-- along with removing the sources it reads?
WITH cmc_base AS (
-- CoinMarketCap entries, one row per flattened contract_address element,
-- limited to elements whose platform name is 'Solana' and whose address is non-empty.
SELECT
VALUE :contract_address :: STRING AS token_address,
NAME AS cmc_name,
cmc_id,
symbol AS cmc_symbol,
metadata :description :: STRING AS cmc_description,
metadata :logo :: STRING AS cmc_icon,
metadata :tags AS cmc_tags,
metadata :explorer :: STRING AS cmc_explorer,
metadata :twitter AS cmc_twitter,
metadata :website AS cmc_urls,
_inserted_timestamp
FROM
{{ source(
'crosschain_silver',
'coin_market_cap_cryptocurrency_info'
) }},
LATERAL FLATTEN(
metadata :contract_address
)
WHERE
VALUE :platform :name = 'Solana'
-- treats NULL and empty-string addresses the same way: both are excluded
AND COALESCE(
token_address,
''
) <> ''
),
base AS (
-- Distinct set of token addresses seen in any of the three inputs.
-- On incremental runs each branch only contributes rows whose _inserted_timestamp
-- is at or after the newest _inserted_timestamp already stored in this model.
SELECT
DISTINCT token_address
FROM
{{ ref('silver__solscan_tokens') }}
WHERE
COALESCE(token_address, '') <> ''
{% if is_incremental() %}
AND _inserted_timestamp >= (
SELECT
MAX(_inserted_timestamp)
FROM
{{ this }}
)
{% endif %}
UNION
SELECT
DISTINCT token_address
FROM
{{ source(
'crosschain_silver',
'asset_metadata_coin_gecko'
) }}
WHERE
platform = 'solana'
AND COALESCE(token_address, '') <> ''
{% if is_incremental() %}
AND _inserted_timestamp >= (
SELECT
MAX(_inserted_timestamp)
FROM
{{ this }}
)
{% endif %}
UNION
SELECT
DISTINCT token_address
FROM
cmc_base
{% if is_incremental() %}
WHERE
_inserted_timestamp >= (
SELECT
MAX(_inserted_timestamp)
FROM
{{ this }}
)
{% endif %}
),
fin AS (
-- Attach each provider's attributes to every address in base.
-- NOTE(review): only the CoinGecko side is reduced to one row per token_address here;
-- if Solscan or cmc_base can hold several rows per address this CTE would emit
-- duplicate token_address values for the merge key - confirm upstream uniqueness.
SELECT
A.token_address,
-- CoinGecko's own id is preferred; Solscan's coingecko_id is the fallback
COALESCE(
cg.id,
solscan.coingecko_id
) AS coin_gecko_id,
cmc.cmc_id AS coin_market_cap_id,
solscan.name AS ss_name,
solscan.symbol AS ss_symbol,
solscan.decimals AS ss_decimals,
solscan.tags AS ss_tags,
solscan.icon AS ss_icon,
solscan.twitter AS ss_twitter,
solscan.website AS ss_website,
cg.name AS cg_name,
cg.symbol AS cg_symbol,
cmc.cmc_name,
cmc.cmc_symbol,
cmc.cmc_description,
cmc.cmc_icon,
cmc.cmc_tags,
cmc.cmc_twitter,
cmc.cmc_urls,
-- newest load time across the three inputs; a missing input counts as '1900-01-01'
GREATEST(
COALESCE(
solscan._inserted_timestamp,
'1900-01-01'
),
COALESCE(
cg._inserted_timestamp,
'1900-01-01'
),
COALESCE(
cmc._inserted_timestamp,
'1900-01-01'
)
) AS _inserted_timestamp
FROM
base A
LEFT JOIN {{ ref('silver__solscan_tokens') }}
solscan
ON A.token_address = solscan.token_address
LEFT JOIN (
-- latest CoinGecko row per token_address on the solana platform
SELECT
id,
token_address,
NAME,
symbol,
_inserted_timestamp
FROM
{{ source(
'crosschain_silver',
'asset_metadata_coin_gecko'
) }}
WHERE
platform = 'solana' qualify(ROW_NUMBER() over(PARTITION BY token_address
ORDER BY
_inserted_timestamp DESC) = 1)
) cg
ON A.token_address = cg.token_address
LEFT JOIN cmc_base cmc
ON A.token_address = cmc.token_address
)
-- Final projection: pick one value per attribute using a fixed provider precedence.
SELECT
token_address,
-- name and symbol precedence: CoinGecko, then Solscan, then CoinMarketCap
COALESCE(
cg_name,
ss_name,
cmc_name
) AS token_name,
COALESCE(
cg_symbol,
ss_symbol,
cmc_symbol
) AS symbol,
-- decimals come from Solscan only
ss_decimals decimals,
coin_gecko_id,
coin_market_cap_id,
-- tags: both tag sets concatenated when both are present, otherwise whichever exists
-- (Solscan first). The comparison against the string 'null' is how the original
-- detects an absent tag set.
COALESCE(
CASE
WHEN COALESCE(
ss_tags,
'null'
) <> 'null'
AND COALESCE(
cmc_tags,
'null'
) <> 'null' THEN ARRAY_CAT(
ss_tags,
cmc_tags
)
END,
ss_tags,
cmc_tags
) AS tags,
-- logo, twitter and website prefer Solscan over CoinMarketCap
COALESCE(
ss_icon,
cmc_icon
) AS logo,
COALESCE(
ss_twitter,
cmc_twitter
) AS twitter,
COALESCE(
ss_website,
cmc_urls
) AS website,
-- description is only available from CoinMarketCap
cmc_description AS description,
_inserted_timestamp,
{{ dbt_utils.generate_surrogate_key(
['token_address']
) }} AS labels_id,
-- audit columns stamped at run time
SYSDATE() AS inserted_timestamp,
SYSDATE() AS modified_timestamp,
'{{ invocation_id }}' AS _invocation_id
FROM
fin

View File

@ -1,33 +0,0 @@
# dbt properties for the silver__token_metadata model.
# Column names mirror the model's final SELECT list. The name column is documented as
# TOKEN_NAME because the model emits token_name, not name.
version: 2
models:
  - name: silver__token_metadata
    columns:
      - name: TOKEN_ADDRESS
        description: "The token address"
        # token_address is the model's merge key, so it must be present and unique
        tests:
          - not_null
          - unique
      - name: TOKEN_NAME
        description: "The token name"
      - name: SYMBOL
        description: "The token symbol"
      - name: DECIMALS
        description: "The number of decimals for the token"
      - name: COIN_GECKO_ID
        description: "The unique id from coin gecko"
      - name: COIN_MARKET_CAP_ID
        description: "The unique id from coin market cap"
      - name: TAGS
        description: "Tags for the coin"
      - name: LOGO
        description: "The image of the token"
      - name: TWITTER
        description: "The twitter account for the project"
      - name: WEBSITE
        description: "The website for the project"
      - name: DESCRIPTION
        description: "The coin market cap description"
      - name: _INSERTED_TIMESTAMP
        description: "{{ doc('_inserted_timestamp') }}"
        tests:
          - not_null

View File

@ -1,189 +0,0 @@
-- Incremental hourly CoinGecko price model, merged on _unique_key and clustered by day.
-- Builds an (hour x asset) grid, attaches observed closes from legacy and current
-- price sources, and forward-fills hours that have no observed close.
-- inserted_timestamp is excluded from merge updates, so it keeps its first-insert value.
{{ config(
materialized = 'incremental',
unique_key = "_unique_key",
incremental_strategy = 'merge',
cluster_by = ['recorded_hour::DATE'],
merge_exclude_columns = ["inserted_timestamp"],
tags = ['scheduled_non_core']
) }}
WITH date_hours AS (
-- Hours from 2020-04-10 up to the newest recorded_hour in the CoinGecko hourly source;
-- on incremental runs, only hours after the newest recorded_hour already in this model.
SELECT
date_hour
FROM
{{ ref('silver__date_hours') }}
WHERE
date_hour >= '2020-04-10'
AND date_hour <= (
SELECT
MAX(recorded_hour)
FROM
{{ source(
'crosschain_silver',
'hourly_prices_coin_gecko'
) }}
)
{% if is_incremental() %}
AND date_hour > (
SELECT
MAX(recorded_hour)
FROM
{{ this }}
)
{% endif %}
),
asset_metadata AS (
-- Distinct (id, symbol) pairs for CoinGecko ids referenced by silver__token_metadata.
SELECT
id,
symbol
FROM
{{ source(
'crosschain_silver',
'asset_metadata_coin_gecko'
) }}
WHERE
id IN (
SELECT
coin_gecko_id
FROM
{{ ref('silver__token_metadata') }}
)
GROUP BY
1,
2
),
base_date_hours_symbols AS (
-- Full grid: every selected hour paired with every selected asset.
SELECT
date_hour,
id,
symbol
FROM
date_hours
CROSS JOIN asset_metadata
),
base_legacy_prices AS (
-- Legacy CoinGecko prices, used for dates before 2022-07-20.
-- Only records stamped at minute 59 are kept, and each is truncated to its hour.
SELECT
DATE_TRUNC(
'hour',
recorded_at
) AS recorded_hour,
asset_id AS id,
symbol,
price AS CLOSE
FROM
{{ source(
'crosschain_bronze',
'legacy_prices'
) }}
WHERE
provider = 'coingecko'
AND asset_id IN (
SELECT
coin_gecko_id
FROM
{{ ref('silver__token_metadata') }}
)
AND MINUTE(recorded_at) = 59
AND recorded_at :: DATE < '2022-07-20' -- use legacy data before this date
{% if is_incremental() %}
AND recorded_at > (
SELECT
MAX(recorded_hour)
FROM
{{ this }}
)
{% endif %}
),
base_prices AS (
-- Current CoinGecko hourly closes, used from 2022-07-20 onward.
-- The symbol is taken from asset_metadata rather than from the price source.
SELECT
recorded_hour,
p.id,
m.symbol,
p.close
FROM
{{ source(
'crosschain_silver',
'hourly_prices_coin_gecko'
) }}
p
LEFT OUTER JOIN asset_metadata m
ON m.id = p.id
WHERE
p.id IN (
SELECT
coin_gecko_id
FROM
{{ ref('silver__token_metadata') }}
)
AND recorded_hour :: DATE >= '2022-07-20'
{% if is_incremental() %}
AND recorded_hour > (
SELECT
MAX(recorded_hour)
FROM
{{ this }}
)
{% endif %}
),
prices AS (
-- Legacy and current closes stacked; UNION also removes exact duplicate rows.
SELECT
*
FROM
base_legacy_prices
UNION
SELECT
*
FROM
base_prices
),
imputed_prices AS (
-- For every grid cell keep the observed close (hourly_close) and the most recent
-- non-null close at or before that hour for the same symbol + id (imputed_close).
-- NOTE(review): on incremental runs the grid and the price CTEs only contain hours
-- newer than what is already loaded, so the forward fill cannot reach back to closes
-- stored by earlier runs - confirm this is intended.
SELECT
d.*,
p.close AS hourly_close,
LAST_VALUE(
p.close ignore nulls
) over (
PARTITION BY d.symbol,
d.id
ORDER BY
d.date_hour rows unbounded preceding
) AS imputed_close
FROM
base_date_hours_symbols d
LEFT OUTER JOIN prices p
ON p.recorded_hour = d.date_hour
AND p.id = d.id
)
-- Final projection: observed close when present, otherwise the forward-filled value.
SELECT
p.date_hour AS recorded_hour,
p.id,
p.symbol,
COALESCE(
p.hourly_close,
p.imputed_close
) AS CLOSE,
-- TRUE when the hour had no observed close and the value was carried forward
CASE
WHEN p.hourly_close IS NULL THEN TRUE
ELSE FALSE
END AS imputed,
-- merge key: recorded_hour and id joined with '-'
concat_ws(
'-',
recorded_hour,
id
) AS _unique_key,
{{ dbt_utils.generate_surrogate_key(
['recorded_hour', 'id']
) }} AS token_prices_coin_gecko_hourly_id,
-- audit columns stamped at run time
SYSDATE() AS inserted_timestamp,
SYSDATE() AS modified_timestamp,
'{{ invocation_id }}' AS _invocation_id
FROM
imputed_prices p
-- Drops hours with neither an observed nor a carried-forward close, then keeps a single
-- row per _unique_key (the one with the highest symbol) when an id maps to several symbols.
WHERE
CLOSE IS NOT NULL qualify(ROW_NUMBER() over (PARTITION BY _unique_key
ORDER BY
symbol DESC) = 1)

View File

@ -1,33 +0,0 @@
# dbt properties for the silver__token_prices_coin_gecko_hourly model.
version: 2
models:
- name: silver__token_prices_coin_gecko_hourly
# model-level test: RECORDED_HOUR + ID must be unique together
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- RECORDED_HOUR
- ID
columns:
- name: RECORDED_HOUR
description: Hour that the price was recorded at
# freshness check: expects data within the last 2 days
tests:
- dbt_expectations.expect_row_values_to_have_recent_data:
datepart: day
interval: 2
- name: ID
description: ID of the asset from CoinGecko
tests:
- not_null
- name: SYMBOL
description: Symbol of the token
tests:
- not_null
- name: CLOSE
description: Closing price of the recorded hour in USD
tests:
- not_null
- name: IMPUTED
description: Whether the price was imputed from an earlier record (generally used for low trade volume tokens)
tests:
- not_null
- name: _UNIQUE_KEY
description: Concatenation of multiple columns used for incremental merge

View File

@ -1,200 +0,0 @@
-- Incremental hourly CoinMarketCap price model, merged on _unique_key and clustered by day.
-- Builds an (hour x asset) grid, attaches observed closes from legacy and current
-- price sources, and forward-fills hours that have no observed close.
-- inserted_timestamp is excluded from merge updates, so it keeps its first-insert value.
{{ config(
    materialized = 'incremental',
    unique_key = "_unique_key",
    incremental_strategy = 'merge',
    cluster_by = ['recorded_hour::DATE'],
    merge_exclude_columns = ["inserted_timestamp"],
    tags = ['scheduled_non_core']
) }}
WITH date_hours AS (
    -- Hours from 2020-04-10 up to the newest recorded_hour in the CoinMarketCap hourly
    -- source; on incremental runs, only hours after the newest one already in this model.
    SELECT
        date_hour
    FROM
        {{ ref('silver__date_hours') }}
    WHERE
        date_hour >= '2020-04-10'
        AND date_hour <= (
            SELECT
                MAX(recorded_hour)
            FROM
                {{ source(
                    'crosschain_silver',
                    'hourly_prices_coin_market_cap'
                ) }}
        )

{% if is_incremental() %}
        AND date_hour > (
            SELECT
                MAX(recorded_hour)
            FROM
                {{ this }}
        )
{% endif %}
),
asset_metadata AS (
    -- Distinct (id, symbol) pairs for CoinMarketCap ids referenced by
    -- silver__token_metadata. Ids that cannot be cast to INT are ignored.
    SELECT
        id,
        symbol
    FROM
        {{ source(
            'crosschain_silver',
            'asset_metadata_coin_market_cap'
        ) }}
    WHERE
        id IN (
            SELECT
                coin_market_cap_id :: INT
            FROM
                {{ ref('silver__token_metadata') }}
            WHERE
                TRY_CAST(
                    coin_market_cap_id AS INT
                ) IS NOT NULL
        )
    GROUP BY
        1,
        2
),
base_date_hours_symbols AS (
    -- Full grid: every selected hour paired with every selected asset.
    SELECT
        date_hour,
        id,
        symbol
    FROM
        date_hours
        CROSS JOIN asset_metadata
),
base_legacy_prices AS (
    -- Legacy CoinMarketCap prices, used for dates before 2022-07-20.
    -- Only records stamped at minute 59 are kept, and each is truncated to its hour.
    SELECT
        DATE_TRUNC(
            'hour',
            recorded_at
        ) AS recorded_hour,
        asset_id :: NUMBER AS id,
        symbol,
        price AS CLOSE
    FROM
        {{ source(
            'crosschain_bronze',
            'legacy_prices'
        ) }}
    WHERE
        provider = 'coinmarketcap'
        AND asset_id IN (
            SELECT
                coin_market_cap_id
            FROM
                {{ ref('silver__token_metadata') }}
            WHERE
                TRY_CAST(
                    coin_market_cap_id AS INT
                ) IS NOT NULL
        )
        AND MINUTE(recorded_at) = 59
        AND recorded_at :: DATE < '2022-07-20' -- use legacy data before this date

{% if is_incremental() %}
        AND recorded_at > (
            SELECT
                MAX(recorded_hour)
            FROM
                {{ this }}
        )
{% endif %}
),
base_prices AS (
    -- Current CoinMarketCap hourly closes, used from 2022-07-20 onward.
    -- The symbol is taken from asset_metadata rather than from the price source.
    SELECT
        recorded_hour,
        p.id,
        m.symbol,
        p.close
    FROM
        {{ source(
            'crosschain_silver',
            'hourly_prices_coin_market_cap'
        ) }}
        p
        LEFT OUTER JOIN asset_metadata m
        ON m.id = p.id
    WHERE
        p.id IN (
            SELECT
                coin_market_cap_id :: INT
            FROM
                {{ ref('silver__token_metadata') }}
            WHERE
                TRY_CAST(
                    coin_market_cap_id AS INT
                ) IS NOT NULL
        )
        AND recorded_hour :: DATE >= '2022-07-20'

{% if is_incremental() %}
        AND recorded_hour > (
            SELECT
                MAX(recorded_hour)
            FROM
                {{ this }}
        )
{% endif %}
),
prices AS (
    -- Legacy and current closes stacked; UNION also removes exact duplicate rows.
    SELECT
        *
    FROM
        base_legacy_prices
    UNION
    SELECT
        *
    FROM
        base_prices
),
imputed_prices AS (
    -- For every grid cell keep the observed close (hourly_close) and the most recent
    -- non-null close at or before that hour for the same asset (imputed_close).
    --
    -- The window is partitioned by symbol AND id. Partitioning by symbol alone lets two
    -- different assets that share a ticker carry each other's prices forward; including
    -- id keeps the fill within a single asset and matches the CoinGecko hourly model.
    --
    -- NOTE(review): on incremental runs the grid and the price CTEs only contain hours
    -- newer than what is already loaded, so the forward fill cannot reach back to closes
    -- stored by earlier runs - confirm this is intended.
    SELECT
        d.*,
        p.close AS hourly_close,
        LAST_VALUE(
            p.close ignore nulls
        ) over (
            PARTITION BY d.symbol,
            d.id
            ORDER BY
                d.date_hour rows unbounded preceding
        ) AS imputed_close
    FROM
        base_date_hours_symbols d
        LEFT OUTER JOIN prices p
        ON p.recorded_hour = d.date_hour
        AND p.id = d.id
)
-- Final projection: observed close when present, otherwise the forward-filled value.
SELECT
    p.date_hour AS recorded_hour,
    p.id,
    p.symbol,
    COALESCE(
        p.hourly_close,
        p.imputed_close
    ) AS CLOSE,
    -- TRUE when the hour had no observed close and the value was carried forward
    CASE
        WHEN p.hourly_close IS NULL THEN TRUE
        ELSE FALSE
    END AS imputed,
    -- merge key: recorded_hour and id joined with '-'
    concat_ws(
        '-',
        recorded_hour,
        id
    ) AS _unique_key,
    {{ dbt_utils.generate_surrogate_key(
        ['recorded_hour', 'id']
    ) }} AS token_prices_coin_market_cap_hourly_id,
    -- audit columns stamped at run time
    SYSDATE() AS inserted_timestamp,
    SYSDATE() AS modified_timestamp,
    '{{ invocation_id }}' AS _invocation_id
FROM
    imputed_prices p
-- Drops hours with neither an observed nor a carried-forward close, then keeps a single
-- row per _unique_key (the one with the highest symbol) when an id maps to several symbols.
WHERE
    CLOSE IS NOT NULL qualify(ROW_NUMBER() over (PARTITION BY _unique_key
ORDER BY
    symbol DESC) = 1)

View File

@ -1,33 +0,0 @@
# dbt properties for the silver__token_prices_coin_market_cap_hourly model.
version: 2
models:
- name: silver__token_prices_coin_market_cap_hourly
# model-level test: RECORDED_HOUR + ID must be unique together
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- RECORDED_HOUR
- ID
columns:
- name: RECORDED_HOUR
description: Hour that the price was recorded at
# freshness check: expects data within the last 2 days
tests:
- dbt_expectations.expect_row_values_to_have_recent_data:
datepart: day
interval: 2
- name: ID
description: ID of the asset from CoinMarketCap
tests:
- not_null
- name: SYMBOL
description: Symbol of the token
tests:
- not_null
- name: CLOSE
description: Closing price of the recorded hour in USD
tests:
- not_null
- name: IMPUTED
description: Whether the price was imputed from an earlier record (generally used for low trade volume tokens)
tests:
- not_null
- name: _UNIQUE_KEY
description: Concatenation of multiple columns used for incremental merge

View File

@ -22,11 +22,6 @@ sources:
database: "{{ 'crosschain' if target.database == 'SOLANA' else 'crosschain_dev' }}"
schema: silver
tables:
- name: asset_metadata_coin_market_cap
- name: asset_metadata_coin_gecko
- name: hourly_prices_coin_market_cap
- name: hourly_prices_coin_gecko
- name: coin_market_cap_cryptocurrency_info
- name: apis_keys
- name: number_sequence
- name: labels_combined