gmp to SL

This commit is contained in:
Eric Laurello 2025-01-14 09:49:12 -05:00
parent 593c626278
commit d7e216accd
10 changed files with 52 additions and 31 deletions

View File

@ -56,7 +56,9 @@ jobs:
dbt run -m "axelar_models,tag:axelscan_search"
dbt run -m "axelar_models,tag:axelscan_search"
dbt run -m "axelar_models,tag:axelscan_search"
dbt run -m 1+models/streamline/realtime/streamline__axelscan_searchgmp_realtime.sql 1+models/streamline/complete/streamline__axelscan_day_counts_gmp_complete.sql --vars '{"STREAMLINE_INVOKE_STREAMS":True}'

View File

@ -2,7 +2,8 @@
materialized = 'incremental',
unique_key = 'date_day',
full_refresh = false,
tags = ['axelscan']
tags = ['axelscan'],
enabled = false
) }}
WITH dates_hist AS (

View File

@ -2,7 +2,8 @@
materialized = 'incremental',
unique_key = 'date_day',
incremental_strategy = 'delete+insert',
tags = ['axelscan']
tags = ['axelscan'],
enabled = false
) }}
WITH ids_days AS (

View File

@ -2,7 +2,8 @@
materialized = 'incremental',
full_refresh = false,
cluster_by = ['_inserted_timestamp::DATE'],
tags = ['axelscan','axelscan_search']
tags = ['axelscan','axelscan_search'],
enabled = false
) }}
WITH max_id AS (

View File

@ -2,7 +2,8 @@
materialized = 'view',
meta ={ 'database_tags':{ 'table':{ 'PURPOSE': 'AXELSCAN',
}} },
tags = ['noncore']
tags = ['noncore'],
enabled = false
) }}
SELECT

View File

@ -5,7 +5,8 @@
merge_exclude_columns = ["inserted_timestamp"],
cluster_by = 'created_at::DATE',
post_hook = "ALTER TABLE {{ this }} ADD SEARCH OPTIMIZATION ON EQUALITY(id);",
tags = ['noncore']
tags = ['noncore'],
enabled = false
) }}
WITH base AS (

View File

@ -1,7 +1,7 @@
-- depends_on: {{ ref('bronze__axelscan_day_counts_gmp') }}
{{ config (
materialized = "incremental",
unique_key = 'date_day',
unique_key = ['date_day','from_time'],
tags = ['streamline_axelscan']
) }}
@ -11,7 +11,7 @@ SELECT
VALUE :TO_TIME :: bigint AS TO_TIME,
DATA :total AS day_count,
{{ dbt_utils.generate_surrogate_key(
['date_day']
['date_day','from_time']
) }} AS axelscan_day_counts_gmp_complete_id,
inserted_timestamp,
SYSDATE() AS modified_timestamp,
@ -30,6 +30,6 @@ WHERE
{{ ref('bronze__axelscan_day_counts_gmp_FR') }}
{% endif %}
qualify(ROW_NUMBER() over (PARTITION BY date_day
qualify(ROW_NUMBER() over (PARTITION BY date_day, from_time
ORDER BY
inserted_timestamp DESC)) = 1

View File

@ -9,8 +9,10 @@
SELECT
partition_key AS date_day,
VALUE :ID :: INT AS id,
VALUE :FROM_TIME :: bigint AS from_time,
VALUE :TO_TIME :: bigint AS TO_TIME,
{{ dbt_utils.generate_surrogate_key(
['date_day','id']
['date_day','id','from_time']
) }} AS axelscan_searchgmp_complete_id,
inserted_timestamp,
SYSDATE() AS modified_timestamp,

View File

@ -4,11 +4,12 @@
func = 'streamline.udf_rest_api',
target = "{{this.schema}}.{{this.identifier}}",
params ={ "external_table" :"axelscan_day_counts_gmp",
"sql_limit" :"200",
"producer_batch_size" :"100",
"worker_batch_size" :"100",
"sql_limit" :"4000",
"producer_batch_size" :"200",
"worker_batch_size" :"200",
"sql_source" :"{{this.identifier}}",
"order_by_column": "date_day" }
"order_by_column": "ob",
"async_concurrent_requests": "5" }
),
tags = ['streamline_axelscan']
) }}
@ -17,37 +18,45 @@ WITH dates_hist AS (
SELECT
A.date_day,
DATE_PART(
(ROW_NUMBER() over (PARTITION BY A.date_day
ORDER BY
SEQ4()) - 1) * 60 + DATE_PART(
epoch_second,
A.date_day
) AS from_time,
DATE_PART(epoch_second, DATEADD (DAY, 1, A.date_day)) -1 AS TO_TIME
from_time AS ft,
from_time + 59 AS TO_TIME
FROM
{{ source(
'crosschain',
'dim_dates'
) }} A
JOIN TABLE(GENERATOR(rowcount => 1440)) x
LEFT JOIN {{ ref('streamline__axelscan_day_counts_gmp_complete') }}
b
ON A.date_day = b.date_day
AND ft = b.from_time
WHERE
A.date_day BETWEEN '2022-05-09'
A.date_day BETWEEN '2024-12-10'
AND SYSDATE() :: DATE - 2
AND b.date_day IS NULL
),
dates_recent AS (
SELECT
date_day,
DATE_PART(
(ROW_NUMBER() over (PARTITION BY date_day
ORDER BY
SEQ4()) - 1) * 60 + DATE_PART(
epoch_second,
date_day
) AS from_time,
DATE_PART(epoch_second, DATEADD (DAY, 1, date_day)) -1 AS TO_TIME
from_time + 59 AS TO_TIME
FROM
{{ source(
'crosschain',
'dim_dates'
) }}
JOIN TABLE(GENERATOR(rowcount => 1440)) x
WHERE
date_day BETWEEN SYSDATE() :: DATE - 1
AND SYSDATE() :: DATE
@ -55,7 +64,7 @@ dates_recent AS (
date_combo AS (
SELECT
date_day,
from_time,
ft AS from_time,
TO_TIME
FROM
dates_hist
@ -74,8 +83,9 @@ SELECT
) AS partition_key,
from_time,
TO_TIME,
partition_key || '-' || from_time :: STRING AS ob,
{{ target.database }}.live.udf_api(
'GET',
'POST',
'https://api.gmp.axelarscan.io',
OBJECT_CONSTRUCT(),
OBJECT_CONSTRUCT(

View File

@ -4,17 +4,18 @@
func = 'streamline.udf_rest_api',
target = "{{this.schema}}.{{this.identifier}}",
params ={ "external_table" :"axelscan_searchgmp",
"sql_limit" :"1000",
"producer_batch_size" :"500",
"worker_batch_size" :"500",
"sql_limit" :"4000",
"producer_batch_size" :"100",
"worker_batch_size" :"100",
"sql_source" :"{{this.identifier}}",
"order_by_column": "ob" }
"order_by_column": "ob",
"async_concurrent_requests" :"5" }
),
tags = ['streamline_axelscan']
) }}
--set an arbitrary limit for the number of ids to pull to speed up performance. Shouldn't be more than 200K records in a day
--This is the max number of ids that the api supports for the searchGMP method
{% set limit = var(
'AXELSCAN_ID_LIMIT', 200000
'AXELSCAN_ID_LIMIT', 501
) %}
WITH ids AS (
@ -50,6 +51,7 @@ ids_topull AS (
b
ON A.date_day = b.date_day
AND A.id = b.id
AND A.from_time = b.from_time
WHERE
b.date_day IS NULL
)
@ -59,9 +61,11 @@ SELECT
'-'
) AS partition_key,
id,
partition_key || '-' || id :: STRING AS ob,
from_time,
TO_TIME,
partition_key || '-' || from_time || '-' || id :: STRING AS ob,
{{ target.database }}.live.udf_api(
'GET',
'POST',
'https://api.gmp.axelarscan.io',
OBJECT_CONSTRUCT(),
OBJECT_CONSTRUCT(
@ -81,5 +85,3 @@ FROM
ids_topull
ORDER BY
ob
LIMIT
2000