From 56d024d15592dabe5a657c699322d2ceaafac699 Mon Sep 17 00:00:00 2001 From: desmond-hui <97470747+desmond-hui@users.noreply.github.com> Date: Thu, 20 Oct 2022 08:20:26 -0700 Subject: [PATCH] An 1903/signers (#140) * signers table wip * merge main and remove test addresses * flatten signers and instructions arrays * daily signers wip * signers wip, some test failures * incremental model changes * incremental changes - dumped FR date limit * exclude chain admin programs * updates to incremental * updates to tests * incremental fixes * and condition * changes to incremental and formatting * updates to incremental * remove ver2 references * remove unintended change * add upper bound for _inserted_timestamp for downstream ctes * do 1 day loads * update docs * exclude chadmin programs * agg to get first/last programs * revert unintended code changes * change 1st batch range to handle inserted dates with gaps Co-authored-by: jhuhnke --- models/silver/silver__daily_signers.sql | 27 +++-- models/silver/silver__signers.sql | 139 +++++++++++++++--------- 2 files changed, 108 insertions(+), 58 deletions(-) diff --git a/models/silver/silver__daily_signers.sql b/models/silver/silver__daily_signers.sql index 6f6c2ec6..767c66d2 100644 --- a/models/silver/silver__daily_signers.sql +++ b/models/silver/silver__daily_signers.sql @@ -34,7 +34,7 @@ WHERE LEAST( DATEADD( 'day', - 10, + 1, COALESCE(MAX(_inserted_timestamp :: DATE), '2022-08-12') ), CURRENT_DATE - 1 @@ -51,9 +51,17 @@ WHERE ) {% else %} WHERE - _inserted_timestamp :: DATE = '2022-08-12' + _inserted_timestamp :: DATE BETWEEN '2022-08-12' AND '2022-08-30' {% endif %} ), +exclude_programs AS ( + SELECT + address + FROM + {{ ref('core__dim_labels') }} + WHERE + label_type = 'chadmin' +), b AS ( SELECT s.value :: STRING AS signer, @@ -90,7 +98,7 @@ b AS ( LEAST( DATEADD( 'day', - 11, + 2, COALESCE(MAX(_inserted_timestamp :: DATE), '2022-08-12') ), CURRENT_DATE - 1 @@ -99,7 +107,7 @@ b AS ( {{ this }} ) {% elif not is_incremental() %} - AND _inserted_timestamp :: DATE = '2022-08-12' + AND _inserted_timestamp :: DATE BETWEEN '2022-08-12' AND '2022-08-30' {% endif %} ), C AS ( @@ -127,7 +135,7 @@ C AS ( LEAST( DATEADD( 'day', - 11, + 2, COALESCE(MAX(_inserted_timestamp :: DATE), '2022-08-12') ), CURRENT_DATE - 1 @@ -136,7 +144,7 @@ C AS ( {{ this }} ) {% elif not is_incremental() %} - AND e._inserted_timestamp :: DATE = '2022-08-12' + AND e._inserted_timestamp :: DATE BETWEEN '2022-08-12' AND '2022-08-30' {% endif %} ), base_programs AS ( @@ -150,6 +158,9 @@ base_programs AS ( program_ids [array_size(program_ids)-1] :: STRING AS last_program_id FROM C + LEFT JOIN exclude_programs ep on ep.address = C.program_id + WHERE + ep.address is null GROUP BY tx_id ), @@ -158,13 +169,13 @@ first_last_programs AS ( b.signer, b.b_date, b.tx_id, - FIRST_VALUE(first_program_id) over ( + FIRST_VALUE(first_program_id ignore nulls) over ( PARTITION BY signer, b_date ORDER BY block_timestamp ) AS first_program_id, - LAST_VALUE(last_program_id) over ( + LAST_VALUE(last_program_id ignore nulls) over ( PARTITION BY signer, b_date ORDER BY diff --git a/models/silver/silver__signers.sql b/models/silver/silver__signers.sql index 65861ac1..825c4201 100644 --- a/models/silver/silver__signers.sql +++ b/models/silver/silver__signers.sql @@ -1,76 +1,115 @@ {{ config( - materialized = 'incremental', - unique_key = "signer", - incremental_strategy = 'delete+insert', - cluster_by = 'signer' + materialized = 'incremental', + unique_key = "signer", + incremental_strategy = 'delete+insert', + cluster_by = 'signer' ) }} WITH base_min_signers AS ( + SELECT - signer, - min(b_date) AS b_date - FROM + signer, + MIN(b_date) AS b_date + FROM {{ ref('silver__daily_signers') }} - GROUP BY + GROUP BY signer ), base_max_signers AS ( - SELECT - signer, - max(b_date) as b_date - FROM - {{ ref('silver__daily_signers') }} - GROUP BY - signer -), -final_signers_agg AS ( - select - signer, - count(*) AS num_days_active, - sum(num_txs) AS num_txs, - array_union_agg(unique_program_ids) AS programs_used, - sum(total_fees) AS total_fees, - max(_inserted_timestamp) AS _inserted_timestamp + SELECT + signer, + MAX(b_date) AS b_date FROM {{ ref('silver__daily_signers') }} - GROUP BY + GROUP BY signer ), +first_last_programs AS ( + SELECT + signer, + FIRST_VALUE( + first_program_id ignore nulls + ) over ( + PARTITION BY signer + ORDER BY + b_date + ) AS first_program_id, + FIRST_VALUE( + last_program_id ignore nulls + ) over ( + PARTITION BY signer + ORDER BY + b_date DESC + ) AS last_program_id + FROM + {{ ref('silver__daily_signers') }} +), +final_signers_agg AS ( + SELECT + ds.signer, + flp.first_program_id, + flp.last_program_id, + COUNT(*) AS num_days_active, + SUM(num_txs) AS num_txs, + array_union_agg(unique_program_ids) AS programs_used, + SUM(total_fees) AS total_fees, + MAX(_inserted_timestamp) AS _inserted_timestamp + FROM + {{ ref('silver__daily_signers') }} + ds + LEFT OUTER JOIN ( + SELECT + signer, + first_program_id, + last_program_id + FROM + first_last_programs + GROUP BY + 1, + 2, + 3 + ) flp + ON flp.signer = ds.signer + GROUP BY + 1, + 2, + 3 +), final_min_signers AS ( SELECT - ms.signer, - ms.b_date AS first_tx_date, - sd.first_program_id - FROM + ms.signer, + ms.b_date AS first_tx_date + FROM base_min_signers ms - INNER JOIN {{ ref('silver__daily_signers') }} sd - ON sd.signer = ms.signer - AND sd.b_date = ms.b_date + INNER JOIN {{ ref('silver__daily_signers') }} + sd + ON sd.signer = ms.signer + AND sd.b_date = ms.b_date ), final_max_signers AS ( - SELECT - ms.signer, - ms.b_date AS last_tx_date, - sd.last_program_id - FROM base_max_signers ms - - INNER JOIN {{ ref('silver__daily_signers') }} sd - ON sd.signer = ms.signer - AND sd.b_date = ms.b_date + SELECT + ms.signer, + ms.b_date AS last_tx_date + FROM + base_max_signers ms + INNER JOIN {{ ref('silver__daily_signers') }} + sd + ON sd.signer = ms.signer + AND sd.b_date = ms.b_date ) SELECT s_min.*, + s_agg.first_program_id, s_max.last_tx_date, - s_max.last_program_id, - s_agg.num_days_active, + s_agg.last_program_id, + s_agg.num_days_active, s_agg.num_txs, s_agg.total_fees, - s_agg.programs_used, + s_agg.programs_used, s_agg._inserted_timestamp -FROM +FROM final_min_signers s_min -JOIN final_max_signers s_max -ON s_max.signer = s_min.signer - -JOIN final_signers_agg s_agg -ON s_agg.signer = s_min.signer \ No newline at end of file + JOIN final_max_signers s_max + ON s_max.signer = s_min.signer + JOIN final_signers_agg s_agg + ON s_agg.signer = s_min.signer