use row_number to dedupe (#798)

This commit is contained in:
desmond-hui 2025-02-12 07:02:24 -08:00 committed by GitHub
parent 68c1eec35c
commit ebda48ad1d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -21,6 +21,10 @@ silver_counts AS (
count(tx_id) AS transaction_count
FROM
(
-- Deduplicate using row_number() instead of `UNION` or `count(DISTINCT)` since it is faster for large datasets
SELECT
*
FROM (
SELECT
block_id,
tx_id
@@ -28,7 +32,7 @@ silver_counts AS (
{{ ref('silver__transactions') }} t
WHERE
block_timestamp BETWEEN current_date - 8 AND current_timestamp - INTERVAL '12 HOUR'
UNION
UNION ALL
SELECT
block_id,
tx_id
@@ -37,6 +41,9 @@ silver_counts AS (
WHERE
block_timestamp BETWEEN current_date - 8 AND current_timestamp - INTERVAL '12 HOUR'
)
QUALIFY
row_number() OVER (PARTITION BY tx_id ORDER BY block_id) = 1
)
GROUP BY
1
)