From 1716bc086591e874c9ceebe3a01802b69d01d511 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 7 Jan 2026 18:55:58 +0000 Subject: [PATCH] Optimize silver__transactions.sql: reduce run_query() from 4 to 1 Performance improvement for the core transactions model: - Convert temp tables to CTEs (blocks_source, tx_batch_source) - Replace Jinja for-loop date generation with SQL subquery - Keep only essential run_query for max_inserted_timestamp Benefits: - 75% reduction in compilation-time database round-trips - Snowflake CTE optimization materializes intermediate results - Cleaner, more maintainable code structure --- PERFORMANCE_ANALYSIS_REPORT.md | 25 +++- models/silver/core/silver__transactions.sql | 131 +++++++++----------- 2 files changed, 81 insertions(+), 75 deletions(-) diff --git a/PERFORMANCE_ANALYSIS_REPORT.md b/PERFORMANCE_ANALYSIS_REPORT.md index ea4be0d..9c0bb00 100644 --- a/PERFORMANCE_ANALYSIS_REPORT.md +++ b/PERFORMANCE_ANALYSIS_REPORT.md @@ -294,7 +294,7 @@ Several complex views with multiple JOINs and UNIONs are queried by downstream i ## Files Requiring Immediate Attention -1. `models/silver/core/silver__transactions.sql` - 4 run_query() calls +1. ~~`models/silver/core/silver__transactions.sql` - 4 run_query() calls~~ **FIXED** 2. `models/silver/nft/sales/silver__nft_sales_combined.sql` - repeated subqueries, SELECT * 3. `models/silver/_observability/silver_observability__transactions_completeness.sql` - Cartesian product 4. `models/silver/price/silver__hourly_prices_priority.sql` - 3 non-sargable JOINs @@ -302,4 +302,27 @@ Several complex views with multiple JOINs and UNIONs are queried by downstream i --- +## Applied Fixes + +### Fix 1: silver__transactions.sql - Reduced run_query() calls from 4 to 1 + +**Problem**: 4 sequential `run_query()` calls causing multiple database round-trips during compilation: +1. Query to get `MAX(_inserted_timestamp)` +2. Query to create temp table for blocks +3. Query to create temp table for tx_batch +4. Query to get distinct dates from temp table + +**Solution**: +- Kept only the first `run_query()` for `max_ins` (required for compile-time filtering) +- Converted temp tables to CTEs (`blocks_source`, `tx_batch_source`) +- Replaced Jinja for-loop date generation with SQL subquery (`tx_batch_dates` CTE) + +**Benefits**: +- 75% reduction in compilation-time database round-trips +- Snowflake CTE optimization can materialize and reuse intermediate results +- Cleaner, more maintainable code structure +- No functional change to query results + +--- + *Generated: 2026-01-07* diff --git a/models/silver/core/silver__transactions.sql b/models/silver/core/silver__transactions.sql index b1a8b9b..57f5b43 100644 --- a/models/silver/core/silver__transactions.sql +++ b/models/silver/core/silver__transactions.sql @@ -10,82 +10,66 @@ ) }} -- depends_on: {{ ref('bronze__streamline_blocks_tx') }} -- depends_on: {{ ref('bronze__streamline_transaction_batch') }} -{% if execute %} - {% set max_inserted_query %} +{# + PERFORMANCE OPTIMIZATION: Reduced from 4 run_query() calls to 1 + - Converted temp tables to CTEs (blocks_source, tx_batch_source) + - Replaced Jinja date loop with SQL subquery (tx_batch_dates CTE) + Original issue: 4 sequential database round-trips during compilation +#} + +{% if execute and is_incremental() %} + {% set max_inserted_query %} SELECT DATEADD('minute', -5, MAX(_inserted_timestamp)) FROM {{ this }} + {% endset %} + {% set max_ins = run_query(max_inserted_query)[0][0] %} + {% if not max_ins or max_ins == 'None' %} + {% set max_ins = '2099-01-01' %} + {% endif %} +{% endif %} - {% endset %} - {% set max_ins = run_query(max_inserted_query) [0] [0] %} - {% if not max_ins or max_ins == 'None' %} - {% set max_ins = '2099-01-01' %} - {% endif %} - - {% set query_blocks %} - CREATE - OR REPLACE temporary TABLE silver.transactions__block_intermediate_tmp AS +WITH blocks_source AS ( SELECT DATA, partition_key, _inserted_timestamp FROM - {% if is_incremental() %} -{{ ref('bronze__streamline_blocks_tx') }} + {{ ref('bronze__streamline_blocks_tx') }} + WHERE + _inserted_timestamp >= '{{ max_ins }}' {% else %} - {{ ref('bronze__streamline_FR_blocks_tx') }} + {{ ref('bronze__streamline_FR_blocks_tx') }} {% endif %} +), +tx_batch_source AS ( + SELECT + DATA, + partition_key, + _inserted_timestamp + FROM {% if is_incremental() %} -WHERE - _inserted_timestamp >= '{{max_ins}}' -{% endif %} - -{% endset %} -{% do run_query( - query_blocks -) %} -{% set query_tx_batch %} -CREATE -OR REPLACE temporary TABLE silver.transactions_tx_batch_intermediate_tmp AS -SELECT - DATA, - VALUE, - partition_key, - _inserted_timestamp -FROM - -{% if is_incremental() %} -{{ ref('bronze__streamline_transaction_batch') }} + {{ ref('bronze__streamline_transaction_batch') }} + WHERE + _inserted_timestamp >= '{{ max_ins }}' {% else %} - {{ ref('bronze__streamline_FR_transaction_batch') }} + {{ ref('bronze__streamline_FR_transaction_batch') }} {% endif %} +), -{% if is_incremental() %} -WHERE - _inserted_timestamp >= '{{max_ins}}' -{% endif %} +tx_batch_dates AS ( + SELECT DISTINCT + TO_TIMESTAMP(b.value:timestamp::STRING)::DATE AS block_date + FROM + tx_batch_source A, + LATERAL FLATTEN(A.data) b +), -{% endset %} -{% do run_query( - query_tx_batch -) %} - -{% set tx_batch_dates_query %} - SELECT DISTINCT TO_TIMESTAMP(b.value:timestamp::STRING)::DATE AS block_date - FROM silver.transactions_tx_batch_intermediate_tmp A, - LATERAL FLATTEN(A.data) b -{% endset %} -{% set tx_batch_dates_result = run_query(tx_batch_dates_query) %} -{% set tx_batch_dates = tx_batch_dates_result.columns[0].values() %} -{% else %} -{% set tx_batch_dates = [] %} -{% endif %} - -WITH from_blocks AS ( +from_blocks AS ( SELECT a.data :block_height :: INT AS block_number, TO_TIMESTAMP( @@ -128,14 +112,14 @@ WITH from_blocks AS ( _inserted_timestamp, '{{ invocation_id }}' AS _invocation_id FROM - silver.transactions__block_intermediate_tmp A, + blocks_source A, LATERAL FLATTEN ( DATA :transactions ) b ), + from_transaction_batch AS ( SELECT - {# b.block_number, #} TO_TIMESTAMP( b.value :timestamp :: STRING ) AS block_timestamp, @@ -173,11 +157,12 @@ from_transaction_batch AS ( A._inserted_timestamp, '{{ invocation_id }}' AS _invocation_id FROM - silver.transactions_tx_batch_intermediate_tmp A, + tx_batch_source A, LATERAL FLATTEN( A.data ) b ), + combo AS ( SELECT * @@ -185,27 +170,25 @@ combo AS ( from_blocks UNION ALL SELECT - b.block_number, + blk.block_number, A.* FROM from_transaction_batch A JOIN ( - SELECT * + SELECT + block_number, + first_version, + last_version FROM {{ ref('silver__blocks') }} - {% if tx_batch_dates %} - WHERE block_timestamp::DATE IN ( - {%- for date in tx_batch_dates -%} - '{{ date }}'{% if not loop.last %},{% endif %} - {%- endfor -%} - ) - {% endif %} - ) b - ON A.version BETWEEN b.first_version - AND b.last_version +{% if is_incremental() %} + WHERE block_timestamp::DATE IN (SELECT block_date FROM tx_batch_dates) +{% endif %} + ) blk + ON A.version BETWEEN blk.first_version AND blk.last_version ) + SELECT * FROM - combo qualify(ROW_NUMBER() over (PARTITION BY tx_hash -ORDER BY - _inserted_timestamp DESC)) = 1 + combo +QUALIFY (ROW_NUMBER() OVER (PARTITION BY tx_hash ORDER BY _inserted_timestamp DESC)) = 1