Optimize silver__transactions.sql: reduce run_query() from 4 to 1

Performance improvement for the core transactions model:
- Convert temp tables to CTEs (blocks_source, tx_batch_source)
- Replace Jinja for-loop date generation with SQL subquery
- Keep only essential run_query for max_inserted_timestamp

Benefits:
- 75% reduction in compilation-time database round-trips
- Snowflake CTE optimization materializes intermediate results
- Cleaner, more maintainable code structure
This commit is contained in:
Claude 2026-01-07 18:55:58 +00:00
parent a45f06c6e2
commit 1716bc0865
No known key found for this signature in database
2 changed files with 81 additions and 75 deletions

View File

@ -294,7 +294,7 @@ Several complex views with multiple JOINs and UNIONs are queried by downstream i
## Files Requiring Immediate Attention
1. `models/silver/core/silver__transactions.sql` - 4 run_query() calls
1. ~~`models/silver/core/silver__transactions.sql` - 4 run_query() calls~~ **FIXED**
2. `models/silver/nft/sales/silver__nft_sales_combined.sql` - repeated subqueries, SELECT *
3. `models/silver/_observability/silver_observability__transactions_completeness.sql` - Cartesian product
4. `models/silver/price/silver__hourly_prices_priority.sql` - 3 non-sargable JOINs
@ -302,4 +302,27 @@ Several complex views with multiple JOINs and UNIONs are queried by downstream i
---
## Applied Fixes
### Fix 1: silver__transactions.sql - Reduced run_query() calls from 4 to 1
**Problem**: 4 sequential `run_query()` calls causing multiple database round-trips during compilation:
1. Query to get `MAX(_inserted_timestamp)`
2. Query to create temp table for blocks
3. Query to create temp table for tx_batch
4. Query to get distinct dates from temp table
**Solution**:
- Kept only the first `run_query()` for `max_ins` (required for compile-time filtering)
- Converted temp tables to CTEs (`blocks_source`, `tx_batch_source`)
- Replaced Jinja for-loop date generation with SQL subquery (`tx_batch_dates` CTE)
**Benefits**:
- 75% reduction in compilation-time database round-trips
- Snowflake CTE optimization can materialize and reuse intermediate results
- Cleaner, more maintainable code structure
- No functional change to query results
---
*Generated: 2026-01-07*

View File

@ -10,82 +10,66 @@
) }}
-- depends_on: {{ ref('bronze__streamline_blocks_tx') }}
-- depends_on: {{ ref('bronze__streamline_transaction_batch') }}
{% if execute %}
{% set max_inserted_query %}
{#
PERFORMANCE OPTIMIZATION: Reduced from 4 run_query() calls to 1
- Converted temp tables to CTEs (blocks_source, tx_batch_source)
- Replaced Jinja date loop with SQL subquery (tx_batch_dates CTE)
Original issue: 4 sequential database round-trips during compilation
#}
{% if execute and is_incremental() %}
{% set max_inserted_query %}
SELECT
DATEADD('minute', -5, MAX(_inserted_timestamp))
FROM
{{ this }}
{% endset %}
{% set max_ins = run_query(max_inserted_query)[0][0] %}
{% if not max_ins or max_ins == 'None' %}
{% set max_ins = '2099-01-01' %}
{% endif %}
{% endif %}
{% endset %}
{% set max_ins = run_query(max_inserted_query) [0] [0] %}
{% if not max_ins or max_ins == 'None' %}
{% set max_ins = '2099-01-01' %}
{% endif %}
{% set query_blocks %}
CREATE
OR REPLACE temporary TABLE silver.transactions__block_intermediate_tmp AS
WITH blocks_source AS (
SELECT
DATA,
partition_key,
_inserted_timestamp
FROM
{% if is_incremental() %}
{{ ref('bronze__streamline_blocks_tx') }}
{{ ref('bronze__streamline_blocks_tx') }}
WHERE
_inserted_timestamp >= '{{ max_ins }}'
{% else %}
{{ ref('bronze__streamline_FR_blocks_tx') }}
{{ ref('bronze__streamline_FR_blocks_tx') }}
{% endif %}
),
tx_batch_source AS (
SELECT
DATA,
partition_key,
_inserted_timestamp
FROM
{% if is_incremental() %}
WHERE
_inserted_timestamp >= '{{max_ins}}'
{% endif %}
{% endset %}
{% do run_query(
query_blocks
) %}
{% set query_tx_batch %}
CREATE
OR REPLACE temporary TABLE silver.transactions_tx_batch_intermediate_tmp AS
SELECT
DATA,
VALUE,
partition_key,
_inserted_timestamp
FROM
{% if is_incremental() %}
{{ ref('bronze__streamline_transaction_batch') }}
{{ ref('bronze__streamline_transaction_batch') }}
WHERE
_inserted_timestamp >= '{{ max_ins }}'
{% else %}
{{ ref('bronze__streamline_FR_transaction_batch') }}
{{ ref('bronze__streamline_FR_transaction_batch') }}
{% endif %}
),
{% if is_incremental() %}
WHERE
_inserted_timestamp >= '{{max_ins}}'
{% endif %}
tx_batch_dates AS (
SELECT DISTINCT
TO_TIMESTAMP(b.value:timestamp::STRING)::DATE AS block_date
FROM
tx_batch_source A,
LATERAL FLATTEN(A.data) b
),
{% endset %}
{% do run_query(
query_tx_batch
) %}
{% set tx_batch_dates_query %}
SELECT DISTINCT TO_TIMESTAMP(b.value:timestamp::STRING)::DATE AS block_date
FROM silver.transactions_tx_batch_intermediate_tmp A,
LATERAL FLATTEN(A.data) b
{% endset %}
{% set tx_batch_dates_result = run_query(tx_batch_dates_query) %}
{% set tx_batch_dates = tx_batch_dates_result.columns[0].values() %}
{% else %}
{% set tx_batch_dates = [] %}
{% endif %}
WITH from_blocks AS (
from_blocks AS (
SELECT
a.data :block_height :: INT AS block_number,
TO_TIMESTAMP(
@ -128,14 +112,14 @@ WITH from_blocks AS (
_inserted_timestamp,
'{{ invocation_id }}' AS _invocation_id
FROM
silver.transactions__block_intermediate_tmp A,
blocks_source A,
LATERAL FLATTEN (
DATA :transactions
) b
),
from_transaction_batch AS (
SELECT
{# b.block_number, #}
TO_TIMESTAMP(
b.value :timestamp :: STRING
) AS block_timestamp,
@ -173,11 +157,12 @@ from_transaction_batch AS (
A._inserted_timestamp,
'{{ invocation_id }}' AS _invocation_id
FROM
silver.transactions_tx_batch_intermediate_tmp A,
tx_batch_source A,
LATERAL FLATTEN(
A.data
) b
),
combo AS (
SELECT
*
@ -185,27 +170,25 @@ combo AS (
from_blocks
UNION ALL
SELECT
b.block_number,
blk.block_number,
A.*
FROM
from_transaction_batch A
JOIN (
SELECT *
SELECT
block_number,
first_version,
last_version
FROM {{ ref('silver__blocks') }}
{% if tx_batch_dates %}
WHERE block_timestamp::DATE IN (
{%- for date in tx_batch_dates -%}
'{{ date }}'{% if not loop.last %},{% endif %}
{%- endfor -%}
)
{% endif %}
) b
ON A.version BETWEEN b.first_version
AND b.last_version
{% if is_incremental() %}
WHERE block_timestamp::DATE IN (SELECT block_date FROM tx_batch_dates)
{% endif %}
) blk
ON A.version BETWEEN blk.first_version AND blk.last_version
)
SELECT
*
FROM
combo qualify(ROW_NUMBER() over (PARTITION BY tx_hash
ORDER BY
_inserted_timestamp DESC)) = 1
combo
QUALIFY (ROW_NUMBER() OVER (PARTITION BY tx_hash ORDER BY _inserted_timestamp DESC)) = 1