An 4508/integrate fsc utils dynamic predicate macro (#482)

* wip

* refactor dynamic predicates macro

* wip

* bump fsc-utils to 1.16.1

* update base silver models to use dynamic predicate

* remove
This commit is contained in:
desmond-hui 2024-02-09 07:00:54 -08:00 committed by GitHub
parent 8ddfe4e3c0
commit 02bacfa997
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 14 additions and 135 deletions

View File

@ -1,12 +1,4 @@
{% macro get_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) -%}
{% set predicate_override = "" %}
{% if incremental_predicates[0] == "dynamic_range_predicate" %}
-- run some queries to dynamically determine the min + max of this 'date_column' in the new data
{% set predicate_override = dynamic_range_predicate(source, incremental_predicates[1], "DBT_INTERNAL_DEST") %}
{% endif %}
{% set predicates = [predicate_override] if predicate_override else incremental_predicates %}
-- standard merge from here
{% set merge_sql = dbt.get_merge_sql(target, source, unique_key, dest_columns, predicates) %}
{% set merge_sql = fsc_utils.get_merge_sql(target, source, unique_key, dest_columns, incremental_predicates) %}
{{ return(merge_sql) }}
{% endmacro %}

View File

@ -1,113 +0,0 @@
{% macro dynamic_range_predicate(source, predicate_column, output_alias="") -%}
{% set supported_data_types = ["INTEGER","DATE"] %}
{% set predicate_column_data_type_query %}
SELECT typeof({{predicate_column}}::variant)
FROM {{ source }}
WHERE {{predicate_column}} IS NOT NULL
LIMIT 1;
{% endset %}
{% set predicate_column_data_type = run_query(predicate_column_data_type_query).columns[0].values()[0] %}
{% if predicate_column_data_type not in supported_data_types %}
{{ exceptions.raise_compiler_error("Data type of "~ predicate_column_data_type ~" is not supported, use one of "~ supported_data_types ~" column instead") }}
{% endif %}
{% set get_start_end_query %}
SELECT
MIN(
{{ predicate_column }}
) AS full_range_start,
MAX(
{{ predicate_column }}
) AS full_range_end
FROM
{{ source }}
{% endset %}
{% set start_end_results = run_query(get_start_end_query).columns %}
{% set start_preciate_value = start_end_results[0].values()[0] %}
{% set end_predicate_value = start_end_results[1].values()[0] %}
{% set get_limits_query %}
WITH block_range AS (
{% if predicate_column_data_type == "INTEGER" %}
SELECT
SEQ4() + {{ start_preciate_value }} as predicate_value
FROM
TABLE(GENERATOR(rowcount => {{ end_predicate_value - start_preciate_value }}+1))
{% else %}
SELECT
date_day as predicate_value
FROM
crosschain.core.dim_dates
WHERE
date_day BETWEEN '{{ start_preciate_value }}' AND '{{ end_predicate_value }}'
{% endif %}
),
partition_block_counts AS (
SELECT
b.predicate_value,
COUNT(r.{{ predicate_column }}) AS count_in_window
FROM
block_range b
LEFT OUTER JOIN {{ source }}
r
ON r.{{ predicate_column }} = b.predicate_value
GROUP BY
1
),
range_groupings AS (
SELECT
predicate_value,
count_in_window,
conditional_change_event(
count_in_window > 0
) over (
ORDER BY
predicate_value
) AS group_val
FROM
partition_block_counts
),
contiguous_ranges AS (
SELECT
MIN(predicate_value) AS start_value,
MAX(predicate_value) AS end_value
FROM
range_groupings
WHERE
count_in_window > 0
GROUP BY
group_val
),
between_stmts AS (
SELECT
CONCAT(
'{{ output_alias~"." if output_alias else "" }}',
'{{ predicate_column }} between \'',
start_value,
'\' and \'',
end_value,
'\''
) AS b
FROM
contiguous_ranges
)
SELECT
CONCAT('(', LISTAGG(b, ' OR '), ')')
FROM
between_stmts
{% endset %}
{% set between_stmts = run_query(get_limits_query).columns[0].values()[0] %}
{% if between_stmts != '()' %}
/* in case empty update array */
{% set predicate_override = between_stmts %}
{% else %}
{% set predicate_override = '1=1' %}
/* need to have something or it will error since it expects at least 1 predicate */
{% endif %}
{{ return(predicate_override) }}
{% endmacro %}

View File

@ -36,7 +36,7 @@ FROM
{% do run_query(
query ~ incr
) %}
{% set between_stmts = dynamic_range_predicate("silver.decoded_instructions__intermediate_tmp","block_timestamp::date") %}
{% set between_stmts = fsc_utils.dynamic_range_predicate("silver.decoded_instructions__intermediate_tmp","block_timestamp::date") %}
{% endif %}
WITH txs AS (

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ['block_id','tx_id','mapped_instruction_index'],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'],
full_refresh = false,
tags = ['scheduled_core']

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ['block_id','tx_id','index'],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'],
full_refresh = false,
tags = ['scheduled_core']

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ['block_id','tx_id','index'],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'],
full_refresh = false,
tags = ['scheduled_core']

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ['block_id','tx_id','index'],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'],
full_refresh = false,
tags = ['scheduled_core']

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ['block_id','tx_id','index'],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE','program_id'],
post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}'),
full_refresh = false,

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = "tx_id",
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','block_id','_inserted_timestamp::DATE'],
post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}'),
full_refresh = false,

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = ["block_id","tx_id","index"],
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'],
post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}'),
full_refresh = false,

View File

@ -1,7 +1,7 @@
{{ config(
materialized = 'incremental',
unique_key = "tx_id",
incremental_predicates = ['DBT_INTERNAL_DEST.block_timestamp::date >= LEAST(current_date-7,(select min(block_timestamp)::date from ' ~ generate_tmp_view_name(this) ~ '))'],
incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"],
cluster_by = ['block_timestamp::DATE','block_id','_inserted_timestamp::DATE'],
post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}'),
full_refresh = false,

View File

@ -6,11 +6,11 @@ packages:
- package: dbt-labs/dbt_utils
version: 1.0.0
- git: https://github.com/FlipsideCrypto/fsc-utils.git
revision: c84d623aa09ce2acb9451e6aedf5fa70c19b0b95
revision: b0b5e615143f736d3de249152cda509e47fd21fd
- package: get-select/dbt_snowflake_query_tags
version: 2.3.1
version: 2.3.3
- package: calogica/dbt_date
version: 0.7.2
- git: https://github.com/FlipsideCrypto/livequery-models.git
revision: bca494102fbd2d621d32746e9a7fe780678044f8
sha1_hash: ce25373a314499826f1bef1d1fca885141e945e9
sha1_hash: 31b810d35e40945328ca2d93e7c9e190a8901ecf

View File

@ -6,6 +6,6 @@ packages:
- package: dbt-labs/dbt_utils
version: 1.0.0
- git: https://github.com/FlipsideCrypto/fsc-utils.git
revision: "v1.11.0"
revision: "v1.16.1"
- package: get-select/dbt_snowflake_query_tags
version: [">=2.0.0", "<3.0.0"]