From 0458b47dbde66e1f8f79d00cc3eba3358d7a7ad5 Mon Sep 17 00:00:00 2001
From: desmond-hui <97470747+desmond-hui@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:10:20 -0800
Subject: [PATCH] use hourly grain to better prune data (#443)

---
Notes:
    The bronze external-table models now expose an hourly partition column,
    _partition_by_created_date_hour, built with to_timestamp_ntz() from path
    segments 3-6 of file_name. It replaces the daily
    _partition_by_created_date column.

    The incremental filters in the silver and streamline "complete" models
    keep only partitions from the last 3 hours. The cutoff is written as
    dateadd('hour', -3, current_timestamp()) with no MAX() wrapper: an
    aggregate is not valid inside a WHERE clause, and aggregating a
    constant expression adds nothing.

    NOTE(review): the indentation of hunk context lines (and the blank
    context line in the silver hunk) was reconstructed. Verify against the
    target tree, or apply with `git apply --ignore-whitespace`.
    NOTE(review): the partition column is TIMESTAMP_NTZ while
    current_timestamp() follows the session time zone. Presumably both are
    UTC here; confirm.

 .../bronze__streamline_FR_decoded_instructions_2.sql         | 4 ++--
 .../streamline/bronze__streamline_decoded_instructions_2.sql | 4 ++--
 models/silver/parser/silver__decoded_instructions.sql        | 2 +-
 .../streamline__complete_decoded_instructions_2.sql          | 2 ++
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/models/bronze/streamline/bronze__streamline_FR_decoded_instructions_2.sql b/models/bronze/streamline/bronze__streamline_FR_decoded_instructions_2.sql
index 9928bec7..565797b6 100644
--- a/models/bronze/streamline/bronze__streamline_FR_decoded_instructions_2.sql
+++ b/models/bronze/streamline/bronze__streamline_FR_decoded_instructions_2.sql
@@ -5,8 +5,8 @@
 {% set model = "decoded_instructions_2" %}
 {{ streamline_external_table_FR_query(
     model,
-    partition_function = "to_date(concat_ws('-', split_part(file_name, '/', 3),split_part(file_name, '/', 4), split_part(file_name, '/', 5)))",
-    partition_name = "_partition_by_created_date",
+    partition_function = "to_timestamp_ntz(concat(split_part(file_name, '/', 3),'-',split_part(file_name, '/', 4),'-',split_part(file_name, '/', 5),' ',split_part(file_name, '/', 6),':00:00.000'))",
+    partition_name = "_partition_by_created_date_hour",
     unique_key = "block_id",
     other_cols = "tx_id,index,inner_index,program_id,_partition_by_block_id"
 ) }}
diff --git a/models/bronze/streamline/bronze__streamline_decoded_instructions_2.sql b/models/bronze/streamline/bronze__streamline_decoded_instructions_2.sql
index dc7f8aa9..f57676b7 100644
--- a/models/bronze/streamline/bronze__streamline_decoded_instructions_2.sql
+++ b/models/bronze/streamline/bronze__streamline_decoded_instructions_2.sql
@@ -5,8 +5,8 @@
 {% set model = "decoded_instructions_2" %}
 {{ streamline_external_table_query(
     model,
-    partition_function = "to_date(concat_ws('-', split_part(file_name, '/', 3),split_part(file_name, '/', 4), split_part(file_name, '/', 5)))",
-    partition_name = "_partition_by_created_date",
+    partition_function = "to_timestamp_ntz(concat(split_part(file_name, '/', 3),'-',split_part(file_name, '/', 4),'-',split_part(file_name, '/', 5),' ',split_part(file_name, '/', 6),':00:00.000'))",
+    partition_name = "_partition_by_created_date_hour",
     unique_key = "block_id",
     other_cols = "tx_id,index,inner_index,program_id,_partition_by_block_id"
 ) }}
diff --git a/models/silver/parser/silver__decoded_instructions.sql b/models/silver/parser/silver__decoded_instructions.sql
index b18a8b33..c326d68a 100644
--- a/models/silver/parser/silver__decoded_instructions.sql
+++ b/models/silver/parser/silver__decoded_instructions.sql
@@ -53,7 +53,7 @@ WHERE
             {{ this }}
     )
 AND
-    A._partition_by_created_date >= current_date - 1
+    A._partition_by_created_date_hour >= dateadd('hour', -3, current_timestamp())
 {% endif %}
 
 qualify(ROW_NUMBER() over (PARTITION BY tx_id, INDEX, coalesce(inner_index,-1)
diff --git a/models/streamline/decode_instructions/streamline__complete_decoded_instructions_2.sql b/models/streamline/decode_instructions/streamline__complete_decoded_instructions_2.sql
index 16fe6be0..4baed7a0 100644
--- a/models/streamline/decode_instructions/streamline__complete_decoded_instructions_2.sql
+++ b/models/streamline/decode_instructions/streamline__complete_decoded_instructions_2.sql
@@ -26,6 +26,8 @@ WHERE
         FROM
             {{ this }}
     )
+AND
+    _partition_by_created_date_hour >= dateadd('hour', -3, current_timestamp())
 {% else %}
     {{ ref('bronze__streamline_FR_decoded_instructions_2') }}
 {% endif %}