From ac3cb906a860017b3f496b0fb1584881cda29e7a Mon Sep 17 00:00:00 2001 From: Julius Remigio <14811322+juls858@users.noreply.github.com> Date: Tue, 23 May 2023 14:27:55 -0700 Subject: [PATCH 1/5] - add datashares model and macros --- macros/datashares.sql | 160 +++++++++++++++++++++++++++++ models/_datashare/_create_gold.sql | 20 ++++ 2 files changed, 180 insertions(+) create mode 100644 macros/datashares.sql create mode 100644 models/_datashare/_create_gold.sql diff --git a/macros/datashares.sql b/macros/datashares.sql new file mode 100644 index 0000000..3f1042f --- /dev/null +++ b/macros/datashares.sql @@ -0,0 +1,160 @@ +{%- macro fsc_utils.get_ancestors(node, include_depth=false, exclude_source=false) -%} +{# + Return a list of ancestors for a node in a DAG. + #} + {%- for dep in node.depends_on.nodes | unique | list recursive %} + {% if dep.startswith("model.") and "bronze__" not in dep %} + "{{- loop.depth0 ~ '-'if include_depth else '' }}{{node.config.materialized }}-{{ dep -}}", + {{- loop(graph.nodes[dep].depends_on.nodes) -}} + {% elif not exclude_source %} + "{{- loop.depth0 ~ '-'if include_depth else '' }}{{node.config.materialized }}-{{ dep -}}", + {%- endif -%} + {%- endfor %} +{%- endmacro -%} + +{% macro fsc_utils.get_view_ddl() %} +{# + Return a dictionary of view names and their DDL statements. + The DDL statements are escaped to be used in a Snowflake query. + The dictionary is converted to JSON to be used in a dbt macro.. + #} + {% if execute %} + {% set query %} + SELECT + CONCAT_WS('.', TABLE_SCHEMA, TABLE_NAME) as VIEW_NAME, + VIEW_DEFINITION + FROM {{target.database}}.INFORMATION_SCHEMA.VIEWS + WHERE TABLE_SCHEMA NOT IN ('INFORMATION_SCHEMA', 'STREAMLINE') + AND TABLE_SCHEMA NOT LIKE 'TEST_%' + {%- endset -%} + {%- set results = run_query(query) -%} + {% set ddl = {} %} + {% for key, value in results.rows %} + {%- do ddl.update({key: value|replace("$$", "\$\$")}) -%} + {%- endfor -%} + {{- tojson(ddl) -}} + {%- endif -%} +{%- endmacro -%} + +{% macro fsc_utils.replace_database_references(references_to_replace, ddl, new_database) %} +{# + Return the DDL statement for a view with the references replaced. + + references_to_replace: a dictionary of references to replace + ddl: the DDL statement to replace the references in + new_database: the new database to replace the references with +#} + {% set outer = namespace(replaced=ddl) %} + {% for key in references_to_replace %} + {%- set original = target.database ~ "." ~ key.upper() -%} + {%- set replacement = new_database ~ "." ~ key -%} + {%- set outer.replaced = outer.replaced|replace(original, replacement) -%} + {%- set original = target.database ~ "." ~ key.lower() -%} + {%- set replacement = new_database ~ "." ~ key -%} + {%- set outer.replaced = outer.replaced|replace(original, replacement) -%} + {%- endfor -%} + {% set outer.replaced = outer.replaced|replace(target.database.upper() ~ ".", "__SOURCE__.") %} + {% set outer.replaced = outer.replaced|replace(target.database.lower() ~ ".", "__SOURCE__.") %} + {{- outer.replaced -}} +{%- endmacro -%} + +{% macro fsc_utils.generate_view_ddl(dag, schema) %} +{# + Return a list of DDL statements for views in a DAG. + + dag: a DAG of views + schema: schemas to create schema DDL for + #} + {%- set ddl = fromjson(fsc_utils.get_view_ddl()) -%} + {%- set created = {} -%} + {%- set final_text = [] -%} + {%- for view, deps in dag.items() -%} + {%- for d in deps -%} + {%- set table_name = d.split(".")[-1].replace("__", ".").upper() -%} + {%- if ddl.get(table_name) and table_name not in created -%} + {%- set replaced = fsc_utils.replace_database_references(ddl.keys(), ddl[table_name], "__NEW__") -%} + {%- do final_text.append(replaced) -%} + {%- do created.update({table_name:true}) -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- set schema_ddl = [] -%} + {%- for s in schema -%} + {%- do schema_ddl.append("CREATE SCHEMA IF NOT EXISTS __NEW__." ~ s ~ ";") -%} + {%- endfor -%} + {{- toyaml(schema_ddl + final_text) -}} +{%- endmacro -%} + +{% macro fsc_utils.generate_dag_and_schemas(node_paths, materializations) %} +{# + Return a DAG of views and a list of schemas to create. + + node_paths: a list of node paths to include in the DAG + materializations: a list of materializations to include in the DAG + #} + {%- set dag = {} -%} + {%- set schema = [] -%} + {%- for key, value in graph.nodes.items() -%} + {% + if value.refs + and set(value.fqn).intersection(node_paths) + and value.config.materialized in materializations + and value.config.enabled + and not value.sources + and not key.endswith("_create_gold") + -%} + {%- set name = value.schema + "." + value.alias -%} + {%- set _result = fromyaml("[" ~ fsc_utils.get_ancestors(value, exclude_source=true)[:-1] ~ "]") -%} + {% if _result -%} + {%- do _result.insert(0, key) -%} + {%- do dag.update({name.upper() : _result | reverse|list}) -%} + {% for d in _result -%} + {%- if d.split(".")[-1].split("__")[0] not in schema -%} + {%- do schema.append(d.split(".")[-1].split("__")[0]) -%} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {%- do dag.update({name.upper() : [key] }) -%} + {%- if value.schema not in schema -%} + {%- do schema.append(value.schema) -%} + {%- endif -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {%- set final = {"dag": dag, "schema": schema} -%} + {{- tojson(final) -}} +{%- endmacro -%} + +{% macro fsc_utils.generate_table_views_ddl(tables, schema) %} +{# + Return a list of DDL statements for views of tables from a list. + + tables: a list of tables to create views for + schema: schemas to create schema DDL for + #} + {%- set schema_ddl = [] -%} + {%- set view_ddl = [] -%} + {% for s in schema %} + {%- do schema_ddl.append("CREATE SCHEMA IF NOT EXISTS __NEW__." ~ s ~ ";") -%} + {%- endfor -%} + {% for table in tables %} + {%- do view_ddl.append("CREATE OR REPLACE VIEW __NEW__." ~ table ~ " AS SELECT * FROM " ~ "__SOURCE__." ~ table ~";") -%} + {%- endfor -%} + {{- toyaml(schema_ddl + view_ddl) -}} +{%- endmacro -%} + +{% macro fsc_utils.generate_datashare_ddl() %} +{# + generate DDL for datashare + + Return: DDL for datashare + #} + {%- set gold_views = fromjson(fsc_utils.generate_dag_and_schemas(["gold"], ["view"])) -%} + {%- set gold_views_ddl = fromyaml(fsc_utils.generate_view_ddl(gold_views["dag"], gold_views["schema"])) -%} + {%- set gold_tables = fromjson(fsc_utils.generate_dag_and_schemas(["gold"], ["incremental", "table"])) -%} + {%- set gold_tables_ddl = fromyaml(fsc_utils.generate_table_views_ddl(gold_tables["dag"].keys(), gold_tables["schema"])) -%} + {%- set combined_ddl = gold_views_ddl + gold_tables_ddl -%} + {%- do combined_ddl.insert(0, "CREATE DATABASE IF NOT EXISTS __NEW__;") -%} + {{- "BEGIN\n" ~ (combined_ddl | join("\n")) ~ "\nEND" -}} +{%- endmacro -%} + diff --git a/models/_datashare/_create_gold.sql b/models/_datashare/_create_gold.sql new file mode 100644 index 0000000..8439f58 --- /dev/null +++ b/models/_datashare/_create_gold.sql @@ -0,0 +1,20 @@ +{{ + config( + materialized = 'incremental', + incremental_strategy = 'merge', + unique_key = 'ddl_hash', + merge_update_columns = [], + ) +}} +{% if execute %} +SELECT +$${{- fsc_utils.generate_datashare_ddl() -}}$$ AS ddl, +md5(ddl) AS ddl_hash, +sysdate() as ddl_created_at +{% else %} +SELECT +null as ddl, +null as ddl_hash, +null as ddl_created_at +from dual limit 0 +{% endif %} \ No newline at end of file From 0bd7b8d354ef11df89791e9cc8e6137adb6acd43 Mon Sep 17 00:00:00 2001 From: Julius Remigio <14811322+juls858@users.noreply.github.com> Date: Tue, 23 May 2023 14:35:31 -0700 Subject: [PATCH 2/5] - fixed macro names --- macros/datashares.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macros/datashares.sql b/macros/datashares.sql index 3f1042f..2a1ef7b 100644 --- a/macros/datashares.sql +++ b/macros/datashares.sql @@ -1,4 +1,4 @@ -{%- macro fsc_utils.get_ancestors(node, include_depth=false, exclude_source=false) -%} +{%- macro get_ancestors(node, include_depth=false, exclude_source=false) -%} {# Return a list of ancestors for a node in a DAG. #} @@ -12,7 +12,7 @@ {%- endfor %} {%- endmacro -%} -{% macro fsc_utils.get_view_ddl() %} +{% macro get_view_ddl() %} {# Return a dictionary of view names and their DDL statements. The DDL statements are escaped to be used in a Snowflake query. @@ -36,7 +36,7 @@ {%- endif -%} {%- endmacro -%} -{% macro fsc_utils.replace_database_references(references_to_replace, ddl, new_database) %} +{% macro replace_database_references(references_to_replace, ddl, new_database) %} {# Return the DDL statement for a view with the references replaced. @@ -58,7 +58,7 @@ {{- outer.replaced -}} {%- endmacro -%} -{% macro fsc_utils.generate_view_ddl(dag, schema) %} +{% macro generate_view_ddl(dag, schema) %} {# Return a list of DDL statements for views in a DAG. @@ -85,7 +85,7 @@ {{- toyaml(schema_ddl + final_text) -}} {%- endmacro -%} -{% macro fsc_utils.generate_dag_and_schemas(node_paths, materializations) %} +{% macro generate_dag_and_schemas(node_paths, materializations) %} {# Return a DAG of views and a list of schemas to create. @@ -125,7 +125,7 @@ {{- tojson(final) -}} {%- endmacro -%} -{% macro fsc_utils.generate_table_views_ddl(tables, schema) %} +{% macro generate_table_views_ddl(tables, schema) %} {# Return a list of DDL statements for views of tables from a list. @@ -143,7 +143,7 @@ {{- toyaml(schema_ddl + view_ddl) -}} {%- endmacro -%} -{% macro fsc_utils.generate_datashare_ddl() %} +{% macro generate_datashare_ddl() %} {# generate DDL for datashare From a1acd59b77c1e747a73f54dbeadca5ce5eea0d81 Mon Sep 17 00:00:00 2001 From: Julius Remigio <14811322+juls858@users.noreply.github.com> Date: Tue, 23 May 2023 14:37:51 -0700 Subject: [PATCH 3/5] - rename model to include schema --- .../_datashare/{_create_gold.sql => _datashare___create_gold.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename models/_datashare/{_create_gold.sql => _datashare___create_gold.sql} (100%) diff --git a/models/_datashare/_create_gold.sql b/models/_datashare/_datashare___create_gold.sql similarity index 100% rename from models/_datashare/_create_gold.sql rename to models/_datashare/_datashare___create_gold.sql From b5dfb94b605f9cad3a0c4f98ee51df237d1cf2c6 Mon Sep 17 00:00:00 2001 From: Julius Remigio <14811322+juls858@users.noreply.github.com> Date: Tue, 23 May 2023 14:56:08 -0700 Subject: [PATCH 4/5] - rename --- .../_datashare___create_gold.sql => datashare/_create_gold.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename models/{_datashare/_datashare___create_gold.sql => datashare/_create_gold.sql} (100%) diff --git a/models/_datashare/_datashare___create_gold.sql b/models/datashare/_create_gold.sql similarity index 100% rename from models/_datashare/_datashare___create_gold.sql rename to models/datashare/_create_gold.sql From a3e2c1b535c987d52ba8bade63f1a1e2b442d803 Mon Sep 17 00:00:00 2001 From: Julius Remigio <14811322+juls858@users.noreply.github.com> Date: Tue, 23 May 2023 14:59:33 -0700 Subject: [PATCH 5/5] - rename --- .../datashare/{_create_gold.sql => _datashare___create_gold.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename models/datashare/{_create_gold.sql => _datashare___create_gold.sql} (100%) diff --git a/models/datashare/_create_gold.sql b/models/datashare/_datashare___create_gold.sql similarity index 100% rename from models/datashare/_create_gold.sql rename to models/datashare/_datashare___create_gold.sql