diff --git a/dbt_project.yml b/dbt_project.yml index d796a060..52ad4fc0 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -48,17 +48,18 @@ vars: #Ghost Record Configuration datavault4dbt.beginning_of_all_times: {"bigquery":"0001-01-01T00-00-01","snowflake":"0001-01-01T00:00:01", "exasol": "0001-01-01 00:00:01", "postgres": "0001-01-01 00:00:01", "redshift": "0001-01-01 00:00:01", "synapse": "1901-01-01T00:00:01"} datavault4dbt.end_of_all_times: {"bigquery":"8888-12-31T23-59-59","snowflake":"8888-12-31T23:59:59", "exasol": "8888-12-31 23:59:59", "postgres": "8888-12-31 23:59:59", "redshift": "8888-12-31 23:59:59", "synapse": "8888-12-31T23:59:59"} - datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "%Y-%m-%dT%H-%M-%S", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126} + datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "YYYY-MM-DD HH24:MI:SS", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126} - datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"} - datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"} - datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "%Y-%m-%d", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"} + #datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"} + #datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": 
"8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"} + #datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "YYYY-MM-DD", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"} datavault4dbt.default_unknown_rsrc: 'SYSTEM' datavault4dbt.default_error_rsrc: 'ERROR' - datavault4dbt.rsrc_default_dtype: 'VARCHAR(255)' - datavault4dbt.stg_default_dtype: 'VARCHAR(255)' - datavault4dbt.derived_columns_default_dtype: 'VARCHAR(255)' + datavault4dbt.rsrc_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"} + datavault4dbt.timestamp_default_dtype: {"bigquery":"TIMESTAMP","snowflake":"TIMESTAMP_TZ", "exasol": "TIMESTAMP(3) WITH LOCAL TIME ZONE", "postgres": "TIMESTAMPTZ", "redshift": "TIMESTAMPTZ", "synapse": "datetimeoffset"} + datavault4dbt.stg_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"} + datavault4dbt.derived_columns_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"} #Datatype specific default values datavault4dbt.error_value__STRING: '(error)' @@ -81,4 +82,4 @@ models: +materialized: view raw_vault: +schema: - +materialized: table \ No newline at end of file + +materialized: table diff --git a/macros/internal/helpers/stage_processing_macros.sql b/macros/internal/helpers/stage_processing_macros.sql index daa2402b..6ecf2676 100644 --- a/macros/internal/helpers/stage_processing_macros.sql +++ b/macros/internal/helpers/stage_processing_macros.sql @@ -123,4 +123,4 @@ {%- endif %} {%- endfor -%} -{%- endmacro -%} +{%- endmacro -%} \ No newline at end of file diff --git a/macros/internal/metadata_processing/escape_column_names.sql 
b/macros/internal/metadata_processing/escape_column_names.sql index b12b4ceb..f923a0e5 100644 --- a/macros/internal/metadata_processing/escape_column_names.sql +++ b/macros/internal/metadata_processing/escape_column_names.sql @@ -139,8 +139,30 @@ {%- set escape_char_left = var('escape_char_left', "") -%} {%- set escape_char_right = var('escape_char_right', "") -%} + {%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%} + + {%- do return(escaped_column_name) -%} + +{%- endmacro -%} + +{%- macro redshift__escape_column_name(column) -%} + + {%- set escape_char_left = var('escape_char_left', '"') -%} + {%- set escape_char_right = var('escape_char_right', '"') -%} + + {%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%} + + {%- do return(escaped_column_name) -%} + +{%- endmacro -%} + +{%- macro exasol__escape_column_name(column) -%} + + {%- set escape_char_left = var('escape_char_left', '') -%} + {%- set escape_char_right = var('escape_char_right', '') -%} + {%- set escaped_column_name = escape_char_left ~ column | upper | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%} {%- do return(escaped_column_name) -%} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/staging/bigquery/stage.sql b/macros/staging/bigquery/stage.sql index e528b0cc..80ff3c38 100644 --- a/macros/staging/bigquery/stage.sql +++ b/macros/staging/bigquery/stage.sql @@ -177,7 +177,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# 
Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -205,7 +208,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -224,7 +227,7 @@ ldts_rsrc_data AS ( {%- set columns_without_excluded_columns_tmp = [] -%} {%- for column in columns_without_excluded_columns -%} - {%- if column.name not in derived_column_names -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} {%- do columns_without_excluded_columns_tmp.append(column) -%} {%- endif -%} {%- endfor -%} @@ -369,7 +372,7 @@ ma_hashdiff_prep AS ( SELECT {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%} - + {{ multi_active_config['main_hashkey_column'] }}, {# Generates only all hashdiffs. #} {{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }}, @@ -378,6 +381,10 @@ ma_hashdiff_prep AS ( FROM main_hashkey_generation GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }} + + {% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records dont get created #} + + ), hashed_columns AS ( @@ -389,9 +396,11 @@ hashed_columns AS ( {# Generates only all remaining hashkeys, that are no hashdiffs #} {%- if datavault4dbt.is_something(processed_remaining_hash_columns) %} - {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. 
#} + {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, + {%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #} {% endif -%} + {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #} main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #} diff --git a/macros/staging/derived_column_datatypes.sql b/macros/staging/derived_column_datatypes.sql index fabe0de5..a1e4a2e0 100644 --- a/macros/staging/derived_column_datatypes.sql +++ b/macros/staging/derived_column_datatypes.sql @@ -24,7 +24,7 @@ {%- if not datavault4dbt.is_attribute(column_value) -%} {# If the value is a static value, it is not an attribute and no datatype needs to be detected. Instead a default datatype is applied. #} - {%- set datatype = var('datavault4dbt.derived_columns_default_dtype', 'STRING') -%} + {%- set datatype = datavault4dbt.string_default_dtype(type='derived_columns') -%} {%- set value = column_value -%} {%- set col_size = "" -%} diff --git a/macros/staging/exasol/stage.sql b/macros/staging/exasol/stage.sql index 50e6402d..a7acf53a 100644 --- a/macros/staging/exasol/stage.sql +++ b/macros/staging/exasol/stage.sql @@ -172,7 +172,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'VARCHAR (2000000) UTF8') %} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -194,7 +197,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ 
load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -209,6 +212,14 @@ ldts_rsrc_data AS ( {%- set last_cte = "ldts_rsrc_data" -%} {%- set final_columns_to_select = alias_columns + final_columns_to_select %} + + {%- set columns_without_excluded_columns_tmp = [] -%} + {%- for column in columns_without_excluded_columns -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} + {%- do columns_without_excluded_columns_tmp.append(column) -%} + {%- endif -%} + {%- endfor -%} + {%- set columns_without_excluded_columns = columns_without_excluded_columns_tmp |list -%} ), {%- if datavault4dbt.is_something(missing_columns) %} @@ -302,6 +313,8 @@ prejoined_columns AS ( {# Adding derived columns to the selection #} derived_columns AS ( + {%- set final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) -%} + SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}, @@ -318,15 +331,16 @@ derived_columns AS ( {# Generating Hashed Columns (hashkeys and hashdiffs for Hubs/Links/Satellites) #} {% if datavault4dbt.is_something(multi_active_config) %} -{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[]) -%} +{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[], hashdiff_dict={}) -%} {%- for column in hashed_columns.keys() -%} - {%- if column | lower == multi_active_config['main_hashkey_column']| lower and not hashed_columns[column].is_hashdiff -%} + {%- if column == multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%} {%- do tmp_ns.main_hashkey_dict.update({column: hashed_columns[column]}) -%} {% elif column != multi_active_config['main_hashkey_column'] and not 
hashed_columns[column].is_hashdiff -%} {%- do tmp_ns.remaining_hashed_columns.update({column: hashed_columns[column]}) -%} {%- elif hashed_columns[column].is_hashdiff -%} {%- do tmp_ns.hashdiff_names.append(column) -%} + {%- do tmp_ns.hashdiff_dict.update({column: hashed_columns[column]}) -%} {%- endif -%} {%- endfor -%} @@ -345,14 +359,19 @@ ma_hashdiff_prep AS ( SELECT - {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(hashed_columns) -%} + {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%} + {{ multi_active_config['main_hashkey_column'] }}, {# Generates only all hashdiffs. #} {{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }}, {{ ldts_alias }} - FROM {{ last_cte }} - GROUP BY local.{{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }} + FROM main_hashkey_generation + GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }} + + + {% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records dont get created #} + ), @@ -360,16 +379,18 @@ hashed_columns AS ( SELECT - {{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. #} + {{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. 
#} {% set processed_remaining_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns) -%} {# Generates only all remaining hashkeys, that are no hashdiffs #} {%- if datavault4dbt.is_something(processed_remaining_hash_columns) %} - {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #} + {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, + {%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #} {% endif -%} + {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #} - main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #} + main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. 
#} FROM main_hashkey_generation LEFT JOIN ma_hashdiff_prep @@ -461,7 +482,7 @@ unknown_values AS ( {%- if datavault4dbt.is_something(processed_hash_columns) -%}, {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as "{{ hash_column }}" + CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- if not loop.last %},{% endif %} {%- endfor -%} @@ -528,7 +549,7 @@ error_values AS ( {%- if datavault4dbt.is_something(processed_hash_columns) -%}, {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as "{{ hash_column }}" + CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- if not loop.last %},{% endif %} {%- endfor -%} diff --git a/macros/staging/hash_columns.sql b/macros/staging/hash_columns.sql index f569493d..0a2beb1a 100644 --- a/macros/staging/hash_columns.sql +++ b/macros/staging/hash_columns.sql @@ -66,6 +66,7 @@ {%- endmacro -%} + {%- macro redshift__hash_columns(columns, multi_active_key, main_hashkey_column) -%} {%- if columns is mapping and columns is not none -%} @@ -89,33 +90,34 @@ {%- endif -%} - {%- else -%} - + {%- else -%} {% if columns[col] is mapping and columns[col].is_hashdiff -%} - {%- if columns[col].use_rtrim -%} - {%- set rtrim_hashdiff = true -%} - {%- else -%} - {%- set rtrim_hashdiff = false -%} - {%- endif -%} + {{- datavault4dbt.hash(columns=columns[col]['columns'], alias=col, - is_hashdiff=columns[col]['is_hashdiff'], - rtrim_hashdiff=rtrim_hashdiff) -}} + is_hashdiff=columns[col]['is_hashdiff']) -}} + {%- elif columns[col] is not mapping -%} + {{- datavault4dbt.hash(columns=columns[col], alias=col, is_hashdiff=false) -}} {%- elif columns[col] is mapping and not columns[col].is_hashdiff -%} + {%- if execute -%} {%- do exceptions.warn("[" ~ this ~ "] Warning: You provided a list of 
columns under a 'columns' key, but did not provide the 'is_hashdiff' flag. Use list syntax for PKs.") -%} {% endif %} + {{- datavault4dbt.hash(columns=columns[col]['columns'], alias=col) -}} + {%- endif -%} + {{- ",\n" if not loop.last -}} + {%- endif -%} {%- endfor -%} + {%- endif %} {%- endmacro -%} - diff --git a/macros/staging/postgres/stage.sql b/macros/staging/postgres/stage.sql index 3a133740..a6126412 100644 --- a/macros/staging/postgres/stage.sql +++ b/macros/staging/postgres/stage.sql @@ -162,7 +162,7 @@ {#- Setting unknown and error keys with default values for the selected hash algorithm -#} {%- set hash = datavault4dbt.hash_method() -%} -{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'STRING') -%} +{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'VARCHAR(32)') -%} {%- set hash_default_values = fromjson(datavault4dbt.hash_default_values(hash_function=hash,hash_datatype=hash_dtype)) -%} {%- set hash_alg = hash_default_values['hash_alg'] -%} {%- set unknown_key = hash_default_values['unknown_key'] -%} @@ -178,7 +178,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -206,7 +209,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -221,11 +224,10 @@ ldts_rsrc_data AS ( {%- set last_cte = "ldts_rsrc_data" -%} {%- set final_columns_to_select = alias_columns + final_columns_to_select %} - {%- set 
final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) | list -%} {%- set columns_without_excluded_columns_tmp = [] -%} {%- for column in columns_without_excluded_columns -%} - {%- if column.name not in derived_column_names -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} {%- do columns_without_excluded_columns_tmp.append(column) -%} {%- endif -%} {%- endfor -%} @@ -341,7 +343,7 @@ derived_columns AS ( {# Generating Hashed Columns (hashkeys and hashdiffs for Hubs/Links/Satellites) #} {% if datavault4dbt.is_something(multi_active_config) %} -{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[]) -%} +{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[], hashdiff_dict={}) -%} {%- for column in hashed_columns.keys() -%} {%- if column == multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%} @@ -350,6 +352,7 @@ derived_columns AS ( {%- do tmp_ns.remaining_hashed_columns.update({column: hashed_columns[column]}) -%} {%- elif hashed_columns[column].is_hashdiff -%} {%- do tmp_ns.hashdiff_names.append(column) -%} + {%- do tmp_ns.hashdiff_dict.update({column: hashed_columns[column]}) -%} {%- endif -%} {%- endfor -%} @@ -368,15 +371,20 @@ ma_hashdiff_prep AS ( SELECT - {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(hashed_columns) -%} + {% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%} + {{ multi_active_config['main_hashkey_column'] }}, {# Generates only all hashdiffs. 
#} {{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }}, {{ ldts_alias }} - FROM {{ last_cte }} + FROM main_hashkey_generation GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }} + + {% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records dont get created #} + + ), hashed_columns AS ( @@ -388,9 +396,11 @@ hashed_columns AS ( {# Generates only all remaining hashkeys, that are no hashdiffs #} {%- if datavault4dbt.is_something(processed_remaining_hash_columns) %} - {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #} + {{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, + {%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #} {% endif -%} + {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #} main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. 
#} diff --git a/macros/staging/redshift/stage.sql b/macros/staging/redshift/stage.sql index 262c6e53..b3b5c56f 100644 --- a/macros/staging/redshift/stage.sql +++ b/macros/staging/redshift/stage.sql @@ -37,7 +37,6 @@ {%- set source_relation = source(source_name, source_table_name) -%} {%- set all_source_columns = datavault4dbt.source_columns(source_relation=source_relation) -%} - {%- elif source_model is not mapping and source_model is not none -%} {{ log('source_model is not mapping and not none: ' ~ source_model, false) }} @@ -162,7 +161,7 @@ {#- Setting unknown and error keys with default values for the selected hash algorithm -#} {%- set hash = datavault4dbt.hash_method() -%} -{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'STRING') -%} +{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'VARCHAR(32)') -%} {%- set hash_default_values = fromjson(datavault4dbt.hash_default_values(hash_function=hash,hash_datatype=hash_dtype)) -%} {%- set hash_alg = hash_default_values['hash_alg'] -%} {%- set unknown_key = hash_default_values['unknown_key'] -%} @@ -178,7 +177,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -187,7 +189,6 @@ source_data AS ( SELECT {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(all_source_columns)) if all_source_columns else " *" }} - FROM {{ source_relation }} {% if is_incremental() %} @@ -206,7 +207,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ 
record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -221,11 +222,11 @@ ldts_rsrc_data AS ( {%- set last_cte = "ldts_rsrc_data" -%} {%- set final_columns_to_select = alias_columns + final_columns_to_select %} - {%- set final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) | list -%} + {# {%- set final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) | list -%} #} {%- set columns_without_excluded_columns_tmp = [] -%} {%- for column in columns_without_excluded_columns -%} - {%- if column.name not in derived_column_names -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} {%- do columns_without_excluded_columns_tmp.append(column) -%} {%- endif -%} {%- endfor -%} @@ -258,7 +259,7 @@ missing_columns AS ( prejoined_columns AS ( SELECT - {% if final_columns_to_select | length > 0 -%} + {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} {% endif %} {%- for col, vals in prejoined_columns.items() -%} @@ -436,9 +437,9 @@ unknown_values AS ( '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, - {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} + {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column and derived_columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} + {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='unknown') }} {%- if not 
loop.last %},{% endif -%} {%- endfor -%} @@ -469,7 +470,7 @@ unknown_values AS ( {% if column.name|lower == vals['bk']|lower -%} {{ log('column found? yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='unknown', alias=col) }} {%- endif -%} {%- endfor -%} @@ -508,7 +509,7 @@ error_values AS ( {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} + {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='error') }} {%- if not loop.last %},{% endif -%} {%- endfor -%} @@ -536,7 +537,7 @@ error_values AS ( {% for column in pj_relation_columns -%} {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='error', alias=col) -}} {%- endif -%} {%- endfor -%} {%- if not loop.last -%},{%- endif %} diff --git a/macros/staging/snowflake/stage.sql b/macros/staging/snowflake/stage.sql index b2b16185..38e5c92c 100644 --- a/macros/staging/snowflake/stage.sql +++ b/macros/staging/snowflake/stage.sql @@ -183,7 +183,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %} +{% set rsrc_default_dtype = 
datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -211,7 +214,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -229,7 +232,7 @@ ldts_rsrc_data AS ( {{ log('derived_column_names: '~ derived_column_names, false) }} {%- set columns_without_excluded_columns_tmp = [] -%} {%- for column in columns_without_excluded_columns -%} - {%- if column.name not in derived_column_names -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} {%- do columns_without_excluded_columns_tmp.append(column) -%} {%- endif -%} {%- endfor -%} diff --git a/macros/staging/synapse/stage.sql b/macros/staging/synapse/stage.sql index 2751b4c9..4cb39326 100644 --- a/macros/staging/synapse/stage.sql +++ b/macros/staging/synapse/stage.sql @@ -173,7 +173,10 @@ {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} {# Setting the rsrc default datatype and length #} -{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} WITH @@ -204,7 +207,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -223,7 +226,7 @@ ldts_rsrc_data AS ( {%- set 
columns_without_excluded_columns_tmp = [] -%} {%- for column in columns_without_excluded_columns -%} - {%- if column.name not in exclude_column_names -%} + {%- if column.name | lower not in derived_column_names | map('lower') -%} {%- do columns_without_excluded_columns_tmp.append(column) -%} {%- endif -%} {%- endfor -%} diff --git a/macros/supporting/datatypes.sql b/macros/supporting/datatypes.sql index 405da944..cf8d14c9 100644 --- a/macros/supporting/datatypes.sql +++ b/macros/supporting/datatypes.sql @@ -3,7 +3,7 @@ {%- endmacro -%} {%- macro default__type_timestamp() -%} - {{ type_timestamp() }} + {{ dbt.type_timestamp() }} {%- endmacro -%} {%- macro synapse__type_timestamp() -%} diff --git a/macros/supporting/end_of_all_times.sql b/macros/supporting/end_of_all_times.sql index 427c6294..e3b1a55b 100644 --- a/macros/supporting/end_of_all_times.sql +++ b/macros/supporting/end_of_all_times.sql @@ -126,4 +126,25 @@ {{ return(end_of_all_times) }} {%- endmacro -%} +{%- macro postgres__end_of_all_times() %} +{%- set global_var = var('datavault4dbt.end_of_all_times', none) -%} +{%- set end_of_all_times = '' -%} + +{%- if global_var is mapping -%} + {%- if 'postgres' in global_var.keys()|map('lower') -%} + {% set end_of_all_times = global_var['postgres'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.end_of_all_times' to a dictionary, but have not included the adapter you use (postgres) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set end_of_all_times = "8888-12-31 23:59:59" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set end_of_all_times = global_var -%} +{%- else -%} + {%- set end_of_all_times = "8888-12-31 23:59:59" -%} +{%- endif -%} + +{{ return(end_of_all_times) }} +{%- endmacro -%} diff --git a/macros/supporting/ghost_record_per_datatype.sql b/macros/supporting/ghost_record_per_datatype.sql index 27a22a82..3f67a249 100644 --- a/macros/supporting/ghost_record_per_datatype.sql +++ b/macros/supporting/ghost_record_per_datatype.sql @@ -76,7 +76,7 @@ {%- if ghost_record_type == 'unknown' -%} {%- if datatype == 'TIMESTAMP' or datatype == 'TIMESTAMP WITH LOCAL TIMEZONE' %} {{- datavault4dbt.string_to_timestamp( timestamp_format , beginning_of_all_times) }} as "{{ column_name }}" - {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" + {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} {%- elif datatype.upper().startswith('VARCHAR') -%} {%- if col_size is not none -%} {%- set unknown_dtype_length = col_size | int -%} @@ -89,22 +89,22 @@ {%- set unknown_dtype_length = inside_parenthesis | int -%} {%- endif -%} {%- if unknown_dtype_length < unknown_value__STRING|length -%} - CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }} ) as "{{ alias }}" + CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }} ) as {{ alias }} {%- else -%} - CAST('{{ unknown_value__STRING }}' as {{ datatype }} ) as "{{ alias }}" + CAST('{{ unknown_value__STRING }}' as {{ datatype }} ) as {{ alias }} {%- endif -%} - {%- elif datatype.upper().startswith('CHAR') -%} CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }}) as "{{ alias }}" - {%- elif datatype.upper().startswith('DECIMAL') -%} CAST('0' as {{ datatype }}) as "{{ alias }}" - {%- elif datatype == 'DOUBLE PRECISION' %} 
CAST('0' as DOUBLE PRECISION) as "{{ alias }}" - {%- elif datatype == 'BOOLEAN' %} FALSE as "{{ alias }}" - {%- elif datatype.upper().startswith('HASHTYPE') -%} CAST('{{ unknown_value__HASHTYPE }}' as {{ datatype }}) as "{{ alias }}" - {%- else %} CAST(NULL as {{ datatype }}) as "{{ alias }}" + {%- elif datatype.upper().startswith('CHAR') -%} CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }}) as {{ alias }} + {%- elif datatype.upper().startswith('DECIMAL') -%} CAST('0' as {{ datatype }}) as {{ alias }} + {%- elif datatype == 'DOUBLE PRECISION' %} CAST('0' as DOUBLE PRECISION) as {{ alias }} + {%- elif datatype == 'BOOLEAN' %} FALSE as {{ alias }} + {%- elif datatype.upper().startswith('HASHTYPE') -%} CAST('{{ unknown_value__HASHTYPE }}' as {{ datatype }}) as {{ alias }} + {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} {%- elif ghost_record_type == 'error' -%} {%- if datatype == 'TIMESTAMP' or datatype == 'TIMESTAMP WITH LOCAL TIME ZONE' %} {{- datavault4dbt.string_to_timestamp( timestamp_format , end_of_all_times) }} as "{{ column_name }}" - {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" + {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} {%- elif datatype.upper().startswith('VARCHAR') -%} {%- if col_size is not none -%} {%- set error_dtype_length = col_size | int -%} @@ -117,16 +117,16 @@ {%- set error_dtype_length = inside_parenthesis | int -%} {%- endif -%} {%- if error_dtype_length < error_value__STRING|length -%} - CAST('{{ error_value_alt__STRING }}' as {{ datatype }} ) as "{{ alias }}" + CAST('{{ error_value_alt__STRING }}' as {{ datatype }} ) as {{ alias }} {%- else -%} - CAST('{{ error_value__STRING }}' as {{ datatype }} ) as "{{ alias }}" + CAST('{{ error_value__STRING }}' as {{ datatype }} ) as {{ alias }} {%- endif -%} - {%- elif datatype.upper().startswith('CHAR') -%} CAST('{{ error_value_alt__STRING }}' as 
{{ datatype }}) as "{{ alias }}" - {%- elif datatype.upper().startswith('DECIMAL') -%} CAST('-1' as {{ datatype }}) as "{{ alias }}" - {%- elif datatype == 'DOUBLE PRECISION' %} CAST('-1' as DOUBLE PRECISION) as "{{ alias }}" - {%- elif datatype == 'BOOLEAN' %} FALSE as "{{ alias }}" - {%- elif datatype.upper().startswith('HASHTYPE') -%} CAST('{{ error_value__HASHTYPE }}' as {{ datatype }}) as "{{ alias }}" - {%- else %} CAST(NULL as {{ datatype }}) as "{{ alias }}" + {%- elif datatype.upper().startswith('CHAR') -%} CAST('{{ error_value_alt__STRING }}' as {{ datatype }}) as {{ alias }} + {%- elif datatype.upper().startswith('DECIMAL') -%} CAST('-1' as {{ datatype }}) as {{ alias }} + {%- elif datatype == 'DOUBLE PRECISION' %} CAST('-1' as DOUBLE PRECISION) as {{ alias }} + {%- elif datatype == 'BOOLEAN' %} FALSE as {{ alias }} + {%- elif datatype.upper().startswith('HASHTYPE') -%} CAST('{{ error_value__HASHTYPE }}' as {{ datatype }}) as {{ alias }} + {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} {%- else -%} @@ -162,7 +162,7 @@ {%- if ghost_record_type == 'unknown' -%} {%- if datatype in ['TIMESTAMP_NTZ','TIMESTAMP'] %}{{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} AS {{ alias }} {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} - {%- elif datatype in ['STRING', 'VARCHAR'] %}'{{ unknown_value__STRING }}' AS {{ alias }} + {%- elif datatype in ['STRING', 'VARCHAR','TEXT'] %}'{{ unknown_value__STRING }}' AS {{ alias }} {%- elif datatype == 'CHAR' %}CAST('{{ unknown_value_alt__STRING }}' as {{ datatype }} ) as {{ alias }} {%- elif datatype.upper().startswith('VARCHAR(') or datatype.upper().startswith('CHAR(') -%} {%- if col_size is not none -%} @@ -187,7 +187,7 @@ {%- elif ghost_record_type == 'error' -%} {%- if datatype in ['TIMESTAMP_NTZ','TIMESTAMP'] %}{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} AS {{ alias }} {%- 
elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} - {%- elif datatype in ['STRING','VARCHAR'] %}'{{ error_value__STRING }}' AS {{ alias }} + {%- elif datatype in ['STRING','VARCHAR','TEXT'] %}'{{ error_value__STRING }}' AS {{ alias }} {%- elif datatype == 'CHAR' %}CAST('{{ error_value_alt__STRING }}' as {{ datatype }} ) as {{ alias }} {%- elif datatype.upper().startswith('VARCHAR(') or datatype.upper().startswith('CHAR(') -%} {%- if col_size is not none -%} @@ -304,14 +304,12 @@ {%- macro postgres__ghost_record_per_datatype(column_name, datatype, ghost_record_type, col_size, alias) -%} - {%- set beginning_of_all_times = datavault4dbt.beginning_of_all_times() -%} {%- set end_of_all_times = datavault4dbt.end_of_all_times() -%} {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set beginning_of_all_times_date = var('datavault4dbt.beginning_of_all_times_date', '0001-01-01') -%} {%- set end_of_all_times_date = var('datavault4dbt.end_of_all_times_date', '8888-12-31') -%} - {%- set date_format = var('datavault4dbt.date_format', 'YYYY-mm-dd') -%} {%- set unknown_value__STRING = var('datavault4dbt.unknown_value__STRING', '(unknown)') -%} @@ -319,20 +317,22 @@ {%- set datatype = datatype | string | upper | trim -%} {%- if ghost_record_type == 'unknown' -%} - {%- if datatype == 'TIMESTAMP' %} {{ datavault4dbt.string_to_timestamp( timestamp_format , beginning_of_all_times) }} as {{ alias }} - {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" - {%- elif datatype == 'STRING' %} '{{unknown_value__STRING}}' as {{ alias }} - {%- elif datatype == 'INT64' %} CAST('0' as INT64) as {{ alias }} - {%- elif datatype == 'FLOAT64' %} CAST('0' as FLOAT64) as {{ alias }} + {%- if 'TIMESTAMP' in datatype %}{{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times_date) }} AS {{ alias }} + {%- elif datatype == 'DATE'-%} TO_DATE('{{ 
beginning_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} + {%- elif datatype == 'TEXT' %} CAST('{{unknown_value__STRING}}' as TEXT) as {{ alias }} + {%- elif datatype == 'VARCHAR' %} CAST('{{unknown_value__STRING}}' as VARCHAR) as {{ alias }} + {%- elif datatype == 'INTEGER' %} CAST('0' as INTEGER) as {{ alias }} + {%- elif datatype == 'DOUBLE PRECISION' %} CAST('0' as DOUBLE PRECISION) as {{ alias }} {%- elif datatype == 'BOOLEAN' %} CAST('FALSE' as BOOLEAN) as {{ alias }} {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} -{%- elif ghost_record_type == 'error' -%} - {%- if datatype == 'TIMESTAMP' %} {{ datavault4dbt.string_to_timestamp( timestamp_format , end_of_all_times) }} as {{ alias }} - {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" - {%- elif datatype == 'STRING' %} '{{error_value__STRING}}' as {{ alias }} - {%- elif datatype == 'INT64' %} CAST('-1' as INT64) as {{ alias }} - {%- elif datatype == 'FLOAT64' %} CAST('-1' as FLOAT64) as {{ alias }} +{%- elif ghost_record_type == 'error' -%} + {%- if 'TIMESTAMP' in datatype %}{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} as {{ alias }} + {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} + {%- elif datatype == 'TEXT' %} CAST('{{error_value__STRING}}' as TEXT) as {{ alias }} + {%- elif datatype == 'VARCHAR' %} CAST('{{error_value__STRING}}' as VARCHAR) as {{ alias }} + {%- elif datatype == 'INTEGER' %} CAST('-1' as INTEGER) as {{ alias }} + {%- elif datatype == 'DOUBLE PRECISION' %} CAST('-1' as DOUBLE PRECISION) as {{ alias }} {%- elif datatype == 'BOOLEAN' %} CAST('FALSE' as BOOLEAN) as {{ alias }} {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} @@ -356,36 +356,39 @@ {%- set unknown_value__STRING = var('datavault4dbt.unknown_value__STRING', '(unknown)') -%} {%- set error_value__STRING = 
var('datavault4dbt.error_value__STRING', '(error)') -%} + +{%- set hash = datavault4dbt.hash_method() -%} +{%- set hash_default_values = datavault4dbt.hash_default_values(hash_function=hash) -%} +{%- set hash_alg= hash_default_values['hash_alg'] -%} +{%- set unknown_value__HASHTYPE = hash_default_values['unknown_key'] -%} +{%- set error_value__HASHTYPE = hash_default_values['error_key'] -%} + {%- set datatype = datatype | string | upper | trim -%} {%- if ghost_record_type == 'unknown' -%} - {%- if datatype == 'TIMESTAMP' %} {{ datavault4dbt.string_to_timestamp( timestamp_format , beginning_of_all_times) }} as {{ alias }} - {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" - {%- elif datatype == 'VARCHAR' %} '{{unknown_value__STRING}}' as {{ alias }} - {%- elif datatype == 'CHARACTER' %} '{{unknown_value__STRING}}' as {{ alias }} - {%- elif datatype == 'INT' %} CAST('0' as INT) as {{ alias }} - {%- elif datatype == 'INT2' %} CAST('0' as INT2) as {{ alias }} - {%- elif datatype == 'INT8' %} CAST('0' as INT8) as {{ alias }} - {%- elif datatype == 'NUMERIC' %} CAST('0' as NUMERIC) as {{ alias }} - {%- elif datatype == 'FLOAT4' %} CAST('0' as FLOAT4) as {{ alias }} - {%- elif datatype == 'FLOAT' %} CAST('0' as FLOAT) as {{ alias }} - {%- elif datatype == 'BOOLEAN' %} CAST('FALSE' as BOOLEAN) as {{ alias }} - {%- elif datatype == 'VARBINARY' %} 'NULL'::varbyte as {{ alias }} + {%- if 'TIMESTAMP' in datatype %}{{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times_date) }} AS {{ alias }} + {%- elif datatype == 'TIMETZ' %} CAST('00:00:01 UTC' as TIMETZ) as {{ alias }} + {%- elif datatype == 'TIME' %} CAST('00:00:01' as TIME) as {{ alias }} + {%- elif datatype == 'DATE'-%} TO_DATE('{{ beginning_of_all_times_date }}', '{{ date_format }}' ) as {{ alias }} + {%- elif 'CHAR' in datatype or datatype == 'TEXT' %} '{{unknown_value__STRING}}' as {{ alias }} + {%- elif datatype in ['INTEGER', 
'INT', 'INT2', 'INT4', 'INT8', 'SMALLINT', 'BIGINT', 'REAL', 'FLOAT4', 'DOUBLE PRECISION', 'DOUBLE', 'FLOAT', 'FLOAT8'] %} CAST(0 as {{ datatype }}) as {{ alias }} + {%- elif 'DECIMAL' in datatype or 'NUMERIC' in datatype %} CAST(0 as {{ datatype }}) as {{ alias }} + {%- elif datatype in ['BOOLEAN', 'BOOL'] %} CAST('FALSE' as BOOLEAN) as {{ alias }} + {%- elif datatype in ['VARBYTE', 'VARBINARY', 'BINARY VARYING'] %} CAST('{{ unknown_value__HASHTYPE }}' as {{ datatype }}) as {{ alias }} + {%- elif datatype == 'GEOMETRY' %} CAST(ST_POINT(0, 90) as {{ datatype }}) as {{ alias }} {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} {%- elif ghost_record_type == 'error' -%} - {%- if datatype == 'TIMESTAMP' %} {{ datavault4dbt.string_to_timestamp( timestamp_format , end_of_all_times) }} as {{ alias }} - {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', '{{ date_format }}' ) as "{{ alias }}" - {%- elif datatype == 'VARCHAR' %} '{{error_value__STRING}}' as {{ alias }} - {%- elif datatype == 'CHARACTER' %} '{{error_value__STRING}}' as {{ alias }} - {%- elif datatype == 'INT' %} CAST('-1' as INT) as {{ alias }} - {%- elif datatype == 'INT2' %} CAST('-1' as INT2) as {{ alias }} - {%- elif datatype == 'INT8' %} CAST('-1' as INT8) as {{ alias }} - {%- elif datatype == 'NUMERIC' %} CAST('-1' as NUMERIC) as {{ alias }} - {%- elif datatype == 'FLOAT4' %} CAST('-1' as FLOAT4) as {{ alias }} - {%- elif datatype == 'FLOAT' %} CAST('-1' as FLOAT) as {{ alias }} - {%- elif datatype == 'BOOLEAN' %} CAST('FALSE' as BOOLEAN) as {{ alias }} - {%- elif datatype == 'VARBINARY' %} 'NULL'::varbyte as {{ alias }} + {%- if 'TIMESTAMP' in datatype %}{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} AS {{ alias }} + {%- elif datatype == 'TIMETZ' %} CAST('23:59:59 UTC' as TIMETZ) as {{ alias }} + {%- elif datatype == 'TIME' %} CAST('23:59:59' as TIME) as {{ alias }} + {%- elif datatype == 'DATE'-%} TO_DATE('{{ end_of_all_times_date }}', 
'{{ date_format }}' ) as {{ alias }} + {%- elif 'CHAR' in datatype or datatype == 'TEXT' %} '{{error_value__STRING}}' as {{ alias }} + {%- elif datatype in ['INTEGER', 'INT', 'INT2', 'INT4', 'INT8', 'SMALLINT', 'BIGINT', 'REAL', 'FLOAT4', 'DOUBLE PRECISION', 'DOUBLE', 'FLOAT', 'FLOAT8'] %} CAST(-1 as {{ datatype }}) as {{ alias }} + {%- elif 'DECIMAL' in datatype or 'NUMERIC' in datatype %} CAST(-1 as {{ datatype }}) as {{ alias }} + {%- elif datatype in ['BOOLEAN', 'BOOL'] %} CAST('FALSE' as BOOLEAN) as {{ alias }} + {%- elif datatype in ['VARBYTE', 'VARBINARY', 'BINARY VARYING'] %} CAST('{{ error_value__HASHTYPE }}' as {{ datatype }}) as {{ alias }} + {%- elif datatype == 'GEOMETRY' %} CAST(ST_POINT(0, 90) as {{ datatype }}) as {{ alias }} {%- else %} CAST(NULL as {{ datatype }}) as {{ alias }} {% endif %} {%- else -%} @@ -393,5 +396,4 @@ {{ exceptions.raise_compiler_error("Invalid Ghost Record Type. Accepted are 'unknown' and 'error'.") }} {%- endif %} {%- endif -%} - {%- endmacro -%} diff --git a/macros/supporting/hash.sql b/macros/supporting/hash.sql index 6c8cecf0..c6d2ff9c 100644 --- a/macros/supporting/hash.sql +++ b/macros/supporting/hash.sql @@ -223,7 +223,7 @@ {%- endmacro -%} -{%- macro postgres__hash(columns, alias, is_hashdiff, multi_active_key, main_hashkey_column, rtrim_hashdiff) -%} +{%- macro postgres__hash(columns, alias, is_hashdiff, multi_active_key, main_hashkey_column) -%} {%- set hash = var('datavault4dbt.hash', 'MD5') -%} @@ -244,7 +244,11 @@ {%- set unknown_key = hash_default_values['unknown_key'] -%} {%- set error_key = hash_default_values['error_key'] -%} -{%- set attribute_standardise = datavault4dbt.attribute_standardise() %} +{%- if is_hashdiff -%} + {%- set attribute_standardise = datavault4dbt.attribute_standardise(hash_type='hashdiff') %} +{%- else -%} + {%- set attribute_standardise = datavault4dbt.attribute_standardise(hash_type='hashkey') %} +{%- endif -%} {#- If single column to hash -#} @@ -295,7 +299,7 @@ {%- endmacro -%} 
-{%- macro redshift__hash(columns, alias, is_hashdiff, multi_active_key, main_hashkey_column, rtrim_hashdiff) -%} +{%- macro redshift__hash(columns, alias, is_hashdiff, multi_active_key, main_hashkey_column) -%} {%- set hash = var('datavault4dbt.hash', 'MD5') -%} {%- set concat_string = var('concat_string', '|') -%} @@ -306,7 +310,7 @@ {%- set hashdiff_input_case_sensitive = var('datavault4dbt.hashdiff_input_case_sensitive', TRUE) -%} {#- Select hashing algorithm -#} -{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'VARCHAR') -%} +{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'VARCHAR(32)') -%} {{ log('hash type in hash macro: ' ~ hash_dtype, false) }} {%- set hash_default_values = fromjson(datavault4dbt.hash_default_values(hash_function=hash,hash_datatype=hash_dtype)) -%} {%- set hash_alg = hash_default_values['hash_alg'] -%} diff --git a/macros/supporting/hash_standardization.sql b/macros/supporting/hash_standardization.sql index b65abc20..0c144450 100644 --- a/macros/supporting/hash_standardization.sql +++ b/macros/supporting/hash_standardization.sql @@ -42,7 +42,15 @@ CONCAT('\"', REPLACE(REPLACE(REPLACE(TRIM(CAST([EXPRESSION] AS STRING)), '\\', ' {%- macro postgres__attribute_standardise(hash_type) -%} -CONCAT('"', REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRESSION] AS VARCHAR)), '\\', '\\\\'), '[QUOTE]', '\"'), '[NULL_PLACEHOLDER_STRING]', '--'), '"') +{% if hash_type == 'hashkey' %} + + '"' || REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRESSION] AS VARCHAR)), '\\', '\\\\'), '[QUOTE]', '\"'), '[NULL_PLACEHOLDER_STRING]', '--') || '"' + +{% else %} + + CONCAT('"', REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRESSION] AS VARCHAR)), '\\', '\\\\'), '[QUOTE]', '\"'), '[NULL_PLACEHOLDER_STRING]', '--'), '"') + +{% endif %} {%- endmacro -%} @@ -263,13 +271,13 @@ CONCAT('"', REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRE {%- if case_sensitive -%} {%- set 
standardise_prefix = "COALESCE({}(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(UPPER(".format(hash_alg)-%} {%- if alias is not none -%} - {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]'))), CAST({} AS {})) AS {}".format(zero_key, datatype, alias)-%} + {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')), CAST({} AS {})) AS {}".format(zero_key, datatype, alias)-%} {%- else -%} - {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]'))), CAST({} AS {}))".format(zero_key, datatype)-%} + {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')), CAST({} AS {}))".format(zero_key, datatype)-%} {%- endif -%} {%- else -%} {%- set standardise_prefix = "COALESCE({}(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(".format(hash_alg)-%} - {%- set standardise_suffix = "\n, '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]'))), CAST({} AS {})) AS {}".format(zero_key, datatype, alias)-%} + {%- set standardise_suffix = "\n, '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')), CAST({} AS {})) AS {}".format(zero_key, datatype, alias)-%} {%- endif -%} {%- endif -%} @@ -650,16 +658,16 @@ CONCAT('"', REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRE {%- set standardise_prefix = "COALESCE(LOWER({}(LISTAGG(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(".format(hash_alg)-%} {%- if alias is not none -%} - {%- set standardise_suffix = "\n), '\\n', '') \n, '\\t', '') \n, '\\v', '') \n, '\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {}))), {}) AS {}".format(multi_active_key,zero_key, alias)-%} + {%- set standardise_suffix = "\n, '\\n', '') \n, '\\t', '') \n, '\\v', '') \n, '\\r', '') AS VARCHAR), 
'[ALL_NULL]')) within group (ORDER BY {}))), {}) AS {}".format(multi_active_key,zero_key, alias)-%} {%- else -%} - {%- set standardise_suffix = "\n), '\\n', '') \n, '\\t', '') \n, '\\v', '') \n, '\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {}))), {})".format(multi_active_key,zero_key)-%} + {%- set standardise_suffix = "\n, '\\n', '') \n, '\\t', '') \n, '\\v', '') \n, '\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {}))), {})".format(multi_active_key,zero_key)-%} {%- endif -%} {%- endif -%} {%- else -%} {%- if case_sensitive -%} - {%- set standardise_prefix = "COALESCE({}(STRING_AGG(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(UPPER(".format(hash_alg)-%} + {%- set standardise_prefix = "COALESCE({}(LISTAGG(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(UPPER(".format(hash_alg)-%} {%- if alias is not none -%} {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {})), {}) AS {}".format(multi_active_key,zero_key, alias)-%} @@ -667,10 +675,10 @@ CONCAT('"', REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(TRIM(BOTH ' ' FROM CAST([EXPRE {%- set standardise_suffix = "\n)), '\\n', '') \n, '\\t', '') \n, '\\v', '') \n, '\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {})), {})".format(multi_active_key,zero_key)-%} {%- endif -%} {%- else -%} - {%- set standardise_prefix = "COALESCE({}(STRING_AGG(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(".format(hash_alg)-%} + {%- set standardise_prefix = "COALESCE({}(LISTAGG(NULLIF(CAST(REPLACE(REPLACE(REPLACE(REPLACE(".format(hash_alg)-%} {%- if alias is not none -%} - {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {})), {}) AS {}".format(multi_active_key,zero_key, alias)-%} + {%- set standardise_suffix = "\n, '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')) within group 
(ORDER BY {})), {}) AS {}".format(multi_active_key,zero_key, alias)-%} {%- else -%} {%- set standardise_suffix = "\n), '\\\\n', '') \n, '\\\\t', '') \n, '\\\\v', '') \n, '\\\\r', '') AS VARCHAR), '[ALL_NULL]')) within group (ORDER BY {})), {})".format(multi_active_key,zero_key)-%} {%- endif -%} diff --git a/macros/supporting/string_default_dtype.sql b/macros/supporting/string_default_dtype.sql new file mode 100644 index 00000000..1f5ee6bc --- /dev/null +++ b/macros/supporting/string_default_dtype.sql @@ -0,0 +1,209 @@ +{%- macro string_default_dtype(type=none) %} + + {{ return( adapter.dispatch('string_default_dtype', 'datavault4dbt')(type=type) ) }} + +{%- endmacro -%} + + +{%- macro default__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'bigquery' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['bigquery'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (bigquery) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "STRING" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "STRING" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} + + +{%- macro snowflake__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'snowflake' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['snowflake'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (snowflake) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "VARCHAR" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "VARCHAR" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} + + +{%- macro exasol__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'exasol' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['exasol'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (exasol) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "VARCHAR (2000000) UTF8" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "VARCHAR (2000000) UTF8" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} + + +{%- macro synapse__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'synapse' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['synapse'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (synapse) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "VARCHAR" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "VARCHAR" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} + + +{%- macro postgres__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'postgres' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['postgres'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (postgres) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "VARCHAR" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "VARCHAR" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} + + +{%- macro redshift__string_default_dtype(type) %} + +{%- if type == 'rsrc' %} + {%- set global_var = var('datavault4dbt.rsrc_default_dtype', none) -%} +{%- elif type == 'stg' %} + {%- set global_var = var('datavault4dbt.stg_default_dtype', none) -%} +{%- elif type == 'derived_columns' %} + {%- set global_var = var('datavault4dbt.derived_columns_default_dtype', none) -%} +{%- else %} + {%- set global_var = none %} +{%- endif %} + +{%- set string_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'redshift' in global_var.keys()|map('lower') -%} + {% set string_default_dtype = global_var['redshift'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt." ~ type ~ "_default_dtype' to a dictionary, but have not included the adapter you use (redshift) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set string_default_dtype = "VARCHAR" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set string_default_dtype = global_var -%} +{%- else -%} + {%- set string_default_dtype = "VARCHAR" -%} +{%- endif -%} + +{{ return(string_default_dtype) }} + +{%- endmacro -%} diff --git a/macros/supporting/string_to_timestamp.sql b/macros/supporting/string_to_timestamp.sql index e9ca08b8..d5d5a065 100644 --- a/macros/supporting/string_to_timestamp.sql +++ b/macros/supporting/string_to_timestamp.sql @@ -24,6 +24,5 @@ {%- endmacro -%} {%- macro redshift__string_to_timestamp(format, timestamp) -%} - TO_TIMESTAMP('{{ timestamp }}', '{{ format }}') + CAST(TO_TIMESTAMP('{{ timestamp }}', '{{ format }}') AS {{ datavault4dbt.timestamp_default_dtype() }}) {%- endmacro -%} - diff --git a/macros/supporting/timestamp_default_dtype.sql b/macros/supporting/timestamp_default_dtype.sql new file mode 100644 index 00000000..fb5facad --- /dev/null +++ b/macros/supporting/timestamp_default_dtype.sql @@ -0,0 +1,155 @@ +{%- macro timestamp_default_dtype() %} + + {{ return( adapter.dispatch('timestamp_default_dtype', 'datavault4dbt')() ) }} + +{%- endmacro -%} + + +{%- macro default__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'bigquery' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['bigquery'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (bigquery) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "TIMESTAMP" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "TIMESTAMP" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} + + +{%- macro snowflake__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'snowflake' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['snowflake'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (snowflake) as a key. Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "TIMESTAMP_TZ" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "TIMESTAMP_TZ" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} + + +{%- macro exasol__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'exasol' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['exasol'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (exasol) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "TIMESTAMP(3) WITH LOCAL TIME ZONE" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "TIMESTAMP(3) WITH LOCAL TIME ZONE" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} + + +{%- macro synapse__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'synapse' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['synapse'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (synapse) as a key. Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "datetimeoffset" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "datetimeoffset" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} + + +{%- macro postgres__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'postgres' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['postgres'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (postgres) as a key. 
Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "TIMESTAMPTZ" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "TIMESTAMPTZ" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} + + +{%- macro redshift__timestamp_default_dtype() %} + +{%- set global_var = var('datavault4dbt.timestamp_default_dtype', none) -%} +{%- set timestamp_default_dtype = '' -%} + +{%- if global_var is mapping -%} + {%- if 'redshift' in global_var.keys()|map('lower') -%} + {% set timestamp_default_dtype = global_var['redshift'] %} + {%- else -%} + {%- if execute -%} + {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_default_dtype' to a dictionary, but have not included the adapter you use (redshift) as a key. Applying the default value.") -%} + {% endif %} + {%- set timestamp_default_dtype = "TIMESTAMPTZ" -%} + {% endif %} +{%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} + {%- set timestamp_default_dtype = global_var -%} +{%- else -%} + {%- set timestamp_default_dtype = "TIMESTAMPTZ" -%} +{%- endif -%} + +{{ return(timestamp_default_dtype) }} + +{%- endmacro -%} diff --git a/macros/supporting/timestamp_format.sql b/macros/supporting/timestamp_format.sql index 67906e1f..965a65be 100644 --- a/macros/supporting/timestamp_format.sql +++ b/macros/supporting/timestamp_format.sql @@ -117,12 +117,12 @@ {%- if execute -%} {%- do exceptions.warn("Warning: You have set the global variable 'datavault4dbt.timestamp_format' to a dictionary, but have not included the adapter you use (postgres) as a key. 
Applying the default value.") -%} {% endif %} - {%- set timestamp_format = "%Y-%m-%dT%H-%M-%S" -%} + {%- set timestamp_format = "YYYY-MM-DD HH24:MI:SS" -%} {% endif %} {%- elif global_var is not mapping and datavault4dbt.is_something(global_var) -%} {%- set timestamp_format = global_var -%} {%- else -%} - {%- set timestamp_format = "%Y-%m-%dT%H-%M-%S" -%} + {%- set timestamp_format = "YYYY-MM-DD HH24:MI:SS" -%} {%- endif -%} {{ return(timestamp_format) }} diff --git a/macros/tables/bigquery/rec_track_sat.sql b/macros/tables/bigquery/rec_track_sat.sql index d2d0c530..2d842a28 100644 --- a/macros/tables/bigquery/rec_track_sat.sql +++ b/macros/tables/bigquery/rec_track_sat.sql @@ -9,8 +9,12 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'STRING') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} + +{# Setting the ldts to the default datatype for timestamps #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} + {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} {%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} @@ -132,7 +136,7 @@ WITH {%- for rsrc_static in rsrc_statics %} SELECT DISTINCT {{ hk_column }} AS {{ tracked_hashkey }}, - {{ src_ldts }}, + CAST({{ src_ldts }} AS {{ldts_default_dtype }}) AS {{ src_ldts }}, CAST('{{ rsrc_static }}' AS {{ rsrc_default_dtype }} ) AS {{ src_rsrc }}, CAST(UPPER('{{ source_model.name }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} FROM {{ ref(source_model.name) }} src @@ -153,7 +157,7 @@ WITH src_new_{{ source_number}} AS ( SELECT DISTINCT {{ 
hk_column }} AS {{ tracked_hashkey }}, - {{ src_ldts }}, + CAST({{ src_ldts }} AS {{ldts_default_dtype }}) AS {{ src_ldts }}, CAST({{ src_rsrc }} AS {{ rsrc_default_dtype }}) AS {{ src_rsrc }}, CAST(UPPER('{{ source_model.name }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} FROM {{ ref(source_model.name) }} src diff --git a/macros/tables/bigquery/ref_hub.sql b/macros/tables/bigquery/ref_hub.sql index 2283bc29..14014c71 100644 --- a/macros/tables/bigquery/ref_hub.sql +++ b/macros/tables/bigquery/ref_hub.sql @@ -141,7 +141,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src - + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number] %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON ({%- for rsrc_static in rsrc_statics -%} @@ -149,7 +152,7 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} diff --git a/macros/tables/bigquery/ref_sat_v1.sql b/macros/tables/bigquery/ref_sat_v1.sql index 073c1d32..7a9473a3 100644 --- a/macros/tables/bigquery/ref_sat_v1.sql +++ b/macros/tables/bigquery/ref_sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(ref_sat_v0) -%} diff --git a/macros/tables/bigquery/ref_table.sql b/macros/tables/bigquery/ref_table.sql index 1dd47105..59bc49dd 100644 --- a/macros/tables/bigquery/ref_table.sql +++ b/macros/tables/bigquery/ref_table.sql @@ -57,7 +57,7 @@ dates AS ( {{ src_ldts }} FROM {{ ref(satellite|string) }} WHERE {{ src_ldts }} != {{ 
datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} - {% if not loop.last -%} UNION {% endif %} + {% if not loop.last -%} UNION DISTINCT {% endif %} {%- endfor %} ) @@ -142,4 +142,4 @@ ref_table AS ( SELECT * FROM ref_table -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/sat_v1.sql b/macros/tables/bigquery/sat_v1.sql index 52c82455..0258c138 100644 --- a/macros/tables/bigquery/sat_v1.sql +++ b/macros/tables/bigquery/sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(sat_v0) -%} diff --git a/macros/tables/exasol/control_snap_v0.sql b/macros/tables/exasol/control_snap_v0.sql index 2e0bfcce..6bc85c29 100644 --- a/macros/tables/exasol/control_snap_v0.sql +++ b/macros/tables/exasol/control_snap_v0.sql @@ -28,7 +28,7 @@ initial_timestamps AS src.* FROM initial_timestamps src - WHERE src.sdts > (SELECT MAX(t."{{ sdts_alias }}") FROM {{ this }} t) + WHERE src.sdts > (SELECT MAX(t.{{ sdts_alias }}) FROM {{ this }} t) {%- set last_cte = 'incremental_cte' -%} ) @@ -37,7 +37,7 @@ initial_timestamps AS , enriched_timestamps AS ( SELECT - sdts as "{{ sdts_alias }}", + sdts as {{ sdts_alias }}, TRUE as force_active, sdts AS replacement_sdts, CONCAT('Snapshot ', DATE_TRUNC('day', TO_DATE(sdts))) AS caption, diff --git a/macros/tables/exasol/control_snap_v1.sql b/macros/tables/exasol/control_snap_v1.sql index f9fa8ba5..b6531154 100644 --- a/macros/tables/exasol/control_snap_v1.sql +++ b/macros/tables/exasol/control_snap_v1.sql @@ -48,34 +48,33 @@ virtual_logic AS ( CASE WHEN {% if 'daily' in log_logic.keys() %} - {%- if log_logic['daily']['forever'] == 'TRUE' -%} + {%- if log_logic['daily']['forever'] is true -%} {%- set ns.forever_status = 'TRUE' -%} - (1=1) - {%- else %} + (1=1) + 
{%- else %} {%- set daily_duration = log_logic['daily']['duration'] -%} {%- set daily_unit = log_logic['daily']['unit'] -%} (DATE_TRUNC('DAY', TO_DATE(c.{{ sdts_alias }})) BETWEEN ADD_{{ daily_unit}}S(CURRENT_DATE, -{{ daily_duration }}) AND CURRENT_DATE) {%- endif -%} {%- endif %} - {%- if 'monthly' in log_logic.keys() %} + {%- if 'weekly' in log_logic.keys() %} OR {%- if log_logic['weekly']['forever'] is true -%} {%- set ns.forever_status = 'TRUE' -%} (c.is_weekly = TRUE) {%- else %} - {%- set weekly_duration = log_logic['weekly']['duration'] -%} {%- set weekly_unit = log_logic['weekly']['unit'] -%} - ((DATE_TRUNC('DAY', TO_DATE(c.{{ sdts_alias }})) BETWEEN ADD_{{ weekly_unit}}S(CURRENT_DATE, -{{ weekly_duration }}) AND CURRENT_DATE) AND (c.is_weekly = TRUE)) {%- endif -%} {% endif -%} - {%- if 'monthly' in log_logic.keys() %} OR - {%- if log_logic['monthly']['forever'] == 'TRUE' -%} + {%- if 'monthly' in log_logic.keys() %} + OR + {%- if log_logic['monthly']['forever'] is true -%} {%- set ns.forever_status = 'TRUE' %} (c.is_monthly = TRUE) {%- else %} diff --git a/macros/tables/exasol/hub.sql b/macros/tables/exasol/hub.sql index 6b6f2c2e..af048fb0 100644 --- a/macros/tables/exasol/hub.sql +++ b/macros/tables/exasol/hub.sql @@ -1,4 +1,4 @@ -{%- macro exasol__hub(hashkey, business_keys, src_ldts, src_rsrc, source_models) -%} +{%- macro exasol__hub(hashkey, business_keys, src_ldts, src_rsrc, source_models, disable_hwm) -%} {%- set end_of_all_times = datavault4dbt.end_of_all_times() -%} {%- set timestamp_format = datavault4dbt.timestamp_format() -%} @@ -12,51 +12,15 @@ {# If no specific hk_column is defined for each source, we apply the values set in the hashkey variable. 
#} {# If no rsrc_static parameter is defined in ANY of the source models then the whole code block of record_source performance lookup is not executed #} {# For the use of record_source performance lookup it is required that every source model has the parameter rsrc_static defined and it cannot be an empty string #} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} -{%- for source_model in source_models.keys() -%} - - {%- if 'hk_column' not in source_models[source_model].keys() -%} - {%- do source_models[source_model].update({'hk_column': hashkey}) -%} - {%- endif -%} - - {%- if 'bk_columns' in source_models[source_model].keys() -%} - {%- set bk_column_input = source_models[source_model]['bk_columns'] -%} - - {%- if not (bk_column_input is iterable and bk_column_input is not string) -%} - {%- set bk_column_input = [bk_column_input] -%} - {%- endif -%} - - {%- do source_models[source_model].update({'bk_columns': bk_column_input}) -%} - {%- elif not datavault4dbt.is_list(bk_column_input) -%} - {%- set bk_list = datavault4dbt.expand_column_list(columns=[bk_column_input]) -%} - {%- do source_models[source_model].update({'bk_columns': bk_list}) -%} - {%- else -%}{%- do source_models[source_model].update({'bk_columns': business_keys}) -%} - {%- endif -%} - - {%- if 'rsrc_static' not in source_models[source_model].keys() -%} - {%- set ns.has_rsrc_static_defined = false -%} - {%- else -%} - - {%- if not (source_models[source_model]['rsrc_static'] is iterable and source_models[source_model]['rsrc_static'] is not string) -%} - - {%- if source_models[source_model]['rsrc_static'] == '' or source_models[source_model]['rsrc_static'] is none -%} - {%- if execute -%} - {{ exceptions.raise_compiler_error("If rsrc_static is defined -> it must not be an empty string ") }} - {%- endif %} - {%- else -%} - {%- do 
ns.source_models_rsrc_dict.update({source_model : [source_models[source_model]['rsrc_static']] } ) -%} - {%- endif -%} - - {%- elif source_models[source_model]['rsrc_static'] is iterable -%} - {%- do ns.source_models_rsrc_dict.update({source_model : source_models[source_model]['rsrc_static'] } ) -%} - {%- endif -%} - - {%- endif -%} - -{%- endfor -%} +{%- set source_model_values = fromjson(datavault4dbt.source_model_processing(source_models=source_models, parameters={'hk_column':hashkey}, business_keys=business_keys)) -%} +{%- set source_models = source_model_values['source_model_list'] -%} +{%- set ns.has_rsrc_static_defined = source_model_values['has_rsrc_static_defined'] -%} +{%- set ns.source_models_rsrc_dict = source_model_values['source_models_rsrc_dict'] -%} +{{ log('source_models: '~source_models, false) }} {%- set final_columns_to_select = [hashkey] + business_keys + [src_ldts] + [src_rsrc] -%} @@ -73,18 +37,20 @@ WITH FROM {{ this }} ), - {%- if ns.has_rsrc_static_defined -%} - {% for source_model in source_models.keys() %} + {%- if ns.has_rsrc_static_defined and not disable_hwm -%} + {% for source_model in source_models %} {# Create a query with a rsrc_static column with each rsrc_static for each source model. 
#} - {%- set source_number = loop.index | string -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {%- set source_number = source_model.id | string -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number] -%} + + {{log('rsrc_statics: '~ rsrc_statics, false) }} {%- set rsrc_static_query_source -%} SELECT count(*) FROM ( {%- for rsrc_static in rsrc_statics -%} - SELECT {{ this }}.{{ src_rsrc }}, + SELECT t.{{ src_rsrc }}, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -93,12 +59,14 @@ WITH ) {% endset %} + {{ log('rsrc static query: '~rsrc_static_query_source, false) }} + rsrc_static_{{ source_number }} AS ( {%- for rsrc_static in rsrc_statics -%} SELECT - {{ this }}.*, + t.*, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -122,17 +90,17 @@ WITH {%- endif -%} - {%- do ns.source_included_before.update({source_model: source_in_target}) -%} + {%- do ns.source_included_before.update({source_model.id: source_in_target}) -%} {% endfor -%} - {%- if source_models.keys() | length > 1 %} + {%- if source_models | length > 1 %} rsrc_static_union AS ( {# Create one unionized table over all sources. It will be the same as the already existing hub, but extended by the rsrc_static column. #} - {% for source_model in source_models.keys() %} - {%- set source_number = loop.index | string -%} + {% for source_model in source_models %} + {%- set source_number = source_model.id | string -%} SELECT rsrc_static_{{ source_number }}.* FROM rsrc_static_{{ source_number }} @@ -155,48 +123,37 @@ WITH GROUP BY rsrc_static ), - {%- else -%} - {%- if source_models.keys() | length == 1 %} - - max_ldts_single_src AS ( - {# Calculate the max load date timestamp of the whole table when there is only one source. 
#} - - SELECT - MAX({{ src_ldts }}) as max_ldts - FROM {{ this }} - WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} - ), - {%- endif %} {%- endif %} {% endif -%} -{% for source_model in source_models.keys() %} +{% for source_model in source_models %} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} {%- if ns.has_rsrc_static_defined -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number|string] -%} {%- endif -%} - {%- if 'hk_column' not in source_models[source_model].keys() %} + {%- if 'hk_column' not in source_model.keys() %} {%- set hk_column = hashkey -%} {%- else -%} - {%- set hk_column = source_models[source_model]['hk_column'] -%} + {%- set hk_column = source_model['hk_column'] -%} {% endif %} src_new_{{ source_number }} AS ( SELECT {{ hk_column }} AS {{ hashkey }}, - {% for bk in source_models[source_model]['bk_columns'] -%} + {% for bk in source_model['bk_columns'] -%} {{ bk }}, {% endfor -%} {{ src_ldts }}, {{ src_rsrc }} - FROM {{ ref(source_model) }} src + FROM {{ ref(source_model.name) }} src + {{ log('rsrc_statics defined?: ' ~ ns.source_models_rsrc_dict[source_number|string], false) }} - {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_model] %} + {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number|int] and not disable_hwm %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON ({%- for rsrc_static in rsrc_statics -%} max.rsrc_static = '{{ rsrc_static }}' @@ -204,8 +161,12 @@ WITH {% endif -%} {%- endfor %}) WHERE src.{{ src_ldts }} > max.max_ldts - {%- elif is_incremental() and source_models.keys() | length == 1 and not ns.has_rsrc_static_defined %} - WHERE src.{{ src_ldts }} > (SELECT max.max_ldts FROM max_ldts_single_src max) + {%- elif is_incremental() and source_models 
| length == 1 and not ns.has_rsrc_static_defined and not disable_hwm %} + WHERE src.{{ src_ldts }} > ( + SELECT MAX({{ src_ldts }}) + FROM {{ this }} + WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} + ) {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} @@ -213,18 +174,18 @@ WITH ), {%- endfor -%} -{%- if source_models.keys() | length > 1 %} +{%- if source_models | length > 1 %} source_new_union AS ( - {%- for source_model in source_models.keys() -%} + {%- for source_model in source_models -%} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} SELECT {{ hashkey }}, - {% for bk in source_models[source_model]['bk_columns'] -%} + {% for bk in source_model['bk_columns'] -%} {{ bk }} AS {{ business_keys[loop.index - 1] }}, {% endfor -%} diff --git a/macros/tables/exasol/link.sql b/macros/tables/exasol/link.sql index b760f963..626ade7c 100644 --- a/macros/tables/exasol/link.sql +++ b/macros/tables/exasol/link.sql @@ -1,4 +1,4 @@ -{%- macro exasol__link(link_hashkey, foreign_hashkeys, source_models, src_ldts, src_rsrc) -%} +{%- macro exasol__link(link_hashkey, foreign_hashkeys, source_models, src_ldts, src_rsrc, disable_hwm) -%} {%- if not (foreign_hashkeys is iterable and foreign_hashkeys is not string) -%} @@ -16,49 +16,15 @@ {# If no specific link_hk and fk_columns are defined for each source, we apply the values set in the link_hashkey and foreign_hashkeys variable. 
#} {# If no rsrc_static parameter is defined in ANY of the source models then the whole code block of record_source performance lookup is not executed #} {# For the use of record_source performance lookup it is required that every source model has the parameter rsrc_static defined and it cannot be an empty string #} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} -{%- for source_model in source_models.keys() %} - - {%- if 'fk_columns' not in source_models[source_model].keys() -%} - - {%- do source_models[source_model].update({'fk_columns': foreign_hashkeys}) -%} - - {%- endif -%} - - {%- if 'link_hk' not in source_models[source_model].keys() -%} - - {%- do source_models[source_model].update({'link_hk': link_hashkey}) -%} - - {%- endif -%} - - {%- if 'rsrc_static' not in source_models[source_model].keys() -%} - - {%- set ns.has_rsrc_static_defined = false -%} - - {%- else -%} - - {%- if not (source_models[source_model]['rsrc_static'] is iterable and source_models[source_model]['rsrc_static'] is not string) -%} - - {%- if source_models[source_model]['rsrc_static'] == '' or source_models[source_model]['rsrc_static'] is none -%} - - {%- if execute -%} - {{ exceptions.raise_compiler_error("If rsrc_static is defined -> it must not be an empty string ") }} - {%- endif %} - - {%- else -%} - {%- do ns.source_models_rsrc_dict.update({source_model : [source_models[source_model]['rsrc_static']] } ) -%} - {%- endif -%} - - {%- elif source_models[source_model]['rsrc_static'] is iterable -%} - {%- do ns.source_models_rsrc_dict.update({source_model : source_models[source_model]['rsrc_static'] } ) -%} - {%- endif -%} - - {%- endif -%} - -{% endfor %} +{%- set source_model_values = fromjson(datavault4dbt.source_model_processing(source_models=source_models, parameters={'link_hk':link_hashkey}, foreign_hashkeys=foreign_hashkeys)) -%} +{%- set 
source_models = source_model_values['source_model_list'] -%} +{%- set ns.has_rsrc_static_defined = source_model_values['has_rsrc_static_defined'] -%} +{%- set ns.source_models_rsrc_dict = source_model_values['source_models_rsrc_dict'] -%} +{{ log('source_models: '~source_models, false) }} {%- set final_columns_to_select = [link_hashkey] + foreign_hashkeys + [src_ldts] + [src_rsrc] -%} @@ -75,19 +41,20 @@ WITH FROM {{ this }} ), - {%- if ns.has_rsrc_static_defined -%} - {% for source_model in source_models.keys() %} + {%- if ns.has_rsrc_static_defined and not disable_hwm -%} + {% for source_model in source_models %} {# Create a query with a rsrc_static column with each rsrc_static for each source model. #} + {%- set source_number = source_model.id | string -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number] -%} - {%- set source_number = loop.index | string -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {{log('rsrc_statics: '~ rsrc_statics, false) }} {%- set rsrc_static_query_source -%} SELECT count(*) FROM ( {%- for rsrc_static in rsrc_statics -%} - SELECT {{ this }}.{{ src_rsrc }}, + SELECT t.{{ src_rsrc }}, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -98,9 +65,9 @@ WITH rsrc_static_{{ source_number }} AS ( {%- for rsrc_static in rsrc_statics -%} - SELECT {{ this }}.*, + SELECT t.*, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -123,18 +90,19 @@ WITH {%- endif -%} {%- endif -%} - {%- do ns.source_included_before.update({source_model: source_in_target}) -%} + + {%- do ns.source_included_before.update({source_model.id: source_in_target}) -%} {% endfor -%} - {%- if source_models.keys() | length > 1 %} + {%- if source_models | length > 1 %} rsrc_static_union AS ( {# Create one unionized table over all 
sources. It will be the same as the already existing link, but extended by the rsrc_static column. #} - {% for source_model in source_models.keys() %} - {%- set source_number = loop.index | string -%} + {% for source_model in source_models %} + {%- set source_number = source_model.id | string -%} SELECT rsrc_static_{{ source_number }}.* FROM rsrc_static_{{ source_number }} @@ -158,46 +126,39 @@ WITH GROUP BY rsrc_static ), - {%- else -%} - {%- if source_models.keys() | length == 1 %} - - max_ldts_single_src AS ( - {# Calculate the max load date timestamp of the whole table when there is only one source. #} - - SELECT - MAX({{ src_ldts }}) as max_ldts - FROM {{ this }} - WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} - ), - {%- endif %} {%- endif %} {% endif -%} -{% for source_model in source_models.keys() %} +{% for source_model in source_models %} + {# Select all deduplicated records from each source, and filter for records that are newer than the max ldts inside the existing link, if incremental. 
#} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} {%- if ns.has_rsrc_static_defined -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number|string] -%} {%- endif -%} - {{ log('rsrc_static defined: ' ~ ns.has_rsrc_static_defined , false) }} - {{ log('source_included_before: ' ~ ns.source_included_before[source_model|string] , false) }} + {%- if 'link_hk' not in source_model.keys() %} + {%- set link_hk = link_hashkey -%} + {%- else -%} + {%- set link_hk = source_model['link_hk'] -%} + {% endif %} src_new_{{ source_number }} AS ( SELECT - {{ source_models[source_model]['link_hk'] }} AS {{ link_hashkey }}, - {% for fk in source_models[source_model]['fk_columns']|list -%} + {{ link_hk }} AS {{ link_hashkey }}, + {% for fk in source_model['fk_columns'] -%} {{ fk }}, {% endfor -%} {{ src_ldts }}, {{ src_rsrc }} - FROM {{ ref(source_model|string) }} src + FROM {{ ref(source_model.name) }} src + {{ log('rsrc_statics defined?: ' ~ ns.source_models_rsrc_dict[source_number|string], false) }} - {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_model|string] %} + {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number|int] and not disable_hwm %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON ({%- for rsrc_static in rsrc_statics -%} max.rsrc_static = '{{ rsrc_static }}' @@ -205,8 +166,12 @@ WITH {% endif -%} {%- endfor %}) WHERE src.{{ src_ldts }} > max.max_ldts - {%- elif is_incremental() and source_models.keys() | length == 1 and not ns.has_rsrc_static_defined %} - WHERE src.{{ src_ldts }} > (SELECT max.max_ldts FROM max_ldts_single_src max) + {%- elif is_incremental() and source_models | length == 1 and not ns.has_rsrc_static_defined and not disable_hwm %} + WHERE src.{{ src_ldts }} > ( + SELECT MAX({{ src_ldts }}) + FROM {{ this }} + WHERE {{ 
src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} + ) {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} @@ -214,18 +179,18 @@ WITH ), {%- endfor -%} -{%- if source_models.keys() | length > 1 %} +{%- if source_models | length > 1 %} source_new_union AS ( {# Unionize the new records from all sources. #} - {%- for source_model in source_models.keys() -%} + {%- for source_model in source_models -%} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} SELECT {{ link_hashkey }}, - {% for fk in source_models[source_model]['fk_columns']|list %} + {% for fk in source_model['fk_columns']|list %} {{ fk }} AS {{ foreign_hashkeys[loop.index - 1] }}, {% endfor -%} {{ src_ldts }}, diff --git a/macros/tables/exasol/nh_link.sql b/macros/tables/exasol/nh_link.sql index f089a626..eef983d1 100644 --- a/macros/tables/exasol/nh_link.sql +++ b/macros/tables/exasol/nh_link.sql @@ -9,51 +9,15 @@ {# If no specific link_hk, fk_columns, or payload are defined for each source, we apply the values set in the link_hashkey, foreign_hashkeys, and payload variable. 
#} {# If no rsrc_static parameter is defined in ANY of the source models then the whole code block of record_source performance lookup is not executed #} {# For the use of record_source performance lookup it is required that every source model has the parameter rsrc_static defined and it cannot be an empty string #} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} -{%- for source_model in source_models.keys() %} - - {%- if 'fk_columns' not in source_models[source_model].keys() -%} - - {%- do source_models[source_model].update({'fk_columns': foreign_hashkeys}) -%} - - {%- endif -%} - - {%- if 'link_hk' not in source_models[source_model].keys() -%} - - {%- do source_models[source_model].update({'link_hk': link_hashkey}) -%} - - {%- endif -%} - - {%- if 'payload' not in source_models[source_model].keys() -%} - - {%- do source_models[source_model].update({'payload': payload}) -%} - - {%- endif -%} - - {%- if 'rsrc_static' not in source_models[source_model].keys() -%} - - {%- set ns.has_rsrc_static_defined = false -%} - - {%- else -%} - {%- if not (source_models[source_model]['rsrc_static'] is iterable and source_models[source_model]['rsrc_static'] is not string) -%} - {%- if source_models[source_model]['rsrc_static'] == '' or source_models[source_model]['rsrc_static'] is none -%} - {%- if execute -%} - {{ exceptions.raise_compiler_error("If rsrc_static is defined -> it must not be an empty string ") }} - {%- endif %} - {%- else -%} - {%- do ns.source_models_rsrc_dict.update({source_model : [source_models[source_model]['rsrc_static']] } ) -%} - {%- endif -%} - {%- elif source_models[source_model]['rsrc_static'] is iterable -%} - {%- do ns.source_models_rsrc_dict.update({source_model : source_models[source_model]['rsrc_static'] } ) -%} - {%- endif -%} - - {%- endif -%} - - -{% endfor %} +{%- set source_model_values = 
fromjson(datavault4dbt.source_model_processing(source_models=source_models, parameters={'link_hk':link_hashkey}, foreign_hashkeys=foreign_hashkeys, payload=payload)) -%} +{%- set source_models = source_model_values['source_model_list'] -%} +{%- set ns.has_rsrc_static_defined = source_model_values['has_rsrc_static_defined'] -%} +{%- set ns.source_models_rsrc_dict = source_model_values['source_models_rsrc_dict'] -%} +{{ log('source_models: '~source_models, false) }} {%- set final_columns_to_select = [link_hashkey] + foreign_hashkeys + [src_ldts] + [src_rsrc] + payload -%} @@ -70,19 +34,20 @@ WITH FROM {{ this }} ), - {%- if ns.has_rsrc_static_defined -%} - {% for source_model in source_models.keys() %} + {%- if ns.has_rsrc_static_defined and not disable_hwm -%} + {% for source_model in source_models %} {# Create a query with a rsrc_static column with each rsrc_static for each source model. #} + {%- set source_number = source_model.id | string -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number] -%} - {%- set source_number = loop.index | string -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {{log('rsrc_statics: '~ rsrc_statics, false) }} {%- set rsrc_static_query_source -%} SELECT count(*) FROM ( {%- for rsrc_static in rsrc_statics -%} - SELECT {{ this }}.{{ src_rsrc }}, + SELECT t.{{ src_rsrc }}, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -93,9 +58,9 @@ WITH rsrc_static_{{ source_number }} AS ( {%- for rsrc_static in rsrc_statics -%} - SELECT {{ this }}.*, + SELECT t.*, '{{ rsrc_static }}' AS rsrc_static - FROM {{ this }} + FROM {{ this }} t WHERE {{ src_rsrc }} like '{{ rsrc_static }}' {%- if not loop.last %} UNION ALL @@ -118,18 +83,19 @@ WITH {%- endif -%} {%- endif -%} - {%- do ns.source_included_before.update({source_model: source_in_target}) -%} + + {%- do 
ns.source_included_before.update({source_model.id: source_in_target}) -%} {% endfor -%} - {%- if source_models.keys() | length > 1 %} + {%- if source_models | length > 1 %} rsrc_static_union AS ( {# Create one unionized table over all sources. It will be the same as the already existing nh_link, but extended by the rsrc_static column. #} - {% for source_model in source_models.keys() %} - {%- set source_number = loop.index | string -%} + {% for source_model in source_models %} + {%- set source_number = source_model.id | string -%} SELECT rsrc_static_{{ source_number }}.* FROM rsrc_static_{{ source_number }} @@ -153,45 +119,39 @@ WITH GROUP BY rsrc_static ), - {%- else -%} - {%- if source_models.keys() | length == 1 %} - - max_ldts_single_src AS ( - {# Calculate the max load date timestamp of the whole table when there is only one source. #} - - SELECT - MAX({{ src_ldts }}) as max_ldts - FROM {{ this }} - WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} - ), - {%- endif %} {%- endif %} {% endif -%} -{%- for source_model in source_models.keys() %} +{% for source_model in source_models %} + {# Select all deduplicated records from each source, and filter for records that are newer than the max ldts inside the existing link, if incremental. 
#} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} + {%- if ns.has_rsrc_static_defined -%} - {%- set rsrc_statics = source_models[source_model]['rsrc_static'] %} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number|string] -%} {%- endif -%} + {%- if 'link_hk' not in source_model.keys() %} + {%- set link_hk = link_hashkey -%} + {%- else -%} + {%- set link_hk = source_model['link_hk'] -%} + {% endif %} + src_new_{{ source_number }} AS ( SELECT - {{ source_models[source_model]['link_hk'] }} AS {{ link_hashkey }}, - - {% for fk in source_models[source_model]['fk_columns']|list %} + {{ link_hk }} AS {{ link_hashkey }}, + {% for fk in source_model['fk_columns'] -%} {{ fk }}, - {%- endfor %} - + {% endfor -%} {{ src_ldts }}, {{ src_rsrc }}, - {{ datavault4dbt.print_list(source_models[source_model]['payload']) | indent(3) }} + {{ datavault4dbt.print_list(source_model['payload']) | indent(3) }} - FROM {{ ref(source_model|string) }} src + FROM {{ ref(source_model.name) }} src {# If the model is incremental and all sources has rsrc_static defined and valid and the source was already included before in the target transactional link #} {# then an inner join is performed on the CTE for the maximum load date timestamp per record source static to get the records that match any of the rsrc_static present in it #} @@ -204,8 +164,12 @@ src_new_{{ source_number }} AS ( {% endif -%} {%- endfor %}) WHERE src.{{ src_ldts }} > max.max_ldts - {%- elif is_incremental() and source_models.keys() | length == 1 and not ns.has_rsrc_static_defined %} - WHERE src.{{ src_ldts }} > (SELECT max.max_ldts FROM max_ldts_single_src max) + {%- elif is_incremental() and source_models | length == 1 and not ns.has_rsrc_static_defined and not disable_hwm %} + WHERE src.{{ src_ldts }} > ( + SELECT MAX({{ src_ldts }}) + FROM {{ this }} + WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} + ) 
{%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} @@ -213,26 +177,25 @@ src_new_{{ source_number }} AS ( ), {%- endfor -%} -{%- if source_models.keys() | length > 1 %} +{%- if source_models | length > 1 %} source_new_union AS ( {# Unionize the new records from all sources. #} - {%- for source_model in source_models.keys() -%} + {%- for source_model in source_models -%} - {%- set source_number = loop.index | string -%} + {%- set source_number = source_model.id | string -%} SELECT {{ link_hashkey }}, - - {% for fk in source_models[source_model]['fk_columns']|list %} + {% for fk in source_model['fk_columns']|list %} {{ fk }} AS {{ foreign_hashkeys[loop.index - 1] }}, {% endfor -%} {{ src_ldts }}, {{ src_rsrc }}, - {% for col in source_models[source_model]['payload']|list %} + {% for col in source_model['payload']|list %} {{ col }} AS {{ payload[loop.index - 1] }} {%- if not loop.last %}, {%- endif %} {% endfor -%} @@ -251,6 +214,8 @@ source_new_union AS ( {%- endif %} +{%- if not source_is_single_batch %} + earliest_hk_over_all_sources AS ( {# Deduplicate the unionized records again to only insert the earliest one. #} @@ -264,16 +229,18 @@ earliest_hk_over_all_sources AS ( ), +{%- endif %} + records_to_insert AS ( {# Select everything from the previous CTE, if its incremental then filter for hashkeys that are not already in the link. 
#} SELECT - {{ datavault4dbt.print_list(final_columns_to_select) }} + {{ datavault4dbt.print_list(final_columns_to_select) | indent(4) }} FROM {{ ns.last_cte }} {%- if is_incremental() %} WHERE {{ link_hashkey }} NOT IN (SELECT * FROM distinct_target_hashkeys) - {% endif -%} + {% endif %} ) SELECT * FROM records_to_insert diff --git a/macros/tables/exasol/rec_track_sat.sql b/macros/tables/exasol/rec_track_sat.sql index 92d30703..0e3ea4e8 100644 --- a/macros/tables/exasol/rec_track_sat.sql +++ b/macros/tables/exasol/rec_track_sat.sql @@ -1,4 +1,4 @@ -{%- macro exasol__rec_track_sat(tracked_hashkey, source_models, src_ldts, src_rsrc, src_stg) -%} +{%- macro exasol__rec_track_sat(tracked_hashkey, source_models, src_ldts, src_rsrc, src_stg, disable_hwm) -%} {%- set beginning_of_all_times = datavault4dbt.beginning_of_all_times() -%} {%- set end_of_all_times = datavault4dbt.end_of_all_times() -%} @@ -9,44 +9,19 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'VARCHAR (2000000) UTF8') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'VARCHAR (200) UTF8') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} - -{# If no specific hk_column is defined for each source, we apply the values set in the tracked_hashkey input variable. 
#} -{# If no rsrc_static parameter is defined in a source model then the record source performance look up wont be executed #} -{%- for source_model in source_models.keys() %} - - {%- if 'hk_column' not in source_models[source_model].keys() -%} - {%- do source_models[source_model].update({'hk_column': tracked_hashkey}) -%} - {%- endif -%} - - {%- if 'rsrc_static' not in source_models[source_model].keys() -%} - {%- set ns.has_rsrc_static_defined = false -%} - {%- else -%} - - {%- if not (source_models[source_model]['rsrc_static'] is iterable and source_models[source_model]['rsrc_static'] is not string) -%} - - {%- if source_models[source_model]['rsrc_static'] == '' or source_models[source_model]['rsrc_static'] is none -%} - {%- if execute -%} - {{ exceptions.raise_compiler_error("If rsrc_static is defined -> it must not be an empty string ") }} - {%- endif %} - {%- else -%} - {%- do ns.source_models_rsrc_dict.update({source_model : [source_models[source_model]['rsrc_static']] } ) -%} - {%- endif -%} - - {%- elif source_models[source_model]['rsrc_static'] is iterable -%} - {%- do ns.source_models_rsrc_dict.update({source_model : source_models[source_model]['rsrc_static'] } ) -%} - {%- endif -%} - - {%- endif -%} - -{% endfor %} +{%- set source_model_values = fromjson(datavault4dbt.source_model_processing(source_models=source_models, parameters={'hk_column':tracked_hashkey})) -%} +{%- set source_models = source_model_values['source_model_list'] -%} +{%- set ns.has_rsrc_static_defined = source_model_values['has_rsrc_static_defined'] -%} +{%- set ns.source_models_rsrc_dict = source_model_values['source_models_rsrc_dict'] -%} +{{ log('source_models: '~source_models, false) }} {%- set final_columns_to_select = [tracked_hashkey] + [src_ldts] + [src_rsrc] + [src_stg] -%} @@ -63,13 +38,13 @@ WITH {{ datavault4dbt.concat_ws(concat_columns) }} as concat FROM {{ this }} ), - {%- if ns.has_rsrc_static_defined -%} + {%- if ns.has_rsrc_static_defined and not disable_hwm -%} 
rsrc_static_unionized AS ( - {% for source_model in source_models.keys() %} + {% for source_model in source_models %} {# Create a query with a rsrc_static column with each rsrc_static for each source model. #} - {%- set source_number = loop.index | string -%} - {%- set hk_column = source_models[source_model]['hk_column'] -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {%- set source_number = source_model.id | string -%} + {%- set hk_column = source_model['hk_column'] -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number] -%} {%- set rsrc_static_query_source_count -%} SELECT count(*) FROM ( @@ -117,7 +92,7 @@ WITH {%- endif -%} {%- endif -%} - {%- do ns.source_included_before.update({source_model: source_in_target}) -%} + {%- do ns.source_included_before.update({source_model.id: source_in_target}) -%} {# Unionize over all sources #} {%- if not loop.last %} UNION ALL @@ -137,18 +112,6 @@ WITH GROUP BY rsrc_static ), - {%- else -%} - {%- if source_models.keys() | length == 1 %} - - max_ldts_single_src AS ( - {# Calculate the max load date timestamp of the whole table when there is only one source. #} - - SELECT - MAX({{ src_ldts }}) as max_ldts - FROM {{ this }} - WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} - ), - {%- endif %} {%- endif %} {% endif -%} @@ -158,12 +121,12 @@ WITH rows from that source are loaded into the satellite. 
#} -{%- for source_model in source_models.keys() %} +{%- for source_model in source_models %} - {%- set source_number = loop.index | string -%} - {%- set hk_column = source_models[source_model]['hk_column'] -%} + {%- set source_number = source_model.id | string -%} + {%- set hk_column = source_model['hk_column'] -%} {%- if ns.has_rsrc_static_defined -%} - {%- set rsrc_statics = ns.source_models_rsrc_dict[source_model] -%} + {%- set rsrc_statics = ns.source_models_rsrc_dict[source_number|string] -%} src_new_{{ source_number }} AS ( {%- for rsrc_static in rsrc_statics %} @@ -171,16 +134,15 @@ WITH {{ hk_column }} AS {{ tracked_hashkey }}, {{ src_ldts }}, CAST('{{ rsrc_static }}' AS {{ rsrc_default_dtype }} ) AS {{ src_rsrc }}, - CAST(UPPER('{{ source_model }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} - FROM {{ ref(source_model) }} src + CAST(UPPER('{{ source_model.name }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} + FROM {{ ref(source_model.name) }} src - {%- if is_incremental() and ns.source_included_before[source_model] %} + {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number|int] and not disable_hwm %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON max.rsrc_static = '{{ rsrc_static }}' WHERE src.{{ src_ldts }} > max.max_ldts {%- endif %} - {%- if not loop.last %} UNION ALL {% endif -%} @@ -193,10 +155,14 @@ WITH {{ hk_column }} AS {{ tracked_hashkey }}, {{ src_ldts }}, CAST({{ src_rsrc }} AS {{ rsrc_default_dtype }}) AS {{ src_rsrc }}, - CAST(UPPER('{{ source_model }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} - FROM {{ ref(source_model) }} src - {%- if is_incremental() and source_models.keys() | length == 1 %} - WHERE src.{{ src_ldts }} > (SELECT max.max_ldts FROM max_ldts_single_src max) + CAST(UPPER('{{ source_model.name }}') AS {{ stg_default_dtype }}) AS {{ src_stg }} + FROM {{ ref(source_model.name) }} src + {%- if is_incremental() and source_models | length == 1 and not disable_hwm %} + WHERE 
src.{{ src_ldts }} > ( + SELECT MAX({{ src_ldts }}) + FROM {{ this }} + WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} + ) {%- endif %} ), {%- endif -%} @@ -209,12 +175,12 @@ WITH If more than one source model is selected, all previously created deduplicated CTEs are unionized. #} -{%- if source_models.keys() | length > 1 %} +{%- if source_models | length > 1 %} source_new_union AS ( - {% for source_model in source_models.keys() %} - {%- set hk_column = source_models[source_model]['hk_column'] -%} - {%- set source_number = loop.index | string -%} + {% for source_model in source_models %} + {%- set hk_column = source_model['hk_column'] -%} + {%- set source_number = source_model.id | string -%} SELECT {{ tracked_hashkey }}, diff --git a/macros/tables/exasol/ref_hub.sql b/macros/tables/exasol/ref_hub.sql index 7668797c..0a268726 100644 --- a/macros/tables/exasol/ref_hub.sql +++ b/macros/tables/exasol/ref_hub.sql @@ -141,6 +141,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number] %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON @@ -149,7 +153,7 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} diff --git a/macros/tables/exasol/ref_sat_v1.sql b/macros/tables/exasol/ref_sat_v1.sql index 540e23ea..7900e128 100644 --- a/macros/tables/exasol/ref_sat_v1.sql +++ b/macros/tables/exasol/ref_sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set 
ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(ref_sat_v0) -%} diff --git a/macros/tables/postgres/control_snap_v0.sql b/macros/tables/postgres/control_snap_v0.sql index 07e9778a..2d908a46 100644 --- a/macros/tables/postgres/control_snap_v0.sql +++ b/macros/tables/postgres/control_snap_v0.sql @@ -1,4 +1,4 @@ -{%- macro postgres__control_snap_v0(start_date, daily_snapshot_time, sdts_alias, end_date) -%} +{%- macro postgres__control_snap_v0(start_date, daily_snapshot_time, sdts_alias, end_date=none) -%} {%- set timestamp_format = datavault4dbt.timestamp_format() -%} diff --git a/macros/tables/postgres/control_snap_v1.sql b/macros/tables/postgres/control_snap_v1.sql index 6fd549b3..2c081549 100644 --- a/macros/tables/postgres/control_snap_v1.sql +++ b/macros/tables/postgres/control_snap_v1.sql @@ -56,7 +56,7 @@ virtual_logic AS ( {%- set daily_duration = log_logic['daily']['duration'] -%} {%- set daily_unit = log_logic['daily']['unit'] -%} - c.{{ sdts_alias }} BETWEEN CURRENT_TIMESTAMP - INTERVAL '{{ daily_duration }}' {{ daily_unit }} AND CURRENT_DATE + 1 + c.{{ sdts_alias }} BETWEEN CURRENT_TIMESTAMP - INTERVAL '{{ daily_duration }} {{ daily_unit }}' AND CURRENT_DATE + 1 {%- endif -%} {%- endif %} @@ -71,7 +71,7 @@ virtual_logic AS ( {%- set weekly_unit = log_logic['weekly']['unit'] -%} ( - c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ weekly_duration }}' {{ weekly_unit }} AND CURRENT_DATE + c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ weekly_duration }} {{ weekly_unit }}' AND CURRENT_DATE AND (c.is_weekly = TRUE) ) @@ -89,7 +89,7 @@ virtual_logic AS ( {%- set monthly_unit = log_logic['monthly']['unit'] -%} ( - c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ monthly_duration }}' {{ monthly_unit }} AND CURRENT_DATE + c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ monthly_duration }} {{ monthly_unit }}' AND CURRENT_DATE AND (c.is_monthly = TRUE) ) @@ -107,7 +107,7 @@ virtual_logic AS ( {%- 
set yearly_unit = log_logic['yearly']['unit'] -%} ( - DATE FROM c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ yearly_duration }}' {{ yearly_unit }} AND CURRENT_DATE + DATE FROM c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '{{ yearly_duration }} {{ yearly_unit }}' AND CURRENT_DATE AND (c.is_yearly = TRUE) ) @@ -139,11 +139,11 @@ virtual_logic AS ( ELSE FALSE END AS is_last_year, CASE - WHEN c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '1' YEAR AND CURRENT_DATE THEN TRUE + WHEN c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '1 YEAR' AND CURRENT_DATE THEN TRUE ELSE FALSE END AS is_rolling_year, CASE - WHEN c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '2' YEAR AND CURRENT_DATE - INTERVAL '1' YEAR THEN TRUE + WHEN c.{{ sdts_alias }} BETWEEN CURRENT_DATE - INTERVAL '2 YEAR' AND CURRENT_DATE - INTERVAL '1 YEAR' THEN TRUE ELSE FALSE END AS is_last_rolling_year, c.comment diff --git a/macros/tables/postgres/nh_link.sql b/macros/tables/postgres/nh_link.sql index 0f8395cb..f691d0eb 100644 --- a/macros/tables/postgres/nh_link.sql +++ b/macros/tables/postgres/nh_link.sql @@ -249,7 +249,7 @@ records_to_insert AS ( {%- if is_incremental() %} WHERE NOT EXISTS (SELECT 1 FROM distinct_target_hashkeys - WHERE distinct_target_hashkeys.{{ link_hashkey }} = earliest_hk_over_all_sources.{{ link_hashkey }}) + WHERE distinct_target_hashkeys.{{ link_hashkey }} = {{ns.last_cte}}.{{ link_hashkey }}) {% endif %} ) diff --git a/macros/tables/postgres/pit.sql b/macros/tables/postgres/pit.sql index 8d59daa2..4927e7ae 100644 --- a/macros/tables/postgres/pit.sql +++ b/macros/tables/postgres/pit.sql @@ -1,7 +1,7 @@ {%- macro postgres__pit(tracked_entity, hashkey, sat_names, ldts, ledts, sdts, snapshot_relation, dimension_key,snapshot_trigger_column=none, custom_rsrc=none, pit_type=none) -%} {%- set hash = var('datavault4dbt.hash', 'MD5') -%} -{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'STRING') -%} +{%- set hash_dtype = var('datavault4dbt.hash_datatype', 
'VARCHAR(32)') -%} {%- set hash_default_values = fromjson(datavault4dbt.hash_default_values(hash_function=hash,hash_datatype=hash_dtype)) -%} {%- set hash_alg = hash_default_values['hash_alg'] -%} {%- set unknown_key = hash_default_values['unknown_key'] -%} diff --git a/macros/tables/postgres/rec_track_sat.sql b/macros/tables/postgres/rec_track_sat.sql index 8030bab6..c1afa111 100644 --- a/macros/tables/postgres/rec_track_sat.sql +++ b/macros/tables/postgres/rec_track_sat.sql @@ -9,8 +9,8 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'STRING') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} {%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} diff --git a/macros/tables/postgres/ref_hub.sql b/macros/tables/postgres/ref_hub.sql index 09bf7338..90a49ac2 100644 --- a/macros/tables/postgres/ref_hub.sql +++ b/macros/tables/postgres/ref_hub.sql @@ -141,6 +141,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number] %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON @@ -149,7 +153,7 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- endif %} {%- set ns.last_cte = 
"src_new_{}".format(source_number) %} @@ -188,15 +192,16 @@ source_new_union AS ( earliest_ref_key_over_all_sources_prep AS ( -{%- for source_model in source_models -%} +{%- for source_model in source_models %} SELECT lcte.*, - ROW_NUMBER() OVER (PARTITION BY {% for ref_key in source_model['ref_keys'] -%} - {{ ref_key}} + ROW_NUMBER() OVER (PARTITION BY {% for ref_key in ref_keys -%} + {{ ref_key}} {% if not loop.last %}, {% endif -%} {% endfor -%} ORDER BY {{ src_ldts}}) as rn - FROM {{ ns.last_cte }} AS lcte) -{%- endfor -%}, + FROM {{ ns.last_cte }} AS lcte +{% if not loop.last %} UNION {% endif %} +{%- endfor -%}), earliest_ref_key_over_all_sources AS ( diff --git a/macros/tables/postgres/sat_v0.sql b/macros/tables/postgres/sat_v0.sql index daaef7fa..01230730 100644 --- a/macros/tables/postgres/sat_v0.sql +++ b/macros/tables/postgres/sat_v0.sql @@ -1,4 +1,4 @@ -{%- macro postgres__sat_v0(parent_hashkey, src_hashdiff, src_payload, src_ldts, src_rsrc, source_model) -%} +{%- macro postgres__sat_v0(parent_hashkey, src_hashdiff, src_payload, src_ldts, src_rsrc, source_model, disable_hwm, source_is_single_batch) -%} {%- set beginning_of_all_times = datavault4dbt.beginning_of_all_times() -%} {%- set end_of_all_times = datavault4dbt.end_of_all_times() -%} diff --git a/macros/tables/postgres/sat_v1.sql b/macros/tables/postgres/sat_v1.sql index 8c2cf4ca..c48bcb51 100644 --- a/macros/tables/postgres/sat_v1.sql +++ b/macros/tables/postgres/sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(sat_v0) -%} diff --git a/macros/tables/redshift/control_snap_v0.sql b/macros/tables/redshift/control_snap_v0.sql index b80dc033..b63d7665 100644 --- a/macros/tables/redshift/control_snap_v0.sql +++ b/macros/tables/redshift/control_snap_v0.sql @@ -1,13 +1,13 @@ 
-{%- macro redshift__control_snap_v0(start_date, daily_snapshot_time, sdts_alias) -%} +{%- macro redshift__control_snap_v0(start_date, daily_snapshot_time, sdts_alias, end_date) -%} {%- set timestamp_format = datavault4dbt.timestamp_format() -%} - +{%- set timestamp_value = start_date ~ ' ' ~ daily_snapshot_time -%} {%- if not datavault4dbt.is_something(sdts_alias) -%} {%- set sdts_alias = var('datavault4dbt.sdts_alias', 'sdts') -%} {%- endif -%} with recursive generate_dates({{ sdts_alias }}) as ( - Select to_timestamp('{{ start_date }} {{ daily_snapshot_time }}', '{{ timestamp_format }}') as {{ sdts_alias }} + Select {{ datavault4dbt.string_to_timestamp(timestamp_format, timestamp_value) }} as {{ sdts_alias }} union all select {{ sdts_alias }} + 1 from generate_dates diff --git a/macros/tables/redshift/nh_link.sql b/macros/tables/redshift/nh_link.sql index 5b9e6891..2b0f2f49 100644 --- a/macros/tables/redshift/nh_link.sql +++ b/macros/tables/redshift/nh_link.sql @@ -249,7 +249,7 @@ records_to_insert AS ( {%- if is_incremental() %} WHERE NOT EXISTS (SELECT 1 FROM distinct_target_hashkeys - WHERE distinct_target_hashkeys.{{ link_hashkey }} = earliest_hk_over_all_sources.{{ link_hashkey }}) + WHERE distinct_target_hashkeys.{{ link_hashkey }} = {{ ns.last_cte }}.{{ link_hashkey }}) {% endif %} ) diff --git a/macros/tables/redshift/pit.sql b/macros/tables/redshift/pit.sql index 6ea7514b..b576f739 100644 --- a/macros/tables/redshift/pit.sql +++ b/macros/tables/redshift/pit.sql @@ -1,11 +1,12 @@ {%- macro redshift__pit(tracked_entity, hashkey, sat_names, ldts, ledts, sdts, snapshot_relation, dimension_key,snapshot_trigger_column=none, custom_rsrc=none, pit_type=none) -%} {%- set hash = var('datavault4dbt.hash', 'MD5') -%} -{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'STRING') -%} +{%- set hash_dtype = var('datavault4dbt.hash_datatype', 'VARCHAR(32)') -%} {%- set hash_default_values = 
fromjson(datavault4dbt.hash_default_values(hash_function=hash,hash_datatype=hash_dtype)) -%} {%- set hash_alg = hash_default_values['hash_alg'] -%} {%- set unknown_key = hash_default_values['unknown_key'] -%} {%- set error_key = hash_default_values['error_key'] -%} +{%- set string_default_dtype = datavault4dbt.string_default_dtype() -%} {%- if hash_dtype == 'BYTES' -%} {%- set hashkey_string = 'TO_HEX({})'.format(datavault4dbt.prefix([hashkey],'te')) -%} @@ -46,10 +47,10 @@ pit_records AS ( SELECT {% if datavault4dbt.is_something(pit_type) -%} - {{ datavault4dbt.as_constant(pit_type) }} as type, + CAST({{ datavault4dbt.as_constant(pit_type) }} as {{ string_default_dtype }} ) as type, {%- endif %} {% if datavault4dbt.is_something(custom_rsrc) -%} - '{{ custom_rsrc }}' as {{ rsrc }}, + CAST('{{ custom_rsrc }}' as {{ string_default_dtype }} ) as {{ rsrc }}, {%- endif %} {{ datavault4dbt.hash(columns=hashed_cols, alias=dimension_key, @@ -64,12 +65,13 @@ pit_records AS ( FROM {{ ref(tracked_entity) }} te + {% if datavault4dbt.is_something(snapshot_trigger_column) -%} FULL OUTER JOIN {{ ref(snapshot_relation) }} snap - {% if datavault4dbt.is_something(snapshot_trigger_column) -%} ON snap.{{ snapshot_trigger_column }} = true {% else -%} - ON 1=1 + CROSS JOIN + {{ ref(snapshot_relation) }} snap {%- endif %} {% for satellite in sat_names %} {%- set sat_columns = datavault4dbt.source_columns(ref(satellite)) %} @@ -80,7 +82,7 @@ pit_records AS ( SELECT {{ hashkey }}, {{ ldts }}, - COALESCE(LEAD(TIMESTAMP_SUB({{ ldts }}, INTERVAL 1 MICROSECOND)) OVER (PARTITION BY {{ hashkey }} ORDER BY {{ ldts }}),{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }}) AS {{ ledts }} + COALESCE(LEAD(DATEADD(microsecond,-1, {{ ldts }})) OVER (PARTITION BY {{ hashkey }} ORDER BY {{ ldts }}),{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }}) AS {{ ledts }} FROM {{ ref(satellite) }} ) {{ satellite }} {% endif %} diff --git 
a/macros/tables/redshift/rec_track_sat.sql b/macros/tables/redshift/rec_track_sat.sql index 0f1f11f6..51dc5288 100644 --- a/macros/tables/redshift/rec_track_sat.sql +++ b/macros/tables/redshift/rec_track_sat.sql @@ -9,8 +9,8 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'STRING') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} {%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} diff --git a/macros/tables/redshift/ref_hub.sql b/macros/tables/redshift/ref_hub.sql index 60e230ab..b9624c39 100644 --- a/macros/tables/redshift/ref_hub.sql +++ b/macros/tables/redshift/ref_hub.sql @@ -141,6 +141,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number] %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON @@ -149,7 +153,7 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} @@ -186,26 +190,18 @@ source_new_union AS ( {%- endif %} - -earliest_ref_key_over_all_sources_prep AS ( -{%- for source_model in source_models -%} - SELECT - lcte.*, - ROW_NUMBER() OVER (PARTITION BY {% for ref_key in source_model['ref_keys'] -%} - {{ 
ref_key}} - {% endfor -%} - ORDER BY {{ src_ldts}}) as rn - FROM {{ ns.last_cte }} AS lcte) -{%- endfor -%}, - earliest_ref_key_over_all_sources AS ( - {#- Deduplicate the unionized records again to only insert the earliest one. #} + {#- Deduplicate the unionized records to only insert the earliest one. #} SELECT lcte.* - FROM earliest_ref_key_over_all_sources_prep AS lcte - WHERE rn = 1 - {%- set ns.last_cte = 'earliest_ref_key_over_all_sources' -%}), + FROM {{ ns.last_cte }} AS lcte + + QUALIFY ROW_NUMBER() OVER (PARTITION BY {%- for ref_key in ref_keys %} {{ref_key}} {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}) = 1 + + {%- set ns.last_cte = 'earliest_ref_key_over_all_sources' -%} + +), records_to_insert AS ( {#- Select everything from the previous CTE, if incremental filter for hashkeys that are not already in the hub. #} diff --git a/macros/tables/redshift/ref_table.sql b/macros/tables/redshift/ref_table.sql index 70672828..20f2cb8c 100644 --- a/macros/tables/redshift/ref_table.sql +++ b/macros/tables/redshift/ref_table.sql @@ -125,8 +125,7 @@ ref_table AS ( FROM {{ ref(ref_hub) }} h - FULL OUTER JOIN dates ld - ON 1 = 1 + CROSS JOIN dates ld {% for satellite in ref_satellites_dict.keys() %} @@ -146,4 +145,4 @@ ref_table AS ( SELECT * FROM ref_table -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/redshift/sat_v0.sql b/macros/tables/redshift/sat_v0.sql index 1d2c1ee2..e04d4f66 100644 --- a/macros/tables/redshift/sat_v0.sql +++ b/macros/tables/redshift/sat_v0.sql @@ -1,4 +1,4 @@ -{%- macro redshift__sat_v0(parent_hashkey, src_hashdiff, src_payload, src_ldts, src_rsrc, source_model) -%} +{%- macro redshift__sat_v0(parent_hashkey, src_hashdiff, src_payload, src_ldts, src_rsrc, source_model, disable_hwm, source_is_single_batch) -%} {%- set beginning_of_all_times = datavault4dbt.beginning_of_all_times() -%} {%- set end_of_all_times = datavault4dbt.end_of_all_times() -%} diff --git 
a/macros/tables/redshift/sat_v1.sql b/macros/tables/redshift/sat_v1.sql index cd86f527..b1ce49df 100644 --- a/macros/tables/redshift/sat_v1.sql +++ b/macros/tables/redshift/sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(sat_v0) -%} diff --git a/macros/tables/ref_sat_v1.sql b/macros/tables/ref_sat_v1.sql index 28cf3903..7b3ea907 100644 --- a/macros/tables/ref_sat_v1.sql +++ b/macros/tables/ref_sat_v1.sql @@ -28,7 +28,7 @@ add_is_current_flag: true {%- set src_ldts = datavault4dbt.replace_standard(src_ldts, 'datavault4dbt.ldts_alias', 'ldts') -%} {%- set src_rsrc = datavault4dbt.replace_standard(src_rsrc, 'datavault4dbt.rsrc_alias', 'rsrc') -%} - {%- set src_ledts = datavault4dbt.replace_standard(src_ledts, 'datavault4dbt.ledts_alias', 'ledts') -%} + {%- set ledts_alias = datavault4dbt.replace_standard(ledts_alias, 'datavault4dbt.ledts_alias', 'ledts') -%} {{ adapter.dispatch('ref_sat_v1', 'datavault4dbt')(ref_sat_v0=ref_sat_v0, ref_keys=ref_keys, diff --git a/macros/tables/snowflake/hub.sql b/macros/tables/snowflake/hub.sql index 2dc6ac33..aa39ec4f 100644 --- a/macros/tables/snowflake/hub.sql +++ b/macros/tables/snowflake/hub.sql @@ -147,7 +147,7 @@ WITH SELECT {{ hk_column }} AS {{ hashkey }}, {% for bk in source_model['bk_columns'] -%} - {{ bk }}, + {{ bk }} AS {{ business_keys[loop.index - 1] }}, {% endfor -%} {{ src_ldts }}, @@ -188,7 +188,7 @@ source_new_union AS ( {{ hashkey }}, {% for bk in source_model['bk_columns'] -%} - {{ bk }} AS {{ business_keys[loop.index - 1] }}, + {{ business_keys[loop.index - 1] }}, {% endfor -%} {{ src_ldts }}, diff --git a/macros/tables/snowflake/rec_track_sat.sql b/macros/tables/snowflake/rec_track_sat.sql index 18260bda..685434a0 100644 --- a/macros/tables/snowflake/rec_track_sat.sql +++ 
b/macros/tables/snowflake/rec_track_sat.sql @@ -9,8 +9,8 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'STRING') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} {%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} @@ -81,7 +81,7 @@ WITH {%- set source_in_target = true -%} {%- if execute -%} - {%- set rsrc_static_result = run_query(rsrc_static_query_source) -%} + {%- set rsrc_static_result = run_query(rsrc_static_query_source_count) -%} {%- set row_count = rsrc_static_result.columns[0].values()[0] -%} diff --git a/macros/tables/snowflake/ref_hub.sql b/macros/tables/snowflake/ref_hub.sql index e4e729c2..6c7d9e4f 100644 --- a/macros/tables/snowflake/ref_hub.sql +++ b/macros/tables/snowflake/ref_hub.sql @@ -141,6 +141,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number] %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON @@ -149,7 +153,7 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- endif %} {%- set ns.last_cte = "src_new_{}".format(source_number) %} diff --git a/macros/tables/snowflake/ref_sat_v0.sql b/macros/tables/snowflake/ref_sat_v0.sql index 8c9b0922..f08e7e4d 100644 --- 
a/macros/tables/snowflake/ref_sat_v0.sql +++ b/macros/tables/snowflake/ref_sat_v0.sql @@ -29,10 +29,10 @@ source_data AS ( SELECT {% for ref_key in parent_ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ ns.src_hashdiff }} as {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} FROM {{ source_relation }} {%- if is_incremental() and not disable_hwm %} @@ -50,12 +50,12 @@ latest_entries_in_sat AS ( SELECT {% for ref_key in parent_ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ ns.hdiff_alias }} FROM {{ this }} - QUALIFY ROW_NUMBER() OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} {{ref_key}} {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }} DESC) = 1 + QUALIFY ROW_NUMBER() OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} "{{ref_key}}" {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }} DESC) = 1 ), {%- endif %} @@ -67,17 +67,17 @@ deduplicated_numbered_source AS ( SELECT {% for ref_key in parent_ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} {% if is_incremental() -%} - , ROW_NUMBER() OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} {{ref_key}} {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}) as rn + , ROW_NUMBER() OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} "{{ref_key}}" {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}) as rn {%- endif %} FROM source_data QUALIFY CASE - WHEN {{ ns.hdiff_alias }} = LAG({{ ns.hdiff_alias }}) OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} {{ref_key}} {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}) THEN FALSE + WHEN {{ ns.hdiff_alias 
}} = LAG({{ ns.hdiff_alias }}) OVER(PARTITION BY {%- for ref_key in parent_ref_keys %} "{{ref_key}}" {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}) THEN FALSE ELSE TRUE END ), @@ -90,10 +90,10 @@ records_to_insert AS ( SELECT {% for ref_key in parent_ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} FROM deduplicated_numbered_source {%- if is_incremental() %} WHERE NOT EXISTS ( diff --git a/macros/tables/snowflake/ref_sat_v1.sql b/macros/tables/snowflake/ref_sat_v1.sql index c0cb3f40..1b305f42 100644 --- a/macros/tables/snowflake/ref_sat_v1.sql +++ b/macros/tables/snowflake/ref_sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(ref_sat_v0) -%} @@ -24,20 +23,20 @@ end_dated_source AS ( SELECT {% for ref_key in ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ hashdiff }}, {{ src_rsrc }}, {{ src_ldts }}, - COALESCE(LEAD({{ src_ldts }} - INTERVAL '1 MICROSECOND') OVER (PARTITION BY {%- for ref_key in ref_keys %} {{ref_key}} {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}),{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }}) as {{ ledts_alias }}, - {{ datavault4dbt.print_list(source_columns_to_select) }} + COALESCE(LEAD({{ src_ldts }} - INTERVAL '1 MICROSECOND') OVER (PARTITION BY {%- for ref_key in ref_keys %} "{{ref_key}}" {%- if not loop.last %}, {% endif %}{% endfor %} ORDER BY {{ src_ldts }}),{{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }}) as {{ ledts_alias }}, + {{- "\n\n " ~ 
datavault4dbt.print_list(datavault4dbt.escape_column_names(source_columns_to_select)) if source_columns_to_select else " *" }} FROM {{ source_relation }} ) SELECT {% for ref_key in ref_keys %} - {{ref_key}}, + "{{ref_key}}", {% endfor %} {{ hashdiff }}, {{ src_rsrc }}, @@ -49,7 +48,7 @@ SELECT ELSE FALSE END AS {{ is_current_col_alias }}, {% endif -%} - {{ datavault4dbt.print_list(source_columns_to_select) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_columns_to_select)) if source_columns_to_select else " *" }} FROM end_dated_source {%- endmacro -%} diff --git a/macros/tables/snowflake/sat_v0.sql b/macros/tables/snowflake/sat_v0.sql index 6ed13abb..53008f91 100644 --- a/macros/tables/snowflake/sat_v0.sql +++ b/macros/tables/snowflake/sat_v0.sql @@ -28,7 +28,7 @@ source_data AS ( SELECT {{ parent_hashkey }}, {{ ns.src_hashdiff }} as {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} FROM {{ source_relation }} {%- if is_incremental() %} @@ -62,7 +62,7 @@ deduplicated_numbered_source AS ( SELECT {{ parent_hashkey }}, {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} {% if is_incremental() -%} , ROW_NUMBER() OVER(PARTITION BY {{ parent_hashkey }} ORDER BY {{ src_ldts }}) as rn {%- endif %} @@ -83,7 +83,7 @@ records_to_insert AS ( SELECT {{ parent_hashkey }}, {{ ns.hdiff_alias }}, - {{ datavault4dbt.print_list(source_cols) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_cols)) if source_cols else " *" }} FROM deduplicated_numbered_source {%- if is_incremental() %} WHERE NOT EXISTS ( diff --git a/macros/tables/snowflake/sat_v1.sql b/macros/tables/snowflake/sat_v1.sql index 0e936fd0..1077a64a 100644 --- 
a/macros/tables/snowflake/sat_v1.sql +++ b/macros/tables/snowflake/sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(sat_v0) -%} @@ -27,7 +26,7 @@ end_dated_source AS ( {{ src_ldts }}, COALESCE(LEAD({{ src_ldts }} - INTERVAL '1 MICROSECOND') OVER (PARTITION BY {{ hashkey }} ORDER BY {{ src_ldts }}), {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }}) AS {{ ledts_alias }} {%- if include_payload -%}, - {{ datavault4dbt.print_list(source_columns_to_select) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_columns_to_select)) if source_columns_to_select else " *" }} {%- endif %} FROM {{ source_relation }} @@ -46,7 +45,7 @@ SELECT END AS {{ is_current_col_alias }} {% endif -%} {%- if include_payload -%}, - {{ datavault4dbt.print_list(source_columns_to_select) }} + {{- "\n\n " ~ datavault4dbt.print_list(datavault4dbt.escape_column_names(source_columns_to_select)) if source_columns_to_select else " *" }} {%- endif %} FROM end_dated_source diff --git a/macros/tables/synapse/control_snap_v0.sql b/macros/tables/synapse/control_snap_v0.sql index e1e4557e..acc7acd1 100644 --- a/macros/tables/synapse/control_snap_v0.sql +++ b/macros/tables/synapse/control_snap_v0.sql @@ -1,41 +1,54 @@ {%- macro synapse__control_snap_v0(start_date, end_date, daily_snapshot_time, sdts_alias) -%} -{{ log('start_date: '~ start_date, true)}} -WITH initial_timestamps AS ( - SELECT - CAST(CAST('{{ start_date }}' AS VARCHAR) + ' ' + '{{ daily_snapshot_time }}' AS DATETIME) + CAST(rn - 1 AS INT) AS {{ sdts_alias }} - FROM - ( - SELECT - TOP (DATEDIFF(DAY, '{{ start_date }}', '{{ end_date }}') + 1) - ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rn - FROM - sys.all_objects s1 - CROSS JOIN - sys.all_objects s2 - ) AS 
system_row + +{# end_date is interpolated as-is into DATEDIFF below: keep the current-timestamp default as a SQL expression, but quote a caller-supplied date literal #}{% if datavault4dbt.is_nothing(end_date) %} + {% set end_date = datavault4dbt.current_timestamp() %} +{% else %}{% set end_date = "'" ~ end_date ~ "'" %}{% endif %} + +WITH + +{#- Row generator: 12 rows cross-joined five times below yields 12^5 = 248,832 rows, the upper bound on the number of days in the date series #} +initial_timestamps_prep AS ( + SELECT 1 AS num UNION ALL + SELECT 2 UNION ALL + SELECT 3 UNION ALL + SELECT 4 UNION ALL + SELECT 5 UNION ALL + SELECT 6 UNION ALL + SELECT 7 UNION ALL + SELECT 8 UNION ALL + SELECT 9 UNION ALL + SELECT 10 UNION ALL + SELECT 11 UNION ALL + SELECT 12 + ), + +initial_timestamps AS ( + SELECT TOP (DATEDIFF(DAY, '{{ start_date }}', {{ end_date }}) + 1) + DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, '{{ start_date }}' ) AS {{ sdts_alias }} + FROM initial_timestamps_prep s1 + CROSS JOIN initial_timestamps_prep s2 + CROSS JOIN initial_timestamps_prep s3 + CROSS JOIN initial_timestamps_prep s4 + CROSS JOIN initial_timestamps_prep s5 ), enriched_timestamps AS ( SELECT - CONVERT(datetime2, {{ sdts_alias }}) as {{ sdts_alias }}, - CONVERT(varchar, {{ sdts_alias }}, 23) as {{ sdts_alias }}_date, + CONVERT(datetime2(6), {{ sdts_alias }}) as {{ sdts_alias }}, + CONVERT(varchar, {{ sdts_alias }}, 23) as {{ sdts_alias }}_date, 1 as force_active, - {{ sdts_alias }} AS replacement_{{ sdts_alias }}, - CONCAT('Snapshot ', CONVERT(date, {{ sdts_alias }}, 23)) AS caption, - DATEPART(YEAR, {{ sdts_alias }}) as year, - DATEPART(QUARTER, {{ sdts_alias }}) as quarter, - DATEPART(MONTH, {{ sdts_alias }}) as month, - DATEPART(DAY, {{ sdts_alias }}) as day_of_month, - DATEPART(DAYOFYEAR, {{ sdts_alias }}) as day_of_year, - DATEPART(WEEKDAY, {{ sdts_alias }}) as weekday, - DATEPART(WEEK, {{ sdts_alias }}) as week, - DATEPART(ISO_WEEK, {{ sdts_alias }}) as iso_week, - CASE WHEN DATEPART(weekday, {{ sdts_alias }}) = 7 THEN 1 ELSE 0 END AS is_end_of_week, - CASE WHEN LEAD(DATEPART(Day, {{ sdts_alias }}), 1) OVER (ORDER BY {{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_end_of_month, - CASE WHEN LEAD(DATEPART(QUARTER, {{ sdts_alias }}), 1) OVER 
(ORDER BY {{ sdts_alias }}) != DATEPART(QUARTER, {{ sdts_alias }}) THEN 1 ELSE 0 END as is_end_of_quarter, - CASE WHEN LEAD(DATEPART(Dayofyear, {{ sdts_alias }}), 1) OVER (ORDER BY {{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_end_of_year, - NULL AS comment + CONVERT(datetime2(6), {{ sdts_alias }}) AS replacement_{{ sdts_alias }}, + CONCAT('Snapshot ', CONVERT(date, {{ sdts_alias }}, 23)) AS caption, + CASE WHEN DATEPART(HOUR, {{ sdts_alias }}) = 0 AND DATEPART(MINUTE, {{ sdts_alias }}) = 0 AND DATEPART(SECOND, {{ sdts_alias }}) = 0 THEN 1 ELSE 0 END AS is_hourly, + CASE WHEN DATEPART(HOUR, {{ sdts_alias }}) = 0 AND DATEPART(MINUTE, {{ sdts_alias }}) = 0 AND DATEPART(SECOND, {{ sdts_alias }}) = 0 THEN 1 ELSE 0 END AS is_daily, + CASE WHEN DATEPART(WEEKDAY, {{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_weekly, -- weekday numbering follows SET DATEFIRST: 1 is Monday only under DATEFIRST 1; under the us_english default (DATEFIRST 7) it is Sunday + CASE WHEN DAY({{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_monthly, + CASE WHEN LEAD(DATEPART(DAY, {{ sdts_alias }}), 1) OVER (ORDER BY {{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_end_of_month, + CASE WHEN (MONTH({{ sdts_alias }}) IN (1, 4, 7, 10) AND DAY({{ sdts_alias }}) = 1) THEN 1 ELSE 0 END AS is_quarterly, + CASE WHEN MONTH({{ sdts_alias }}) = 1 AND DAY({{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_yearly, + CASE WHEN LEAD(DATEPART(DAYOFYEAR, {{ sdts_alias }}), 1) OVER (ORDER BY {{ sdts_alias }}) = 1 THEN 1 ELSE 0 END AS is_end_of_year, + NULL AS comment FROM initial_timestamps ) SELECT * FROM enriched_timestamps diff --git a/macros/tables/synapse/control_snap_v1.sql b/macros/tables/synapse/control_snap_v1.sql index 21483de5..c2d5259e 100644 --- a/macros/tables/synapse/control_snap_v1.sql +++ b/macros/tables/synapse/control_snap_v1.sql @@ -1,5 +1,4 @@ {%- macro synapse__control_snap_v1(control_snap_v0, log_logic, sdts_alias) -%} - {%- set snapshot_trigger_column = var('datavault4dbt.snapshot_trigger_column', 'is_active') -%} {%- set ns = namespace(forever_status_dict={}, log_logic_list=[], col_name='', log_logic={}) %} @@ 
-69,23 +68,29 @@ dynamic as (SELECT src.{{ sdts_alias }}, src.{{ sdts_alias }}_date, src.force_active, - CASE WHEN itp.{{ sdts_alias }} is not null THEN 1 ELSE 0 END AS is_in_the_past, - CASE WHEN itp.rn = 1 THEN 1 ELSE 0 END AS is_current, - CASE WHEN src.year = DATEPART(YEAR, GETDATE()) THEN 1 ELSE 0 END as is_current_year, - CASE WHEN src.year = DATEPART(YEAR, GETDATE())-1 THEN 1 ELSE 0 END as is_last_year, - CASE WHEN DATEDIFF(day, src.{{ sdts_alias }}, GETDATE()) between 0 and 365 THEN 1 ELSE 0 END as is_current_rolling_year, + {# CASE WHEN itp.{{ sdts_alias }} is not null THEN 1 ELSE 0 END AS is_in_the_past, #} + CASE WHEN itp.rn = 1 THEN 1 ELSE 0 END AS is_latest, + CASE WHEN DATEPART(YEAR, src.{{ sdts_alias }}) = DATEPART(YEAR, GETDATE()) THEN 1 ELSE 0 END as is_current_year, + CASE WHEN DATEPART(YEAR, src.{{ sdts_alias }}) = DATEPART(YEAR, GETDATE())-1 THEN 1 ELSE 0 END as is_last_year, + CASE WHEN DATEDIFF(day, src.{{ sdts_alias }}, GETDATE()) between 0 and 365 THEN 1 ELSE 0 END as is_rolling_year, CASE WHEN DATEDIFF(day, src.{{ sdts_alias }}, GETDATE()) between 366 and 730 THEN 1 ELSE 0 END as is_last_rolling_year, - src.year, - src.quarter, - src.month, - src.day_of_month, - src.day_of_year, - src.weekday, - src.week, - src.iso_week, - src.is_end_of_week, + {# src.year, #} + {# src.quarter, #} + {# src.month, #} + {# src.day_of_month, #} + {# src.day_of_year, #} + {# src.weekday, #} + {# src.week, #} + {# src.iso_week, #} + src.is_hourly, + src.is_daily, + src.is_weekly, + src.is_monthly, + {# src.is_end_of_week, #} src.is_end_of_month, - src.is_end_of_quarter, + src.is_quarterly, + {# src.is_end_of_quarter, #} + src.is_yearly, src.is_end_of_year @@ -130,11 +135,11 @@ log_logic AS ( {%- if 'weekly' in logic_definition.keys() %} OR {%- if logic_definition['weekly']['forever'] is true -%} {%- do ns.forever_status_dict.update({col_name: 'TRUE'}) -%} - (c.is_end_of_week = 1) + (DATEPART(weekday, c.{{ sdts_alias }}) = 7) {%- else %} {%- set weekly_duration = 
logic_definition['weekly']['duration'] -%} {%- set weekly_unit = logic_definition['weekly']['unit'] %} - ((c.{{ sdts_alias }} BETWEEN DATEADD({{ weekly_unit }}, -{{ weekly_duration }}, GETDATE()) AND GETDATE()) AND (c.is_end_of_week = 1)) + ((c.{{ sdts_alias }} BETWEEN DATEADD({{ weekly_unit }}, -{{ weekly_duration }}, GETDATE()) AND GETDATE()) AND (DATEPART(weekday, c.{{ sdts_alias }}) = 7)) {%- endif -%} {% endif -%} @@ -171,4 +176,4 @@ log_logic AS ( SELECT * FROM log_logic -{%- endmacro -%} +{%- endmacro -%} \ No newline at end of file diff --git a/macros/tables/synapse/hub.sql b/macros/tables/synapse/hub.sql index 20647481..c4d4170a 100644 --- a/macros/tables/synapse/hub.sql +++ b/macros/tables/synapse/hub.sql @@ -14,7 +14,7 @@ {# If no specific hk_column is defined for each source, we apply the values set in the hashkey variable. #} {# If no rsrc_static parameter is defined in ANY of the source models then the whole code block of record_source performance lookup is not executed #} {# For the use of record_source performance lookup it is required that every source model has the parameter rsrc_static defined and it cannot be an empty string #} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} diff --git a/macros/tables/synapse/link.sql b/macros/tables/synapse/link.sql index 6981c5d9..00498b05 100644 --- a/macros/tables/synapse/link.sql +++ b/macros/tables/synapse/link.sql @@ -16,7 +16,7 @@ {# If no specific link_hk and fk_columns are defined for each source, we apply the values set in the link_hashkey and foreign_hashkeys variable. 
#} {# If no rsrc_static parameter is defined in ANY of the source models then the whole code block of record_source performance lookup is not executed #} {# For the use of record_source performance lookup it is required that every source model has the parameter rsrc_static defined and it cannot be an empty string #} -{%- if source_models is not mapping -%} +{%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} {%- set source_models = {source_models: {}} -%} {%- endif -%} diff --git a/macros/tables/synapse/pit.sql b/macros/tables/synapse/pit.sql index 53f24111..3ddd7448 100644 --- a/macros/tables/synapse/pit.sql +++ b/macros/tables/synapse/pit.sql @@ -42,7 +42,7 @@ pit_records AS ( SELECT {% if datavault4dbt.is_something(pit_type) -%} - '{{ datavault4dbt.as_constant(pit_type) }}' as type, + {{ datavault4dbt.as_constant(pit_type) }} as type, {%- endif %} {% if datavault4dbt.is_something(custom_rsrc) -%} '{{ custom_rsrc }}' as {{ rsrc }}, diff --git a/macros/tables/synapse/rec_track_sat.sql b/macros/tables/synapse/rec_track_sat.sql index 9415491a..62165dd7 100644 --- a/macros/tables/synapse/rec_track_sat.sql +++ b/macros/tables/synapse/rec_track_sat.sql @@ -9,8 +9,8 @@ {%- set rsrc_error = var('datavault4dbt.default_error_rsrc', 'ERROR') -%} {# Setting the rsrc and stg_alias default datatype and length #} -{%- set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') -%} -{%- set stg_default_dtype = var('datavault4dbt.stg_default_dtype', 'STRING') -%} +{%- set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') -%} +{%- set stg_default_dtype = datavault4dbt.string_default_dtype(type='stg') -%} {%- set ns = namespace(last_cte = '', source_included_before = {}, source_models_rsrc_dict={}, has_rsrc_static_defined=true) -%} {%- if source_models is not mapping and not datavault4dbt.is_list(source_models) -%} diff --git a/macros/tables/synapse/ref_hub.sql b/macros/tables/synapse/ref_hub.sql index 
229f433c..0df9bb6f 100644 --- a/macros/tables/synapse/ref_hub.sql +++ b/macros/tables/synapse/ref_hub.sql @@ -144,6 +144,10 @@ WITH {{ src_ldts }}, {{ src_rsrc }} FROM {{ ref(source_model.name) }} src + WHERE NOT ( + {% for ref_key in source_model['ref_keys'] -%} + {{ ref_key}} IS NULL {%- if not loop.last %} AND {% endif -%} + {% endfor -%} ) {%- if is_incremental() and ns.has_rsrc_static_defined and ns.source_included_before[source_number|int] and not disable_hwm %} INNER JOIN max_ldts_per_rsrc_static_in_target max ON @@ -152,9 +156,9 @@ WITH {%- if not loop.last -%} OR {% endif -%} {%- endfor %}) - WHERE src.{{ src_ldts }} > max.max_ldts + AND src.{{ src_ldts }} > max.max_ldts {%- elif is_incremental() and source_models | length == 1 and not ns.has_rsrc_static_defined and not disable_hwm %} - WHERE src.{{ src_ldts }} > ( + AND src.{{ src_ldts }} > ( SELECT COALESCE(MAX({{ src_ldts }}), {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }}) FROM {{ this }} WHERE {{ src_ldts }} != {{ datavault4dbt.string_to_timestamp(timestamp_format, end_of_all_times) }} diff --git a/macros/tables/synapse/ref_sat_v1.sql b/macros/tables/synapse/ref_sat_v1.sql index 5c2608cb..daf2abac 100644 --- a/macros/tables/synapse/ref_sat_v1.sql +++ b/macros/tables/synapse/ref_sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(ref_sat_v0) -%} diff --git a/macros/tables/synapse/ref_table.sql b/macros/tables/synapse/ref_table.sql index 05153668..111fe820 100644 --- a/macros/tables/synapse/ref_table.sql +++ b/macros/tables/synapse/ref_table.sql @@ -71,16 +71,11 @@ dates AS ( {% elif snapshot_relation is not none %} {%- set date_column = sdts_alias -%} - + SELECT - {{ date_column }} - FROM ( - - SELECT - {{ sdts_alias }} - FROM {{ 
ref(snapshot_relation) }} - WHERE {{ snapshot_trigger_column }} = 1 - ) + {{ sdts_alias }} + FROM {{ ref(snapshot_relation) }} + WHERE {{ snapshot_trigger_column }} = 1 {%- endif %} @@ -92,7 +87,6 @@ dates AS ( ) {%- endif -%} - ), ref_table AS ( diff --git a/macros/tables/synapse/sat_v1.sql b/macros/tables/synapse/sat_v1.sql index f8bfc599..b127f628 100644 --- a/macros/tables/synapse/sat_v1.sql +++ b/macros/tables/synapse/sat_v1.sql @@ -4,7 +4,6 @@ {%- set timestamp_format = datavault4dbt.timestamp_format() -%} {%- set is_current_col_alias = var('datavault4dbt.is_current_col_alias', 'IS_CURRENT') -%} -{%- set ledts_alias = var('datavault4dbt.ledts_alias', 'ledts') -%} {%- set source_relation = ref(sat_v0) -%}