From 7c60f8031fd23ca000179e8fab66f5697f75046e Mon Sep 17 00:00:00 2001 From: Fiona Zhao Date: Thu, 10 Apr 2025 11:14:14 -0400 Subject: [PATCH 1/5] Support preservation of database data type case in the generate-model-yml --- integration_tests/macros/bool_type_value.sql | 13 ++++++++++ .../macros/integer_type_value.sql | 10 ++++--- .../macros/operations/create_source_table.sql | 4 +-- integration_tests/macros/text_type_value.sql | 14 +++++----- ...l_yaml_case_sensitive_data_types_false.sql | 25 ++++++++++++++++++ ...el_yaml_case_sensitive_data_types_true.sql | 26 +++++++++++++++++++ macros/generate_model_yaml.sql | 20 +++++++------- macros/helpers/helpers.sql | 8 +++--- 8 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 integration_tests/macros/bool_type_value.sql create mode 100644 integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_false.sql create mode 100644 integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_true.sql diff --git a/integration_tests/macros/bool_type_value.sql b/integration_tests/macros/bool_type_value.sql new file mode 100644 index 0000000..0ae1a6d --- /dev/null +++ b/integration_tests/macros/bool_type_value.sql @@ -0,0 +1,13 @@ +{%- macro bool_type_value(case_sensitive=False) -%} +{%- if target.type == "redshift" -%} + {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} +{%- elif target.type == "snowflake" -%} + {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} +{%- elif target.type == "bigquery" -%} + {%- if case_sensitive -%}BOOL{%- else -%}bool{%- endif -%} +{%- elif target.type == "postgres" -%} + boolean +{%- else -%} + {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} +{%- endif -%} +{%- endmacro -%} \ No newline at end of file diff --git a/integration_tests/macros/integer_type_value.sql b/integration_tests/macros/integer_type_value.sql index 207a43e..973d690 100644 --- a/integration_tests/macros/integer_type_value.sql +++ 
b/integration_tests/macros/integer_type_value.sql @@ -1,9 +1,11 @@ -{%- macro integer_type_value() -%} +{%- macro integer_type_value(case_sensitive=False) -%} {%- if target.type == "snowflake" -%} -number + {%- if case_sensitive -%}NUMBER{%- else -%}number{%- endif -%} {%- elif target.type == "bigquery" -%} -int64 + {%- if case_sensitive -%}INT64{%- else -%}int64{%- endif -%} +{%- elif target.type == "postgres" -%} + integer {%- else -%} -integer + {%- if case_sensitive -%}INTEGER{%- else -%}integer{%- endif -%} {%- endif -%} {%- endmacro -%} diff --git a/integration_tests/macros/operations/create_source_table.sql b/integration_tests/macros/operations/create_source_table.sql index 9607d60..537164f 100644 --- a/integration_tests/macros/operations/create_source_table.sql +++ b/integration_tests/macros/operations/create_source_table.sql @@ -9,7 +9,7 @@ {% do adapter.create_schema(target_schema) %} {% set drop_table_sql %} -drop table if exists {{ target_schema }}.codegen_integration_tests__data_source_table {% if target.type == "redshift" %}cascade{% endif %} +drop table if exists {{ target_schema }}.codegen_integration_tests__data_source_table {% if target.type in ["redshift", "postgres"] %}cascade{% endif %} {% endset %} {{ run_query(drop_table_sql) }} @@ -26,7 +26,7 @@ create table {{ target_schema }}.codegen_integration_tests__data_source_table as {{ run_query(create_table_sql) }} {% set drop_table_sql_case_sensitive %} -drop table if exists {{ target_schema }}.codegen_integration_tests__data_source_table_case_sensitive {% if target.type == "redshift" %}cascade{% endif %} +drop table if exists {{ target_schema }}.codegen_integration_tests__data_source_table_case_sensitive {% if target.type in ["redshift", "postgres"] %}cascade{% endif %} {% endset %} {{ run_query(drop_table_sql_case_sensitive) }} diff --git a/integration_tests/macros/text_type_value.sql b/integration_tests/macros/text_type_value.sql index 514d4a8..9ebc713 100644 --- 
a/integration_tests/macros/text_type_value.sql +++ b/integration_tests/macros/text_type_value.sql @@ -1,11 +1,13 @@ -{%- macro text_type_value() -%} +{%- macro text_type_value(case_sensitive=False) -%} {%- if target.type == "redshift"-%} -character varying + {%- if case_sensitive -%}CHARACTER VARYING{%- else -%}character varying{%- endif -%} {%- elif target.type == "snowflake" -%} -varchar + {%- if case_sensitive -%}VARCHAR{%- else -%}varchar{%- endif -%} {%- elif target.type == "bigquery" -%} -string + {%- if case_sensitive -%}STRING{%- else -%}string{%- endif -%} +{%- elif target.type == "postgres" -%} + text {%- else -%} -text + {%- if case_sensitive -%}TEXT{%- else -%}text{%- endif -%} {%- endif -%} -{%- endmacro -%} +{%- endmacro -%} \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_false.sql b/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_false.sql new file mode 100644 index 0000000..8d7ec3b --- /dev/null +++ b/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_false.sql @@ -0,0 +1,25 @@ +{# Test with case_sensitive_data_types=False (default behavior) #} +{% set actual_model_yaml_lowercase = codegen.generate_model_yaml( + model_names=['model_from_source'], + case_sensitive_data_types=False + ) +%} + +{% set expected_model_yaml_lowercase %} +version: 2 + +models: + - name: model_from_source + description: "" + columns: + - name: my_integer_col + data_type: {{ integer_type_value() | lower }} + description: "" + + - name: my_bool_col + data_type: {{ bool_type_value() | lower }} + description: "" + +{% endset %} + +{{ assert_equal (actual_model_yaml_lowercase | trim, expected_model_yaml_lowercase | trim) }} \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_true.sql b/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_true.sql new file mode 100644 index 
0000000..5d034cd --- /dev/null +++ b/integration_tests/tests/test_generate_model_yaml_case_sensitive_data_types_true.sql @@ -0,0 +1,26 @@ +{# Test with case_sensitive_data_types=True #} +{% set actual_model_yaml_uppercase = codegen.generate_model_yaml( + model_names=['model_from_source'], + case_sensitive_data_types=True + ) +%} + +{% set expected_model_yaml_uppercase %} +version: 2 + +models: + - name: model_from_source + description: "" + columns: + - name: my_integer_col + data_type: {{ integer_type_value(case_sensitive=True) }} + description: "" + + - name: my_bool_col + data_type: {{ bool_type_value(case_sensitive=True) }} + description: "" + +{% endset %} + + +{{ assert_equal (actual_model_yaml_uppercase | trim, expected_model_yaml_uppercase | trim) }} \ No newline at end of file diff --git a/macros/generate_model_yaml.sql b/macros/generate_model_yaml.sql index d4fbbee..ba20a79 100644 --- a/macros/generate_model_yaml.sql +++ b/macros/generate_model_yaml.sql @@ -1,34 +1,34 @@ -{% macro generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name="") %} - {{ return(adapter.dispatch('generate_column_yaml', 'codegen')(column, model_yaml, column_desc_dict, include_data_types, parent_column_name)) }} +{% macro generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name="", case_sensitive_data_types=False) %} + {{ return(adapter.dispatch('generate_column_yaml', 'codegen')(column, model_yaml, column_desc_dict, include_data_types, parent_column_name, case_sensitive_data_types)) }} {% endmacro %} -{% macro default__generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name) %} +{% macro default__generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name, case_sensitive_data_types) %} {% if parent_column_name %} {% set column_name = parent_column_name ~ "." 
~ column.name %} {% else %} {% set column_name = column.name %} {% endif %} - {% do model_yaml.append(' - name: ' ~ column_name | lower ) %} + {% do model_yaml.append(' - name: ' ~ column_name | lower ) %} {% if include_data_types %} - {% do model_yaml.append(' data_type: ' ~ codegen.data_type_format_model(column)) %} + {% do model_yaml.append(' data_type: ' ~ codegen.data_type_format_model(column, case_sensitive_data_types)) %} {% endif %} {% do model_yaml.append(' description: ' ~ (column_desc_dict.get(column.name | lower,'') | tojson)) %} {% do model_yaml.append('') %} {% if column.fields|length > 0 %} {% for child_column in column.fields %} - {% set model_yaml = codegen.generate_column_yaml(child_column, model_yaml, column_desc_dict, include_data_types, parent_column_name=column_name) %} + {% set model_yaml = codegen.generate_column_yaml(child_column, model_yaml, column_desc_dict, include_data_types, parent_column_name=column_name, case_sensitive_data_types=case_sensitive_data_types) %} {% endfor %} {% endif %} {% do return(model_yaml) %} {% endmacro %} -{% macro generate_model_yaml(model_names=[], upstream_descriptions=False, include_data_types=True) -%} - {{ return(adapter.dispatch('generate_model_yaml', 'codegen')(model_names, upstream_descriptions, include_data_types)) }} +{% macro generate_model_yaml(model_names=[], upstream_descriptions=False, include_data_types=True, case_sensitive_data_types=False) -%} + {{ return(adapter.dispatch('generate_model_yaml', 'codegen')(model_names, upstream_descriptions, include_data_types, case_sensitive_data_types)) }} {%- endmacro %} -{% macro default__generate_model_yaml(model_names, upstream_descriptions, include_data_types) %} +{% macro default__generate_model_yaml(model_names, upstream_descriptions, include_data_types, case_sensitive_data_types) %} {% set model_yaml=[] %} @@ -49,7 +49,7 @@ {% set column_desc_dict = codegen.build_dict_column_descriptions(model) if upstream_descriptions else {} %} {% for column in 
columns %} - {% set model_yaml = codegen.generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types) %} + {% set model_yaml = codegen.generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, case_sensitive_data_types=case_sensitive_data_types) %} {% endfor %} {% endfor %} {% endif %} diff --git a/macros/helpers/helpers.sql b/macros/helpers/helpers.sql index d7fa19e..e848d05 100644 --- a/macros/helpers/helpers.sql +++ b/macros/helpers/helpers.sql @@ -79,14 +79,14 @@ {{ return(formatted['data_type'] | lower) }} {% endmacro %} -{% macro data_type_format_model(column) -%} - {{ return(adapter.dispatch('data_type_format_model', 'codegen')(column)) }} +{% macro data_type_format_model(column, case_sensitive_data_types=False) -%} + {{ return(adapter.dispatch('data_type_format_model', 'codegen')(column, case_sensitive_data_types)) }} {%- endmacro %} {# format a column data type for a model #} -{% macro default__data_type_format_model(column) %} +{% macro default__data_type_format_model(column, case_sensitive_data_types) %} {% set formatted = codegen.format_column(column) %} - {{ return(formatted['data_type'] | lower) }} + {{ return(formatted['data_type'] if case_sensitive_data_types else formatted['data_type'] | lower) }} {% endmacro %} {# retrieve entire resource dictionary based on unique id #} From db08608967fe3ec0b5bb6213b33173980129d720 Mon Sep 17 00:00:00 2001 From: Fiona Zhao Date: Thu, 10 Apr 2025 12:00:35 -0400 Subject: [PATCH 2/5] Update the README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 438d248..7c74b6d 100644 --- a/README.md +++ b/README.md @@ -257,6 +257,7 @@ schema.yml file. - `model_names` (required): The model(s) you wish to generate YAML for. - `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models and sources. 
- `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. +- `case_sensitive_data_types` (optional, default=False): Whether you want to keep data types in the case returned by the target database instead of lowercasing them. ### Usage: From 2b546dda88bc600149d4cd95f94e7c87a019a2db Mon Sep 17 00:00:00 2001 From: Fiona Zhao Date: Thu, 10 Apr 2025 11:14:14 -0400 Subject: [PATCH 3/5] Fix expected data type in integration_test --- docker-compose.yml | 3 +++ integration_tests/macros/bool_type_value.sql | 4 ++-- integration_tests/macros/integer_type_value.sql | 4 +++- integration_tests/macros/text_type_value.sql | 2 +- run_test.sh | 12 ++++++++++++ 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 88f7559..11ddf82 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,5 +4,8 @@ services: image: cimg/postgres:17.0 environment: - POSTGRES_USER=root + - POSTGRES_DATABASE=codegen_test + - POSTGRES_PASSWORD=password + - POSTGRES_SCHEMA=codegen_integration_tests_postgres ports: - "5432:5432" diff --git a/integration_tests/macros/bool_type_value.sql b/integration_tests/macros/bool_type_value.sql index 0ae1a6d..a1f9a2a 100644 --- a/integration_tests/macros/bool_type_value.sql +++ b/integration_tests/macros/bool_type_value.sql @@ -1,10 +1,10 @@ {%- macro bool_type_value(case_sensitive=False) -%} {%- if target.type == "redshift" -%} - {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} + boolean {%- elif target.type == "snowflake" -%} {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} {%- elif target.type == "bigquery" -%} - {%- if case_sensitive -%}BOOL{%- else -%}bool{%- endif -%} + {%- if case_sensitive -%}BOOLEAN{%- else -%}boolean{%- endif -%} {%- elif target.type == "postgres" -%} boolean {%- else -%} diff --git a/integration_tests/macros/integer_type_value.sql b/integration_tests/macros/integer_type_value.sql index 973d690..5b1ef8f 100644 --- 
a/integration_tests/macros/integer_type_value.sql +++ b/integration_tests/macros/integer_type_value.sql @@ -1,5 +1,7 @@ {%- macro integer_type_value(case_sensitive=False) -%} -{%- if target.type == "snowflake" -%} +{%- if target.type == "redshift" -%} + integer +{%- elif target.type == "snowflake" -%} {%- if case_sensitive -%}NUMBER{%- else -%}number{%- endif -%} {%- elif target.type == "bigquery" -%} {%- if case_sensitive -%}INT64{%- else -%}int64{%- endif -%} diff --git a/integration_tests/macros/text_type_value.sql b/integration_tests/macros/text_type_value.sql index 9ebc713..4c70ffa 100644 --- a/integration_tests/macros/text_type_value.sql +++ b/integration_tests/macros/text_type_value.sql @@ -1,6 +1,6 @@ {%- macro text_type_value(case_sensitive=False) -%} {%- if target.type == "redshift"-%} - {%- if case_sensitive -%}CHARACTER VARYING{%- else -%}character varying{%- endif -%} + text {%- elif target.type == "snowflake" -%} {%- if case_sensitive -%}VARCHAR{%- else -%}varchar{%- endif -%} {%- elif target.type == "bigquery" -%} diff --git a/run_test.sh b/run_test.sh index e19bc3d..978dc87 100755 --- a/run_test.sh +++ b/run_test.sh @@ -4,6 +4,18 @@ echo `pwd` cd integration_tests cp ci/sample.profiles.yml profiles.yml +export POSTGRES_HOST=localhost +export POSTGRES_USER=root +export DBT_ENV_SECRET_POSTGRES_PASS=password +export POSTGRES_PORT=5432 +export POSTGRES_DATABASE=codegen_test +export POSTGRES_SCHEMA=codegen_integration_tests_postgres + +# Create database if it doesn't exist +PGPASSWORD=$DBT_ENV_SECRET_POSTGRES_PASS psql -h $POSTGRES_HOST -U $POSTGRES_USER -d postgres -tc "SELECT 1 FROM pg_database WHERE datname = '$POSTGRES_DATABASE'" | grep -q 1 || \ +PGPASSWORD=$DBT_ENV_SECRET_POSTGRES_PASS psql -h $POSTGRES_HOST -U $POSTGRES_USER -d postgres -c "CREATE DATABASE $POSTGRES_DATABASE" + +dbt --warn-error clean --target $1 || exit 1 dbt --warn-error deps --target $1 || exit 1 dbt --warn-error run-operation create_source_table --target $1 || exit 1 dbt 
--warn-error seed --target $1 --full-refresh || exit 1 From 4927212433a4f34bcbfbeb0a4a5c164e13a8d13f Mon Sep 17 00:00:00 2001 From: Fiona Zhao Date: Thu, 10 Apr 2025 21:25:40 -0400 Subject: [PATCH 4/5] Revert changes that are not needed --- docker-compose.yml | 3 --- run_test.sh | 11 ----------- 2 files changed, 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 11ddf82..88f7559 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,8 +4,5 @@ services: image: cimg/postgres:17.0 environment: - POSTGRES_USER=root - - POSTGRES_DATABASE=codegen_test - - POSTGRES_PASSWORD=password - - POSTGRES_SCHEMA=codegen_integration_tests_postgres ports: - "5432:5432" diff --git a/run_test.sh b/run_test.sh index 978dc87..9b9b5f2 100755 --- a/run_test.sh +++ b/run_test.sh @@ -4,17 +4,6 @@ echo `pwd` cd integration_tests cp ci/sample.profiles.yml profiles.yml -export POSTGRES_HOST=localhost -export POSTGRES_USER=root -export DBT_ENV_SECRET_POSTGRES_PASS=password -export POSTGRES_PORT=5432 -export POSTGRES_DATABASE=codegen_test -export POSTGRES_SCHEMA=codegen_integration_tests_postgres - -# Create database if it doesn't exist -PGPASSWORD=$DBT_ENV_SECRET_POSTGRES_PASS psql -h $POSTGRES_HOST -U $POSTGRES_USER -d postgres -tc "SELECT 1 FROM pg_database WHERE datname = '$POSTGRES_DATABASE'" | grep -q 1 || \ -PGPASSWORD=$DBT_ENV_SECRET_POSTGRES_PASS psql -h $POSTGRES_HOST -U $POSTGRES_USER -d postgres -c "CREATE DATABASE $POSTGRES_DATABASE" - dbt --warn-error clean --target $1 || exit 1 dbt --warn-error deps --target $1 || exit 1 dbt --warn-error run-operation create_source_table --target $1 || exit 1 From 174a7a192c0832e9ab89351762c5e38505ec6187 Mon Sep 17 00:00:00 2001 From: Fiona Zhao Date: Thu, 10 Apr 2025 21:33:38 -0400 Subject: [PATCH 5/5] Fix expected data type in integration_test --- integration_tests/macros/text_type_value.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/macros/text_type_value.sql 
b/integration_tests/macros/text_type_value.sql index 4c70ffa..e1176f7 100644 --- a/integration_tests/macros/text_type_value.sql +++ b/integration_tests/macros/text_type_value.sql @@ -1,6 +1,6 @@ {%- macro text_type_value(case_sensitive=False) -%} {%- if target.type == "redshift"-%} - text + character varying {%- elif target.type == "snowflake" -%} {%- if case_sensitive -%}VARCHAR{%- else -%}varchar{%- endif -%} {%- elif target.type == "bigquery" -%}