diff --git a/CHANGELOG.md b/CHANGELOG.md index 58de5f1..368d5fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,33 @@ ---> # Unreleased + +## 🚨 Breaking change + +`include_data_types` parameter added to `generate_model_yaml` and behavior changed for `generate_source`. Both default to `true`, +and the generated data types are lowercase to align with the dbt style guide. Scale & precision are **not** included. Previous logic for `generate_source` defaulted to `false` and the resulting data types were uppercase and included scale & precision ([#122](https://github.com/dbt-labs/dbt-codegen/pull/122)). + +[Dispatch](https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch) can be used to utilize the column data type formatting of previous versions. Namely, by adding this macro to your project: +```sql +{% macro default__data_type_format_source(column) %} + {{ return(column.data_type | upper) }} +{% endmacro %} +``` + +And then adding this within `dbt_project.yml`: +```yaml +dispatch: + - macro_namespace: codegen + search_order: ['my_project', 'codegen'] +``` + + ## New features - Addition of the [create_base_models](macros/create_base_models.sql) This macro generates a series of terminal commands (appended w) bash script which creates a new file in your dbt project based off the results of the [generate_base_model](macros/generate_base_model.sql) macro. Therefore, instead of outputting in the terminal, it will create the file for you. - Add `include_data_types` flag to `generate_source` macro ([#76](https://github.com/dbt-labs/dbt-codegen/pull/76)) - Add `get_models` macro in helper macros. This macro retrieves a list of models with specified prefix at the specified directory. It is designed to make creating yamls for multiple models easier. 
+- Add `include_data_types` flag to `generate_model_yaml` macro ([#122](https://github.com/dbt-labs/dbt-codegen/pull/122)) - Add optional arguments to include database and schema properties in `sources.yml` generated from `generate_source` ([#123](https://github.com/dbt-labs/dbt-codegen/issues/123)) ## Fixes @@ -28,6 +50,7 @@ This macro generates a series of terminal commands (appended w) bash script whic ## Contributors: - [@fivetran-joemarkiewicz](https://github.com/fivetran-joemarkiewicz) (#83) - [@GSokol](https://github.com/GSokol) (#76) +- [@linbug](https://github.com/linbug) (#120) # dbt-codegen v0.9.0 diff --git a/README.md b/README.md index 50056a8..653d1ca 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ source data is in. column names to your source definition. * `include_descriptions` (optional, default=False): Whether you want to add description placeholders to your source definition. -* `include_data_types` (optional, default=False): Whether you want to add data +* `include_data_types` (optional, default=True): Whether you want to add data types to your source columns definitions. * `table_pattern` (optional, default='%'): A table prefix / postfix that you want to subselect from all available tables within a given schema. @@ -87,10 +87,16 @@ or $ dbt run-operation generate_source --args '{"schema_name": "jaffle_shop", "database_name": "raw", "table_names":["table_1", "table_2"]}' ``` -Including data types: +or if you want to include column names and data types: ``` -$ dbt run-operation generate_source --args '{"schema_name": "jaffle_shop", "generate_columns": "true", "include_data_types": "true"}' +$ dbt run-operation generate_source --args '{"schema_name": "jaffle_shop", "generate_columns": true}' +``` + +or if you want to include column names without data types (the behavior of dbt-codegen <= v0.9.0): + +``` +$ dbt run-operation generate_source --args '{"schema_name": "jaffle_shop", "generate_columns": true, "include_data_types": false}' ``` 2. 
The YAML for the source will be logged to the command line @@ -208,6 +214,7 @@ schema.yml file. ### Arguments: * `model_names` (required): The model(s) you wish to generate YAML for. * `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models. +* `include_data_types` (optional, default=True): Whether you want to add data types to your model column definitions. ### Usage: 1. Create a model. @@ -241,10 +248,13 @@ version: 2 models: - name: customers + description: "" columns: - name: customer_id + data_type: integer description: "" - name: customer_name + data_type: text description: "" ``` @@ -255,7 +265,7 @@ This macro generates the SQL for a given model with all references pulled up int ### Arguments: * `model_name` (required): The model you wish to generate SQL with import CTEs for. -* `leading_commas` (optional, default = false): Whether you want your commas to be leading (vs trailing). +* `leading_commas` (optional, default=False): Whether you want your commas to be leading (vs trailing). ### Usage: 1. 
Create a model with your original SQL query diff --git a/integration_tests/macros/integer_type_value.sql b/integration_tests/macros/integer_type_value.sql index f447110..207a43e 100644 --- a/integration_tests/macros/integer_type_value.sql +++ b/integration_tests/macros/integer_type_value.sql @@ -1,9 +1,9 @@ {%- macro integer_type_value() -%} {%- if target.type == "snowflake" -%} -NUMBER(38,0) +number {%- elif target.type == "bigquery" -%} -INT64 +int64 {%- else -%} -INTEGER +integer {%- endif -%} {%- endmacro -%} diff --git a/integration_tests/macros/text_type_value.sql b/integration_tests/macros/text_type_value.sql index 929813f..514d4a8 100644 --- a/integration_tests/macros/text_type_value.sql +++ b/integration_tests/macros/text_type_value.sql @@ -1,11 +1,11 @@ -{%- macro text_type_value(text_length) -%} -{%- if target.type == "redshift" -%} -CHARACTER VARYING({{ text_length }}) +{%- macro text_type_value() -%} +{%- if target.type == "redshift"-%} +character varying {%- elif target.type == "snowflake" -%} -CHARACTER VARYING(16777216) +varchar {%- elif target.type == "bigquery" -%} -STRING +string {%- else -%} -TEXT +text {%- endif -%} {%- endmacro -%} diff --git a/integration_tests/tests/test_generate_model_struct_yaml.sql b/integration_tests/tests/test_generate_model_struct_yaml.sql index fc33bf4..148323b 100644 --- a/integration_tests/tests/test_generate_model_struct_yaml.sql +++ b/integration_tests/tests/test_generate_model_struct_yaml.sql @@ -10,7 +10,8 @@ ) %} {% set actual_source_yaml = codegen.generate_model_yaml( - model_names=['model_struct'] + model_names=['model_struct'], + include_data_types=False ) %} diff --git a/integration_tests/tests/test_generate_model_yaml.sql b/integration_tests/tests/test_generate_model_yaml.sql index dbb9747..18e7a6a 100644 --- a/integration_tests/tests/test_generate_model_yaml.sql +++ b/integration_tests/tests/test_generate_model_yaml.sql @@ -11,9 +11,11 @@ models: description: "" columns: - name: col_a + data_type: {{ 
integer_type_value() }} description: "" - name: col_b + data_type: {{ text_type_value() }} description: "" {% endset %} diff --git a/integration_tests/tests/test_generate_model_yaml_multiple_models.sql b/integration_tests/tests/test_generate_model_yaml_multiple_models.sql index 843959f..c3dc203 100644 --- a/integration_tests/tests/test_generate_model_yaml_multiple_models.sql +++ b/integration_tests/tests/test_generate_model_yaml_multiple_models.sql @@ -1,5 +1,6 @@ {% set actual_model_yaml = codegen.generate_model_yaml( - model_names=['data__a_relation','data__b_relation'] + model_names=['data__a_relation','data__b_relation'], + include_data_types=False ) %} diff --git a/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql b/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql index 71ecd89..73e6687 100644 --- a/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql +++ b/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql @@ -1,6 +1,7 @@ {% set actual_model_yaml = codegen.generate_model_yaml( model_names=['child_model'], - upstream_descriptions=True + upstream_descriptions=True, + include_data_types=False ) %} diff --git a/integration_tests/tests/test_generate_source_all_args.sql b/integration_tests/tests/test_generate_source_all_args.sql index f287aef..cf6703a 100644 --- a/integration_tests/tests/test_generate_source_all_args.sql +++ b/integration_tests/tests/test_generate_source_all_args.sql @@ -33,7 +33,7 @@ sources: data_type: {{ integer_type_value() }} description: "" - name: col_b - data_type: {{ text_type_value(1) }} + data_type: {{ text_type_value() }} description: "" - name: data__b_relation @@ -43,26 +43,26 @@ sources: data_type: {{ integer_type_value() }} description: "" - name: col_b - data_type: {{ text_type_value(1) }} + data_type: {{ text_type_value() }} description: "" - name: data__campaign_analytics description: "" columns: - name: source - data_type: {{ 
text_type_value(8) }} + data_type: {{ text_type_value() }} description: "" - name: medium - data_type: {{ text_type_value(8) }} + data_type: {{ text_type_value() }} description: "" - name: source_medium - data_type: {{ text_type_value(2) }} + data_type: {{ text_type_value() }} description: "" - name: analytics data_type: {{ integer_type_value() }} description: "" - name: col_x - data_type: {{ text_type_value(1) }} + data_type: {{ text_type_value() }} description: "" {% endset %} diff --git a/integration_tests/tests/test_generate_source_some_tables.sql b/integration_tests/tests/test_generate_source_some_tables.sql index 525f712..1d7b43f 100644 --- a/integration_tests/tests/test_generate_source_some_tables.sql +++ b/integration_tests/tests/test_generate_source_some_tables.sql @@ -6,7 +6,8 @@ database_name=target.database, table_names=['data__a_relation'], generate_columns=True, - include_descriptions=True + include_descriptions=True, + include_data_types=False ) %} diff --git a/macros/generate_model_yaml.sql b/macros/generate_model_yaml.sql index cdeab71..f226536 100644 --- a/macros/generate_model_yaml.sql +++ b/macros/generate_model_yaml.sql @@ -1,4 +1,4 @@ -{% macro generate_column_yaml(column, model_yaml, column_desc_dict, parent_column_name="") %} +{% macro generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types, parent_column_name="") %} {% if parent_column_name %} {% set column_name = parent_column_name ~ "." 
~ column.name %} {% else %} @@ -6,18 +6,21 @@ {% endif %} {% do model_yaml.append(' - name: ' ~ column_name | lower ) %} + {% if include_data_types %} + {% do model_yaml.append(' data_type: ' ~ codegen.data_type_format_model(column)) %} + {% endif %} {% do model_yaml.append(' description: "' ~ column_desc_dict.get(column.name | lower,'') ~ '"') %} {% do model_yaml.append('') %} {% if column.fields|length > 0 %} {% for child_column in column.fields %} - {% set model_yaml = codegen.generate_column_yaml(child_column, model_yaml, column_desc_dict, parent_column_name=column_name) %} + {% set model_yaml = codegen.generate_column_yaml(child_column, model_yaml, column_desc_dict, include_data_types, parent_column_name=column_name) %} {% endfor %} {% endif %} {% do return(model_yaml) %} {% endmacro %} -{% macro generate_model_yaml(model_names=[], upstream_descriptions=False) %} +{% macro generate_model_yaml(model_names=[], upstream_descriptions=False, include_data_types=True) %} {% set model_yaml=[] %} @@ -38,7 +41,7 @@ {% set column_desc_dict = codegen.build_dict_column_descriptions(model) if upstream_descriptions else {} %} {% for column in columns %} - {% set model_yaml = codegen.generate_column_yaml(column, model_yaml, column_desc_dict) %} + {% set model_yaml = codegen.generate_column_yaml(column, model_yaml, column_desc_dict, include_data_types) %} {% endfor %} {% endfor %} {% endif %} diff --git a/macros/generate_source.sql b/macros/generate_source.sql index 5097655..31fd3e7 100644 --- a/macros/generate_source.sql +++ b/macros/generate_source.sql @@ -15,7 +15,7 @@ --- -{% macro generate_source(schema_name, database_name=target.database, generate_columns=False, include_descriptions=False, include_data_types=False, table_pattern='%', exclude='', name=schema_name, table_names=None, include_database=False, include_schema=False) %} +{% macro generate_source(schema_name, database_name=target.database, generate_columns=False, include_descriptions=False, 
include_data_types=True, table_pattern='%', exclude='', name=schema_name, table_names=None, include_database=False, include_schema=False) %} {% set sources_yaml=[] %} {% do sources_yaml.append('version: 2') %} @@ -62,7 +62,7 @@ {% for column in columns %} {% do sources_yaml.append(' - name: ' ~ column.name | lower ) %} {% if include_data_types %} - {% do sources_yaml.append(' data_type: ' ~ (column.data_type | upper ) ) %} + {% do sources_yaml.append(' data_type: ' ~ codegen.data_type_format_source(column)) %} {% endif %} {% if include_descriptions %} {% do sources_yaml.append(' description: ""' ) %} diff --git a/macros/helpers/helpers.sql b/macros/helpers/helpers.sql index fd28c87..557858e 100644 --- a/macros/helpers/helpers.sql +++ b/macros/helpers/helpers.sql @@ -57,4 +57,24 @@ {% endfor %} {% endif %} {{ return(model_names) }} -{% endmacro %} \ No newline at end of file +{% endmacro %} + +{% macro data_type_format_source(column) -%} + {{ return(adapter.dispatch('data_type_format_source', 'codegen')(column)) }} +{%- endmacro %} + +{# format a column data type for a source #} +{% macro default__data_type_format_source(column) %} + {% set formatted = codegen.format_column(column) %} + {{ return(formatted['data_type'] | lower) }} +{% endmacro %} + +{% macro data_type_format_model(column) -%} + {{ return(adapter.dispatch('data_type_format_model', 'codegen')(column)) }} +{%- endmacro %} + +{# format a column data type for a model #} +{% macro default__data_type_format_model(column) %} + {% set formatted = codegen.format_column(column) %} + {{ return(formatted['data_type'] | lower) }} +{% endmacro %} diff --git a/macros/vendored/dbt_core/format_column.sql b/macros/vendored/dbt_core/format_column.sql new file mode 100644 index 0000000..a7a6669 --- /dev/null +++ b/macros/vendored/dbt_core/format_column.sql @@ -0,0 +1,5 @@ +{% macro format_column(column) -%} + {% set data_type = column.dtype %} + {% set formatted = column.column.lower() ~ " " ~ data_type %} + {{ 
return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} +{%- endmacro -%}