Skip to content

Commit

Permalink
BigQuery repeated data type (#236)
Browse files Browse the repository at this point in the history
* Handle BigQuery repeated fields data_types

* include nested repeated structs

* override repeated struct data_type with array

* Add trailing newline

* update changelog

* Update CHANGELOG.md

---------

Co-authored-by: Doug Beatty <[email protected]>
  • Loading branch information
Thrasi and dbeatty10 authored Dec 13, 2024
1 parent 2f20e94 commit 5a31c62
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 1 deletion.
13 changes: 13 additions & 0 deletions integration_tests/models/model_repeated.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% if target.type == "bigquery" %}

{#- BigQuery-only fixture for the behavior requested in #190: one repeated
    scalar column plus one repeated struct column whose elements each hold
    a nested repeated struct. -#}
select
    [1, 2] as repeated_int,
    [
        struct(
            1 as nested_int_field,
            [struct("a" as string_field)] as nested_repeated_struct
        ),
        struct(
            2 as nested_int_field,
            [struct("a" as string_field)] as nested_repeated_struct
        )
    ] as repeated_struct

{% else %}

{#- Placeholder for every other adapter: a single integer column. -#}
select 1 as int_field

{% endif %}
56 changes: 56 additions & 0 deletions integration_tests/tests/test_generate_model_repeated_yaml.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{#- Verifies the YAML that codegen.generate_model_yaml produces for `model_repeated`.
    On BigQuery the model exposes repeated (array) columns, including nested
    repeated structs; on every other target it exposes a single integer column.
    The comparison at the bottom is an exact string match after trimming the
    outer whitespace, so whitespace inside the expected YAML is significant.
    NOTE(review): confirm the indentation of the expected YAML matches the
    generator's real output before relying on this test. -#}
{% set actual_model_yaml = codegen.generate_model_yaml(
    model_names=['model_repeated']
) %}

{% if target.type == "bigquery" %}

{% set expected_model_yaml %}
version: 2

models:
- name: model_repeated
description: ""
columns:
- name: repeated_int
data_type: array<int64>
description: ""

- name: repeated_struct
data_type: array
description: ""

- name: repeated_struct.nested_int_field
data_type: int64
description: ""

- name: repeated_struct.nested_repeated_struct
data_type: array
description: ""

- name: repeated_struct.nested_repeated_struct.string_field
data_type: string
description: ""

{% endset %}

{% else %}

{% set expected_model_yaml %}
version: 2

models:
- name: model_repeated
description: ""
columns:
- name: int_field
data_type: {{ integer_type_value() }}
description: ""

{% endset %}

{% endif %}

{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }}
2 changes: 1 addition & 1 deletion integration_tests/tests/test_helper_get_models.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
{% set actual_list = codegen.get_models(prefix='model_')|sort %}
{% endif %}

{% set expected_list = ['model_data_a', 'model_from_source', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %}
{% set expected_list = ['model_data_a', 'model_from_source', 'model_repeated', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %}

{{ assert_equal (actual_list, expected_list) }}
16 changes: 16 additions & 0 deletions macros/vendored/dbt_core/format_column.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
{% macro format_column(column) -%}
    {#- Public entry point: looks up the adapter-specific `format_column`
        implementation in the `codegen` namespace and returns whatever that
        implementation returns for `column`. -#}
    {%- set adapter_impl = adapter.dispatch('format_column', 'codegen') -%}
    {{ return(adapter_impl(column)) }}
{%- endmacro %}

{# Vendored from: https://github.com/dbt-labs/dbt-adapters/blob/c7b12aee533184bad391a657d1753539d1dd496a/dbt/include/global_project/macros/relations/column/columns_spec_ddl.sql#L85-L89 #}
{% macro default__format_column(column) -%}
    {#- Default implementation. Returns a dict with:
          name      - column.name
          data_type - column.dtype
          formatted - the lower-cased column.column, a space, then the data type -#}
    {%- set column_type = column.dtype -%}
    {%- set spec = {
        'name': column.name,
        'data_type': column_type,
        'formatted': column.column.lower() ~ " " ~ column_type
    } -%}
    {{ return(spec) }}
{%- endmacro -%}

{# Vendored from: https://github.com/dbt-labs/dbt-bigquery/blob/4d255b2f854d21d5d8871bdaa8d7ab47e7e863a3/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql#L1-L5 #}
{# But modified to handle https://github.com/dbt-labs/dbt-codegen/issues/190 #}
{% macro bigquery__format_column(column) -%}
    {#- BigQuery implementation. Uses column.data_type as the reported type,
        except that a column whose mode is "repeated" and whose dtype is
        "record" (both compared case-insensitively) is reported as plain "array".
        Returns a dict with `name`, `data_type` and `formatted`
        (lower-cased column.column, a space, then the data type). -#}
    {%- set native_type = column.data_type -%}
    {%- set is_repeated_record = column.mode.lower() == "repeated" and column.dtype.lower() == "record" -%}
    {%- set data_type = "array" if is_repeated_record else native_type -%}
    {%- set formatted = column.column.lower() ~ " " ~ data_type -%}
    {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }}
{%- endmacro -%}

0 comments on commit 5a31c62

Please sign in to comment.