Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file_format and table_format configurations #438

Merged
merged 2 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20241015-174143.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Add file_format and table_format configurations
time: 2024-10-15T17:41:43.584728+02:00
custom:
Author: damian3031
Issue: ""
PR: "438"
48 changes: 40 additions & 8 deletions dbt/include/trino/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
{% macro trino__create_csv_table(model, agate_table) %}
{%- set column_override = model['config'].get('column_types', {}) -%}
{%- set quote_seed_column = model['config'].get('quote_columns', None) -%}
{%- set _properties = config.get('properties') -%}

{% set sql %}
create table {{ this.render() }} (
Expand All @@ -68,7 +67,7 @@
{%- set column_name = (col_name | string) -%}
{{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}
{%- endfor -%}
) {{ properties(_properties) }}
) {{ properties() }}
{% endset %}

{% call statement('_') -%}
Expand All @@ -78,10 +77,44 @@
{{ return(sql) }}
{% endmacro %}

{% macro properties(properties) %}
{%- if properties is not none -%}
{% macro properties() %}
{%- set _properties = config.get('properties') -%}
{%- set table_format = config.get('table_format') -%}
{%- set file_format = config.get('file_format') -%}

{%- if file_format -%}
{%- if _properties -%}
{%- if _properties.format -%}
{% set msg %}
You can specify either 'file_format' or 'properties.format' configurations, but not both.
{% endset %}
{% do exceptions.raise_compiler_error(msg) %}
{%- else -%}
{%- do _properties.update({'format': file_format}) -%}
{%- endif -%}
{%- else -%}
{%- set _properties = {'format': file_format} -%}
{%- endif -%}
{%- endif -%}

{%- if table_format -%}
{%- if _properties -%}
{%- if _properties.type -%}
{% set msg %}
You can specify either 'table_format' or 'properties.type' configurations, but not both.
{% endset %}
{% do exceptions.raise_compiler_error(msg) %}
{%- else -%}
{%- do _properties.update({'type': table_format}) -%}
{%- endif -%}
{%- else -%}
{%- set _properties = {'type': table_format} -%}
{%- endif -%}
{%- endif -%}

{%- if _properties is not none -%}
WITH (
{%- for key, value in properties.items() -%}
{%- for key, value in _properties.items() -%}
{{ key }} = {{ value }}
{%- if not loop.last -%}{{ ',\n ' }}{%- endif -%}
{%- endfor -%}
Expand All @@ -100,7 +133,6 @@
{%- endmacro -%}

{% macro trino__create_table_as(temporary, relation, sql, replace=False) -%}
{%- set _properties = config.get('properties') -%}

{%- if replace -%}
{%- set or_replace = ' or replace' -%}
Expand All @@ -117,7 +149,7 @@
{{ get_assert_columns_equivalent(sql) }}
{%- set sql = get_select_subquery(sql) %}
{{ comment(model.get('description')) }}
{{ properties(_properties) }}
{{ properties() }}
;

insert into {{ relation }}
Expand All @@ -130,7 +162,7 @@

create{{ or_replace }} table {{ relation }}
{{ comment(model.get('description')) }}
{{ properties(_properties) }}
{{ properties() }}
as (
{{ sql }}
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{%- macro trino__get_create_materialized_view_as_sql(target_relation, sql) -%}
{%- set _properties = config.get('properties') -%}
create materialized view {{ target_relation }}
{{ properties(_properties) }}
{{ properties() }}
as
{{ sql }}
;
Expand Down
165 changes: 165 additions & 0 deletions tests/functional/adapter/test_table_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import pytest
from dbt.tests.util import run_dbt, run_dbt_and_capture

from tests.functional.adapter.materialization.fixtures import model_sql, seed_csv


class BaseTableProperties:
    """Shared project layout for the table-properties functional tests.

    Provides one seed file and one model; subclasses supply the project
    configuration that is being exercised.
    """

    @pytest.fixture(scope="class")
    def seeds(self):
        """Return everything that goes in the "seeds" directory."""
        seed_files = {}
        seed_files["seed.csv"] = seed_csv
        return seed_files

    @pytest.fixture(scope="class")
    def models(self):
        """Return everything that goes in the "models" directory."""
        model_files = {}
        model_files["model.sql"] = model_sql
        return model_files


@pytest.mark.iceberg
class TestTableProperties(BaseTableProperties):
    """Check that `+properties` from dbt_project.yml appear in the run logs."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return the dbt_project.yml overrides used by this test class."""
        table_properties = {
            "format": "'PARQUET'",
            "format_version": "2",
        }
        model_config = {
            "+materialized": "table",
            "+properties": table_properties,
        }
        return {"name": "properties_test", "models": model_config}

    def test_table_properties(self, project):
        """Seed, run the model, then look for the WITH clause in the debug logs."""
        # Load the seed first.
        seed_results = run_dbt(["seed"], expect_pass=True)
        assert len(seed_results) == 1

        # Build the model with the configured properties and capture the logs.
        run_results, debug_output = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
        assert len(run_results) == 1
        for fragment in ("WITH (", "format = 'PARQUET'", "format_version = 2"):
            assert fragment in debug_output


@pytest.mark.iceberg
class TestFileFormatConfig(BaseTableProperties):
    """Check that the `file_format` config is logged as a `format` table property."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return the dbt_project.yml overrides used by this test class."""
        model_config = {
            "+materialized": "table",
            "file_format": "'PARQUET'",
        }
        return {"name": "properties_test", "models": model_config}

    def test_table_properties(self, project):
        """Seed, run the model, then look for the format property in the debug logs."""
        # Load the seed first.
        seed_results = run_dbt(["seed"], expect_pass=True)
        assert len(seed_results) == 1

        # Build the model and capture the logs.
        run_results, debug_output = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
        assert len(run_results) == 1
        for fragment in ("WITH (", "format = 'PARQUET'"):
            assert fragment in debug_output


@pytest.mark.iceberg
class TestFileFormatConfigAndFormatTablePropertyFail(BaseTableProperties):
    """Check that setting both `file_format` and `properties.format` is reported as an error."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return dbt_project.yml overrides that set the format in two places."""
        model_config = {
            "+materialized": "table",
            "+properties": {
                "format": "'PARQUET'",
            },
            "file_format": "'ORC'",
        }
        return {"name": "properties_test", "models": model_config}

    def test_table_properties(self, project):
        """Seed, run the model expecting failure, then check the logged error text."""
        # Load the seed first.
        seed_results = run_dbt(["seed"], expect_pass=True)
        assert len(seed_results) == 1

        # The run is invoked with expect_pass=False; one result is still returned.
        run_results, debug_output = run_dbt_and_capture(["--debug", "run"], expect_pass=False)
        assert len(run_results) == 1

        expected_message = (
            "You can specify either 'file_format' or 'properties.format' configurations, but not both."
        )
        assert expected_message in debug_output


@pytest.mark.hive
# Setting `type` property is available only in Starburst Galaxy
# https://docs.starburst.io/starburst-galaxy/data-engineering/working-with-data-lakes/table-formats/gl-iceberg.html
@pytest.mark.skip_profile("trino_starburst")
class TestTableFormatConfig(BaseTableProperties):
    """Check that the `table_format` config is logged as a `type` table property."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return the dbt_project.yml overrides used by this test class."""
        model_config = {
            "+materialized": "table",
            "table_format": "'iceberg'",
        }
        return {"name": "properties_test", "models": model_config}

    def test_table_properties(self, project):
        """Seed, run the model, then look for the type property in the debug logs."""
        # Load the seed first.
        seed_results = run_dbt(["seed"], expect_pass=True)
        assert len(seed_results) == 1

        # Build the model and capture the logs.
        run_results, debug_output = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
        assert len(run_results) == 1
        for fragment in ("WITH (", "type = 'iceberg'"):
            assert fragment in debug_output


@pytest.mark.hive
# Setting `type` property is available only in Starburst Galaxy
# https://docs.starburst.io/starburst-galaxy/data-engineering/working-with-data-lakes/table-formats/gl-iceberg.html
@pytest.mark.skip_profile("trino_starburst")
class TestTableFormatConfigAndTypeTablePropertyFail(BaseTableProperties):
    """Check that setting both `table_format` and `properties.type` is reported as an error."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return dbt_project.yml overrides that set the table type in two places."""
        model_config = {
            "+materialized": "table",
            "+properties": {
                "type": "'iceberg'",
            },
            "table_format": "'iceberg'",
        }
        return {"name": "properties_test", "models": model_config}

    def test_table_properties(self, project):
        """Seed, run the model expecting failure, then check the logged error text."""
        # Load the seed first.
        seed_results = run_dbt(["seed"], expect_pass=True)
        assert len(seed_results) == 1

        # The run is invoked with expect_pass=False; one result is still returned.
        run_results, debug_output = run_dbt_and_capture(["--debug", "run"], expect_pass=False)
        assert len(run_results) == 1

        expected_message = (
            "You can specify either 'table_format' or 'properties.type' configurations, but not both."
        )
        assert expected_message in debug_output
Loading