diff --git a/.changes/unreleased/Fixed-20240912-171349.yaml b/.changes/unreleased/Fixed-20240912-171349.yaml new file mode 100644 index 000000000..b4f97c252 --- /dev/null +++ b/.changes/unreleased/Fixed-20240912-171349.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: Fixed an issue when indexes would break on columns containing spaces +time: 2024-09-12T17:13:49.62438+01:00 diff --git a/dbt/adapters/firebolt/impl.py b/dbt/adapters/firebolt/impl.py index 67fa58e3d..add2b137c 100644 --- a/dbt/adapters/firebolt/impl.py +++ b/dbt/adapters/firebolt/impl.py @@ -1,7 +1,6 @@ import re import time from dataclasses import dataclass -from datetime import datetime from typing import Any, List, Mapping, Optional, Union import agate @@ -29,21 +28,45 @@ from dbt.adapters.firebolt.relation import FireboltRelation +def quote_columns(columns: Union[str, List[str]]) -> Union[str, List[str]]: + if isinstance(columns, str): + return f'"{columns}"' + quoted_columns = [] + for col in columns: + if col.startswith('"') and col.endswith('"'): + quoted_columns.append(col) + else: + quoted_columns.append(f'"{col}"') + return quoted_columns + + @dataclass class FireboltIndexConfig(dbtClassMixin): index_type: str + index_name: Optional[str] = None join_columns: Optional[Union[str, List[str]]] = None key_columns: Optional[Union[str, List[str]]] = None dimension_column: Optional[Union[str, List[str]]] = None aggregation: Optional[Union[str, List[str]]] = None + def __post_init__(self) -> None: + # quote unquoted columns + if self.join_columns: + self.join_columns = quote_columns(self.join_columns) + if self.key_columns: + self.key_columns = quote_columns(self.key_columns) + if self.dimension_column: + self.dimension_column = quote_columns(self.dimension_column) + def render_name(self, relation: FireboltRelation) -> str: """ Name an index according to the following format, joined by `_`: index type, relation name, key/join columns, timestamp (unix & UTC) example index name: join_my_model_customer_id_1633504263. """ - now_unix = str(int(time.mktime(datetime.utcnow().timetuple()))) + if self.index_name: + return self.index_name + now_unix = str(int(time.time())) # If column_names is a list with > 1 members, join with _, # otherwise do not. We were getting index names like # join__idx__emf_customers__f_i_r_s_t___n_a_m_e__165093112. @@ -51,14 +74,23 @@ def render_name(self, relation: FireboltRelation) -> str: spine_col = ( '_'.join(column_names) if isinstance(column_names, list) else column_names ) + # Additional hash to ensure uniqueness after spaces are removed + column_hash = str(hash(spine_col))[:5] inputs = [ f'{self.index_type}_idx', relation.identifier, spine_col, + column_hash, now_unix, ] - string = '__'.join([x for x in inputs if x is not None]) - return string + index_name = '__'.join([x for x in inputs if x is not None]) + if len(index_name) > 255: + # Firebolt index names must be <= 255 characters. + # If the index name is too long, hash it. + return f'{self.index_type}_idx_{hash(index_name)}' + # Remove any spaces or quotes from the index name. + index_name = index_name.replace(' ', '_').replace('"', '') + return index_name @classmethod def parse(cls, raw_index: Optional[Mapping]) -> Optional['FireboltIndexConfig']: diff --git a/dbt/include/firebolt/macros/relations/table/create.sql b/dbt/include/firebolt/macros/relations/table/create.sql index 5b5a47c8d..5bd33a401 100644 --- a/dbt/include/firebolt/macros/relations/table/create.sql +++ b/dbt/include/firebolt/macros/relations/table/create.sql @@ -49,9 +49,9 @@ {%- if primary_index %} primary INDEX {% if primary_index is iterable and primary_index is not string %} - {{ primary_index | join(', ') }} + "{{ primary_index | join('", "') }}" {%- else -%} - {{ primary_index }} + "{{ primary_index }}" {%- endif -%} {%- endif -%} diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py new file mode 100644 index 000000000..cfaf71775 --- /dev/null +++ b/tests/unit/test_index.py @@ -0,0 +1,124 @@ +import pytest +from dbt.adapters.contracts.relation import Path +from dbt_common.exceptions import CompilationError + +from dbt.adapters.firebolt.impl import ( + FireboltConfig, + FireboltIndexConfig, + FireboltRelation, + quote_columns, +) + + +@pytest.mark.parametrize( + 'column, expected', + [ + ('column', '"column"'), + (['column1', 'column2'], ['"column1"', '"column2"']), + (['"column1"', '"column2"'], ['"column1"', '"column2"']), + (['column 1', 'column 2'], ['"column 1"', '"column 2"']), + ], +) +def test_quote_columns(column, expected): + assert quote_columns(column) == expected + + +def test_firebolt_index_config_render_name(): + relation = FireboltRelation( + path=Path(database='my_database', schema='my_schema', identifier='my_model') + ) + index_config = FireboltIndexConfig( + index_type='join', + join_columns=['column1', 'column2'], + dimension_column='dimension', + ) + assert index_config.render_name(relation).startswith( + 'join_idx__my_model__column1_column2' + ) + + +def test_firebolt_index_config_render_name_with_special_characters(): + relation = FireboltRelation( + path=Path(database='my_database', schema='my_schema', identifier='my_model') + ) + index_config = FireboltIndexConfig( + index_type='join', + join_columns=['"column 1"', '"column2"'], + dimension_column='dimension', + ) + assert index_config.render_name(relation).startswith( + 'join_idx__my_model__column_1_column2' + ) + + +def test_firebolt_index_config_render_with_long_name(): + relation = FireboltRelation( + path=Path(database='my_database', schema='my_schema', identifier='my_model') + ) + index_config = FireboltIndexConfig( + index_type='join', join_columns=['column1'] * 50, dimension_column='dimension' + ) + assert len(index_config.render_name(relation)) < 255 + + +def test_firebolt_index_config_parse(): + raw_index = { + 'index_type': 'join', + 'join_columns': ['column1', 'column2'], + 'dimension_column': 'dimension', + } + index_config = FireboltIndexConfig.parse(raw_index) + assert index_config.index_type == 'join' + assert index_config.join_columns == ['"column1"', '"column2"'] + assert index_config.dimension_column == '"dimension"' + + +@pytest.mark.parametrize( + 'raw_index', + [ + # invalid index type + { + 'index_type': 'fantastic', + 'join_columns': ['column1'], + 'dimension_column': 'dimension', + }, + # missing dimension column + {'index_type': 'join', 'join_columns': ['column1']}, + # missing aggregation + {'index_type': 'aggregating', 'key_columns': ['key1']}, + # missing aggregation but containing extra columns + { + 'index_type': 'aggregating', + 'key_columns': ['key1'], + 'join_columns': ['column1'], + 'dimension_column': 'dimension', + }, + ], +) +def test_firebolt_index_config_parse_with_errors(raw_index): + with pytest.raises(CompilationError): + FireboltIndexConfig.parse(raw_index) + + +def test_firebolt_index_config_parse_invalid(): + with pytest.raises(CompilationError) as e: + FireboltIndexConfig.parse({'index_type': 'join', 'join_columns': [1222]}) + assert 'Could not parse index' in str(e.value) + + +def test_firebolt_config(): + config = FireboltConfig( + indexes=[ + FireboltIndexConfig( + index_type='join', + join_columns=['column1', 'column2'], + dimension_column='dimension', + ), + FireboltIndexConfig( + index_type='aggregating', + key_columns=['key1', 'key2'], + aggregation='aggregation', + ), + ] + ) + assert len(config.indexes) == 2