Skip to content

Commit

Permalink
fix(FIR-12466): Allow indexes on columns with spaces (#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
ptiurin authored Sep 17, 2024
1 parent f434826 commit ff0ed62
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .changes/unreleased/Fixed-20240912-171349.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Fixed
body: Fixed an issue when indexes would break on columns containing spaces
time: 2024-09-12T17:13:49.62438+01:00
40 changes: 36 additions & 4 deletions dbt/adapters/firebolt/impl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, List, Mapping, Optional, Union

import agate
Expand Down Expand Up @@ -29,36 +28,69 @@
from dbt.adapters.firebolt.relation import FireboltRelation


def quote_columns(columns: Union[str, List[str]]) -> Union[str, List[str]]:
if isinstance(columns, str):
return f'"{columns}"'
quoted_columns = []
for col in columns:
if col.startswith('"') and col.endswith('"'):
quoted_columns.append(col)
else:
quoted_columns.append(f'"{col}"')
return quoted_columns


@dataclass
class FireboltIndexConfig(dbtClassMixin):
index_type: str
index_name: Optional[str] = None
join_columns: Optional[Union[str, List[str]]] = None
key_columns: Optional[Union[str, List[str]]] = None
dimension_column: Optional[Union[str, List[str]]] = None
aggregation: Optional[Union[str, List[str]]] = None

def __post_init__(self) -> None:
# quote unquoted columns
if self.join_columns:
self.join_columns = quote_columns(self.join_columns)
if self.key_columns:
self.key_columns = quote_columns(self.key_columns)
if self.dimension_column:
self.dimension_column = quote_columns(self.dimension_column)

def render_name(self, relation: FireboltRelation) -> str:
"""
Name an index according to the following format, joined by `_`:
index type, relation name, key/join columns, timestamp (unix & UTC)
example index name: join_my_model_customer_id_1633504263.
"""
now_unix = str(int(time.mktime(datetime.utcnow().timetuple())))
if self.index_name:
return self.index_name
now_unix = str(int(time.time()))
# If column_names is a list with > 1 members, join with _,
# otherwise do not. We were getting index names like
# join__idx__emf_customers__f_i_r_s_t___n_a_m_e__165093112.
column_names = self.key_columns or self.join_columns
spine_col = (
'_'.join(column_names) if isinstance(column_names, list) else column_names
)
# Additional hash to ensure uniqueness after spaces are removed
column_hash = str(hash(spine_col))[:5]
inputs = [
f'{self.index_type}_idx',
relation.identifier,
spine_col,
column_hash,
now_unix,
]
string = '__'.join([x for x in inputs if x is not None])
return string
index_name = '__'.join([x for x in inputs if x is not None])
if len(index_name) > 255:
# Firebolt index names must be <= 255 characters.
# If the index name is too long, hash it.
return f'{self.index_type}_idx_{hash(index_name)}'
# Remove any spaces or quotes from the index name.
index_name = index_name.replace(' ', '_').replace('"', '')
return index_name

@classmethod
def parse(cls, raw_index: Optional[Mapping]) -> Optional['FireboltIndexConfig']:
Expand Down
4 changes: 2 additions & 2 deletions dbt/include/firebolt/macros/relations/table/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
{%- if primary_index %}
primary INDEX {% if primary_index is iterable and primary_index is not string %}
{{ primary_index | join(', ') }}
"{{ primary_index | join('", "') }}"
{%- else -%}
{{ primary_index }}
"{{ primary_index }}"
{%- endif -%}
{%- endif -%}
Expand Down
124 changes: 124 additions & 0 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import pytest
from dbt.adapters.contracts.relation import Path
from dbt_common.exceptions import CompilationError

from dbt.adapters.firebolt.impl import (
FireboltConfig,
FireboltIndexConfig,
FireboltRelation,
quote_columns,
)


@pytest.mark.parametrize(
'column, expected',
[
('column', '"column"'),
(['column1', 'column2'], ['"column1"', '"column2"']),
(['"column1"', '"column2"'], ['"column1"', '"column2"']),
(['column 1', 'column 2'], ['"column 1"', '"column 2"']),
],
)
def test_quote_columns(column, expected):
assert quote_columns(column) == expected


def test_firebolt_index_config_render_name():
relation = FireboltRelation(
path=Path(database='my_database', schema='my_schema', identifier='my_model')
)
index_config = FireboltIndexConfig(
index_type='join',
join_columns=['column1', 'column2'],
dimension_column='dimension',
)
assert index_config.render_name(relation).startswith(
'join_idx__my_model__column1_column2'
)


def test_firebolt_index_config_render_name_with_special_characters():
relation = FireboltRelation(
path=Path(database='my_database', schema='my_schema', identifier='my_model')
)
index_config = FireboltIndexConfig(
index_type='join',
join_columns=['"column 1"', '"column2"'],
dimension_column='dimension',
)
assert index_config.render_name(relation).startswith(
'join_idx__my_model__column_1_column2'
)


def test_firebolt_index_config_render_with_long_name():
relation = FireboltRelation(
path=Path(database='my_database', schema='my_schema', identifier='my_model')
)
index_config = FireboltIndexConfig(
index_type='join', join_columns=['column1'] * 50, dimension_column='dimension'
)
assert len(index_config.render_name(relation)) < 255


def test_firebolt_index_config_parse():
raw_index = {
'index_type': 'join',
'join_columns': ['column1', 'column2'],
'dimension_column': 'dimension',
}
index_config = FireboltIndexConfig.parse(raw_index)
assert index_config.index_type == 'join'
assert index_config.join_columns == ['"column1"', '"column2"']
assert index_config.dimension_column == '"dimension"'


@pytest.mark.parametrize(
'raw_index',
[
# invalid index type
{
'index_type': 'fantastic',
'join_columns': ['column1'],
'dimension_column': 'dimension',
},
# missing dimension column
{'index_type': 'join', 'join_columns': ['column1']},
# missing aggregation
{'index_type': 'aggregating', 'key_columns': ['key1']},
# missing aggregation but containing extra columns
{
'index_type': 'aggregating',
'key_columns': ['key1'],
'join_columns': ['column1'],
'dimension_column': 'dimension',
},
],
)
def test_firebolt_index_config_parse_with_errors(raw_index):
with pytest.raises(CompilationError):
FireboltIndexConfig.parse(raw_index)


def test_firebolt_index_config_parse_invalid():
with pytest.raises(CompilationError) as e:
FireboltIndexConfig.parse({'index_type': 'join', 'join_columns': [1222]})
assert 'Could not parse index' in str(e.value)


def test_firebolt_config():
config = FireboltConfig(
indexes=[
FireboltIndexConfig(
index_type='join',
join_columns=['column1', 'column2'],
dimension_column='dimension',
),
FireboltIndexConfig(
index_type='aggregating',
key_columns=['key1', 'key2'],
aggregation='aggregation',
),
]
)
assert len(config.indexes) == 2

0 comments on commit ff0ed62

Please sign in to comment.