Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix config change detection not working for multiple sortkey in materialized views #841

6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240610-104114.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix config change detection not working for multiple sortkey in materialized views
time: 2024-06-10T10:41:14.721411-03:00
custom:
Author: lvitti
Issue: "838"
18 changes: 13 additions & 5 deletions dbt/adapters/redshift/relation_configs/materialized_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
"query": agate.Table(
agate.Row({"definition": "<query>")}
),
"columns": agate.Table(
agate.Row({
"column": "<column_name>",
"sort_key_position": <int>,
"is_dist_key: any(true, false),
})
),
}

Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available.
Expand Down Expand Up @@ -199,11 +206,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
{"dist": RedshiftDistConfig.parse_relation_results(materialized_view)}
)

# TODO: this only shows the first column in the sort key
if materialized_view.get("sortkey1"):
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(materialized_view)}
)
if columns := relation_results.get("columns"):
sort_columns = [row for row in columns.rows if row.get("sort_key_position", 0) > 0]
if sort_columns:
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(sort_columns)}
)

return config_dict

Expand Down
37 changes: 22 additions & 15 deletions dbt/adapters/redshift/relation_configs/sort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from dbt.adapters.contracts.relation import RelationConfig
from typing import Optional, FrozenSet, Set, Dict, Any, TYPE_CHECKING
from typing import Optional, Set, Dict, Any, TYPE_CHECKING, Tuple

from dbt.adapters.relation_configs import (
RelationConfigChange,
Expand Down Expand Up @@ -46,7 +46,7 @@ class RedshiftSortConfig(RedshiftRelationConfigBase, RelationConfigValidationMix
"""

sortstyle: Optional[RedshiftSortStyle] = None
sortkey: Optional[FrozenSet[str]] = None
sortkey: Optional[Tuple[str]] = None

def __post_init__(self):
# maintains `frozen=True` while allowing for a variable default on `sort_type`
Expand Down Expand Up @@ -103,7 +103,7 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]:
def from_dict(cls, config_dict) -> Self:
kwargs_dict = {
"sortstyle": config_dict.get("sortstyle"),
"sortkey": frozenset(column for column in config_dict.get("sortkey", {})),
"sortkey": tuple(column for column in config_dict.get("sortkey", {})),
}
sort: Self = super().from_dict(kwargs_dict) # type: ignore
return sort # type: ignore
Expand Down Expand Up @@ -133,12 +133,12 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any
if isinstance(sortkey, str):
sortkey = [sortkey]

config_dict.update({"sortkey": set(sortkey)})
config_dict.update({"sortkey": tuple(sortkey)})

return config_dict

@classmethod
def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
def parse_relation_results(cls, relation_results_entry: "agate.MappedSequence") -> dict:
"""
Translate agate objects from the database into a standard dictionary.

Expand All @@ -147,19 +147,26 @@ def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
Processing of `sortstyle` has been omitted here, which means it's the default (compound).

Args:
relation_results_entry: the description of the sortkey and sortstyle from the database in this format:

agate.Row({
...,
"sortkey1": "<column_name>",
...
})
relation_results_entry: The list of rows that contains the sortkey in this format:
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
[
agate.Row({
...,
"column": "<column_name>",
"sort_key_position": <int>,
...
}),
]

Returns: a standard dictionary describing this `RedshiftSortConfig` instance
"""
if sortkey := relation_results_entry.get("sortkey1"):
return {"sortkey": {sortkey}}
return {}
sort_config = []

sorted_columns = sorted(relation_results_entry, key=lambda x: x["sort_key_position"])
for column in sorted_columns:
if column.get("sort_key_position"):
sort_config.append(column.get("column"))

return {"sortkey": sort_config}


@dataclass(frozen=True, eq=True, unsafe_hash=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@
{%- endset %}
{% set _materialized_view = run_query(_materialized_view_sql) %}

{%- set _column_descriptor_sql -%}
SELECT
a.attname as column,
a.attisdistkey as is_dist_key,
a.attsortkeyord as sort_key_position
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE
n.nspname ilike '{{ relation.schema }}'
AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tried this with multiple config changes for the materialized view? Is there only ever one relation of the form mv_tbl__{{ relation.identifier }}__%? Do we need to look at the latest version? I'm asking because I'm genuinely not sure how Redshift manages that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the way we're using it, they have only one relationship.
However, I'll do a deeper analysis regarding that and get back to you with feedback.
Good observation, thanks

{%- endset %}
{% set _column_descriptor = run_query(_column_descriptor_sql) %}

{%- set _query_sql -%}
select
vw.definition
Expand All @@ -34,6 +48,10 @@
{%- endset %}
{% set _query = run_query(_query_sql) %}

{% do return({'materialized_view': _materialized_view, 'query': _query}) %}
{% do return({
'materialized_view': _materialized_view,
'query': _query,
'columns': _column_descriptor,
})%}

{% endmacro %}
55 changes: 55 additions & 0 deletions tests/unit/test_materialized_view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig
Expand Down Expand Up @@ -53,3 +54,57 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali
)
with pytest.raises(ValueError):
config.parse_relation_config(model_node)


def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key():
materialized_view = agate.Table.from_object(
[],
[
"database",
"schema",
"table",
"diststyle",
"sortkey1",
"autorefresh",
],
)

column_descriptor = agate.Table.from_object(
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
[
{
"column": "my_column",
"is_dist_key": True,
"sort_key_position": 1,
},
{
"column": "my_column2",
"is_dist_key": True,
"sort_key_position": 2,
},
{
"column": "my_column5",
"is_dist_key": False,
"sort_key_position": 0,
},
],
)

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"columns": column_descriptor,
"query": query,
}

config_dict = RedshiftMaterializedViewConfig.parse_relation_results(relation_results)

assert isinstance(config_dict["sort"], dict)
assert config_dict["sort"]["sortkey"][0] == "my_column"
assert config_dict["sort"]["sortkey"][1] == "my_column2"
142 changes: 141 additions & 1 deletion tests/unit/test_relation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation import RedshiftRelation
from dbt.adapters.contracts.relation import RelationType
from dbt.adapters.contracts.relation import (
RelationType,
RelationConfig,
)

from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle


def test_renameable_relation():
Expand All @@ -15,3 +25,133 @@ def test_renameable_relation():
RelationType.Table,
}
)


@pytest.fixture
def materialized_view_without_sort_key_from_db():
materialized_view = agate.Table.from_object(
[
{
"database": "my_db",
"schema": "my_schema",
"table": "my_table",
}
],
)

column_descriptor = agate.Table.from_object([])

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"columns": column_descriptor,
"query": query,
}
return relation_results


@pytest.fixture
def materialized_view_without_sort_key_config():
relation_config = Mock(spec=RelationConfig)

relation_config.database = "my_db"
relation_config.identifier = "my_table"
relation_config.schema = "my_schema"
relation_config.config = Mock()
relation_config.config.extra = {}
relation_config.config.sort = ""
relation_config.compiled_code = (
"create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
)
return relation_config


@pytest.fixture
def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db):
materialized_view_without_sort_key_from_db["columns"] = agate.Table.from_object(
[
{
"column": "my_column",
"is_dist_key": True,
"sort_key_position": 1,
},
{
"column": "my_column2",
"is_dist_key": True,
"sort_key_position": 2,
},
],
)
return materialized_view_without_sort_key_from_db


@pytest.fixture
def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config):
materialized_view_without_sort_key_config.config.extra = {
"sort_type": RedshiftSortStyle.compound,
"sort": ["my_column", "my_column2"],
}

return materialized_view_without_sort_key_config


def test_materialized_view_config_changeset_without_sort_key_empty_changes(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
):
change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
)

assert change_set is None


def test_materialized_view_config_changeset_multiple_sort_key_without_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is None


def test_materialized_view_config_changeset_multiple_sort_key_with_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):
materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3")

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is not None
assert change_set.sort.context.sortkey == ("my_column", "my_column2", "my_column3")


def test_materialized_view_config_changeset_multiple_sort_key_with_changes_in_order_column(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):
materialized_view_multiple_sort_key_config.config.extra["sort"] = ["my_column2", "my_column"]

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is not None
assert change_set.sort.context.sortkey == ("my_column2", "my_column")
Loading