Skip to content

Commit

Permalink
Merge pull request #3 from Shiphero/lvitti/backport_1.8_fix_config_ch…
Browse files Browse the repository at this point in the history
…ange_detection_not_working_for_multiple_sortkey

[Backport 1.8] fix config change detection not working for multiple sortkey
  • Loading branch information
lvitti authored Jun 18, 2024
2 parents 817712d + b07ac9c commit 52f321f
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 19 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240610-104114.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix config change detection not working for multiple sortkey in materialized views
time: 2024-06-10T10:41:14.721411-03:00
custom:
Author: lvitti
Issue: "838"
21 changes: 16 additions & 5 deletions dbt/adapters/redshift/relation_configs/materialized_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,15 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
"query": agate.Table(
agate.Row({"definition": "<query>")}
),
"column_descriptor": agate.Table(
agate.Row({
"schema": "<schema_name>",
"table": "<table_name>",
"column": "<column_name>",
"is_sort_key": any(true, false),
"is_dist_key: any(true, false),
})
),
}
Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available.
Expand Down Expand Up @@ -197,11 +206,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
{"dist": RedshiftDistConfig.parse_relation_results(materialized_view)}
)

# TODO: this only shows the first column in the sort key
if materialized_view.get("sortkey1"):
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(materialized_view)}
)
column_descriptor = relation_results.get("column_descriptor")
if column_descriptor:
sort_columns = [row for row in column_descriptor.rows if row.get("is_sort_key")]
if sort_columns:
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(sort_columns)}
)

return config_dict

Expand Down
28 changes: 16 additions & 12 deletions dbt/adapters/redshift/relation_configs/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dbt.adapters.contracts.relation import RelationConfig
from typing import Optional, FrozenSet, Set, Dict, Any

import agate
from agate import MappedSequence
from dbt.adapters.relation_configs import (
RelationConfigChange,
RelationConfigChangeAction,
Expand Down Expand Up @@ -136,7 +136,7 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any
return config_dict

@classmethod
def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict:
def parse_relation_results(cls, relation_results_entry: MappedSequence) -> dict:
"""
Translate agate objects from the database into a standard dictionary.
Expand All @@ -145,20 +145,24 @@ def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict:
Processing of `sortstyle` has been omitted here, which means it's the default (compound).
Args:
relation_results_entry: the description of the sortkey and sortstyle from the database in this format:
agate.Row({
...,
"sortkey1": "<column_name>",
...
})
relation_results_entry: The list of rows that contains the sortkey in this format:
[
agate.Row({
...,
"column": "<column_name>",
"is_sort_key": True,
...
}),
]
Returns: a standard dictionary describing this `RedshiftSortConfig` instance
"""
if sortkey := relation_results_entry.get("sortkey1"):
return {"sortkey": {sortkey}}
return {}
sort_config = []
for column in relation_results_entry:
if column.get("is_sort_key"):
sort_config.append(column.get("column"))

return {"sortkey": sort_config}

@dataclass(frozen=True, eq=True, unsafe_hash=True)
class RedshiftSortConfigChange(RelationConfigChange, RelationConfigValidationMixin):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@
{%- endset %}
{% set _materialized_view = run_query(_materialized_view_sql) %}

{%- set _column_descriptor_sql -%}
SELECT
n.nspname AS schema,
c.relname AS table,
a.attname as column,
a.attisdistkey as is_dist_key,
a.attsortkeyord > 0 as is_sort_key
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE
n.nspname ilike '{{ relation.schema }}'
AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%'
{%- endset %}
{% set _column_descriptor = run_query(_column_descriptor_sql) %}

{%- set _query_sql -%}
select
vw.definition
Expand All @@ -34,6 +50,10 @@
{%- endset %}
{% set _query = run_query(_query_sql) %}

{% do return({'materialized_view': _materialized_view, 'query': _query}) %}
{% do return({
'materialized_view': _materialized_view,
'query': _query,
'column_descriptor': _column_descriptor,
})%}

{% endmacro %}
56 changes: 56 additions & 0 deletions tests/unit/relation_configs/test_materialized_view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig
Expand Down Expand Up @@ -53,3 +54,58 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali
)
with pytest.raises(ValueError):
config.parse_relation_config(model_node)


def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key():
materialized_view = agate.Table.from_object(
[],
[
"database",
"schema",
"table",
"diststyle",
"sortkey1",
"autorefresh",
],
)

column_descriptor = agate.Table.from_object(
[
{
"schema": "my_schema",
"table": "my_table",
"column": "my_column",
"is_dist_key": True,
"is_sort_key": True,
},
{
"schema": "my_schema",
"table": "my_table",
"column": "my_column2",
"is_dist_key": True,
"is_sort_key": True,
},
],
)

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"column_descriptor": column_descriptor,
"query": query,
}

config_dict = RedshiftMaterializedViewConfig.parse_relation_results(
relation_results
)

assert isinstance(config_dict["sort"], dict)
assert config_dict["sort"]["sortkey"][0] == "my_column"
assert config_dict["sort"]["sortkey"][1] == "my_column2"
127 changes: 126 additions & 1 deletion tests/unit/test_renamed_relations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation import RedshiftRelation
from dbt.adapters.contracts.relation import RelationType
from dbt.adapters.contracts.relation import (
RelationType,
RelationConfig,
)

from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle


def test_renameable_relation():
Expand All @@ -15,3 +25,118 @@ def test_renameable_relation():
RelationType.Table,
}
)


@pytest.fixture
def materialized_view_without_sort_key_from_db():
materialized_view = agate.Table.from_object(
[
{
"database": "my_db",
"schema": "my_schema",
"table": "my_table",
}
],
)

column_descriptor = agate.Table.from_object([])

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"column_descriptor": column_descriptor,
"query": query,
}
return relation_results


@pytest.fixture
def materialized_view_without_sort_key_config():
relation_config = Mock(spec=RelationConfig)

relation_config.database = "my_db"
relation_config.identifier = "my_table"
relation_config.schema = "my_schema"
relation_config.config = Mock()
relation_config.config.extra = {}
relation_config.config.sort = ""
relation_config.compiled_code = "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
return relation_config


@pytest.fixture
def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db):
materialized_view_without_sort_key_from_db["column_descriptor"] = agate.Table.from_object(
[
{
"schema": "my_schema",
"table": "my_table",
"column": "my_column",
"is_dist_key": True,
"is_sort_key": True,
},
{
"schema": "my_schema",
"table": "my_table",
"column": "my_column2",
"is_dist_key": True,
"is_sort_key": True,
},
],
)
return materialized_view_without_sort_key_from_db


@pytest.fixture
def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config):
materialized_view_without_sort_key_config.config.extra = {
"sort_type": RedshiftSortStyle.compound,
"sort": ["my_column", "my_column2"],
}

return materialized_view_without_sort_key_config

def test_materialized_view_config_changeset_without_sort_key_empty_changes(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
):
change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
)

assert change_set is None

def test_materialized_view_config_changeset_multiple_sort_key_without_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is None


def test_materialized_view_config_changeset_multiple_sort_key_with_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):
materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3")

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is not None
assert change_set.sort.context.sortkey == frozenset({"my_column", "my_column2", "my_column3"})

0 comments on commit 52f321f

Please sign in to comment.