From 70500796a7de4591bed2b7f87eeb95060ac449a9 Mon Sep 17 00:00:00 2001 From: Luciano Vitti Date: Fri, 7 Jun 2024 09:46:26 -0300 Subject: [PATCH 1/3] fix config change detection not working for multiple sortkey --- .../relation_configs/materialized_view.py | 15 ++- .../redshift/relation_configs/sort.py | 28 +++-- .../relations/materialized_view/describe.sql | 22 +++- .../test_materialized_view.py | 56 ++++++++++ tests/unit/test_renamed_relations.py | 104 +++++++++++++++++- 5 files changed, 208 insertions(+), 17 deletions(-) diff --git a/dbt/adapters/redshift/relation_configs/materialized_view.py b/dbt/adapters/redshift/relation_configs/materialized_view.py index f6d93754e..0743fbf37 100644 --- a/dbt/adapters/redshift/relation_configs/materialized_view.py +++ b/dbt/adapters/redshift/relation_configs/materialized_view.py @@ -167,6 +167,15 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: "query": agate.Table( agate.Row({"definition": "")} ), + "column_descriptor": agate.Table( + agate.Row({ + "schema": "", + "table": "", + "column": "", + "is_sort_key": any(true, false), + "is_dist_key: any(true, false), + }) + ), } Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available. @@ -197,10 +206,10 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: {"dist": RedshiftDistConfig.parse_relation_results(materialized_view)} ) - # TODO: this only shows the first column in the sort key - if materialized_view.get("sortkey1"): + column_descriptor = relation_results.get("column_descriptor") + if column_descriptor and len(column_descriptor.rows) > 0: config_dict.update( - {"sort": RedshiftSortConfig.parse_relation_results(materialized_view)} + {"sort": RedshiftSortConfig.parse_relation_results(column_descriptor.rows)} ) return config_dict diff --git a/dbt/adapters/redshift/relation_configs/sort.py b/dbt/adapters/redshift/relation_configs/sort.py index 91152615e..ac3582670 100644 --- a/dbt/adapters/redshift/relation_configs/sort.py +++ b/dbt/adapters/redshift/relation_configs/sort.py @@ -2,7 +2,7 @@ from dbt.adapters.contracts.relation import RelationConfig from typing import Optional, FrozenSet, Set, Dict, Any -import agate +from agate import MappedSequence from dbt.adapters.relation_configs import ( RelationConfigChange, RelationConfigChangeAction, @@ -136,7 +136,7 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: + def parse_relation_results(cls, relation_results_entry: MappedSequence) -> dict: """ Translate agate objects from the database into a standard dictionary. @@ -145,20 +145,24 @@ def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: Processing of `sortstyle` has been omitted here, which means it's the default (compound). Args: - relation_results_entry: the description of the sortkey and sortstyle from the database in this format: - - agate.Row({ - ..., - "sortkey1": "", - ... - }) + relation_results_entry: The list of rows that contains the sortkey in this format: + [ + agate.Row({ + ..., + "column": "", + "is_sort_key": True, + ... + }), + ] Returns: a standard dictionary describing this `RedshiftSortConfig` instance """ - if sortkey := relation_results_entry.get("sortkey1"): - return {"sortkey": {sortkey}} - return {} + sort_config = [] + for column in relation_results_entry: + if column.get("is_sort_key"): + sort_config.append(column.get("column")) + return {"sortkey": sort_config} @dataclass(frozen=True, eq=True, unsafe_hash=True) class RedshiftSortConfigChange(RelationConfigChange, RelationConfigValidationMixin): diff --git a/dbt/include/redshift/macros/relations/materialized_view/describe.sql b/dbt/include/redshift/macros/relations/materialized_view/describe.sql index a56d0756e..77dd0e708 100644 --- a/dbt/include/redshift/macros/relations/materialized_view/describe.sql +++ b/dbt/include/redshift/macros/relations/materialized_view/describe.sql @@ -24,6 +24,22 @@ {%- endset %} {% set _materialized_view = run_query(_materialized_view_sql) %} + {%- set _column_descriptor_sql -%} + SELECT + n.nspname AS schema, + c.relname AS table, + a.attname as column, + a.attisdistkey as is_dist_key, + a.attsortkeyord > 0 as is_sort_key + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + JOIN pg_attribute a ON a.attrelid = c.oid + WHERE + n.nspname ilike '{{ relation.schema }}' + AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%' + {%- endset %} + {% set _column_descriptor = run_query(_column_descriptor_sql) %} + {%- set _query_sql -%} select vw.definition @@ -34,6 +50,10 @@ {%- endset %} {% set _query = run_query(_query_sql) %} - {% do return({'materialized_view': _materialized_view, 'query': _query}) %} + {% do return({ + 'materialized_view': _materialized_view, + 'query': _query, + 'column_descriptor': _column_descriptor, + })%} {% endmacro %} diff --git a/tests/unit/relation_configs/test_materialized_view.py b/tests/unit/relation_configs/test_materialized_view.py index 8e4f6ca3e..fb077d412 100644 --- a/tests/unit/relation_configs/test_materialized_view.py +++ b/tests/unit/relation_configs/test_materialized_view.py @@ -1,5 +1,6 @@ from unittest.mock import Mock +import agate import pytest from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig @@ -53,3 +54,58 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali ) with pytest.raises(ValueError): config.parse_relation_config(model_node) + + +def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key(): + materialized_view = agate.Table.from_object( + [], + [ + "database", + "schema", + "table", + "diststyle", + "sortkey1", + "autorefresh", + ], + ) + + column_descriptor = agate.Table.from_object( + [ + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column", + "is_dist_key": True, + "is_sort_key": True, + }, + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column2", + "is_dist_key": True, + "is_sort_key": True, + }, + ], + ) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "column_descriptor": column_descriptor, + "query": query, + } + + config_dict = RedshiftMaterializedViewConfig.parse_relation_results( + relation_results + ) + + assert isinstance(config_dict["sort"], dict) + assert config_dict["sort"]["sortkey"][0] == "my_column" + assert config_dict["sort"]["sortkey"][1] == "my_column2" diff --git a/tests/unit/test_renamed_relations.py b/tests/unit/test_renamed_relations.py index 4817ab100..d6c4f483c 100644 --- a/tests/unit/test_renamed_relations.py +++ b/tests/unit/test_renamed_relations.py @@ -1,5 +1,15 @@ +from unittest.mock import Mock + +import agate +import pytest + from dbt.adapters.redshift.relation import RedshiftRelation -from dbt.adapters.contracts.relation import RelationType +from dbt.adapters.contracts.relation import ( + RelationType, + RelationConfig, +) + +from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle def test_renameable_relation(): @@ -15,3 +25,95 @@ def test_renameable_relation(): RelationType.Table, } ) + + +@pytest.fixture +def materialized_view_multiple_sort_key_from_db(): + materialized_view = agate.Table.from_object( + [ + { + "database": "my_db", + "schema": "my_schema", + "table": "my_table", + } + ], + ) + + column_descriptor = agate.Table.from_object( + [ + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column", + "is_dist_key": True, + "is_sort_key": True, + }, + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column2", + "is_dist_key": True, + "is_sort_key": True, + }, + ], + ) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "column_descriptor": column_descriptor, + "query": query, + } + return relation_results + + +@pytest.fixture +def materialized_view_multiple_sort_key_config(): + relation_config = Mock(spec=RelationConfig) + + relation_config.database = "my_db" + relation_config.identifier = "my_table" + relation_config.schema = "my_schema" + relation_config.config = Mock() + relation_config.config.extra = { + "sort_type": RedshiftSortStyle.compound, + "sort": ["my_column", "my_column2"], + } + relation_config.config.sort = "" + relation_config.compiled_code = "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + return relation_config + + +def test_materialized_view_config_changeset_multiple_sort_key_without_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is None + + +def test_materialized_view_config_changeset_multiple_sort_key_with_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3") + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is not None + assert change_set.sort.context.sortkey == frozenset({"my_column", "my_column2", "my_column3"}) From 284c34565340d3ee663cac3e65d66a0c3a573a0e Mon Sep 17 00:00:00 2001 From: Luciano Vitti Date: Fri, 7 Jun 2024 13:08:04 -0300 Subject: [PATCH 2/3] add test --- .../relation_configs/materialized_view.py | 10 +-- tests/unit/test_renamed_relations.py | 71 ++++++++++++------- 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/dbt/adapters/redshift/relation_configs/materialized_view.py b/dbt/adapters/redshift/relation_configs/materialized_view.py index 0743fbf37..1c418159c 100644 --- a/dbt/adapters/redshift/relation_configs/materialized_view.py +++ b/dbt/adapters/redshift/relation_configs/materialized_view.py @@ -207,10 +207,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: ) column_descriptor = relation_results.get("column_descriptor") - if column_descriptor and len(column_descriptor.rows) > 0: - config_dict.update( - {"sort": RedshiftSortConfig.parse_relation_results(column_descriptor.rows)} - ) + if column_descriptor: + sort_columns = [row for row in column_descriptor.rows if row.get("is_sort_key")] + if sort_columns: + config_dict.update( + {"sort": RedshiftSortConfig.parse_relation_results(sort_columns)} + ) return config_dict diff --git a/tests/unit/test_renamed_relations.py b/tests/unit/test_renamed_relations.py index d6c4f483c..f2d8a5373 100644 --- a/tests/unit/test_renamed_relations.py +++ b/tests/unit/test_renamed_relations.py @@ -28,7 +28,7 @@ def test_renameable_relation(): @pytest.fixture -def materialized_view_multiple_sort_key_from_db(): +def materialized_view_without_sort_key_from_db(): materialized_view = agate.Table.from_object( [ { @@ -39,24 +39,7 @@ def materialized_view_multiple_sort_key_from_db(): ], ) - column_descriptor = agate.Table.from_object( - [ - { - "schema": "my_schema", - "table": "my_table", - "column": "my_column", - "is_dist_key": True, - "is_sort_key": True, - }, - { - "schema": "my_schema", - "table": "my_table", - "column": "my_column2", - "is_dist_key": True, - "is_sort_key": True, - }, - ], - ) + column_descriptor = agate.Table.from_object([]) query = agate.Table.from_object( [ @@ -75,22 +58,62 @@ def materialized_view_multiple_sort_key_from_db(): @pytest.fixture -def materialized_view_multiple_sort_key_config(): +def materialized_view_without_sort_key_config(): relation_config = Mock(spec=RelationConfig) relation_config.database = "my_db" relation_config.identifier = "my_table" relation_config.schema = "my_schema" relation_config.config = Mock() - relation_config.config.extra = { - "sort_type": RedshiftSortStyle.compound, - "sort": ["my_column", "my_column2"], - } + relation_config.config.extra = {} relation_config.config.sort = "" relation_config.compiled_code = "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" return relation_config +@pytest.fixture +def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db): + materialized_view_without_sort_key_from_db["column_descriptor"] = agate.Table.from_object( + [ + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column", + "is_dist_key": True, + "is_sort_key": True, + }, + { + "schema": "my_schema", + "table": "my_table", + "column": "my_column2", + "is_dist_key": True, + "is_sort_key": True, + }, + ], + ) + return materialized_view_without_sort_key_from_db + + +@pytest.fixture +def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config): + materialized_view_without_sort_key_config.config.extra = { + "sort_type": RedshiftSortStyle.compound, + "sort": ["my_column", "my_column2"], + } + + return materialized_view_without_sort_key_config + +def test_materialized_view_config_changeset_without_sort_key_empty_changes( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, +): + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, + ) + + assert change_set is None + def test_materialized_view_config_changeset_multiple_sort_key_without_changes( materialized_view_multiple_sort_key_from_db, materialized_view_multiple_sort_key_config, From b07ac9c937502663d5d324c4a0e6ab3ad93980e7 Mon Sep 17 00:00:00 2001 From: Luciano Vitti Date: Mon, 10 Jun 2024 10:41:55 -0300 Subject: [PATCH 3/3] changelog --- .changes/unreleased/Fixes-20240610-104114.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20240610-104114.yaml diff --git a/.changes/unreleased/Fixes-20240610-104114.yaml b/.changes/unreleased/Fixes-20240610-104114.yaml new file mode 100644 index 000000000..d220288f8 --- /dev/null +++ b/.changes/unreleased/Fixes-20240610-104114.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix config change detection not working for multiple sortkey in materialized views +time: 2024-06-10T10:41:14.721411-03:00 +custom: + Author: lvitti + Issue: "838"