Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug] Use a list instead of a set for index changes to perserve order #73

Merged
merged 5 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240425-133401.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Replace usage of `Set` with `List` to fix issue with index updates intermittently happening out of order
time: 2024-04-25T13:34:01.018399-04:00
custom:
Author: mikealfare
Issue: "72"
20 changes: 12 additions & 8 deletions dbt/adapters/postgres/relation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
from typing import FrozenSet, Optional, Set
from typing import FrozenSet, List, Optional

from dbt.adapters.base.relation import BaseRelation
from dbt.adapters.contracts.relation import RelationConfig, RelationType
Expand Down Expand Up @@ -79,7 +79,7 @@ def _get_index_config_changes(
self,
existing_indexes: FrozenSet[PostgresIndexConfig],
new_indexes: FrozenSet[PostgresIndexConfig],
) -> Set[PostgresIndexConfigChange]:
) -> List[PostgresIndexConfigChange]:
"""
Get the index updates that will occur as a result of a new run

Expand All @@ -90,18 +90,22 @@ def _get_index_config_changes(
3. Index is old -> drop these
4. Indexes are not equal -> drop old, create new -> two actions

Returns: a set of index updates in the form {"action": "drop/create", "context": <IndexConfig>}
*Note:*
The order of the operations matters here because if the same index is dropped and recreated
(e.g. via --full-refresh) then we need to drop it first, then create it.

Returns: an ordered list of index updates in the form {"action": "drop/create", "context": <IndexConfig>}
"""
drop_changes = set(
drop_changes = [
PostgresIndexConfigChange.from_dict(
{"action": RelationConfigChangeAction.drop, "context": index}
)
for index in existing_indexes.difference(new_indexes)
)
create_changes = set(
]
create_changes = [
PostgresIndexConfigChange.from_dict(
{"action": RelationConfigChangeAction.create, "context": index}
)
for index in new_indexes.difference(existing_indexes)
)
return set().union(drop_changes, create_changes) # type: ignore
]
return drop_changes + create_changes
4 changes: 2 additions & 2 deletions dbt/adapters/postgres/relation_configs/materialized_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict:

@dataclass
class PostgresMaterializedViewConfigChangeCollection:
indexes: Set[PostgresIndexConfigChange] = field(default_factory=set)
indexes: List[PostgresIndexConfigChange] = field(default_factory=list)

@property
def requires_full_refresh(self) -> bool:
return any(index.requires_full_refresh for index in self.indexes)

@property
def has_changes(self) -> bool:
return self.indexes != set()
return self.indexes != []
60 changes: 60 additions & 0 deletions tests/unit/test_materialized_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from copy import deepcopy

from dbt.adapters.contracts.relation import RelationType
from dbt.adapters.relation_configs.config_change import RelationConfigChangeAction

from dbt.adapters.postgres.relation import PostgresRelation
from dbt.adapters.postgres.relation_configs import PostgresIndexConfig


def test_index_config_changes():
index_0_old = {
"name": "my_index_0",
"column_names": {"column_0"},
"unique": True,
"method": "btree",
}
index_1_old = {
"name": "my_index_1",
"column_names": {"column_1"},
"unique": True,
"method": "btree",
}
index_2_old = {
"name": "my_index_2",
"column_names": {"column_2"},
"unique": True,
"method": "btree",
}
existing_indexes = frozenset(
PostgresIndexConfig.from_dict(index) for index in [index_0_old, index_1_old, index_2_old]
)

index_0_new = deepcopy(index_0_old)
index_2_new = deepcopy(index_2_old)
index_2_new.update(method="hash")
index_3_new = {
"name": "my_index_3",
"column_names": {"column_3"},
"unique": True,
"method": "hash",
}
new_indexes = frozenset(
PostgresIndexConfig.from_dict(index) for index in [index_0_new, index_2_new, index_3_new]
)

relation = PostgresRelation.create(
database="my_database",
schema="my_schema",
identifier="my_materialized_view",
type=RelationType.MaterializedView,
)

index_changes = relation._get_index_config_changes(existing_indexes, new_indexes)

assert isinstance(index_changes, list)
assert len(index_changes) == len(["drop 1", "drop 2", "create 2", "create 3"])
assert index_changes[0].action == RelationConfigChangeAction.drop
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
assert index_changes[1].action == RelationConfigChangeAction.drop
assert index_changes[2].action == RelationConfigChangeAction.create
assert index_changes[3].action == RelationConfigChangeAction.create
Loading