diff --git a/dbt/adapters/databricks/relation_configs/materialized_view.py b/dbt/adapters/databricks/relation_configs/materialized_view.py index a01aba954..885b08051 100644 --- a/dbt/adapters/databricks/relation_configs/materialized_view.py +++ b/dbt/adapters/databricks/relation_configs/materialized_view.py @@ -13,6 +13,11 @@ from dbt.exceptions import DbtRuntimeError from dbt.adapters.databricks.relation_configs.base import DatabricksRelationConfigBase +from dbt.adapters.databricks.relation_configs.partition import ( + DatabricksPartitionConfig, + DatabricksPartitionConfigChange, +) +from dbt.adapters.databricks.relation_configs.schedule import DatabricksScheduleConfigChange from dbt.adapters.databricks.utils import evaluate_bool @@ -41,8 +46,8 @@ class DatabricksMaterializedViewConfig(DatabricksRelationConfigBase, RelationCon database_name: str query: str backup: bool = True - partition: str # to be done - schedule: str + partition: Optional[str] = None # to be done + schedule: Optional[str] = None @property def path(self) -> str: @@ -52,11 +57,9 @@ def path(self) -> str: if part is not None ) - # can be filled out later - # @property - # def validation_rules(self) -> Set[RelationConfigValidationRule]: - # # sort and dist rules get run by default with the mixin - # return {} + @property + def validation_rules(self) -> Set[RelationConfigValidationRule]: + return set() @classmethod def from_dict(cls, config_dict) -> "DatabricksMaterializedViewConfig": @@ -130,48 +133,24 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: "mv_name": materialized_view.get("table"), "schema_name": materialized_view.get("schema"), "database_name": materialized_view.get("database"), - "autorefresh": materialized_view.get("autorefresh"), + "schedule": materialized_view.get("schedule"), "query": cls._parse_query(query.get("definition")), } - # the default for materialized views differs from the default for diststyle in general - # only set it if we got a value - if
materialized_view.get("diststyle"): - config_dict.update( - {"dist": DatabricksDistConfig.parse_relation_results(materialized_view)} - ) - - # TODO: this only shows the first column in the sort key - if materialized_view.get("sortkey1"): - config_dict.update( - {"sort": DatabricksSortConfig.parse_relation_results(materialized_view)} - ) - return config_dict -# @dataclass(frozen=True, eq=True, unsafe_hash=True) -# class DatabricksAutoRefreshConfigChange(RelationConfigChange): -# context: Optional[bool] = None - -# @property -# def requires_full_refresh(self) -> bool: -# return False - - @dataclass class DatabricksMaterializedViewConfigChangeset: - dist: Optional[DatabricksPartitionConfigChange] = None - autorefresh: Optional[DatabricksAutoRefreshConfigChange] = None + partition: Optional[DatabricksPartitionConfigChange] = None + schedule: Optional[DatabricksScheduleConfigChange] = None @property def requires_full_refresh(self) -> bool: return any( { - self.autorefresh.requires_full_refresh if self.autorefresh else False, - self.backup.requires_full_refresh if self.backup else False, - self.dist.requires_full_refresh if self.dist else False, - self.sort.requires_full_refresh if self.sort else False, + self.schedule.requires_full_refresh if self.schedule else False, + self.partition.requires_full_refresh if self.partition else False, } ) @@ -179,9 +158,7 @@ def requires_full_refresh(self) -> bool: def has_changes(self) -> bool: return any( { - self.backup if self.backup else False, - self.dist if self.dist else False, - self.sort if self.sort else False, - self.autorefresh if self.autorefresh else False, + self.schedule if self.schedule else False, + self.partition if self.partition else False, } ) diff --git a/dbt/adapters/databricks/relation_configs/partition.py b/dbt/adapters/databricks/relation_configs/partition.py index 301be8872..29a5e102b 100644 --- a/dbt/adapters/databricks/relation_configs/partition.py +++
b/dbt/adapters/databricks/relation_configs/partition.py @@ -12,13 +12,11 @@ from dbt.dataclass_schema import StrEnum from dbt.exceptions import DbtRuntimeError -from dbt.adapters.databrick.relation_configs.base import DatabrickRelationConfigBase - - +from dbt.adapters.databricks.relation_configs.base import DatabricksRelationConfigBase @dataclass(frozen=True, eq=True, unsafe_hash=True) -class DatabricksPartitionConfig(DatabrickRelationConfigBase, RelationConfigValidationMixin): +class DatabricksPartitionConfig(DatabricksRelationConfigBase, RelationConfigValidationMixin): """ This config fallows the specs found here: https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html#partitioned-by @@ -40,20 +38,19 @@ def __post_init__(self): @property def validation_rules(self) -> Set[RelationConfigValidationRule]: - - #TODO - pass + # TODO + return set() @classmethod def from_dict(cls, config_dict): - #TODO - + # TODO + pass @classmethod def parse_model_node(cls, model_node: ModelNode) -> dict: - #TODO + # TODO pass @classmethod @@ -76,18 +73,14 @@ def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: Returns: a standard dictionary describing this `DatabrickSortConfig` instance """ - #TODO + # TODO pass @dataclass(frozen=True, eq=True, unsafe_hash=True) -class DatabrickSortConfigChange(RelationConfigChange, RelationConfigValidationMixin): +class DatabricksPartitionConfigChange(RelationConfigChange): + context: Optional[bool] = None @property def requires_full_refresh(self) -> bool: - return True - - @property - def validation_rules(self) -> Set[RelationConfigValidationRule]: - #TODO - pass + return False diff --git a/dbt/adapters/databricks/relation_configs/schedule.py b/dbt/adapters/databricks/relation_configs/schedule.py new file mode 100644 index 000000000..867026316 --- /dev/null +++ b/dbt/adapters/databricks/relation_configs/schedule.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass +from typing import Optional, Set + +import agate +from
dbt.adapters.relation_configs import ( + RelationConfigChange, + RelationConfigChangeAction, + RelationConfigValidationMixin, + RelationConfigValidationRule, +) +from dbt.contracts.graph.nodes import ModelNode +from dbt.adapters.databricks.relation_configs.base import DatabricksRelationConfigBase + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class DatabricksScheduleConfig(DatabricksRelationConfigBase, RelationConfigValidationMixin): + """ + This config follows the specs found here: + https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html#partitioned-by + + The following parameters are configurable by dbt: + - partition_column: the column identifier to be sorted + - column_type: the type of the specified thing + """ + + # sortkey: Optional[FrozenSet[str]] = None + + def __post_init__(self): + # # maintains `frozen=True` while allowing for a variable default on `sort_type` + # if self.sortstyle is None and self.sortkey is None: + # object.__setattr__(self, "sortstyle", DatabrickSortStyle.default()) + # elif self.sortstyle is None: + # object.__setattr__(self, "sortstyle", DatabrickSortStyle.default_with_columns()) + super().__post_init__() + + @property + def validation_rules(self) -> Set[RelationConfigValidationRule]: + # TODO + + return set() + + @classmethod + def from_dict(cls, config_dict): + # TODO + + pass + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + # TODO + pass + + @classmethod + def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: + """ + Translate agate objects from the database into a standard dictionary. + + Note: + This was only built for materialized views, which does not specify a sortstyle. + Processing of `sortstyle` has been omitted here, which means it's the default (compound). + + Args: + relation_results_entry: the description of the sortkey and sortstyle from the database in this format: + + agate.Row({ + ..., + "sortkey1": "", + ...
+ }) + + Returns: a standard dictionary describing this `DatabricksScheduleConfig` instance + """ + # TODO + pass + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class DatabricksScheduleConfigChange(RelationConfigChange): + context: Optional[bool] = None + + @property + def requires_full_refresh(self) -> bool: + return False diff --git a/tests/unit/relation_configs/test_materialized_view.py b/tests/unit/relation_configs/test_materialized_view.py index 789069f16..443dc1830 100644 --- a/tests/unit/relation_configs/test_materialized_view.py +++ b/tests/unit/relation_configs/test_materialized_view.py @@ -2,7 +2,9 @@ import pytest -from dbt.adapters.databricks.relation_configs import DatabricksMaterializedViewConfig +from dbt.adapters.databricks.relation_configs.materialized_view import ( + DatabricksMaterializedViewConfig, +) @pytest.mark.parametrize("bool_value", [True, False, "True", "False", "true", "false"])