diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01e4099ae..3836b040d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 - Allow schema to be specified in testing (thanks @case-k-git!) ([538](https://github.com/databricks/dbt-databricks/pull/538))
 - Fix dbt incremental_strategy behavior by fixing schema table existing check (thanks @case-k-git!) ([530](https://github.com/databricks/dbt-databricks/pull/530))
 - Fixed bug that was causing streaming tables to be dropped and recreated instead of refreshed. ([552](https://github.com/databricks/dbt-databricks/pull/552))
+- Fixed Hive performance regression by streamlining materialization type acquisition ([557](https://github.com/databricks/dbt-databricks/pull/557))
 - Fix: Python models authentication could be overridden by a `.netrc` file in the user's home directory ([338](https://github.com/databricks/dbt-databricks/pull/338))
 
 ### Under the Hood
diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
index 0a4b142c3..2eaa6a87f 100644
--- a/dbt/adapters/databricks/impl.py
+++ b/dbt/adapters/databricks/impl.py
@@ -283,46 +283,77 @@ def get_relations_without_caching(self, relation: DatabricksRelation) -> Table:
 
         # if there are any table types to be resolved
         if any(not row[3] for row in new_rows):
-            # Get view names and create a dictionary of view name to materialization
-            relation_all_tables = self.Relation.create(
-                database=relation.database, schema=relation.schema, identifier="*"
-            )
+            if is_hive_metastore(relation.database):
+                new_rows = self._get_hive_types(relation, new_rows)
+            else:
+                new_rows = self._get_uc_types(relation, new_rows)
 
-            with self._catalog(relation.database):
-                views = self.execute_macro(SHOW_VIEWS_MACRO_NAME, kwargs=kwargs)
-                tables = self.execute_macro(
-                    SHOW_TABLE_EXTENDED_MACRO_NAME,
-                    kwargs={"schema_relation": relation_all_tables},
-                )
-                view_names: Dict[str, bool] = {
-                    view["viewName"]: view.get("isMaterialized", False) for view in views
-                }
-                table_names: Dict[str, bool] = {
-                    table["tableName"]: (self._parse_type(table["information"]) == "STREAMING_TABLE")
-                    for table in tables
-                }
-
-            # create a new collection of rows with the correct table types
-            new_rows = [
+        return Table(
+            new_rows,
+            column_names=["database_name", "schema_name", "name", "kind"],
+            column_types=[Text(), Text(), Text(), Text()],
+        )
+
+    def _get_hive_types(
+        self, relation: DatabricksRelation, new_rows: List[Tuple[Optional[str], str, str, str]]
+    ) -> List[Tuple[Optional[str], str, str, str]]:
+        kwargs = {"relation": relation}
+
+        with self._catalog(relation.database):
+            views = self.execute_macro(SHOW_VIEWS_MACRO_NAME, kwargs=kwargs)
+
+        view_names = set(views.columns["viewName"].values())  # type: ignore[attr-defined]
+        return [
             (
                 row[0],
                 row[1],
                 row[2],
-                str(
-                    row[3]
-                    if row[3]
-                    else self._type_from_names(row[0], row[2], view_names, table_names)
-                ),
+                str(RelationType.View if row[2] in view_names else RelationType.Table),
             )
             for row in new_rows
         ]
 
-        return Table(
-            new_rows,
-            column_names=["database_name", "schema_name", "name", "kind"],
-            column_types=[Text(), Text(), Text(), Text()],
+    def _get_uc_types(
+        self, relation: DatabricksRelation, new_rows: List[Tuple[Optional[str], str, str, str]]
+    ) -> List[Tuple[Optional[str], str, str, str]]:
+        kwargs = {"relation": relation}
+
+        # Get view names and create a dictionary of view name to materialization
+        relation_all_tables = self.Relation.create(
+            database=relation.database, schema=relation.schema, identifier="*"
         )
+        with self._catalog(relation.database):
+            views = self.execute_macro(SHOW_VIEWS_MACRO_NAME, kwargs=kwargs)
+            tables = self.execute_macro(
+                SHOW_TABLE_EXTENDED_MACRO_NAME,
+                kwargs={"schema_relation": relation_all_tables},
+            )
+            view_names: Dict[str, bool] = {
+                view["viewName"]: view.get("isMaterialized", False) for view in views
+            }
+            table_names: Dict[str, bool] = {
+                table["tableName"]: (self._parse_type(table["information"]) == "STREAMING_TABLE")
+                for table in tables
+            }
+
+        # create a new collection of rows with the correct table types
+        new_rows = [
+            (
+                row[0],
+                row[1],
+                row[2],
+                str(
+                    row[3]
+                    if row[3]
+                    else self._type_from_names(row[0], row[2], view_names, table_names)
+                ),
+            )
+            for row in new_rows
+        ]
+
+        return new_rows
+
     def _parse_type(self, information: str) -> str:
         type_entry = [
             entry.split(":")[1].strip()