From 06f6216d88b4c707e4f9cf6a12c147d8d4fc8685 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Tue, 12 Sep 2023 14:53:55 -0700 Subject: [PATCH 1/3] avoiding describe extended on hive_metastore since we know tables are of type table --- dbt/adapters/databricks/impl.py | 2 ++ .../models/new_model.sql | 7 ++++ .../seeds/preexisting_data.csv | 5 +++ .../test_avoid_describe_extended.py | 35 +++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 tests/integration/avoid_describe_extended/models/new_model.sql create mode 100644 tests/integration/avoid_describe_extended/seeds/preexisting_data.csv create mode 100644 tests/integration/avoid_describe_extended/test_avoid_describe_extended.py diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py index 2c6739b7b..2ad345e47 100644 --- a/dbt/adapters/databricks/impl.py +++ b/dbt/adapters/databricks/impl.py @@ -264,6 +264,8 @@ def typeFromNames( if view_names[name] else DatabricksRelationType.View ) + elif database is None or database == "hive_metastore": + return DatabricksRelationType.Table else: # not a view so it might be a streaming table # get extended information to determine diff --git a/tests/integration/avoid_describe_extended/models/new_model.sql b/tests/integration/avoid_describe_extended/models/new_model.sql new file mode 100644 index 000000000..01a4c688f --- /dev/null +++ b/tests/integration/avoid_describe_extended/models/new_model.sql @@ -0,0 +1,7 @@ +{{ config( + materialized = 'table' +) }} + +select cast(1 as bigint) as id, 'hello' as msg +union all +select cast(2 as bigint) as id, 'goodbye' as msg \ No newline at end of file diff --git a/tests/integration/avoid_describe_extended/seeds/preexisting_data.csv b/tests/integration/avoid_describe_extended/seeds/preexisting_data.csv new file mode 100644 index 000000000..c96e569bd --- /dev/null +++ b/tests/integration/avoid_describe_extended/seeds/preexisting_data.csv @@ -0,0 +1,5 @@ +id,msg +1,hello +2,goodbye +2,yo +3,anyway \ 
No newline at end of file diff --git a/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py b/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py new file mode 100644 index 000000000..c7aed92c0 --- /dev/null +++ b/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py @@ -0,0 +1,35 @@ +import os +import pytest +from tests.integration.base import DBTIntegrationTest, use_profile + + +class TestAvoidDescribeExtended(DBTIntegrationTest): + """Tests in this class exist to ensure we don't call describe extended unnecessarily. + This became a problem due to needing to discern tables from streaming tables, which is not + relevant on hive, but users on hive were having all of their tables describe extended-ed. + We only need to call describe extended if we are using a UC catalog and we can't determine the + type of the materialization.""" + + @property + def schema(self): + return "schema" + + @property + def models(self): + return "models" + + def _test_avoid_describe_extended(self): + # Add some existing data to ensure we don't try to 'describe extended' it. 
+ self.run_dbt(["seed"]) + _, log_output = self.run_dbt_and_capture(["run"]) + self.assertNotIn("describe extended", log_output) + + @use_profile("databricks_cluster") + def test_avoid_describe_extended_databricks_cluster(self): + """When UC is not enabled, we can assume that all tables are regular tables""" + self._test_avoid_describe_extended() + + @use_profile("databricks_uc_sql_endpoint") + def test_avoid_describe_extended_databricks_uc_sql_endpoint(self): + """When UC is enabled, regular tables are marked as such""" + self._test_avoid_describe_extended() From fdf35ec93608e01f3f534066e585b83fbd09eec7 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Tue, 12 Sep 2023 14:59:28 -0700 Subject: [PATCH 2/3] linter --- .../avoid_describe_extended/test_avoid_describe_extended.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py b/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py index c7aed92c0..f3b903b75 100644 --- a/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py +++ b/tests/integration/avoid_describe_extended/test_avoid_describe_extended.py @@ -1,5 +1,3 @@ -import os -import pytest from tests.integration.base import DBTIntegrationTest, use_profile From 32d3abc0ad5688390c9793d610c64651a2f1de0a Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Tue, 12 Sep 2023 15:27:26 -0700 Subject: [PATCH 3/3] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43616749b..09e5e349d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Fixes - Fixed an issue with AWS OAuth M2M flow ([#445](https://github.com/databricks/dbt-databricks/pull/445)) +- Fixed an issue where every table in hive_metastore would get described ([#446](https://github.com/databricks/dbt-databricks/pull/446)) ## dbt-databricks 1.6.3 (September 8, 2023)