From 5101389cb523a2766712b3a2c7789302f80654a4 Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 7 Feb 2024 16:17:03 +0100 Subject: [PATCH 01/14] partial 1.7 feature changes --- CHANGELOG.md | 5 +++ dbt/adapters/synapse/__version__.py | 2 +- dev_requirements.txt | 2 +- setup.py | 2 +- tests/functional/adapter/test_dbt_clone.py | 5 +++ tests/functional/adapter/test_seed.py | 17 ++++++++++ .../adapter/test_store_test_failures.py | 32 +++++++++++++++++++ 7 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 tests/functional/adapter/test_dbt_clone.py create mode 100644 tests/functional/adapter/test_seed.py create mode 100644 tests/functional/adapter/test_store_test_failures.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 202eee12..602ba97e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ # Changelog +## v1.7.4rc1 + +* Support for [dbt-core 1.7](https://github.com/dbt-labs/dbt-core/releases/tag/v1.7.0) + + ## v1.4.0 #### Features diff --git a/dbt/adapters/synapse/__version__.py b/dbt/adapters/synapse/__version__.py index d619c757..0a032a1d 100644 --- a/dbt/adapters/synapse/__version__.py +++ b/dbt/adapters/synapse/__version__.py @@ -1 +1 @@ -version = "1.4.0" +version = "1.7.4rc1" diff --git a/dev_requirements.txt b/dev_requirements.txt index b4833c9b..43a8dfef 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -4,7 +4,7 @@ wheel==0.40.0 pre-commit==2.21.0;python_version<"3.8" pre-commit==3.3.1;python_version>="3.8" pytest-dotenv==0.5.2 -dbt-tests-adapter~=1.4.5 +dbt-tests-adapter~=1.7.4 aiohttp==3.8.3 azure-mgmt-synapse==2.0.0 flaky==3.7.0 diff --git a/setup.py b/setup.py index b82646ad..40737ff6 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ "Anders Swanson", "Sam Debruyn", ] -dbt_version = "1.4" +dbt_version = "1.7" dbt_sqlserver_requirement = "dbt-sqlserver~=1.4.0" description = """An Azure Synapse adapter plugin for dbt""" diff --git a/tests/functional/adapter/test_dbt_clone.py 
b/tests/functional/adapter/test_dbt_clone.py new file mode 100644 index 00000000..83ec1db4 --- /dev/null +++ b/tests/functional/adapter/test_dbt_clone.py @@ -0,0 +1,5 @@ +from dbt.tests.adapter.dbt_clone.test_dbt_clone import TestCloneSameTargetAndState + + +class TestCloneSameTargetAndStateSynapse(TestCloneSameTargetAndState): + pass diff --git a/tests/functional/adapter/test_seed.py b/tests/functional/adapter/test_seed.py new file mode 100644 index 00000000..1830429e --- /dev/null +++ b/tests/functional/adapter/test_seed.py @@ -0,0 +1,17 @@ +from dbt.tests.adapter.simple_seed.test_seed import ( + SeedUniqueDelimiterTestBase, + TestSeedWithEmptyDelimiter, + TestSeedWithWrongDelimiter, +) + + +class SeedUniqueDelimiterTestBaseSynapse(SeedUniqueDelimiterTestBase): + pass + + +class TestSeedWithWrongDelimiterSynapse(TestSeedWithWrongDelimiter): + pass + + +class TestSeedWithEmptyDelimiterSynapse(TestSeedWithEmptyDelimiter): + pass diff --git a/tests/functional/adapter/test_store_test_failures.py b/tests/functional/adapter/test_store_test_failures.py new file mode 100644 index 00000000..8b1a519c --- /dev/null +++ b/tests/functional/adapter/test_store_test_failures.py @@ -0,0 +1,32 @@ +from dbt.tests.adapter.store_test_failures_tests.basic import ( + StoreTestFailuresAsExceptions, + StoreTestFailuresAsGeneric, + StoreTestFailuresAsInteractions, + StoreTestFailuresAsProjectLevelEphemeral, + StoreTestFailuresAsProjectLevelOff, + StoreTestFailuresAsProjectLevelView, +) + + +class TestStoreTestFailuresAsInteractionsBase(StoreTestFailuresAsInteractions): + pass + + +class TestStoreTestFailuresAsProjectLevelOffBase(StoreTestFailuresAsProjectLevelOff): + pass + + +class TestStoreTestFailuresAsProjectLevelViewBase(StoreTestFailuresAsProjectLevelView): + pass + + +class TestStoreTestFailuresAsGenericBase(StoreTestFailuresAsGeneric): + pass + + +class TestStoreTestFailuresAsProjectLevelEphemeralBase(StoreTestFailuresAsProjectLevelEphemeral): + pass + + +class 
TestStoreTestFailuresAsExceptionsBase(StoreTestFailuresAsExceptions): + pass From 68780e149320ad55dc8602aba0d7a03a5bb7cdc5 Mon Sep 17 00:00:00 2001 From: nszoni Date: Tue, 13 Feb 2024 14:05:44 +0100 Subject: [PATCH 02/14] add new requirements --- dev_requirements.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 8cc0790d..813d3866 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,13 +1,12 @@ -pytest==7.2.0 -twine==4.0.2 -wheel==0.40.0 -pre-commit==2.21.0;python_version<"3.8" -pre-commit==3.3.1;python_version>="3.8" +pytest==8.0.0 +twine==5.0.0 +wheel==0.42.0 +pre-commit==3.5.0 pytest-dotenv==0.5.2 dbt-tests-adapter~=1.7.4 -pyodbc==4.0.39 --no-binary :all: # if Windows, remove +pyodbc==5.0.1 aiohttp==3.8.3 azure-mgmt-synapse==2.0.0 flaky==3.7.0 -pytest-xdist==3.3.0 +pytest-xdist==3.5.0 -e . From 96206b2005b18de239294ed25633db39c2523a21 Mon Sep 17 00:00:00 2001 From: nszoni Date: Tue, 13 Feb 2024 14:06:00 +0100 Subject: [PATCH 03/14] change back to ref dicts --- dbt/adapters/synapse/synapse_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/synapse/synapse_adapter.py b/dbt/adapters/synapse/synapse_adapter.py index 9e6dfddb..5c02b579 100644 --- a/dbt/adapters/synapse/synapse_adapter.py +++ b/dbt/adapters/synapse/synapse_adapter.py @@ -1,5 +1,5 @@ from dbt.adapters.base.relation import BaseRelation -from dbt.adapters.cache import _make_ref_key_msg +from dbt.adapters.cache import _make_ref_key_dict from dbt.adapters.fabric import FabricAdapter from dbt.adapters.sql.impl import CREATE_SCHEMA_MACRO_NAME from dbt.events.functions import fire_event @@ -13,7 +13,7 @@ class SynapseAdapter(FabricAdapter): def create_schema(self, relation: BaseRelation) -> None: relation = relation.without_identifier() - fire_event(SchemaCreation(relation=_make_ref_key_msg(relation))) + fire_event(SchemaCreation(relation=_make_ref_key_dict(relation))) 
macro_name = CREATE_SCHEMA_MACRO_NAME kwargs = { "relation": relation, From 5b9b6c012791938b3d7ea1e130faa0ae3f3a3841 Mon Sep 17 00:00:00 2001 From: nszoni Date: Tue, 13 Feb 2024 14:07:41 +0100 Subject: [PATCH 04/14] add dbt clone adapter test --- tests/functional/adapter/test_dbt_clone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/adapter/test_dbt_clone.py b/tests/functional/adapter/test_dbt_clone.py index 83ec1db4..62e564f1 100644 --- a/tests/functional/adapter/test_dbt_clone.py +++ b/tests/functional/adapter/test_dbt_clone.py @@ -1,5 +1,5 @@ from dbt.tests.adapter.dbt_clone.test_dbt_clone import TestCloneSameTargetAndState -class TestCloneSameTargetAndStateSynapse(TestCloneSameTargetAndState): +class TestCloneSameTargetAndState(TestCloneSameTargetAndState): pass From fdbd8e187d75151f922541b7e6fd917115d44040 Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 14 Feb 2024 11:58:17 +0100 Subject: [PATCH 05/14] add seeds adapter zone tests --- tests/functional/adapter/data/seed_bom.csv | 11 + tests/functional/adapter/data/tmp.csv | 20001 ------------------- tests/functional/adapter/test_seed.py | 452 +- 3 files changed, 453 insertions(+), 20011 deletions(-) create mode 100644 tests/functional/adapter/data/seed_bom.csv delete mode 100644 tests/functional/adapter/data/tmp.csv diff --git a/tests/functional/adapter/data/seed_bom.csv b/tests/functional/adapter/data/seed_bom.csv new file mode 100644 index 00000000..df299921 --- /dev/null +++ b/tests/functional/adapter/data/seed_bom.csv @@ -0,0 +1,11 @@ +seed_id,first_name,email,ip_address,birthday +1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 +2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14 +3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57 +4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43 +5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29 
+6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50 +7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59 +8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15 +9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48 +10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49 diff --git a/tests/functional/adapter/data/tmp.csv b/tests/functional/adapter/data/tmp.csv deleted file mode 100644 index 4c120ea6..00000000 --- a/tests/functional/adapter/data/tmp.csv +++ /dev/null @@ -1,20001 +0,0 @@ -seed_iddiff --git a/tests/functional/adapter/test_seed.py b/tests/functional/adapter/test_seed.py index 1830429e..81fa424c 100644 --- a/tests/functional/adapter/test_seed.py +++ b/tests/functional/adapter/test_seed.py @@ -1,17 +1,449 @@ -from dbt.tests.adapter.simple_seed.test_seed import ( - SeedUniqueDelimiterTestBase, - TestSeedWithEmptyDelimiter, - TestSeedWithWrongDelimiter, +import csv +from codecs import BOM_UTF8 +from pathlib import Path + +import pytest +from dbt.tests.adapter.simple_seed.fixtures import ( + models__downstream_from_seed_actual, + models__downstream_from_seed_pipe_separated, + models__from_basic_seed, +) +from dbt.tests.adapter.simple_seed.seeds import ( + seed__unicode_csv, + seed__with_dots_csv, + seeds__disabled_in_config_csv, + seeds__enabled_in_config_csv, + seeds__tricky_csv, + seeds__wont_parse_csv, +) +from dbt.tests.util import ( + check_relations_equal, + check_table_does_exist, + check_table_does_not_exist, + copy_file, + read_file, + rm_file, + run_dbt, ) +seed__actual_csv = """ +seed_id,first_name,email,ip_address,birthday +1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 +2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14 +3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57 +4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43 +5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29 
+6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50 +7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59 +8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15 +9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48 +10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49 +""".lstrip() + +""" +NOTE: Table value constructor is not supported in Azure Synapse Analytics. +Instead, subsequent INSERT statements can be executed to insert multiple rows. +In Azure Synapse Analytics, +insert values can only be constant literal values or variable references. +To insert a non-literal, set a variable to non-constant value and insert the variable. +""" + +seeds__expected_sql = """ +create table {schema}.seed_expected ( +seed_id int, +first_name varchar(100), +email varchar(50), +ip_address varchar(100), +birthday datetime2(6) +); + +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (1,'Larry','lking0@miitbeian.gov.cn','69.135.206.194','2008-09-12 19:08:31'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (2,'Larry','lperkins1@toplist.cz','64.210.133.162','1978-05-09 04:15:14'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (3,'Anna','amontgomery2@miitbeian.gov.cn','168.104.64.114','2011-10-16 04:07:57'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (4,'Sandra','sgeorge3@livejournal.com','229.235.252.98','1973-07-19 10:52:43'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (5,'Fred','fwoods4@google.cn','78.229.170.124','2012-09-30 16:38:29'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (6,'Stephen','shanson5@livejournal.com','182.227.157.105','1995-11-07 21:40:50'); +INSERT 
INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (7,'William','wmartinez6@upenn.edu','135.139.249.50','1982-09-05 03:11:59'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (8,'Jessica','jlong7@hao123.com','203.62.178.210','1991-10-16 11:03:15'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (9,'Douglas','dwhite8@tamu.edu','178.187.247.1','1979-10-01 09:49:48'); +INSERT INTO {schema}.seed_expected + ("seed_id","first_name","email","ip_address","birthday") +VALUES + (10,'Lisa','lcoleman9@nydailynews.com','168.234.128.249','2011-05-26 07:45:49'); +""" + +seeds__pipe_separated_csv = """ +seed_id|first_name|email|ip_address|birthday +1|Larry|lking0@miitbeian.gov.cn|69.135.206.194|2008-09-12 19:08:31 +2|Larry|lperkins1@toplist.cz|64.210.133.162|1978-05-09 04:15:14 +3|Anna|amontgomery2@miitbeian.gov.cn|168.104.64.114|2011-10-16 04:07:57 +4|Sandra|sgeorge3@livejournal.com|229.235.252.98|1973-07-19 10:52:43 +5|Fred|fwoods4@google.cn|78.229.170.124|2012-09-30 16:38:29 +6|Stephen|shanson5@livejournal.com|182.227.157.105|1995-11-07 21:40:50 +7|William|wmartinez6@upenn.edu|135.139.249.50|1982-09-05 03:11:59 +8|Jessica|jlong7@hao123.com|203.62.178.210|1991-10-16 11:03:15 +9|Douglas|dwhite8@tamu.edu|178.187.247.1|1979-10-01 09:49:48 +10|Lisa|lcoleman9@nydailynews.com|168.234.128.249|2011-05-26 07:45:49 +""".lstrip() + + +class SeedConfigBase(object): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": { + "quote_columns": False, + }, + } + + +class SeedTestBase(SeedConfigBase): + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project): + """Create table for ensuring seeds and models used in tests build correctly""" + project.run_sql(seeds__expected_sql) + + @pytest.fixture(scope="class") + def seeds(self, test_data_dir): + return {"seed_actual.csv": 
seed__actual_csv} + + @pytest.fixture(scope="class") + def models(self): + return { + "models__downstream_from_seed_actual.sql": models__downstream_from_seed_actual, + } + + def _build_relations_for_test(self, project): + """The testing environment needs seeds and models to interact with""" + seed_result = run_dbt(["seed"]) + assert len(seed_result) == 1 + check_relations_equal(project.adapter, ["seed_expected", "seed_actual"]) + + run_result = run_dbt() + assert len(run_result) == 1 + check_relations_equal( + project.adapter, ["models__downstream_from_seed_actual", "seed_expected"] + ) + + def _check_relation_end_state(self, run_result, project, exists: bool): + assert len(run_result) == 1 + check_relations_equal(project.adapter, ["seed_actual", "seed_expected"]) + if exists: + check_table_does_exist(project.adapter, "models__downstream_from_seed_actual") + else: + check_table_does_not_exist(project.adapter, "models__downstream_from_seed_actual") + + +class TestBasicSeedTests(SeedTestBase): + def test_simple_seed(self, project): + """Build models and observe that run truncates a seed and re-inserts rows""" + self._build_relations_for_test(project) + self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) + + def test_simple_seed_full_refresh_flag(self, project): + """Drop the seed_actual table and re-create. 
+ Verifies correct behavior by the absence of the + model which depends on seed_actual.""" + self._build_relations_for_test(project) + self._check_relation_end_state( + run_result=run_dbt(["seed", "--full-refresh"]), project=project, exists=True + ) + + +class TestSeedConfigFullRefreshOn(SeedTestBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": {"quote_columns": False, "full_refresh": True}, + } + + def test_simple_seed_full_refresh_config(self, project): + """config option should drop current model and cascade drop to downstream models""" + self._build_relations_for_test(project) + self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) + + +class TestSeedConfigFullRefreshOff(SeedTestBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": {"quote_columns": False, "full_refresh": False}, + } + + def test_simple_seed_full_refresh_config(self, project): + """Config options should override a full-refresh flag because config is higher priority""" + self._build_relations_for_test(project) + self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) + self._check_relation_end_state( + run_result=run_dbt(["seed", "--full-refresh"]), project=project, exists=True + ) + + +class TestSeedCustomSchema(SeedTestBase): + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project): + """Create table for ensuring seeds and models used in tests build correctly""" + project.run_sql(seeds__expected_sql) + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": { + "schema": "custom_schema", + "quote_columns": False, + }, + } + + def test_simple_seed_with_schema(self, project): + seed_results = run_dbt(["seed"]) + assert len(seed_results) == 1 + custom_schema = f"{project.test_schema}_custom_schema" + check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", 
"seed_expected"]) + + # this should truncate the seed_actual table, then re-insert + results = run_dbt(["seed"]) + assert len(results) == 1 + custom_schema = f"{project.test_schema}_custom_schema" + check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"]) + + def test_simple_seed_with_drop_and_schema(self, project): + seed_results = run_dbt(["seed"]) + assert len(seed_results) == 1 + custom_schema = f"{project.test_schema}_custom_schema" + check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"]) + + # this should drop the seed table, then re-create + results = run_dbt(["seed", "--full-refresh"]) + assert len(results) == 1 + custom_schema = f"{project.test_schema}_custom_schema" + check_relations_equal(project.adapter, [f"{custom_schema}.seed_actual", "seed_expected"]) + + +@pytest.mark.skip(reason="Cascade is not supported in Drop Schema") +class TestSimpleSeedEnabledViaConfig(object): + @pytest.fixture(scope="session") + def seeds(self): + return { + "seed_enabled.csv": seeds__enabled_in_config_csv, + "seed_disabled.csv": seeds__disabled_in_config_csv, + "seed_tricky.csv": seeds__tricky_csv, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": { + "test": {"seed_enabled": {"enabled": True}, "seed_disabled": {"enabled": False}}, + "quote_columns": False, + }, + } + + @pytest.fixture(scope="function") + def clear_test_schema(self, project): + yield + project.run_sql(f"drop schema if exists {project.test_schema}") + + def test_simple_seed_with_disabled(self, clear_test_schema, project): + results = run_dbt(["seed"]) + assert len(results) == 2 + check_table_does_exist(project.adapter, "seed_enabled") + check_table_does_not_exist(project.adapter, "seed_disabled") + check_table_does_exist(project.adapter, "seed_tricky") + + def test_simple_seed_selection(self, clear_test_schema, project): + results = run_dbt(["seed", "--select", "seed_enabled"]) + assert 
len(results) == 1 + check_table_does_exist(project.adapter, "seed_enabled") + check_table_does_not_exist(project.adapter, "seed_disabled") + check_table_does_not_exist(project.adapter, "seed_tricky") + + def test_simple_seed_exclude(self, clear_test_schema, project): + results = run_dbt(["seed", "--exclude", "seed_enabled"]) + assert len(results) == 1 + check_table_does_not_exist(project.adapter, "seed_enabled") + check_table_does_not_exist(project.adapter, "seed_disabled") + check_table_does_exist(project.adapter, "seed_tricky") + + +class TestSeedParsing(SeedConfigBase): + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project): + """Create table for ensuring seeds and models used in tests build correctly""" + project.run_sql(seeds__expected_sql) + + @pytest.fixture(scope="class") + def seeds(self): + return {"seed.csv": seeds__wont_parse_csv} + + @pytest.fixture(scope="class") + def models(self): + return {"model.sql": models__from_basic_seed} + + def test_dbt_run_skips_seeds(self, project): + # run does not try to parse the seed files + assert len(run_dbt()) == 1 + + # make sure 'dbt seed' fails, otherwise our test is invalid! + run_dbt(["seed"], expect_pass=False) + + +class TestSimpleSeedWithBOM(SeedConfigBase): + # Reference: BOM = byte order mark; see + # https://www.ibm.com/docs/en/netezza?topic=formats-byte-order-mark + # Tests for hidden unicode character in csv + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project): + """Create table for ensuring seeds and models used in tests build correctly""" + project.run_sql(seeds__expected_sql) + copy_file( + project.test_data_dir, + "seed_bom.csv", + project.project_root / Path("seeds") / "seed_bom.csv", + "", + ) + + def test_simple_seed(self, project): + seed_result = run_dbt(["seed"]) + assert len(seed_result) == 1 + # encoding param must be specified in open, so long as Python reads files with a + # default file encoding for character sets beyond extended ASCII. 
+ with open( + project.project_root / Path("seeds") / Path("seed_bom.csv"), encoding="utf-8" + ) as fp: + assert fp.read(1) == BOM_UTF8.decode("utf-8") + check_relations_equal(project.adapter, ["seed_expected", "seed_bom"]) + + +class TestSeedSpecificFormats(SeedConfigBase): + """Expect all edge cases to build""" + + @staticmethod + def _make_big_seed(test_data_dir): + big_seed_path = test_data_dir / Path("tmp.csv") + with open(big_seed_path, "w") as f: + writer = csv.writer(f) + writer.writerow(["seed_id"]) + for i in range(0, 20000): + writer.writerow([i]) + return big_seed_path + + @pytest.fixture(scope="class") + def seeds(self, test_data_dir): + big_seed_path = self._make_big_seed(test_data_dir) + big_seed = read_file(big_seed_path) + + yield { + "big_seed.csv": big_seed, + "seed.with.dots.csv": seed__with_dots_csv, + "seed_unicode.csv": seed__unicode_csv, + } + rm_file(test_data_dir / Path("tmp.csv")) + + def test_simple_seed(self, project): + results = run_dbt(["seed"]) + assert len(results) == 3 + + +class SeedUniqueDelimiterTestBase(SeedConfigBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": {"quote_columns": False, "delimiter": "|"}, + } + + @pytest.fixture(scope="class", autouse=True) + def setUp(self, project): + """Create table for ensuring seeds and models used in tests build correctly""" + project.run_sql(seeds__expected_sql) + + @pytest.fixture(scope="class") + def seeds(self, test_data_dir): + return {"seed_pipe_separated.csv": seeds__pipe_separated_csv} + + @pytest.fixture(scope="class") + def models(self): + return { + "models__downstream_from_seed_pipe_separated.sql": models__downstream_from_seed_pipe_separated, + } + + def _build_relations_for_test(self, project): + """The testing environment needs seeds and models to interact with""" + seed_result = run_dbt(["seed"]) + assert len(seed_result) == 1 + check_relations_equal(project.adapter, ["seed_expected", "seed_pipe_separated"]) + + run_result = 
run_dbt() + assert len(run_result) == 1 + check_relations_equal( + project.adapter, ["models__downstream_from_seed_pipe_separated", "seed_expected"] + ) + + def _check_relation_end_state(self, run_result, project, exists: bool): + assert len(run_result) == 1 + check_relations_equal(project.adapter, ["seed_pipe_separated", "seed_expected"]) + if exists: + check_table_does_exist(project.adapter, "models__downstream_from_seed_pipe_separated") + else: + check_table_does_not_exist( + project.adapter, "models__downstream_from_seed_pipe_separated" + ) + + +class TestSeedWithUniqueDelimiter(SeedUniqueDelimiterTestBase): + def test_seed_with_unique_delimiter(self, project): + """Testing correct run of seeds with a unique delimiter (pipe in this case)""" + self._build_relations_for_test(project) + self._check_relation_end_state(run_result=run_dbt(["seed"]), project=project, exists=True) + -class SeedUniqueDelimiterTestBaseSynapse(SeedUniqueDelimiterTestBase): - pass +class TestSeedWithWrongDelimiter(SeedUniqueDelimiterTestBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": {"quote_columns": False, "delimiter": ";"}, + } + def test_seed_with_wrong_delimiter(self, project): + """Testing failure of running dbt seed with a wrongly configured delimiter""" + seed_result = run_dbt(["seed"], expect_pass=False) + assert "incorrect syntax" in seed_result.results[0].message.lower() -class TestSeedWithWrongDelimiterSynapse(TestSeedWithWrongDelimiter): - pass +class TestSeedWithEmptyDelimiter(SeedUniqueDelimiterTestBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "seeds": {"quote_columns": False, "delimiter": ""}, + } -class TestSeedWithEmptyDelimiterSynapse(TestSeedWithEmptyDelimiter): - pass + def test_seed_with_empty_delimiter(self, project): + """Testing failure of running dbt seed with an empty configured delimiter value""" + seed_result = run_dbt(["seed"], expect_pass=False) + assert "compilation 
error" in seed_result.results[0].message.lower() From 5c390b3c26402dd2020c6cc4908e5fcb7bc039a7 Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 14 Feb 2024 16:07:12 +0100 Subject: [PATCH 06/14] add custom date spine along with tests --- .../synapse/macros/utils/date_spine.sql | 114 ++++++++++++++++++ tests/functional/adapter/test_date_spine.py | 5 + 2 files changed, 119 insertions(+) create mode 100644 dbt/include/synapse/macros/utils/date_spine.sql create mode 100644 tests/functional/adapter/test_date_spine.py diff --git a/dbt/include/synapse/macros/utils/date_spine.sql b/dbt/include/synapse/macros/utils/date_spine.sql new file mode 100644 index 00000000..1efc7db1 --- /dev/null +++ b/dbt/include/synapse/macros/utils/date_spine.sql @@ -0,0 +1,114 @@ +{% macro synapse__date_spine_sql(datepart, start_date, end_date) %} + + + with + + l0 as ( + + select c + from (select 1 union all select 1) as d(c) + + ), + l1 as ( + + select + 1 as c + from l0 as a + cross join l0 as b + + ), + + l2 as ( + + select 1 as c + from l1 as a + cross join l1 as b + ), + + l3 as ( + + select 1 as c + from l2 as a + cross join l2 as b + ), + + l4 as ( + + select 1 as c + from l3 as a + cross join l3 as b + ), + + l5 as ( + + select 1 as c + from l4 as a + cross join l4 as b + ), + + nums as ( + + select row_number() over (order by (select null)) as rownum + from l5 + ), + + rawdata as ( + + select top ({{ dbt.datediff(start_date, end_date, datepart)}}) rownum -1 as n + from nums + order by rownum + ), + + all_periods as ( + + select cast(( + {{ + dbt.dateadd( + datepart, + 'n', + start_date + ) + }} + ) as date) as date_{{datepart}} + from rawdata + ), + + filtered as ( + + select * + from all_periods + where date_{{datepart}} <= {{ end_date }} + + ) + + select * from filtered + +{% endmacro %} + + +{% macro synapse__date_spine(datepart, start_date, end_date) -%} + + {% set date_spine_query %} + + {{ synapse__date_spine_sql(datepart, start_date, end_date) }} order by 1 + + {% endset %} + + 
+ {% set results = run_query(date_spine_query) %} + + {% if execute %} + + {% set results_list = results.columns[0].values() %} + + {% else %} + + {% set results_list = [] %} + + {% endif %} + + {%- for date_field in results_list %} + select '{{ date_field }}' as date_{{datepart}} {{ 'union all ' if not loop.last else '' }} + {% endfor -%} + +{% endmacro %} diff --git a/tests/functional/adapter/test_date_spine.py b/tests/functional/adapter/test_date_spine.py new file mode 100644 index 00000000..b332971c --- /dev/null +++ b/tests/functional/adapter/test_date_spine.py @@ -0,0 +1,5 @@ +from dbt.tests.adapter.utils.test_date_spine import BaseDateSpine + + +class TestDateSpineSynapse(BaseDateSpine): + pass From 1e1c9b8377f298ba29d0260a0b707de41668007f Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 14 Feb 2024 16:10:51 +0100 Subject: [PATCH 07/14] alter generate series to fit synapse limitations and add tests --- .../synapse/macros/utils/generate_series.sql | 61 +++++++++++++++++++ .../adapter/test_generate_series.py | 48 +++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 dbt/include/synapse/macros/utils/generate_series.sql create mode 100644 tests/functional/adapter/test_generate_series.py diff --git a/dbt/include/synapse/macros/utils/generate_series.sql b/dbt/include/synapse/macros/utils/generate_series.sql new file mode 100644 index 00000000..88fac113 --- /dev/null +++ b/dbt/include/synapse/macros/utils/generate_series.sql @@ -0,0 +1,61 @@ +{# +Specifying more than one WITH clause in a CTE isn't allowed. For example, if a CTE query definition contains a subquery, +that subquery can't contain a nested WITH clause that defines another CTE. 
+#} + +{% macro get_powers_of_two(upper_bound) %} + {{ return(adapter.dispatch('get_powers_of_two', 'dbt')(upper_bound)) }} +{% endmacro %} + +{% macro default__get_powers_of_two(upper_bound) %} + + {% if upper_bound <= 0 %} + {{ exceptions.raise_compiler_error("upper bound must be positive") }} + {% endif %} + + {% for _ in range(1, 100) %} + {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %} + {% endfor %} + +{% endmacro %} + + +{% macro generate_series(upper_bound) %} + {{ return(adapter.dispatch('generate_series', 'dbt')(upper_bound)) }} +{% endmacro %} + +{% macro default__generate_series(upper_bound) %} + + {% set n = dbt.get_powers_of_two(upper_bound) %} + + with p as ( + select 0 as generated_number union all select 1 + ), unioned as ( + + select + + {% for i in range(n) %} + p{{i}}.generated_number * power(2, {{i}}) + {% if not loop.last %} + {% endif %} + {% endfor %} + + 1 + as generated_number + + from + + {% for i in range(n) %} + p as p{{i}} + {% if not loop.last %} cross join {% endif %} + {% endfor %} + + ), + + generate_series as ( + select * + from unioned + where generated_number <= {{upper_bound}} + ) + + select * from generate_series + +{% endmacro %} diff --git a/tests/functional/adapter/test_generate_series.py b/tests/functional/adapter/test_generate_series.py new file mode 100644 index 00000000..2e726dfe --- /dev/null +++ b/tests/functional/adapter/test_generate_series.py @@ -0,0 +1,48 @@ +import pytest +from dbt.tests.adapter.utils.base_utils import BaseUtils +from dbt.tests.adapter.utils.fixture_generate_series import models__test_generate_series_yml + +# Cause of overriding fixture: Specifying more than one WITH clause in a CTE isn't allowed. +# For example, if a CTE query definition contains a subquery, +# that subquery can't contain a nested WITH clause that defines another CTE. 
+ +models__test_generate_series_sql = """ + {{ dbt.generate_series(10) }} + left join ( + select 1 as expected + union all + select 2 as expected + union all + select 3 as expected + union all + select 4 as expected + union all + select 5 as expected + union all + select 6 as expected + union all + select 7 as expected + union all + select 8 as expected + union all + select 9 as expected + union all + select 10 as expected + ) as expected_numbers + on generate_series.generated_number = expected_numbers.expected +""" + + +class BaseGenerateSeries(BaseUtils): + @pytest.fixture(scope="class") + def models(self): + return { + "test_generate_series.yml": models__test_generate_series_yml, + "test_generate_series.sql": self.interpolate_macro_namespace( + models__test_generate_series_sql, "generate_series" + ), + } + + +class TestGenerateSeries(BaseGenerateSeries): + pass From 44acdba69c55f60f53ecac264a11450d3ad8552d Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 14 Feb 2024 16:11:21 +0100 Subject: [PATCH 08/14] add get_intervals_between test --- tests/functional/adapter/test_get_intervals_between.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/functional/adapter/test_get_intervals_between.py diff --git a/tests/functional/adapter/test_get_intervals_between.py b/tests/functional/adapter/test_get_intervals_between.py new file mode 100644 index 00000000..b2e836ff --- /dev/null +++ b/tests/functional/adapter/test_get_intervals_between.py @@ -0,0 +1,5 @@ +from dbt.tests.adapter.utils.test_get_intervals_between import BaseGetIntervalsBetween + + +class TestGetIntervalsBetweenSynapse(BaseGetIntervalsBetween): + pass From da06e4ac3216a35d1b7c725c694f089d336d3234 Mon Sep 17 00:00:00 2001 From: nszoni Date: Wed, 14 Feb 2024 16:11:39 +0100 Subject: [PATCH 09/14] add get_powers_of_two test --- tests/functional/adapter/test_get_powers_of_two.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/functional/adapter/test_get_powers_of_two.py diff 
--git a/tests/functional/adapter/test_get_powers_of_two.py b/tests/functional/adapter/test_get_powers_of_two.py new file mode 100644 index 00000000..36499b63 --- /dev/null +++ b/tests/functional/adapter/test_get_powers_of_two.py @@ -0,0 +1,5 @@ +from dbt.tests.adapter.utils.test_get_powers_of_two import BaseGetPowersOfTwo + + +class TestGetPowersOfTwoSynapse(BaseGetPowersOfTwo): + pass From 55e26b1203dc9d6988d43e5d94c9a5ad06cd4bf8 Mon Sep 17 00:00:00 2001 From: nszoni Date: Thu, 15 Feb 2024 11:07:32 +0100 Subject: [PATCH 10/14] add relation tests --- .../test_get_last_relation_modified.py | 59 ++++++ .../test_list_relations_without_caching.py | 168 ++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 tests/functional/adapter/test_get_last_relation_modified.py create mode 100644 tests/functional/adapter/test_list_relations_without_caching.py diff --git a/tests/functional/adapter/test_get_last_relation_modified.py b/tests/functional/adapter/test_get_last_relation_modified.py new file mode 100644 index 00000000..f141a700 --- /dev/null +++ b/tests/functional/adapter/test_get_last_relation_modified.py @@ -0,0 +1,59 @@ +import os + +import pytest +from dbt.cli.main import dbtRunner + +freshness_via_metadata_schema_yml = """version: 2 +sources: + - name: test_source + freshness: + warn_after: {count: 10, period: hour} + error_after: {count: 1, period: day} + schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}" + tables: + - name: test_table +""" + + +class TestGetLastRelationModified: + @pytest.fixture(scope="class", autouse=True) + def set_env_vars(self, project): + os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema + yield + del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] + + @pytest.fixture(scope="class") + def models(self): + return {"schema.yml": freshness_via_metadata_schema_yml} + + @pytest.fixture(scope="class") + def custom_schema(self, project, set_env_vars): + with project.adapter.connection_named("__test"): + 
relation = project.adapter.Relation.create( + database=project.database, schema=os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] + ) + project.adapter.drop_schema(relation) + project.adapter.create_schema(relation) + + yield relation.schema + + with project.adapter.connection_named("__test"): + project.adapter.drop_schema(relation) + + def test_get_last_relation_modified(self, project, set_env_vars, custom_schema): + project.run_sql( + f"create table {custom_schema}.test_table (id int, name varchar(100) not null);" + ) + + warning_or_error = False + + def probe(e): + nonlocal warning_or_error + if e.info.level in ["warning", "error"]: + warning_or_error = True + + runner = dbtRunner(callbacks=[probe]) + runner.invoke(["source", "freshness"]) + + # The 'source freshness' command should succeed without warnings or errors. + assert not warning_or_error diff --git a/tests/functional/adapter/test_list_relations_without_caching.py b/tests/functional/adapter/test_list_relations_without_caching.py new file mode 100644 index 00000000..68c894a5 --- /dev/null +++ b/tests/functional/adapter/test_list_relations_without_caching.py @@ -0,0 +1,168 @@ +import json + +import pytest +from dbt.tests.util import run_dbt, run_dbt_and_capture + +NUM_VIEWS = 10 +NUM_EXPECTED_RELATIONS = 1 + NUM_VIEWS + +TABLE_BASE_SQL = """ +{{ config(materialized='table') }} + +select 1 as id +""".lstrip() + +VIEW_X_SQL = """ +select id from {{ ref('my_model_base') }} +""".lstrip() + +# TODO - fix the call +# {% set relation_list_result = fabric__list_relations_without_caching(schema_relation) %} +MACROS__VALIDATE__SYNAPSE__LIST_RELATIONS_WITHOUT_CACHING = """ +{% macro validate_list_relations_without_caching(schema_relation) -%} + + {% call statement('list_relations_without_caching', fetch_result=True) -%} + select + table_catalog as [database], + table_name as [name], + table_schema as [schema], + case when table_type = 'BASE TABLE' then 'table' + when table_type = 'VIEW' then 'view' + else table_type + 
end as table_type + + from INFORMATION_SCHEMA.TABLES + where table_schema like '{{ schema_relation }}' + {% endcall %} + + {% set relation_list_result = load_result('list_relations_without_caching').table %} + {% set n_relations = relation_list_result | length %} + {{ log("n_relations: " ~ n_relations) }} +{% endmacro %} +""" + + +def parse_json_logs(json_log_output): + parsed_logs = [] + for line in json_log_output.split("\n"): + try: + log = json.loads(line) + except ValueError: + continue + + parsed_logs.append(log) + + return parsed_logs + + +def find_result_in_parsed_logs(parsed_logs, result_name): + return next( + ( + item["data"]["msg"] + for item in parsed_logs + if result_name in item["data"].get("msg", "msg") + ), + False, + ) + + +def find_exc_info_in_parsed_logs(parsed_logs, exc_info_name): + return next( + ( + item["data"]["exc_info"] + for item in parsed_logs + if exc_info_name in item["data"].get("exc_info", "exc_info") + ), + False, + ) + + +class TestListRelationsWithoutCachingSingle: + @pytest.fixture(scope="class") + def models(self): + my_models = {"my_model_base.sql": TABLE_BASE_SQL} + for view in range(0, NUM_VIEWS): + my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) + + return my_models + + @pytest.fixture(scope="class") + def macros(self): + return { + "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SYNAPSE__LIST_RELATIONS_WITHOUT_CACHING, + } + + def test__fabric__list_relations_without_caching(self, project): + """ + validates that fabric__list_relations_without_caching + macro returns a single record + """ + run_dbt(["run", "-s", "my_model_base"]) + + # database = project.database + schemas = project.created_schemas + + for schema in schemas: + # schema_relation = BaseRelation.create(schema=schema, database=database) + # schema_relation = f"{database}.{schema}" + kwargs = {"schema_relation": schema} + _, log_output = run_dbt_and_capture( + [ + "--debug", + # "--log-format=json", + "run-operation", + 
"validate_list_relations_without_caching", + "--args", + str(kwargs), + ] + ) + + # parsed_logs = parse_json_logs(log_output) + # print(parsed_logs) + # n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") + + # assert n_relations == "n_relations: 1" + assert "n_relations: 1" in log_output + + +class TestListRelationsWithoutCachingFull: + @pytest.fixture(scope="class") + def models(self): + my_models = {"my_model_base.sql": TABLE_BASE_SQL} + for view in range(0, NUM_VIEWS): + my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) + + return my_models + + @pytest.fixture(scope="class") + def macros(self): + return { + "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SYNAPSE__LIST_RELATIONS_WITHOUT_CACHING, + } + + def test__synapse__list_relations_without_caching(self, project): + # purpose of the first run is to create the replicated views in the target schema + run_dbt(["run"]) + + # database = project.database + schemas = project.created_schemas + + for schema in schemas: + # schema_relation = f"{database}.{schema}" + kwargs = {"schema_relation": schema} + _, log_output = run_dbt_and_capture( + [ + "--debug", + # "--log-format=json", + "run-operation", + "validate_list_relations_without_caching", + "--args", + str(kwargs), + ] + ) + + # parsed_logs = parse_json_logs(log_output) + # n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") + + # assert n_relations == f"n_relations: {NUM_EXPECTED_RELATIONS}" + assert f"n_relations: {NUM_EXPECTED_RELATIONS}" in log_output From 5c190ffbd26b44ff67f76e1a8cf934ad8f0bd028 Mon Sep 17 00:00:00 2001 From: nszoni Date: Thu, 15 Feb 2024 17:35:16 +0100 Subject: [PATCH 11/14] add bom csv fixture --- .pre-commit-config.yaml | 1 + tests/functional/adapter/data/seed_bom.csv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 35fba6e0..15625256 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 
+18,7 @@ repos: - id: check-merge-conflict - id: no-commit-to-branch - id: fix-byte-order-marker + exclude: ^tests/functional/adapter/data/seed_bom.csv - id: mixed-line-ending - id: check-docstring-first - repo: 'https://github.com/adrienverge/yamllint' diff --git a/tests/functional/adapter/data/seed_bom.csv b/tests/functional/adapter/data/seed_bom.csv index df299921..e4405ad1 100644 --- a/tests/functional/adapter/data/seed_bom.csv +++ b/tests/functional/adapter/data/seed_bom.csv @@ -1,4 +1,4 @@ -seed_id,first_name,email,ip_address,birthday +seed_id,first_name,email,ip_address,birthday 1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14 3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57 From 98fd4295d8a8e2e4037977899ba3789066dc269d Mon Sep 17 00:00:00 2001 From: nszoni Date: Thu, 15 Feb 2024 17:36:00 +0100 Subject: [PATCH 12/14] decouple get catalog relations macro --- .../synapse/macros/adapters/catalog.sql | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 dbt/include/synapse/macros/adapters/catalog.sql diff --git a/dbt/include/synapse/macros/adapters/catalog.sql b/dbt/include/synapse/macros/adapters/catalog.sql new file mode 100644 index 00000000..e959b79c --- /dev/null +++ b/dbt/include/synapse/macros/adapters/catalog.sql @@ -0,0 +1,147 @@ +{% macro synapse__get_catalog_tables_sql(information_schemas) -%} +-- avoid with statement to be able to wrap into CTE (limitation of Synapse) + + SELECT + table_name, + table_schema, + principal_name, + table_type + FROM ( + SELECT + table_name, + schema_name AS table_schema, + COALESCE(relations.principal_id, schemas.principal_id) AS owner_principal_id, + table_type + FROM ( + SELECT + name AS table_name, + schema_id AS schema_id, + principal_id AS principal_id, + 'BASE TABLE' AS table_type + FROM + sys.tables {{ information_schema_hints() }} + UNION ALL + SELECT + name AS table_name, + schema_id 
AS schema_id, + principal_id AS principal_id, + 'VIEW' AS table_type + FROM + sys.views {{ information_schema_hints() }} + ) AS relations + JOIN ( + SELECT + name AS schema_name, + schema_id AS schema_id, + principal_id AS principal_id + FROM + sys.schemas {{ information_schema_hints() }} + ) AS schemas ON relations.schema_id = schemas.schema_id + ) AS relations_with_metadata + JOIN ( + SELECT + name AS principal_name, + principal_id AS principal_id + FROM + sys.database_principals {{ information_schema_hints() }} + ) AS principals ON relations_with_metadata.owner_principal_id = principals.principal_id + +{% endmacro %} + +{% macro synapse__get_catalog_columns_sql(information_schemas) -%} + + select + table_catalog as table_database, + table_schema, + table_name, + column_name, + ordinal_position as column_index, + data_type as column_type + from INFORMATION_SCHEMA.COLUMNS {{ information_schema_hints() }} + +{% endmacro %} + +{% macro synapse__get_catalog_schemas_where_clause_sql(schemas) -%} + where ({%- for schema in schemas -%} + upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} + {%- endfor -%}) +{%- endmacro %} + +{% macro synapse__get_catalog_relations_where_clause_sql(relations) %} + + where ( + {%- for relation in relations -%} + {% if relation.schema and relation.identifier %} + ( + upper(table_schema) = upper('{{ relation.schema }}') + and upper(table_name) = upper('{{ relation.identifier }}') + ) + {% elif relation.schema %} + ( + upper(table_schema) = upper('{{ relation.schema }}') + ) + {% else %} + {% do exceptions.raise_compiler_error( + '`get_catalog_relations` requires a list of relations, each with a schema' + ) %} + {% endif %} + + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) + +{% endmacro %} + +{% macro synapse__get_catalog_results_sql() -%} + select + cols.table_database, + tv.table_schema, + tv.table_name, + tv.table_type, + null as table_comment, + tv.principal_name as table_owner, + 
cols.column_name, + cols.column_index, + cols.column_type, + null as column_comment + from tables as tv + join columns as cols on tv.table_schema = cols.table_schema and tv.table_name = cols.table_name + order by cols.column_index +{%- endmacro %} + +-- combine everything into the get_catalog_(relations) macro +{% macro synapse__get_catalog(information_schema, schemas) -%} + + {% set query %} + with tables as ( + {{ synapse__get_catalog_tables_sql(information_schema) }} + {{ synapse__get_catalog_schemas_where_clause_sql(schemas) }} + ), + columns as ( + {{ synapse__get_catalog_columns_sql(information_schema) }} + {{ synapse__get_catalog_schemas_where_clause_sql(schemas) }} + ) + {{ synapse__get_catalog_results_sql() }} + {%- endset -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} + +{% macro synapse__get_catalog_relations(information_schema, relations) -%} + + {% set query %} + with tables as ( + {{ synapse__get_catalog_tables_sql(information_schema) }} + {{ synapse__get_catalog_relations_where_clause_sql(relations) }} + ), + columns as ( + {{ synapse__get_catalog_columns_sql(information_schema) }} + {{ synapse__get_catalog_relations_where_clause_sql(relations) }} + ) + {{ synapse__get_catalog_results_sql() }} + {%- endset -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} From 174d7ad861b6f3649a84ef9980aca980a1b7bd49 Mon Sep 17 00:00:00 2001 From: nszoni Date: Thu, 15 Feb 2024 17:47:37 +0100 Subject: [PATCH 13/14] add changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b593686..c1db77db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ ## v1.7.0rc1 * Support for [dbt-core 1.7](https://github.com/dbt-labs/dbt-core/releases/tag/v1.7.0) + * Decouple `get_catalog` macro (macro override test is skipped as it does not cover the behavior) + * implement custom `date_spine` and `generate_series` macro logic for synapse to match nested-CTE limitation + * Add UTF-8-BOM CSV
and exclude from precommit format override + * New/extended adapter test zones + - get_last_relation_modified + - date_spine + - generate_series + - get_intervals_between + - get_powers_of_two + - store_test_failures + - dbt_clone (same target and state) + - seed ## v1.4.1rc1 From 10b5857039f5cf4d81f046b4d07aa656bc951371 Mon Sep 17 00:00:00 2001 From: nszoni Date: Thu, 22 Feb 2024 17:07:50 +0100 Subject: [PATCH 14/14] adapt macros and tests for 1.7 --- .../synapse/macros/adapters/relation.sql | 24 ++-- .../synapse/macros/adapters/replace.sql | 50 +++++++ .../create_materialized_view_as.sql | 30 +++++ .../materialized_view/materialized_view.sql | 127 +++++++++++++++--- .../models/table/create_table_as.sql | 1 - tests/functional/adapter/test_constraints.py | 44 ++++-- .../adapter/test_materialized_views.py | 13 +- 7 files changed, 252 insertions(+), 37 deletions(-) create mode 100644 dbt/include/synapse/macros/adapters/replace.sql create mode 100644 dbt/include/synapse/macros/materializations/models/materialized_view/create_materialized_view_as.sql diff --git a/dbt/include/synapse/macros/adapters/relation.sql b/dbt/include/synapse/macros/adapters/relation.sql index e5136e56..9dd4cf12 100644 --- a/dbt/include/synapse/macros/adapters/relation.sql +++ b/dbt/include/synapse/macros/adapters/relation.sql @@ -5,22 +5,26 @@ {% endmacro %} {% macro synapse__drop_relation_script(relation) -%} + {% if relation is not none %} {% if relation.type == 'view' or relation.type == 'materialized_view' -%} {% set object_id_type = 'V' %} {% elif relation.type == 'table'%} {% set object_id_type = 'U' %} {%- else -%} invalid target name {% endif %} - - if object_id ('{{ relation.include(database=False) }}','{{ object_id_type }}') is not null - {% if relation.type == 'view' or relation.type == 'materialized_view' -%} - begin - drop view {{ relation.include(database=False) }} - end - {% elif relation.type == 'table' %} - begin - drop {{ relation.type }} {{ relation.include(database=False) }} 
- end + if object_id ('{{ relation.include(database=False) }}','{{ object_id_type }}') is not null + {% if relation.type == 'view' or relation.type == 'materialized_view' -%} + begin + drop view {{ relation.include(database=False) }} + end + {% elif relation.type == 'table' %} + begin + drop {{ relation.type }} {{ relation.include(database=False) }} + end + {% endif %} + {% else %} + -- no object to drop + select 1 as nothing {% endif %} {% endmacro %} diff --git a/dbt/include/synapse/macros/adapters/replace.sql b/dbt/include/synapse/macros/adapters/replace.sql new file mode 100644 index 00000000..ec7bedf9 --- /dev/null +++ b/dbt/include/synapse/macros/adapters/replace.sql @@ -0,0 +1,50 @@ +{% macro get_replace_sql(existing_relation, target_relation, sql) %} + {{- log('Applying REPLACE to: ' ~ existing_relation) -}} + {{- adapter.dispatch('get_replace_sql', 'synapse')(existing_relation, target_relation, sql) -}} +{% endmacro %} + + +{% macro synapse__get_replace_sql(existing_relation, target_relation, sql) %} + + {# /* use a create or replace statement if possible: the existing and target relation types must match and the existing relation must be replaceable */ #} + + {% set is_replaceable = existing_relation.type == target_relation.type and existing_relation.can_be_replaced %} + + {% if is_replaceable and existing_relation.is_view %} + {{ get_replace_view_sql(target_relation, sql) }} + + {% elif is_replaceable and existing_relation.is_table %} + {{ get_replace_table_sql(target_relation, sql) }} + + {% elif is_replaceable and existing_relation.is_materialized_view %} + {{ get_replace_materialized_view_sql(target_relation, sql) }} + + {# /* a create or replace statement is not possible, so try to stage and/or backup to be safe */ #} + + {# /* create target_relation as an intermediate relation, then swap it out with the existing one using a backup */ #} + {%- elif target_relation.can_be_renamed and existing_relation.can_be_renamed -%} + {{ get_create_intermediate_sql(target_relation, sql) }}; + {{ get_create_backup_sql(existing_relation) }}; + {{
get_rename_intermediate_sql(target_relation) }}; + {{ synapse__drop_relation(existing_relation) }} + + {# /* create target_relation as an intermediate relation, then swap it out with the existing one without using a backup */ #} + {%- elif target_relation.can_be_renamed -%} + {{ get_create_intermediate_sql(target_relation, sql) }}; + {{ synapse__drop_relation(existing_relation) }}; + {{ get_rename_intermediate_sql(target_relation) }} + + {# /* create target_relation in place by first backing up the existing relation */ #} + {%- elif existing_relation.can_be_renamed -%} + {{ get_create_backup_sql(existing_relation) }}; + {{ get_create_sql(target_relation, sql) }}; + {{ synapse__drop_relation(existing_relation) }} + + {# /* no renaming is allowed, so just drop and create */ #} + {%- else -%} + {{ synapse__drop_relation(existing_relation) }}; + {{ get_create_sql(target_relation, sql) }} + + {%- endif -%} + +{% endmacro %} diff --git a/dbt/include/synapse/macros/materializations/models/materialized_view/create_materialized_view_as.sql b/dbt/include/synapse/macros/materializations/models/materialized_view/create_materialized_view_as.sql new file mode 100644 index 00000000..7a4319a9 --- /dev/null +++ b/dbt/include/synapse/macros/materializations/models/materialized_view/create_materialized_view_as.sql @@ -0,0 +1,30 @@ +{% macro ref(model_name) %} + + {% do return(builtins.ref(model_name).include(database=false)) %} + +{% endmacro %} + + +{% macro synapse__get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) %} + {# Synapse does not have ALTER...RENAME function, so use synapse__rename_relation_script #} + + {%- set dist = config.get('dist', default="ROUND_ROBIN") -%} + EXEC(' + CREATE materialized view [{{intermediate_relation.schema}}].[{{intermediate_relation.identifier}}] + WITH ( DISTRIBUTION = {{dist}} ) + AS {{ sql }} + '); + + {{ synapse__rename_relation_script(existing_relation, backup_relation) }} + {{ 
synapse__rename_relation_script(intermediate_relation, relation) }} + +{% endmacro %} + +{% macro synapse__get_create_materialized_view_as_sql(relation, sql) %} + {%- set dist = config.get('dist', default="ROUND_ROBIN") -%} + + CREATE materialized view [{{relation.schema}}].[{{relation.identifier}}] + WITH ( DISTRIBUTION = {{dist}} ) + AS {{ sql }} + +{% endmacro %} diff --git a/dbt/include/synapse/macros/materializations/models/materialized_view/materialized_view.sql b/dbt/include/synapse/macros/materializations/models/materialized_view/materialized_view.sql index 2dff1b4b..f7251d93 100644 --- a/dbt/include/synapse/macros/materializations/models/materialized_view/materialized_view.sql +++ b/dbt/include/synapse/macros/materializations/models/materialized_view/materialized_view.sql @@ -1,29 +1,122 @@ -{% macro ref(model_name) %} +{% materialization materialized_view, adapter='synapse' %} + {% set existing_relation = load_cached_relation(this) %} + {% set target_relation = this.incorporate(type=this.MaterializedView) %} + {% set intermediate_relation = make_intermediate_relation(target_relation) %} + {% set backup_relation_type = this.MaterializedView if existing_relation is none else existing_relation.type %} + {% set backup_relation = make_backup_relation(target_relation, backup_relation_type) %} - {% do return(builtins.ref(model_name).include(database=false)) %} + {{ materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) }} + + {% set build_sql = materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %} + + {% if build_sql == '' %} + {{ materialized_view_execute_no_op(target_relation) }} + {% else %} + {{ materialized_view_execute_build_sql(build_sql, existing_relation, target_relation, post_hooks) }} + {% endif %} + + {{ materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) }} + + {{ return({'relations': [target_relation]}) }} + +{% endmaterialization %} + + +{% 
macro materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) %} + + -- backup_relation and intermediate_relation should not already exist in the database + -- it's possible these exist because of a previous run that exited unexpectedly + {% set preexisting_backup_relation = load_cached_relation(backup_relation) %} + {% set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) %} + + -- they return none when the relation is not found + + -- drop the temp relations if they exist already in the database + {{ synapse__drop_relation(preexisting_backup_relation) }} + {{ synapse__drop_relation(preexisting_intermediate_relation) }} + + {{ run_hooks(pre_hooks, inside_transaction=False) }} + +{% endmacro %} + + +{% macro materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) %} + + -- drop the temp relations if they exist to leave the database clean for the next run + {{ synapse__drop_relation_script(backup_relation) }} + {{ synapse__drop_relation_script(intermediate_relation) }} + + {{ run_hooks(post_hooks, inside_transaction=False) }} {% endmacro %} -{% macro synapse__get_replace_materialized_view_as_sql(relation, sql, existing_relation, backup_relation, intermediate_relation) %} - {# Synapse does not have ALTER...RENAME function, so use synapse__rename_relation_script #} - {%- set dist = config.get('dist', default="ROUND_ROBIN") -%} - EXEC(' - CREATE materialized view {{ intermediate_relation.include(database=False) }} - WITH ( DISTRIBUTION = {{dist}} ) - AS {{ sql }} - '); +{% macro materialized_view_get_build_sql(existing_relation, target_relation, backup_relation, intermediate_relation) %} - {{ synapse__rename_relation_script(existing_relation, backup_relation) }} - {{ synapse__rename_relation_script(intermediate_relation, relation) }} + {% set full_refresh_mode = should_full_refresh() %} + + -- determine the scenario we're in: create, full_refresh, alter, refresh data + {% if existing_relation is none %} + {% set build_sql =
get_create_materialized_view_as_sql(target_relation, sql) %} + {% elif full_refresh_mode or not existing_relation.is_materialized_view %} + {% set build_sql = get_replace_sql(existing_relation, target_relation, sql) %} + {% else %} + -- get config options + {% set on_configuration_change = config.get('on_configuration_change') %} + {% set configuration_changes = get_materialized_view_configuration_changes(existing_relation, config) %} + + {% if configuration_changes is none %} + {% set build_sql = refresh_materialized_view(target_relation) %} + + {% elif on_configuration_change == 'apply' %} + {% set build_sql = get_alter_materialized_view_as_sql(target_relation, configuration_changes, sql, existing_relation, backup_relation, intermediate_relation) %} + {% elif on_configuration_change == 'continue' %} + {% set build_sql = '' %} + {{ exceptions.warn("Configuration changes were identified and `on_configuration_change` was set to `continue` for `" ~ target_relation ~ "`") }} + {% elif on_configuration_change == 'fail' %} + {{ exceptions.raise_fail_fast_error("Configuration changes were identified and `on_configuration_change` was set to `fail` for `" ~ target_relation ~ "`") }} + + {% else %} + -- this only happens if the user provides a value other than `apply`, `continue`, `fail` + {{ exceptions.raise_compiler_error("Unexpected configuration scenario") }} + + {% endif %} + + {% endif %} + + {% do return(build_sql) %} {% endmacro %} -{% macro synapse__get_create_materialized_view_as_sql(relation, sql) %} - {%- set dist = config.get('dist', default="ROUND_ROBIN") -%} - CREATE materialized view {{ relation.include(database=False) }} - WITH ( DISTRIBUTION = {{dist}} ) - AS {{ sql }} +{% macro materialized_view_execute_no_op(target_relation) %} + {% do store_raw_result( + name="main", + message="skip " ~ target_relation, + code="skip", + rows_affected="-1" + ) %} +{% endmacro %} + + +{% macro materialized_view_execute_build_sql(build_sql, existing_relation, target_relation,
post_hooks) %} + + -- `BEGIN` happens here: + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + {% set grant_config = config.get('grants') %} + + {% call statement(name="main") %} + {{ build_sql }} + {% endcall %} + + {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} + {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} + + {% do persist_docs(target_relation, model) %} + + {{ run_hooks(post_hooks, inside_transaction=True) }} + + {{ adapter.commit() }} {% endmacro %} diff --git a/dbt/include/synapse/macros/materializations/models/table/create_table_as.sql b/dbt/include/synapse/macros/materializations/models/table/create_table_as.sql index 744e7872..ac27d7ce 100644 --- a/dbt/include/synapse/macros/materializations/models/table/create_table_as.sql +++ b/dbt/include/synapse/macros/materializations/models/table/create_table_as.sql @@ -31,7 +31,6 @@ {{ "["~column~"]" }}{{ ", " if not loop.last }} {% endfor %} {%endset%} - {{ synapse__build_model_constraints(relation) }} INSERT INTO [{{relation.schema}}].[{{relation.identifier}}] ({{listColumns}}) SELECT {{listColumns}} FROM [{{tmp_relation.schema}}].[{{tmp_relation.identifier}}] diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py index fe93747b..2761a1d9 100644 --- a/tests/functional/adapter/test_constraints.py +++ b/tests/functional/adapter/test_constraints.py @@ -510,10 +510,6 @@ def expected_sql(self): select ''blue'' as color,1 as id,''2019-01-01'' as date_day;'); create table ([id] int not null,[color] varchar(100),[date_day] varchar(100)) with(distribution = round_robin,heap) - alter table add constraint - primary key nonclustered(id)not enforced; - alter table add constraint - unique nonclustered(color,date_day)not enforced; insert into ([id],[color],[date_day]) select [id],[color],[date_day] from if object_id is not null begin drop view end @@ -585,13 +581,15 @@ def 
test__constraints_enforcement_rollback( # Make a contract-breaking change to the model write_file(null_model_sql, "models", "my_model.sql") - + # drops the previous table before + # when there is an exception, cant rollback failing_results = run_dbt(["run", "-s", "my_model"], expect_pass=False) assert len(failing_results) == 1 # Verify the previous table still exists relation = relation_from_name(project.adapter, "my_model") - old_model_exists_sql = f"select * from {relation}" + model_backup = str(relation).replace("my_model", "my_model__dbt_backup") + old_model_exists_sql = f"select * from {model_backup}" old_model_exists = project.run_sql(old_model_exists_sql, fetch="all") assert len(old_model_exists) == 1 assert old_model_exists[0][1] == expected_color @@ -653,7 +651,7 @@ def expected_sql(self): if object_id is not null begin drop view end if object_id is not null begin drop table end exec(\'create view as select \'\'blue\'\' as "from",1 as id,\'\'2019-01-01\'\' as date_day;\'); - create table ([id] integer not null,[from] varchar(100)not null,[date_day] varchar(100)) + create table ([id] int not null,[from] varchar(100)not null,[date_day] varchar(100)) with(distribution = round_robin,heap) insert into ([id],[from],[date_day]) select [id],[from],[date_day] from @@ -692,7 +690,37 @@ class TestTableConstraintsRollbackSynapse(BaseConstraintsRollback): class TestIncrementalConstraintsRollbackSynapse(BaseIncrementalConstraintsRollback): - pass + def test__constraints_enforcement_rollback( + self, project, expected_color, expected_error_messages, null_model_sql + ): + results = run_dbt(["run", "-s", "my_model"]) + assert len(results) == 1 + + # Make a contract-breaking change to the model + write_file(null_model_sql, "models", "my_model.sql") + # drops the previous table before + # when there is an exception, cant rollback + failing_results = run_dbt(["run", "-s", "my_model"], expect_pass=False) + assert len(failing_results) == 1 + + # Verify the previous table still 
exists, + # for incremental we are not creating backups, because its not a create replace + relation = relation_from_name(project.adapter, "my_model") + old_model_exists_sql = f"select * from {relation}" + old_model_exists = project.run_sql(old_model_exists_sql, fetch="all") + assert len(old_model_exists) == 1 + assert old_model_exists[0][1] == expected_color + + # Confirm this model was contracted + # TODO: is this step really necessary? + manifest = get_manifest(project.project_root) + model_id = "model.test.my_model" + my_model_config = manifest.nodes[model_id].config + contract_actual_config = my_model_config.contract + assert contract_actual_config.enforced is True + + # Its result includes the expected error messages + self.assert_expected_error_messages(failing_results[0].message, expected_error_messages) class TestTableContractSqlHeaderSynapse(BaseTableContractSqlHeader): diff --git a/tests/functional/adapter/test_materialized_views.py b/tests/functional/adapter/test_materialized_views.py index 06e86220..576257cc 100644 --- a/tests/functional/adapter/test_materialized_views.py +++ b/tests/functional/adapter/test_materialized_views.py @@ -42,7 +42,8 @@ def drop_cascade(project, test_model_identifier): # SYNAPSE HAS NO "DROP SCHEMA...CASCADE" # so drop all test materializations, to allow drop my_seed - # "my_materialized_view" always created in setup(), so always need to be dropped before my_seed + # "my_materialized_view" and its backup always created in setup(), + # so always need to be dropped before my_seed for identifier in ["my_materialized_view", test_model_identifier]: project.run_sql( f""" @@ -55,6 +56,16 @@ def drop_cascade(project, test_model_identifier): begin drop table "{project.test_schema}"."{identifier}" end + + if object_id ('"{project.test_schema}"."{identifier}__dbt_backup"','V') is not null + begin + drop view "{project.test_schema}"."{identifier}__dbt_backup" + end + + if object_id 
('"{project.test_schema}"."{identifier}__dbt_backup"','U') is not null + begin + drop table "{project.test_schema}"."{identifier}__dbt_backup" + end """ ) # then drop object my_seed, to allow drop schema