diff --git a/CHANGELOG.md b/CHANGELOG.md index 259516d96..29346ff3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,18 @@ -## dbt-snowflake 1.0.0 (Release TBD) +## dbt-snowflake 1.1.0 (TBD) + +### Features +- Adds tests for incremental model unique key parameter ([#91](https://github.com/dbt-labs/dbt-snowflake/issues/91)) + +### Fixes +- Add unique\_id field to docs generation test catalogs; a follow-on PR to core PR ([#4168](https://github.com/dbt-labs/dbt-core/pull/4618)) + +## dbt-snowflake 1.0.0 (December 3rd, 2021) ## dbt-snowflake 1.0.0rc2 (November 24, 2021) ### Fixes - Apply query tags for Seed and Snapshot materialisations ([#20](https://github.com/dbt-labs/dbt-snowflake/issues/20), [#48](https://github.com/dbt-labs/dbt-snowflake/issues/48)) - Adds column-level comments to Snowflake views ([#17](https://github.com/dbt-labs/dbt-snowflake/issues/17)) -- Add unique\_id field to docs generation test catalogs; a follow-on PR to core PR ([#4168](https://github.com/dbt-labs/dbt-core/pull/4618)) ### Under the hood - Resolves an issue caused when the Snowflake OCSP server is not accessible, by exposing the `insecure_mode` boolean avalable in the Snowflake python connector ([#31](https://github.com/dbt-labs/dbt-snowflake/issues/31), [#49](https://github.com/dbt-labs/dbt-snowflake/pull/49)) diff --git a/tests/integration/docs_generate_tests/test_docs_generate.py b/tests/integration/docs_generate_tests/test_docs_generate.py index e70f70c5c..118d121ee 100644 --- a/tests/integration/docs_generate_tests/test_docs_generate.py +++ b/tests/integration/docs_generate_tests/test_docs_generate.py @@ -418,7 +418,7 @@ def rendered_tst_config(self, **updates): 'database': None, 'schema': 'dbt_test__audit', 'alias': None, - 'meta': {}, + 'meta': {} } result.update(updates) return result diff --git a/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql new file mode 100644 index 000000000..6f12560d9 --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql @@ -0,0 +1,17 @@ +{{ + config( + materialized='incremental', + unique_key=['state', 'state'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql new file mode 100644 index 000000000..de2129d9b --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/empty_str_unique_key.sql @@ -0,0 +1,16 @@ +-- ensure model with empty string unique key should build normally + +{{ + config( + materialized='incremental', + unique_key='' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql new file mode 100644 index 000000000..3ebb5b416 --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/empty_unique_key_list.sql @@ -0,0 +1,14 @@ +-- model with empty list unique key should build normally + +{{ + config( + materialized='incremental', + unique_key=[] + ) +}} + +select * from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql new file mode 100644 index 000000000..99223458a --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/expected/one_str__overwrite.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='table' + ) +}} + +select + 'CT'::varchar(2) as state, + 'Hartford'::varchar(12) as county, + 'Hartford'::varchar(12) as city, + '2022-02-14'::date as last_visit_date +union all +select 'MA','Suffolk','Boston','2020-02-12' +union all +select 'NJ','Mercer','Trenton','2022-01-01' +union all +select 'NY','Kings','Brooklyn','2021-04-02' +union all +select 'NY','New York','Manhattan','2021-04-01' +union all +select 'PA','Philadelphia','Philadelphia','2021-05-21' diff --git a/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql b/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql new file mode 100644 index 000000000..99223458a --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='table' + ) +}} + +select + 'CT'::varchar(2) as state, + 'Hartford'::varchar(12) as county, + 'Hartford'::varchar(12) as city, + '2022-02-14'::date as last_visit_date +union all +select 'MA','Suffolk','Boston','2020-02-12' +union all +select 'NJ','Mercer','Trenton','2022-01-01' +union all +select 'NY','Kings','Brooklyn','2021-04-02' +union all +select 'NY','New York','Manhattan','2021-04-01' +union all +select 'PA','Philadelphia','Philadelphia','2021-05-21' diff --git a/tests/integration/incremental_unique_id_test/models/no_unique_key.sql b/tests/integration/incremental_unique_id_test/models/no_unique_key.sql new file mode 100644 index 000000000..d803a142b --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/no_unique_key.sql @@ -0,0 +1,15 @@ +-- no specified unique key should cause no special build behavior + +{{ + config( + materialized='incremental' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql new file mode 100644 index 000000000..333e445ab --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql @@ -0,0 +1,21 @@ +-- a multi-argument unique key list should see overwriting on rows in the model +-- where all unique key fields apply +-- N.B. needed for direct comparison with seed + +{{ + config( + materialized='incremental', + unique_key=['state', 'county', 'city'] + ) +}} + +select + state as state, + county as county, + city as city, + last_visit_date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql new file mode 100644 index 000000000..cf43effeb --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/not_found_unique_key.sql @@ -0,0 +1,16 @@ +-- a model with a unique key not found in the table itself will error out + +{{ + config( + materialized='incremental', + unique_key='thisisnotacolumn' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql new file mode 100644 index 000000000..8cbde9659 --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/not_found_unique_key_list.sql @@ -0,0 +1,10 @@ +-- a unique key list with any element not in the model itself should error out + +{{ + config( + materialized='incremental', + unique_key=['state', 'thisisnotacolumn'] + ) +}} + +select * from {{ ref('seed') }} diff --git a/tests/integration/incremental_unique_id_test/models/str_unique_key.sql b/tests/integration/incremental_unique_id_test/models/str_unique_key.sql new file mode 100644 index 000000000..e2fb1424f --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/str_unique_key.sql @@ -0,0 +1,21 @@ +-- a unique key with a string should trigger to overwrite behavior when +-- the source has entries in conflict (i.e. more than one row per unique key +-- combination) + +{{ + config( + materialized='incremental', + unique_key='state' + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql new file mode 100644 index 000000000..f81c9feb6 --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/trinary_unique_key_list.sql @@ -0,0 +1,20 @@ +-- a multi-argument unique key list should see overwriting on rows in the model +-- where all unique key fields apply + +{{ + config( + materialized='incremental', + unique_key=['state', 'county', 'city'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql b/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql new file mode 100644 index 000000000..537f92dfe --- /dev/null +++ b/tests/integration/incremental_unique_id_test/models/unary_unique_key_list.sql @@ -0,0 +1,20 @@ +-- a one argument unique key list should result in overwritting semantics for +-- that one matching field + +{{ + config( + materialized='incremental', + unique_key=['state'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql b/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql new file mode 100644 index 000000000..185a7ca3a --- /dev/null +++ b/tests/integration/incremental_unique_id_test/seeds/add_new_rows.sql @@ -0,0 +1,12 @@ +-- Insert statement which when applied to seed.csv sees incremental model +-- grow in size while not (necessarily) diverging from the seed itself. + +-- insert two new rows, both of which should be in incremental model +-- with any unique columns +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('WA','King','Seattle','2022-02-01'); + +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('CA','Los Angeles','Los Angeles','2022-02-01'); diff --git a/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql b/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql new file mode 100644 index 000000000..f1e8e152f --- /dev/null +++ b/tests/integration/incremental_unique_id_test/seeds/duplicate_insert.sql @@ -0,0 +1,9 @@ +-- Insert statement which when applied to seed.csv triggers the inplace +-- overwrite strategy of incremental models. Seed and incremental model +-- diverge. + +-- insert new row, which should not be in incremental model +-- with primary or first three columns unique +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('CT','Hartford','Hartford','2022-02-14'); diff --git a/tests/integration/incremental_unique_id_test/seeds/seed.csv b/tests/integration/incremental_unique_id_test/seeds/seed.csv new file mode 100644 index 000000000..2cd4ef06e --- /dev/null +++ b/tests/integration/incremental_unique_id_test/seeds/seed.csv @@ -0,0 +1,7 @@ +state,county,city,last_visit_date +CT,Hartford,Hartford,2020-09-23 +MA,Suffolk,Boston,2020-02-12 +NJ,Mercer,Trenton,2022-01-01 +NY,Kings,Brooklyn,2021-04-02 +NY,New York,Manhattan,2021-04-01 +PA,Philadelphia,Philadelphia,2021-05-21 diff --git a/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py new file mode 100644 index 000000000..419a2f1f4 --- /dev/null +++ b/tests/integration/incremental_unique_id_test/test_incremental_unique_id.py @@ -0,0 +1,299 @@ +from dbt.contracts.results import RunStatus +from tests.integration.base import DBTIntegrationTest, use_profile +from collections import namedtuple +from pathlib import Path + + +TestResults = namedtuple( + 'TestResults', + ['seed_count', 'model_count', 'seed_rows', 'inc_test_model_count', + 'opt_model_count', 'relation'], +) + + +class TestIncrementalUniqueKeyBase(DBTIntegrationTest): + @property + def schema(self): + return 'incremental_unique_key' + + @property + def models(self): + return 'models' + + def update_incremental_model(self, incremental_model): + '''update incremental model after the seed table has been updated''' + model_result_set = self.run_dbt(['run', '--select', incremental_model]) + return len(model_result_set) + + def setup_test(self, seed, incremental_model, update_sql_file): + '''build a test case and return values for assertions + [INFO] Models must be in place to test incremental model + construction and merge behavior. Database touches are side + effects to extract counts (which speak to health of unique keys).''' + #idempotently create some number of seeds and incremental models''' + seed_count = len(self.run_dbt( + ['seed', '--select', seed, '--full-refresh'] + )) + model_count = len(self.run_dbt( + ['run', '--select', incremental_model, '--full-refresh'] + )) + + # update seed in anticipation of incremental model update + row_count_query = 'select * from {}.{}'.format( + self.unique_schema(), + seed + ) + self.run_sql_file(Path('seeds') / Path(update_sql_file + '.sql')) + seed_rows = len(self.run_sql(row_count_query, fetch='all')) + + # propagate seed state to incremental model according to unique keys + inc_test_model_count = self.update_incremental_model( + incremental_model=incremental_model + ) + + return (seed_count, model_count, seed_rows, inc_test_model_count) + + def test_scenario_correctness(self, expected_fields, test_case_fields): + '''Invoke assertions to verify correct build functionality''' + # 1. test seed(s) should build afresh + self.assertEqual( + expected_fields.seed_count, test_case_fields.seed_count + ) + # 2. test model(s) should build afresh + self.assertEqual( + expected_fields.model_count, test_case_fields.model_count + ) + # 3. seeds should have intended row counts post update + self.assertEqual( + expected_fields.seed_rows, test_case_fields.seed_rows + ) + # 4. incremental test model(s) should be updated + self.assertEqual( + expected_fields.inc_test_model_count, + test_case_fields.inc_test_model_count + ) + # 5. extra incremental model(s) should be built; optional since + # comparison may be between an incremental model and seed + if (expected_fields.opt_model_count and + test_case_fields.opt_model_count): + self.assertEqual( + expected_fields.opt_model_count, + test_case_fields.opt_model_count + ) + # 6. result table should match intended result set (itself a relation) + self.assertTablesEqual( + expected_fields.relation, test_case_fields.relation + ) + + def stub_expected_fields( + self, relation, seed_rows, opt_model_count=None + ): + return TestResults( + seed_count=1, model_count=1, seed_rows=seed_rows, + inc_test_model_count=1, opt_model_count=opt_model_count, + relation=relation + ) + + def fail_to_build_inc_missing_unique_key_column(self, incremental_model_name): + '''should pass back error state when trying build an incremental + model whose unique key or keylist includes a column missing + from the incremental model''' + seed_count = len(self.run_dbt( + ['seed', '--select', 'seed', '--full-refresh'] + )) + # unique keys are not applied on first run, so two are needed + self.run_dbt( + ['run', '--select', incremental_model_name, '--full-refresh'], + expect_pass=True + ) + run_result = self.run_dbt( + ['run', '--select', incremental_model_name], + expect_pass=False + ).results[0] + + return run_result.status, run_result.message + + +class TestIncrementalWithoutUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('snowflake') + def test__snowflake_no_unique_keys(self): + '''with no unique keys, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='no_unique_key' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + +class TestIncrementalStrUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('snowflake') + def test__snowflake_empty_str_unique_key(self): + '''with empty string for unique key, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='empty_str_unique_key' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_one_unique_key(self): + '''with one unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='str_unique_key' + update_sql_file='duplicate_insert' + expected_model='one_str__overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_bad_unique_key(self): + '''expect compilation error from unique key not being a column''' + + err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" + + (status, exc) = self.fail_to_build_inc_missing_unique_key_column( + incremental_model_name='not_found_unique_key' + ) + + self.assertEqual(status, RunStatus.Error) + self.assertTrue(err_msg in exc) + + +class TestIncrementalListUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('snowflake') + def test__snowflake_empty_unique_key_list(self): + '''with no unique keys, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='empty_unique_key_list' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_unary_unique_key_list(self): + '''with one unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='unary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_duplicated_unary_unique_key_list(self): + '''with two of the same unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='duplicated_unary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_trinary_unique_key_list(self): + '''with three unique keys, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='trinary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_trinary_unique_key_list_no_update(self): + '''even with three unique keys, adding distinct rows to seed does not + cause seed and model to diverge''' + seed='seed' + seed_rows=8 + incremental_model='nontyped_trinary_unique_key_list' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('snowflake') + def test__snowflake_bad_unique_key_list(self): + '''expect compilation error from unique key not being a column''' + + err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" + + (status, exc) = self.fail_to_build_inc_missing_unique_key_column( + incremental_model_name='not_found_unique_key_list' + ) + + self.assertEqual(status, RunStatus.Error) + self.assertTrue(err_msg in exc)