From 236d69f35f6a0744a5b58052ba1e05888ac4b496 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Fri, 11 Mar 2022 12:54:21 -0600 Subject: [PATCH 1/5] pre-commit additions --- .../models/incremental/merge.sql | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql index 57205c4e8d2..6f38d9c9912 100644 --- a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql +++ b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql @@ -56,13 +56,26 @@ {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} - {% if unique_key is not none %} - delete from {{ target }} - where ({{ unique_key }}) in ( - select ({{ unique_key }}) - from {{ source }} - ); - {% endif %} + {% if unique_key is not none and unique_key != [] %} + {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %} + delete from {{target }} + using {{ source }} + where ( + {% for key in unique_key %} + {{ source }}.{{ key }} = {{ target }}.{{ key }} + {{ "and " if not loop.last }} + {% endfor %} + ); + {% else %} + delete from {{ target }} + where ( + {{ unique_key }}) in ( + select ({{ unique_key }}) + from {{ source }} + ); + + {% endif %} + {% endif %} insert into {{ target }} ({{ dest_cols_csv }}) ( From b6bdedba49fd76d120eb33c3b550d2721b81efd4 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Mon, 14 Mar 2022 11:24:38 -0500 Subject: [PATCH 2/5] added changie changelog entry --- .changes/unreleased/Features-20220314-112341.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changes/unreleased/Features-20220314-112341.yaml diff --git a/.changes/unreleased/Features-20220314-112341.yaml b/.changes/unreleased/Features-20220314-112341.yaml new file mode 100644 index 00000000000..b5a8fd17413 --- /dev/null +++ b/.changes/unreleased/Features-20220314-112341.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Allow unique key to take a list implementation for postgres/redshift +time: 2022-03-14T11:23:41.293726-05:00 +custom: + Author: McKnight-42 + Issue: "4738" + PR: "4858" From 6f194b4d250194fbdfea7b71979a2012fbadc7dc Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Mon, 14 Mar 2022 13:40:03 -0500 Subject: [PATCH 3/5] moving integration test over --- .../duplicated_unary_unique_key_list.sql | 17 + .../models/empty_str_unique_key.sql | 16 + .../models/empty_unique_key_list.sql | 14 + .../models/expected/one_str__overwrite.sql | 21 ++ .../unique_key_list__inplace_overwrite.sql | 21 ++ .../models/no_unique_key.sql | 15 + .../nontyped_trinary_unique_key_list.sql | 21 ++ .../models/not_found_unique_key.sql | 16 + .../models/not_found_unique_key_list.sql | 10 + .../models/str_unique_key.sql | 21 ++ .../models/trinary_unique_key_list.sql | 20 ++ .../models/unary_unique_key_list.sql | 20 ++ .../seeds/add_new_rows.sql | 12 + .../seeds/duplicate_insert.sql | 9 + .../seeds/seed.csv | 7 + .../test_incremental_unique_id.py | 299 ++++++++++++++++++ 16 files changed, 539 insertions(+) create mode 100644 test/integration/076_incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/empty_str_unique_key.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/empty_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/no_unique_key.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/not_found_unique_key.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/not_found_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/str_unique_key.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/trinary_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/models/unary_unique_key_list.sql create mode 100644 test/integration/076_incremental_unique_id_test/seeds/add_new_rows.sql create mode 100644 test/integration/076_incremental_unique_id_test/seeds/duplicate_insert.sql create mode 100644 test/integration/076_incremental_unique_id_test/seeds/seed.csv create mode 100644 test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py diff --git a/test/integration/076_incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql new file mode 100644 index 00000000000..6f12560d9ec --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/duplicated_unary_unique_key_list.sql @@ -0,0 +1,17 @@ +{{ + config( + materialized='incremental', + unique_key=['state', 'state'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/empty_str_unique_key.sql b/test/integration/076_incremental_unique_id_test/models/empty_str_unique_key.sql new file mode 100644 index 00000000000..de2129d9bfb --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/empty_str_unique_key.sql @@ -0,0 +1,16 @@ +-- ensure model with empty string unique key should build normally + +{{ + config( + materialized='incremental', + unique_key='' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/empty_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/empty_unique_key_list.sql new file mode 100644 index 00000000000..3ebb5b41675 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/empty_unique_key_list.sql @@ -0,0 +1,14 @@ +-- model with empty list unique key should build normally + +{{ + config( + materialized='incremental', + unique_key=[] + ) +}} + +select * from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql b/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql new file mode 100644 index 00000000000..99223458a1f --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='table' + ) +}} + +select + 'CT'::varchar(2) as state, + 'Hartford'::varchar(12) as county, + 'Hartford'::varchar(12) as city, + '2022-02-14'::date as last_visit_date +union all +select 'MA','Suffolk','Boston','2020-02-12' +union all +select 'NJ','Mercer','Trenton','2022-01-01' +union all +select 'NY','Kings','Brooklyn','2021-04-02' +union all +select 'NY','New York','Manhattan','2021-04-01' +union all +select 'PA','Philadelphia','Philadelphia','2021-05-21' diff --git a/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql b/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql new file mode 100644 index 00000000000..99223458a1f --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='table' + ) +}} + +select + 'CT'::varchar(2) as state, + 'Hartford'::varchar(12) as county, + 'Hartford'::varchar(12) as city, + '2022-02-14'::date as last_visit_date +union all +select 'MA','Suffolk','Boston','2020-02-12' +union all +select 'NJ','Mercer','Trenton','2022-01-01' +union all +select 'NY','Kings','Brooklyn','2021-04-02' +union all +select 'NY','New York','Manhattan','2021-04-01' +union all +select 'PA','Philadelphia','Philadelphia','2021-05-21' diff --git a/test/integration/076_incremental_unique_id_test/models/no_unique_key.sql b/test/integration/076_incremental_unique_id_test/models/no_unique_key.sql new file mode 100644 index 00000000000..d803a142b38 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/no_unique_key.sql @@ -0,0 +1,15 @@ +-- no specified unique key should cause no special build behavior + +{{ + config( + materialized='incremental' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql new file mode 100644 index 00000000000..333e445abca --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/nontyped_trinary_unique_key_list.sql @@ -0,0 +1,21 @@ +-- a multi-argument unique key list should see overwriting on rows in the model +-- where all unique key fields apply +-- N.B. needed for direct comparison with seed + +{{ + config( + materialized='incremental', + unique_key=['state', 'county', 'city'] + ) +}} + +select + state as state, + county as county, + city as city, + last_visit_date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/not_found_unique_key.sql b/test/integration/076_incremental_unique_id_test/models/not_found_unique_key.sql new file mode 100644 index 00000000000..cf43effeb2f --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/not_found_unique_key.sql @@ -0,0 +1,16 @@ +-- a model with a unique key not found in the table itself will error out + +{{ + config( + materialized='incremental', + unique_key='thisisnotacolumn' + ) +}} + +select + * +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/not_found_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/not_found_unique_key_list.sql new file mode 100644 index 00000000000..8cbde9659f1 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/not_found_unique_key_list.sql @@ -0,0 +1,10 @@ +-- a unique key list with any element not in the model itself should error out + +{{ + config( + materialized='incremental', + unique_key=['state', 'thisisnotacolumn'] + ) +}} + +select * from {{ ref('seed') }} diff --git a/test/integration/076_incremental_unique_id_test/models/str_unique_key.sql b/test/integration/076_incremental_unique_id_test/models/str_unique_key.sql new file mode 100644 index 00000000000..e2fb1424fd6 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/str_unique_key.sql @@ -0,0 +1,21 @@ +-- a unique key with a string should trigger to overwrite behavior when +-- the source has entries in conflict (i.e. more than one row per unique key +-- combination) + +{{ + config( + materialized='incremental', + unique_key='state' + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/trinary_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/trinary_unique_key_list.sql new file mode 100644 index 00000000000..f81c9feb68b --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/trinary_unique_key_list.sql @@ -0,0 +1,20 @@ +-- a multi-argument unique key list should see overwriting on rows in the model +-- where all unique key fields apply + +{{ + config( + materialized='incremental', + unique_key=['state', 'county', 'city'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/models/unary_unique_key_list.sql b/test/integration/076_incremental_unique_id_test/models/unary_unique_key_list.sql new file mode 100644 index 00000000000..537f92dfe89 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/models/unary_unique_key_list.sql @@ -0,0 +1,20 @@ +-- a one argument unique key list should result in overwritting semantics for +-- that one matching field + +{{ + config( + materialized='incremental', + unique_key=['state'] + ) +}} + +select + state::varchar(2) as state, + county::varchar(12) as county, + city::varchar(12) as city, + last_visit_date::date as last_visit_date +from {{ ref('seed') }} + +{% if is_incremental() %} + where last_visit_date > (select max(last_visit_date) from {{ this }}) +{% endif %} diff --git a/test/integration/076_incremental_unique_id_test/seeds/add_new_rows.sql b/test/integration/076_incremental_unique_id_test/seeds/add_new_rows.sql new file mode 100644 index 00000000000..185a7ca3af6 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/seeds/add_new_rows.sql @@ -0,0 +1,12 @@ +-- Insert statement which when applied to seed.csv sees incremental model +-- grow in size while not (necessarily) diverging from the seed itself. + +-- insert two new rows, both of which should be in incremental model +-- with any unique columns +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('WA','King','Seattle','2022-02-01'); + +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('CA','Los Angeles','Los Angeles','2022-02-01'); diff --git a/test/integration/076_incremental_unique_id_test/seeds/duplicate_insert.sql b/test/integration/076_incremental_unique_id_test/seeds/duplicate_insert.sql new file mode 100644 index 00000000000..f1e8e152f1d --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/seeds/duplicate_insert.sql @@ -0,0 +1,9 @@ +-- Insert statement which when applied to seed.csv triggers the inplace +-- overwrite strategy of incremental models. Seed and incremental model +-- diverge. + +-- insert new row, which should not be in incremental model +-- with primary or first three columns unique +insert into {schema}.seed + (state, county, city, last_visit_date) +values ('CT','Hartford','Hartford','2022-02-14'); diff --git a/test/integration/076_incremental_unique_id_test/seeds/seed.csv b/test/integration/076_incremental_unique_id_test/seeds/seed.csv new file mode 100644 index 00000000000..2cd4ef06ea8 --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/seeds/seed.csv @@ -0,0 +1,7 @@ +state,county,city,last_visit_date +CT,Hartford,Hartford,2020-09-23 +MA,Suffolk,Boston,2020-02-12 +NJ,Mercer,Trenton,2022-01-01 +NY,Kings,Brooklyn,2021-04-02 +NY,New York,Manhattan,2021-04-01 +PA,Philadelphia,Philadelphia,2021-05-21 diff --git a/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py b/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py new file mode 100644 index 00000000000..f8c0b37170e --- /dev/null +++ b/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py @@ -0,0 +1,299 @@ +from dbt.contracts.results import RunStatus +from test.integration.base import DBTIntegrationTest, use_profile +from collections import namedtuple +from pathlib import Path + + +TestResults = namedtuple( + 'TestResults', + ['seed_count', 'model_count', 'seed_rows', 'inc_test_model_count', + 'opt_model_count', 'relation'], +) + + +class TestIncrementalUniqueKeyBase(DBTIntegrationTest): + @property + def schema(self): + return 'incremental_unique_key' + + @property + def models(self): + return 'models' + + def update_incremental_model(self, incremental_model): + '''update incremental model after the seed table has been updated''' + model_result_set = self.run_dbt(['run', '--select', incremental_model]) + return len(model_result_set) + + def setup_test(self, seed, incremental_model, update_sql_file): + '''build a test case and return values for assertions + [INFO] Models must be in place to test incremental model + construction and merge behavior. Database touches are side + effects to extract counts (which speak to health of unique keys).''' + #idempotently create some number of seeds and incremental models''' + seed_count = len(self.run_dbt( + ['seed', '--select', seed, '--full-refresh'] + )) + model_count = len(self.run_dbt( + ['run', '--select', incremental_model, '--full-refresh'] + )) + + # update seed in anticipation of incremental model update + row_count_query = 'select * from {}.{}'.format( + self.unique_schema(), + seed + ) + self.run_sql_file(Path('seeds') / Path(update_sql_file + '.sql')) + seed_rows = len(self.run_sql(row_count_query, fetch='all')) + + # propagate seed state to incremental model according to unique keys + inc_test_model_count = self.update_incremental_model( + incremental_model=incremental_model + ) + + return (seed_count, model_count, seed_rows, inc_test_model_count) + + def test_scenario_correctness(self, expected_fields, test_case_fields): + '''Invoke assertions to verify correct build functionality''' + # 1. test seed(s) should build afresh + self.assertEqual( + expected_fields.seed_count, test_case_fields.seed_count + ) + # 2. test model(s) should build afresh + self.assertEqual( + expected_fields.model_count, test_case_fields.model_count + ) + # 3. seeds should have intended row counts post update + self.assertEqual( + expected_fields.seed_rows, test_case_fields.seed_rows + ) + # 4. incremental test model(s) should be updated + self.assertEqual( + expected_fields.inc_test_model_count, + test_case_fields.inc_test_model_count + ) + # 5. extra incremental model(s) should be built; optional since + # comparison may be between an incremental model and seed + if (expected_fields.opt_model_count and + test_case_fields.opt_model_count): + self.assertEqual( + expected_fields.opt_model_count, + test_case_fields.opt_model_count + ) + # 6. result table should match intended result set (itself a relation) + self.assertTablesEqual( + expected_fields.relation, test_case_fields.relation + ) + + def stub_expected_fields( + self, relation, seed_rows, opt_model_count=None + ): + return TestResults( + seed_count=1, model_count=1, seed_rows=seed_rows, + inc_test_model_count=1, opt_model_count=opt_model_count, + relation=relation + ) + + def fail_to_build_inc_missing_unique_key_column(self, incremental_model_name): + '''should pass back error state when trying build an incremental + model whose unique key or keylist includes a column missing + from the incremental model''' + seed_count = len(self.run_dbt( + ['seed', '--select', 'seed', '--full-refresh'] + )) + # unique keys are not applied on first run, so two are needed + self.run_dbt( + ['run', '--select', incremental_model_name, '--full-refresh'], + expect_pass=True + ) + run_result = self.run_dbt( + ['run', '--select', incremental_model_name], + expect_pass=False + ).results[0] + + return run_result.status, run_result.message + + +class TestIncrementalWithoutUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('postgres') + def test__postgres_no_unique_keys(self): + '''with no unique keys, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='no_unique_key' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + +class TestIncrementalStrUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('postgres') + def test__postgres_empty_str_unique_key(self): + '''with empty string for unique key, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='empty_str_unique_key' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_one_unique_key(self): + '''with one unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='str_unique_key' + update_sql_file='duplicate_insert' + expected_model='one_str__overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_bad_unique_key(self): + '''expect compilation error from unique key not being a column''' + + err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" + + (status, exc) = self.fail_to_build_inc_missing_unique_key_column( + incremental_model_name='not_found_unique_key' + ) + + self.assertEqual(status, RunStatus.Error) + self.assertTrue(err_msg in exc) + + +class TestIncrementalListUniqueKey(TestIncrementalUniqueKeyBase): + @use_profile('postgres') + def test__postgres_empty_unique_key_list(self): + '''with no unique keys, seed and model should match''' + seed='seed' + seed_rows=8 + incremental_model='empty_unique_key_list' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_unary_unique_key_list(self): + '''with one unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='unary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_duplicated_unary_unique_key_list(self): + '''with two of the same unique key, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='duplicated_unary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_trinary_unique_key_list(self): + '''with three unique keys, model will overwrite existing row''' + seed='seed' + seed_rows=7 + incremental_model='trinary_unique_key_list' + update_sql_file='duplicate_insert' + expected_model='unique_key_list__inplace_overwrite' + + expected_fields = self.stub_expected_fields( + relation=expected_model, seed_rows=seed_rows, opt_model_count=1 + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=self.update_incremental_model(expected_model), + relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_trinary_unique_key_list_no_update(self): + '''even with three unique keys, adding distinct rows to seed does not + cause seed and model to diverge''' + seed='seed' + seed_rows=8 + incremental_model='nontyped_trinary_unique_key_list' + update_sql_file='add_new_rows' + + expected_fields = self.stub_expected_fields( + relation=seed, seed_rows=seed_rows + ) + test_case_fields = TestResults( + *self.setup_test(seed, incremental_model, update_sql_file), + opt_model_count=None, relation=incremental_model + ) + + self.test_scenario_correctness(expected_fields, test_case_fields) + + @use_profile('postgres') + def test__postgres_bad_unique_key_list(self): + '''expect compilation error from unique key not being a column''' + + err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" + + (status, exc) = self.fail_to_build_inc_missing_unique_key_column( + incremental_model_name='not_found_unique_key_list' + ) + + self.assertEqual(status, RunStatus.Error) + self.assertTrue(err_msg in exc) From 5aafc44af8fce3e54dea2b25802c4d103dc4dc8d Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 14 Mar 2022 15:02:18 -0400 Subject: [PATCH 4/5] Pair programming --- .../materializations/models/incremental/merge.sql | 2 +- .../models/expected/one_str__overwrite.sql | 10 +++++----- .../expected/unique_key_list__inplace_overwrite.sql | 10 +++++----- .../test_incremental_unique_id.py | 8 ++------ 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql index 6f38d9c9912..422b2ccd33b 100644 --- a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql +++ b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql @@ -56,7 +56,7 @@ {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} - {% if unique_key is not none and unique_key != [] %} + {% if unique_key %} {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %} delete from {{target }} using {{ source }} diff --git a/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql b/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql index 99223458a1f..b992636958b 100644 --- a/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql +++ b/test/integration/076_incremental_unique_id_test/models/expected/one_str__overwrite.sql @@ -10,12 +10,12 @@ select 'Hartford'::varchar(12) as city, '2022-02-14'::date as last_visit_date union all -select 'MA','Suffolk','Boston','2020-02-12' +select 'MA'::varchar(2),'Suffolk'::varchar(12),'Boston'::varchar(12),'2020-02-12'::date union all -select 'NJ','Mercer','Trenton','2022-01-01' +select 'NJ'::varchar(2),'Mercer'::varchar(12),'Trenton'::varchar(12),'2022-01-01'::date union all -select 'NY','Kings','Brooklyn','2021-04-02' +select 'NY'::varchar(2),'Kings'::varchar(12),'Brooklyn'::varchar(12),'2021-04-02'::date union all -select 'NY','New York','Manhattan','2021-04-01' +select 'NY'::varchar(2),'New York'::varchar(12),'Manhattan'::varchar(12),'2021-04-01'::date union all -select 'PA','Philadelphia','Philadelphia','2021-05-21' +select 'PA'::varchar(2),'Philadelphia'::varchar(12),'Philadelphia'::varchar(12),'2021-05-21'::date diff --git a/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql b/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql index 99223458a1f..b992636958b 100644 --- a/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql +++ b/test/integration/076_incremental_unique_id_test/models/expected/unique_key_list__inplace_overwrite.sql @@ -10,12 +10,12 @@ select 'Hartford'::varchar(12) as city, '2022-02-14'::date as last_visit_date union all -select 'MA','Suffolk','Boston','2020-02-12' +select 'MA'::varchar(2),'Suffolk'::varchar(12),'Boston'::varchar(12),'2020-02-12'::date union all -select 'NJ','Mercer','Trenton','2022-01-01' +select 'NJ'::varchar(2),'Mercer'::varchar(12),'Trenton'::varchar(12),'2022-01-01'::date union all -select 'NY','Kings','Brooklyn','2021-04-02' +select 'NY'::varchar(2),'Kings'::varchar(12),'Brooklyn'::varchar(12),'2021-04-02'::date union all -select 'NY','New York','Manhattan','2021-04-01' +select 'NY'::varchar(2),'New York'::varchar(12),'Manhattan'::varchar(12),'2021-04-01'::date union all -select 'PA','Philadelphia','Philadelphia','2021-05-21' +select 'PA'::varchar(2),'Philadelphia'::varchar(12),'Philadelphia'::varchar(12),'2021-05-21'::date diff --git a/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py b/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py index f8c0b37170e..fa6a5ba6e59 100644 --- a/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py +++ b/test/integration/076_incremental_unique_id_test/test_incremental_unique_id.py @@ -177,14 +177,12 @@ def test__postgres_one_unique_key(self): def test__postgres_bad_unique_key(self): '''expect compilation error from unique key not being a column''' - err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" - (status, exc) = self.fail_to_build_inc_missing_unique_key_column( incremental_model_name='not_found_unique_key' ) self.assertEqual(status, RunStatus.Error) - self.assertTrue(err_msg in exc) + self.assertTrue("thisisnotacolumn" in exc.lower()) class TestIncrementalListUniqueKey(TestIncrementalUniqueKeyBase): @@ -289,11 +287,9 @@ def test__postgres_trinary_unique_key_list_no_update(self): def test__postgres_bad_unique_key_list(self): '''expect compilation error from unique key not being a column''' - err_msg = "invalid identifier 'DBT_INTERNAL_SOURCE.THISISNOTACOLUMN'" - (status, exc) = self.fail_to_build_inc_missing_unique_key_column( incremental_model_name='not_found_unique_key_list' ) self.assertEqual(status, RunStatus.Error) - self.assertTrue(err_msg in exc) + self.assertTrue("thisisnotacolumn" in exc.lower()) From 5cfa2cdd1f447940ca48e13bd9f2eb0084d411e5 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 17 Mar 2022 13:58:32 -0500 Subject: [PATCH 5/5] removing ref to mapping as seems to be unnecessary check, unique_key tests pass locally for postgres --- .../macros/materializations/models/incremental/merge.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql index 422b2ccd33b..563cae960e9 100644 --- a/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql +++ b/core/dbt/include/global_project/macros/materializations/models/incremental/merge.sql @@ -57,7 +57,7 @@ {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} {% if unique_key %} - {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %} + {% if unique_key is sequence and unique_key is not string %} delete from {{target }} using {{ source }} where (