Skip to content

Commit

Permalink
update of macro for postgres/redshift use of unique_key as a list (#4858
Browse files Browse the repository at this point in the history
)

* pre-commit additions

* added changie changelog entry

* moving integration test over

* Pair programming

* removing ref to mapping as seems to be unnecessary check, unique_key tests pass locally for postgres

Co-authored-by: Jeremy Cohen <[email protected]>
  • Loading branch information
McKnight-42 and jtcohen6 authored Mar 22, 2022
1 parent 9c5ee59 commit ea5a9da
Show file tree
Hide file tree
Showing 18 changed files with 562 additions and 7 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220314-112341.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Allow unique key to take a list implementation for postgres/redshift
time: 2022-03-14T11:23:41.293726-05:00
custom:
Author: McKnight-42
Issue: "4738"
PR: "4858"
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,26 @@

{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}

{% if unique_key is not none %}
delete from {{ target }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ source }}
);
{% endif %}
{% if unique_key %}
{% if unique_key is sequence and unique_key is not string %}
delete from {{target }}
using {{ source }}
where (
{% for key in unique_key %}
{{ source }}.{{ key }} = {{ target }}.{{ key }}
{{ "and " if not loop.last }}
{% endfor %}
);
{% else %}
delete from {{ target }}
where (
{{ unique_key }}) in (
select ({{ unique_key }})
from {{ source }}
);

{% endif %}
{% endif %}

insert into {{ target }} ({{ dest_cols_csv }})
(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{{
config(
materialized='incremental',
unique_key=['state', 'state']
)
}}

select
state::varchar(2) as state,
county::varchar(12) as county,
city::varchar(12) as city,
last_visit_date::date as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- ensure model with empty string unique key should build normally

{{
config(
materialized='incremental',
unique_key=''
)
}}

select
*
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- model with empty list unique key should build normally

{{
config(
materialized='incremental',
unique_key=[]
)
}}

select * from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table'
)
}}

select
'CT'::varchar(2) as state,
'Hartford'::varchar(12) as county,
'Hartford'::varchar(12) as city,
'2022-02-14'::date as last_visit_date
union all
select 'MA'::varchar(2),'Suffolk'::varchar(12),'Boston'::varchar(12),'2020-02-12'::date
union all
select 'NJ'::varchar(2),'Mercer'::varchar(12),'Trenton'::varchar(12),'2022-01-01'::date
union all
select 'NY'::varchar(2),'Kings'::varchar(12),'Brooklyn'::varchar(12),'2021-04-02'::date
union all
select 'NY'::varchar(2),'New York'::varchar(12),'Manhattan'::varchar(12),'2021-04-01'::date
union all
select 'PA'::varchar(2),'Philadelphia'::varchar(12),'Philadelphia'::varchar(12),'2021-05-21'::date
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table'
)
}}

select
'CT'::varchar(2) as state,
'Hartford'::varchar(12) as county,
'Hartford'::varchar(12) as city,
'2022-02-14'::date as last_visit_date
union all
select 'MA'::varchar(2),'Suffolk'::varchar(12),'Boston'::varchar(12),'2020-02-12'::date
union all
select 'NJ'::varchar(2),'Mercer'::varchar(12),'Trenton'::varchar(12),'2022-01-01'::date
union all
select 'NY'::varchar(2),'Kings'::varchar(12),'Brooklyn'::varchar(12),'2021-04-02'::date
union all
select 'NY'::varchar(2),'New York'::varchar(12),'Manhattan'::varchar(12),'2021-04-01'::date
union all
select 'PA'::varchar(2),'Philadelphia'::varchar(12),'Philadelphia'::varchar(12),'2021-05-21'::date
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- no specified unique key should cause no special build behavior

{{
config(
materialized='incremental'
)
}}

select
*
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- a multi-argument unique key list should see overwriting on rows in the model
-- where all unique key fields apply
-- N.B. needed for direct comparison with seed

{{
config(
materialized='incremental',
unique_key=['state', 'county', 'city']
)
}}

select
state as state,
county as county,
city as city,
last_visit_date as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- a model with a unique key not found in the table itself will error out

{{
config(
materialized='incremental',
unique_key='thisisnotacolumn'
)
}}

select
*
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- a unique key list with any element not in the model itself should error out

{{
config(
materialized='incremental',
unique_key=['state', 'thisisnotacolumn']
)
}}

select * from {{ ref('seed') }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- a unique key with a string should trigger to overwrite behavior when
-- the source has entries in conflict (i.e. more than one row per unique key
-- combination)

{{
config(
materialized='incremental',
unique_key='state'
)
}}

select
state::varchar(2) as state,
county::varchar(12) as county,
city::varchar(12) as city,
last_visit_date::date as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- a multi-argument unique key list should see overwriting on rows in the model
-- where all unique key fields apply

{{
config(
materialized='incremental',
unique_key=['state', 'county', 'city']
)
}}

select
state::varchar(2) as state,
county::varchar(12) as county,
city::varchar(12) as city,
last_visit_date::date as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- a one argument unique key list should result in overwritting semantics for
-- that one matching field

{{
config(
materialized='incremental',
unique_key=['state']
)
}}

select
state::varchar(2) as state,
county::varchar(12) as county,
city::varchar(12) as city,
last_visit_date::date as last_visit_date
from {{ ref('seed') }}

{% if is_incremental() %}
where last_visit_date > (select max(last_visit_date) from {{ this }})
{% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Insert statement which when applied to seed.csv sees incremental model
-- grow in size while not (necessarily) diverging from the seed itself.

-- insert two new rows, both of which should be in incremental model
-- with any unique columns
insert into {schema}.seed
(state, county, city, last_visit_date)
values ('WA','King','Seattle','2022-02-01');

insert into {schema}.seed
(state, county, city, last_visit_date)
values ('CA','Los Angeles','Los Angeles','2022-02-01');
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Insert statement which when applied to seed.csv triggers the inplace
-- overwrite strategy of incremental models. Seed and incremental model
-- diverge.

-- insert new row, which should not be in incremental model
-- with primary or first three columns unique
insert into {schema}.seed
(state, county, city, last_visit_date)
values ('CT','Hartford','Hartford','2022-02-14');
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
state,county,city,last_visit_date
CT,Hartford,Hartford,2020-09-23
MA,Suffolk,Boston,2020-02-12
NJ,Mercer,Trenton,2022-01-01
NY,Kings,Brooklyn,2021-04-02
NY,New York,Manhattan,2021-04-01
PA,Philadelphia,Philadelphia,2021-05-21
Loading

0 comments on commit ea5a9da

Please sign in to comment.