From bdcba36ab35c79c6e679c0baf0890c0a179e45e4 Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Sun, 17 Jul 2022 21:50:59 -0400 Subject: [PATCH 01/10] adds _select_event_stream helper --- macros/funnel.sql | 2 +- macros/helpers/_select_event_stream.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 macros/helpers/_select_event_stream.sql diff --git a/macros/funnel.sql b/macros/funnel.sql index 295619b..2aaf1cf 100644 --- a/macros/funnel.sql +++ b/macros/funnel.sql @@ -1,5 +1,5 @@ {% macro funnel(steps=none, event_stream=none) %} - with event_stream as ( {% if not (event_stream|string|trim).startswith('select ') %} select * from {% endif %} {{ event_stream }} ) + with event_stream as {{ dbt_product_analytics._select_event_stream(event_stream) }} {% for step in steps %} , event_stream_step_{{ loop.index }} as ( select event_stream.* diff --git a/macros/helpers/_select_event_stream.sql b/macros/helpers/_select_event_stream.sql new file mode 100644 index 0000000..f26667f --- /dev/null +++ b/macros/helpers/_select_event_stream.sql @@ -0,0 +1,3 @@ +{% macro _select_event_stream(event_stream) -%} + ( {% if not (event_stream|string|trim).startswith('select ') %} select * from {% endif %} {{ event_stream }} ) +{%- endmacro %} \ No newline at end of file From 26525fb986d459c92763a8e983daaea4d64bc2cf Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Sun, 17 Jul 2022 21:51:15 -0400 Subject: [PATCH 02/10] adds untested retention macro --- macros/retention.sql | 54 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 macros/retention.sql diff --git a/macros/retention.sql b/macros/retention.sql new file mode 100644 index 0000000..b93c899 --- /dev/null +++ b/macros/retention.sql @@ -0,0 +1,54 @@ +{% macro retention(event_stream, first_action, second_action, start_date, periods=[1,7,14,30,60,120], period_type='day', dimensions=[]) %} + with event_stream as {{ 
dbt_product_analytics._select_event_stream(event_stream) }} + + , first_events as ( + select user_id, min(event_date) as first_event_date + {% for dimension in dimensions %}, {{ dimension }} {% endfor %} + from event_stream + where event_type = '{{ first_action }}' + and event_date = '{{ start_date }}' + group by 1 order by 1 + ) + + , first_event_counts as ( + select + {% for dimension in dimensions %}, {{ dimension }} {% endfor %} + , count(*) as unique_users_{{ period_type }}_0 + from first_events + {% for dimension in dimensions -%} + {% if loop.first %} group by {% endif %} {{ loop.index }} + {%- endfor %} + ) + + {% for period in periods %} + , secondary_events_{{ period }} as ( + select {% for dimension in dimensions %} {{ dimension }}, {% endfor %} + count(distinct first_events.user_id) as unique_users_{{ period_type }}_{{ period }} + from event_stream + where event_stream.event_type = '{{ second_action }}' + and event_stream.event_date >= '{{ start_date }}' + and event_stream.event_date < '{{ start_date }}' + 'interval {{ period }} {{ period type }}' + and user_id in ( + select user_id from first_events + ) + {% for dimension in dimensions -%} + {% if loop.first %} group by {% endif %} {{ loop.index }} + {%- endfor %} + ) + {% endfor %} + + , final as ( + select + {% for dimension in dimensions %} {{ dimension }}, {% endfor %} + {% for period in periods %} unique_users_{{ period_type }}_{{ period }} {% if not loop.last %}, {% endif %} {% endfor %} + from first_event_counts + {% for period in periods %} + left join secondary_events_{{ period }} + on 1 = 1 + {% for dimension in dimensions %} + and first_event_counts.{{ dimension }} = secondary_events_{{ period }}.{{ dimension }} + {% endfor %} + ) + + select * from final +{% endmacro %} \ No newline at end of file From 0832015da7913aa0d6a3737f98ed401fbdadea7d Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Sun, 17 Jul 2022 21:52:40 -0400 Subject: [PATCH 03/10] fixes typos --- macros/retention.sql | 3 ++- 
1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macros/retention.sql b/macros/retention.sql index b93c899..10aed53 100644 --- a/macros/retention.sql +++ b/macros/retention.sql @@ -27,7 +27,7 @@ from event_stream where event_stream.event_type = '{{ second_action }}' and event_stream.event_date >= '{{ start_date }}' - and event_stream.event_date < '{{ start_date }}' + 'interval {{ period }} {{ period type }}' + and event_stream.event_date < '{{ start_date }}' + 'interval {{ period }} {{ period_type }}' and user_id in ( select user_id from first_events ) @@ -48,6 +48,7 @@ {% for dimension in dimensions %} and first_event_counts.{{ dimension }} = secondary_events_{{ period }}.{{ dimension }} {% endfor %} + {% endfor %} ) select * from final From 86582144d9137c0ea9cbd5cc57f59fadf24c5358 Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 07:20:26 -0400 Subject: [PATCH 04/10] retention works on postgres --- .../product_analytics/retention_orders.sql | 6 ++++ .../models/product_analytics/schema.yml | 6 ++++ .../seeds/dmt_expected__retention.csv | 2 ++ macros/retention.sql | 34 ++++++++++++++----- packages.yml | 3 ++ 5 files changed, 42 insertions(+), 9 deletions(-) create mode 100644 integration_tests/models/product_analytics/retention_orders.sql create mode 100644 integration_tests/seeds/dmt_expected__retention.csv create mode 100644 packages.yml diff --git a/integration_tests/models/product_analytics/retention_orders.sql b/integration_tests/models/product_analytics/retention_orders.sql new file mode 100644 index 0000000..2aae5f5 --- /dev/null +++ b/integration_tests/models/product_analytics/retention_orders.sql @@ -0,0 +1,6 @@ +{{ dbt_product_analytics.retention( + event_stream=ref('order_events'), + first_action='completed', + second_action='completed', + start_date='2018-01-17' +)}} \ No newline at end of file diff --git a/integration_tests/models/product_analytics/schema.yml b/integration_tests/models/product_analytics/schema.yml index 
65daccb..b4d2619 100644 --- a/integration_tests/models/product_analytics/schema.yml +++ b/integration_tests/models/product_analytics/schema.yml @@ -7,3 +7,9 @@ models: input_mapping: ref('orders'): ref('raw_orders_simple') expected_output: ref('dmt_expected__funnel_simple') + - name: retention_orders + tests: + - dbt_datamocktool.unit_test: + input_mapping: + ref('order_events'): ref('order_events') + expected_output: ref('dmt_expected__retention') diff --git a/integration_tests/seeds/dmt_expected__retention.csv b/integration_tests/seeds/dmt_expected__retention.csv new file mode 100644 index 0000000..a2dba34 --- /dev/null +++ b/integration_tests/seeds/dmt_expected__retention.csv @@ -0,0 +1,2 @@ +unique_users_day_0,unique_users_day_1,unique_users_day_7,unique_users_day_14,unique_users_day_30,unique_users_day_60,unique_users_day_120 +2,0,0,0,0,0,1 \ No newline at end of file diff --git a/macros/retention.sql b/macros/retention.sql index 10aed53..6992412 100644 --- a/macros/retention.sql +++ b/macros/retention.sql @@ -1,19 +1,34 @@ -{% macro retention(event_stream, first_action, second_action, start_date, periods=[1,7,14,30,60,120], period_type='day', dimensions=[]) %} +{% macro retention(event_stream=None, first_action=None, second_action=None, start_date=None, periods=[1,7,14,30,60,120], period_type='day', dimensions=[]) %} + {% if event_stream is none %} + {{ exceptions.raise_compiler_error('parameter \'event_stream\' must be provided')}} + {% endif %} + + {% if first_action is none %} + {{ exceptions.raise_compiler_error('parameter \'first_action\' must be provided')}} + {% endif %} + + {% if second_action is none %} + {{ exceptions.raise_compiler_error('parameter \'second_action\' must be provided')}} + {% endif %} + + {% if start_date is none %} + {{ exceptions.raise_compiler_error('parameter \'start_date\' must be provided')}} + {% endif %} + with event_stream as {{ dbt_product_analytics._select_event_stream(event_stream) }} , first_events as ( - select 
user_id, min(event_date) as first_event_date + select distinct user_id {% for dimension in dimensions %}, {{ dimension }} {% endfor %} from event_stream where event_type = '{{ first_action }}' and event_date = '{{ start_date }}' - group by 1 order by 1 ) , first_event_counts as ( select - {% for dimension in dimensions %}, {{ dimension }} {% endfor %} - , count(*) as unique_users_{{ period_type }}_0 + {% for dimension in dimensions %} {{ dimension }}, {% endfor %} + count(*) as unique_users_{{ period_type }}_0 from first_events {% for dimension in dimensions -%} {% if loop.first %} group by {% endif %} {{ loop.index }} @@ -23,11 +38,11 @@ {% for period in periods %} , secondary_events_{{ period }} as ( select {% for dimension in dimensions %} {{ dimension }}, {% endfor %} - count(distinct first_events.user_id) as unique_users_{{ period_type }}_{{ period }} + count(distinct user_id) as unique_users_{{ period_type }}_{{ period }} from event_stream - where event_stream.event_type = '{{ second_action }}' - and event_stream.event_date >= '{{ start_date }}' - and event_stream.event_date < '{{ start_date }}' + 'interval {{ period }} {{ period_type }}' + where event_type = '{{ second_action }}' + and event_date > '{{ start_date }}' + and event_date < {{ dbt_utils.dateadd(datepart=period_type, interval=period, from_date_or_timestamp="cast('" ~ start_date ~ "' as date)") }} and user_id in ( select user_id from first_events ) @@ -40,6 +55,7 @@ , final as ( select {% for dimension in dimensions %} {{ dimension }}, {% endfor %} + unique_users_{{ period_type }}_0, {% for period in periods %} unique_users_{{ period_type }}_{{ period }} {% if not loop.last %}, {% endif %} {% endfor %} from first_event_counts {% for period in periods %} diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..15afc74 --- /dev/null +++ b/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: [">=0.8.6"] From 2cc072bb994dceab281694d89c92defa2a9a37f8 Mon 
Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 07:57:53 -0400 Subject: [PATCH 05/10] some date cast fixes for trino and other dbs --- .gitignore | 1 + macros/retention.sql | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 7164422..112af0a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ dbt_modules/ +dbt_packages/ logs/ **/.DS_Store diff --git a/macros/retention.sql b/macros/retention.sql index 6992412..75cd67a 100644 --- a/macros/retention.sql +++ b/macros/retention.sql @@ -22,7 +22,7 @@ {% for dimension in dimensions %}, {{ dimension }} {% endfor %} from event_stream where event_type = '{{ first_action }}' - and event_date = '{{ start_date }}' + and event_date = cast('{{ start_date }}' as date) ) , first_event_counts as ( @@ -41,7 +41,7 @@ count(distinct user_id) as unique_users_{{ period_type }}_{{ period }} from event_stream where event_type = '{{ second_action }}' - and event_date > '{{ start_date }}' + and event_date > cast('{{ start_date }}' as date) and event_date < {{ dbt_utils.dateadd(datepart=period_type, interval=period, from_date_or_timestamp="cast('" ~ start_date ~ "' as date)") }} and user_id in ( select user_id from first_events From ae50f1d01980425a12fd8de6ecb23e21cbffc822 Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 07:59:59 -0400 Subject: [PATCH 06/10] adds run_integration_tests script --- integration_tests/run_integration_tests.sh | 1 + 1 file changed, 1 insertion(+) create mode 100755 integration_tests/run_integration_tests.sh diff --git a/integration_tests/run_integration_tests.sh b/integration_tests/run_integration_tests.sh new file mode 100755 index 0000000..0ee3316 --- /dev/null +++ b/integration_tests/run_integration_tests.sh @@ -0,0 +1 @@ +dbt clean && dbt deps && dbt seed && dbt run && dbt test \ No newline at end of file From 8ba59a485c1a009b5ba21b861beaa2c8d744b3e3 Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 
2022 08:45:08 -0400 Subject: [PATCH 07/10] retention works on trino --- macros/helpers/_dateadd.sql | 11 +++++++++++ macros/retention.sql | 2 +- packages.yml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 macros/helpers/_dateadd.sql diff --git a/macros/helpers/_dateadd.sql b/macros/helpers/_dateadd.sql new file mode 100644 index 0000000..48d1d37 --- /dev/null +++ b/macros/helpers/_dateadd.sql @@ -0,0 +1,11 @@ +{% macro _dateadd(datepart, interval, from_date_or_timestamp) %} + {{ return(adapter.dispatch('_dateadd', 'dbt_product_analytics')(datepart, interval, from_date_or_timestamp)) }} +{% endmacro %} + +{% macro default___dateadd(datepart, interval, from_date_or_timestamp) %} + {{ return(adapter.dispatch('dateadd', 'dbt_utils')(datepart, interval, from_date_or_timestamp)) }} +{% endmacro %} + +{% macro trino___dateadd(datepart, interval, from_date_or_timestamp) %} + {{ from_date_or_timestamp }} + interval '{{ interval }}' {{ datepart }} +{% endmacro %} \ No newline at end of file diff --git a/macros/retention.sql b/macros/retention.sql index 75cd67a..60d40ac 100644 --- a/macros/retention.sql +++ b/macros/retention.sql @@ -42,7 +42,7 @@ from event_stream where event_type = '{{ second_action }}' and event_date > cast('{{ start_date }}' as date) - and event_date < {{ dbt_utils.dateadd(datepart=period_type, interval=period, from_date_or_timestamp="cast('" ~ start_date ~ "' as date)") }} + and event_date < {{ dbt_product_analytics._dateadd(datepart=period_type, interval=period, from_date_or_timestamp="cast('" ~ start_date ~ "' as date)") }} and user_id in ( select user_id from first_events ) diff --git a/packages.yml b/packages.yml index 15afc74..73dab13 100644 --- a/packages.yml +++ b/packages.yml @@ -1,3 +1,3 @@ packages: - package: dbt-labs/dbt_utils - version: [">=0.8.6"] + version: 0.8.6 From ea4b99413a270eaea0b7115305a5b1004664109d Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 08:45:28 -0400 Subject: [PATCH 
08/10] cast as date for trino --- macros/event_stream.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/event_stream.sql b/macros/event_stream.sql index cbd3444..3335b5d 100644 --- a/macros/event_stream.sql +++ b/macros/event_stream.sql @@ -10,9 +10,9 @@ from {{ from }} where 1 = 1 {% if start_date is not none %} - and {{ date_col }} >= '{{ start_date }}' + and {{ date_col }} >= cast('{{ start_date }}' as date) {% endif %} {% if end_date is not none %} - and {{ date_col }} < '{{ end_date }}' + and {{ date_col }} < cast('{{ end_date }}' as date) {% endif %} {% endmacro %} \ No newline at end of file From 405827cd025bd9ef0204f44e2ff74f76059fe0d5 Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 08:57:47 -0400 Subject: [PATCH 09/10] updates readme --- README.md | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fa63a23..adf3b7a 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ _Currently supports event streams and funnel analysis. More features will be add Add the following to your `packages.yml`: ```yaml - - package: mjirv/dbt_product_analytics - version: [">=0.0.2"] +- package: mjirv/dbt_product_analytics + version: [">=0.0.2"] ``` ## Usage @@ -73,3 +73,37 @@ michael=# select * from dbt_product_analytics.funnel_orders ; completed | 2 | 0.13333333333333333333 | 0.13333333333333333333 returned | 1 | 0.06666666666666666667 | 0.50000000000000000000 ``` + +### retention() ([source](https://github.com/mjirv/dbt_product_analytics/blob/main/macros/retention.sql)) + +_Runs a retention analysis, i.e. 
tells you how many people who did `first_action` on `start_date` came back to do `second_action` in the date windows chosen_ + +#### Usage + +Example: + +```sql +{{ dbt_product_analytics.retention( + event_stream=ref('order_events'), + first_action='completed', + second_action='completed', + start_date='2018-01-17' +)}} +``` + +Output: + +```sql +michael=# select * from dbt_product_analytics.retention_orders ; + unique_users_day_0 | unique_users_day_1 | unique_users_day_7 | unique_users_day_14 | unique_users_day_30 | unique_users_day_60 | unique_users_day_120 +--------------------+--------------------+--------------------+---------------------+---------------------+---------------------+---------------------- + 2 | 0 | 0 | 0 | 0 | 0 | 1 +``` + +Advanced: + +Three other parameters are available: `periods`, `period_type`, and `dimensions`. + +- `periods`: The period windows you want to look at (defaults to `[1, 7, 14, 30, 60, 120]`) +- `period_type`: The date type you want to use (defaults to `day`) +- `dimensions`: A list of columns from your event stream that you want to group by (defaults to `[]`) From 67ca0ae20ab3c4bbf4b9c78efd45bf350b18928b Mon Sep 17 00:00:00 2001 From: Michael Irvine Date: Mon, 18 Jul 2022 09:05:05 -0400 Subject: [PATCH 10/10] fixes readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index adf3b7a..8761d0c 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Add the following to your `packages.yml`: ## Usage -**dbt Product Analytics** provides two macros: `event_stream()` and `funnel()`. +**dbt Product Analytics** provides three macros: `event_stream()`, `funnel()`, and `retention()`. Use them in models and analyses like any other dbt macro.