Skip to content

Commit

Permalink
Merge pull request #3 from mjirv/feat-add_retention
Browse files Browse the repository at this point in the history
Feat: adds retention analysis
  • Loading branch information
mjirv authored Jul 18, 2022
2 parents 5ebfe27 + 67ca0ae commit 5514ba7
Show file tree
Hide file tree
Showing 12 changed files with 145 additions and 19 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

target/
dbt_modules/
dbt_packages/
logs/
**/.DS_Store
40 changes: 37 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ _Currently supports event streams and funnel analysis. More features will be add
Add the following to your `packages.yml`:

```yaml
- package: mjirv/dbt_product_analytics
version: [">=0.0.2"]
- package: mjirv/dbt_product_analytics
version: [">=0.0.2"]
```
## Usage
**dbt Product Analytics** provides two macros: `event_stream()` and `funnel()`.
**dbt Product Analytics** provides three macros: `event_stream()`, `funnel()`, and `retention()`.

Use them in models and analyses like any other dbt macro.

Expand Down Expand Up @@ -73,3 +73,37 @@ michael=# select * from dbt_product_analytics.funnel_orders ;
completed | 2 | 0.13333333333333333333 | 0.13333333333333333333
returned | 1 | 0.06666666666666666667 | 0.50000000000000000000
```

### retention() ([source](https://github.com/mjirv/dbt_product_analytics/blob/main/macros/retention.sql))

_Runs a retention analysis, i.e. tells you how many people who did `first_action` on `start_date` came back to do `second_action` in the date windows chosen_

#### Usage

Example:

```sql
{{ dbt_product_analytics.retention(
event_stream=ref('order_events'),
first_action='completed',
second_action='completed',
start_date='2018-01-17'
)}}
```

Output:

```sql
michael=# select * from dbt_product_analytics.retention_orders ;
unique_users_day_0 | unique_users_day_1 | unique_users_day_7 | unique_users_day_14 | unique_users_day_30 | unique_users_day_60 | unique_users_day_120
--------------------+--------------------+--------------------+---------------------+---------------------+---------------------+----------------------
2 | 0 | 0 | 0 | 0 | 0 | 1
```

Advanced:

Three other parameters are available: `periods`, `period_type`, and `dimensions`.

- `period`: The period windows you want look at (defaults to `[1, 7, 14, 30, 60, 120])`
- `period_type`: The date type you want to use (defaults to `day`)
- `dimensions`: A list of columns from your event stream that you want to group by (defaults to `[]`)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ dbt_product_analytics.retention(
event_stream=ref('order_events'),
first_action='completed',
second_action='completed',
start_date='2018-01-17'
)}}
6 changes: 6 additions & 0 deletions integration_tests/models/product_analytics/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ models:
input_mapping:
ref('orders'): ref('raw_orders_simple')
expected_output: ref('dmt_expected__funnel_simple')
- name: retention_orders
tests:
- dbt_datamocktool.unit_test:
input_mapping:
ref('order_events'): ref('order_events')
expected_output: ref('dmt_expected__retention')
1 change: 1 addition & 0 deletions integration_tests/run_integration_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dbt clean && dbt deps && dbt seed && dbt run && dbt test
2 changes: 2 additions & 0 deletions integration_tests/seeds/dmt_expected__retention.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
unique_users_day_0,unique_users_day_1,unique_users_day_7,unique_users_day_14,unique_users_day_30,unique_users_day_60,unique_users_day_120
2,0,0,0,0,0,1
16 changes: 2 additions & 14 deletions macros/event_stream.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,9 @@
from {{ from }}
where 1 = 1
{% if start_date is not none %}
and {{ date_col }} >= '{{ start_date }}'
and {{ date_col }} >= cast('{{ start_date }}' as date)
{% endif %}
{% if end_date is not none %}
and {{ date_col }} < '{{ end_date }}'
and {{ date_col }} < cast('{{ end_date }}' as date)
{% endif %}
{% endmacro %}

{% macro trino__event_stream(from, event_type_col, user_id_col, date_col, start_date, end_date) %}
select {{ event_type_col }} as event_type, {{ user_id_col }} as user_id, {{ date_col }} as event_date
from {{ from }}
where 1 = 1
{% if start_date is not none %}
and {{ date_col }} >= date '{{ start_date }}'
{% endif %}
{% if end_date is not none %}
and {{ date_col }} < date '{{ end_date }}'
{% endif %}
{% endmacro %}
4 changes: 2 additions & 2 deletions macros/funnel.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{% endmacro %}

{% macro default__funnel(steps, event_stream) %}
with event_stream as ( {% if not (event_stream|string|trim).startswith('select ') %} select * from {% endif %} {{ event_stream }} )
with event_stream as {{ dbt_product_analytics._select_event_stream(event_stream) }}
{% for step in steps %}
, event_stream_step_{{ loop.index }} as (
select event_stream.*
Expand Down Expand Up @@ -45,7 +45,7 @@
{% endmacro %}

{% macro snowflake__funnel(steps, event_stream) %}
with event_stream as ( {% if not (event_stream|string|trim).startswith('select ') %} select * from {% endif %} {{ event_stream }} )
with event_stream as {{ dbt_product_analytics._select_event_stream(event_stream) }}

, steps as (
{% for step in steps %}
Expand Down
11 changes: 11 additions & 0 deletions macros/helpers/_dateadd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{% macro _dateadd(datepart, interval, from_date_or_timestamp) %}
{{ return(adapter.dispatch('_dateadd', 'dbt_product_analytics')(datepart, interval, from_date_or_timestamp)) }}
{% endmacro %}

{% macro default___dateadd(datepart, interval, from_date_or_timestamp) %}
{{ return(adapter.dispatch('dateadd', 'dbt_utils')(datepart, interval, from_date_or_timestamp)) }}
{% endmacro %}

{% macro trino___dateadd(datepart, interval, from_date_or_timestamp) %}
{{ from_date_or_timestamp }} + interval '{{ interval }}' {{ datepart }}
{% endmacro %}
3 changes: 3 additions & 0 deletions macros/helpers/_select_event_stream.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{% macro _select_event_stream(event_stream) -%}
( {% if not (event_stream|string|trim).startswith('select ') %} select * from {% endif %} {{ event_stream }} )
{%- endmacro %}
71 changes: 71 additions & 0 deletions macros/retention.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{% macro retention(event_stream=None, first_action=None, second_action=None, start_date=None, periods=[1,7,14,30,60,120], period_type='day', dimensions=[]) %}
{% if event_stream is none %}
{{ exceptions.raise_compiler_error('parameter \'event_stream\' must be provided')}}
{% endif %}

{% if first_action is none %}
{{ exceptions.raise_compiler_error('parameter \'first_action\' must be provided')}}
{% endif %}

{% if second_action is none %}
{{ exceptions.raise_compiler_error('parameter \'second_action\' must be provided')}}
{% endif %}

{% if start_date is none %}
{{ exceptions.raise_compiler_error('parameter \'start_date\' must be provided')}}
{% endif %}

with event_stream as {{ dbt_product_analytics._select_event_stream(event_stream) }}

, first_events as (
select distinct user_id
{% for dimension in dimensions %}, {{ dimension }} {% endfor %}
from event_stream
where event_type = '{{ first_action }}'
and event_date = cast('{{ start_date }}' as date)
)

, first_event_counts as (
select
{% for dimension in dimensions %} {{ dimension }}, {% endfor %}
count(*) as unique_users_{{ period_type }}_0
from first_events
{% for dimension in dimensions -%}
{% if loop.first %} group by {% endif %} {{ loop.index }}
{%- endfor %}
)

{% for period in periods %}
, secondary_events_{{ period }} as (
select {% for dimension in dimensions %} {{ dimension }}, {% endfor %}
count(distinct user_id) as unique_users_{{ period_type }}_{{ period }}
from event_stream
where event_type = '{{ second_action }}'
and event_date > cast('{{ start_date }}' as date)
and event_date < {{ dbt_product_analytics._dateadd(datepart=period_type, interval=period, from_date_or_timestamp="cast('" ~ start_date ~ "' as date)") }}
and user_id in (
select user_id from first_events
)
{% for dimension in dimensions -%}
{% if loop.first %} group by {% endif %} {{ loop.index }}
{%- endfor %}
)
{% endfor %}

, final as (
select
{% for dimension in dimensions %} {{ dimension }}, {% endfor %}
unique_users_{{ period_type }}_0,
{% for period in periods %} unique_users_{{ period_type }}_{{ period }} {% if not loop.last %}, {% endif %} {% endfor %}
from first_event_counts
{% for period in periods %}
left join secondary_events_{{ period }}
on 1 = 1
{% for dimension in dimensions %}
and first_event_counts.{{ dimension }} = secondary_events_{{ period }}.{{ dimension }}
{% endfor %}
{% endfor %}
)

select * from final
{% endmacro %}
3 changes: 3 additions & 0 deletions packages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
packages:
- package: dbt-labs/dbt_utils
version: 0.8.6

0 comments on commit 5514ba7

Please sign in to comment.