Skip to content

Commit

Permalink
Found a better workaround for missing count distinct window
Browse files Browse the repository at this point in the history
  • Loading branch information
joellabes committed May 27, 2024
1 parent 0c192a9 commit 317e4d7
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions macros/utils/_count_num_rows_in_status.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@
{% endmacro %}

{%- macro postgres___count_num_rows_in_status() -%}
    {#- Postgres/Redshift doesn't support count(distinct) inside of window functions, -#}
    {#- so this adapter-specific variant renders the shared dense_rank-based expression instead. -#}
    {#- The helper must be invoked inside an expression tag: written as bare text it would be -#}
    {#- emitted verbatim into the compiled SQL as a call to a non-existent SQL function. -#}
    {{ _count_num_rows_in_status_without_distinct_window_func() }}
{% endmacro %}

{%- macro databricks___count_num_rows_in_status() -%}
    {#- Databricks variant: renders the shared dense_rank-based expression rather than a -#}
    {#- count(distinct) window function. -#}
    {#- The helper must be invoked inside an expression tag: written as bare text it would be -#}
    {#- emitted verbatim into the compiled SQL as a call to a non-existent SQL function. -#}
    {{ _count_num_rows_in_status_without_distinct_window_func() }}
{% endmacro %}

{% macro _count_num_rows_in_status_without_distinct_window_func() %}
    {#-
        Emits a SQL expression that counts the distinct
        (dbt_audit_surrogate_key, dbt_audit_pk_row_num) pairs within each
        dbt_audit_row_status partition, for platforms that don't support
        count(distinct) inside of window functions.

        How it works: a row's ascending dense_rank plus its descending dense_rank
        over the same ordering columns always equals (number of distinct values + 1),
        so subtracting 1 yields the distinct count on every row of the partition.
        See https://stackoverflow.com/a/22347502

        This assumes the ordering columns contain no nulls (the original author notes
        that nulls have already been handled before this expression is used).

        The whole expression is parenthesised so it keeps its meaning when a caller
        embeds it next to other operators (e.g. multiplying or dividing the result).
    -#}
    (
        dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key, dbt_audit_pk_row_num)
        + dense_rank() over (partition by dbt_audit_row_status order by dbt_audit_surrogate_key desc, dbt_audit_pk_row_num desc)
        - 1
    )
{% endmacro %}

0 comments on commit 317e4d7

Please sign in to comment.