Skip to content

Commit

Permalink
Merge pull request #812 from dbt-labs/ensure-consistent-date-part-ext…
Browse files Browse the repository at this point in the history
…ract-results

Ensure extract calls return consistent results across engines
  • Loading branch information
tlento authored Oct 25, 2023
2 parents 9cd2d09 + 5ca6a61 commit 97752a7
Show file tree
Hide file tree
Showing 472 changed files with 24,848 additions and 34,847 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231015-170649.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Ensure extract calls return consistent results across engines
time: 2023-10-15T17:06:49.646146-07:00
custom:
Author: tlento
Issue: "792"
24 changes: 22 additions & 2 deletions metricflow/sql/render/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from metricflow.sql.sql_exprs import (
SqlCastToTimestampExpression,
SqlDateTruncExpression,
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
Expand Down Expand Up @@ -136,11 +137,30 @@ def render_date_part(self, date_part: DatePart) -> str:
return "dayofyear"
if date_part == DatePart.DOW:
return "dayofweek"
if date_part == DatePart.WEEK:
return "isoweek"

return super().render_date_part(date_part)

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
"""Renders extract expressions with required output conversions for BigQuery.
BigQuery does not have native support for the ISO standard day of week output of 1 (Monday) - 7 (Sunday).
Instead, BigQuery returns 1 (Sunday) - 7 (Monday). Therefore, we need custom rendering logic to normalize
the return values to the ISO standard.
"""
extract_rendering_result = super().visit_extract_expr(node)

if node.date_part is not DatePart.DOW:
return extract_rendering_result

extract_stmt = extract_rendering_result.sql
case_expr = f"IF({extract_stmt} = 1, 7, {extract_stmt} - 1)"

return SqlExpressionRenderResult(
sql=case_expr,
bind_parameters=extract_rendering_result.bind_parameters,
)

@override
def visit_time_delta_expr(self, node: SqlSubtractTimeIntervalExpression) -> SqlExpressionRenderResult:
"""Render time delta for BigQuery, which requires ISO prefixing for the WEEK granularity value."""
Expand Down
8 changes: 8 additions & 0 deletions metricflow/sql/render/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_exprs import SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.time.date_part import DatePart


class DatabricksSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand All @@ -23,6 +24,13 @@ class DatabricksSqlExpressionRenderer(DefaultSqlExpressionRenderer):
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
return {SqlPercentileFunctionType.CONTINUOUS, SqlPercentileFunctionType.APPROXIMATE_DISCRETE}

@override
def render_date_part(self, date_part: DatePart) -> str:
if date_part is DatePart.DOW:
return "DAYOFWEEK_ISO"

return super().render_date_part(date_part)

@override
def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionRenderResult:
"""Render a percentile expression for Databricks."""
Expand Down
8 changes: 7 additions & 1 deletion metricflow/sql/render/expr_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,13 @@ def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderR
)

def render_date_part(self, date_part: DatePart) -> str:
"""Render DATE PART for an EXTRACT expression."""
"""Render DATE PART for an EXTRACT expression.
For DatePart.DOW (day of week) we use the ISO date part to ensure all engines return consistent results.
"""
if date_part is DatePart.DOW:
return "isodow"

return date_part.value

def visit_time_delta_expr(self, node: SqlSubtractTimeIntervalExpression) -> SqlExpressionRenderResult: # noqa: D
Expand Down
33 changes: 32 additions & 1 deletion metricflow/sql/render/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
)
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import SqlGenerateUuidExpression, SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.sql.sql_exprs import (
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
)
from metricflow.time.date_part import DatePart


class RedshiftSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand Down Expand Up @@ -59,6 +65,31 @@ def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionR
bind_parameters=params,
)

@override
def render_date_part(self, date_part: DatePart) -> str:
return date_part.value

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
"""Renders extract expressions with required output conversions for Redshift.
Redshift does not have native support for the ISO standard day of week output of 1 (Monday) - 7 (Sunday).
Instead, Redshift returns 0 (Sunday) - 6 (Monday). Therefore, we need custom rendering logic to normalize
the return values to the ISO standard.
"""
extract_rendering_result = super().visit_extract_expr(node)

if node.date_part is not DatePart.DOW:
return extract_rendering_result

extract_stmt = extract_rendering_result.sql
case_expr = f"CASE WHEN {extract_stmt} = 0 THEN {extract_stmt} + 7 ELSE {extract_stmt} END"

return SqlExpressionRenderResult(
sql=case_expr,
bind_parameters=extract_rendering_result.bind_parameters,
)

@override
def visit_generate_uuid_expr(self, node: SqlGenerateUuidExpression) -> SqlExpressionRenderResult:
"""Generates a "good enough" random key to simulate a UUID.
Expand Down
8 changes: 8 additions & 0 deletions metricflow/sql/render/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import SqlGenerateUuidExpression, SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.time.date_part import DatePart


class SnowflakeSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand All @@ -28,6 +29,13 @@ def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctio
SqlPercentileFunctionType.APPROXIMATE_CONTINUOUS,
}

@override
def render_date_part(self, date_part: DatePart) -> str:
if date_part is DatePart.DOW:
return "dayofweekiso"

return super().render_date_part(date_part)

@override
def visit_generate_uuid_expr(self, node: SqlGenerateUuidExpression) -> SqlExpressionRenderResult:
return SqlExpressionRenderResult(
Expand Down
3 changes: 0 additions & 3 deletions metricflow/test/integration/test_cases/itest_metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,6 @@ integration_test:
{"name": "metric_time", "date_part": "dow"},
{"name": "metric_time", "date_part": "doy"},
{"name": "metric_time", "date_part": "day"},
{"name": "metric_time", "date_part": "week"},
]
check_query: |
SELECT
Expand All @@ -1089,14 +1088,12 @@ integration_test:
, {{ render_extract("ds", DatePart.DOW) }} AS metric_time__extract_dow
, {{ render_extract("ds", DatePart.DOY) }} AS metric_time__extract_doy
, {{ render_extract("ds", DatePart.DAY) }} AS metric_time__extract_day
, {{ render_extract("ds", DatePart.WEEK) }} AS metric_time__extract_week
FROM {{ source_schema }}.fct_bookings
GROUP BY
{{ render_extract("ds", DatePart.QUARTER) }}
, {{ render_extract("ds", DatePart.DOW) }}
, {{ render_extract("ds", DatePart.DOY) }}
, {{ render_extract("ds", DatePart.DAY) }}
, {{ render_extract("ds", DatePart.WEEK) }};
---
integration_test:
name: derived_metric_offset_window_and_date_part
Expand Down
4 changes: 2 additions & 2 deletions metricflow/test/model/test_data_warehouse_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def test_build_dimension_tasks( # noqa: D
)
# on semantic model query with all dimensions
assert len(tasks) == 1
# 1 categorical dimension task, 1 time dimension task, 4 granularity based time dimension tasks, 7 date_part tasks
assert len(tasks[0].on_fail_subtasks) == 13
# 1 categorical dimension task, 1 time dimension task, 4 granularity based time dimension tasks, 6 date_part tasks
assert len(tasks[0].on_fail_subtasks) == 12


def test_validate_dimensions( # noqa: D
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,6 @@ def test_simple_query_with_multiple_date_parts( # noqa: D
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DAY),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DOW),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DOY),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.WEEK),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.MONTH),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.QUARTER),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.YEAR),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(isoweek FROM revenue_src_10006.created_at) AS ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC(revenue_src_10006.created_at, day) AS company__ds__day
, DATE_TRUNC(revenue_src_10006.created_at, isoweek) AS company__ds__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS company__ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(isoweek FROM revenue_src_10006.created_at) AS company__ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS company__ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds) AS ds__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS ds__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds) AS ds__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds) AS ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS ds_partitioned__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type
, DATE_TRUNC(id_verifications_src_10003.ds, day) AS verification__ds__day
Expand All @@ -34,9 +32,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds) AS verification__ds__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS verification__ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS verification__ds__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds) AS verification__ds__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds) AS verification__ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS verification__ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS verification__ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS verification__ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS verification__ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS verification__ds_partitioned__week
Expand All @@ -46,9 +43,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS verification__ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type AS verification__verification_type
, id_verifications_src_10003.verification_id AS verification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ SELECT
, EXTRACT(year FROM users_latest_src_10008.ds) AS ds_latest__extract_year
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS ds_latest__extract_month
, EXTRACT(isoweek FROM users_latest_src_10008.ds) AS ds_latest__extract_week
, EXTRACT(day FROM users_latest_src_10008.ds) AS ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS ds_latest__extract_doy
, users_latest_src_10008.home_state_latest
, DATE_TRUNC(users_latest_src_10008.ds, day) AS user__ds_latest__day
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM users_latest_src_10008.ds) AS user__ds_latest__extract_year
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS user__ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS user__ds_latest__extract_month
, EXTRACT(isoweek FROM users_latest_src_10008.ds) AS user__ds_latest__extract_week
, EXTRACT(day FROM users_latest_src_10008.ds) AS user__ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS user__ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS user__ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS user__ds_latest__extract_doy
, users_latest_src_10008.home_state_latest AS user__home_state_latest
, users_latest_src_10008.user_id AS user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(week FROM revenue_src_10006.created_at) AS ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS ds__extract_dow
, EXTRACT(DAYOFWEEK_ISO FROM revenue_src_10006.created_at) AS ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC('day', revenue_src_10006.created_at) AS company__ds__day
, DATE_TRUNC('week', revenue_src_10006.created_at) AS company__ds__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS company__ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(week FROM revenue_src_10006.created_at) AS company__ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, EXTRACT(DAYOFWEEK_ISO FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Loading

0 comments on commit 97752a7

Please sign in to comment.