Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure extract calls return consistent results across engines #812

Merged
merged 4 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231015-170649.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Ensure extract calls return consistent results across engines
time: 2023-10-15T17:06:49.646146-07:00
custom:
Author: tlento
Issue: "792"
24 changes: 22 additions & 2 deletions metricflow/sql/render/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from metricflow.sql.sql_exprs import (
SqlCastToTimestampExpression,
SqlDateTruncExpression,
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
Expand Down Expand Up @@ -136,11 +137,30 @@ def render_date_part(self, date_part: DatePart) -> str:
return "dayofyear"
if date_part == DatePart.DOW:
return "dayofweek"
if date_part == DatePart.WEEK:
return "isoweek"

return super().render_date_part(date_part)

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
"""Renders extract expressions with required output conversions for BigQuery.
BigQuery does not have native support for the ISO standard day of week output of 1 (Monday) - 7 (Sunday).
Instead, BigQuery returns 1 (Sunday) - 7 (Monday). Therefore, we need custom rendering logic to normalize
the return values to the ISO standard.
"""
extract_rendering_result = super().visit_extract_expr(node)

if node.date_part is not DatePart.DOW:
return extract_rendering_result

extract_stmt = extract_rendering_result.sql
case_expr = f"IF({extract_stmt} = 1, 7, {extract_stmt} - 1)"

return SqlExpressionRenderResult(
sql=case_expr,
bind_parameters=extract_rendering_result.bind_parameters,
)

@override
def visit_time_delta_expr(self, node: SqlSubtractTimeIntervalExpression) -> SqlExpressionRenderResult:
"""Render time delta for BigQuery, which requires ISO prefixing for the WEEK granularity value."""
Expand Down
8 changes: 8 additions & 0 deletions metricflow/sql/render/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_exprs import SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.time.date_part import DatePart


class DatabricksSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand All @@ -23,6 +24,13 @@ class DatabricksSqlExpressionRenderer(DefaultSqlExpressionRenderer):
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
return {SqlPercentileFunctionType.CONTINUOUS, SqlPercentileFunctionType.APPROXIMATE_DISCRETE}

@override
def render_date_part(self, date_part: DatePart) -> str:
if date_part is DatePart.DOW:
return "DAYOFWEEK_ISO"

return super().render_date_part(date_part)

@override
def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionRenderResult:
"""Render a percentile expression for Databricks."""
Expand Down
8 changes: 7 additions & 1 deletion metricflow/sql/render/expr_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,13 @@ def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderR
)

def render_date_part(self, date_part: DatePart) -> str:
"""Render DATE PART for an EXTRACT expression."""
"""Render DATE PART for an EXTRACT expression.
For DatePart.DOW (day of week) we use the ISO date part to ensure all engines return consistent results.
"""
if date_part is DatePart.DOW:
return "isodow"

return date_part.value

def visit_time_delta_expr(self, node: SqlSubtractTimeIntervalExpression) -> SqlExpressionRenderResult: # noqa: D
Expand Down
33 changes: 32 additions & 1 deletion metricflow/sql/render/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
)
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import SqlGenerateUuidExpression, SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.sql.sql_exprs import (
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
)
from metricflow.time.date_part import DatePart


class RedshiftSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand Down Expand Up @@ -59,6 +65,31 @@ def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionR
bind_parameters=params,
)

@override
def render_date_part(self, date_part: DatePart) -> str:
return date_part.value

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
"""Renders extract expressions with required output conversions for Redshift.
Redshift does not have native support for the ISO standard day of week output of 1 (Monday) - 7 (Sunday).
Instead, Redshift returns 0 (Sunday) - 6 (Monday). Therefore, we need custom rendering logic to normalize
the return values to the ISO standard.
"""
extract_rendering_result = super().visit_extract_expr(node)

if node.date_part is not DatePart.DOW:
return extract_rendering_result

extract_stmt = extract_rendering_result.sql
case_expr = f"CASE WHEN {extract_stmt} = 0 THEN {extract_stmt} + 7 ELSE {extract_stmt} END"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason you use IF() for some engines but CASE WHEN for others?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, would it be better to define SqlExpressionNodes for IF() and CASE WHEN statements?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used IF because it's shorter, but it's not ANSI standard so not all engines support it. That's probably a mistake, maybe I'll update it before merging.

As for the SqlExpressionNodes - yes, long term I think that's what we should do. We can have a SqlCaseExpressionNode that does nicely formatted case statements, that seems broadly useful. I didn't bother here because I couldn't figure out how to cleanly transform the extract node without doing it outside of the rendering layer. There are a bunch of options to think through.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, after getting annoyed by the visitor pattern yet again (I think I really went over the edge with #819 ) I've come to the conclusion that using a CASE expression node has to happen inside the renderer.

I'm going to just merge this as-is and then I'll follow up with a CASE statement node and see if I can hijack the rendering accordingly. If I can't make it nicely formatted I'll just stick with what's here until we push an update to reformat our SQL to dbt-like standards - given the bad formatting we have already the shorter IF seems fine for BigQuery even if it's not consistent with the CASE/WHEN we need to use for other engines.


return SqlExpressionRenderResult(
sql=case_expr,
bind_parameters=extract_rendering_result.bind_parameters,
)

@override
def visit_generate_uuid_expr(self, node: SqlGenerateUuidExpression) -> SqlExpressionRenderResult:
"""Generates a "good enough" random key to simulate a UUID.
Expand Down
8 changes: 8 additions & 0 deletions metricflow/sql/render/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import SqlGenerateUuidExpression, SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.time.date_part import DatePart


class SnowflakeSqlExpressionRenderer(DefaultSqlExpressionRenderer):
Expand All @@ -28,6 +29,13 @@ def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctio
SqlPercentileFunctionType.APPROXIMATE_CONTINUOUS,
}

@override
def render_date_part(self, date_part: DatePart) -> str:
if date_part is DatePart.DOW:
return "dayofweekiso"

return super().render_date_part(date_part)

@override
def visit_generate_uuid_expr(self, node: SqlGenerateUuidExpression) -> SqlExpressionRenderResult:
return SqlExpressionRenderResult(
Expand Down
3 changes: 0 additions & 3 deletions metricflow/test/integration/test_cases/itest_metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1067,7 +1067,6 @@ integration_test:
{"name": "metric_time", "date_part": "dow"},
{"name": "metric_time", "date_part": "doy"},
{"name": "metric_time", "date_part": "day"},
{"name": "metric_time", "date_part": "week"},
]
check_query: |
SELECT
Expand All @@ -1076,14 +1075,12 @@ integration_test:
, {{ render_extract("ds", DatePart.DOW) }} AS metric_time__extract_dow
, {{ render_extract("ds", DatePart.DOY) }} AS metric_time__extract_doy
, {{ render_extract("ds", DatePart.DAY) }} AS metric_time__extract_day
, {{ render_extract("ds", DatePart.WEEK) }} AS metric_time__extract_week
FROM {{ source_schema }}.fct_bookings
GROUP BY
{{ render_extract("ds", DatePart.QUARTER) }}
, {{ render_extract("ds", DatePart.DOW) }}
, {{ render_extract("ds", DatePart.DOY) }}
, {{ render_extract("ds", DatePart.DAY) }}
, {{ render_extract("ds", DatePart.WEEK) }};
---
integration_test:
name: derived_metric_offset_window_and_date_part
Expand Down
4 changes: 2 additions & 2 deletions metricflow/test/model/test_data_warehouse_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def test_build_dimension_tasks( # noqa: D
)
# on semantic model query with all dimensions
assert len(tasks) == 1
# 1 categorical dimension task, 1 time dimension task, 4 granularity based time dimension tasks, 7 date_part tasks
assert len(tasks[0].on_fail_subtasks) == 13
# 1 categorical dimension task, 1 time dimension task, 4 granularity based time dimension tasks, 6 date_part tasks
assert len(tasks[0].on_fail_subtasks) == 12


def test_validate_dimensions( # noqa: D
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,6 @@ def test_simple_query_with_multiple_date_parts( # noqa: D
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DAY),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DOW),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.DOY),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.WEEK),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.MONTH),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.QUARTER),
DataSet.metric_time_dimension_spec(time_granularity=TimeGranularity.DAY, date_part=DatePart.YEAR),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(isoweek FROM revenue_src_10006.created_at) AS ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC(revenue_src_10006.created_at, day) AS company__ds__day
, DATE_TRUNC(revenue_src_10006.created_at, isoweek) AS company__ds__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS company__ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(isoweek FROM revenue_src_10006.created_at) AS company__ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS company__ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds) AS ds__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS ds__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds) AS ds__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds) AS ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS ds_partitioned__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type
, DATE_TRUNC(id_verifications_src_10003.ds, day) AS verification__ds__day
Expand All @@ -34,9 +32,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds) AS verification__ds__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS verification__ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS verification__ds__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds) AS verification__ds__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds) AS verification__ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS verification__ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS verification__ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS verification__ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS verification__ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS verification__ds_partitioned__week
Expand All @@ -46,9 +43,8 @@ SELECT
, EXTRACT(year FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_year
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_month
, EXTRACT(isoweek FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_week
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS verification__ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type AS verification__verification_type
, id_verifications_src_10003.verification_id AS verification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ SELECT
, EXTRACT(year FROM users_latest_src_10008.ds) AS ds_latest__extract_year
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS ds_latest__extract_month
, EXTRACT(isoweek FROM users_latest_src_10008.ds) AS ds_latest__extract_week
, EXTRACT(day FROM users_latest_src_10008.ds) AS ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS ds_latest__extract_doy
, users_latest_src_10008.home_state_latest
, DATE_TRUNC(users_latest_src_10008.ds, day) AS user__ds_latest__day
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM users_latest_src_10008.ds) AS user__ds_latest__extract_year
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS user__ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS user__ds_latest__extract_month
, EXTRACT(isoweek FROM users_latest_src_10008.ds) AS user__ds_latest__extract_week
, EXTRACT(day FROM users_latest_src_10008.ds) AS user__ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS user__ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS user__ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS user__ds_latest__extract_doy
, users_latest_src_10008.home_state_latest AS user__home_state_latest
, users_latest_src_10008.user_id AS user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(week FROM revenue_src_10006.created_at) AS ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS ds__extract_dow
, EXTRACT(DAYOFWEEK_ISO FROM revenue_src_10006.created_at) AS ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC('day', revenue_src_10006.created_at) AS company__ds__day
, DATE_TRUNC('week', revenue_src_10006.created_at) AS company__ds__week
Expand All @@ -21,9 +20,8 @@ SELECT
, EXTRACT(year FROM revenue_src_10006.created_at) AS company__ds__extract_year
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(week FROM revenue_src_10006.created_at) AS company__ds__extract_week
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, EXTRACT(DAYOFWEEK_ISO FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Loading