Skip to content

Commit

Permalink
Engine-specific rendering for sub-daily granularity options (#1258)
Browse files Browse the repository at this point in the history
Support for rendering `DATE_TRUNC` statements with sub-daily granularity
options.
Includes validation for engines that don't support certain
granularities.
Note about validation: warehouse validations in YAML would be ideal, but
we don't have those currently. Query parsing validations would also be
ideal, but the query parser has no awareness of the engine being queried.
This seemed like the next best option, and it's such a rare edge case
that I'm not too concerned about optimizing this error.
  • Loading branch information
courtneyholcomb authored Jun 11, 2024
1 parent b4971a0 commit 06a1467
Show file tree
Hide file tree
Showing 243 changed files with 5,690 additions and 5,618 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20240607-161232.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Support sub-daily granularity options in SQL rendering for all supported SQL
engines.
time: 2024-06-07T16:12:32.270538-07:00
custom:
Author: courtneyholcomb
Issue: "1258"
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
description: Derived offset metric with 2 different agg_time_dimensions
type: derived
type_params:
expr: revenue - revenue_daily
expr: revenue - revenue_last_7_days
metrics:
- name: revenue
- name: revenue_daily
Expand Down
30 changes: 29 additions & 1 deletion metricflow/protocols/sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from abc import abstractmethod
from enum import Enum
from typing import Protocol
from typing import Protocol, Set

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters

from metricflow.data_table.mf_table import MetricFlowDataTable
Expand All @@ -24,6 +26,32 @@ class SqlEngine(Enum):
DATABRICKS = "Databricks"
TRINO = "Trino"

@property
def unsupported_granularities(self) -> Set[TimeGranularity]:
    """Time granularities that this SqlEngine is unable to handle.

    The finest granularity permitted for an engine is the finest one that both its base TIMESTAMP type and
    every operation we rely on (e.g., DATE_TRUNC) can work with. Trino illustrates why both matter: when
    these granularities were introduced it offered more precise types for storage and access, yet its base
    TIMESTAMP type and DATE_TRUNC function only went down to millisecond precision.
    """
    # Snowflake has no restrictions, so nothing is excluded.
    if self is SqlEngine.SNOWFLAKE:
        return set()

    # Trino excludes microseconds in addition to nanoseconds.
    if self is SqlEngine.TRINO:
        return {TimeGranularity.NANOSECOND, TimeGranularity.MICROSECOND}

    # Every remaining engine excludes nanoseconds only. Each engine keeps its own identity check so that
    # the exhaustiveness assertion at the end still covers every enum member.
    if self is SqlEngine.BIGQUERY:
        return {TimeGranularity.NANOSECOND}
    if self is SqlEngine.DATABRICKS:
        return {TimeGranularity.NANOSECOND}
    if self is SqlEngine.DUCKDB:
        return {TimeGranularity.NANOSECOND}
    if self is SqlEngine.POSTGRES:
        return {TimeGranularity.NANOSECOND}
    if self is SqlEngine.REDSHIFT:
        return {TimeGranularity.NANOSECOND}

    # Reached only if a SqlEngine member was added without a branch above.
    assert_values_exhausted(self)


class SqlClient(Protocol):
"""Base interface for SqlClient instances used inside MetricFlow.
Expand Down
7 changes: 6 additions & 1 deletion metricflow/sql/render/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -31,6 +32,8 @@
class BigQuerySqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the BigQuery engine."""

sql_engine = SqlEngine.BIGQUERY

@property
@override
def double_data_type(self) -> str:
Expand Down Expand Up @@ -120,14 +123,16 @@ def visit_cast_to_timestamp_expr(self, node: SqlCastToTimestampExpression) -> Sq
@override
def visit_date_trunc_expr(self, node: SqlDateTruncExpression) -> SqlExpressionRenderResult:
"""Render DATE_TRUNC for BigQuery, which takes the opposite argument order from Snowflake and Redshift."""
self._validate_granularity_for_engine(node.time_granularity)

arg_rendered = self.render_sql_expr(node.arg)

prefix = ""
if node.time_granularity == TimeGranularity.WEEK:
prefix = "iso"

return SqlExpressionRenderResult(
sql=f"DATE_TRUNC({arg_rendered.sql}, {prefix}{node.time_granularity.value})",
sql=f"DATETIME_TRUNC({arg_rendered.sql}, {prefix}{node.time_granularity.value})",
bind_parameters=arg_rendered.bind_parameters,
)

Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from metricflow_semantics.errors.error_classes import UnsupportedEngineFeatureError
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -19,6 +20,8 @@
class DatabricksSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the Databricks engine."""

sql_engine = SqlEngine.DATABRICKS

@property
@override
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/duckdb_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -24,6 +25,8 @@
class DuckDbSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the DuckDB engine."""

sql_engine = SqlEngine.DUCKDB

@property
@override
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
Expand Down
16 changes: 15 additions & 1 deletion metricflow/sql/render/expr_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from abc import ABC, abstractmethod
from collections import namedtuple
from dataclasses import dataclass
from typing import Collection, List
from typing import TYPE_CHECKING, Collection, List, Optional

import jinja2
from dbt_semantic_interfaces.type_enums.date_part import DatePart
Expand Down Expand Up @@ -41,6 +41,10 @@
)
from metricflow.sql.sql_plan import SqlSelectColumn

if TYPE_CHECKING:
from metricflow.protocols.sql_client import SqlEngine


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -93,6 +97,10 @@ def can_render_percentile_function(self, percentile_type: SqlPercentileFunctionT
class DefaultSqlExpressionRenderer(SqlExpressionRenderer):
"""Renders the SQL query plan assuming ANSI SQL."""

# SQL engine this renderer generates SQL for. The default renderer is not tied to an engine and
# reports None, in which case granularity validation is skipped; the engine-specific renderers
# replace this with their own SqlEngine member.
@property
def sql_engine(self) -> Optional[SqlEngine]: # noqa: D102
return None

@property
@override
def double_data_type(self) -> str:
Expand Down Expand Up @@ -263,7 +271,13 @@ def visit_cast_to_timestamp_expr( # noqa: D102
bind_parameters=arg_rendered.bind_parameters,
)

def _validate_granularity_for_engine(self, time_granularity: TimeGranularity) -> None:
    """Reject a time granularity that this renderer's SQL engine cannot handle.

    Nothing is checked when the renderer has no associated engine.

    Raises:
        RuntimeError: if `time_granularity` is among the engine's unsupported granularities.
    """
    engine = self.sql_engine
    # Engine-agnostic renderers have nothing to validate against.
    if not engine:
        return

    if time_granularity not in engine.unsupported_granularities:
        return

    raise RuntimeError(f"{engine.name} does not support time granularity {time_granularity.name}.")

def visit_date_trunc_expr(self, node: SqlDateTruncExpression) -> SqlExpressionRenderResult: # noqa: D102
self._validate_granularity_for_engine(node.time_granularity)

arg_rendered = self.render_sql_expr(node.arg)

return SqlExpressionRenderResult(
Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -25,6 +26,8 @@
class PostgresSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the PostgreSQL engine."""

sql_engine = SqlEngine.POSTGRES

@property
@override
def double_data_type(self) -> str:
Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -25,6 +26,8 @@
class RedshiftSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the Redshift engine."""

sql_engine = SqlEngine.REDSHIFT

@property
@override
def double_data_type(self) -> str:
Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -24,6 +25,8 @@
class SnowflakeSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the Snowflake engine."""

sql_engine = SqlEngine.SNOWFLAKE

@property
@override
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
Expand Down
3 changes: 3 additions & 0 deletions metricflow/sql/render/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from typing_extensions import override

from metricflow.protocols.sql_client import SqlEngine
from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
SqlExpressionRenderer,
Expand All @@ -27,6 +28,8 @@
class TrinoSqlExpressionRenderer(DefaultSqlExpressionRenderer):
"""Expression renderer for the Trino engine."""

sql_engine = SqlEngine.TRINO

@property
@override
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,23 @@ FROM (
SELECT
1 AS visits
, visits_source_src_28000.user_id AS visitors
, DATE_TRUNC(visits_source_src_28000.ds, day) AS ds__day
, DATE_TRUNC(visits_source_src_28000.ds, isoweek) AS ds__week
, DATE_TRUNC(visits_source_src_28000.ds, month) AS ds__month
, DATE_TRUNC(visits_source_src_28000.ds, quarter) AS ds__quarter
, DATE_TRUNC(visits_source_src_28000.ds, year) AS ds__year
, DATETIME_TRUNC(visits_source_src_28000.ds, day) AS ds__day
, DATETIME_TRUNC(visits_source_src_28000.ds, isoweek) AS ds__week
, DATETIME_TRUNC(visits_source_src_28000.ds, month) AS ds__month
, DATETIME_TRUNC(visits_source_src_28000.ds, quarter) AS ds__quarter
, DATETIME_TRUNC(visits_source_src_28000.ds, year) AS ds__year
, EXTRACT(year FROM visits_source_src_28000.ds) AS ds__extract_year
, EXTRACT(quarter FROM visits_source_src_28000.ds) AS ds__extract_quarter
, EXTRACT(month FROM visits_source_src_28000.ds) AS ds__extract_month
, EXTRACT(day FROM visits_source_src_28000.ds) AS ds__extract_day
, IF(EXTRACT(dayofweek FROM visits_source_src_28000.ds) = 1, 7, EXTRACT(dayofweek FROM visits_source_src_28000.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM visits_source_src_28000.ds) AS ds__extract_doy
, visits_source_src_28000.referrer_id
, DATE_TRUNC(visits_source_src_28000.ds, day) AS visit__ds__day
, DATE_TRUNC(visits_source_src_28000.ds, isoweek) AS visit__ds__week
, DATE_TRUNC(visits_source_src_28000.ds, month) AS visit__ds__month
, DATE_TRUNC(visits_source_src_28000.ds, quarter) AS visit__ds__quarter
, DATE_TRUNC(visits_source_src_28000.ds, year) AS visit__ds__year
, DATETIME_TRUNC(visits_source_src_28000.ds, day) AS visit__ds__day
, DATETIME_TRUNC(visits_source_src_28000.ds, isoweek) AS visit__ds__week
, DATETIME_TRUNC(visits_source_src_28000.ds, month) AS visit__ds__month
, DATETIME_TRUNC(visits_source_src_28000.ds, quarter) AS visit__ds__quarter
, DATETIME_TRUNC(visits_source_src_28000.ds, year) AS visit__ds__year
, EXTRACT(year FROM visits_source_src_28000.ds) AS visit__ds__extract_year
, EXTRACT(quarter FROM visits_source_src_28000.ds) AS visit__ds__extract_quarter
, EXTRACT(month FROM visits_source_src_28000.ds) AS visit__ds__extract_month
Expand Down Expand Up @@ -220,23 +220,23 @@ FROM (
SELECT
1 AS visits
, visits_source_src_28000.user_id AS visitors
, DATE_TRUNC(visits_source_src_28000.ds, day) AS ds__day
, DATE_TRUNC(visits_source_src_28000.ds, isoweek) AS ds__week
, DATE_TRUNC(visits_source_src_28000.ds, month) AS ds__month
, DATE_TRUNC(visits_source_src_28000.ds, quarter) AS ds__quarter
, DATE_TRUNC(visits_source_src_28000.ds, year) AS ds__year
, DATETIME_TRUNC(visits_source_src_28000.ds, day) AS ds__day
, DATETIME_TRUNC(visits_source_src_28000.ds, isoweek) AS ds__week
, DATETIME_TRUNC(visits_source_src_28000.ds, month) AS ds__month
, DATETIME_TRUNC(visits_source_src_28000.ds, quarter) AS ds__quarter
, DATETIME_TRUNC(visits_source_src_28000.ds, year) AS ds__year
, EXTRACT(year FROM visits_source_src_28000.ds) AS ds__extract_year
, EXTRACT(quarter FROM visits_source_src_28000.ds) AS ds__extract_quarter
, EXTRACT(month FROM visits_source_src_28000.ds) AS ds__extract_month
, EXTRACT(day FROM visits_source_src_28000.ds) AS ds__extract_day
, IF(EXTRACT(dayofweek FROM visits_source_src_28000.ds) = 1, 7, EXTRACT(dayofweek FROM visits_source_src_28000.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM visits_source_src_28000.ds) AS ds__extract_doy
, visits_source_src_28000.referrer_id
, DATE_TRUNC(visits_source_src_28000.ds, day) AS visit__ds__day
, DATE_TRUNC(visits_source_src_28000.ds, isoweek) AS visit__ds__week
, DATE_TRUNC(visits_source_src_28000.ds, month) AS visit__ds__month
, DATE_TRUNC(visits_source_src_28000.ds, quarter) AS visit__ds__quarter
, DATE_TRUNC(visits_source_src_28000.ds, year) AS visit__ds__year
, DATETIME_TRUNC(visits_source_src_28000.ds, day) AS visit__ds__day
, DATETIME_TRUNC(visits_source_src_28000.ds, isoweek) AS visit__ds__week
, DATETIME_TRUNC(visits_source_src_28000.ds, month) AS visit__ds__month
, DATETIME_TRUNC(visits_source_src_28000.ds, quarter) AS visit__ds__quarter
, DATETIME_TRUNC(visits_source_src_28000.ds, year) AS visit__ds__year
, EXTRACT(year FROM visits_source_src_28000.ds) AS visit__ds__extract_year
, EXTRACT(quarter FROM visits_source_src_28000.ds) AS visit__ds__extract_quarter
, EXTRACT(month FROM visits_source_src_28000.ds) AS visit__ds__extract_month
Expand Down Expand Up @@ -342,22 +342,22 @@ FROM (
SELECT
1 AS buys
, buys_source_src_28000.user_id AS buyers
, DATE_TRUNC(buys_source_src_28000.ds, day) AS ds__day
, DATE_TRUNC(buys_source_src_28000.ds, isoweek) AS ds__week
, DATE_TRUNC(buys_source_src_28000.ds, month) AS ds__month
, DATE_TRUNC(buys_source_src_28000.ds, quarter) AS ds__quarter
, DATE_TRUNC(buys_source_src_28000.ds, year) AS ds__year
, DATETIME_TRUNC(buys_source_src_28000.ds, day) AS ds__day
, DATETIME_TRUNC(buys_source_src_28000.ds, isoweek) AS ds__week
, DATETIME_TRUNC(buys_source_src_28000.ds, month) AS ds__month
, DATETIME_TRUNC(buys_source_src_28000.ds, quarter) AS ds__quarter
, DATETIME_TRUNC(buys_source_src_28000.ds, year) AS ds__year
, EXTRACT(year FROM buys_source_src_28000.ds) AS ds__extract_year
, EXTRACT(quarter FROM buys_source_src_28000.ds) AS ds__extract_quarter
, EXTRACT(month FROM buys_source_src_28000.ds) AS ds__extract_month
, EXTRACT(day FROM buys_source_src_28000.ds) AS ds__extract_day
, IF(EXTRACT(dayofweek FROM buys_source_src_28000.ds) = 1, 7, EXTRACT(dayofweek FROM buys_source_src_28000.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM buys_source_src_28000.ds) AS ds__extract_doy
, DATE_TRUNC(buys_source_src_28000.ds, day) AS buy__ds__day
, DATE_TRUNC(buys_source_src_28000.ds, isoweek) AS buy__ds__week
, DATE_TRUNC(buys_source_src_28000.ds, month) AS buy__ds__month
, DATE_TRUNC(buys_source_src_28000.ds, quarter) AS buy__ds__quarter
, DATE_TRUNC(buys_source_src_28000.ds, year) AS buy__ds__year
, DATETIME_TRUNC(buys_source_src_28000.ds, day) AS buy__ds__day
, DATETIME_TRUNC(buys_source_src_28000.ds, isoweek) AS buy__ds__week
, DATETIME_TRUNC(buys_source_src_28000.ds, month) AS buy__ds__month
, DATETIME_TRUNC(buys_source_src_28000.ds, quarter) AS buy__ds__quarter
, DATETIME_TRUNC(buys_source_src_28000.ds, year) AS buy__ds__year
, EXTRACT(year FROM buys_source_src_28000.ds) AS buy__ds__extract_year
, EXTRACT(quarter FROM buys_source_src_28000.ds) AS buy__ds__extract_quarter
, EXTRACT(month FROM buys_source_src_28000.ds) AS buy__ds__extract_month
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ FROM (
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['visits', 'metric_time__day']
SELECT
DATE_TRUNC(ds, day) AS metric_time__day
DATETIME_TRUNC(ds, day) AS metric_time__day
, 1 AS visits
FROM ***************************.fct_visits visits_source_src_28000
) subq_18
Expand Down Expand Up @@ -76,8 +76,8 @@ FROM (
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['visits', 'ds__day', 'metric_time__day', 'user']
SELECT
DATE_TRUNC(ds, day) AS ds__day
, DATE_TRUNC(ds, day) AS metric_time__day
DATETIME_TRUNC(ds, day) AS ds__day
, DATETIME_TRUNC(ds, day) AS metric_time__day
, user_id AS user
, 1 AS visits
FROM ***************************.fct_visits visits_source_src_28000
Expand All @@ -87,7 +87,7 @@ FROM (
-- Metric Time Dimension 'ds'
-- Add column with generated UUID
SELECT
DATE_TRUNC(ds, day) AS ds__day
DATETIME_TRUNC(ds, day) AS ds__day
, user_id AS user
, 1 AS buys
, GENERATE_UUID() AS mf_internal_uuid
Expand Down
Loading

0 comments on commit 06a1467

Please sign in to comment.