Skip to content

Commit

Permalink
Handle dry run and approx percentile for Trino along with more explic…
Browse files Browse the repository at this point in the history
…it castings
  • Loading branch information
sarbmeetka committed Oct 9, 2023
1 parent 17060bd commit a7e8cbf
Show file tree
Hide file tree
Showing 37 changed files with 94 additions and 73 deletions.
13 changes: 12 additions & 1 deletion metricflow/cli/dbt_connectors/adapter_backed_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,18 @@ def dry_run(
request_id = SqlRequestId(f"mf_rid__{random_id()}")
connection_name = f"MetricFlow_dry_run_request_{request_id}"
# TODO - consolidate to self._adapter.validate_sql() when all implementations will work from within MetricFlow
if self.sql_engine_type is SqlEngine.BIGQUERY:

# Trino has a bug where explain command actually creates table. Wrapping with validate to avoid this.
# See https://github.com/trinodb/trino/issues/130
if self.sql_engine_type is SqlEngine.TRINO:
with self._adapter.connection_named(connection_name):
# Either the response will be bool value or a string with error message from Trino.
result = self._adapter.execute(f"EXPLAIN (type validate) {stmt}", auto_begin=True, fetch=True)
has_error = False if str(result[0]) == "SUCCESS" else True
if has_error:
raise DbtDatabaseError("Encountered error in Trino dry run.")

elif self.sql_engine_type is SqlEngine.BIGQUERY:
with self._adapter.connection_named(connection_name):
self._adapter.validate_sql(stmt)
else:
Expand Down
6 changes: 3 additions & 3 deletions metricflow/sql/render/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from typing import Collection

from dateutil.parser import parse
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing_extensions import override
from dateutil.parser import parse

from metricflow.sql.render.expr_renderer import (
DefaultSqlExpressionRenderer,
Expand All @@ -15,11 +15,11 @@
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import (
SqlBetweenExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
SqlTimeDeltaExpression,
SqlBetweenExpression
)


Expand Down Expand Up @@ -75,7 +75,7 @@ def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionR
function_str = "PERCENTILE_DISC"
elif node.percentile_args.function_type is SqlPercentileFunctionType.APPROXIMATE_CONTINUOUS:
return SqlExpressionRenderResult(
sql=f"approx_quantile({arg_rendered.sql}, {percentile})",
sql=f"approx_percentile({arg_rendered.sql}, {percentile})",
bind_parameters=params,
)
elif node.percentile_args.function_type is SqlPercentileFunctionType.APPROXIMATE_DISCRETE:
Expand Down
10 changes: 10 additions & 0 deletions metricflow/test/generate_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"engine_url": postgres://...",
"engine_password": "..."
},
"trino": {
"engine_url": trino://...",
"engine_password": "..."
},
}
EOF
)
Expand Down Expand Up @@ -66,6 +70,7 @@ class MetricFlowTestCredentialSetForAllEngines(FrozenBaseModel): # noqa: D
big_query: MetricFlowTestCredentialSet
databricks: MetricFlowTestCredentialSet
postgres: MetricFlowTestCredentialSet
trino: MetricFlowTestCredentialSet

@property
def as_configurations(self) -> Sequence[MetricFlowTestConfiguration]: # noqa: D
Expand Down Expand Up @@ -94,6 +99,10 @@ def as_configurations(self) -> Sequence[MetricFlowTestConfiguration]: # noqa: D
engine=SqlEngine.POSTGRES,
credential_set=self.postgres,
),
MetricFlowTestConfiguration(
engine=SqlEngine.TRINO,
credential_set=self.trino,
),
)


Expand Down Expand Up @@ -149,6 +158,7 @@ def run_tests(test_configuration: MetricFlowTestConfiguration, test_file_paths:
or test_configuration.engine is SqlEngine.BIGQUERY
or test_configuration.engine is SqlEngine.DATABRICKS
or test_configuration.engine is SqlEngine.POSTGRES
or test_configuration.engine is SqlEngine.TRINO
):
engine_name = test_configuration.engine.value.lower()
os.environ["MF_TEST_ADAPTER_TYPE"] = engine_name
Expand Down
26 changes: 13 additions & 13 deletions metricflow/test/integration/test_cases/itest_granularity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.WEEK) }} AS metric_time__week
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__week
{{ render_date_trunc("ds", TimeGranularity.WEEK) }}
---
integration_test:
name: query_granularity_for_sum_month
Expand All @@ -39,7 +39,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.MONTH) }} AS metric_time__month
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__month
{{ render_date_trunc("ds", TimeGranularity.MONTH) }}
---
integration_test:
name: query_granularity_for_sum_quarter
Expand All @@ -53,7 +53,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.QUARTER) }} AS metric_time__quarter
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__quarter
{{ render_date_trunc("ds", TimeGranularity.QUARTER) }}
---
integration_test:
name: query_granularity_for_sum_year
Expand All @@ -67,7 +67,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.YEAR) }} AS metric_time__year
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__year
{{ render_date_trunc("ds", TimeGranularity.YEAR) }}
---
integration_test:
name: query_granularity_for_count_distinct_day
Expand Down Expand Up @@ -95,7 +95,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.WEEK) }} AS metric_time__week
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__week
{{ render_date_trunc("ds", TimeGranularity.WEEK) }}
---
integration_test:
name: query_granularity_for_count_distinct_month
Expand All @@ -109,7 +109,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.MONTH) }} AS metric_time__month
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__month
{{ render_date_trunc("ds", TimeGranularity.MONTH) }}
---
integration_test:
name: query_granularity_for_count_distinct_quarter
Expand All @@ -123,7 +123,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.QUARTER) }} AS metric_time__quarter
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__quarter
{{ render_date_trunc("ds", TimeGranularity.QUARTER) }}
---
integration_test:
name: query_granularity_for_count_distinct_year
Expand All @@ -137,7 +137,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.YEAR) }} AS metric_time__year
FROM {{ source_schema }}.fct_bookings_extended
GROUP BY
metric_time__year
{{ render_date_trunc("ds", TimeGranularity.YEAR) }}
---
integration_test:
name: query_granularity_for_joined_dundered_dimension_day
Expand Down Expand Up @@ -185,7 +185,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings_extended_monthly
WHERE {{ render_time_constraint("ds", "2020-01-01", "2020-02-29") }}
GROUP BY
metric_time__month
ds
---
integration_test:
name: metric_with_non_day_granularity_on_non_boundaries
Expand All @@ -201,7 +201,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings_extended_monthly
WHERE {{ render_time_constraint("ds", "2020-01-01", "2020-02-29") }}
GROUP BY
metric_time__month
ds
---
integration_test:
name: weekly_metric_on_non_boundaries
Expand All @@ -217,7 +217,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings_extended
WHERE {{ render_time_constraint("ds", "2020-01-13", "2020-01-26") }}
GROUP BY
metric_time__week
{{ render_date_trunc("ds", TimeGranularity.WEEK) }}
---
integration_test:
name: daily_metric_with_monthly_time_dimension
Expand All @@ -233,7 +233,7 @@ integration_test:
FROM {{ source_schema }}.fct_bookings_extended
WHERE {{ render_time_constraint("ds", "2020-01-01", "2020-01-31") }}
GROUP BY
metric_time__month
{{ render_date_trunc("ds", TimeGranularity.MONTH) }}
---
integration_test:
name: metrics_with_different_time_granularities
Expand Down Expand Up @@ -273,7 +273,7 @@ integration_test:
ON a.ds = b.ds
) c
GROUP BY
metric_time__month
ds
---
integration_test:
name: metrics_with_different_time_granularities_and_no_metric_time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,6 @@ integration_test:
, SUM(booking_value) AS booking_value
FROM {{ source_schema }}.fct_bookings
GROUP BY
metric_time__month
{{ render_date_trunc("ds", TimeGranularity.MONTH) }}
ORDER BY
metric_time__month DESC, booking_value
22 changes: 11 additions & 11 deletions metricflow/test/integration/test_cases/itest_scd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ integration_test:
ON a.listing_id = b.listing_id
AND a.ds >= b.active_from AND (a.ds < b.active_to OR b.active_to is NULL)
GROUP BY
listing__capacity, metric_time__day
capacity, ds
---
integration_test:
name: basic_scd_constrained_metric
Expand All @@ -35,7 +35,7 @@ integration_test:
WHERE b.capacity >= 3
GROUP BY
is_instant
, metric_time__day
, ds
---
integration_test:
name: scd_constrained_metric_with_nulls
Expand All @@ -61,8 +61,8 @@ integration_test:
WHERE b.is_lux OR b.is_lux IS NULL
GROUP BY
is_instant
, metric_time__day
, listing__is_lux
, ds
, is_lux
---
integration_test:
name: scd_grouped_metric_with_second_dim
Expand All @@ -87,9 +87,9 @@ integration_test:
LEFT OUTER JOIN {{ source_schema }}.dim_users_latest c
ON b.user_id = c.user_id
GROUP BY
metric_time__day
, listing__is_lux
, listing__user__home_state_latest
a.ds
, b.is_lux
, c.home_state_latest
---
integration_test:
name: scd_multi_hop_groupby_through_scd
Expand All @@ -112,8 +112,8 @@ integration_test:
LEFT OUTER JOIN {{ source_schema }}.dim_users_latest c
ON b.user_id = c.user_id
GROUP BY
metric_time__day
, listing__user__home_state_latest
a.ds
, c.home_state_latest
---
integration_test:
name: scd_multi_hop_groupby_to_scd
Expand All @@ -136,5 +136,5 @@ integration_test:
ON b.lux_listing_id = c.lux_listing_id
AND a.ds >= c.valid_from AND (a.ds < c.valid_to OR c.valid_to is NULL)
GROUP BY
metric_time__day
, listing__lux_listing__is_confirmed_lux
a.ds
, c.is_confirmed_lux
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ integration_test:
, MAX(ds) AS ds
FROM {{ source_schema }}.fct_accounts
GROUP BY
metric_time__week
{{ render_date_trunc("ds", TimeGranularity.WEEK) }}
, user_id
) b
ON
Expand Down Expand Up @@ -85,7 +85,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.DAY) }} AS metric_time__day
FROM {{ source_schema }}.fct_accounts
GROUP BY
metric_time__day
{{ render_date_trunc("ds", TimeGranularity.DAY) }}
) b
ON
a.ds = b.ds
Expand Down Expand Up @@ -167,7 +167,7 @@ integration_test:
, {{ render_date_trunc("ds", TimeGranularity.WEEK) }} AS metric_time__week
FROM {{ source_schema }}.fct_accounts
GROUP BY
metric_time__week
{{ render_date_trunc("ds", TimeGranularity.WEEK) }}
) b
ON
a.ds = b.ds
Expand Down Expand Up @@ -226,7 +226,7 @@ integration_test:
model: SIMPLE_MODEL
metrics: ["total_account_balance_first_day"]
group_bys: ["account__account_type"]
where_filter: "{{ render_time_dimension_template('account__ds', 'day') }} >= '2020-01-03'"
where_filter: "{{ render_time_dimension_template('account__ds', 'day') }} >= {{ cast_to_ts('2020-01-03') }}"
check_query: |
SELECT
b.account_type AS account__account_type
Expand All @@ -243,7 +243,7 @@ integration_test:
, account_balance AS total_account_balance_first_day
FROM {{ source_schema }}.fct_accounts
) a
WHERE ds >= '2020-01-03'
WHERE ds >= {{ cast_to_ts('2020-01-03') }}
) b
INNER JOIN (
SELECT
Expand All @@ -253,7 +253,7 @@ integration_test:
ds
FROM {{ source_schema }}.fct_accounts
) c
WHERE ds >= '2020-01-03'
WHERE ds >= {{ cast_to_ts('2020-01-03') }}
) d
ON
b.ds = d.ds__complete
Expand Down
6 changes: 3 additions & 3 deletions metricflow/test/integration/test_configured_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ def render_between_time_constraint(
start_time: str,
stop_time: str,
) -> str:
""" Render an expression like "ds between timestamp '2020-01-01' AND timestamp '2020-01-02'"
since Trino require timestamp literals to be wrapped in a timestamp() function.
"""
"""Render an expression like "ds between timestamp '2020-01-01' AND timestamp '2020-01-02'" since Trino require timestamp literals to be wrapped in a timestamp() function."""
start_expr = self.cast_to_ts(f"{start_time}")
stop_expr = self.cast_to_ts(f"{stop_time}")
return f"{expr} BETWEEN {start_expr} AND {stop_expr}"
Expand Down Expand Up @@ -309,6 +307,7 @@ def test_case(
render_dimension_template=check_query_helpers.render_dimension_template,
render_entity_template=check_query_helpers.render_entity_template,
render_time_dimension_template=check_query_helpers.render_time_dimension_template,
cast_to_ts=check_query_helpers.cast_to_ts,
)
if case.where_filter
else None,
Expand All @@ -334,6 +333,7 @@ def test_case(
render_percentile_expr=check_query_helpers.render_percentile_expr,
mf_time_spine_source=semantic_manifest_lookup.time_spine_source.spine_table.sql,
double_data_type_name=check_query_helpers.double_data_type_name,
cast_to_ts=check_query_helpers.cast_to_ts,
)
)
# If we sort, it's effectively not checking the order whatever order that the output was would be overwritten.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,4 @@ FROM (
) subq_0
) subq_1
) subq_2
WHERE subq_2.metric_time__day BETWEEN '2020-01-01' AND '2020-01-02'
WHERE subq_2.metric_time__day BETWEEN timestamp '2020-01-01' AND timestamp '2020-01-02'
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ SELECT
, ds AS metric_time__day
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
WHERE ds BETWEEN '2020-01-01' AND '2020-01-02'
WHERE ds BETWEEN timestamp '2020-01-01' AND timestamp '2020-01-02'
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2000-01-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN timestamp '2000-01-01' AND timestamp '2020-01-01'
) subq_2
) subq_3
GROUP BY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ SELECT
DATE_TRUNC('month', created_at) AS ds__month
, SUM(revenue) AS revenue_all_time
FROM ***************************.fct_revenue revenue_src_10006
WHERE created_at BETWEEN '2000-01-01' AND '2020-01-01'
WHERE created_at BETWEEN timestamp '2000-01-01' AND timestamp '2020-01-01'
GROUP BY
DATE_TRUNC('month', created_at)
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ FROM (
FROM ***************************.fct_revenue revenue_src_10006
) subq_0
) subq_1
WHERE subq_1.metric_time__day BETWEEN '2019-12-01' AND '2020-01-01'
WHERE subq_1.metric_time__day BETWEEN timestamp '2019-12-01' AND timestamp '2020-01-01'
) subq_2
) subq_3
GROUP BY
Expand Down
Loading

0 comments on commit a7e8cbf

Please sign in to comment.