Skip to content

Commit

Permalink
Write basic SQL rendering tests for custom granularities in filters
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Sep 23, 2024
1 parent 3ce6b5e commit 7b5c4f4
Show file tree
Hide file tree
Showing 68 changed files with 5,833 additions and 63 deletions.
3 changes: 2 additions & 1 deletion metricflow/plan_conversion/dataflow_to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,8 @@ def visit_join_to_custom_granularity_node(self, node: JoinToCustomGranularityNod
break
assert parent_time_dimension_instance, (
"JoinToCustomGranularityNode's expected time_dimension_spec not found in parent dataset instances. "
"This indicates internal misconfiguration."
f"This indicates internal misconfiguration. Expected: {node.time_dimension_spec.with_base_grain}; "
f"Got: {[instance.spec for instance in parent_data_set.instance_set.time_dimension_instances]}"
)

# Build join expression.
Expand Down
113 changes: 113 additions & 0 deletions tests_metricflow/query_rendering/test_custom_granularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter
from dbt_semantic_interfaces.references import EntityReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from metricflow_semantics.query.query_parser import MetricFlowQueryParser
from metricflow_semantics.specs.metric_spec import MetricSpec
from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec
from metricflow_semantics.specs.time_dimension_spec import TimeDimensionSpec
Expand Down Expand Up @@ -267,3 +269,114 @@ def test_simple_metric_with_custom_granularity_and_join( # noqa: D103
dataflow_plan_builder=dataflow_plan_builder,
query_spec=query_spec,
)


# TODO: optimizer - could collapse subquery
@pytest.mark.sql_engine_snapshot
def test_simple_metric_with_custom_granularity_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Render SQL for the `bookings` metric filtered on a custom grain that is absent from the group by."""
    # The where filter is the only place the custom grain is referenced; no group-by items are requested.
    custom_grain_filter = PydanticWhereFilter(
        where_sql_template="{{ TimeDimension('metric_time', 'martian_day') }} = '2020-01-01'"
    )
    parse_result = query_parser.parse_and_validate_query(
        metric_names=("bookings",),
        where_constraint=custom_grain_filter,
    )

    render_and_check(
        query_spec=parse_result.query_spec,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        mf_test_configuration=mf_test_configuration,
        request=request,
    )


# TODO: optimizer - could collapse subquery
@pytest.mark.sql_engine_snapshot
def test_simple_metric_with_custom_granularity_in_filter_and_group_by(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Render SQL for the `bookings` metric when the same custom grain is both filtered on and grouped by."""
    # The custom grain appears twice in the request: once as a group-by item and once inside the where filter.
    custom_grain_filter = PydanticWhereFilter(
        where_sql_template="{{ TimeDimension('metric_time', 'martian_day') }} = '2020-01-01'"
    )
    parse_result = query_parser.parse_and_validate_query(
        metric_names=("bookings",),
        group_by_names=("metric_time__martian_day",),
        where_constraint=custom_grain_filter,
    )

    render_and_check(
        query_spec=parse_result.query_spec,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        mf_test_configuration=mf_test_configuration,
        request=request,
    )


@pytest.mark.sql_engine_snapshot
def test_no_metrics_with_custom_granularity_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Render SQL for a metric-less query filtered on a custom grain that is absent from the group by."""
    # Only group-by items are requested; the group by uses the day grain while the filter uses the custom grain.
    custom_grain_filter = PydanticWhereFilter(
        where_sql_template="{{ TimeDimension('listing__ds', 'martian_day') }} = '2020-01-01'"
    )
    parse_result = query_parser.parse_and_validate_query(
        group_by_names=("listing__ds__day",),
        where_constraint=custom_grain_filter,
    )

    render_and_check(
        query_spec=parse_result.query_spec,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        mf_test_configuration=mf_test_configuration,
        request=request,
    )


@pytest.mark.sql_engine_snapshot
def test_no_metrics_with_custom_granularity_in_filter_and_group_by(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Render SQL for a metric-less query when the same custom grain is both filtered on and grouped by."""
    # Only group-by items are requested; the custom grain is used in the group by and in the where filter.
    custom_grain_filter = PydanticWhereFilter(
        where_sql_template="{{ TimeDimension('listing__ds', 'martian_day') }} = '2020-01-01'"
    )
    parse_result = query_parser.parse_and_validate_query(
        group_by_names=("listing__ds__martian_day",),
        where_constraint=custom_grain_filter,
    )

    render_and_check(
        query_spec=parse_result.query_spec,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        mf_test_configuration=mf_test_configuration,
        request=request,
    )
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- Pass Only Elements: ['metric_time__day', 'metric_time__martian_day', 'user__bio_added_ts__martian_day', 'user__bio_added_ts__month']
SELECT
subq_6.user__bio_added_ts__martian_day
, subq_6.metric_time__martian_day
subq_6.metric_time__martian_day
, subq_6.user__bio_added_ts__martian_day
, subq_6.user__bio_added_ts__month
, subq_6.metric_time__day
FROM (
Expand Down Expand Up @@ -188,8 +188,8 @@ FROM (
, subq_0.user__home_state AS user__home_state
, subq_0.new_users AS new_users
, subq_0.archived_users AS archived_users
, subq_4.martian_day AS metric_time__martian_day
, subq_5.martian_day AS user__bio_added_ts__martian_day
, subq_4.martian_day AS user__bio_added_ts__martian_day
, subq_5.martian_day AS metric_time__martian_day
FROM (
-- Read Elements From Semantic Model 'users_ds_source'
SELECT
Expand Down Expand Up @@ -427,14 +427,14 @@ FROM (
LEFT OUTER JOIN
***************************.mf_time_spine subq_4
ON
subq_0.metric_time__day = subq_4.ds
subq_0.user__bio_added_ts__day = subq_4.ds
LEFT OUTER JOIN
***************************.mf_time_spine subq_5
ON
subq_0.user__bio_added_ts__day = subq_5.ds
subq_0.metric_time__day = subq_5.ds
) subq_6
GROUP BY
user__bio_added_ts__martian_day
, metric_time__martian_day
metric_time__martian_day
, user__bio_added_ts__martian_day
, user__bio_added_ts__month
, metric_time__day
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
-- Join Standard Outputs
-- Pass Only Elements: ['metric_time__day', 'metric_time__martian_day', 'user__bio_added_ts__martian_day', 'user__bio_added_ts__month']
SELECT
subq_12.martian_day AS user__bio_added_ts__martian_day
, subq_11.martian_day AS metric_time__martian_day
subq_12.martian_day AS metric_time__martian_day
, subq_11.martian_day AS user__bio_added_ts__martian_day
, DATETIME_TRUNC(users_ds_source_src_28000.bio_added_ts, month) AS user__bio_added_ts__month
, DATETIME_TRUNC(time_spine_src_28006.ds, day) AS metric_time__day
FROM ***************************.dim_users users_ds_source_src_28000
Expand All @@ -13,13 +13,13 @@ CROSS JOIN
LEFT OUTER JOIN
***************************.mf_time_spine subq_11
ON
subq_7.metric_time__day = subq_11.ds
DATETIME_TRUNC(users_ds_source_src_28000.bio_added_ts, day) = subq_11.ds
LEFT OUTER JOIN
***************************.mf_time_spine subq_12
ON
DATETIME_TRUNC(users_ds_source_src_28000.bio_added_ts, day) = subq_12.ds
subq_7.metric_time__day = subq_12.ds
GROUP BY
user__bio_added_ts__martian_day
, metric_time__martian_day
metric_time__martian_day
, user__bio_added_ts__martian_day
, user__bio_added_ts__month
, metric_time__day
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
-- Pass Only Elements: ['listing__ds__day',]
SELECT
  subq_2.listing__ds__day
FROM (
  -- Constrain Output with WHERE
  SELECT
    subq_1.listing__ds__martian_day
    , subq_1.ds__day
    , subq_1.ds__week
    , subq_1.ds__month
    , subq_1.ds__quarter
    , subq_1.ds__year
    , subq_1.ds__extract_year
    , subq_1.ds__extract_quarter
    , subq_1.ds__extract_month
    , subq_1.ds__extract_day
    , subq_1.ds__extract_dow
    , subq_1.ds__extract_doy
    , subq_1.created_at__day
    , subq_1.created_at__week
    , subq_1.created_at__month
    , subq_1.created_at__quarter
    , subq_1.created_at__year
    , subq_1.created_at__extract_year
    , subq_1.created_at__extract_quarter
    , subq_1.created_at__extract_month
    , subq_1.created_at__extract_day
    , subq_1.created_at__extract_dow
    , subq_1.created_at__extract_doy
    , subq_1.listing__ds__day
    , subq_1.listing__ds__week
    , subq_1.listing__ds__month
    , subq_1.listing__ds__quarter
    , subq_1.listing__ds__year
    , subq_1.listing__ds__extract_year
    , subq_1.listing__ds__extract_quarter
    , subq_1.listing__ds__extract_month
    , subq_1.listing__ds__extract_day
    , subq_1.listing__ds__extract_dow
    , subq_1.listing__ds__extract_doy
    , subq_1.listing__created_at__day
    , subq_1.listing__created_at__week
    , subq_1.listing__created_at__month
    , subq_1.listing__created_at__quarter
    , subq_1.listing__created_at__year
    , subq_1.listing__created_at__extract_year
    , subq_1.listing__created_at__extract_quarter
    , subq_1.listing__created_at__extract_month
    , subq_1.listing__created_at__extract_day
    , subq_1.listing__created_at__extract_dow
    , subq_1.listing__created_at__extract_doy
    , subq_1.listing
    , subq_1.user
    , subq_1.listing__user
    , subq_1.country_latest
    , subq_1.is_lux_latest
    , subq_1.capacity_latest
    , subq_1.listing__country_latest
    , subq_1.listing__is_lux_latest
    , subq_1.listing__capacity_latest
    , subq_1.listings
    , subq_1.largest_listing
    , subq_1.smallest_listing
  FROM (
    -- Join to Custom Granularity Dataset
    -- Read Elements From Semantic Model 'listings_latest'
    SELECT
      1 AS listings
      , listings_latest_src_28000.capacity AS largest_listing
      , listings_latest_src_28000.capacity AS smallest_listing
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, day) AS ds__day
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, isoweek) AS ds__week
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, month) AS ds__month
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, quarter) AS ds__quarter
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, year) AS ds__year
      , EXTRACT(year FROM listings_latest_src_28000.created_at) AS ds__extract_year
      , EXTRACT(quarter FROM listings_latest_src_28000.created_at) AS ds__extract_quarter
      , EXTRACT(month FROM listings_latest_src_28000.created_at) AS ds__extract_month
      , EXTRACT(day FROM listings_latest_src_28000.created_at) AS ds__extract_day
      , IF(EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) - 1) AS ds__extract_dow
      , EXTRACT(dayofyear FROM listings_latest_src_28000.created_at) AS ds__extract_doy
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, day) AS created_at__day
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, isoweek) AS created_at__week
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, month) AS created_at__month
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, quarter) AS created_at__quarter
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, year) AS created_at__year
      , EXTRACT(year FROM listings_latest_src_28000.created_at) AS created_at__extract_year
      , EXTRACT(quarter FROM listings_latest_src_28000.created_at) AS created_at__extract_quarter
      , EXTRACT(month FROM listings_latest_src_28000.created_at) AS created_at__extract_month
      , EXTRACT(day FROM listings_latest_src_28000.created_at) AS created_at__extract_day
      , IF(EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) - 1) AS created_at__extract_dow
      , EXTRACT(dayofyear FROM listings_latest_src_28000.created_at) AS created_at__extract_doy
      , listings_latest_src_28000.country AS country_latest
      , listings_latest_src_28000.is_lux AS is_lux_latest
      , listings_latest_src_28000.capacity AS capacity_latest
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, day) AS listing__ds__day
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, isoweek) AS listing__ds__week
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, month) AS listing__ds__month
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, quarter) AS listing__ds__quarter
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, year) AS listing__ds__year
      , EXTRACT(year FROM listings_latest_src_28000.created_at) AS listing__ds__extract_year
      , EXTRACT(quarter FROM listings_latest_src_28000.created_at) AS listing__ds__extract_quarter
      , EXTRACT(month FROM listings_latest_src_28000.created_at) AS listing__ds__extract_month
      , EXTRACT(day FROM listings_latest_src_28000.created_at) AS listing__ds__extract_day
      , IF(EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) - 1) AS listing__ds__extract_dow
      , EXTRACT(dayofyear FROM listings_latest_src_28000.created_at) AS listing__ds__extract_doy
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, day) AS listing__created_at__day
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, isoweek) AS listing__created_at__week
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, month) AS listing__created_at__month
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, quarter) AS listing__created_at__quarter
      , DATETIME_TRUNC(listings_latest_src_28000.created_at, year) AS listing__created_at__year
      , EXTRACT(year FROM listings_latest_src_28000.created_at) AS listing__created_at__extract_year
      , EXTRACT(quarter FROM listings_latest_src_28000.created_at) AS listing__created_at__extract_quarter
      , EXTRACT(month FROM listings_latest_src_28000.created_at) AS listing__created_at__extract_month
      , EXTRACT(day FROM listings_latest_src_28000.created_at) AS listing__created_at__extract_day
      , IF(EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM listings_latest_src_28000.created_at) - 1) AS listing__created_at__extract_dow
      , EXTRACT(dayofyear FROM listings_latest_src_28000.created_at) AS listing__created_at__extract_doy
      , listings_latest_src_28000.country AS listing__country_latest
      , listings_latest_src_28000.is_lux AS listing__is_lux_latest
      , listings_latest_src_28000.capacity AS listing__capacity_latest
      , listings_latest_src_28000.listing_id AS listing
      , listings_latest_src_28000.user_id AS user
      , listings_latest_src_28000.user_id AS listing__user
      , subq_0.martian_day AS listing__ds__martian_day
    FROM ***************************.dim_listings_latest listings_latest_src_28000
    LEFT OUTER JOIN
      ***************************.mf_time_spine subq_0
    ON
      -- Join on the source expression: listing__ds__day is only a SELECT alias here, not a column of
      -- dim_listings_latest, so it cannot be referenced through the table alias in the ON clause.
      DATETIME_TRUNC(listings_latest_src_28000.created_at, day) = subq_0.ds
  ) subq_1
  WHERE listing__ds__martian_day = '2020-01-01'
) subq_2
GROUP BY
  listing__ds__day
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Constrain Output with WHERE
-- Pass Only Elements: ['listing__ds__day',]
SELECT
  listing__ds__day
FROM (
  -- Join to Custom Granularity Dataset
  -- Read Elements From Semantic Model 'listings_latest'
  SELECT
    DATETIME_TRUNC(listings_latest_src_28000.created_at, day) AS listing__ds__day
    , subq_3.martian_day AS listing__ds__martian_day
  FROM ***************************.dim_listings_latest listings_latest_src_28000
  LEFT OUTER JOIN
    ***************************.mf_time_spine subq_3
  ON
    -- Join on the source expression: listing__ds__day is only a SELECT alias here, not a column of
    -- dim_listings_latest, so it cannot be referenced through the table alias in the ON clause.
    DATETIME_TRUNC(listings_latest_src_28000.created_at, day) = subq_3.ds
) subq_4
WHERE listing__ds__martian_day = '2020-01-01'
GROUP BY
  listing__ds__day
Loading

0 comments on commit 7b5c4f4

Please sign in to comment.