Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rendering test for predicate pushdown of saved query filters #1380

Merged
merged 2 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,21 @@ saved_query:
- trailing_2_months_revenue
group_by:
- TimeDimension('metric_time', 'day')
---
saved_query:
name: saved_query_with_metric_joins_and_filter
description: |
Saved query that includes multiple metrics from different input sources requiring a metric join and
a query-level filter on a mix of categorical and time dimensions.
Note this will not run on Trino due to the invalid filter construction - Trino will not cast string literals
to timestamp types the way other engines do.
query_params:
metrics:
- bookings
- views
- bookings_per_view
group_by:
- Dimension('listing__capacity_latest')
where:
- "{{ Dimension('listing__is_lux_latest') }}"
- "{{ TimeDimension('metric_time', 'day') }} >= '2020-01-02'"
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter
from metricflow_semantics.query.query_parser import MetricFlowQueryParser
from metricflow_semantics.specs.query_param_implementations import SavedQueryParameter
from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration

from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder
Expand Down Expand Up @@ -309,3 +310,36 @@ def test_simple_join_to_time_spine_pushdown_filter_application(
dataflow_plan_builder=dataflow_plan_builder,
query_spec=parsed_query.query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_saved_query_with_metric_joins_and_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    query_parser: MetricFlowQueryParser,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Tests rendering a saved query that joins multiple metrics and carries its own where filters.

    The `saved_query_with_metric_joins_and_filter` saved query requests metrics from different input
    sources and defines filters on a categorical dimension and on metric_time. The rendered SQL is
    checked against the stored snapshot, which captures where those filters are applied relative to
    the metric joins.
    """
    # No query-time overrides are supplied here: the metrics, group-by items, and where filters
    # all come from the saved query definition itself.
    parsed_query = query_parser.parse_and_validate_saved_query(
        saved_query_parameter=SavedQueryParameter("saved_query_with_metric_joins_and_filter"),
        where_filter=None,
        limit=None,
        time_constraint_start=None,
        time_constraint_end=None,
        order_by_names=None,
        order_by_parameters=None,
    )

    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        dataflow_plan_builder=dataflow_plan_builder,
        query_spec=parsed_query.query_spec,
    )

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
-- Combine Aggregated Outputs
SELECT
COALESCE(subq_61.listing__capacity_latest, subq_73.listing__capacity_latest, subq_98.listing__capacity_latest) AS listing__capacity_latest
, MAX(subq_61.bookings) AS bookings
, MAX(subq_73.views) AS views
, MAX(CAST(subq_98.bookings AS FLOAT64) / CAST(NULLIF(subq_98.views, 0) AS FLOAT64)) AS bookings_per_view
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['bookings', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(bookings) AS bookings
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['bookings', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_52.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_52.bookings AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_52
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_52.listing = listings_latest_src_28000.listing_id
) subq_57
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_61
FULL OUTER JOIN (
-- Constrain Output with WHERE
-- Pass Only Elements: ['views', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(views) AS views
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['views', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_64.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_64.views AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['views', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_28000
) subq_64
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_64.listing = listings_latest_src_28000.listing_id
) subq_69
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_73
ON
subq_61.listing__capacity_latest = subq_73.listing__capacity_latest
FULL OUTER JOIN (
-- Combine Aggregated Outputs
SELECT
COALESCE(subq_85.listing__capacity_latest, subq_97.listing__capacity_latest) AS listing__capacity_latest
, MAX(subq_85.bookings) AS bookings
, MAX(subq_97.views) AS views
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['bookings', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(bookings) AS bookings
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['bookings', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_76.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_76.bookings AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_76
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_76.listing = listings_latest_src_28000.listing_id
) subq_81
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_85
FULL OUTER JOIN (
-- Constrain Output with WHERE
-- Pass Only Elements: ['views', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(views) AS views
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['views', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_88.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_88.views AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['views', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_28000
) subq_88
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_88.listing = listings_latest_src_28000.listing_id
) subq_93
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_97
ON
subq_85.listing__capacity_latest = subq_97.listing__capacity_latest
GROUP BY
listing__capacity_latest
) subq_98
ON
COALESCE(subq_61.listing__capacity_latest, subq_73.listing__capacity_latest) = subq_98.listing__capacity_latest
GROUP BY
listing__capacity_latest
Loading
Loading