Skip to content

Commit

Permalink
Add rendering test for predicate pushdown of saved query filters (#1380)
Browse files Browse the repository at this point in the history
A customer reported problems with predicate pushdown removing
filters from certain subqueries. The scenario they were using
involved a saved query with multiple metrics and a set of filters
defined in the saved query itself.

This scenario represents a gap in our test coverage, so we add
a rendering test here. Note the test does not reproduce the
reported issue in any way, but it is useful to have for
the purposes of future development work so we add it here.
  • Loading branch information
tlento authored Aug 28, 2024
1 parent ad63f5b commit 22d5f6b
Show file tree
Hide file tree
Showing 17 changed files with 11,211 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,21 @@ saved_query:
- trailing_2_months_revenue
group_by:
- TimeDimension('metric_time', 'day')
---
saved_query:
name: saved_query_with_metric_joins_and_filter
description: |
Saved query that includes multiple metrics from different input sources requiring a metric join and
query-level filters on a mix of categorical and time dimensions.
Note this will not run on Trino due to the invalid filter construction - Trino will not cast string literals
to timestamp types the way other engines do.
query_params:
metrics:
- bookings
- views
- bookings_per_view
group_by:
- Dimension('listing__capacity_latest')
where:
- "{{ Dimension('listing__is_lux_latest') }}"
- "{{ TimeDimension('metric_time', 'day') }} >= '2020-01-02'"
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter
from metricflow_semantics.query.query_parser import MetricFlowQueryParser
from metricflow_semantics.specs.query_param_implementations import SavedQueryParameter
from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration

from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder
Expand Down Expand Up @@ -309,3 +310,36 @@ def test_simple_join_to_time_spine_pushdown_filter_application(
dataflow_plan_builder=dataflow_plan_builder,
query_spec=parsed_query.query_spec,
)


@pytest.mark.sql_engine_snapshot
def test_saved_query_with_metric_joins_and_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    query_parser: MetricFlowQueryParser,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Tests rendering a saved query with multiple metrics and filters defined in the saved query itself.

    The query is parsed from the `saved_query_with_metric_joins_and_filter` saved query with no
    additional query-time where filter, limit, time constraint, or ordering, so the rendered SQL
    reflects only what the saved query definition specifies.

    This covers a scenario that previously had no rendering test. It is not a reproduction of any
    specific predicate pushdown bug.
    """
    # Every optional input is passed explicitly as None so that the saved query definition is the
    # only source of metrics, group-bys, and filters for the parsed query.
    parsed_query = query_parser.parse_and_validate_saved_query(
        saved_query_parameter=SavedQueryParameter("saved_query_with_metric_joins_and_filter"),
        where_filter=None,
        limit=None,
        time_constraint_start=None,
        time_constraint_end=None,
        order_by_names=None,
        order_by_parameters=None,
    )

    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        dataflow_plan_builder=dataflow_plan_builder,
        query_spec=parsed_query.query_spec,
    )

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
-- Combine Aggregated Outputs
SELECT
COALESCE(subq_61.listing__capacity_latest, subq_73.listing__capacity_latest, subq_98.listing__capacity_latest) AS listing__capacity_latest
, MAX(subq_61.bookings) AS bookings
, MAX(subq_73.views) AS views
, MAX(CAST(subq_98.bookings AS FLOAT64) / CAST(NULLIF(subq_98.views, 0) AS FLOAT64)) AS bookings_per_view
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['bookings', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(bookings) AS bookings
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['bookings', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_52.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_52.bookings AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_52
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_52.listing = listings_latest_src_28000.listing_id
) subq_57
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_61
FULL OUTER JOIN (
-- Constrain Output with WHERE
-- Pass Only Elements: ['views', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(views) AS views
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['views', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_64.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_64.views AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['views', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_28000
) subq_64
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_64.listing = listings_latest_src_28000.listing_id
) subq_69
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_73
ON
subq_61.listing__capacity_latest = subq_73.listing__capacity_latest
FULL OUTER JOIN (
-- Combine Aggregated Outputs
SELECT
COALESCE(subq_85.listing__capacity_latest, subq_97.listing__capacity_latest) AS listing__capacity_latest
, MAX(subq_85.bookings) AS bookings
, MAX(subq_97.views) AS views
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['bookings', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(bookings) AS bookings
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['bookings', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_76.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_76.bookings AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_76
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_76.listing = listings_latest_src_28000.listing_id
) subq_81
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_85
FULL OUTER JOIN (
-- Constrain Output with WHERE
-- Pass Only Elements: ['views', 'listing__capacity_latest']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing__capacity_latest
, SUM(views) AS views
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['views', 'listing__capacity_latest', 'listing__is_lux_latest', 'metric_time__day']
SELECT
subq_88.metric_time__day AS metric_time__day
, listings_latest_src_28000.is_lux AS listing__is_lux_latest
, listings_latest_src_28000.capacity AS listing__capacity_latest
, subq_88.views AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['views', 'metric_time__day', 'listing']
SELECT
DATETIME_TRUNC(ds, day) AS metric_time__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_28000
) subq_88
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_88.listing = listings_latest_src_28000.listing_id
) subq_93
WHERE ( listing__is_lux_latest ) AND ( metric_time__day >= '2020-01-02' )
GROUP BY
listing__capacity_latest
) subq_97
ON
subq_85.listing__capacity_latest = subq_97.listing__capacity_latest
GROUP BY
listing__capacity_latest
) subq_98
ON
COALESCE(subq_61.listing__capacity_latest, subq_73.listing__capacity_latest) = subq_98.listing__capacity_latest
GROUP BY
listing__capacity_latest
Loading

0 comments on commit 22d5f6b

Please sign in to comment.