Skip to content

Commit

Permalink
Enable SourceScanOptimizer in query rendering tests
Browse files Browse the repository at this point in the history
The query rendering test suite recently added support for
DataflowPlanOptimizer usage, but it enabled only the
PredicatePushdownOptimizer to support development.

Now that we are getting ready to release, we need to see how the
queries come out with all optimizers in place, so we add the
missing SourceScanOptimizer to the set.
  • Loading branch information
tlento committed Jun 26, 2024
1 parent 27328d4 commit 7da2900
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 264 deletions.
5 changes: 4 additions & 1 deletion tests_metricflow/query_rendering/compare_rendered_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def render_and_check(
)

# Run dataflow -> sql conversion with all optimizers
optimizations = (DataflowPlanOptimization.PREDICATE_PUSHDOWN,)
optimizations = (
DataflowPlanOptimization.SOURCE_SCAN,
DataflowPlanOptimization.PREDICATE_PUSHDOWN,
)
if is_distinct_values_plan:
optimized_plan = dataflow_plan_builder.build_plan_for_distinct_values(query_spec, optimizations=optimizations)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,22 @@ SELECT
metric_time__day
, (bookings - ref_bookings) * 1.0 / bookings AS non_referred_bookings_pct
FROM (
-- Combine Aggregated Outputs
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_15.metric_time__day, subq_20.metric_time__day) AS metric_time__day
, MAX(subq_15.ref_bookings) AS ref_bookings
, MAX(subq_20.bookings) AS bookings
metric_time__day
, SUM(referred_bookings) AS ref_bookings
, SUM(bookings) AS bookings
FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['referred_bookings', 'bookings', 'metric_time__day']
SELECT
metric_time__day
, SUM(referred_bookings) AS ref_bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['referred_bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_13
GROUP BY
metric_time__day
) subq_15
FULL OUTER JOIN (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
metric_time__day
, SUM(bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_18
GROUP BY
metric_time__day
) subq_20
ON
subq_15.metric_time__day = subq_20.metric_time__day
DATE_TRUNC('day', ds) AS metric_time__day
, 1 AS bookings
, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_13
GROUP BY
COALESCE(subq_15.metric_time__day, subq_20.metric_time__day)
) subq_21
metric_time__day
) subq_15
Original file line number Diff line number Diff line change
Expand Up @@ -5,103 +5,58 @@ SELECT
FROM (
-- Combine Aggregated Outputs
SELECT
COALESCE(subq_34.metric_time__day, subq_39.metric_time__day, subq_44.metric_time__day) AS metric_time__day
, MAX(subq_34.non_referred) AS non_referred
, MAX(subq_39.instant) AS instant
, MAX(subq_44.bookings) AS bookings
COALESCE(subq_28.metric_time__day, subq_33.metric_time__day) AS metric_time__day
, MAX(subq_28.non_referred) AS non_referred
, MAX(subq_33.instant) AS instant
, MAX(subq_33.bookings) AS bookings
FROM (
-- Compute Metrics via Expressions
SELECT
metric_time__day
, (bookings - ref_bookings) * 1.0 / bookings AS non_referred
FROM (
-- Combine Aggregated Outputs
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_27.metric_time__day, subq_32.metric_time__day) AS metric_time__day
, MAX(subq_27.ref_bookings) AS ref_bookings
, MAX(subq_32.bookings) AS bookings
metric_time__day
, SUM(referred_bookings) AS ref_bookings
, SUM(bookings) AS bookings
FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['referred_bookings', 'bookings', 'metric_time__day']
SELECT
metric_time__day
, SUM(referred_bookings) AS ref_bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['referred_bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_25
GROUP BY
metric_time__day
) subq_27
FULL OUTER JOIN (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
metric_time__day
, SUM(bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_30
GROUP BY
metric_time__day
) subq_32
ON
subq_27.metric_time__day = subq_32.metric_time__day
DATE_TRUNC('day', ds) AS metric_time__day
, 1 AS bookings
, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_25
GROUP BY
COALESCE(subq_27.metric_time__day, subq_32.metric_time__day)
) subq_33
) subq_34
metric_time__day
) subq_27
) subq_28
FULL OUTER JOIN (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
metric_time__day
, SUM(instant_bookings) AS instant
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['instant_bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_37
GROUP BY
metric_time__day
) subq_39
ON
subq_34.metric_time__day = subq_39.metric_time__day
FULL OUTER JOIN (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
metric_time__day
, SUM(bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'metric_time__day']
-- Pass Only Elements: ['instant_bookings', 'bookings', 'metric_time__day']
SELECT
DATE_TRUNC('day', ds) AS metric_time__day
, 1 AS bookings
, CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_42
) subq_31
GROUP BY
metric_time__day
) subq_44
) subq_33
ON
COALESCE(subq_34.metric_time__day, subq_39.metric_time__day) = subq_44.metric_time__day
subq_28.metric_time__day = subq_33.metric_time__day
GROUP BY
COALESCE(subq_34.metric_time__day, subq_39.metric_time__day, subq_44.metric_time__day)
) subq_45
COALESCE(subq_28.metric_time__day, subq_33.metric_time__day)
) subq_34
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,37 @@ FROM (
-- Combine Aggregated Outputs
SELECT
MAX(subq_45.average_booking_value) AS average_booking_value
, MAX(subq_57.bookings) AS bookings
, MAX(subq_64.booking_value) AS booking_value
, MAX(subq_45.bookings) AS bookings
, MAX(subq_52.booking_value) AS booking_value
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['average_booking_value',]
-- Pass Only Elements: ['average_booking_value', 'bookings']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
AVG(average_booking_value) AS average_booking_value
, SUM(bookings) AS bookings
FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['average_booking_value', 'listing__is_lux_latest', 'booking__is_instant']
-- Pass Only Elements: ['average_booking_value', 'bookings', 'listing__is_lux_latest', 'booking__is_instant']
SELECT
listings_latest_src_28000.is_lux AS listing__is_lux_latest
, subq_36.bookings AS bookings
, subq_36.average_booking_value AS average_booking_value
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['average_booking_value', 'booking__is_instant', 'listing']
-- Pass Only Elements: ['average_booking_value', 'bookings', 'booking__is_instant', 'listing']
SELECT
listing
, bookings
, average_booking_value
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
SELECT
listing_id AS listing
, is_instant AS booking__is_instant
, 1 AS bookings
, booking_value AS average_booking_value
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_34
Expand All @@ -48,36 +52,6 @@ FROM (
) subq_41
WHERE listing__is_lux_latest
) subq_45
CROSS JOIN (
-- Join Standard Outputs
-- Pass Only Elements: ['bookings', 'listing__is_lux_latest', 'booking__is_instant']
-- Pass Only Elements: ['bookings',]
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
SUM(subq_49.bookings) AS bookings
FROM (
-- Constrain Output with WHERE
-- Pass Only Elements: ['bookings', 'booking__is_instant', 'listing']
SELECT
listing
, bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
SELECT
listing_id AS listing
, is_instant AS booking__is_instant
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_47
WHERE booking__is_instant
) subq_49
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_28000
ON
subq_49.listing = listings_latest_src_28000.listing_id
) subq_57
CROSS JOIN (
-- Constrain Output with WHERE
-- Pass Only Elements: ['booking_value', 'booking__is_instant']
Expand All @@ -93,8 +67,8 @@ FROM (
is_instant AS booking__is_instant
, booking_value
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_59
) subq_47
WHERE booking__is_instant
) subq_64
) subq_65
) subq_66
) subq_52
) subq_53
) subq_54
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ FROM (
-- Join Standard Outputs
-- Pass Only Elements: ['listings', 'listing__bookings_per_booker']
SELECT
CAST(subq_34.bookings AS DOUBLE) / CAST(NULLIF(subq_34.bookers, 0) AS DOUBLE) AS listing__bookings_per_booker
CAST(subq_28.bookings AS DOUBLE) / CAST(NULLIF(subq_28.bookers, 0) AS DOUBLE) AS listing__bookings_per_booker
, subq_23.listings AS listings
FROM (
-- Read Elements From Semantic Model 'listings_latest'
Expand All @@ -20,48 +20,26 @@ FROM (
FROM ***************************.dim_listings_latest listings_latest_src_28000
) subq_23
LEFT OUTER JOIN (
-- Combine Aggregated Outputs
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_28.listing, subq_33.listing) AS listing
, MAX(subq_28.bookings) AS bookings
, MAX(subq_33.bookers) AS bookers
listing
, SUM(bookings) AS bookings
, COUNT(DISTINCT bookers) AS bookers
FROM (
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
listing
, SUM(bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookings', 'listing']
SELECT
listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_28000
) subq_26
GROUP BY
listing
) subq_28
FULL OUTER JOIN (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements: ['bookers', 'listing']
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Pass Only Elements: ['bookings', 'bookers', 'listing']
SELECT
listing_id AS listing
, COUNT(DISTINCT guest_id) AS bookers
, 1 AS bookings
, guest_id AS bookers
FROM ***************************.fct_bookings bookings_source_src_28000
GROUP BY
listing_id
) subq_33
ON
subq_28.listing = subq_33.listing
) subq_26
GROUP BY
COALESCE(subq_28.listing, subq_33.listing)
) subq_34
listing
) subq_28
ON
subq_23.listing = subq_34.listing
) subq_38
subq_23.listing = subq_28.listing
) subq_32
WHERE listing__bookings_per_booker > 1
Loading

0 comments on commit 7da2900

Please sign in to comment.