Skip to content

Commit

Permalink
Use FULL OUTER JOIN for ratio metrics, too
Browse files: browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Nov 3, 2023
1 parent 3e5e8c2 commit dbb8a2f
Show file tree
Hide file tree
Showing 44 changed files with 781 additions and 871 deletions.
3 changes: 1 addition & 2 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,13 @@ def _build_metrics_output_node(
f"For {metric.type} metric: {metric_spec}, needed metrics are:\n"
f"{pformat_big_objects(metric_input_specs=metric_input_specs)}"
)
join_type = SqlJoinType.FULL_OUTER if metric.type is MetricType.DERIVED else SqlJoinType.INNER
compute_metrics_node = ComputeMetricsNode(
parent_node=self._build_metrics_output_node(
metric_specs=metric_input_specs,
queried_linkable_specs=queried_linkable_specs,
where_constraint=where_constraint,
time_range_constraint=time_range_constraint,
combine_metrics_join_type=join_type,
combine_metrics_join_type=SqlJoinType.FULL_OUTER,
),
metric_specs=[metric_spec],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ integration_test:
WHERE is_instant
GROUP BY ds
) a
JOIN (
FULL OUTER JOIN (
SELECT
CAST(NULLIF(MAX(booking_value), 0) AS {{ double_data_type_name }} ) AS max_booking_value
, ds
Expand Down Expand Up @@ -48,7 +48,7 @@ integration_test:
WHERE listings_latest.is_lux
GROUP BY fct_bookings.ds
) a
JOIN (
FULL OUTER JOIN (
SELECT
CAST(NULLIF(MAX(booking_value), 0) AS {{ double_data_type_name }} ) AS max_booking_value
, ds
Expand Down Expand Up @@ -79,7 +79,7 @@ integration_test:
WHERE listings_latest.is_lux
GROUP BY fct_bookings.ds
) a
JOIN (
FULL OUTER JOIN (
SELECT
CAST(NULLIF(SUM(booking_value), 0) AS {{ double_data_type_name }} ) AS booking_value
, ds
Expand Down Expand Up @@ -107,7 +107,7 @@ integration_test:
WHERE is_instant
GROUP BY ds
) a
JOIN (
FULL OUTER JOIN (
SELECT
CAST(NULLIF(SUM(booking_value), 0) AS {{ double_data_type_name }} ) AS booking_value
, ds
Expand Down Expand Up @@ -153,7 +153,7 @@ integration_test:
WHERE dul_west.home_state_latest IN ('CA', 'HI', 'WA')
GROUP BY fa_west_filtered.ds
) a
JOIN (
FULL OUTER JOIN (
SELECT
CAST(SUM(account_balance) AS {{ double_data_type_name }}) AS total_account_balance_first_day
, fa_east_filtered.ds
Expand Down
4 changes: 2 additions & 2 deletions metricflow/test/integration/test_cases/itest_metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ integration_test:
GROUP BY
ds
) groupby_8cbdaa28
JOIN (
FULL OUTER JOIN (
SELECT
SUM(1) AS views
, ds
Expand Down Expand Up @@ -350,7 +350,7 @@ integration_test:
GROUP BY
ds
) groupby_8cbdaa28
JOIN (
FULL OUTER JOIN (
SELECT
SUM(1) AS listings
, created_at AS ds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ FROM (
SELECT
COALESCE(subq_9.ds__day, subq_19.ds__day) AS ds__day
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest) AS listing__country_latest
, subq_9.bookings AS bookings
, subq_19.views AS views
, MAX(subq_9.bookings) AS bookings
, MAX(subq_19.views) AS views
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -387,7 +387,7 @@ FROM (
, listing__country_latest
) subq_8
) subq_9
INNER JOIN (
FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_18.ds__day
Expand Down Expand Up @@ -689,20 +689,11 @@ FROM (
) subq_19
ON
(
(
subq_9.listing__country_latest = subq_19.listing__country_latest
) OR (
(
subq_9.listing__country_latest IS NULL
) AND (
subq_19.listing__country_latest IS NULL
)
)
subq_9.listing__country_latest = subq_19.listing__country_latest
) AND (
(
subq_9.ds__day = subq_19.ds__day
) OR (
(subq_9.ds__day IS NULL) AND (subq_19.ds__day IS NULL)
)
subq_9.ds__day = subq_19.ds__day
)
GROUP BY
ds__day
, listing__country_latest
) subq_20
Original file line number Diff line number Diff line change
@@ -1,82 +1,80 @@
-- Combine Metrics
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, CAST(subq_30.bookings AS FLOAT64) / CAST(NULLIF(subq_40.views, 0) AS FLOAT64) AS bookings_per_view
ds__day
, listing__country_latest
, CAST(bookings AS FLOAT64) / CAST(NULLIF(views, 0) AS FLOAT64) AS bookings_per_view
FROM (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Combine Metrics
SELECT
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, MAX(subq_30.bookings) AS bookings
, MAX(subq_40.views) AS views
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC(ds, day) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
ds__day
, listing__country_latest
) subq_30
INNER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
SELECT
DATE_TRUNC(ds, day) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
ds__day
, listing__country_latest
) subq_30
FULL OUTER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC(ds, day) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
SELECT
DATE_TRUNC(ds, day) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
ds__day
, listing__country_latest
) subq_40
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
ds__day
, listing__country_latest
) subq_40
ON
(
(
subq_30.listing__country_latest = subq_40.listing__country_latest
) OR (
(
subq_30.listing__country_latest IS NULL
) AND (
subq_40.listing__country_latest IS NULL
)
)
) AND (
(
) AND (
subq_30.ds__day = subq_40.ds__day
) OR (
(subq_30.ds__day IS NULL) AND (subq_40.ds__day IS NULL)
)
)
GROUP BY
ds__day
, listing__country_latest
) subq_41
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ FROM (
SELECT
COALESCE(subq_9.ds__day, subq_19.ds__day) AS ds__day
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest) AS listing__country_latest
, subq_9.bookings AS bookings
, subq_19.views AS views
, MAX(subq_9.bookings) AS bookings
, MAX(subq_19.views) AS views
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -387,7 +387,7 @@ FROM (
, subq_7.listing__country_latest
) subq_8
) subq_9
INNER JOIN (
FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_18.ds__day
Expand Down Expand Up @@ -689,20 +689,11 @@ FROM (
) subq_19
ON
(
(
subq_9.listing__country_latest = subq_19.listing__country_latest
) OR (
(
subq_9.listing__country_latest IS NULL
) AND (
subq_19.listing__country_latest IS NULL
)
)
subq_9.listing__country_latest = subq_19.listing__country_latest
) AND (
(
subq_9.ds__day = subq_19.ds__day
) OR (
(subq_9.ds__day IS NULL) AND (subq_19.ds__day IS NULL)
)
subq_9.ds__day = subq_19.ds__day
)
GROUP BY
COALESCE(subq_9.ds__day, subq_19.ds__day)
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest)
) subq_20
Loading

0 comments on commit dbb8a2f

Please sign in to comment.