Skip to content

Commit

Permalink
Use FULL OUTER JOIN when combining derived metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Nov 3, 2023
1 parent 6f6de91 commit a32a3df
Show file tree
Hide file tree
Showing 51 changed files with 552 additions and 680 deletions.
2 changes: 1 addition & 1 deletion metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def _build_metrics_output_node(
queried_linkable_specs=queried_linkable_specs,
where_constraint=where_constraint,
time_range_constraint=time_range_constraint,
- combine_metrics_join_type=SqlJoinType.INNER,
+ combine_metrics_join_type=SqlJoinType.FULL_OUTER,
),
metric_specs=[metric_spec],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
- <!-- join type = SqlJoinType.INNER -->
+ <!-- join type = SqlJoinType.FULL_OUTER -->
+ <!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ FROM (
SELECT
COALESCE(subq_9.ds__day, subq_19.ds__day) AS ds__day
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest) AS listing__country_latest
- , subq_9.bookings AS bookings
- , subq_19.views AS views
+ , MAX(subq_9.bookings) AS bookings
+ , MAX(subq_19.views) AS views
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -387,7 +387,7 @@ FROM (
, subq_7.listing__country_latest
) subq_8
) subq_9
- INNER JOIN (
+ FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_18.ds__day
Expand Down Expand Up @@ -689,20 +689,11 @@ FROM (
) subq_19
ON
(
- (
- subq_9.listing__country_latest = subq_19.listing__country_latest
- ) OR (
- (
- subq_9.listing__country_latest IS NULL
- ) AND (
- subq_19.listing__country_latest IS NULL
- )
- )
+ subq_9.listing__country_latest = subq_19.listing__country_latest
) AND (
- (
- subq_9.ds__day = subq_19.ds__day
- ) OR (
- (subq_9.ds__day IS NULL) AND (subq_19.ds__day IS NULL)
- )
+ subq_9.ds__day = subq_19.ds__day
)
+ GROUP BY
+ COALESCE(subq_9.ds__day, subq_19.ds__day)
+ , COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest)
) subq_20
Original file line number Diff line number Diff line change
@@ -1,82 +1,80 @@
-- Combine Metrics
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, CAST(subq_30.bookings AS DOUBLE) / CAST(NULLIF(subq_40.views, 0) AS DOUBLE) AS bookings_per_view
ds__day
, listing__country_latest
, CAST(bookings AS DOUBLE) / CAST(NULLIF(views, 0) AS DOUBLE) AS bookings_per_view
FROM (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Combine Metrics
SELECT
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, MAX(subq_30.bookings) AS bookings
, MAX(subq_40.views) AS views
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_23.ds__day
, listings_latest_src_10004.country
) subq_30
INNER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_23.ds__day
, listings_latest_src_10004.country
) subq_30
FULL OUTER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_33.ds__day
, listings_latest_src_10004.country
) subq_40
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_33.ds__day
, listings_latest_src_10004.country
) subq_40
ON
(
(
subq_30.listing__country_latest = subq_40.listing__country_latest
) OR (
(
subq_30.listing__country_latest IS NULL
) AND (
subq_40.listing__country_latest IS NULL
)
)
) AND (
(
) AND (
subq_30.ds__day = subq_40.ds__day
) OR (
(subq_30.ds__day IS NULL) AND (subq_40.ds__day IS NULL)
)
)
GROUP BY
COALESCE(subq_30.ds__day, subq_40.ds__day)
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest)
) subq_41
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_7.metric_time__day, subq_15.metric_time__day) AS metric_time__day
- , subq_7.bookings_fill_nulls_with_0 AS bookings_fill_nulls_with_0
- , subq_15.bookings_2_weeks_ago AS bookings_2_weeks_ago
+ , MAX(subq_7.bookings_fill_nulls_with_0) AS bookings_fill_nulls_with_0
+ , MAX(subq_15.bookings_2_weeks_ago) AS bookings_2_weeks_ago
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -238,7 +238,7 @@ FROM (
subq_4.metric_time__day = subq_3.metric_time__day
) subq_6
) subq_7
- INNER JOIN (
+ FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_14.metric_time__day
Expand Down Expand Up @@ -555,13 +555,7 @@ FROM (
) subq_14
) subq_15
ON
- (
- subq_7.metric_time__day = subq_15.metric_time__day
- ) OR (
- (
- subq_7.metric_time__day IS NULL
- ) AND (
- subq_15.metric_time__day IS NULL
- )
- )
+ subq_7.metric_time__day = subq_15.metric_time__day
+ GROUP BY
+ COALESCE(subq_7.metric_time__day, subq_15.metric_time__day)
) subq_16
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_24.metric_time__day, subq_32.metric_time__day) AS metric_time__day
- , subq_24.bookings_fill_nulls_with_0 AS bookings_fill_nulls_with_0
- , subq_32.bookings_2_weeks_ago AS bookings_2_weeks_ago
+ , MAX(subq_24.bookings_fill_nulls_with_0) AS bookings_fill_nulls_with_0
+ , MAX(subq_32.bookings_2_weeks_ago) AS bookings_2_weeks_ago
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -41,7 +41,7 @@ FROM (
subq_22.ds = subq_20.metric_time__day
) subq_23
) subq_24
- INNER JOIN (
+ FULL OUTER JOIN (
-- Join to Time Spine Dataset
-- Pass Only Elements:
-- ['bookings', 'metric_time__day']
Expand All @@ -65,13 +65,7 @@ FROM (
subq_28.ds
) subq_32
ON
- (
- subq_24.metric_time__day = subq_32.metric_time__day
- ) OR (
- (
- subq_24.metric_time__day IS NULL
- ) AND (
- subq_32.metric_time__day IS NULL
- )
- )
+ subq_24.metric_time__day = subq_32.metric_time__day
+ GROUP BY
+ COALESCE(subq_24.metric_time__day, subq_32.metric_time__day)
) subq_33
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_4.metric_time__day, subq_9.metric_time__day) AS metric_time__day
- , subq_4.ref_bookings AS ref_bookings
- , subq_9.bookings AS bookings
+ , MAX(subq_4.ref_bookings) AS ref_bookings
+ , MAX(subq_9.bookings) AS bookings
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -224,7 +224,7 @@ FROM (
subq_2.metric_time__day
) subq_3
) subq_4
- INNER JOIN (
+ FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_8.metric_time__day
Expand Down Expand Up @@ -441,13 +441,7 @@ FROM (
) subq_8
) subq_9
ON
- (
- subq_4.metric_time__day = subq_9.metric_time__day
- ) OR (
- (
- subq_4.metric_time__day IS NULL
- ) AND (
- subq_9.metric_time__day IS NULL
- )
- )
+ subq_4.metric_time__day = subq_9.metric_time__day
+ GROUP BY
+ COALESCE(subq_4.metric_time__day, subq_9.metric_time__day)
) subq_10
Loading

0 comments on commit a32a3df

Please sign in to comment.