Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Appropriate handling for group by source nodes in ComputeMetricsBranchCombiner #1212

Merged
merged 4 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -761,3 +761,14 @@ metric:
type_params:
measure: listings
filter: "{{ Metric('bookings', ['listing']) }} > 2"
---
metric:
name: popular_listing_bookings_per_booker
description: bookings per booker for listings with at least 10 views
type: ratio
type_params:
numerator:
name: listings
denominator:
name: listings
filter: "{{ Metric('views', ['listing']) }} > 10"
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
'listing__min_booking_value',
'listing__nested_fill_nulls_without_time_spine',
'listing__non_referred_bookings_pct',
'listing__popular_listing_bookings_per_booker',
'listing__referred_bookings',
'listing__smallest_listing',
'listing__twice_bookings_fill_nulls_with_0_without_time_spine',
Expand Down Expand Up @@ -289,13 +290,15 @@
'user__listing__user__min_booking_value',
'user__listing__user__nested_fill_nulls_without_time_spine',
'user__listing__user__non_referred_bookings_pct',
'user__listing__user__popular_listing_bookings_per_booker',
'user__listing__user__referred_bookings',
'user__listing__user__smallest_listing',
'user__listing__user__twice_bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__views',
'user__listing__user__views_times_booking_value',
'user__listings',
'user__lux_listings',
'user__popular_listing_bookings_per_booker',
'user__regional_starting_balance_ratios',
'user__revenue',
'user__revenue_all_time',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('listing',)") min_booking_value ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") nested_fill_nulls_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") non_referred_bookings_pct ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") popular_listing_bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") referred_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") smallest_listing ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") twice_bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
Expand Down Expand Up @@ -166,6 +167,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('listing', 'user')") min_booking_value ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") nested_fill_nulls_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") non_referred_bookings_pct ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") popular_listing_bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") referred_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") smallest_listing ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") twice_bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
Expand All @@ -180,6 +182,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('user',)") largest_listing ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") listings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") lux_listings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") popular_listing_bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") regional_starting_balance_ratios ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") revenue ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") revenue_all_time ['JOINED', 'METRIC']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
'company__user__company__largest_listing',
'company__user__company__listings',
'company__user__company__lux_listings',
'company__user__company__popular_listing_bookings_per_booker',
'company__user__company__regional_starting_balance_ratios',
'company__user__company__revenue',
'company__user__company__revenue_all_time',
Expand Down Expand Up @@ -359,6 +360,7 @@
'listing__min_booking_value',
'listing__nested_fill_nulls_without_time_spine',
'listing__non_referred_bookings_pct',
'listing__popular_listing_bookings_per_booker',
'listing__referred_bookings',
'listing__smallest_listing',
'listing__twice_bookings_fill_nulls_with_0_without_time_spine',
Expand Down Expand Up @@ -411,6 +413,7 @@
'lux_listing__listing__lux_listing__min_booking_value',
'lux_listing__listing__lux_listing__nested_fill_nulls_without_time_spine',
'lux_listing__listing__lux_listing__non_referred_bookings_pct',
'lux_listing__listing__lux_listing__popular_listing_bookings_per_booker',
'lux_listing__listing__lux_listing__referred_bookings',
'lux_listing__listing__lux_listing__smallest_listing',
'lux_listing__listing__lux_listing__twice_bookings_fill_nulls_with_0_without_time_spine',
Expand Down Expand Up @@ -521,13 +524,15 @@
'user__listing__user__min_booking_value',
'user__listing__user__nested_fill_nulls_without_time_spine',
'user__listing__user__non_referred_bookings_pct',
'user__listing__user__popular_listing_bookings_per_booker',
'user__listing__user__referred_bookings',
'user__listing__user__smallest_listing',
'user__listing__user__twice_bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__views',
'user__listing__user__views_times_booking_value',
'user__listings',
'user__lux_listings',
'user__popular_listing_bookings_per_booker',
'user__regional_starting_balance_ratios',
'user__revenue',
'user__revenue_all_time',
Expand Down
17 changes: 16 additions & 1 deletion metricflow/dataflow/nodes/compute_metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Sequence, Set
from typing import Sequence, Set, Tuple

from metricflow_semantics.dag.id_prefix import IdPrefix, StaticIdPrefix
from metricflow_semantics.dag.mf_dag import DisplayedProperty
Expand Down Expand Up @@ -82,9 +82,24 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa:
return (
isinstance(other_node, self.__class__)
and other_node.metric_specs == self.metric_specs
and other_node.aggregated_to_elements == self.aggregated_to_elements
and other_node.for_group_by_source_node == self.for_group_by_source_node
)

def can_combine(self, other_node: ComputeMetricsNode) -> Tuple[bool, str]:
"""Check certain node attributes against another node to determine if the two can be combined.

Return a bool and a string reason for the failure to combine (if applicable) to be used in logging for
ComputeMetricsBranchCombiner.
"""
if not other_node.aggregated_to_elements == self.aggregated_to_elements:
return False, "nodes are aggregated to different elements"

if other_node.for_group_by_source_node != self.for_group_by_source_node:
return False, "one node is a group by metric source node"

return True, ""

def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> ComputeMetricsNode: # noqa: D102
assert len(new_parent_nodes) == 1
return ComputeMetricsNode(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,11 +285,12 @@ def visit_compute_metrics_node(self, node: ComputeMetricsNode) -> ComputeMetrics
)
return ComputeMetricsBranchCombinerResult()

if not self._current_left_node.aggregated_to_elements == current_right_node.aggregated_to_elements:
can_combine, combine_failure_reason = self._current_left_node.can_combine(current_right_node)
if not can_combine:
self._log_combine_failure(
left_node=self._current_left_node,
right_node=current_right_node,
combine_failure_reason="nodes are aggregated to different elements",
combine_failure_reason=combine_failure_reason,
)
return ComputeMetricsBranchCombinerResult()

Expand All @@ -308,6 +309,7 @@ def visit_compute_metrics_node(self, node: ComputeMetricsNode) -> ComputeMetrics
parent_node=combined_parent_node,
metric_specs=unique_metric_specs,
aggregated_to_elements=current_right_node.aggregated_to_elements,
for_group_by_source_node=current_right_node.for_group_by_source_node,
)
self._log_combine_success(
left_node=self._current_left_node,
Expand Down
19 changes: 19 additions & 0 deletions tests_metricflow/integration/test_cases/itest_metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2067,3 +2067,22 @@ integration_test:
) subq
ON l.listing_id = subq.listing_id
WHERE view__listing__views > 2
---
integration_test:
name: test_filter_defined_on_ratio_metric
description: Query a ratio metric with a metric-level filter
model: SIMPLE_MODEL
metrics: ["popular_listing_bookings_per_booker"]
check_query: |
SELECT
CAST(SUM(1) AS {{ double_data_type_name }}) / CAST(NULLIF(SUM(1), 0) AS {{ double_data_type_name }}) AS popular_listing_bookings_per_booker
FROM {{ source_schema }}.dim_listings_latest l
LEFT OUTER JOIN (
SELECT
listing_id
, SUM(1) AS views
FROM {{ source_schema }}.fct_views v
GROUP BY listing_id
) metric_subquery
ON l.listing_id = metric_subquery.listing_id
WHERE views > 10
Loading