Skip to content

Commit

Permalink
Bug fix: Apply metric_time filters to time spine when needed (#1455)
Browse files Browse the repository at this point in the history
There is currently a bug where, if you query a join_to_timespine metric
with a metric_time filter that is not applied in the group by, you will
end up with more rows than expected.
To fix that, we need to apply any metric_time filters to the time spine
table before joining that table to the aggregated measure. This needs to
happen before the join instead of after because after the join we may
not have access to metric_time at the grain needed for the filter. This
implements that behavior.
  • Loading branch information
courtneyholcomb authored Oct 16, 2024
1 parent ae82e8f commit d56c603
Show file tree
Hide file tree
Showing 138 changed files with 12,483 additions and 2,319 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20241009-171939.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: 'Bug fix: when querying a join_to_timespine metric with a metric_time filter
that is not included in the group by, unexpected output rows were included.'
time: 2024-10-09T17:19:39.244779-07:00
custom:
Author: courtneyholcomb
Issue: "1450"
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,17 @@ metric:
filter: "{{ Dimension('booking__is_instant') }}"
filter: "{{ Entity('listing') }} IS NOT NULL"
---
metric:
name: bookings_join_to_time_spine_with_tiered_filters
description: simple metric that joins to timespine and has metric_time filters at both the measure and metric level
type: simple
type_params:
measure:
name: bookings
join_to_timespine: true
filter: "{{ TimeDimension('metric_time', 'day') }} >= '2020-01-02'"
filter: "{{ TimeDimension('metric_time', 'day') }} <= '2020-01-02'"
---
metric:
name: every_two_days_bookers_fill_nulls_with_0
description: cumulative metric filling 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,12 @@ metric:
measure:
name: archived_users
time_granularity: hour
---
metric:
name: archived_users_join_to_time_spine
description: subdaily metric joining to time spine
type: simple
type_params:
measure:
name: archived_users
join_to_timespine: true
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
'listing__booking__listing__bookings_fill_nulls_with_0',
'listing__booking__listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__booking__listing__bookings_join_to_time_spine',
'listing__booking__listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__booking__listing__bookings_per_booker',
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
Expand Down Expand Up @@ -58,6 +59,7 @@
'listing__bookings_fill_nulls_with_0',
'listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__bookings_join_to_time_spine',
'listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__bookings_per_booker',
'listing__bookings_per_dollar',
'listing__bookings_per_listing',
Expand Down Expand Up @@ -194,6 +196,7 @@
'user__archived_at__week',
'user__archived_at__year',
'user__archived_users',
'user__archived_users_join_to_time_spine',
'user__bio_added_ts__day',
'user__bio_added_ts__extract_day',
'user__bio_added_ts__extract_day',
Expand Down Expand Up @@ -430,6 +433,7 @@
'user__listing__user__bookings_fill_nulls_with_0',
'user__listing__user__bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__bookings_join_to_time_spine',
'user__listing__user__bookings_join_to_time_spine_with_tiered_filters',
'user__listing__user__bookings_per_booker',
'user__listing__user__bookings_per_dollar',
'user__listing__user__bookings_per_listing',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_bookings_0 ['JOINED', 'METRIC']
Expand Down Expand Up @@ -82,6 +83,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('listing',)") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_listing ['JOINED', 'METRIC']
Expand Down Expand Up @@ -143,6 +145,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_listing ['JOINED', 'METRIC']
Expand Down Expand Up @@ -183,6 +186,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('revenue_instance', 'user')") revenue_all_time ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") active_listings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") archived_users ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") archived_users_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") current_account_balance_by_user ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") identity_verifications ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") largest_listing ['JOINED', 'METRIC']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
'company__listing__user__company__bookings_fill_nulls_with_0',
'company__listing__user__company__bookings_fill_nulls_with_0_without_time_spine',
'company__listing__user__company__bookings_join_to_time_spine',
'company__listing__user__company__bookings_join_to_time_spine_with_tiered_filters',
'company__listing__user__company__bookings_per_booker',
'company__listing__user__company__bookings_per_dollar',
'company__listing__user__company__bookings_per_view',
Expand Down Expand Up @@ -94,6 +95,7 @@
'company__user',
'company__user__company__active_listings',
'company__user__company__archived_users',
'company__user__company__archived_users_join_to_time_spine',
'company__user__company__current_account_balance_by_user',
'company__user__company__identity_verifications',
'company__user__company__largest_listing',
Expand Down Expand Up @@ -139,6 +141,7 @@
'guest__booking__guest__bookings_fill_nulls_with_0',
'guest__booking__guest__bookings_fill_nulls_with_0_without_time_spine',
'guest__booking__guest__bookings_join_to_time_spine',
'guest__booking__guest__bookings_join_to_time_spine_with_tiered_filters',
'guest__booking__guest__bookings_per_booker',
'guest__booking__guest__bookings_per_dollar',
'guest__booking__guest__derived_bookings_0',
Expand Down Expand Up @@ -173,6 +176,7 @@
'guest__bookings_fill_nulls_with_0',
'guest__bookings_fill_nulls_with_0_without_time_spine',
'guest__bookings_join_to_time_spine',
'guest__bookings_join_to_time_spine_with_tiered_filters',
'guest__bookings_per_booker',
'guest__bookings_per_dollar',
'guest__derived_bookings_0',
Expand Down Expand Up @@ -218,6 +222,7 @@
'host__booking__host__bookings_fill_nulls_with_0',
'host__booking__host__bookings_fill_nulls_with_0_without_time_spine',
'host__booking__host__bookings_join_to_time_spine',
'host__booking__host__bookings_join_to_time_spine_with_tiered_filters',
'host__booking__host__bookings_per_booker',
'host__booking__host__bookings_per_dollar',
'host__booking__host__derived_bookings_0',
Expand Down Expand Up @@ -252,6 +257,7 @@
'host__bookings_fill_nulls_with_0',
'host__bookings_fill_nulls_with_0_without_time_spine',
'host__bookings_join_to_time_spine',
'host__bookings_join_to_time_spine_with_tiered_filters',
'host__bookings_per_booker',
'host__bookings_per_dollar',
'host__derived_bookings_0',
Expand Down Expand Up @@ -298,6 +304,7 @@
'listing__booking__listing__bookings_fill_nulls_with_0',
'listing__booking__listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__booking__listing__bookings_join_to_time_spine',
'listing__booking__listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__booking__listing__bookings_per_booker',
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
Expand Down Expand Up @@ -333,6 +340,7 @@
'listing__bookings_fill_nulls_with_0',
'listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__bookings_join_to_time_spine',
'listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__bookings_per_booker',
'listing__bookings_per_dollar',
'listing__bookings_per_listing',
Expand Down Expand Up @@ -405,6 +413,7 @@
'lux_listing__listing__lux_listing__bookings_fill_nulls_with_0',
'lux_listing__listing__lux_listing__bookings_fill_nulls_with_0_without_time_spine',
'lux_listing__listing__lux_listing__bookings_join_to_time_spine',
'lux_listing__listing__lux_listing__bookings_join_to_time_spine_with_tiered_filters',
'lux_listing__listing__lux_listing__bookings_per_booker',
'lux_listing__listing__lux_listing__bookings_per_dollar',
'lux_listing__listing__lux_listing__bookings_per_listing',
Expand Down Expand Up @@ -471,6 +480,7 @@
'user__archived_at__extract_year',
'user__archived_at__hour',
'user__archived_users',
'user__archived_users_join_to_time_spine',
'user__bio_added_ts__extract_day',
'user__bio_added_ts__extract_dow',
'user__bio_added_ts__extract_doy',
Expand Down Expand Up @@ -546,6 +556,7 @@
'user__listing__user__bookings_fill_nulls_with_0',
'user__listing__user__bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__bookings_join_to_time_spine',
'user__listing__user__bookings_join_to_time_spine_with_tiered_filters',
'user__listing__user__bookings_per_booker',
'user__listing__user__bookings_per_dollar',
'user__listing__user__bookings_per_listing',
Expand Down
30 changes: 23 additions & 7 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,7 +1738,7 @@ def _build_aggregated_measure_from_measure_source_node(
metric_input_measure_specs=(metric_input_measure_spec,),
)

# Joining to time spine after aggregation is for measures that specify `join_to_timespine` in the YAML spec.
# Joining to time spine after aggregation is for measures that specify `join_to_timespine: true` in the YAML spec.
after_aggregation_time_spine_join_description = (
metric_input_measure_spec.after_aggregation_time_spine_join_description
)
Expand All @@ -1747,6 +1747,20 @@ def _build_aggregated_measure_from_measure_source_node(
f"Expected {SqlJoinType.LEFT_OUTER} for joining to time spine after aggregation. Remove this if "
f"there's a new use case."
)
# Find filters that contain only metric_time or agg_time_dimension. They will be applied to the time spine table.
agg_time_only_filters: List[WhereFilterSpec] = []
non_agg_time_filters: List[WhereFilterSpec] = []
for filter_spec in metric_input_measure_spec.filter_spec_set.after_measure_aggregation_filter_specs:
included_agg_time_specs = filter_spec.linkable_spec_set.included_agg_time_dimension_specs_for_measure(
measure_reference=measure_spec.reference, semantic_model_lookup=self._semantic_model_lookup
)
if set(included_agg_time_specs) == set(filter_spec.linkable_spec_set.as_tuple):
agg_time_only_filters.append(filter_spec)
else:
non_agg_time_filters.append(filter_spec)

# TODO: split this node into TimeSpineSourceNode and JoinToTimeSpineNode - then can use standard nodes here
# like JoinToCustomGranularityNode, WhereConstraintNode, etc.
output_node: DataflowPlanNode = JoinToTimeSpineNode.create(
parent_node=aggregate_measures_node,
requested_agg_time_dimension_specs=queried_agg_time_dimension_specs,
Expand All @@ -1755,19 +1769,21 @@ def _build_aggregated_measure_from_measure_source_node(
time_range_constraint=predicate_pushdown_state.time_range_constraint,
offset_window=after_aggregation_time_spine_join_description.offset_window,
offset_to_grain=after_aggregation_time_spine_join_description.offset_to_grain,
time_spine_filters=agg_time_only_filters,
)

# Since new rows might have been added due to time spine join, re-apply constraints here. Only re-apply filters
# for specs that are also in the queried specs, since those are the only ones that might have changed after the
# time spine join.
queried_filter_specs = [
# for specs that are also in the queried specs, since those are the only ones that could change after the time
# spine join. Exclude filters that contain only metric_time or an agg time dimension, since were already applied
# to the time spine table.
queried_non_agg_time_filter_specs = [
filter_spec
for filter_spec in metric_input_measure_spec.filter_spec_set.after_measure_aggregation_filter_specs
for filter_spec in non_agg_time_filters
if set(filter_spec.linkable_specs).issubset(set(queried_linkable_specs.as_tuple))
]
if len(queried_filter_specs) > 0:
if len(queried_non_agg_time_filter_specs) > 0:
output_node = WhereConstraintNode.create(
parent_node=output_node, where_specs=queried_filter_specs, always_apply=True
parent_node=output_node, where_specs=queried_non_agg_time_filter_specs, always_apply=True
)

# TODO: this will break if you query by agg_time_dimension but apply a time constraint on metric_time.
Expand Down
Loading

0 comments on commit d56c603

Please sign in to comment.