Skip to content

Commit

Permalink
Don't add measure-level filters to post-agg filters (#1452)
Browse files Browse the repository at this point in the history
## Context

We should respect filter hierarchy and have,
- measure-level filters should be applied only pre-aggregation
- metric-level filters and query-level filters should be applied pre AND
post-aggregation

Meaning, measure-level filters should not be applied post-aggregation.

### Refactor
Changed `filter_specs` from being a tuple of filter specs all mashed
together to a `WhereFilterSpecSet` where it stores which levels each of
the filters were defined at. This was changed inside `MetricSpec` and
`MetricInputMeasureSpec`.

Resolves SL-2956
  • Loading branch information
WilliamDee authored Oct 15, 2024
1 parent 15a6ae7 commit 022c88b
Show file tree
Hide file tree
Showing 96 changed files with 4,513 additions and 1,896 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Tuple
from typing import Optional

from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass
from dbt_semantic_interfaces.protocols import MetricTimeWindow
Expand All @@ -10,7 +10,7 @@

from metricflow_semantics.specs.instance_spec import InstanceSpec, InstanceSpecVisitor
from metricflow_semantics.specs.non_additive_dimension_spec import NonAdditiveDimensionSpec
from metricflow_semantics.specs.where_filter.where_filter_spec import WhereFilterSpec
from metricflow_semantics.specs.where_filter.where_filter_spec_set import WhereFilterSpecSet
from metricflow_semantics.sql.sql_join_type import SqlJoinType
from metricflow_semantics.visitor import VisitorOutputT

Expand Down Expand Up @@ -62,7 +62,7 @@ class MetricInputMeasureSpec(SerializableDataclass):
offset_window: Optional[MetricTimeWindow] = None
offset_to_grain: Optional[TimeGranularity] = None
cumulative_description: Optional[CumulativeMeasureDescription] = None
filter_specs: Tuple[WhereFilterSpec, ...] = ()
filter_spec_set: WhereFilterSpecSet = WhereFilterSpecSet()
alias: Optional[str] = None
before_aggregation_time_spine_join_description: Optional[JoinToTimeSpineDescription] = None
after_aggregation_time_spine_join_description: Optional[JoinToTimeSpineDescription] = None
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Tuple
from typing import Optional

from dbt_semantic_interfaces.implementations.metric import PydanticMetricTimeWindow
from dbt_semantic_interfaces.references import MetricReference
from dbt_semantic_interfaces.type_enums import TimeGranularity

from metricflow_semantics.specs.instance_spec import InstanceSpec, InstanceSpecVisitor
from metricflow_semantics.specs.where_filter.where_filter_spec import WhereFilterSpec
from metricflow_semantics.specs.where_filter.where_filter_spec_set import WhereFilterSpecSet
from metricflow_semantics.visitor import VisitorOutputT


@dataclass(frozen=True)
class MetricSpec(InstanceSpec): # noqa: D101
# Time-over-time could go here
element_name: str
filter_specs: Tuple[WhereFilterSpec, ...] = ()
filter_spec_set: WhereFilterSpecSet = WhereFilterSpecSet()
alias: Optional[str] = None
offset_window: Optional[PydanticMetricTimeWindow] = None
offset_to_grain: Optional[TimeGranularity] = None
Expand Down Expand Up @@ -48,4 +48,4 @@ def has_time_offset(self) -> bool: # noqa: D102

def without_offset(self) -> MetricSpec:
"""Represents the metric spec with any time offsets removed."""
return MetricSpec(element_name=self.element_name, filter_specs=self.filter_specs, alias=self.alias)
return MetricSpec(element_name=self.element_name, filter_spec_set=self.filter_spec_set, alias=self.alias)
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Tuple

from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass

from metricflow_semantics.specs.where_filter.where_filter_spec import WhereFilterSpec


@dataclass(frozen=True)
class WhereFilterSpecSet(SerializableDataclass):
"""Class to encapsulate filters needed at a certain point of the queried metric.
This class splits up the filters based on where it was defined at, which can then be used to
determine where each filter should be applied during each step of the metric building process.
measure-level: filters defined on the input_measure
metric-level: filters defined on the input_metric or metric:filter
query-level: filters defined at query time
"""

measure_level_filter_specs: Tuple[WhereFilterSpec, ...] = ()
metric_level_filter_specs: Tuple[WhereFilterSpec, ...] = ()
query_level_filter_specs: Tuple[WhereFilterSpec, ...] = ()

@property
def after_measure_aggregation_filter_specs(self) -> Tuple[WhereFilterSpec, ...]:
"""Returns filters relevant to post-measure aggregation."""
return self.metric_level_filter_specs + self.query_level_filter_specs

@property
def all_filter_specs(self) -> Tuple[WhereFilterSpec, ...]:
"""Returns all the filters in this class.
Generally, before measure aggregation, all filters should be applied.
"""
return self.measure_level_filter_specs + self.metric_level_filter_specs + self.query_level_filter_specs

def merge(self, other: WhereFilterSpecSet) -> WhereFilterSpecSet:
"""Merge 2 WhereFilterSpecSet together."""
return WhereFilterSpecSet(
measure_level_filter_specs=self.measure_level_filter_specs + other.measure_level_filter_specs,
metric_level_filter_specs=self.metric_level_filter_specs + other.metric_level_filter_specs,
query_level_filter_specs=self.query_level_filter_specs + other.query_level_filter_specs,
)
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,17 @@ metric:
fill_nulls_with: 0
time_granularity: week
---
metric:
name: instant_bookings_with_measure_filter
description: simple metric joining to time spine with measure filter
type: simple
type_params:
measure:
name: bookings
join_to_timespine: true
filter: "{{ Dimension('booking__is_instant') }}"
filter: "{{ Entity('listing') }} IS NOT NULL"
---
metric:
name: every_two_days_bookers_fill_nulls_with_0
description: cumulative metric filling 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'listing__booking__listing__instant_booking_value',
'listing__booking__listing__instant_booking_value_ratio',
'listing__booking__listing__instant_bookings',
'listing__booking__listing__instant_bookings_with_measure_filter',
'listing__booking__listing__instant_lux_booking_value_rate',
'listing__booking__listing__instant_plus_non_referred_bookings_pct',
'listing__booking__listing__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -114,6 +115,7 @@
'listing__instant_booking_value',
'listing__instant_booking_value_ratio',
'listing__instant_bookings',
'listing__instant_bookings_with_measure_filter',
'listing__instant_lux_booking_value_rate',
'listing__instant_plus_non_referred_bookings_pct',
'listing__is_lux_latest',
Expand Down Expand Up @@ -441,6 +443,7 @@
'user__listing__user__instant_booking_value',
'user__listing__user__instant_booking_value_ratio',
'user__listing__user__instant_bookings',
'user__listing__user__instant_bookings_with_measure_filter',
'user__listing__user__instant_lux_booking_value_rate',
'user__listing__user__instant_plus_non_referred_bookings_pct',
'user__listing__user__largest_listing',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_booking_value ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_booking_value_ratio ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_bookings_with_measure_filter ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_lux_booking_value_rate ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") instant_plus_non_referred_bookings_pct ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") lux_booking_fraction_of_max_value ['JOINED', 'METRIC']
Expand Down Expand Up @@ -97,6 +98,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('listing',)") instant_booking_value ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_booking_value_ratio ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_bookings_with_measure_filter ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_lux_booking_value_rate ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") instant_plus_non_referred_bookings_pct ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") largest_listing ['JOINED', 'METRIC']
Expand Down Expand Up @@ -157,6 +159,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('listing', 'user')") instant_booking_value ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_booking_value_ratio ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_bookings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_bookings_with_measure_filter ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_lux_booking_value_rate ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") instant_plus_non_referred_bookings_pct ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") largest_listing ['JOINED', 'METRIC']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
'company__listing__user__company__instant_booking_value',
'company__listing__user__company__instant_booking_value_ratio',
'company__listing__user__company__instant_bookings',
'company__listing__user__company__instant_bookings_with_measure_filter',
'company__listing__user__company__instant_lux_booking_value_rate',
'company__listing__user__company__instant_plus_non_referred_bookings_pct',
'company__listing__user__company__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -148,6 +149,7 @@
'guest__booking__guest__instant_booking_value',
'guest__booking__guest__instant_booking_value_ratio',
'guest__booking__guest__instant_bookings',
'guest__booking__guest__instant_bookings_with_measure_filter',
'guest__booking__guest__instant_lux_booking_value_rate',
'guest__booking__guest__instant_plus_non_referred_bookings_pct',
'guest__booking__guest__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -181,6 +183,7 @@
'guest__instant_booking_value',
'guest__instant_booking_value_ratio',
'guest__instant_bookings',
'guest__instant_bookings_with_measure_filter',
'guest__instant_lux_booking_value_rate',
'guest__instant_plus_non_referred_bookings_pct',
'guest__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -225,6 +228,7 @@
'host__booking__host__instant_booking_value',
'host__booking__host__instant_booking_value_ratio',
'host__booking__host__instant_bookings',
'host__booking__host__instant_bookings_with_measure_filter',
'host__booking__host__instant_lux_booking_value_rate',
'host__booking__host__instant_plus_non_referred_bookings_pct',
'host__booking__host__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -258,6 +262,7 @@
'host__instant_booking_value',
'host__instant_booking_value_ratio',
'host__instant_bookings',
'host__instant_bookings_with_measure_filter',
'host__instant_lux_booking_value_rate',
'host__instant_plus_non_referred_bookings_pct',
'host__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -303,6 +308,7 @@
'listing__booking__listing__instant_booking_value',
'listing__booking__listing__instant_booking_value_ratio',
'listing__booking__listing__instant_bookings',
'listing__booking__listing__instant_bookings_with_measure_filter',
'listing__booking__listing__instant_lux_booking_value_rate',
'listing__booking__listing__instant_plus_non_referred_bookings_pct',
'listing__booking__listing__lux_booking_fraction_of_max_value',
Expand Down Expand Up @@ -356,6 +362,7 @@
'listing__instant_booking_value',
'listing__instant_booking_value_ratio',
'listing__instant_bookings',
'listing__instant_bookings_with_measure_filter',
'listing__instant_lux_booking_value_rate',
'listing__instant_plus_non_referred_bookings_pct',
'listing__is_lux_latest',
Expand Down Expand Up @@ -411,6 +418,7 @@
'lux_listing__listing__lux_listing__instant_booking_value',
'lux_listing__listing__lux_listing__instant_booking_value_ratio',
'lux_listing__listing__lux_listing__instant_bookings',
'lux_listing__listing__lux_listing__instant_bookings_with_measure_filter',
'lux_listing__listing__lux_listing__instant_lux_booking_value_rate',
'lux_listing__listing__lux_listing__instant_plus_non_referred_bookings_pct',
'lux_listing__listing__lux_listing__largest_listing',
Expand Down Expand Up @@ -551,6 +559,7 @@
'user__listing__user__instant_booking_value',
'user__listing__user__instant_booking_value_ratio',
'user__listing__user__instant_bookings',
'user__listing__user__instant_bookings_with_measure_filter',
'user__listing__user__instant_lux_booking_value_rate',
'user__listing__user__instant_plus_non_referred_bookings_pct',
'user__listing__user__largest_listing',
Expand Down
116 changes: 116 additions & 0 deletions metricflow-semantics/tests_metricflow_semantics/test_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@
from metricflow_semantics.specs.entity_spec import EntitySpec, LinklessEntitySpec
from metricflow_semantics.specs.group_by_metric_spec import GroupByMetricSpec
from metricflow_semantics.specs.instance_spec import InstanceSpec, LinkableInstanceSpec
from metricflow_semantics.specs.linkable_spec_set import LinkableSpecSet
from metricflow_semantics.specs.measure_spec import MeasureSpec
from metricflow_semantics.specs.metric_spec import MetricSpec
from metricflow_semantics.specs.spec_set import InstanceSpecSet
from metricflow_semantics.specs.time_dimension_spec import TimeDimensionSpec
from metricflow_semantics.specs.where_filter.where_filter_spec import WhereFilterSpec
from metricflow_semantics.specs.where_filter.where_filter_spec_set import WhereFilterSpecSet
from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
from metricflow_semantics.time.granularity import ExpandedTimeGranularity


Expand Down Expand Up @@ -212,3 +216,115 @@ def test_linkless_entity() -> None:

set_with_entity_spec.add(linkless_entity_spec)
assert len(set_with_entity_spec) == 1


@pytest.fixture
def where_filter_spec_set() -> WhereFilterSpecSet: # noqa: D103
return WhereFilterSpecSet(
measure_level_filter_specs=(
WhereFilterSpec(
where_sql="measure is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
metric_level_filter_specs=(
WhereFilterSpec(
where_sql="metric is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
query_level_filter_specs=(
WhereFilterSpec(
where_sql="query is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
)


def test_where_filter_spec_set_all_specs(where_filter_spec_set: WhereFilterSpecSet) -> None: # noqa: D103
assert set(where_filter_spec_set.all_filter_specs) == {
WhereFilterSpec(
where_sql="measure is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
WhereFilterSpec(
where_sql="metric is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
WhereFilterSpec(
where_sql="query is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
}


def test_where_filter_spec_set_post_aggregation_specs(where_filter_spec_set: WhereFilterSpecSet) -> None: # noqa: D103
assert set(where_filter_spec_set.after_measure_aggregation_filter_specs) == {
WhereFilterSpec(
where_sql="metric is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
WhereFilterSpec(
where_sql="query is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
}


def test_where_filter_spec_set_merge(where_filter_spec_set: WhereFilterSpecSet) -> None: # noqa: D103
spec_set1 = WhereFilterSpecSet(
measure_level_filter_specs=(
WhereFilterSpec(
where_sql="measure is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
)
spec_set2 = WhereFilterSpecSet(
metric_level_filter_specs=(
WhereFilterSpec(
where_sql="metric is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
)

assert spec_set1.merge(spec_set2) == WhereFilterSpecSet(
measure_level_filter_specs=(
WhereFilterSpec(
where_sql="measure is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
metric_level_filter_specs=(
WhereFilterSpec(
where_sql="metric is true",
bind_parameters=SqlBindParameters(),
linkable_element_unions=(),
linkable_spec_set=LinkableSpecSet(),
),
),
)
Loading

0 comments on commit 022c88b

Please sign in to comment.