Skip to content

Commit

Permalink
/* PR_START p--short-term-perf 16 */ Consolidate property filters int…
Browse files Browse the repository at this point in the history
…o a class.
  • Loading branch information
plypaul committed Oct 2, 2024
1 parent 7659965 commit cf2796e
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 86 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import FrozenSet

from typing_extensions import Self, override

from metricflow_semantics.collection_helpers.merger import Mergeable
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty


@dataclass(frozen=True)
class LinkableElementFilter(Mergeable):
"""Describes a way to filter the `LinkableElements` in a `LinkableElementSet`."""

with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties()
without_any_of: FrozenSet[LinkableElementProperty] = frozenset()
without_all_of: FrozenSet[LinkableElementProperty] = frozenset()

@override
def merge(self: Self, other: LinkableElementFilter) -> LinkableElementFilter:
return LinkableElementFilter(
with_any_of=self.with_any_of.union(other.with_any_of),
without_any_of=self.without_any_of.union(other.without_any_of),
without_all_of=self.without_all_of.union(other.without_all_of),
)

@classmethod
@override
def empty_instance(cls) -> LinkableElementFilter:
return LinkableElementFilter()
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from collections import defaultdict
from dataclasses import dataclass, field
from functools import cached_property
from typing import Dict, FrozenSet, List, Sequence, Set, Tuple
from typing import Dict, List, Sequence, Set, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.references import SemanticModelReference
from typing_extensions import override

from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
Expand Down Expand Up @@ -221,16 +221,18 @@ def linkable_elements_for_path_key(self, path_key: ElementPathKey) -> Sequence[L

def filter(
self,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
without_all_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Filter elements in the set.
First, only elements with at least one property in the "with_any_of" set are retained. Then, any elements with
a property in "without_any_of" set are removed. Lastly, any elements with all properties in without_all_of
are removed.
"""
with_any_of = element_filter.with_any_of
without_any_of = element_filter.without_any_of
without_all_of = element_filter.without_all_of

key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {}
key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {}
key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from metricflow_semantics.mf_logging.pretty_print import mf_pformat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
Expand Down Expand Up @@ -105,7 +106,8 @@ def __init__(
if metric.type is MetricType.CUMULATIVE:
linkable_sets_for_measure.append(
self._get_linkable_element_set_for_measure(
measure, without_any_of=frozenset({LinkableElementProperty.DATE_PART})
measure,
LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.DATE_PART})),
)
)
elif (
Expand Down Expand Up @@ -593,16 +595,15 @@ def _get_joined_elements_without_cache(
def _get_linkable_element_set_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""See get_linkable_element_set_for_measure()."""
measure_semantic_model = self._get_semantic_model_for_measure(measure_reference)

elements_in_semantic_model = self._get_elements_in_semantic_model(measure_semantic_model)

# Filter out group-by metrics if not specified by the property as there can be a large number of them.
if LinkableElementProperty.METRIC not in without_any_of:
if LinkableElementProperty.METRIC not in element_filter.without_any_of:
metrics_linked_to_semantic_model = self.get_joinable_metrics_for_semantic_model(
semantic_model=measure_semantic_model,
using_join_path=SemanticModelJoinPath(left_semantic_model_reference=measure_semantic_model.reference),
Expand All @@ -620,42 +621,35 @@ def _get_linkable_element_set_for_measure(
metric_time_elements,
joined_elements,
)
).filter(
with_any_of=with_any_of,
without_any_of=without_any_of,
)
).filter(element_filter)

def get_linkable_element_set_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty],
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Get the valid linkable elements for the given measure."""
return self._get_linkable_element_set_for_measure(
measure_reference=measure_reference,
with_any_of=with_any_of,
without_any_of=without_any_of,
element_filter=element_filter,
)

def get_linkable_elements_for_distinct_values_query(
self,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty],
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Returns queryable items for a distinct group-by-item values query.
A distinct group-by-item values query does not include any metrics.
"""
return self._no_metric_linkable_element_set.filter(with_any_of=with_any_of, without_any_of=without_any_of)
return self._no_metric_linkable_element_set.filter(element_filter)

# TODO: the results of this method don't actually match what will be allowed for the metric. This method checks
# _metric_to_linkable_element_sets, while the actual group by resolution DAG calls _get_linkable_element_set_for_measure.
def get_linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""Gets the valid linkable elements that are common to all requested metrics."""
linkable_element_sets = []
Expand All @@ -667,10 +661,7 @@ def get_linkable_elements_for_metrics(
# Using .only_unique_path_keys to exclude ambiguous elements where there are multiple join paths to get
# a dimension / entity.
metric_result = LinkableElementSet.intersection_by_path_key(
[
element_set.only_unique_path_keys.filter(with_any_of=with_any_of, without_any_of=without_any_of)
for element_set in element_sets
]
[element_set.only_unique_path_keys.filter(element_filter) for element_set in element_sets]
)
linkable_element_sets.append(metric_result)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import functools
import logging
import time
from typing import Dict, FrozenSet, Optional, Sequence, Set
from typing import Dict, Optional, Sequence, Set

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType
Expand All @@ -13,7 +13,7 @@

from metricflow_semantics.errors.error_classes import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet
from metricflow_semantics.model.semantics.linkable_spec_resolver import (
ValidLinkableSpecResolver,
Expand Down Expand Up @@ -66,18 +66,13 @@ def __init__(
def linkable_elements_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""Return the set of linkable elements reachable from a given measure."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

start_time = time.time()
linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure(
measure_reference=measure_reference,
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
element_filter=element_filter,
)
logger.debug(
LazyFormat(
Expand All @@ -89,31 +84,18 @@ def linkable_elements_for_measure(

@functools.lru_cache
def linkable_elements_for_no_metrics_query(
self,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
self, element_set_filter: LinkableElementFilter = LinkableElementFilter()
) -> LinkableElementSet:
"""Return the reachable linkable elements for a dimension values query with no metrics."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
)
return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(element_set_filter)

@functools.lru_cache
def linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
with_any_property: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_property: FrozenSet[LinkableElementProperty] = frozenset(),
self, metric_references: Sequence[MetricReference], element_set_filter: LinkableElementFilter
) -> LinkableElementSet:
"""Retrieve the matching set of linkable elements common to all metrics requested (intersection)."""
return self._linkable_spec_resolver.get_linkable_elements_for_metrics(
metric_references=metric_references,
with_any_of=with_any_property,
without_any_of=without_any_property,
metric_references=metric_references, element_filter=element_set_filter
)

def get_metrics(self, metric_references: Sequence[MetricReference]) -> Sequence[Metric]: # noqa: D102
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.query.group_by_item.candidate_push_down.group_by_item_candidate import GroupByItemCandidateSet
from metricflow_semantics.query.group_by_item.filter_spec_resolution.filter_location import (
WhereFilterLocation,
Expand Down Expand Up @@ -187,8 +188,10 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu

items_available_for_measure = self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_measure(
measure_reference=node.measure_reference,
with_any_of=self._with_any_property,
without_any_of=frozenset(without_any_property),
element_filter=LinkableElementFilter(
with_any_of=self._with_any_property or LinkableElementProperty.all_properties(),
without_any_of=frozenset(without_any_property),
),
)

# The following is needed to handle limitation of cumulative metrics. Filtering could be done at the measure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
Expand Down Expand Up @@ -265,7 +266,7 @@ def test_filter_with_any_of() -> None:
filter_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.ENTITY])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=filter_properties)
filtered_set = linkable_set.filter(LinkableElementFilter(with_any_of=filter_properties))

filtered_dimensions = [
dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values())
Expand Down Expand Up @@ -304,7 +305,9 @@ def test_filter_without_any_of() -> None:
without_any_of_properties = frozenset([LinkableElementProperty.ENTITY, LinkableElementProperty.METRIC])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_any_of=without_any_of_properties)
filtered_set = linkable_set.filter(
LinkableElementFilter(with_any_of=with_any_of_properties, without_any_of=without_any_of_properties)
)

filtered_dimensions = [
dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values())
Expand Down Expand Up @@ -340,7 +343,9 @@ def test_filter_without_all_of() -> None:
without_all_of_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.ENTITY])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_all_of=without_all_of_properties)
filtered_set = linkable_set.filter(
LinkableElementFilter(with_any_of=with_any_of_properties, without_all_of=without_all_of_properties)
)

filtered_metrics = [
metric for metric in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_metrics.values())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import SemanticModelJoinPath, SemanticModelJoinPathElement
from metricflow_semantics.model.semantics.linkable_spec_resolver import ValidLinkableSpecResolver
from metricflow_semantics.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS
Expand Down Expand Up @@ -58,8 +59,9 @@ def test_all_properties( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="views")],
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset({}),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(), without_any_of=frozenset()
),
),
)

Expand All @@ -75,8 +77,9 @@ def test_one_property( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="views")],
with_any_of=frozenset({LinkableElementProperty.LOCAL}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.LOCAL}), without_any_of=frozenset()
),
),
)

Expand All @@ -92,8 +95,9 @@ def test_metric_time_property_for_cumulative_metric( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="trailing_2_months_revenue")],
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}), without_any_of=frozenset()
),
),
)

Expand All @@ -109,8 +113,9 @@ def test_metric_time_property_for_derived_metrics( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings_per_view")],
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}), without_any_of=frozenset()
),
),
)

Expand All @@ -126,8 +131,10 @@ def test_cyclic_join_manifest( # noqa: D103
set_id="result0",
linkable_element_set=cyclic_join_manifest_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="listings")],
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
),
),
)

Expand Down Expand Up @@ -192,8 +199,10 @@ def test_linkable_element_set_as_spec_set(
linkable_spec_set = InstanceSpecSet.create_from_specs(
simple_model_spec_resolver.get_linkable_element_set_for_measure(
MeasureReference(element_name="listings"),
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset({}),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
),
).specs
)
assert_spec_set_snapshot_equal(
Expand Down
Loading

0 comments on commit cf2796e

Please sign in to comment.