Skip to content

Commit

Permalink
Add MetricTimeDefaultGranularityPattern (#1322)
Browse files Browse the repository at this point in the history
Adds a new `SpecPattern` called `MetricTimeDefaultGranularityPattern`.
This will be used to apply the `default_granularity` from a queried
metric to `metric_time` if no granularity is specified.

Also includes some related cleanup:
- [Rename BaseTimeGrainPattern ->
MinimumTimeGrainPattern](4bb4ab7)
- [Remove unused only_apply_for_metric_time param from
MinimumTimeGrainPattern](613050a)
  • Loading branch information
courtneyholcomb authored Jul 15, 2024
1 parent 072b8d5 commit 478bd85
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
"""
start_time = time.time()

# Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`.
# Spec patterns need all specs to match properly e.g. `MinimumTimeGrainPattern`.
matching_specs: Sequence[InstanceSpec] = self.specs

for spec_pattern in spec_patterns:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
MetricFlowQueryResolutionIssueSet,
)
from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.patterns.typed_patterns import TimeDimensionPattern
Expand Down Expand Up @@ -102,7 +102,7 @@ def resolve_matching_item_for_querying(
)

push_down_result = push_down_result.filter_candidates_by_pattern(
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
)
logger.info(
f"Spec pattern:\n"
Expand Down Expand Up @@ -152,7 +152,7 @@ def resolve_matching_item_for_filters(

push_down_visitor = _PushDownGroupByItemCandidatesVisitor(
manifest_lookup=self._manifest_lookup,
source_spec_patterns=(spec_pattern, BaseTimeGrainPattern()),
source_spec_patterns=(spec_pattern, MinimumTimeGrainPattern()),
suggestion_generator=suggestion_generator,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
ResolverInputForQuery,
ResolverInputForQueryLevelWhereFilterIntersection,
)
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.metric_time_pattern import MetricTimePattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern
from metricflow_semantics.specs.query_param_implementations import DimensionOrEntityParameter, MetricParameter
from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec
Expand Down Expand Up @@ -153,7 +153,7 @@ def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec])

for pattern_to_apply in (
MetricTimePattern(),
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
NoneDatePartPattern(),
):
matching_specs = pattern_to_apply.match(matching_specs)
Expand All @@ -164,7 +164,7 @@ def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec])

assert (
len(time_dimension_specs) == 1
), f"Bug with BaseTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}"
), f"Bug with MinimumTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}"

return time_dimension_specs[0].time_granularity

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from metricflow_semantics.naming.naming_scheme import QueryItemNamingScheme
from metricflow_semantics.query.similarity import top_fuzzy_matches
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern
from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
Expand All @@ -24,9 +24,9 @@ class QueryItemSuggestionGenerator:

# Adding these filters so that we don't get multiple suggestions that are similar, but with different
# grains. Some additional thought is needed to tweak this as the base grain may not be the best suggestion.
FILTER_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (BaseTimeGrainPattern(), NoneDatePartPattern())
FILTER_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (MinimumTimeGrainPattern(), NoneDatePartPattern())
GROUP_BY_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
NoneDatePartPattern(),
NoGroupByMetricPattern(),
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

from collections import defaultdict
from typing import Dict, Optional, Sequence, Set, Tuple

from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class MetricTimeDefaultGranularityPattern(SpecPattern):
    """Narrows `metric_time` candidates to the default granularity of the queried metrics.

    The pattern is constructed with the default granularity to apply (or `None`). When matching:

    * If no default granularity was supplied, or the candidates contain no `metric_time` specs, the candidates are
      returned unchanged.
    * Otherwise, `metric_time` specs are grouped by their comparison key with the time granularity excluded. For each
      group that includes the default granularity, a single spec with that granularity is kept. Groups that do not
      include the default granularity are passed through in full.
    * All specs that are not `metric_time` specs are passed through.

    e.g. with a default granularity of MONTH:

    inputs:
        [
            TimeDimensionSpec('metric_time', 'day'),
            TimeDimensionSpec('metric_time', 'week'),
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]

    matches:
        [
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]
    """

    def __init__(self, max_metric_default_time_granularity: Optional[TimeGranularity]) -> None:  # noqa: D107
        self._max_metric_default_time_granularity = max_metric_default_time_granularity

    @override
    def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
        spec_set = group_specs_by_type(candidate_specs)
        default_grain = self._max_metric_default_time_granularity

        # Nothing to do without a default granularity or without any metric_time candidates.
        if not default_grain or not spec_set.metric_time_specs:
            return candidate_specs

        # Group the metric_time specs that differ only by time granularity.
        specs_by_key: Dict[TimeDimensionSpecComparisonKey, Tuple[TimeDimensionSpec, ...]] = defaultdict(tuple)
        for candidate in spec_set.metric_time_specs:
            key = candidate.comparison_key(exclude_fields=(TimeDimensionSpecField.TIME_GRANULARITY,))
            specs_by_key[key] += (candidate,)

        selected_metric_time_specs: Tuple[TimeDimensionSpec, ...] = ()
        for grouped_specs in specs_by_key.values():
            available_grains: Set[TimeGranularity] = {spec.time_granularity for spec in grouped_specs}
            if default_grain in available_grains:
                selected_metric_time_specs += (grouped_specs[0].with_grain(default_grain),)
            else:
                # The default granularity is not one of the options for this group, so keep every option as-is.
                selected_metric_time_specs += grouped_specs

        other_time_dimension_specs = tuple(
            spec for spec in spec_set.time_dimension_specs if not spec.is_metric_time
        )

        matching_specs: Sequence[LinkableInstanceSpec] = (
            spec_set.dimension_specs
            + selected_metric_time_specs
            + other_time_dimension_specs
            + spec_set.entity_specs
            + spec_set.group_by_metric_specs
        )
        return matching_specs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.metric_time_pattern import MetricTimePattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
Expand All @@ -18,7 +17,7 @@
from metricflow_semantics.specs.spec_set import group_specs_by_type


class BaseTimeGrainPattern(SpecPattern):
class MinimumTimeGrainPattern(SpecPattern):
"""A pattern that matches linkable specs, but for time dimension specs, only the one with the finest grain.
e.g.
Expand Down Expand Up @@ -46,25 +45,8 @@ class BaseTimeGrainPattern(SpecPattern):
by the base grain of metric_time.
"""

def __init__(self, only_apply_for_metric_time: bool = False) -> None:
"""Initializer.
Args:
only_apply_for_metric_time: If set, only remove time dimension specs with a non-base grain if it's for
metric_time. This parameter is useful for implementing restrictions on cumulative metrics as they can only
be queried by the base grain of metric_time.
TODO: This is a little odd. This can be replaced once composite patterns are supported.
"""
self._only_apply_for_metric_time = only_apply_for_metric_time

@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
if self._only_apply_for_metric_time:
metric_time_specs = MetricTimePattern().match(candidate_specs)
other_specs = tuple(spec for spec in candidate_specs if spec not in metric_time_specs)

return other_specs + tuple(BaseTimeGrainPattern(only_apply_for_metric_time=False).match(metric_time_specs))

spec_set = group_specs_by_type(candidate_specs)

spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from collections import defaultdict
from typing import Dict, List, Sequence, Set

from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class MinimumTimeGrainPattern(SpecPattern):
    """Matches linkable specs, keeping only the finest-grain variant of each time dimension spec.

    Time dimension specs are grouped by their comparison key with the time granularity excluded. Each group is
    reduced to a single spec carrying the minimum granularity found in that group. Dimension, entity, and
    group-by-metric specs are passed through.

    e.g.

    inputs:
        [
            TimeDimensionSpec('metric_time', 'day'),
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]

    matches:
        [
            TimeDimensionSpec('metric_time', 'day'),
            DimensionSpec('listing__country'),
        ]

    The finest grain represents the defined grain of the time dimension in the semantic model when evaluating specs
    of the source.

    This pattern helps to implement matching of group-by-items for where filters - in those cases, an ambiguously
    specified group-by-item can only match to time dimension spec with the base grain.
    """

    @override
    def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
        spec_set = group_specs_by_type(candidate_specs)

        # Group the time dimension specs that differ only by time granularity.
        specs_by_key: Dict[TimeDimensionSpecComparisonKey, List[TimeDimensionSpec]] = defaultdict(list)
        for candidate in spec_set.time_dimension_specs:
            key = candidate.comparison_key(exclude_fields=(TimeDimensionSpecField.TIME_GRANULARITY,))
            specs_by_key[key].append(candidate)

        # Reduce each group to one spec at the smallest granularity seen in that group.
        finest_grain_specs: List[TimeDimensionSpec] = []
        for grouped_specs in specs_by_key.values():
            grains: Set[TimeGranularity] = {spec.time_granularity for spec in grouped_specs}
            finest_grain_specs.append(grouped_specs[0].with_grain(min(grains)))

        matching_specs: Sequence[LinkableInstanceSpec] = (
            spec_set.dimension_specs
            + tuple(finest_grain_specs)
            + spec_set.entity_specs
            + spec_set.group_by_metric_specs
        )
        return matching_specs

0 comments on commit 478bd85

Please sign in to comment.