Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MetricTimeDefaultGranularityPattern #1322

Merged
merged 3 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
"""
start_time = time.time()

# Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`.
# Spec patterns need all specs to match properly e.g. `MinimumTimeGrainPattern`.
matching_specs: Sequence[InstanceSpec] = self.specs

for spec_pattern in spec_patterns:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
MetricFlowQueryResolutionIssueSet,
)
from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.patterns.typed_patterns import TimeDimensionPattern
Expand Down Expand Up @@ -102,7 +102,7 @@ def resolve_matching_item_for_querying(
)

push_down_result = push_down_result.filter_candidates_by_pattern(
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
)
logger.info(
f"Spec pattern:\n"
Expand Down Expand Up @@ -152,7 +152,7 @@ def resolve_matching_item_for_filters(

push_down_visitor = _PushDownGroupByItemCandidatesVisitor(
manifest_lookup=self._manifest_lookup,
source_spec_patterns=(spec_pattern, BaseTimeGrainPattern()),
source_spec_patterns=(spec_pattern, MinimumTimeGrainPattern()),
suggestion_generator=suggestion_generator,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
ResolverInputForQuery,
ResolverInputForQueryLevelWhereFilterIntersection,
)
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.metric_time_pattern import MetricTimePattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern
from metricflow_semantics.specs.query_param_implementations import DimensionOrEntityParameter, MetricParameter
from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec
Expand Down Expand Up @@ -153,7 +153,7 @@ def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec])

for pattern_to_apply in (
MetricTimePattern(),
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
NoneDatePartPattern(),
):
matching_specs = pattern_to_apply.match(matching_specs)
Expand All @@ -164,7 +164,7 @@ def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec])

assert (
len(time_dimension_specs) == 1
), f"Bug with BaseTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}"
), f"Bug with MinimumTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}"

return time_dimension_specs[0].time_granularity

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from metricflow_semantics.naming.naming_scheme import QueryItemNamingScheme
from metricflow_semantics.query.similarity import top_fuzzy_matches
from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern
from metricflow_semantics.specs.patterns.minimum_time_grain import MinimumTimeGrainPattern
from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern
from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
Expand All @@ -24,9 +24,9 @@ class QueryItemSuggestionGenerator:

# Adding these filters so that we don't get multiple suggestions that are similar, but with different
# grains. Some additional thought is needed to tweak this as the base grain may not be the best suggestion.
FILTER_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (BaseTimeGrainPattern(), NoneDatePartPattern())
FILTER_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (MinimumTimeGrainPattern(), NoneDatePartPattern())
GROUP_BY_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (
BaseTimeGrainPattern(),
MinimumTimeGrainPattern(),
NoneDatePartPattern(),
NoGroupByMetricPattern(),
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

from collections import defaultdict
from typing import Dict, Optional, Sequence, Set, Tuple

from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class MetricTimeDefaultGranularityPattern(SpecPattern):
    """Narrow metric_time candidates to the default granularity of the requested metrics.

    This decides which granularity metric_time should use when the request did not specify one. The metric_time
    candidates are grouped so that specs differing only by granularity land in the same group, and then for each group:

    * if the default granularity is one of the candidate grains, only a spec with that grain is kept;
    * otherwise a granularity was already selected in the request, so every candidate in the group is kept.

    The candidates are returned untouched when no default granularity was supplied (no metrics were queried) or when
    there are no metric_time specs among them. Specs that are not metric_time always pass through. When filtering
    happens, the result is ordered by type: dimensions, metric_time, other time dimensions, entities, and finally
    group-by metrics.

    e.g., if a metric with default_granularity MONTH is queried

    inputs:
        [
            TimeDimensionSpec('metric_time', 'day'),
            TimeDimensionSpec('metric_time', 'week'),
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]

    matches:
        [
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]
    """

    def __init__(self, max_metric_default_time_granularity: Optional[TimeGranularity]) -> None:  # noqa: D107
        self._max_metric_default_time_granularity = max_metric_default_time_granularity

    @override
    def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
        grouped_specs = group_specs_by_type(candidate_specs)
        default_grain = self._max_metric_default_time_granularity

        # Without a default granularity (no metrics) or without metric_time specs there is nothing to narrow down.
        if not default_grain or not grouped_specs.metric_time_specs:
            return candidate_specs

        # Bucket the metric_time specs so that specs differing only by granularity share a key. Insertion order of the
        # keys (first appearance) determines the order of the output.
        specs_by_key: Dict[TimeDimensionSpecComparisonKey, Tuple[TimeDimensionSpec, ...]] = defaultdict(tuple)
        for candidate in grouped_specs.metric_time_specs:
            key = candidate.comparison_key(exclude_fields=(TimeDimensionSpecField.TIME_GRANULARITY,))
            specs_by_key[key] += (candidate,)

        selected_metric_time_specs: Tuple[TimeDimensionSpec, ...] = ()
        for specs_for_key in specs_by_key.values():
            available_grains: Set[TimeGranularity] = {spec.time_granularity for spec in specs_for_key}
            if default_grain not in available_grains:
                # The default is not one of the options, so the granularity was specified in the request and no
                # default is needed. Keep every option for this key.
                selected_metric_time_specs += specs_for_key
                continue
            # Re-grain the first spec of the bucket to the default rather than looking up the exact candidate.
            selected_metric_time_specs += (specs_for_key[0].with_grain(default_grain),)

        other_time_dimension_specs = tuple(
            spec for spec in grouped_specs.time_dimension_specs if not spec.is_metric_time
        )

        matching_specs: Sequence[LinkableInstanceSpec] = (
            grouped_specs.dimension_specs
            + selected_metric_time_specs
            + other_time_dimension_specs
            + grouped_specs.entity_specs
            + grouped_specs.group_by_metric_specs
        )

        return matching_specs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.metric_time_pattern import MetricTimePattern
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
Expand All @@ -18,7 +17,7 @@
from metricflow_semantics.specs.spec_set import group_specs_by_type


class BaseTimeGrainPattern(SpecPattern):
class MinimumTimeGrainPattern(SpecPattern):
"""A pattern that matches linkable specs, but for time dimension specs, only the one with the finest grain.

e.g.
Expand Down Expand Up @@ -46,25 +45,8 @@ class BaseTimeGrainPattern(SpecPattern):
by the base grain of metric_time.
"""

def __init__(self, only_apply_for_metric_time: bool = False) -> None:
"""Initializer.

Args:
only_apply_for_metric_time: If set, only remove time dimension specs with a non-base grain if it's for
metric_time. This parameter is useful for implementing restrictions on cumulative metrics as they can only
be queried by the base grain of metric_time.
TODO: This is a little odd. This can be replaced once composite patterns are supported.
"""
self._only_apply_for_metric_time = only_apply_for_metric_time

@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
if self._only_apply_for_metric_time:
metric_time_specs = MetricTimePattern().match(candidate_specs)
other_specs = tuple(spec for spec in candidate_specs if spec not in metric_time_specs)

return other_specs + tuple(BaseTimeGrainPattern(only_apply_for_metric_time=False).match(metric_time_specs))

spec_set = group_specs_by_type(candidate_specs)

spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from collections import defaultdict
from typing import Dict, List, Sequence, Set

from dbt_semantic_interfaces.type_enums import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class MinimumTimeGrainPattern(SpecPattern):
    """Match linkable specs, keeping only the finest-grained spec of each time dimension.

    Time dimension specs that differ only by granularity are treated as one group, and each group is collapsed to a
    single spec carrying the smallest granularity found in that group. Dimension, entity, and group-by-metric specs
    pass through unchanged. The result is ordered by type: dimensions, time dimensions, entities, and finally
    group-by metrics.

    e.g.

    inputs:
        [
            TimeDimensionSpec('metric_time', 'day'),
            TimeDimensionSpec('metric_time', 'month'),
            DimensionSpec('listing__country'),
        ]

    matches:
        [
            TimeDimensionSpec('metric_time', 'day'),
            DimensionSpec('listing__country'),
        ]

    The finest grain represents the defined grain of the time dimension in the semantic model when evaluating specs
    of the source.

    This pattern helps to implement matching of group-by-items for where filters - in those cases, an ambiguously
    specified group-by-item can only match to time dimension spec with the base grain.
    """

    @override
    def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
        grouped_specs = group_specs_by_type(candidate_specs)

        # Bucket the time dimension specs so that specs differing only by granularity share a key. Insertion order of
        # the keys (first appearance) determines the order of the output.
        specs_by_key: Dict[TimeDimensionSpecComparisonKey, List[TimeDimensionSpec]] = defaultdict(list)
        for candidate in grouped_specs.time_dimension_specs:
            key = candidate.comparison_key(exclude_fields=(TimeDimensionSpecField.TIME_GRANULARITY,))
            specs_by_key[key].append(candidate)

        def _finest_grain(specs_for_key: Sequence[TimeDimensionSpec]) -> TimeGranularity:
            """Return the smallest granularity among specs that share a comparison key."""
            grains: Set[TimeGranularity] = {spec.time_granularity for spec in specs_for_key}
            return min(grains)

        # Re-grain the first spec of each bucket to the bucket's finest grain.
        finest_grain_specs = tuple(
            specs_for_key[0].with_grain(_finest_grain(specs_for_key)) for specs_for_key in specs_by_key.values()
        )

        matching_specs: Sequence[LinkableInstanceSpec] = (
            grouped_specs.dimension_specs
            + finest_grain_specs
            + grouped_specs.entity_specs
            + grouped_specs.group_by_metric_specs
        )

        return matching_specs
Loading