diff --git a/.changes/unreleased/Features-20240628-074617.yaml b/.changes/unreleased/Features-20240628-074617.yaml new file mode 100644 index 0000000000..daecb746f9 --- /dev/null +++ b/.changes/unreleased/Features-20240628-074617.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Use default_granularity to resolve metric_time. +time: 2024-06-28T07:46:17.768805-07:00 +custom: + Author: courtneyholcomb + Issue: "1310" diff --git a/extra-hatch-configuration/requirements.txt b/extra-hatch-configuration/requirements.txt index db1991c6ae..0a9548db9c 100644 --- a/extra-hatch-configuration/requirements.txt +++ b/extra-hatch-configuration/requirements.txt @@ -1,5 +1,5 @@ Jinja2>=3.1.3 -dbt-semantic-interfaces==0.6.1 +dbt-semantic-interfaces==0.6.4 more-itertools>=8.10.0, <10.2.0 pydantic>=1.10.0, <3.0 tabulate>=0.8.9 diff --git a/metricflow-semantics/extra-hatch-configuration/requirements.txt b/metricflow-semantics/extra-hatch-configuration/requirements.txt index 6131ae86f3..121e58c68d 100644 --- a/metricflow-semantics/extra-hatch-configuration/requirements.txt +++ b/metricflow-semantics/extra-hatch-configuration/requirements.txt @@ -1,5 +1,5 @@ # dbt Cloud depends on metricflow-semantics (dependency set in dbt-mantle), so DSI must always point to a production version here. -dbt-semantic-interfaces>=0.6.1, <2.0.0 +dbt-semantic-interfaces>=0.6.4 graphviz>=0.18.2, <0.21 python-dateutil>=2.9.0, <2.10.0 rapidfuzz>=3.0, <4.0 diff --git a/metricflow-semantics/metricflow_semantics/model/dbt_manifest_parser.py b/metricflow-semantics/metricflow_semantics/model/dbt_manifest_parser.py index 638da5283f..903ac85fb5 100644 --- a/metricflow-semantics/metricflow_semantics/model/dbt_manifest_parser.py +++ b/metricflow-semantics/metricflow_semantics/model/dbt_manifest_parser.py @@ -11,6 +11,7 @@ ConvertMedianToPercentileRule, ) from dbt_semantic_interfaces.transformations.cumulative_type_params import SetCumulativeTypeParamsRule +from dbt_semantic_interfaces.transformations.default_granularity import SetDefaultGranularityRule from dbt_semantic_interfaces.transformations.names import LowerCaseNamesRule from dbt_semantic_interfaces.transformations.proxy_measure import CreateProxyMeasureRule from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import ( @@ -38,6 +39,7 @@ def parse_manifest_from_dbt_generated_manifest(manifest_json_string: str) -> Pyd ConvertMedianToPercentileRule(), DedupeMetricInputMeasuresRule(), # Remove once fix is in core SetCumulativeTypeParamsRule(), + SetDefaultGranularityRule(), ), ) model = PydanticSemanticManifestTransformer.transform(raw_model, rules) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py index effa5daf92..378719f45f 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py @@ -386,7 +386,7 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka """ start_time = time.time() - # Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`. + # Spec patterns need all specs to match properly e.g. `MinimumTimeGrainPattern`. matching_specs: Sequence[InstanceSpec] = self.specs for spec_pattern in spec_patterns: diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 21e5fb8ac1..5b5f153595 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -210,3 +210,18 @@ def get_min_queryable_time_granularity(self, metric_reference: MetricReference) minimum_queryable_granularity = defined_time_granularity return minimum_queryable_granularity + + def get_default_granularity_for_metrics( + self, metric_references: Sequence[MetricReference] + ) -> Optional[TimeGranularity]: + """When querying a group of metrics, the default granularity will be the largest of the metrics' default granularities.""" + max_default_granularity: Optional[TimeGranularity] = None + for metric_reference in metric_references: + default_granularity = self.get_metric(metric_reference).default_granularity + assert ( + default_granularity + ), f"No default_granularity set for {metric_reference}. Something has been misconfigured." + if not max_default_granularity or default_granularity.to_int() > max_default_granularity.to_int(): + max_default_granularity = default_granularity + + return max_default_granularity diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py index d820efb9a0..0fbc1b0c21 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py @@ -104,7 +104,7 @@ def get_dimension(self, dimension_reference: DimensionReference) -> Dimension: def get_time_dimension(self, time_dimension_reference: TimeDimensionReference) -> Dimension: """Retrieves a full dimension object by name.""" - return self.get_dimension(dimension_reference=time_dimension_reference.dimension_reference()) + return self.get_dimension(dimension_reference=time_dimension_reference.dimension_reference) @property def measure_references(self) -> Sequence[MeasureReference]: diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py index 0c3efcb5cc..989606f5f6 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py @@ -347,6 +347,7 @@ def _resolve_specs_for_where_filters( input_str=group_by_item_in_where_filter.object_builder_str, spec_pattern=group_by_item_in_where_filter.spec_pattern, resolution_node=current_node, + filter_location=filter_location, ) # The paths in the issue set are generated relative to the current node. For error messaging, it seems more # helpful for those paths to be relative to the query. To do, we have to add nodes from the resolution path. diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py index dbc861467e..2c9274ee69 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py @@ -6,7 +6,7 @@ from dbt_semantic_interfaces.call_parameter_sets import TimeDimensionCallParameterSet from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME -from dbt_semantic_interfaces.references import SemanticModelReference, TimeDimensionReference +from dbt_semantic_interfaces.references import MetricReference, SemanticModelReference, TimeDimensionReference from dbt_semantic_interfaces.type_enums import TimeGranularity from typing_extensions import override @@ -20,14 +20,16 @@ PushDownResult, _PushDownGroupByItemCandidatesVisitor, ) +from metricflow_semantics.query.group_by_item.filter_spec_resolution.filter_location import WhereFilterLocation from metricflow_semantics.query.group_by_item.resolution_dag.dag import GroupByItemResolutionDag, ResolutionDagSinkNode from metricflow_semantics.query.group_by_item.resolution_path import MetricFlowQueryResolutionPath from metricflow_semantics.query.issues.group_by_item_resolver.ambiguous_group_by_item import AmbiguousGroupByItemIssue from metricflow_semantics.query.issues.issues_base import ( MetricFlowQueryResolutionIssueSet, ) -from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator -from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern +from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator, QueryPartForSuggestions +from metricflow_semantics.specs.patterns.metric_time_default_granularity import MetricTimeDefaultGranularityPattern +from metricflow_semantics.specs.patterns.min_time_grain import MinimumTimeGrainPattern from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern from metricflow_semantics.specs.patterns.typed_patterns import TimeDimensionPattern @@ -80,11 +82,15 @@ def resolve_matching_item_for_querying( self, spec_pattern: SpecPattern, suggestion_generator: Optional[QueryItemSuggestionGenerator], + queried_metrics: Sequence[MetricReference], + only_use_minimum_grain: bool = False, ) -> GroupByItemResolution: - """Returns the spec that corresponds the one described by spec_pattern and is valid for the query. + """Returns the spec that corresponds to the one described by spec_pattern and is valid for the query. For queries, if the pattern matches to a spec for the same element at different grains, the spec with the finest - common grain is returned. + common grain is returned, unless the spec is metric_time, in which case the default grain is returned. + + If only_use_minimum_grain is True, will use minimum grain instead of default for metric_time, too. """ push_down_visitor = _PushDownGroupByItemCandidatesVisitor( manifest_lookup=self._manifest_lookup, @@ -101,9 +107,18 @@ def resolve_matching_item_for_querying( issue_set=push_down_result.issue_set, ) - push_down_result = push_down_result.filter_candidates_by_pattern( - BaseTimeGrainPattern(), - ) + filters_to_use: Tuple[SpecPattern, ...] = (MinimumTimeGrainPattern(),) + if not only_use_minimum_grain: + # Default pattern must come first to avoid removing default grain options prematurely. + filters_to_use = ( + MetricTimeDefaultGranularityPattern( + metric_lookup=self._manifest_lookup.metric_lookup, queried_metrics=queried_metrics + ), + ) + filters_to_use + + for filter_to_use in filters_to_use: + push_down_result = push_down_result.filter_candidates_by_pattern(filter_to_use) + logger.info( f"Spec pattern:\n" f"{indent(mf_pformat(spec_pattern))}\n" @@ -135,6 +150,7 @@ def resolve_matching_item_for_filters( input_str: str, spec_pattern: SpecPattern, resolution_node: ResolutionDagSinkNode, + filter_location: WhereFilterLocation, ) -> GroupByItemResolution: """Returns the spec that matches the spec_pattern associated with the filter in the given node. @@ -147,12 +163,22 @@ def resolve_matching_item_for_filters( suggestion_generator = QueryItemSuggestionGenerator( input_naming_scheme=ObjectBuilderNamingScheme(), input_str=input_str, - candidate_filters=QueryItemSuggestionGenerator.FILTER_ITEM_CANDIDATE_FILTERS, + query_part=QueryPartForSuggestions.WHERE_FILTER, + metric_lookup=self._manifest_lookup.metric_lookup, + queried_metrics=filter_location.metric_references, ) push_down_visitor = _PushDownGroupByItemCandidatesVisitor( manifest_lookup=self._manifest_lookup, - source_spec_patterns=(spec_pattern, BaseTimeGrainPattern()), + source_spec_patterns=( + spec_pattern, + # MetricTimeDefaultGranularityPattern must come before MinimumTimeGrainPattern to ensure we don't remove the + # default grain from candiate set prematurely. + MetricTimeDefaultGranularityPattern( + metric_lookup=self._manifest_lookup.metric_lookup, queried_metrics=filter_location.metric_references + ), + MinimumTimeGrainPattern(), + ), suggestion_generator=suggestion_generator, ) @@ -210,16 +236,18 @@ def resolve_available_items( issue_set=push_down_result.issue_set, ) - def resolve_min_metric_time_grain(self) -> TimeGranularity: + def resolve_min_metric_time_grain(self, metrics_in_query: Sequence[MetricReference]) -> TimeGranularity: """Returns the finest time grain of metric_time for querying.""" metric_time_grain_resolution = self.resolve_matching_item_for_querying( spec_pattern=TimeDimensionPattern.from_call_parameter_set( TimeDimensionCallParameterSet( entity_path=(), time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), - ) + ), ), suggestion_generator=None, + queried_metrics=metrics_in_query, + only_use_minimum_grain=True, ) metric_time_spec_set = ( group_specs_by_type((metric_time_grain_resolution.spec,)) diff --git a/metricflow-semantics/metricflow_semantics/query/query_parser.py b/metricflow-semantics/metricflow_semantics/query/query_parser.py index f6177ca01d..0b52bdd734 100644 --- a/metricflow-semantics/metricflow_semantics/query/query_parser.py +++ b/metricflow-semantics/metricflow_semantics/query/query_parser.py @@ -11,7 +11,7 @@ ) from dbt_semantic_interfaces.protocols import SavedQuery from dbt_semantic_interfaces.protocols.where_filter import WhereFilter -from dbt_semantic_interfaces.references import SemanticModelReference +from dbt_semantic_interfaces.references import MetricReference, SemanticModelReference from dbt_semantic_interfaces.type_enums import TimeGranularity from metricflow_semantics.assert_one_arg import assert_at_most_one_arg_set @@ -52,8 +52,8 @@ ResolverInputForQuery, ResolverInputForQueryLevelWhereFilterIntersection, ) -from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern from metricflow_semantics.specs.patterns.metric_time_pattern import MetricTimePattern +from metricflow_semantics.specs.patterns.min_time_grain import MinimumTimeGrainPattern from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern from metricflow_semantics.specs.query_param_implementations import DimensionOrEntityParameter, MetricParameter from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec @@ -147,13 +147,14 @@ def _get_saved_query(self, saved_query_parameter: SavedQueryParameter) -> SavedQ return matching_saved_queries[0] - @staticmethod - def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec]) -> Optional[TimeGranularity]: + def _get_smallest_requested_metric_time_granularity( + self, time_dimension_specs: Sequence[TimeDimensionSpec] + ) -> Optional[TimeGranularity]: matching_specs: Sequence[InstanceSpec] = time_dimension_specs for pattern_to_apply in ( MetricTimePattern(), - BaseTimeGrainPattern(), + MinimumTimeGrainPattern(), NoneDatePartPattern(), ): matching_specs = pattern_to_apply.match(matching_specs) @@ -164,7 +165,7 @@ def _metric_time_granularity(time_dimension_specs: Sequence[TimeDimensionSpec]) assert ( len(time_dimension_specs) == 1 - ), f"Bug with BaseTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}" + ), f"Bug with MinimumTimeGrainPattern - should have returned exactly 1 spec but got {time_dimension_specs}" return time_dimension_specs[0].time_granularity @@ -173,19 +174,23 @@ def _adjust_time_constraint( resolution_dag: GroupByItemResolutionDag, time_dimension_specs_in_query: Sequence[TimeDimensionSpec], time_constraint: TimeRangeConstraint, + metrics_in_query: Sequence[MetricReference], ) -> TimeRangeConstraint: - metric_time_granularity = MetricFlowQueryParser._metric_time_granularity(time_dimension_specs_in_query) + """Change the time range so that the ends are at the ends of the requested time granularity windows. + + e.g. [2020-01-15, 2020-2-15] with MONTH granularity -> [2020-01-01, 2020-02-29] + """ + metric_time_granularity = self._get_smallest_requested_metric_time_granularity(time_dimension_specs_in_query) if metric_time_granularity is None: + # This indicates there were no metric time specs in the query, so use smallest available granularity for metric_time. group_by_item_resolver = GroupByItemResolver( manifest_lookup=self._manifest_lookup, resolution_dag=resolution_dag, ) - metric_time_granularity = group_by_item_resolver.resolve_min_metric_time_grain() - - """Change the time range so that the ends are at the ends of the appropriate time granularity windows. + metric_time_granularity = group_by_item_resolver.resolve_min_metric_time_grain( + metrics_in_query=metrics_in_query + ) - e.g. [2020-01-15, 2020-2-15] with MONTH granularity -> [2020-01-01, 2020-02-29] - """ return self._time_period_adjuster.expand_time_constraint_to_fill_granularity( time_constraint=time_constraint, granularity=metric_time_granularity, @@ -495,13 +500,13 @@ def _parse_and_validate_query( resolution_dag=query_resolution.resolution_dag, time_dimension_specs_in_query=query_spec.time_dimension_specs, time_constraint=time_constraint, + metrics_in_query=tuple( + metric_resolver_input.spec_pattern.metric_reference + for metric_resolver_input in resolver_inputs_for_metrics + ), ) logger.info(f"Time constraint after adjustment is: {time_constraint}") - - return ParseQueryResult( - query_spec=query_spec.with_time_range_constraint(time_constraint), - queried_semantic_models=query_resolution.queried_semantic_models, - ) + query_spec = query_spec.with_time_range_constraint(time_constraint) return ParseQueryResult( query_spec=query_spec, diff --git a/metricflow-semantics/metricflow_semantics/query/query_resolver.py b/metricflow-semantics/metricflow_semantics/query/query_resolver.py index a1517b5462..84a95c25fd 100644 --- a/metricflow-semantics/metricflow_semantics/query/query_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/query_resolver.py @@ -52,9 +52,8 @@ ResolverInputForQueryLevelWhereFilterIntersection, ResolverInputForWhereFilterIntersection, ) -from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator +from metricflow_semantics.query.suggestion_generator import QueryItemSuggestionGenerator, QueryPartForSuggestions from metricflow_semantics.query.validation_rules.query_validator import PostResolutionQueryValidator -from metricflow_semantics.specs.patterns.match_list_pattern import MatchListSpecPattern from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec from metricflow_semantics.specs.spec_classes import ( InstanceSpec, @@ -149,25 +148,26 @@ def _resolve_has_metric_or_group_by_inputs( ) return ResolveMetricOrGroupByItemsResult(input_to_issue_set_mapping=InputToIssueSetMapping.empty_instance()) - @staticmethod def _resolve_group_by_item_input( + self, group_by_item_input: ResolverInputForGroupByItem, group_by_item_resolver: GroupByItemResolver, valid_group_by_item_specs_for_querying: Sequence[LinkableInstanceSpec], + queried_metrics: Sequence[MetricReference], ) -> GroupByItemResolution: suggestion_generator = QueryItemSuggestionGenerator( input_naming_scheme=group_by_item_input.input_obj_naming_scheme, input_str=str(group_by_item_input.input_obj), - candidate_filters=QueryItemSuggestionGenerator.GROUP_BY_ITEM_CANDIDATE_FILTERS - + ( - MatchListSpecPattern( - listed_specs=valid_group_by_item_specs_for_querying, - ), - ), + query_part=QueryPartForSuggestions.GROUP_BY, + metric_lookup=self._manifest_lookup.metric_lookup, + queried_metrics=queried_metrics, + valid_group_by_item_specs_for_querying=valid_group_by_item_specs_for_querying, ) + return group_by_item_resolver.resolve_matching_item_for_querying( spec_pattern=group_by_item_input.spec_pattern, suggestion_generator=suggestion_generator, + queried_metrics=queried_metrics, ) def _resolve_metric_inputs( @@ -190,7 +190,9 @@ def _resolve_metric_inputs( suggestion_generator = QueryItemSuggestionGenerator( input_naming_scheme=MetricNamingScheme(), input_str=str(metric_input.input_obj), - candidate_filters=(), + query_part=QueryPartForSuggestions.METRIC, + metric_lookup=self._manifest_lookup.metric_lookup, + queried_metrics=tuple(metric_input.spec_pattern.metric_reference for metric_input in metric_inputs), ) metric_suggestions = suggestion_generator.input_suggestions(candidate_specs=available_metric_specs) input_to_issue_set_mapping_items.append( @@ -238,10 +240,11 @@ def _resolve_group_by_items_result( group_by_item_specs: List[LinkableInstanceSpec] = [] linkable_element_sets: List[LinkableElementSet] = [] for group_by_item_input in group_by_item_inputs: - resolution = MetricFlowQueryResolver._resolve_group_by_item_input( + resolution = self._resolve_group_by_item_input( group_by_item_resolver=group_by_item_resolver, group_by_item_input=group_by_item_input, valid_group_by_item_specs_for_querying=valid_group_by_item_specs_for_querying, + queried_metrics=metric_references, ) if resolution.issue_set.has_issues: input_to_issue_set_mapping_items.append( diff --git a/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py b/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py index ce53c50b74..06b0a4a18b 100644 --- a/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py +++ b/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py @@ -1,19 +1,34 @@ from __future__ import annotations import logging -from typing import Sequence, Tuple +from enum import Enum +from typing import Optional, Sequence, Tuple +from dbt_semantic_interfaces.enum_extension import assert_values_exhausted +from dbt_semantic_interfaces.references import MetricReference + +from metricflow_semantics.model.semantics.metric_lookup import MetricLookup from metricflow_semantics.naming.naming_scheme import QueryItemNamingScheme from metricflow_semantics.query.similarity import top_fuzzy_matches -from metricflow_semantics.specs.patterns.base_time_grain import BaseTimeGrainPattern +from metricflow_semantics.specs.patterns.match_list_pattern import MatchListSpecPattern +from metricflow_semantics.specs.patterns.metric_time_default_granularity import MetricTimeDefaultGranularityPattern +from metricflow_semantics.specs.patterns.min_time_grain import MinimumTimeGrainPattern from metricflow_semantics.specs.patterns.no_group_by_metric import NoGroupByMetricPattern from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern -from metricflow_semantics.specs.spec_classes import InstanceSpec +from metricflow_semantics.specs.spec_classes import InstanceSpec, LinkableInstanceSpec logger = logging.getLogger(__name__) +class QueryPartForSuggestions(Enum): + """Indicates which type of query parameter is being suggested.""" + + WHERE_FILTER = "where_filter" + GROUP_BY = "group_by" + METRIC = "metric" + + class QueryItemSuggestionGenerator: """Returns specs that partially match a spec pattern created from user input. Used for suggestions in errors. @@ -22,29 +37,67 @@ class QueryItemSuggestionGenerator: a candidate filter is not needed as any available spec at a resolution node can be used. """ - # Adding these filters so that we don't get multiple suggestions that are similar, but with different - # grains. Some additional thought is needed to tweak this as the base grain may not be the best suggestion. - FILTER_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = (BaseTimeGrainPattern(), NoneDatePartPattern()) - GROUP_BY_ITEM_CANDIDATE_FILTERS: Tuple[SpecPattern, ...] = ( - BaseTimeGrainPattern(), - NoneDatePartPattern(), - NoGroupByMetricPattern(), - ) - def __init__( # noqa: D107 - self, input_naming_scheme: QueryItemNamingScheme, input_str: str, candidate_filters: Sequence[SpecPattern] + self, + input_naming_scheme: QueryItemNamingScheme, + input_str: str, + query_part: QueryPartForSuggestions, + metric_lookup: MetricLookup, + queried_metrics: Sequence[MetricReference], + valid_group_by_item_specs_for_querying: Optional[Sequence[LinkableInstanceSpec]] = None, ) -> None: self._input_naming_scheme = input_naming_scheme self._input_str = input_str - self._candidate_filters = candidate_filters + self._query_part = query_part + self._metric_lookup = metric_lookup + self._queried_metrics = queried_metrics + self._valid_group_by_item_specs_for_querying = valid_group_by_item_specs_for_querying + + if self._query_part is QueryPartForSuggestions.GROUP_BY and valid_group_by_item_specs_for_querying is None: + raise ValueError( + "QueryItemSuggestionGenerator requires valid_group_by_item_specs_for_querying param when used on group by items." + ) + + @property + def candidate_filters(self) -> Tuple[SpecPattern, ...]: + """Filters to apply before determining suggestions. + + These ensure we don't get multiple suggestions that are similar, but with different grains or date_parts. + """ + default_filters = ( + NoneDatePartPattern(), + # MetricTimeDefaultGranularityPattern must come before MinimumTimeGrainPattern to ensure we don't remove the + # default grain from candiate set prematurely. + MetricTimeDefaultGranularityPattern( + metric_lookup=self._metric_lookup, queried_metrics=self._queried_metrics + ), + MinimumTimeGrainPattern(), + ) + if self._query_part is QueryPartForSuggestions.WHERE_FILTER: + return default_filters + elif self._query_part is QueryPartForSuggestions.GROUP_BY: + assert self._valid_group_by_item_specs_for_querying, ( + "Group by suggestions require valid_group_by_item_specs_for_querying param." + "This should have been validated on init." + ) + return default_filters + ( + NoGroupByMetricPattern(), + MatchListSpecPattern( + listed_specs=self._valid_group_by_item_specs_for_querying, + ), + ) + elif self._query_part is QueryPartForSuggestions.METRIC: + return () + else: + assert_values_exhausted(self._query_part) def input_suggestions( self, candidate_specs: Sequence[InstanceSpec], max_suggestions: int = 6, ) -> Sequence[str]: - """Return the best specs that match the given pattern from candidate_specs and match the candidate_filer.""" - for candidate_filter in self._candidate_filters: + """Return the best specs that match the given pattern from candidate_specs and match the candidate_filter.""" + for candidate_filter in self.candidate_filters: candidate_specs = candidate_filter.match(candidate_specs) # Use edit distance to figure out the closest matches, so convert the specs to strings. diff --git a/metricflow-semantics/metricflow_semantics/specs/patterns/metric_time_default_granularity.py b/metricflow-semantics/metricflow_semantics/specs/patterns/metric_time_default_granularity.py new file mode 100644 index 0000000000..2c61f407e4 --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/specs/patterns/metric_time_default_granularity.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from collections import defaultdict +from typing import Dict, List, Sequence, Set + +from dbt_semantic_interfaces.references import MetricReference +from dbt_semantic_interfaces.type_enums import TimeGranularity +from typing_extensions import override + +from metricflow_semantics.model.semantics.metric_lookup import MetricLookup +from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern +from metricflow_semantics.specs.spec_classes import ( + InstanceSpec, + LinkableInstanceSpec, + TimeDimensionSpec, + TimeDimensionSpecComparisonKey, + TimeDimensionSpecField, +) +from metricflow_semantics.specs.spec_set import group_specs_by_type + + +class MetricTimeDefaultGranularityPattern(SpecPattern): + """A pattern that matches metric_time specs, applying the default granularity for the requested metrics. + + If granularity is already selected or if no metrics were queried, no default is needed. All other specs are passed through. + + e.g., if a metric with default_granularity MONTH is queried + + inputs: + [ + TimeDimensionSpec('metric_time', 'day'), + TimeDimensionSpec('metric_time', 'week'), + TimeDimensionSpec('metric_time', 'month'), + DimensionSpec('listing__country'), + ] + + matches: + [ + TimeDimensionSpec('metric_time', 'month'), + DimensionSpec('listing__country'), + ] + + The finest grain represents the defined grain of the time dimension in the semantic model when evaluating specs + of the source. + + This pattern helps to implement matching of group-by-items. An ambiguously specified group-by-item can only match to + time dimension spec with the base grain. + """ + + def __init__(self, metric_lookup: MetricLookup, queried_metrics: Sequence[MetricReference] = ()) -> None: + """Match only time dimensions with the default granularity for a given query. + + Only affects time dimensions. All other items pass through. + """ + self._metric_lookup = metric_lookup + self._queried_metrics = queried_metrics + + @override + def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]: + default_granularity_for_metrics = self._metric_lookup.get_default_granularity_for_metrics(self._queried_metrics) + spec_set = group_specs_by_type(candidate_specs) + # If there are no metrics or metric_time specs in the query, skip this filter. + if not (default_granularity_for_metrics and spec_set.metric_time_specs): + return candidate_specs + + spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set) + spec_key_to_specs: Dict[TimeDimensionSpecComparisonKey, List[TimeDimensionSpec]] = defaultdict(list) + for metric_time_spec in spec_set.metric_time_specs: + spec_key = metric_time_spec.comparison_key(exclude_fields=(TimeDimensionSpecField.TIME_GRANULARITY,)) + spec_key_to_grains[spec_key].add(metric_time_spec.time_granularity) + spec_key_to_specs[spec_key].append(metric_time_spec) + + matched_metric_time_specs: List[TimeDimensionSpec] = [] + for spec_key, time_grains in spec_key_to_grains.items(): + if default_granularity_for_metrics in time_grains: + matched_metric_time_specs.append( + spec_key_to_specs[spec_key][0].with_grain(default_granularity_for_metrics) + ) + else: + # If default_granularity is not in the available options, then time granularity was specified in the request + # and a default is not needed here. Pass all options through. + matched_metric_time_specs.extend(spec_key_to_specs[spec_key]) + + matching_specs: Sequence[LinkableInstanceSpec] = ( + spec_set.dimension_specs + + tuple(matched_metric_time_specs) + + tuple(spec for spec in spec_set.time_dimension_specs if not spec.is_metric_time) + + spec_set.entity_specs + + spec_set.group_by_metric_specs + ) + + return matching_specs diff --git a/metricflow-semantics/metricflow_semantics/specs/patterns/base_time_grain.py b/metricflow-semantics/metricflow_semantics/specs/patterns/min_time_grain.py similarity index 94% rename from metricflow-semantics/metricflow_semantics/specs/patterns/base_time_grain.py rename to metricflow-semantics/metricflow_semantics/specs/patterns/min_time_grain.py index 34b44c1267..dcd1a930ce 100644 --- a/metricflow-semantics/metricflow_semantics/specs/patterns/base_time_grain.py +++ b/metricflow-semantics/metricflow_semantics/specs/patterns/min_time_grain.py @@ -18,7 +18,7 @@ from metricflow_semantics.specs.spec_set import group_specs_by_type -class BaseTimeGrainPattern(SpecPattern): +class MinimumTimeGrainPattern(SpecPattern): """A pattern that matches linkable specs, but for time dimension specs, only the one with the finest grain. e.g. @@ -63,7 +63,9 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpe metric_time_specs = MetricTimePattern().match(candidate_specs) other_specs = tuple(spec for spec in candidate_specs if spec not in metric_time_specs) - return other_specs + tuple(BaseTimeGrainPattern(only_apply_for_metric_time=False).match(metric_time_specs)) + return other_specs + tuple( + MinimumTimeGrainPattern(only_apply_for_metric_time=False).match(metric_time_specs) + ) spec_set = group_specs_by_type(candidate_specs) diff --git a/metricflow-semantics/metricflow_semantics/specs/query_param_implementations.py b/metricflow-semantics/metricflow_semantics/specs/query_param_implementations.py index 697b65db8f..d13b2bcd05 100644 --- a/metricflow-semantics/metricflow_semantics/specs/query_param_implementations.py +++ b/metricflow-semantics/metricflow_semantics/specs/query_param_implementations.py @@ -41,11 +41,6 @@ def _implements_protocol(self) -> TimeDimensionQueryParameter: grain: Optional[TimeGranularity] = None date_part: Optional[DatePart] = None - def __post_init__(self) -> None: # noqa: D105 - parsed_name = StructuredLinkableSpecName.from_name(self.name) - if parsed_name.time_granularity: - raise ValueError("Must use object syntax for `grain` parameter if `date_part` is requested.") - @property def query_resolver_input(self) -> ResolverInputForGroupByItem: # noqa: D102 fields_to_compare = [ diff --git a/metricflow-semantics/metricflow_semantics/specs/spec_classes.py b/metricflow-semantics/metricflow_semantics/specs/spec_classes.py index 546f07f58d..ebfdeb7c1c 100644 --- a/metricflow-semantics/metricflow_semantics/specs/spec_classes.py +++ b/metricflow-semantics/metricflow_semantics/specs/spec_classes.py @@ -570,6 +570,7 @@ class MetricSpec(InstanceSpec): # noqa: D101 alias: Optional[str] = None offset_window: Optional[PydanticMetricTimeWindow] = None offset_to_grain: Optional[TimeGranularity] = None + default_granularity: Optional[TimeGranularity] = None @staticmethod def from_element_name(element_name: str) -> MetricSpec: # noqa: D102 @@ -598,7 +599,12 @@ def has_time_offset(self) -> bool: # noqa: D102 def without_offset(self) -> MetricSpec: """Represents the metric spec with any time offsets removed.""" - return MetricSpec(element_name=self.element_name, filter_specs=self.filter_specs, alias=self.alias) + return MetricSpec( + element_name=self.element_name, + filter_specs=self.filter_specs, + alias=self.alias, + default_granularity=self.default_granularity, + ) @dataclass(frozen=True) diff --git a/metricflow-semantics/metricflow_semantics/test_helpers/semantic_manifest_yamls/simple_manifest/metrics.yaml b/metricflow-semantics/metricflow_semantics/test_helpers/semantic_manifest_yamls/simple_manifest/metrics.yaml index 877453249f..1f815fa52d 100644 --- a/metricflow-semantics/metricflow_semantics/test_helpers/semantic_manifest_yamls/simple_manifest/metrics.yaml +++ b/metricflow-semantics/metricflow_semantics/test_helpers/semantic_manifest_yamls/simple_manifest/metrics.yaml @@ -114,6 +114,7 @@ metric: type: simple type_params: measure: largest_listing + default_granularity: month --- metric: name: "identity_verifications" @@ -626,6 +627,7 @@ metric: name: bookings join_to_timespine: true fill_nulls_with: 0 + default_granularity: week --- metric: name: every_two_days_bookers_fill_nulls_with_0 @@ -780,3 +782,4 @@ metric: denominator: name: listings filter: "{{ Metric('views', ['listing']) }} > 10" + default_granularity: week diff --git a/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_filters.py b/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_filters.py index 76b5f61169..c69f5f0de9 100644 --- a/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_filters.py +++ b/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_filters.py @@ -9,6 +9,7 @@ from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow_semantics.naming.naming_scheme import QueryItemNamingScheme from metricflow_semantics.naming.object_builder_scheme import ObjectBuilderNamingScheme +from metricflow_semantics.query.group_by_item.filter_spec_resolution.filter_location import WhereFilterLocation from metricflow_semantics.query.group_by_item.group_by_item_resolver import GroupByItemResolver from metricflow_semantics.query.group_by_item.resolution_dag.dag import GroupByItemResolutionDag from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration @@ -42,6 +43,7 @@ def test_ambiguous_metric_time_in_query_filter( # noqa: D103 input_str=input_str, spec_pattern=spec_pattern, resolution_node=resolution_dag.sink_node, + filter_location=WhereFilterLocation(metric_references=()), ) assert_object_snapshot_equal( diff --git a/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_querying.py b/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_querying.py index 41c0cf6afc..cc203505cf 100644 --- a/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_querying.py +++ b/metricflow-semantics/tests_metricflow_semantics/query/group_by_item/test_matching_item_for_querying.py @@ -16,6 +16,9 @@ from metricflow_semantics.query.group_by_item.resolution_dag.resolution_nodes.metric_resolution_node import ( MetricGroupByItemResolutionNode, ) +from metricflow_semantics.query.group_by_item.resolution_dag.resolution_nodes.query_resolution_node import ( + QueryGroupByItemResolutionNode, +) from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration from metricflow_semantics.test_helpers.metric_time_dimension import MTD_SPEC_DAY, MTD_SPEC_MONTH, MTD_SPEC_YEAR from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal @@ -39,10 +42,9 @@ def test_ambiguous_metric_time_in_query( # noqa: D103 ) spec_pattern = ObjectBuilderNamingScheme().spec_pattern(f"TimeDimension('{METRIC_TIME_ELEMENT_NAME}')") - + assert isinstance(resolution_dag.sink_node, QueryGroupByItemResolutionNode) result = group_by_item_resolver.resolve_matching_item_for_querying( - spec_pattern=spec_pattern, - suggestion_generator=None, + spec_pattern=spec_pattern, suggestion_generator=None, queried_metrics=resolution_dag.sink_node.metrics_in_query ) if case_id is AmbiguousResolutionQueryId.NO_METRICS: @@ -81,6 +83,7 @@ def test_unavailable_group_by_item_in_derived_metric_parent( result = group_by_item_resolver.resolve_matching_item_for_querying( spec_pattern=spec_pattern, suggestion_generator=None, + queried_metrics=(MetricReference("derived_metric_with_different_parent_time_grains"),), ) assert_object_snapshot_equal( @@ -108,6 +111,7 @@ def test_invalid_group_by_item( # noqa: D103 result = group_by_item_resolver.resolve_matching_item_for_querying( spec_pattern=naming_scheme.spec_pattern(input_str), suggestion_generator=None, + queried_metrics=(MetricReference("monthly_metric_0"), MetricReference("yearly_metric_0")), ) assert_object_snapshot_equal( diff --git a/metricflow-semantics/tests_metricflow_semantics/query/test_query_parser.py b/metricflow-semantics/tests_metricflow_semantics/query/test_query_parser.py index 79f1e57e34..ce615ad1e4 100644 --- a/metricflow-semantics/tests_metricflow_semantics/query/test_query_parser.py +++ b/metricflow-semantics/tests_metricflow_semantics/query/test_query_parser.py @@ -30,7 +30,7 @@ from metricflow_semantics.test_helpers.example_project_configuration import ( EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, ) -from metricflow_semantics.test_helpers.metric_time_dimension import MTD +from metricflow_semantics.test_helpers.metric_time_dimension import MTD, MTD_SPEC_MONTH, MTD_SPEC_YEAR from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal logger = logging.getLogger(__name__) @@ -53,6 +53,10 @@ expr: "1" agg: sum create_metric: true + - name: monthly_bookings + expr: "1" + agg: sum + agg_time_dimension: ds_month dimensions: - name: is_instant @@ -61,6 +65,10 @@ type: time type_params: time_granularity: day + - name: ds_month + type: time + type_params: + time_granularity: month primary_entity: booking @@ -68,6 +76,33 @@ - name: listing type: foreign expr: listing_id + + --- + metric: + name: instant_bookings + description: instant bookings + type: simple + type_params: + measure: bookings + filter: "{{ Dimension('booking__is_instant') }}" + default_granularity: year + --- + metric: + name: month_bookings + description: monthly bookings + type: simple + type_params: + measure: monthly_bookings + --- + metric: + name: instant_plus_months_bookings + description: instant plus month bookings + type: derived + type_params: + expr: instant_bookings + month_bookings + metrics: + - name: instant_bookings + - name: month_bookings """ ) @@ -626,3 +661,34 @@ def test_invalid_group_by_metric(bookings_query_parser: MetricFlowQueryParser) - bookings_query_parser.parse_and_validate_query( metric_names=("bookings",), where_constraint_str="{{ Metric('listings', ['garbage']) }} > 1" ) + + +def test_default_granularity(bookings_query_parser: MetricFlowQueryParser) -> None: + """Tests different scenarios using default granularity.""" + # Metric with default_granularity set + query_spec = bookings_query_parser.parse_and_validate_query( + metric_names=("instant_bookings",), group_by_names=("metric_time",) + ).query_spec + assert len(query_spec.time_dimension_specs) == 1 + assert query_spec.time_dimension_specs[0] == MTD_SPEC_YEAR + + # Metric without default_granularity set + query_spec = bookings_query_parser.parse_and_validate_query( + metric_names=("month_bookings",), group_by_names=("metric_time",) + ).query_spec + assert len(query_spec.time_dimension_specs) == 1 + assert query_spec.time_dimension_specs[0] == MTD_SPEC_MONTH + + # Derived metric with different agg_time_dimensions and no default granularity set + query_spec = bookings_query_parser.parse_and_validate_query( + metric_names=("instant_plus_months_bookings",), group_by_names=("metric_time",) + ).query_spec + assert len(query_spec.time_dimension_specs) == 1 + assert query_spec.time_dimension_specs[0] == MTD_SPEC_MONTH + + # Using non-metric_time - should ignore default granularity + query_spec = bookings_query_parser.parse_and_validate_query( + metric_names=("instant_bookings",), group_by_names=("booking__ds",) + ).query_spec + assert len(query_spec.time_dimension_specs) == 1 + assert query_spec.time_dimension_specs[0].time_granularity == TimeGranularity.DAY diff --git a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_filter__result.txt b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_filter__result.txt index 119b0392e5..80b32c7364 100644 --- a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_filter__result.txt +++ b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_filter__result.txt @@ -22,116 +22,53 @@ FilterSpecResolutionLookUp( ), ], ), - resolved_linkable_element_set=LinkableElementSet(), - spec_pattern=TimeDimensionPattern( - parameter_set=EntityLinkPatternParameterSet( - fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), - element_name='metric_time', - ), - ), - issue_set=MetricFlowQueryResolutionIssueSet( - issues=( - NoCommonItemsInParents( - issue_type=ERROR, - query_resolution_path=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_2), + resolved_linkable_element_set=LinkableElementSet( + path_key_to_linkable_dimensions={ + ElementPathKey( + element_name='metric_time', + element_type=TIME_DIMENSION, + time_granularity=YEAR, + ): ( + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), - ), - parent_candidate_sets=( - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=MONTH, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=MONTH, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_0), - MeasureGroupByItemSourceNode(node_id=msr_0), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_0), - ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), ), - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=YEAR, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=YEAR, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_1), - MeasureGroupByItemSourceNode(node_id=msr_1), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_1), - ), + properties=frozenset( + 'DERIVED_TIME_GRANULARITY', + 'METRIC_TIME', + ), + time_granularity=YEAR, + ), + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='yearly_measure_source', + ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='yearly_measure_source', ), ), + properties=frozenset( + 'METRIC_TIME', + ), + time_granularity=YEAR, ), ), + }, + ), + spec_pattern=TimeDimensionPattern( + parameter_set=EntityLinkPatternParameterSet( + fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), + element_name='metric_time', ), ), filter_location_path=MetricFlowQueryResolutionPath( diff --git a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_input_filter__result.txt b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_input_filter__result.txt index 7d9f61426a..139eba5580 100644 --- a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_input_filter__result.txt +++ b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_resolution_for_invalid_metric_input_filter__result.txt @@ -22,121 +22,53 @@ FilterSpecResolutionLookUp( ), ], ), - resolved_linkable_element_set=LinkableElementSet(), - spec_pattern=TimeDimensionPattern( - parameter_set=EntityLinkPatternParameterSet( - fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), - element_name='metric_time', - ), - ), - issue_set=MetricFlowQueryResolutionIssueSet( - issues=( - NoCommonItemsInParents( - issue_type=ERROR, - query_resolution_path=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MetricGroupByItemResolutionNode(node_id=mtr_2), + resolved_linkable_element_set=LinkableElementSet( + path_key_to_linkable_dimensions={ + ElementPathKey( + element_name='metric_time', + element_type=TIME_DIMENSION, + time_granularity=YEAR, + ): ( + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), - ), - parent_candidate_sets=( - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=MONTH, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=MONTH, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_0), - MeasureGroupByItemSourceNode(node_id=msr_0), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_0), - ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), ), - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=YEAR, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=YEAR, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_1), - MeasureGroupByItemSourceNode(node_id=msr_1), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_0), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MetricGroupByItemResolutionNode(node_id=mtr_2), - MetricGroupByItemResolutionNode(node_id=mtr_1), - ), + properties=frozenset( + 'DERIVED_TIME_GRANULARITY', + 'METRIC_TIME', + ), + time_granularity=YEAR, + ), + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='yearly_measure_source', + ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='yearly_measure_source', ), ), + properties=frozenset( + 'METRIC_TIME', + ), + time_granularity=YEAR, ), ), + }, + ), + spec_pattern=TimeDimensionPattern( + parameter_set=EntityLinkPatternParameterSet( + fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), + element_name='metric_time', ), ), filter_location_path=MetricFlowQueryResolutionPath( diff --git a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__derived_metric_with_different_parent_time_grains__result.txt b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__derived_metric_with_different_parent_time_grains__result.txt index 2fb667f41c..affa1a709f 100644 --- a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__derived_metric_with_different_parent_time_grains__result.txt +++ b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__derived_metric_with_different_parent_time_grains__result.txt @@ -22,116 +22,53 @@ FilterSpecResolutionLookUp( ), ], ), - resolved_linkable_element_set=LinkableElementSet(), - spec_pattern=TimeDimensionPattern( - parameter_set=EntityLinkPatternParameterSet( - fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), - element_name='metric_time', - ), - ), - issue_set=MetricFlowQueryResolutionIssueSet( - issues=( - NoCommonItemsInParents( - issue_type=ERROR, - query_resolution_path=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_5), - MetricGroupByItemResolutionNode(node_id=mtr_10), + resolved_linkable_element_set=LinkableElementSet( + path_key_to_linkable_dimensions={ + ElementPathKey( + element_name='metric_time', + element_type=TIME_DIMENSION, + time_granularity=YEAR, + ): ( + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), - ), - parent_candidate_sets=( - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=MONTH, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=MONTH, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_5), - MetricGroupByItemResolutionNode(node_id=mtr_10), - MetricGroupByItemResolutionNode(node_id=mtr_8), - MeasureGroupByItemSourceNode(node_id=msr_7), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_5), - MetricGroupByItemResolutionNode(node_id=mtr_10), - MetricGroupByItemResolutionNode(node_id=mtr_8), - ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), ), - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=YEAR, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=YEAR, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_5), - MetricGroupByItemResolutionNode(node_id=mtr_10), - MetricGroupByItemResolutionNode(node_id=mtr_9), - MeasureGroupByItemSourceNode(node_id=msr_8), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_5), - MetricGroupByItemResolutionNode(node_id=mtr_10), - MetricGroupByItemResolutionNode(node_id=mtr_9), - ), + properties=frozenset( + 'DERIVED_TIME_GRANULARITY', + 'METRIC_TIME', + ), + time_granularity=YEAR, + ), + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='yearly_measure_source', + ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='yearly_measure_source', ), ), + properties=frozenset( + 'METRIC_TIME', + ), + time_granularity=YEAR, ), ), + }, + ), + spec_pattern=TimeDimensionPattern( + parameter_set=EntityLinkPatternParameterSet( + fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), + element_name='metric_time', ), ), filter_location_path=MetricFlowQueryResolutionPath( diff --git a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__metrics_with_different_time_grains__result.txt b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__metrics_with_different_time_grains__result.txt index 870dd193e7..2dd9caf21d 100644 --- a/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__metrics_with_different_time_grains__result.txt +++ b/metricflow-semantics/tests_metricflow_semantics/snapshots/test_spec_lookup.py/str/test_filter_spec_resolution__metrics_with_different_time_grains__result.txt @@ -25,111 +25,53 @@ FilterSpecResolutionLookUp( ), ], ), - resolved_linkable_element_set=LinkableElementSet(), - spec_pattern=TimeDimensionPattern( - parameter_set=EntityLinkPatternParameterSet( - fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), - element_name='metric_time', - ), - ), - issue_set=MetricFlowQueryResolutionIssueSet( - issues=( - NoCommonItemsInParents( - issue_type=ERROR, - query_resolution_path=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_3), + resolved_linkable_element_set=LinkableElementSet( + path_key_to_linkable_dimensions={ + ElementPathKey( + element_name='metric_time', + element_type=TIME_DIMENSION, + time_granularity=YEAR, + ): ( + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), - ), - parent_candidate_sets=( - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=MONTH, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='monthly_measures_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=MONTH, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_3), - MetricGroupByItemResolutionNode(node_id=mtr_3), - MeasureGroupByItemSourceNode(node_id=msr_3), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_3), - MetricGroupByItemResolutionNode(node_id=mtr_3), - ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='monthly_measures_source', ), ), - GroupByItemCandidateSet( - linkable_element_set=LinkableElementSet( - path_key_to_linkable_dimensions={ - ElementPathKey( - element_name='metric_time', - element_type=TIME_DIMENSION, - time_granularity=YEAR, - ): ( - LinkableDimension( - defined_in_semantic_model=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - element_name='metric_time', - dimension_type=TIME, - join_path=SemanticModelJoinPath( - left_semantic_model_reference=SemanticModelReference( - semantic_model_name='yearly_measure_source', - ), - ), - properties=frozenset( - 'METRIC_TIME', - ), - time_granularity=YEAR, - ), - ), - }, - ), - measure_paths=( - MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_3), - MetricGroupByItemResolutionNode(node_id=mtr_4), - MeasureGroupByItemSourceNode(node_id=msr_4), - ), - ), - ), - path_from_leaf_node=MetricFlowQueryResolutionPath( - resolution_path_nodes=( - QueryGroupByItemResolutionNode(node_id=qr_3), - MetricGroupByItemResolutionNode(node_id=mtr_4), - ), + properties=frozenset( + 'DERIVED_TIME_GRANULARITY', + 'METRIC_TIME', + ), + time_granularity=YEAR, + ), + LinkableDimension( + defined_in_semantic_model=SemanticModelReference( + semantic_model_name='yearly_measure_source', + ), + element_name='metric_time', + dimension_type=TIME, + join_path=SemanticModelJoinPath( + left_semantic_model_reference=SemanticModelReference( + semantic_model_name='yearly_measure_source', ), ), + properties=frozenset( + 'METRIC_TIME', + ), + time_granularity=YEAR, ), ), + }, + ), + spec_pattern=TimeDimensionPattern( + parameter_set=EntityLinkPatternParameterSet( + fields_to_compare=(DATE_PART, ELEMENT_NAME, ENTITY_LINKS), + element_name='metric_time', ), ), filter_location_path=MetricFlowQueryResolutionPath( diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 096d63edbf..d9bee6b645 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -432,7 +432,6 @@ def _build_cumulative_metric_output_node( predicate_pushdown_state: PredicatePushdownState, for_group_by_source_node: bool = False, ) -> DataflowPlanNode: - # TODO: replace with default_grain once added to YAML spec default_granularity = self._metric_lookup.get_min_queryable_time_granularity(metric_spec.reference) queried_agg_time_dimensions = queried_linkable_specs.included_agg_time_dimension_specs_for_metric( diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index c3ad1d542f..623eaa568e 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -296,8 +296,7 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat input_data_set_alias = self._next_unique_table_alias() # Find requested agg_time_dimensions in parent instance set. - # For now, will use instance with smallest granularity in time spine join. - # TODO: use metric's default_grain once that property is available. + # Will use instance with smallest granularity in time spine join. agg_time_dimension_instance_for_join: Optional[TimeDimensionInstance] = None requested_agg_time_dimension_instances: Tuple[TimeDimensionInstance, ...] = () for instance in input_data_set.instance_set.time_dimension_instances: diff --git a/tests_metricflow/integration/query_output/test_fill_nulls_with_0.py b/tests_metricflow/integration/query_output/test_fill_nulls_with_0.py index 3847b3d572..94111e2df3 100644 --- a/tests_metricflow/integration/query_output/test_fill_nulls_with_0.py +++ b/tests_metricflow/integration/query_output/test_fill_nulls_with_0.py @@ -22,8 +22,8 @@ def test_simple_fill_nulls_with_0_metric_time( # noqa: D103 query_result = it_helpers.mf_engine.query( MetricFlowQueryRequest.create_with_random_request_id( metric_names=["bookings_fill_nulls_with_0"], - group_by_names=["metric_time"], - order_by_names=["metric_time"], + group_by_names=["metric_time__day"], + order_by_names=["metric_time__day"], time_constraint_start=datetime.datetime(2019, 11, 27), time_constraint_end=datetime.datetime(2020, 1, 5), ) @@ -103,8 +103,8 @@ def test_fill_nulls_with_0_multi_metric_query( # noqa: D103 query_result = it_helpers.mf_engine.query( MetricFlowQueryRequest.create_with_random_request_id( metric_names=["bookings_fill_nulls_with_0", "views"], - group_by_names=["metric_time"], - order_by_names=["metric_time"], + group_by_names=["metric_time__day"], + order_by_names=["metric_time__day"], time_constraint_start=datetime.datetime(2019, 11, 27), time_constraint_end=datetime.datetime(2020, 1, 5), ) diff --git a/tests_metricflow/integration/test_cases/itest_granularity.yaml b/tests_metricflow/integration/test_cases/itest_granularity.yaml index ea3f974516..34e3cec01d 100644 --- a/tests_metricflow/integration/test_cases/itest_granularity.yaml +++ b/tests_metricflow/integration/test_cases/itest_granularity.yaml @@ -221,7 +221,7 @@ integration_test: --- integration_test: name: daily_metric_with_monthly_time_dimension - description: Query a metric with a month-granularity time dimensions. + description: Query a metric with a month-granularity time dimensions. Filter should expand to requested granularity. model: EXTENDED_DATE_MODEL metrics: ["bookings"] group_bys: ["metric_time__month"] diff --git a/tests_metricflow/integration/test_cases/itest_measure_aggregations.yaml b/tests_metricflow/integration/test_cases/itest_measure_aggregations.yaml index cc98ed9828..e3fc977427 100644 --- a/tests_metricflow/integration/test_cases/itest_measure_aggregations.yaml +++ b/tests_metricflow/integration/test_cases/itest_measure_aggregations.yaml @@ -8,7 +8,7 @@ integration_test: check_query: | SELECT MAX(capacity) as largest_listing - , created_at AS metric_time__day + , {{ render_date_trunc("created_at", TimeGranularity.MONTH) }} AS metric_time__month FROM {{ source_schema }}.dim_listings_latest GROUP BY 2 diff --git a/tests_metricflow/integration/test_cases/itest_metrics.yaml b/tests_metricflow/integration/test_cases/itest_metrics.yaml index 54006e9be4..a7967b7e8f 100644 --- a/tests_metricflow/integration/test_cases/itest_metrics.yaml +++ b/tests_metricflow/integration/test_cases/itest_metrics.yaml @@ -1140,7 +1140,7 @@ integration_test: description: Test a simple query that joins to time spine and fills nulls model: SIMPLE_MODEL metrics: ["bookings_fill_nulls_with_0"] - group_by_objs: [{"name": "metric_time"}] + group_by_objs: [{"name": "metric_time", "grain": "day"}] check_query: | SELECT subq_5.ds AS metric_time__day @@ -1340,7 +1340,7 @@ integration_test: description: Test a multi-metric query that fills nulls model: SIMPLE_MODEL metrics: ["bookings_fill_nulls_with_0", "views"] - group_by_objs: [{"name": "metric_time"}] + group_by_objs: [{"name": "metric_time", "grain": "day"}] check_query: | SELECT COALESCE(subq_7.metric_time__day, subq_12.metric_time__day) AS metric_time__day diff --git a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py index ee8b868453..9feab48212 100644 --- a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py +++ b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py @@ -201,7 +201,7 @@ def test_join_to_time_spine_with_filters( # noqa: D103 metric_names=("bookings_fill_nulls_with_0",), group_by_names=("metric_time__day",), where_constraint=PydanticWhereFilter( - where_sql_template="{{ TimeDimension('metric_time') }} > '2020-01-01'", + where_sql_template="{{ TimeDimension('metric_time', 'day') }} > '2020-01-01'", ), time_constraint_start=datetime.datetime(2020, 1, 3), time_constraint_end=datetime.datetime(2020, 1, 5),