From e50b51b867b3c440618bbbe801543c25c2d1668e Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Tue, 24 Sep 2024 10:26:48 -0700 Subject: [PATCH] /* PR_START p--short-term-perf 05 */ Lazily generate group-by-item candidates for suggestions. --- .../candidate_push_down/push_down_visitor.py | 29 +++++++++++-------- .../group_by_item/group_by_item_resolver.py | 6 +++- .../query/query_resolver.py | 17 ++++------- .../query/suggestion_generator.py | 8 ++++- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py index 155383511d..bf610cdce1 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import typing from contextlib import contextmanager from dataclasses import dataclass from typing import Dict, FrozenSet, Iterator, List, Optional, Sequence, Set, Tuple @@ -56,6 +57,10 @@ from metricflow_semantics.specs.patterns.none_date_part import NoneDatePartPattern from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern +if typing.TYPE_CHECKING: + from metricflow_semantics.query.group_by_item.group_by_item_resolver import GroupByItemResolver + + logger = logging.getLogger(__name__) @@ -140,6 +145,7 @@ def __init__( self, manifest_lookup: SemanticManifestLookup, suggestion_generator: Optional[QueryItemSuggestionGenerator], + group_by_item_resolver: GroupByItemResolver, source_spec_patterns: Sequence[SpecPattern] = (), with_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, without_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, @@ -149,6 +155,7 @@ def __init__( Args: manifest_lookup: The semantic manifest lookup associated with the resolution DAG that this will traverse. + group_by_item_resolver: The group-by-item resolver for the query to help generate suggestions. suggestion_generator: If there are issues with matching patterns to specs, use this to generate suggestions that will go in the issue. source_spec_patterns: The patterns to apply to the specs available at the measure nodes. @@ -164,6 +171,7 @@ def __init__( self._without_any_property = without_any_property self._suggestion_generator = suggestion_generator self._filter_location = filter_location + self._group_by_item_resolver_for_query = group_by_item_resolver @override def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResult: @@ -221,24 +229,21 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu # The specified patterns don't match to any of the available group-by-items that can be queried for the # measure. if matching_items.spec_count == 0: - items_available_for_measure_given_child_metric = items_available_for_measure.filter_by_spec_patterns( - patterns_to_apply - ) + input_suggestions: Sequence[str] = () + + if self._suggestion_generator is not None: + candidate_specs = self._group_by_item_resolver_for_query.resolve_available_items( + source_spec_patterns=self._suggestion_generator.candidate_filters + ).specs + input_suggestions = self._suggestion_generator.input_suggestions(tuple(candidate_specs)) + return PushDownResult( candidate_set=GroupByItemCandidateSet.empty_instance(), issue_set=MetricFlowQueryResolutionIssueSet.from_issue( NoMatchingItemsForMeasure.from_parameters( parent_issues=(), query_resolution_path=current_traversal_path, - input_suggestions=( - tuple( - self._suggestion_generator.input_suggestions( - items_available_for_measure_given_child_metric.specs - ) - ) - if self._suggestion_generator is not None - else () - ), + input_suggestions=input_suggestions, ) ), ) diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py index 9799e572ab..4e13c4cbe3 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py @@ -92,6 +92,7 @@ def resolve_matching_item_for_querying( push_down_visitor = _PushDownGroupByItemCandidatesVisitor( manifest_lookup=self._manifest_lookup, source_spec_patterns=(spec_pattern, NoGroupByMetricPattern()), + group_by_item_resolver=self, suggestion_generator=suggestion_generator, ) @@ -163,6 +164,7 @@ def resolve_matching_item_for_filters( push_down_visitor = _PushDownGroupByItemCandidatesVisitor( manifest_lookup=self._manifest_lookup, source_spec_patterns=(spec_pattern,), + group_by_item_resolver=self, suggestion_generator=suggestion_generator, filter_location=filter_location, ) @@ -206,6 +208,7 @@ def resolve_matching_item_for_filters( def resolve_available_items( self, resolution_node: Optional[ResolutionDagSinkNode] = None, + source_spec_patterns: Sequence[SpecPattern] = (), ) -> AvailableGroupByItemsResolution: """Return all available group-by-items at a given node. @@ -217,7 +220,8 @@ def resolve_available_items( push_down_visitor = _PushDownGroupByItemCandidatesVisitor( manifest_lookup=self._manifest_lookup, - source_spec_patterns=(), + source_spec_patterns=source_spec_patterns, + group_by_item_resolver=self, suggestion_generator=None, ) diff --git a/metricflow-semantics/metricflow_semantics/query/query_resolver.py b/metricflow-semantics/metricflow_semantics/query/query_resolver.py index 9f38fcf630..5181ebf642 100644 --- a/metricflow-semantics/metricflow_semantics/query/query_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/query_resolver.py @@ -58,7 +58,6 @@ from metricflow_semantics.specs.instance_spec import InstanceSpec, LinkableInstanceSpec from metricflow_semantics.specs.metric_spec import MetricSpec from metricflow_semantics.specs.order_by_spec import OrderBySpec -from metricflow_semantics.specs.patterns.match_list_pattern import MatchListSpecPattern from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec from metricflow_semantics.specs.spec_set import group_specs_by_type from metricflow_semantics.workarounds.reference import sorted_semantic_model_references @@ -151,17 +150,11 @@ def _resolve_has_metric_or_group_by_inputs( def _resolve_group_by_item_input( group_by_item_input: ResolverInputForGroupByItem, group_by_item_resolver: GroupByItemResolver, - valid_group_by_item_specs_for_querying: Sequence[LinkableInstanceSpec], ) -> GroupByItemResolution: suggestion_generator = QueryItemSuggestionGenerator( input_naming_scheme=group_by_item_input.input_obj_naming_scheme, input_str=str(group_by_item_input.input_obj), - candidate_filters=QueryItemSuggestionGenerator.GROUP_BY_ITEM_CANDIDATE_FILTERS - + ( - MatchListSpecPattern.create( - listed_specs=valid_group_by_item_specs_for_querying, - ), - ), + candidate_filters=QueryItemSuggestionGenerator.GROUP_BY_ITEM_CANDIDATE_FILTERS, ) return group_by_item_resolver.resolve_matching_item_for_querying( spec_pattern=group_by_item_input.spec_pattern, @@ -230,8 +223,6 @@ def _resolve_group_by_items_result( resolution_dag=resolution_dag, ) - valid_group_by_item_specs_for_querying = group_by_item_resolver.resolve_available_items().specs - input_to_issue_set_mapping_items: List[InputToIssueSetMappingItem] = [] group_by_item_specs: List[LinkableInstanceSpec] = [] linkable_element_sets: List[LinkableElementSet] = [] @@ -239,7 +230,6 @@ def _resolve_group_by_items_result( resolution = MetricFlowQueryResolver._resolve_group_by_item_input( group_by_item_resolver=group_by_item_resolver, group_by_item_input=group_by_item_input, - valid_group_by_item_specs_for_querying=valid_group_by_item_specs_for_querying, ) if resolution.issue_set.has_issues: input_to_issue_set_mapping_items.append( @@ -395,7 +385,10 @@ def _resolve_query(self, resolver_input_for_query: ResolverInputForQuery) -> Met mappings_to_merge.append(resolve_metrics_or_group_by_result.input_to_issue_set_mapping) # Resolve metrics. - resolve_metrics_result = self._resolve_metric_inputs(metric_inputs, query_resolution_path=query_resolution_path) + resolve_metrics_result = self._resolve_metric_inputs( + metric_inputs=metric_inputs, + query_resolution_path=query_resolution_path, + ) mappings_to_merge.append(resolve_metrics_result.input_to_issue_set_mapping) metric_specs = resolve_metrics_result.metric_specs diff --git a/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py b/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py index 02dcb4b54a..55e24bc240 100644 --- a/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py +++ b/metricflow-semantics/metricflow_semantics/query/suggestion_generator.py @@ -41,16 +41,22 @@ def __init__( # noqa: D107 self._input_str = input_str self._candidate_filters = candidate_filters + @property + def candidate_filters(self) -> Sequence[SpecPattern]: + """Return the filters that should be applied to the candidate specs when generating suggestions.""" + return self._candidate_filters + def input_suggestions( self, candidate_specs: Sequence[InstanceSpec], max_suggestions: int = 6, ) -> Sequence[str]: """Return the best specs that match the given pattern from candidate_specs and match the candidate_filer.""" + # Use edit distance to figure out the closest matches, so convert the specs to strings. + for candidate_filter in self._candidate_filters: candidate_specs = candidate_filter.match(candidate_specs) - # Use edit distance to figure out the closest matches, so convert the specs to strings. candidate_strs = set() for candidate_spec in candidate_specs: candidate_str = self._input_naming_scheme.input_str(candidate_spec)