-
Notifications
You must be signed in to change notification settings - Fork 98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support Saved Queries #794
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Features
body: Support Saved Queries in MetricFlow
time: 2023-09-13T17:36:08.874392-07:00
custom:
  Author: plypaul
  Issue: "765"
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,11 @@ | |
|
||
import pandas as pd | ||
from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimensionTypeParams | ||
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter | ||
from dbt_semantic_interfaces.pretty_print import pformat_big_objects | ||
from dbt_semantic_interfaces.references import EntityReference, MeasureReference, MetricReference | ||
from dbt_semantic_interfaces.type_enums import DimensionType | ||
|
||
from metricflow.assert_one_arg import assert_exactly_one_arg_set | ||
from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder | ||
from metricflow.dataflow.builder.node_data_set import ( | ||
DataflowPlanNodeOutputDataSetResolver, | ||
|
@@ -53,6 +53,7 @@ | |
from metricflow.query.query_parser import MetricFlowQueryParser | ||
from metricflow.random_id import random_id | ||
from metricflow.specs.column_assoc import ColumnAssociationResolver | ||
from metricflow.specs.query_param_implementations import SavedQueryParameter | ||
from metricflow.specs.specs import InstanceSpecSet, MetricFlowQuerySpec | ||
from metricflow.sql.optimizer.optimization_levels import SqlQueryOptimizationLevel | ||
from metricflow.telemetry.models import TelemetryLevel | ||
|
@@ -84,6 +85,8 @@ class MetricFlowQueryType(Enum): | |
class MetricFlowQueryRequest: | ||
"""Encapsulates the parameters for a metric query. | ||
|
||
TODO: This has turned into a bag of parameters that make it difficult to use without a bunch of conditionals. | ||
|
||
metric_names: Names of the metrics to query. | ||
metrics: Metric objects to query. | ||
group_by_names: Names of the dimensions and entities to query. | ||
|
@@ -100,6 +103,7 @@ class MetricFlowQueryRequest: | |
""" | ||
|
||
request_id: MetricFlowRequestId | ||
saved_query_name: Optional[str] = None | ||
metric_names: Optional[Sequence[str]] = None | ||
metrics: Optional[Sequence[MetricQueryParameter]] = None | ||
group_by_names: Optional[Sequence[str]] = None | ||
|
@@ -116,6 +120,7 @@ class MetricFlowQueryRequest: | |
|
||
@staticmethod | ||
def create_with_random_request_id( # noqa: D | ||
saved_query_name: Optional[str] = None, | ||
metric_names: Optional[Sequence[str]] = None, | ||
metrics: Optional[Sequence[MetricQueryParameter]] = None, | ||
group_by_names: Optional[Sequence[str]] = None, | ||
|
@@ -130,15 +135,9 @@ def create_with_random_request_id( # noqa: D | |
sql_optimization_level: SqlQueryOptimizationLevel = SqlQueryOptimizationLevel.O4, | ||
query_type: MetricFlowQueryType = MetricFlowQueryType.METRIC, | ||
) -> MetricFlowQueryRequest: | ||
assert_exactly_one_arg_set(metric_names=metric_names, metrics=metrics) | ||
assert not ( | ||
group_by_names and group_by | ||
), "Both group_by_names and group_by were set, but if a group by is specified you should only use one of these!" | ||
assert not ( | ||
order_by_names and order_by | ||
), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!" | ||
Comment on lines -133 to -139
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we removing these? Is this a redundant validation? If it isn't we should leave them in, because we're in this ugly transitional state where we accept both type inputs via different parameters instead of either merging to a Union or pushing that to an outer interface where everything can be normalized. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it's a redundant validation that's handled in |
||
return MetricFlowQueryRequest( | ||
request_id=MetricFlowRequestId(mf_rid=f"{random_id()}"), | ||
saved_query_name=saved_query_name, | ||
metric_names=metric_names, | ||
metrics=metrics, | ||
group_by_names=group_by_names, | ||
|
@@ -417,18 +416,34 @@ def all_time_constraint(self) -> TimeRangeConstraint: | |
return TimeRangeConstraint.all_time() | ||
|
||
def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> MetricFlowExplainResult: | ||
query_spec = self._query_parser.parse_and_validate_query( | ||
metric_names=mf_query_request.metric_names, | ||
metrics=mf_query_request.metrics, | ||
group_by_names=mf_query_request.group_by_names, | ||
group_by=mf_query_request.group_by, | ||
limit=mf_query_request.limit, | ||
time_constraint_start=mf_query_request.time_constraint_start, | ||
time_constraint_end=mf_query_request.time_constraint_end, | ||
where_constraint_str=mf_query_request.where_constraint, | ||
order_by_names=mf_query_request.order_by_names, | ||
order_by=mf_query_request.order_by, | ||
) | ||
if mf_query_request.saved_query_name is not None: | ||
if mf_query_request.metrics or mf_query_request.metric_names: | ||
raise InvalidQueryException("Metrics can't be specified with a saved query.") | ||
if mf_query_request.group_by or mf_query_request.group_by_names: | ||
raise InvalidQueryException("Group by items can't be specified with a saved query.") | ||
query_spec = self._query_parser.parse_and_validate_saved_query( | ||
saved_query_parameter=SavedQueryParameter(mf_query_request.saved_query_name), | ||
where_filter=( | ||
PydanticWhereFilter(where_sql_template=mf_query_request.where_constraint) | ||
if mf_query_request.where_constraint is not None | ||
else None | ||
), | ||
limit=mf_query_request.limit, | ||
order_by_parameters=mf_query_request.order_by, | ||
) | ||
else: | ||
query_spec = self._query_parser.parse_and_validate_query( | ||
metric_names=mf_query_request.metric_names, | ||
metrics=mf_query_request.metrics, | ||
group_by_names=mf_query_request.group_by_names, | ||
group_by=mf_query_request.group_by, | ||
limit=mf_query_request.limit, | ||
time_constraint_start=mf_query_request.time_constraint_start, | ||
time_constraint_end=mf_query_request.time_constraint_end, | ||
where_constraint_str=mf_query_request.where_constraint, | ||
order_by_names=mf_query_request.order_by_names, | ||
order_by=mf_query_request.order_by, | ||
) | ||
logger.info(f"Query spec is:\n{pformat_big_objects(query_spec)}") | ||
|
||
if self._semantic_manifest_lookup.metric_lookup.contains_cumulative_or_time_offset_metric( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
from __future__ import annotations | ||
|
||
from typing import List | ||
|
||
from dbt_semantic_interfaces.call_parameter_sets import ParseWhereFilterException | ||
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter | ||
|
||
from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName | ||
from metricflow.protocols.query_parameter import GroupByParameter | ||
from metricflow.query.query_exceptions import InvalidQueryException | ||
from metricflow.specs.query_param_implementations import DimensionOrEntityParameter, TimeDimensionParameter | ||
|
||
|
||
def parse_object_builder_naming_scheme(group_by_item_name: str) -> GroupByParameter:
    """Convert a single object-builder-style group-by string into the matching GroupByParameter.

    The input is wrapped in a Jinja expression and fed through PydanticWhereFilter because, at the
    moment, the where-filter parser is the only available way to turn an object-builder string into
    call-parameter sets. The element name and entity path are then folded into the query
    parameter's `name` field by way of StructuredLinkableSpecName. Both of these are workarounds —
    this logic ought to live in a module dedicated to the object-builder naming scheme, and needs
    follow-up with the author of the query interface classes.

    TODO: Replace this method once ideal implementations are in place.
    """

    def _qualified_name(element_name: str, entity_path) -> str:
        # Collapse the element name and its entity links into the dunder-joined qualified name.
        return StructuredLinkableSpecName(
            element_name=element_name,
            entity_link_names=tuple(entity_reference.element_name for entity_reference in entity_path),
        ).qualified_name

    try:
        parameter_sets = PydanticWhereFilter(
            where_sql_template="{{ " + group_by_item_name + " }}"
        ).call_parameter_sets
    except ParseWhereFilterException as e:
        raise InvalidQueryException(f"Error parsing `{group_by_item_name}`") from e

    parsed: List[GroupByParameter] = []

    for dimension_set in parameter_sets.dimension_call_parameter_sets:
        if len(dimension_set.entity_path) != 1:
            raise NotImplementedError(
                f"DimensionOrEntityParameter only supports a single item in the entity path. Got "
                f"{dimension_set} while handling `{group_by_item_name}`"
            )
        parsed.append(
            DimensionOrEntityParameter(
                name=_qualified_name(dimension_set.dimension_reference.element_name, dimension_set.entity_path)
            )
        )

    for entity_set in parameter_sets.entity_call_parameter_sets:
        if len(entity_set.entity_path) != 1:
            raise NotImplementedError(
                f"DimensionOrEntityParameter only supports a single item in the entity path. Got "
                f"{entity_set} while handling `{group_by_item_name}`"
            )
        parsed.append(
            DimensionOrEntityParameter(
                name=_qualified_name(entity_set.entity_reference.element_name, entity_set.entity_path)
            )
        )

    for time_dimension_set in parameter_sets.time_dimension_call_parameter_sets:
        parsed.append(
            TimeDimensionParameter(
                name=_qualified_name(
                    time_dimension_set.time_dimension_reference.element_name, time_dimension_set.entity_path
                ),
                grain=time_dimension_set.time_granularity,
            )
        )

    # Exactly one parameter must result from a single group-by item string.
    if len(parsed) != 1:
        raise InvalidQueryException(
            f"Did not get exactly 1 parameter while parsing `{group_by_item_name}`. Got: {parsed}"
        )

    return parsed[0]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Of course it has, that's what always happens! Where's the lolsob emoji when I need it....