diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py
index 0ae5976325..a42718f3a8 100644
--- a/metricflow/engine/metricflow_engine.py
+++ b/metricflow/engine/metricflow_engine.py
@@ -96,6 +96,7 @@ class MetricFlowQueryRequest:
         time_constraint_end: Get data for the end of this time range.
         where_constraint: A SQL string using group by names that can be used like a where clause on the output data.
         order_by_names: metric and group by names to order by. A "-" can be used to specify reverse order e.g. "-ds"
+        order_by: metric and group by objects to order by
         output_table: If specified, output the result data to this table instead of a result dataframe.
         sql_optimization_level: The level of optimization for the generated SQL.
         query_type: Type of MetricFlow query.
@@ -426,7 +427,7 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
             time_constraint_start=mf_query_request.time_constraint_start,
             time_constraint_end=mf_query_request.time_constraint_end,
             where_constraint_str=mf_query_request.where_constraint,
-            order=mf_query_request.order_by_names,
+            order_by_names=mf_query_request.order_by_names,
             order_by=mf_query_request.order_by,
         )
         logger.info(f"Query spec is:\n{pformat_big_objects(query_spec)}")
@@ -466,7 +467,8 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
                 time_constraint_start=mf_query_request.time_constraint_start,
                 time_constraint_end=mf_query_request.time_constraint_end,
                 where_constraint_str=mf_query_request.where_constraint,
-                order=mf_query_request.order_by_names,
+                order_by_names=mf_query_request.order_by_names,
+                order_by=mf_query_request.order_by,
             )
             logger.warning(f"Query spec updated to:\n{pformat_big_objects(query_spec)}")
diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py
index d4e6040b84..701c059a3c 100644
--- a/metricflow/query/query_parser.py
+++ b/metricflow/query/query_parser.py
@@ -48,6 +48,7 @@
     WhereFilterSpec,
 )
 from metricflow.specs.where_filter_transform import WhereSpecFactory
+from metricflow.time.date_part import DatePart
 from metricflow.time.time_granularity_solver import (
     PartialTimeDimensionSpec,
     RequestTimeGranularityException,
@@ -182,9 +183,8 @@ def parse_and_validate_query(
         time_constraint_end: Optional[datetime.datetime] = None,
         where_constraint: Optional[WhereFilter] = None,
         where_constraint_str: Optional[str] = None,
-        order: Optional[Sequence[str]] = None,
+        order_by_names: Optional[Sequence[str]] = None,
         order_by: Optional[Sequence[OrderByQueryParameter]] = None,
-        time_granularity: Optional[TimeGranularity] = None,
     ) -> MetricFlowQuerySpec:
         """Parse the query into spec objects, validating them in the process.

@@ -202,9 +202,8 @@
                 time_constraint_end=time_constraint_end,
                 where_constraint=where_constraint,
                 where_constraint_str=where_constraint_str,
-                order=order,
+                order_by_names=order_by_names,
                 order_by=order_by,
-                time_granularity=time_granularity,
             )
         finally:
             logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")
@@ -312,14 +311,6 @@ def _get_where_filter(
             PydanticWhereFilter(where_sql_template=where_constraint_str) if where_constraint_str else where_constraint
         )

-    def _get_order(
-        self, order: Optional[Sequence[str]], order_by: Optional[Sequence[OrderByQueryParameter]]
-    ) -> Sequence[str]:
-        assert not (
-            order and order_by
-        ), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!"
-        return order if order else [f"{o.name}__{o.grain}" if o.grain else o.name for o in order_by] if order_by else []
-
     def _parse_and_validate_query(
         self,
         metric_names: Optional[Sequence[str]] = None,
@@ -331,13 +322,11 @@ def _parse_and_validate_query(
         time_constraint_end: Optional[datetime.datetime] = None,
         where_constraint: Optional[WhereFilter] = None,
         where_constraint_str: Optional[str] = None,
-        order: Optional[Sequence[str]] = None,
+        order_by_names: Optional[Sequence[str]] = None,
         order_by: Optional[Sequence[OrderByQueryParameter]] = None,
-        time_granularity: Optional[TimeGranularity] = None,
     ) -> MetricFlowQuerySpec:
         metric_names = self._get_metric_names(metric_names, metrics)
         where_filter = self._get_where_filter(where_constraint, where_constraint_str)
-        order = self._get_order(order, order_by)

         # Get metric references used for validations
         # In a case of derived metric, all the input metrics would be here.
@@ -423,7 +412,11 @@ def _parse_and_validate_query(
         self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
         self._validate_date_part(metric_references, time_dimension_specs)

-        order_by_specs = self._parse_order_by(order or [], partial_time_dimension_spec_replacements)
+        order_by_specs = self._parse_order_by(
+            order_by_names=order_by_names,
+            order_by=order_by,
+            time_dimension_spec_replacements=partial_time_dimension_spec_replacements,
+        )

         # For each metric, verify that it's possible to retrieve all group by elements, including the ones as required
         # by the filters.
@@ -819,29 +812,47 @@ def _get_invalid_linkable_specs(
     def _parse_order_by(
         self,
-        order_by_names: Sequence[str],
         time_dimension_spec_replacements: Dict[PartialTimeDimensionSpec, TimeDimensionSpec],
+        order_by_names: Optional[Sequence[str]] = None,
+        order_by: Optional[Sequence[OrderByQueryParameter]] = None,
     ) -> Tuple[OrderBySpec, ...]:
         """time_dimension_spec_replacements is used to replace a partial spec from parsing the names to a full one."""
         # TODO: Validate entity links
         # TODO: Validate order by items are in the query
+
+        assert not (
+            order_by_names and order_by
+        ), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!"
+
         order_by_specs: List[OrderBySpec] = []
-        for order_by_name in order_by_names:
-            descending = False
-            if order_by_name.startswith("-"):
-                order_by_name = order_by_name[1:]
-                descending = True
-            parsed_name = StructuredLinkableSpecName.from_name(order_by_name)
+        for order in order_by_names or order_by or []:
+            # Order might be string or object - parse into relevant parts.
+            time_granularity: Optional[TimeGranularity] = None
+            date_part: Optional[DatePart] = None
+            if isinstance(order, str):
+                # Note: date part cannot be requested via string parameter.
+                descending = False
+                if order.startswith("-"):
+                    order = order[1:]
+                    descending = True
+                parsed_name = StructuredLinkableSpecName.from_name(order)
+                time_granularity = parsed_name.time_granularity
+            else:
+                descending = order.descending
+                parsed_name = StructuredLinkableSpecName.from_name(order.order_by.name)
+                if isinstance(order.order_by, TimeDimensionQueryParameter):
+                    time_granularity = order.order_by.grain
+                    date_part = order.order_by.date_part
+            if parsed_name.time_granularity and parsed_name.time_granularity != time_granularity:
+                raise InvalidQueryException("Must use object syntax to request a time granularity.")
             if MetricReference(element_name=parsed_name.element_name) in self._known_metric_names:
-                if parsed_name.time_granularity:
+                if time_granularity:
                     raise InvalidQueryException(
-                        f"Order by item '{order_by_name}' references a metric but has a time granularity"
+                        f"Order by item '{order}' references a metric but has a time granularity"
                     )
                 if parsed_name.entity_link_names:
-                    raise InvalidQueryException(
-                        f"Order by item '{order_by_name}' references a metric but has entity links"
-                    )
+                    raise InvalidQueryException(f"Order by item '{order}' references a metric but has entity links")
                 order_by_specs.append(
                     OrderBySpec(
                         metric_spec=MetricSpec(element_name=parsed_name.element_name),
                     )
                 )
@@ -849,10 +860,9 @@ def _parse_order_by(
             elif DimensionReference(element_name=parsed_name.element_name) in self._known_dimension_element_references:
-                if parsed_name.time_granularity:
+                if time_granularity:
                     raise InvalidQueryException(
-                        f"Order by item '{order_by_name}' references a categorical dimension but has a time "
-                        f"granularity"
+                        f"Order by item '{order}' references a categorical dimension but has a time " f"granularity"
                     )
                 order_by_specs.append(
                     OrderBySpec(
@@ -868,14 +878,14 @@ def _parse_order_by(
                 in self._known_time_dimension_element_references
             ):
                 entity_links = tuple(EntityReference(element_name=x) for x in parsed_name.entity_link_names)
-                if parsed_name.time_granularity:
+                if time_granularity:
                     order_by_specs.append(
                         OrderBySpec(
                             time_dimension_spec=TimeDimensionSpec(
                                 element_name=parsed_name.element_name,
                                 entity_links=entity_links,
-                                time_granularity=parsed_name.time_granularity,
-                                date_part=parsed_name.date_part,
+                                time_granularity=time_granularity,
+                                date_part=date_part,
                             ),
                             descending=descending,
                         )
@@ -886,6 +896,7 @@ def _parse_order_by(
                     partial_time_dimension_spec = PartialTimeDimensionSpec(
                         element_name=parsed_name.element_name,
                         entity_links=entity_links,
+                        date_part=date_part,
                     )

                     if partial_time_dimension_spec in time_dimension_spec_replacements:
@@ -897,14 +908,14 @@ def _parse_order_by(
                        )
                     else:
                         raise RequestTimeGranularityException(
-                            f"Order by item '{order_by_name}' does not specify a time granularity and it does not "
+                            f"Order by item '{order}' does not specify a time granularity and it does not "
                             f"match a requested time dimension"
                         )
             elif EntityReference(element_name=parsed_name.element_name) in self._known_entity_element_references:
-                if parsed_name.time_granularity:
+                if time_granularity:
                     raise InvalidQueryException(
-                        f"Order by item '{order_by_name}' references an entity but has a time granularity"
+                        f"Order by item '{order}' references an entity but has a time granularity"
                     )
                 order_by_specs.append(
                     OrderBySpec(
@@ -916,6 +927,6 @@ def _parse_order_by(
                     )
                 )
             else:
-                raise InvalidQueryException(f"Order by item '{order_by_name}' references an element that is not known")
+                raise InvalidQueryException(f"Order by item '{order}' references an element that is not known")

         return tuple(order_by_specs)
diff --git a/metricflow/specs/query_param_implementations.py b/metricflow/specs/query_param_implementations.py
index 093a974f0e..dd534eace2 100644
--- a/metricflow/specs/query_param_implementations.py
+++ b/metricflow/specs/query_param_implementations.py
@@ -51,7 +51,3 @@ class OrderByParameter:

     order_by: Union[MetricQueryParameter, GroupByQueryParameter, TimeDimensionQueryParameter]
     descending: bool = False
-
-
-# Do we want one generic type for QueryParameter which includes grain & date_part?
-# The main question: do we need to know what type we're passing into MF? Or are we ok with MF just figuring it out?
diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py
index 6f49adf132..a5644406c2 100644
--- a/metricflow/test/query/test_query_parser.py
+++ b/metricflow/test/query/test_query_parser.py
@@ -165,7 +165,9 @@ def bookings_query_parser() -> MetricFlowQueryParser:  # noqa

 def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None:  # noqa: D
     query_spec = bookings_query_parser.parse_and_validate_query(
-        metric_names=["bookings"], group_by_names=["booking__is_instant", "listing", MTD], order=[MTD, "-bookings"]
+        metric_names=["bookings"],
+        group_by_names=["booking__is_instant", "listing", MTD],
+        order_by_names=[MTD, "-bookings"],
     )

     assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
@@ -239,7 +241,7 @@ def test_order_by_granularity_conversion() -> None:
         [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file]
     )
     query_spec = query_parser.parse_and_validate_query(
-        metric_names=["bookings", "revenue"], group_by_names=[MTD], order=[f"-{MTD}"]
+        metric_names=["bookings", "revenue"], group_by_names=[MTD], order_by_names=[f"-{MTD}"]
     )

     # The lowest common granularity is MONTH, so we expect the PTD in the order by to have that granularity.
@@ -255,7 +257,7 @@ def test_order_by_granularity_conversion() -> None:

 def test_order_by_granularity_no_conversion(bookings_query_parser: MetricFlowQueryParser) -> None:  # noqa: D
     query_spec = bookings_query_parser.parse_and_validate_query(
-        metric_names=["bookings"], group_by_names=[MTD], order=[MTD]
+        metric_names=["bookings"], group_by_names=[MTD], order_by_names=[MTD]
     )

     # The only granularity is DAY, so we expect the PTD in the order by to have that granularity.
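
Usage sketch (not part of the diff): the snippet below is a rough illustration of how the renamed order_by_names argument and the new order_by parameter objects could be passed to MetricFlowQueryParser.parse_and_validate_query after this change. It only shows the shape of the arguments; query_parser stands in for a parser instance such as the bookings_query_parser fixture used in the tests above, the TimeDimensionParameter class and both import paths are assumptions rather than something this diff confirms, and the example is not a validated end-to-end query.

# Illustrative sketch only - class names and import paths below are assumptions, not part of this PR.
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity  # assumed import path
from metricflow.specs.query_param_implementations import OrderByParameter, TimeDimensionParameter  # TimeDimensionParameter assumed

# Name style: a leading "-" requests descending order; a date_part cannot be requested this way.
query_spec = query_parser.parse_and_validate_query(
    metric_names=["bookings"],
    group_by_names=["metric_time"],
    order_by_names=["-metric_time", "bookings"],
)

# Object style: grain (and date_part, which has no name-style equivalent) are set as attributes
# on the order-by item instead of being encoded in the name string.
query_spec = query_parser.parse_and_validate_query(
    metric_names=["bookings"],
    group_by_names=["metric_time"],
    order_by=[
        OrderByParameter(
            order_by=TimeDimensionParameter(name="metric_time", grain=TimeGranularity.MONTH),
            descending=True,
        )
    ],
)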