Support order_by object syntax

courtneyholcomb committed Sep 21, 2023
1 parent 68edcfb commit df6a8f7

Showing 4 changed files with 58 additions and 47 deletions.
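
Before this change, ordering could only be requested as strings via the order parameter (now renamed order_by_names), with a "-" prefix meaning descending. This commit threads a second order_by parameter through MetricFlowQueryRequest and the query parser so callers can pass order-by objects instead, where descending, grain, and date part are explicit fields rather than string conventions. A minimal sketch of the two calling styles against the parse_and_validate_query signature changed below; the MetricParameter and TimeDimensionParameter names are assumptions (only OrderByParameter appears in this diff), and parser stands for an already-constructed MetricFlowQueryParser:

    from metricflow.specs.query_param_implementations import OrderByParameter

    # String syntax, now passed as order_by_names: a "-" prefix means descending,
    # and a time grain can only be requested as a "__month"-style name suffix.
    query_spec = parser.parse_and_validate_query(
        metric_names=["bookings"],
        group_by_names=["metric_time"],
        order_by_names=["metric_time", "-bookings"],
    )

    # Object syntax, new in this commit: descending lives on the OrderByParameter
    # wrapper, while grain and date_part (not expressible in the string form) live
    # on the wrapped time-dimension parameter.
    query_spec = parser.parse_and_validate_query(
        metric_names=["bookings"],
        group_by_names=["metric_time"],
        order_by=[
            OrderByParameter(order_by=TimeDimensionParameter(name="metric_time")),
            OrderByParameter(order_by=MetricParameter(name="bookings"), descending=True),
        ],
    )
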
6 changes: 4 additions & 2 deletions metricflow/engine/metricflow_engine.py
@@ -96,6 +96,7 @@ class MetricFlowQueryRequest:
time_constraint_end: Get data for the end of this time range.
where_constraint: A SQL string using group by names that can be used like a where clause on the output data.
order_by_names: metric and group by names to order by. A "-" can be used to specify reverse order e.g. "-ds"
order_by: metric and group by objects to order by
output_table: If specified, output the result data to this table instead of a result dataframe.
sql_optimization_level: The level of optimization for the generated SQL.
query_type: Type of MetricFlow query.
@@ -426,7 +427,7 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
time_constraint_start=mf_query_request.time_constraint_start,
time_constraint_end=mf_query_request.time_constraint_end,
where_constraint_str=mf_query_request.where_constraint,
order=mf_query_request.order_by_names,
order_by_names=mf_query_request.order_by_names,
order_by=mf_query_request.order_by,
)
logger.info(f"Query spec is:\n{pformat_big_objects(query_spec)}")
@@ -466,7 +467,8 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
time_constraint_start=mf_query_request.time_constraint_start,
time_constraint_end=mf_query_request.time_constraint_end,
where_constraint_str=mf_query_request.where_constraint,
order=mf_query_request.order_by_names,
order_by_names=mf_query_request.order_by_names,
order_by=mf_query_request.order_by,
)
logger.warning(f"Query spec updated to:\n{pformat_big_objects(query_spec)}")

87 changes: 49 additions & 38 deletions metricflow/query/query_parser.py
@@ -48,6 +48,7 @@
WhereFilterSpec,
)
from metricflow.specs.where_filter_transform import WhereSpecFactory
from metricflow.time.date_part import DatePart
from metricflow.time.time_granularity_solver import (
PartialTimeDimensionSpec,
RequestTimeGranularityException,
@@ -182,9 +183,8 @@ def parse_and_validate_query(
time_constraint_end: Optional[datetime.datetime] = None,
where_constraint: Optional[WhereFilter] = None,
where_constraint_str: Optional[str] = None,
order: Optional[Sequence[str]] = None,
order_by_names: Optional[Sequence[str]] = None,
order_by: Optional[Sequence[OrderByQueryParameter]] = None,
time_granularity: Optional[TimeGranularity] = None,
) -> MetricFlowQuerySpec:
"""Parse the query into spec objects, validating them in the process.
@@ -202,9 +202,8 @@ def parse_and_validate_query(
time_constraint_end=time_constraint_end,
where_constraint=where_constraint,
where_constraint_str=where_constraint_str,
order=order,
order_by_names=order_by_names,
order_by=order_by,
time_granularity=time_granularity,
)
finally:
logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")
@@ -312,14 +311,6 @@ def _get_where_filter(
PydanticWhereFilter(where_sql_template=where_constraint_str) if where_constraint_str else where_constraint
)

def _get_order(
self, order: Optional[Sequence[str]], order_by: Optional[Sequence[OrderByQueryParameter]]
) -> Sequence[str]:
assert not (
order and order_by
), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!"
return order if order else [f"{o.name}__{o.grain}" if o.grain else o.name for o in order_by] if order_by else []

def _parse_and_validate_query(
self,
metric_names: Optional[Sequence[str]] = None,
@@ -331,13 +322,11 @@ def _parse_and_validate_query(
time_constraint_end: Optional[datetime.datetime] = None,
where_constraint: Optional[WhereFilter] = None,
where_constraint_str: Optional[str] = None,
order: Optional[Sequence[str]] = None,
order_by_names: Optional[Sequence[str]] = None,
order_by: Optional[Sequence[OrderByQueryParameter]] = None,
time_granularity: Optional[TimeGranularity] = None,
) -> MetricFlowQuerySpec:
metric_names = self._get_metric_names(metric_names, metrics)
where_filter = self._get_where_filter(where_constraint, where_constraint_str)
order = self._get_order(order, order_by)

# Get metric references used for validations
# In a case of derived metric, all the input metrics would be here.
@@ -423,7 +412,11 @@ def _parse_and_validate_query(
self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
self._validate_date_part(metric_references, time_dimension_specs)

order_by_specs = self._parse_order_by(order or [], partial_time_dimension_spec_replacements)
order_by_specs = self._parse_order_by(
order_by_names=order_by_names,
order_by=order_by,
time_dimension_spec_replacements=partial_time_dimension_spec_replacements,
)

# For each metric, verify that it's possible to retrieve all group by elements, including the ones as required
# by the filters.
@@ -819,40 +812,57 @@ def _get_invalid_linkable_specs(

def _parse_order_by(
self,
order_by_names: Sequence[str],
time_dimension_spec_replacements: Dict[PartialTimeDimensionSpec, TimeDimensionSpec],
order_by_names: Optional[Sequence[str]] = None,
order_by: Optional[Sequence[OrderByQueryParameter]] = None,
) -> Tuple[OrderBySpec, ...]:
"""time_dimension_spec_replacements is used to replace a partial spec from parsing the names to a full one."""
# TODO: Validate entity links
# TODO: Validate order by items are in the query

assert not (
order_by_names and order_by
), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!"

order_by_specs: List[OrderBySpec] = []
for order_by_name in order_by_names:
descending = False
if order_by_name.startswith("-"):
order_by_name = order_by_name[1:]
descending = True
parsed_name = StructuredLinkableSpecName.from_name(order_by_name)
for order in order_by_names or order_by or []:
# Order might be string or object - parse into relevant parts.
time_granularity: Optional[TimeGranularity] = None
date_part: Optional[DatePart] = None
if isinstance(order, str):
# Note: date part cannot be requested via string parameter.
descending = False
if order.startswith("-"):
order = order[1:]
descending = True
parsed_name = StructuredLinkableSpecName.from_name(order)
time_granularity = parsed_name.time_granularity
else:
descending = order.descending
parsed_name = StructuredLinkableSpecName.from_name(order.order_by.name)
if isinstance(order.order_by, TimeDimensionQueryParameter):
time_granularity = order.order_by.grain
date_part = order.order_by.date_part
if parsed_name.time_granularity and parsed_name.time_granularity != time_granularity:
raise InvalidQueryException("Must use object syntax to request a time granularity.")

if MetricReference(element_name=parsed_name.element_name) in self._known_metric_names:
if parsed_name.time_granularity:
if time_granularity:
raise InvalidQueryException(
f"Order by item '{order_by_name}' references a metric but has a time granularity"
f"Order by item '{order}' references a metric but has a time granularity"
)
if parsed_name.entity_link_names:
raise InvalidQueryException(
f"Order by item '{order_by_name}' references a metric but has entity links"
)
raise InvalidQueryException(f"Order by item '{order}' references a metric but has entity links")
order_by_specs.append(
OrderBySpec(
metric_spec=MetricSpec(element_name=parsed_name.element_name),
descending=descending,
)
)
elif DimensionReference(element_name=parsed_name.element_name) in self._known_dimension_element_references:
if parsed_name.time_granularity:
if time_granularity:
raise InvalidQueryException(
f"Order by item '{order_by_name}' references a categorical dimension but has a time "
f"granularity"
f"Order by item '{order}' references a categorical dimension but has a time " f"granularity"
)
order_by_specs.append(
OrderBySpec(
@@ -868,14 +878,14 @@ def _parse_order_by(
in self._known_time_dimension_element_references
):
entity_links = tuple(EntityReference(element_name=x) for x in parsed_name.entity_link_names)
if parsed_name.time_granularity:
if time_granularity:
order_by_specs.append(
OrderBySpec(
time_dimension_spec=TimeDimensionSpec(
element_name=parsed_name.element_name,
entity_links=entity_links,
time_granularity=parsed_name.time_granularity,
date_part=parsed_name.date_part,
time_granularity=time_granularity,
date_part=date_part,
),
descending=descending,
)
Expand All @@ -886,6 +896,7 @@ def _parse_order_by(
partial_time_dimension_spec = PartialTimeDimensionSpec(
element_name=parsed_name.element_name,
entity_links=entity_links,
date_part=date_part,
)

if partial_time_dimension_spec in time_dimension_spec_replacements:
@@ -897,14 +908,14 @@ def _parse_order_by(
)
else:
raise RequestTimeGranularityException(
f"Order by item '{order_by_name}' does not specify a time granularity and it does not "
f"Order by item '{order}' does not specify a time granularity and it does not "
f"match a requested time dimension"
)

elif EntityReference(element_name=parsed_name.element_name) in self._known_entity_element_references:
if parsed_name.time_granularity:
if time_granularity:
raise InvalidQueryException(
f"Order by item '{order_by_name}' references an entity but has a time granularity"
f"Order by item '{order}' references an entity but has a time granularity"
)
order_by_specs.append(
OrderBySpec(
Expand All @@ -916,6 +927,6 @@ def _parse_order_by(
)
)
else:
raise InvalidQueryException(f"Order by item '{order_by_name}' references an element that is not known")
raise InvalidQueryException(f"Order by item '{order}' references an element that is not known")

return tuple(order_by_specs)
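
Restated outside the parser, the normalization the rewritten loop applies to each order-by entry looks roughly like this. It is an illustrative sketch, not the actual method: the real code goes on to resolve each name against known metrics, dimensions, and entities and to build OrderBySpec objects.

    def normalize_order_by_item(order):
        """Return (name, descending, grain, date_part) for a string or object order-by entry."""
        if isinstance(order, str):
            # String syntax: a leading "-" means descending. The remaining name (which may
            # carry entity links and a "__grain" suffix) is later split by
            # StructuredLinkableSpecName.from_name; a date part cannot be expressed here.
            descending = order.startswith("-")
            name = order[1:] if descending else order
            return (name, descending, None, None)
        # Object syntax: descending lives on the wrapper; grain and date_part live on the
        # wrapped parameter (only time dimensions have them, hence the getattr defaults).
        return (
            order.order_by.name,
            order.descending,
            getattr(order.order_by, "grain", None),
            getattr(order.order_by, "date_part", None),
        )
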
4 changes: 0 additions & 4 deletions metricflow/specs/query_param_implementations.py
@@ -51,7 +51,3 @@ class OrderByParameter:

order_by: Union[MetricQueryParameter, GroupByQueryParameter, TimeDimensionQueryParameter]
descending: bool = False


# Do we want one generic type for QueryParameter which includes grain & date_part?
# The main question: do we need to know what type we're passing into MF? Or are we ok with MF just figuring it out?
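
The comment removed here asked whether a single generic parameter type should carry grain and date_part; the shape that remains keeps OrderByParameter as a thin wrapper that only adds a descending flag, leaving grain and date_part on the wrapped parameter. An illustrative construction follows, with the MetricParameter and TimeDimensionParameter names and their fields assumed rather than shown in this diff:

    # Descending by the bookings metric, then by the month date part of metric_time --
    # a date part is something the "-name__grain" string syntax cannot express.
    order_by = [
        OrderByParameter(order_by=MetricParameter(name="bookings"), descending=True),
        OrderByParameter(order_by=TimeDimensionParameter(name="metric_time", date_part=DatePart.MONTH)),
    ]
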
8 changes: 5 additions & 3 deletions metricflow/test/query/test_query_parser.py
@@ -165,7 +165,9 @@ def bookings_query_parser() -> MetricFlowQueryParser:  # noqa

def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D
query_spec = bookings_query_parser.parse_and_validate_query(
metric_names=["bookings"], group_by_names=["booking__is_instant", "listing", MTD], order=[MTD, "-bookings"]
metric_names=["bookings"],
group_by_names=["booking__is_instant", "listing", MTD],
order_by_names=[MTD, "-bookings"],
)

assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
@@ -239,7 +241,7 @@ def test_order_by_granularity_conversion() -> None:
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file]
)
query_spec = query_parser.parse_and_validate_query(
metric_names=["bookings", "revenue"], group_by_names=[MTD], order=[f"-{MTD}"]
metric_names=["bookings", "revenue"], group_by_names=[MTD], order_by_names=[f"-{MTD}"]
)

# The lowest common granularity is MONTH, so we expect the PTD in the order by to have that granularity.
@@ -255,7 +257,7 @@

def test_order_by_granularity_no_conversion(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D
query_spec = bookings_query_parser.parse_and_validate_query(
metric_names=["bookings"], group_by_names=[MTD], order=[MTD]
metric_names=["bookings"], group_by_names=[MTD], order_by_names=[MTD]
)

# The only granularity is DAY, so we expect the PTD in the order by to have that granularity.
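
The updated tests still exercise only the string path (order renamed to order_by_names). A hypothetical object-syntax counterpart, not part of this commit and with MetricParameter and TimeDimensionParameter assumed, might look like:

    def test_query_parser_with_order_by_objects(bookings_query_parser: MetricFlowQueryParser) -> None:  # noqa: D
        query_spec = bookings_query_parser.parse_and_validate_query(
            metric_names=["bookings"],
            group_by_names=["booking__is_instant", "listing", MTD],
            order_by=[
                OrderByParameter(order_by=TimeDimensionParameter(name=MTD)),
                OrderByParameter(order_by=MetricParameter(name="bookings"), descending=True),
            ],
        )
        assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
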
