diff --git a/.changes/unreleased/Fixes-20230926-140015.yaml b/.changes/unreleased/Fixes-20230926-140015.yaml new file mode 100644 index 0000000000..339de9012e --- /dev/null +++ b/.changes/unreleased/Fixes-20230926-140015.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Removing methods and reordering parameters for Query Interface. +time: 2023-09-26T14:00:15.741015-05:00 +custom: + Author: DevonFulcher + Issue: None diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index 21f7bc899b..bf66a994b6 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -47,7 +47,7 @@ DataflowToExecutionPlanConverter, ) from metricflow.plan_conversion.dataflow_to_sql import DataflowToSqlQueryPlanConverter -from metricflow.protocols.query_parameter import QueryParameterDimension, QueryParameterMetric +from metricflow.protocols.query_parameter import GroupByParameter, MetricQueryParameter, OrderByQueryParameter from metricflow.protocols.sql_client import SqlClient from metricflow.query.query_exceptions import InvalidQueryException from metricflow.query.query_parser import MetricFlowQueryParser @@ -85,12 +85,15 @@ class MetricFlowQueryRequest: """Encapsulates the parameters for a metric query. metric_names: Names of the metrics to query. + metrics: Metric objects to query. group_by_names: Names of the dimensions and entities to query. + group_by: Dimension or entity objects to query. limit: Limit the result to this many rows. time_constraint_start: Get data for the start of this time range. time_constraint_end: Get data for the end of this time range. where_constraint: A SQL string using group by names that can be used like a where clause on the output data. - order_by_names: metric and group by names to order by. A "-" can be used to specify reverse order e.g. "-ds" + order_by_names: metric and group by names to order by. A "-" can be used to specify reverse order e.g. "-ds". + order_by: metric, dimension, or entity objects to order by. output_table: If specified, output the result data to this table instead of a result dataframe. sql_optimization_level: The level of optimization for the generated SQL. query_type: Type of MetricFlow query. @@ -98,15 +101,15 @@ class MetricFlowQueryRequest: request_id: MetricFlowRequestId metric_names: Optional[Sequence[str]] = None - metrics: Optional[Sequence[QueryParameterMetric]] = None + metrics: Optional[Sequence[MetricQueryParameter]] = None group_by_names: Optional[Sequence[str]] = None - group_by: Optional[Sequence[QueryParameterDimension]] = None + group_by: Optional[Tuple[GroupByParameter, ...]] = None limit: Optional[int] = None time_constraint_start: Optional[datetime.datetime] = None time_constraint_end: Optional[datetime.datetime] = None where_constraint: Optional[str] = None order_by_names: Optional[Sequence[str]] = None - order_by: Optional[Sequence[QueryParameterDimension]] = None + order_by: Optional[Sequence[OrderByQueryParameter]] = None output_table: Optional[str] = None sql_optimization_level: SqlQueryOptimizationLevel = SqlQueryOptimizationLevel.O4 query_type: MetricFlowQueryType = MetricFlowQueryType.METRIC @@ -114,15 +117,15 @@ class MetricFlowQueryRequest: @staticmethod def create_with_random_request_id( # noqa: D metric_names: Optional[Sequence[str]] = None, - metrics: Optional[Sequence[QueryParameterMetric]] = None, + metrics: Optional[Sequence[MetricQueryParameter]] = None, group_by_names: Optional[Sequence[str]] = None, - group_by: Optional[Sequence[QueryParameterDimension]] = None, + group_by: Optional[Tuple[GroupByParameter, ...]] = None, limit: Optional[int] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, where_constraint: Optional[str] = None, order_by_names: Optional[Sequence[str]] = None, - order_by: Optional[Sequence[QueryParameterDimension]] = None, + order_by: Optional[Sequence[OrderByQueryParameter]] = None, output_table: Optional[str] = None, sql_optimization_level: SqlQueryOptimizationLevel = SqlQueryOptimizationLevel.O4, query_type: MetricFlowQueryType = MetricFlowQueryType.METRIC, @@ -286,9 +289,9 @@ def get_dimension_values( def explain_get_dimension_values( # noqa: D self, metric_names: Optional[List[str]] = None, - metrics: Optional[Sequence[QueryParameterMetric]] = None, + metrics: Optional[Sequence[MetricQueryParameter]] = None, get_group_by_values: Optional[str] = None, - group_by: Optional[QueryParameterDimension] = None, + group_by: Optional[GroupByParameter] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, ) -> MetricFlowExplainResult: @@ -421,7 +424,7 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me time_constraint_start=mf_query_request.time_constraint_start, time_constraint_end=mf_query_request.time_constraint_end, where_constraint_str=mf_query_request.where_constraint, - order=mf_query_request.order_by_names, + order_by_names=mf_query_request.order_by_names, order_by=mf_query_request.order_by, ) logger.info(f"Query spec is:\n{pformat_big_objects(query_spec)}") @@ -461,7 +464,8 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me time_constraint_start=mf_query_request.time_constraint_start, time_constraint_end=mf_query_request.time_constraint_end, where_constraint_str=mf_query_request.where_constraint, - order=mf_query_request.order_by_names, + order_by_names=mf_query_request.order_by_names, + order_by=mf_query_request.order_by, ) logger.warning(f"Query spec updated to:\n{pformat_big_objects(query_spec)}") @@ -682,9 +686,9 @@ def get_dimension_values( # noqa: D def explain_get_dimension_values( # noqa: D self, metric_names: Optional[List[str]] = None, - metrics: Optional[Sequence[QueryParameterMetric]] = None, + metrics: Optional[Sequence[MetricQueryParameter]] = None, get_group_by_values: Optional[str] = None, - group_by: Optional[QueryParameterDimension] = None, + group_by: Optional[GroupByParameter] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, ) -> MetricFlowExplainResult: @@ -695,8 +699,8 @@ def explain_get_dimension_values( # noqa: D MetricFlowQueryRequest.create_with_random_request_id( metric_names=metric_names, metrics=metrics, - group_by_names=[get_group_by_values] if get_group_by_values else None, - group_by=[group_by] if group_by else None, + group_by_names=(get_group_by_values,) if get_group_by_values else None, + group_by=(group_by,) if group_by else None, time_constraint_start=time_constraint_start, time_constraint_end=time_constraint_end, query_type=MetricFlowQueryType.DIMENSION_VALUES, diff --git a/metricflow/engine/models.py b/metricflow/engine/models.py index cefbf73e85..542ca4bb75 100644 --- a/metricflow/engine/models.py +++ b/metricflow/engine/models.py @@ -109,10 +109,7 @@ def granularity_free_qualified_name(self) -> str: Dimension set has de-duplicated TimeDimensions such that you never have more than one granularity in your set for each TimeDimension. """ - parsed_name = StructuredLinkableSpecName.from_name(qualified_name=self.qualified_name) - return StructuredLinkableSpecName( - entity_link_names=parsed_name.entity_link_names, element_name=self.name - ).qualified_name + return StructuredLinkableSpecName.from_name(qualified_name=self.qualified_name).granularity_free_qualified_name @dataclass(frozen=True) diff --git a/metricflow/naming/linkable_spec_name.py b/metricflow/naming/linkable_spec_name.py index e2eeed0d7c..27fa92416b 100644 --- a/metricflow/naming/linkable_spec_name.py +++ b/metricflow/naming/linkable_spec_name.py @@ -91,3 +91,17 @@ def entity_prefix(self) -> Optional[str]: def date_part_suffix(date_part: DatePart) -> str: """Suffix used for names with a date_part.""" return f"extract_{date_part.value}" + + @property + def granularity_free_qualified_name(self) -> str: + """Renders the qualified name without the granularity suffix. + + In the list metrics and list dimensions outputs we want to render the qualified name of the dimension, but + without including the base granularity for time dimensions. This method is useful in those contexts. + Note: in most cases you should be using the qualified_name - this is only useful in cases where the + Dimension set has de-duplicated TimeDimensions such that you never have more than one granularity + in your set for each TimeDimension. + """ + return StructuredLinkableSpecName( + entity_link_names=self.entity_link_names, element_name=self.element_name + ).qualified_name diff --git a/metricflow/protocols/query_interface.py b/metricflow/protocols/query_interface.py index 5d5797cf8f..2831740ba4 100644 --- a/metricflow/protocols/query_interface.py +++ b/metricflow/protocols/query_interface.py @@ -1,29 +1,35 @@ from __future__ import annotations +from abc import abstractmethod from typing import Optional, Protocol, Sequence class QueryInterfaceMetric(Protocol): """Represents the interface for Metric in the query interface.""" + @abstractmethod def descending(self, _is_descending: bool) -> QueryInterfaceMetric: """Set the sort order for order-by.""" - raise NotImplementedError + pass class QueryInterfaceDimension(Protocol): """Represents the interface for Dimension in the query interface.""" + @abstractmethod def grain(self, _grain: str) -> QueryInterfaceDimension: """The time granularity.""" - raise NotImplementedError + pass + @abstractmethod def descending(self, _is_descending: bool) -> QueryInterfaceDimension: """Set the sort order for order-by.""" + pass + @abstractmethod def date_part(self, _date_part: str) -> QueryInterfaceDimension: """Date part to extract from the dimension.""" - raise NotImplementedError + pass class QueryInterfaceDimensionFactory(Protocol): @@ -32,9 +38,10 @@ class QueryInterfaceDimensionFactory(Protocol): Represented as the Dimension constructor in the Jinja sandbox. """ + @abstractmethod def create(self, name: str, entity_path: Sequence[str] = ()) -> QueryInterfaceDimension: """Create a QueryInterfaceDimension.""" - raise NotImplementedError + pass class QueryInterfaceTimeDimension(Protocol): @@ -49,24 +56,23 @@ class QueryInterfaceTimeDimensionFactory(Protocol): Represented as the TimeDimension constructor in the Jinja sandbox. """ + @abstractmethod def create( self, time_dimension_name: str, time_granularity_name: str, - descending: bool = False, - date_part_name: Optional[str] = None, entity_path: Sequence[str] = (), + descending: Optional[bool] = None, + date_part_name: Optional[str] = None, ) -> QueryInterfaceTimeDimension: """Create a TimeDimension.""" - raise NotImplementedError + pass class QueryInterfaceEntity(Protocol): """Represents the interface for Entity in the query interface.""" - def descending(self, _is_descending: bool) -> QueryInterfaceEntity: - """Set the sort order for order-by.""" - raise NotImplementedError + pass class QueryInterfaceEntityFactory(Protocol): @@ -75,6 +81,7 @@ class QueryInterfaceEntityFactory(Protocol): Represented as the Entity constructor in the Jinja sandbox. """ + @abstractmethod def create(self, entity_name: str, entity_path: Sequence[str] = ()) -> QueryInterfaceEntity: """Create an Entity.""" - raise NotImplementedError + pass diff --git a/metricflow/protocols/query_parameter.py b/metricflow/protocols/query_parameter.py index 2b6b4e02bc..79e5db93c7 100644 --- a/metricflow/protocols/query_parameter.py +++ b/metricflow/protocols/query_parameter.py @@ -1,28 +1,42 @@ from __future__ import annotations -from typing import Optional, Protocol +from typing import Optional, Protocol, Union, runtime_checkable from dbt_semantic_interfaces.type_enums import TimeGranularity from metricflow.time.date_part import DatePart -class QueryParameterDimension(Protocol): - """A query parameter with a grain.""" +@runtime_checkable +class MetricQueryParameter(Protocol): + """Metric requested in a query.""" @property def name(self) -> str: - """The name of the item.""" + """The name of the metric.""" raise NotImplementedError + +@runtime_checkable +class DimensionOrEntityQueryParameter(Protocol): + """Generic group by parameter for queries. Might be an entity or a dimension.""" + @property - def grain(self) -> Optional[TimeGranularity]: - """The time granularity.""" + def name(self) -> str: + """The name of the metric.""" raise NotImplementedError + +@runtime_checkable +class TimeDimensionQueryParameter(Protocol): # noqa: D @property - def descending(self) -> bool: - """Set the sort order for order-by.""" + def name(self) -> str: + """The name of the item.""" + raise NotImplementedError + + @property + def grain(self) -> Optional[TimeGranularity]: + """The time granularity.""" raise NotImplementedError @property @@ -31,15 +45,19 @@ def date_part(self) -> Optional[DatePart]: raise NotImplementedError -class QueryParameterMetric(Protocol): - """Metric in the query interface.""" +GroupByParameter = Union[DimensionOrEntityQueryParameter, TimeDimensionQueryParameter] +InputOrderByParameter = Union[MetricQueryParameter, GroupByParameter] + + +class OrderByQueryParameter(Protocol): + """Parameter to order by, specifying ascending or descending.""" @property - def name(self) -> str: - """The name of the metric.""" + def order_by(self) -> InputOrderByParameter: + """Parameter to order results by.""" raise NotImplementedError @property def descending(self) -> bool: - """Set the sort order for order-by.""" + """Indicates if the order should be ascending or descending.""" raise NotImplementedError diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index a8e7deda0b..4312b44764 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -28,7 +28,12 @@ from metricflow.filters.time_constraint import TimeRangeConstraint from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName -from metricflow.protocols.query_parameter import QueryParameterDimension, QueryParameterMetric +from metricflow.protocols.query_parameter import ( + GroupByParameter, + MetricQueryParameter, + OrderByQueryParameter, + TimeDimensionQueryParameter, +) from metricflow.query.query_exceptions import InvalidQueryException from metricflow.specs.column_assoc import ColumnAssociationResolver from metricflow.specs.specs import ( @@ -43,6 +48,7 @@ WhereFilterSpec, ) from metricflow.specs.where_filter_transform import WhereSpecFactory +from metricflow.time.date_part import DatePart from metricflow.time.time_granularity_solver import ( PartialTimeDimensionSpec, RequestTimeGranularityException, @@ -169,17 +175,16 @@ def _top_fuzzy_matches( def parse_and_validate_query( self, metric_names: Optional[Sequence[str]] = None, - metrics: Optional[Sequence[QueryParameterMetric]] = None, + metrics: Optional[Sequence[MetricQueryParameter]] = None, group_by_names: Optional[Sequence[str]] = None, - group_by: Optional[Sequence[QueryParameterDimension]] = None, + group_by: Optional[Tuple[GroupByParameter, ...]] = None, limit: Optional[int] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, where_constraint: Optional[WhereFilter] = None, where_constraint_str: Optional[str] = None, - order: Optional[Sequence[str]] = None, - order_by: Optional[Sequence[QueryParameterDimension]] = None, - time_granularity: Optional[TimeGranularity] = None, + order_by_names: Optional[Sequence[str]] = None, + order_by: Optional[Sequence[OrderByQueryParameter]] = None, ) -> MetricFlowQuerySpec: """Parse the query into spec objects, validating them in the process. @@ -197,9 +202,8 @@ def parse_and_validate_query( time_constraint_end=time_constraint_end, where_constraint=where_constraint, where_constraint_str=where_constraint_str, - order=order, + order_by_names=order_by_names, order_by=order_by, - time_granularity=time_granularity, ) finally: logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s") @@ -290,7 +294,7 @@ def _construct_metric_specs_for_query( return tuple(metric_specs) def _get_metric_names( - self, metric_names: Optional[Sequence[str]], metrics: Optional[Sequence[QueryParameterMetric]] + self, metric_names: Optional[Sequence[str]], metrics: Optional[Sequence[MetricQueryParameter]] ) -> Sequence[str]: assert_exactly_one_arg_set(metric_names=metric_names, metrics=metrics) return metric_names if metric_names else [m.name for m in metrics] if metrics else [] @@ -307,32 +311,22 @@ def _get_where_filter( PydanticWhereFilter(where_sql_template=where_constraint_str) if where_constraint_str else where_constraint ) - def _get_order( - self, order: Optional[Sequence[str]], order_by: Optional[Sequence[QueryParameterDimension]] - ) -> Sequence[str]: - assert not ( - order and order_by - ), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!" - return order if order else [f"{o.name}__{o.grain}" if o.grain else o.name for o in order_by] if order_by else [] - def _parse_and_validate_query( self, metric_names: Optional[Sequence[str]] = None, - metrics: Optional[Sequence[QueryParameterMetric]] = None, + metrics: Optional[Sequence[MetricQueryParameter]] = None, group_by_names: Optional[Sequence[str]] = None, - group_by: Optional[Sequence[QueryParameterDimension]] = None, + group_by: Optional[Tuple[GroupByParameter, ...]] = None, limit: Optional[int] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, where_constraint: Optional[WhereFilter] = None, where_constraint_str: Optional[str] = None, - order: Optional[Sequence[str]] = None, - order_by: Optional[Sequence[QueryParameterDimension]] = None, - time_granularity: Optional[TimeGranularity] = None, + order_by_names: Optional[Sequence[str]] = None, + order_by: Optional[Sequence[OrderByQueryParameter]] = None, ) -> MetricFlowQuerySpec: metric_names = self._get_metric_names(metric_names, metrics) where_filter = self._get_where_filter(where_constraint, where_constraint_str) - order = self._get_order(order, order_by) # Get metric references used for validations # In a case of derived metric, all the input metrics would be here. @@ -380,8 +374,8 @@ def _parse_and_validate_query( # If the time constraint is all time, just ignore and not render time_constraint = None - requested_linkable_specs = self._parse_linkable_elements( - qualified_linkable_names=group_by_names, linkable_elements=group_by, metric_references=metric_references + requested_linkable_specs = self._parse_group_by( + group_by_names=group_by_names, group_by=group_by, metric_references=metric_references ) where_filter_spec: Optional[WhereFilterSpec] = None if where_filter is not None: @@ -418,7 +412,11 @@ def _parse_and_validate_query( self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) self._validate_date_part(metric_references, time_dimension_specs) - order_by_specs = self._parse_order_by(order or [], partial_time_dimension_spec_replacements) + order_by_specs = self._parse_order_by( + order_by_names=order_by_names, + order_by=order_by, + time_dimension_spec_replacements=partial_time_dimension_spec_replacements, + ) # For each metric, verify that it's possible to retrieve all group by elements, including the ones as required # by the filters. @@ -426,9 +424,9 @@ def _parse_and_validate_query( for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.filter is not None: - group_by_specs_for_one_metric = self._parse_linkable_elements( - qualified_linkable_names=group_by_names, - linkable_elements=group_by, + group_by_specs_for_one_metric = self._parse_group_by( + group_by_names=group_by_names, + group_by=group_by, metric_references=(metric_reference,), ) @@ -663,30 +661,32 @@ def _parse_metric_names( metric_references.extend(list(input_metrics)) return tuple(metric_references) - def _parse_linkable_elements( + def _parse_group_by( self, metric_references: Sequence[MetricReference], - qualified_linkable_names: Optional[Sequence[str]] = None, - linkable_elements: Optional[Sequence[QueryParameterDimension]] = None, + group_by_names: Optional[Sequence[str]] = None, + group_by: Optional[Tuple[GroupByParameter, ...]] = None, ) -> QueryTimeLinkableSpecSet: """Convert the linkable spec names into the respective specification objects.""" # TODO: refactor to only support group_by object inputs (removing group_by_names param) assert not ( - qualified_linkable_names and linkable_elements + group_by_names and group_by ), "Both group_by_names and group_by were set, but if a group by is specified you should only use one of these!" structured_names: List[StructuredLinkableSpecName] = [] - if qualified_linkable_names: - qualified_linkable_names = [x.lower() for x in qualified_linkable_names] - structured_names = [StructuredLinkableSpecName.from_name(name) for name in qualified_linkable_names] - elif linkable_elements: - for linkable_element in linkable_elements: - parsed_name = StructuredLinkableSpecName.from_name(linkable_element.name) + if group_by_names: + group_by_names = [x.lower() for x in group_by_names] + structured_names = [StructuredLinkableSpecName.from_name(name) for name in group_by_names] + elif group_by: + for group_by_obj in group_by: + parsed_name = StructuredLinkableSpecName.from_name(group_by_obj.name) structured_name = StructuredLinkableSpecName( entity_link_names=parsed_name.entity_link_names, element_name=parsed_name.element_name, - time_granularity=linkable_element.grain, - date_part=linkable_element.date_part, + time_granularity=group_by_obj.grain + if isinstance(group_by_obj, TimeDimensionQueryParameter) + else None, + date_part=group_by_obj.date_part if isinstance(group_by_obj, TimeDimensionQueryParameter) else None, ) structured_names.append(structured_name) @@ -729,15 +729,12 @@ def _parse_linkable_elements( valid_group_bys_for_metrics = self._metric_lookup.element_specs_for_metrics(list(metric_references)) valid_group_by_names_for_metrics = sorted( list( - set( - x.qualified_name if qualified_linkable_names else x.element_name - for x in valid_group_bys_for_metrics - ) + set(x.qualified_name if group_by_names else x.element_name for x in valid_group_bys_for_metrics) ) ) # If requested by name, show qualified name. If requested as object, show element name. - display_name = structured_name.qualified_name if qualified_linkable_names else element_name + display_name = structured_name.qualified_name if group_by_names else element_name suggestions = { f"Suggestions for '{display_name}'": pformat_big_objects( MetricFlowQueryParser._top_fuzzy_matches( @@ -748,7 +745,7 @@ def _parse_linkable_elements( } raise UnableToSatisfyQueryError( f"Unknown element name '{element_name}' in dimension name '{display_name}'" - if qualified_linkable_names + if group_by_names else f"Unknown dimension {element_name}", context=suggestions, ) @@ -815,29 +812,47 @@ def _get_invalid_linkable_specs( def _parse_order_by( self, - order_by_names: Sequence[str], time_dimension_spec_replacements: Dict[PartialTimeDimensionSpec, TimeDimensionSpec], + order_by_names: Optional[Sequence[str]] = None, + order_by: Optional[Sequence[OrderByQueryParameter]] = None, ) -> Tuple[OrderBySpec, ...]: """time_dimension_spec_replacements is used to replace a partial spec from parsing the names to a full one.""" # TODO: Validate entity links # TODO: Validate order by items are in the query + + assert not ( + order_by_names and order_by + ), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!" + order_by_specs: List[OrderBySpec] = [] - for order_by_name in order_by_names: - descending = False - if order_by_name.startswith("-"): - order_by_name = order_by_name[1:] - descending = True - parsed_name = StructuredLinkableSpecName.from_name(order_by_name) + for order in order_by_names or order_by or []: + # Order might be string or object - parse into relevant parts. + time_granularity: Optional[TimeGranularity] = None + date_part: Optional[DatePart] = None + if isinstance(order, str): + # Note: date part cannot be requested via string parameter. + descending = False + if order.startswith("-"): + order = order[1:] + descending = True + parsed_name = StructuredLinkableSpecName.from_name(order) + time_granularity = parsed_name.time_granularity + else: + descending = order.descending + parsed_name = StructuredLinkableSpecName.from_name(order.order_by.name) + if isinstance(order.order_by, TimeDimensionQueryParameter): + time_granularity = order.order_by.grain + date_part = order.order_by.date_part + if parsed_name.time_granularity and parsed_name.time_granularity != time_granularity: + raise InvalidQueryException("Must use object syntax to request a time granularity.") if MetricReference(element_name=parsed_name.element_name) in self._known_metric_names: - if parsed_name.time_granularity: + if time_granularity: raise InvalidQueryException( - f"Order by item '{order_by_name}' references a metric but has a time granularity" + f"Order by item '{order}' references a metric but has a time granularity" ) if parsed_name.entity_link_names: - raise InvalidQueryException( - f"Order by item '{order_by_name}' references a metric but has entity links" - ) + raise InvalidQueryException(f"Order by item '{order}' references a metric but has entity links") order_by_specs.append( OrderBySpec( metric_spec=MetricSpec(element_name=parsed_name.element_name), @@ -845,10 +860,9 @@ def _parse_order_by( ) ) elif DimensionReference(element_name=parsed_name.element_name) in self._known_dimension_element_references: - if parsed_name.time_granularity: + if time_granularity: raise InvalidQueryException( - f"Order by item '{order_by_name}' references a categorical dimension but has a time " - f"granularity" + f"Order by item '{order}' references a categorical dimension but has a time " f"granularity" ) order_by_specs.append( OrderBySpec( @@ -864,14 +878,14 @@ def _parse_order_by( in self._known_time_dimension_element_references ): entity_links = tuple(EntityReference(element_name=x) for x in parsed_name.entity_link_names) - if parsed_name.time_granularity: + if time_granularity: order_by_specs.append( OrderBySpec( time_dimension_spec=TimeDimensionSpec( element_name=parsed_name.element_name, entity_links=entity_links, - time_granularity=parsed_name.time_granularity, - date_part=parsed_name.date_part, + time_granularity=time_granularity, + date_part=date_part, ), descending=descending, ) @@ -882,6 +896,7 @@ def _parse_order_by( partial_time_dimension_spec = PartialTimeDimensionSpec( element_name=parsed_name.element_name, entity_links=entity_links, + date_part=date_part, ) if partial_time_dimension_spec in time_dimension_spec_replacements: @@ -893,14 +908,14 @@ def _parse_order_by( ) else: raise RequestTimeGranularityException( - f"Order by item '{order_by_name}' does not specify a time granularity and it does not " + f"Order by item '{order}' does not specify a time granularity and it does not " f"match a requested time dimension" ) elif EntityReference(element_name=parsed_name.element_name) in self._known_entity_element_references: - if parsed_name.time_granularity: + if time_granularity: raise InvalidQueryException( - f"Order by item '{order_by_name}' references an entity but has a time granularity" + f"Order by item '{order}' references an entity but has a time granularity" ) order_by_specs.append( OrderBySpec( @@ -912,6 +927,6 @@ def _parse_order_by( ) ) else: - raise InvalidQueryException(f"Order by item '{order_by_name}' references an element that is not known") + raise InvalidQueryException(f"Order by item '{order}' references an element that is not known") return tuple(order_by_specs) diff --git a/metricflow/specs/query_param_implementations.py b/metricflow/specs/query_param_implementations.py index b791c46189..5b06b13f14 100644 --- a/metricflow/specs/query_param_implementations.py +++ b/metricflow/specs/query_param_implementations.py @@ -6,19 +6,44 @@ from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName +from metricflow.protocols.query_parameter import InputOrderByParameter from metricflow.time.date_part import DatePart @dataclass(frozen=True) -class DimensionQueryParameter: +class TimeDimensionParameter: """Time dimension requested in a query.""" name: str grain: Optional[TimeGranularity] = None - descending: bool = False date_part: Optional[DatePart] = None def __post_init__(self) -> None: # noqa: D parsed_name = StructuredLinkableSpecName.from_name(self.name) if parsed_name.time_granularity: raise ValueError("Must use object syntax for `grain` parameter if `date_part` is requested.") + + +@dataclass(frozen=True) +class DimensionOrEntityParameter: + """Group by parameter requested in a query. + + Might represent an entity or a dimension. + """ + + name: str + + +@dataclass(frozen=True) +class MetricParameter: + """Metric requested in a query.""" + + name: str + + +@dataclass(frozen=True) +class OrderByParameter: + """Order by requested in a query.""" + + order_by: InputOrderByParameter + descending: bool = False diff --git a/metricflow/specs/where_filter_time_dimension.py b/metricflow/specs/where_filter_time_dimension.py index ed498d85d0..20280b48bc 100644 --- a/metricflow/specs/where_filter_time_dimension.py +++ b/metricflow/specs/where_filter_time_dimension.py @@ -6,6 +6,7 @@ from dbt_semantic_interfaces.protocols.protocol_hint import ProtocolHint from dbt_semantic_interfaces.type_enums import TimeGranularity from typing_extensions import override +from metricflow.errors.errors import InvalidQuerySyntax from metricflow.protocols.query_interface import QueryInterfaceTimeDimension, QueryInterfaceTimeDimensionFactory from metricflow.specs.column_assoc import ColumnAssociationResolver @@ -55,11 +56,17 @@ def create( self, time_dimension_name: str, time_granularity_name: str, - descending: bool = False, - date_part_name: Optional[str] = None, entity_path: Sequence[str] = (), + descending: Optional[bool] = None, + date_part_name: Optional[str] = None, ) -> WhereFilterTimeDimension: """Create a WhereFilterTimeDimension.""" + if descending: + raise InvalidQuerySyntax( + "Can't set descending in the where clause. Try setting descending in the order_by clause instead" + ) + if date_part_name: + raise InvalidQuerySyntax("date_part_name isn't currently supported in the where parameter") time_dimension_spec = self._dimension_spec_resolver.resolve_time_dimension_spec( time_dimension_name, TimeGranularity(time_granularity_name), entity_path ) diff --git a/metricflow/test/conftest.py b/metricflow/test/conftest.py index d2a60d713c..31cb6d558e 100644 --- a/metricflow/test/conftest.py +++ b/metricflow/test/conftest.py @@ -1,11 +1,6 @@ # These imports are required to properly set up pytest fixtures. from __future__ import annotations -from dataclasses import dataclass -from typing import Optional - -from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity - from metricflow.test.fixtures.cli_fixtures import * # noqa: F401, F403 from metricflow.test.fixtures.dataflow_fixtures import * # noqa: F401, F403 from metricflow.test.fixtures.id_fixtures import * # noqa: F401, F403 @@ -14,14 +9,3 @@ from metricflow.test.fixtures.sql_client_fixtures import * # noqa: F401, F403 from metricflow.test.fixtures.sql_fixtures import * # noqa: F401, F403 from metricflow.test.fixtures.table_fixtures import * # noqa: F401, F403 -from metricflow.time.date_part import DatePart - - -@dataclass -class MockQueryParameterDimension: - """This is a mock that is just used to test the query parser.""" - - name: str - grain: Optional[TimeGranularity] = None - descending: bool = False - date_part: Optional[DatePart] = None diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index af897edcaa..eec106d555 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -18,8 +18,9 @@ from metricflow.plan_conversion.column_resolver import ( DunderColumnAssociationResolver, ) +from metricflow.protocols.query_parameter import DimensionOrEntityQueryParameter from metricflow.protocols.sql_client import SqlClient -from metricflow.specs.query_param_implementations import DimensionQueryParameter +from metricflow.specs.query_param_implementations import DimensionOrEntityParameter, TimeDimensionParameter from metricflow.sql.sql_exprs import ( SqlCastToTimestampExpression, SqlColumnReference, @@ -255,16 +256,19 @@ def test_case( check_query_helpers = CheckQueryHelpers(sql_client) - group_by: List[DimensionQueryParameter] = [] + group_by: List[DimensionOrEntityQueryParameter] = [] for group_by_kwargs in case.group_by_objs: kwargs = copy(group_by_kwargs) date_part = kwargs.get("date_part") grain = kwargs.get("grain") - if date_part: - kwargs["date_part"] = DatePart(date_part) - if grain: - kwargs["grain"] = TimeGranularity(grain) - group_by.append(DimensionQueryParameter(**kwargs)) + if date_part or grain: + if date_part: + kwargs["date_part"] = DatePart(date_part) + if grain: + kwargs["grain"] = TimeGranularity(grain) + group_by.append(TimeDimensionParameter(**kwargs)) + else: + group_by.append(DimensionOrEntityParameter(**kwargs)) query_result = engine.query( MetricFlowQueryRequest.create_with_random_request_id( metric_names=case.metrics, diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index e861369302..51f272879c 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -14,6 +14,12 @@ from metricflow.filters.time_constraint import TimeRangeConstraint from metricflow.query.query_exceptions import InvalidQueryException from metricflow.query.query_parser import MetricFlowQueryParser +from metricflow.specs.query_param_implementations import ( + DimensionOrEntityParameter, + MetricParameter, + OrderByParameter, + TimeDimensionParameter, +) from metricflow.specs.specs import ( DimensionSpec, EntitySpec, @@ -21,7 +27,6 @@ OrderBySpec, TimeDimensionSpec, ) -from metricflow.test.conftest import MockQueryParameterDimension from metricflow.test.fixtures.model_fixtures import query_parser_from_yaml from metricflow.test.model.example_project_configuration import EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE from metricflow.test.time.metric_time_dimension import MTD @@ -160,7 +165,9 @@ def bookings_query_parser() -> MetricFlowQueryParser: # noqa def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D query_spec = bookings_query_parser.parse_and_validate_query( - metric_names=["bookings"], group_by_names=["booking__is_instant", "listing", MTD], order=[MTD, "-bookings"] + metric_names=["bookings"], + group_by_names=["booking__is_instant", "listing", MTD], + order_by_names=[MTD, "-bookings"], ) assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) @@ -188,12 +195,15 @@ def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: # def test_query_parser_with_object_params(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D Metric = namedtuple("Metric", ["name", "descending"]) metric = Metric("bookings", False) - group_by = [ - MockQueryParameterDimension("booking__is_instant"), - MockQueryParameterDimension("listing"), - MockQueryParameterDimension(MTD), - ] - order_by = [MockQueryParameterDimension(MTD), MockQueryParameterDimension("-bookings")] + group_by = ( + DimensionOrEntityParameter("booking__is_instant"), + DimensionOrEntityParameter("listing"), + TimeDimensionParameter(MTD), + ) + order_by = ( + OrderByParameter(order_by=TimeDimensionParameter(MTD)), + OrderByParameter(order_by=MetricParameter("bookings"), descending=True), + ) query_spec = bookings_query_parser.parse_and_validate_query(metrics=[metric], group_by=group_by, order_by=order_by) assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) assert query_spec.dimension_specs == ( @@ -231,7 +241,7 @@ def test_order_by_granularity_conversion() -> None: [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file] ) query_spec = query_parser.parse_and_validate_query( - metric_names=["bookings", "revenue"], group_by_names=[MTD], order=[f"-{MTD}"] + metric_names=["bookings", "revenue"], group_by_names=[MTD], order_by_names=[f"-{MTD}"] ) # The lowest common granularity is MONTH, so we expect the PTD in the order by to have that granularity. @@ -247,7 +257,7 @@ def test_order_by_granularity_conversion() -> None: def test_order_by_granularity_no_conversion(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D query_spec = bookings_query_parser.parse_and_validate_query( - metric_names=["bookings"], group_by_names=[MTD], order=[MTD] + metric_names=["bookings"], group_by_names=[MTD], order_by_names=[MTD] ) # The only granularity is DAY, so we expect the PTD in the order by to have that granularity. @@ -414,34 +424,34 @@ def test_date_part_parsing() -> None: with pytest.raises(RequestTimeGranularityException): query_parser.parse_and_validate_query( metric_names=["revenue"], - group_by=[MockQueryParameterDimension(name="metric_time", date_part=DatePart.DOW)], + group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.DOW),), ) # Can't query date part for cumulative metrics with pytest.raises(UnableToSatisfyQueryError): query_parser.parse_and_validate_query( metric_names=["revenue_cumulative"], - group_by=[MockQueryParameterDimension(name="metric_time", date_part=DatePart.YEAR)], + group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.YEAR),), ) # Can't query date part for metrics with offset to grain with pytest.raises(UnableToSatisfyQueryError): query_parser.parse_and_validate_query( metric_names=["revenue_since_start_of_year"], - group_by=[MockQueryParameterDimension(name="metric_time", date_part=DatePart.MONTH)], + group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.MONTH),), ) # Requested granularity doesn't match resolved granularity with pytest.raises(RequestTimeGranularityException): query_parser.parse_and_validate_query( metric_names=["revenue"], - group_by=[ - MockQueryParameterDimension(name="metric_time", grain=TimeGranularity.YEAR, date_part=DatePart.MONTH) - ], + group_by=( + TimeDimensionParameter(name="metric_time", grain=TimeGranularity.YEAR, date_part=DatePart.MONTH), + ), ) # Date part is compatible query_parser.parse_and_validate_query( metric_names=["revenue"], - group_by=[MockQueryParameterDimension(name="metric_time", date_part=DatePart.MONTH)], + group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.MONTH),), ) diff --git a/pyproject.toml b/pyproject.toml index 46e660d97a..e6461b5d1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "metricflow" -version = "0.202.0" +version = "0.203.0.dev1" description = "Translates a simple metric definition into reusable SQL and executes it against the SQL engine of your choice." readme = "README.md" requires-python = ">=3.8,<3.12"