diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 1b35d77347..e63ee118a5 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -695,7 +695,7 @@ def build_sink_node( @staticmethod def _contains_multihop_linkables(linkable_specs: Sequence[LinkableInstanceSpec]) -> bool: """Returns true if any of the linkable specs requires a multi-hop join to realize.""" - return any(len(x.entity_links) > 1 for x in linkable_specs) + return any(len(x.group_by_links) > 1 for x in linkable_specs) def _get_semantic_model_names_for_measures(self, measures: Sequence[MeasureSpec]) -> Set[str]: """Return the names of the semantic models needed to compute the input measures. @@ -815,6 +815,23 @@ def _find_dataflow_recipe( measure_specs=set(measure_spec_properties.measure_specs), source_nodes=self._source_node_set.source_nodes_for_metric_queries, ) + # If there are MetricGroupBys in the requested linkable specs, build source nodes to satisfy them. + # We do this at query time instead of during usual source node generation because the number of potential + # MetricGroupBy source nodes would be extremely large (and potentially slow). 
+ for group_by_metric_spec in linkable_spec_set.group_by_metric_specs: + # TODO: handle dimensions + group_by_metric_source_node = self._build_query_output_node( + # TODO: move this logic into MetricGroupBySpec + MetricFlowQuerySpec( + metric_specs=(MetricSpec(element_name=group_by_metric_spec.element_name),), + entity_specs=tuple( + EntitySpec.from_name(group_by_link.element_name) + for group_by_link in group_by_metric_spec.group_by_links + ), + ) + ) + candidate_nodes_for_right_side_of_join += (group_by_metric_source_node,) + default_join_type = SqlJoinType.LEFT_OUTER else: candidate_nodes_for_right_side_of_join = list(self._source_node_set.source_nodes_for_group_by_item_queries) @@ -1329,6 +1346,8 @@ def _build_aggregated_measure_from_measure_source_node( required_linkable_specs.as_spec_set, ) + # somehow ensure that group by metrics are in data set before it gets here. so after join to base output. + # after_join_filtered_node = FilterElementsNode( parent_node=filtered_measures_with_joined_elements, include_specs=specs_to_keep_after_join ) diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index c5c3c06473..6f4cc917a4 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -30,6 +30,7 @@ PartitionTimeDimensionJoinDescription, ) from metricflow.dataflow.dataflow_plan import BaseOutput +from metricflow.dataflow.nodes.compute_metrics import ComputeMetricsNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_to_base import JoinDescription, ValidityWindowJoinDescription from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode @@ -82,7 +83,7 @@ def join_description(self) -> JoinDescription: include_specs: List[LinkableInstanceSpec] = [] assert all( [ - len(spec.entity_links) > 0 + len(spec.group_by_links) > 0 for spec in self.satisfiable_linkable_specs if not 
spec.element_name == METRIC_TIME_ELEMENT_NAME ] @@ -90,9 +91,9 @@ def join_description(self) -> JoinDescription: include_specs.extend( [ - LinklessEntitySpec.from_reference(spec.entity_links[0]) + LinklessEntitySpec.from_reference(spec.group_by_links[0]) for spec in self.satisfiable_linkable_specs - if len(spec.entity_links) > 0 + if len(spec.group_by_links) > 0 ] ) @@ -109,7 +110,7 @@ def join_description(self) -> JoinDescription: # "user_id" and the "country" dimension so that it can be joined to the source node. include_specs.extend( [ - spec.without_first_entity_link if len(spec.entity_links) > 0 else spec + spec.without_first_group_by_link if len(spec.group_by_links) > 0 else spec for spec in self.satisfiable_linkable_specs ] ) @@ -220,7 +221,7 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs( for entity_spec_in_right_node in entity_specs_in_right_node: # If an entity has links, what that means and whether it can be used is unclear at the moment, # so skip it. - if len(entity_spec_in_right_node.entity_links) > 0: + if len(entity_spec_in_right_node.group_by_links) > 0: continue entity_instance_in_right_node = None @@ -264,7 +265,9 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs( ].semantic_model_reference, on_entity_reference=entity_spec_in_right_node.reference, ): - continue + # Check if it's joining to something pre-aggregated. If so, we can allow the supposed fan-out join. 
+ if not isinstance(right_node, ComputeMetricsNode): + continue linkless_entity_spec_in_node = LinklessEntitySpec.from_element_name( entity_spec_in_right_node.element_name @@ -272,7 +275,7 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs( satisfiable_linkable_specs = [] for needed_linkable_spec in needed_linkable_specs: - if len(needed_linkable_spec.entity_links) == 0: + if len(needed_linkable_spec.group_by_links) == 0: assert ( needed_linkable_spec.element_name == METRIC_TIME_ELEMENT_NAME ), "Only metric_time should have 0 entity links." @@ -288,21 +291,26 @@ def _find_joinable_candidate_nodes_that_can_satisfy_linkable_specs( # # Then the data set must contain "device_id__platform", which is realized with # - # required_linkable_spec.remove_first_entity_link() + # required_linkable_spec.remove_first_group_by_link() # # We might also need to check the entity type and see if it's the type of join we're allowing, # but since we're doing all left joins now, it's been left out. required_entity_matches_data_set_entity = ( - LinklessEntitySpec.from_reference(needed_linkable_spec.entity_links[0]) + LinklessEntitySpec.from_reference(needed_linkable_spec.group_by_links[0]) == linkless_entity_spec_in_node ) needed_linkable_spec_in_node = ( - needed_linkable_spec.without_first_entity_link in linkable_specs_in_right_node + needed_linkable_spec.without_first_group_by_link in linkable_specs_in_right_node ) if required_entity_matches_data_set_entity and needed_linkable_spec_in_node: satisfiable_linkable_specs.append(needed_linkable_spec) - + if isinstance(right_node, ComputeMetricsNode): + print( + "made it here!3", + needed_linkable_spec.without_first_group_by_link, + linkable_specs_in_right_node, + ) # If this node can satisfy some linkable specs, it could be useful to join on, so add it to the # candidate list. 
if len(satisfiable_linkable_specs) > 0: @@ -406,8 +414,8 @@ def evaluate_node( is_metric_time = required_linkable_spec.element_name == DataSet.metric_time_dimension_name() is_local = required_linkable_spec in data_set_linkable_specs is_unjoinable = not is_metric_time and ( - len(required_linkable_spec.entity_links) == 0 - or LinklessEntitySpec.from_reference(required_linkable_spec.entity_links[0]) + len(required_linkable_spec.group_by_links) == 0 + or LinklessEntitySpec.from_reference(required_linkable_spec.group_by_links[0]) not in data_set_linkable_specs ) if is_local: diff --git a/metricflow/dataflow/builder/partitions.py b/metricflow/dataflow/builder/partitions.py index 45a63accac..68b15bfc58 100644 --- a/metricflow/dataflow/builder/partitions.py +++ b/metricflow/dataflow/builder/partitions.py @@ -59,7 +59,7 @@ def _get_partitions(self, spec_set: InstanceSpecSet) -> PartitionSpecSet: def _get_simplest_dimension_spec(dimension_specs: Sequence[DimensionSpec]) -> DimensionSpec: """Return the time dimension spec with the fewest entity links.""" assert len(dimension_specs) > 0 - sorted_dimension_specs = sorted(dimension_specs, key=lambda x: len(x.entity_links)) + sorted_dimension_specs = sorted(dimension_specs, key=lambda x: len(x.group_by_links)) return sorted_dimension_specs[0] def resolve_partition_dimension_joins( @@ -99,7 +99,7 @@ def resolve_partition_dimension_joins( def _get_simplest_time_dimension_spec(time_dimension_specs: Sequence[TimeDimensionSpec]) -> TimeDimensionSpec: """Return the time dimension spec with the smallest granularity, then fewest entity links.""" assert len(time_dimension_specs) > 0 - sorted_specs = sorted(time_dimension_specs, key=lambda x: (x.time_granularity, len(x.entity_links))) + sorted_specs = sorted(time_dimension_specs, key=lambda x: (x.time_granularity, len(x.group_by_links))) return sorted_specs[0] def resolve_partition_time_dimension_joins( diff --git a/metricflow/dataset/convert_semantic_model.py 
b/metricflow/dataset/convert_semantic_model.py index 63e106b729..576187a07a 100644 --- a/metricflow/dataset/convert_semantic_model.py +++ b/metricflow/dataset/convert_semantic_model.py @@ -86,12 +86,12 @@ def _create_dimension_instance( self, semantic_model_name: str, dimension: Dimension, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], ) -> DimensionInstance: """Create a dimension instance from the dimension object in the model.""" dimension_spec = DimensionSpec( element_name=dimension.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, ) return DimensionInstance( associated_columns=(self._column_association_resolver.resolve_spec(dimension_spec),), @@ -107,7 +107,7 @@ def _create_dimension_instance( def _create_time_dimension_instance( self, element_name: str, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], time_granularity: TimeGranularity = DEFAULT_TIME_GRANULARITY, date_part: Optional[DatePart] = None, semantic_model_name: Optional[str] = None, @@ -115,7 +115,7 @@ def _create_time_dimension_instance( """Create a time dimension instance from the dimension object from a semantic model in the model.""" time_dimension_spec = TimeDimensionSpec( element_name=element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=time_granularity, date_part=date_part, ) @@ -124,25 +124,27 @@ def _create_time_dimension_instance( associated_columns=(self._column_association_resolver.resolve_spec(time_dimension_spec),), spec=time_dimension_spec, defined_from=( - SemanticModelElementReference( - semantic_model_name=semantic_model_name, - element_name=element_name, - ), - ) - if semantic_model_name - else (), + ( + SemanticModelElementReference( + semantic_model_name=semantic_model_name, + element_name=element_name, + ), + ) + if semantic_model_name + else () + ), ) def _create_entity_instance( self, semantic_model_name: str, 
entity: Entity, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], ) -> EntityInstance: """Create an entity instance from the entity object from a semantic modelin the model.""" entity_spec = EntitySpec( element_name=entity.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, ) return EntityInstance( associated_columns=(self._column_association_resolver.resolve_spec(entity_spec),), @@ -223,7 +225,7 @@ def _convert_dimensions( self, semantic_model_name: str, dimensions: Sequence[Dimension], - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], table_alias: str, ) -> DimensionConversionResult: dimension_instances = [] @@ -240,7 +242,7 @@ def _convert_dimensions( dimension_instance = self._create_dimension_instance( semantic_model_name=semantic_model_name, dimension=dimension, - entity_links=entity_links, + group_by_links=group_by_links, ) dimension_instances.append(dimension_instance) select_columns.append( @@ -254,7 +256,7 @@ def _convert_dimensions( dimension_select_expr=dimension_select_expr, dimension=dimension, semantic_model_name=semantic_model_name, - entity_links=entity_links, + group_by_links=group_by_links, ) time_dimension_instances += derived_time_dimension_instances select_columns += time_select_columns @@ -272,7 +274,7 @@ def _convert_time_dimension( dimension_select_expr: SqlExpressionNode, dimension: Dimension, semantic_model_name: str, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], ) -> Tuple[List[TimeDimensionInstance], List[SqlSelectColumn]]: """Converts Dimension objects with type TIME into the relevant DataSet columns. 
@@ -289,7 +291,7 @@ def _convert_time_dimension( time_dimension_instance = self._create_time_dimension_instance( semantic_model_name=semantic_model_name, element_name=dimension.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=defined_time_granularity, ) time_dimension_instances.append(time_dimension_instance) @@ -317,7 +319,7 @@ def _convert_time_dimension( semantic_model_name=semantic_model_name, defined_time_granularity=defined_time_granularity, element_name=dimension.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, dimension_select_expr=dimension_select_expr, ) time_dimension_instances.extend(new_instances) @@ -328,7 +330,7 @@ def _build_time_dimension_instances_and_columns( self, defined_time_granularity: TimeGranularity, element_name: str, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], dimension_select_expr: SqlExpressionNode, semantic_model_name: Optional[str] = None, ) -> Tuple[List[TimeDimensionInstance], List[SqlSelectColumn]]: @@ -340,7 +342,7 @@ def _build_time_dimension_instances_and_columns( time_dimension_instance = self._create_time_dimension_instance( semantic_model_name=semantic_model_name, element_name=element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=time_granularity, ) time_dimension_instances.append(time_dimension_instance) @@ -359,7 +361,7 @@ def _build_time_dimension_instances_and_columns( time_dimension_instance = self._create_time_dimension_instance( semantic_model_name=semantic_model_name, element_name=element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=defined_time_granularity, date_part=date_part, ) @@ -385,7 +387,7 @@ def _create_entity_instances( self, semantic_model_name: str, entities: Sequence[Entity], - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], table_alias: str, ) -> 
Tuple[Sequence[EntityInstance], Sequence[SqlSelectColumn]]: entity_instances = [] @@ -393,13 +395,13 @@ def _create_entity_instances( for entity in entities or []: # We don't want to create something like user_id__user_id, so skip if the link is the same as the # entity. - if len(entity_links) == 1 and entity.reference == entity_links[0]: + if len(group_by_links) == 1 and entity.reference == group_by_links[0]: continue entity_instance = self._create_entity_instance( semantic_model_name=semantic_model_name, entity=entity, - entity_links=entity_links, + group_by_links=group_by_links, ) entity_instances.append(entity_instance) @@ -438,22 +440,22 @@ def create_sql_source_data_set(self, semantic_model: SemanticModel) -> SemanticM all_select_columns.extend(select_columns) # Group by items in the semantic model can be accessed though a subset of the entities defined in the model. - possible_entity_links: List[Tuple[EntityReference, ...]] = [ + possible_group_by_links: List[Tuple[EntityReference, ...]] = [ (), ] - for entity_link in SemanticModelLookup.entity_links_for_local_elements(semantic_model): - possible_entity_links.append((entity_link,)) + for group_by_link in SemanticModelLookup.group_by_links_for_local_elements(semantic_model): + possible_group_by_links.append((group_by_link,)) # Handle dimensions conversion_results = [ self._convert_dimensions( semantic_model_name=semantic_model.name, dimensions=semantic_model.dimensions, - entity_links=entity_links, + group_by_links=group_by_links, table_alias=from_source_alias, ) - for entity_links in possible_entity_links + for group_by_links in possible_group_by_links ] all_dimension_instances.extend( @@ -481,11 +483,11 @@ def create_sql_source_data_set(self, semantic_model: SemanticModel) -> SemanticM ) # Handle entities - for entity_links in possible_entity_links: + for group_by_links in possible_group_by_links: entity_instances, select_columns = self._create_entity_instances( semantic_model_name=semantic_model.name, 
entities=semantic_model.entities, - entity_links=entity_links, + group_by_links=group_by_links, table_alias=from_source_alias, ) all_entity_instances.extend(entity_instances) @@ -528,7 +530,7 @@ def build_time_spine_source_data_set(self, time_spine_source: TimeSpineSource) - select_columns: List[SqlSelectColumn] = [] time_dimension_instance = self._create_time_dimension_instance( - element_name=time_column_name, entity_links=(), time_granularity=defined_time_granularity + element_name=time_column_name, group_by_links=(), time_granularity=defined_time_granularity ) time_dimension_instances.append(time_dimension_instance) @@ -545,7 +547,7 @@ def build_time_spine_source_data_set(self, time_spine_source: TimeSpineSource) - new_instances, new_columns = self._build_time_dimension_instances_and_columns( defined_time_granularity=defined_time_granularity, element_name=time_column_name, - entity_links=(), + group_by_links=(), dimension_select_expr=dimension_select_expr, ) time_dimension_instances.extend(new_instances) diff --git a/metricflow/dataset/dataset.py b/metricflow/dataset/dataset.py index b32bf28274..5cd1739f2b 100644 --- a/metricflow/dataset/dataset.py +++ b/metricflow/dataset/dataset.py @@ -55,7 +55,7 @@ def metric_time_dimension_spec( """Spec that corresponds to DataSet.metric_time_dimension_reference.""" return TimeDimensionSpec( element_name=DataSet.metric_time_dimension_reference().element_name, - entity_links=(), + group_by_links=(), time_granularity=time_granularity, date_part=date_part, ) diff --git a/metricflow/dataset/sql_dataset.py b/metricflow/dataset/sql_dataset.py index 4d054834a1..9e061fa1b5 100644 --- a/metricflow/dataset/sql_dataset.py +++ b/metricflow/dataset/sql_dataset.py @@ -43,7 +43,7 @@ def column_associations_for_entity( for linkable_instance in self.instance_set.entity_instances: if ( entity_spec.element_name == linkable_instance.spec.element_name - and entity_spec.entity_links == linkable_instance.spec.entity_links + and 
entity_spec.group_by_links == linkable_instance.spec.group_by_links ): column_associations_to_return = linkable_instance.associated_columns matching_instances += 1 diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index 4b1f3c7ef4..3cb431f56a 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -608,9 +608,9 @@ def simple_dimensions_for_metrics( # noqa: D name=metric_time_name, qualified_name=StructuredLinkableSpecName( element_name=metric_time_name, - entity_link_names=tuple( + group_by_link_names=tuple( entity_reference.element_name - for entity_reference in linkable_dimension.entity_links + for entity_reference in linkable_dimension.group_by_links ), time_granularity=linkable_dimension.time_granularity, ).qualified_name, diff --git a/metricflow/engine/models.py b/metricflow/engine/models.py index d3431df768..790baa2f07 100644 --- a/metricflow/engine/models.py +++ b/metricflow/engine/models.py @@ -89,7 +89,7 @@ def from_pydantic(cls, pydantic_dimension: SemanticManifestDimension, path_key: """Build from pydantic Dimension and entity_key.""" qualified_name = DimensionSpec( element_name=path_key.element_name, - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, ).qualified_name parsed_type_params: Optional[DimensionTypeParams] = None if pydantic_dimension.type_params: diff --git a/metricflow/instances.py b/metricflow/instances.py index 8e85305495..d084953b05 100644 --- a/metricflow/instances.py +++ b/metricflow/instances.py @@ -14,6 +14,7 @@ from metricflow.specs.specs import ( DimensionSpec, EntitySpec, + GroupByMetricSpec, InstanceSpec, InstanceSpecSet, MeasureSpec, @@ -102,6 +103,13 @@ class EntityInstance(MdoInstance[EntitySpec], SemanticModelElementInstance): # spec: EntitySpec +@dataclass(frozen=True) +class GroupByMetricInstance(MdoInstance[GroupByMetricSpec]): # noqa: D + associated_columns: Tuple[ColumnAssociation, ...] 
+ spec: GroupByMetricSpec + defined_from: MetricModelReference + + @dataclass(frozen=True) class MetricInstance(MdoInstance[MetricSpec], SerializableDataclass): # noqa: D associated_columns: Tuple[ColumnAssociation, ...] @@ -145,6 +153,7 @@ class InstanceSet(SerializableDataclass): entity_instances: Tuple[EntityInstance, ...] = () metric_instances: Tuple[MetricInstance, ...] = () metadata_instances: Tuple[MetadataInstance, ...] = () + group_by_metric_instances: Tuple[GroupByMetricInstance, ...] = () def transform(self, transform_function: InstanceSetTransform[TransformOutputT]) -> TransformOutputT: # noqa: D return transform_function.transform(self) @@ -161,6 +170,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: entity_instances: List[EntityInstance] = [] metric_instances: List[MetricInstance] = [] metadata_instances: List[MetadataInstance] = [] + group_by_metric_instances: List[GroupByMetricInstance] = [] for instance_set in instance_sets: for measure_instance in instance_set.measure_instances: @@ -181,6 +191,9 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: for metadata_instance in instance_set.metadata_instances: if metadata_instance.spec not in {x.spec for x in metadata_instances}: metadata_instances.append(metadata_instance) + for group_by_metric_instance in instance_set.group_by_metric_instances: + if group_by_metric_instance.spec not in {x.spec for x in group_by_metric_instances}: + group_by_metric_instances.append(group_by_metric_instance) return InstanceSet( measure_instances=tuple(measure_instances), @@ -189,6 +202,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: entity_instances=tuple(entity_instances), metric_instances=tuple(metric_instances), metadata_instances=tuple(metadata_instances), + group_by_metric_instances=tuple(group_by_metric_instances), ) @property @@ -200,4 +214,5 @@ def spec_set(self) -> InstanceSpecSet: # noqa: D entity_specs=tuple(x.spec for x in self.entity_instances), 
metric_specs=tuple(x.spec for x in self.metric_instances), metadata_specs=tuple(x.spec for x in self.metadata_instances), + group_by_metric_specs=tuple(x.spec for x in self.group_by_metric_instances), ) diff --git a/metricflow/mf_logging/pretty_print.py b/metricflow/mf_logging/pretty_print.py index 2f8c1bb001..195dbab24e 100644 --- a/metricflow/mf_logging/pretty_print.py +++ b/metricflow/mf_logging/pretty_print.py @@ -378,12 +378,12 @@ def mf_pformat( # type: ignore e.g. metricflow.specs.DimensionSpec( element_name='country', - entity_links=() + group_by_links=() ), Instead, the below will print something like: - DimensionSpec(element_name='country', entity_links=()) + DimensionSpec(element_name='country', group_by_links=()) Also, this simplifies the object representation in some cases (e.g. Enums) and provides options for a more compact string. This is an improvement on pformat_big_objects() in dbt-semantic-interfaces to be more compact and easier diff --git a/metricflow/model/data_warehouse_model_validator.py b/metricflow/model/data_warehouse_model_validator.py index 09f388914c..186293fadd 100644 --- a/metricflow/model/data_warehouse_model_validator.py +++ b/metricflow/model/data_warehouse_model_validator.py @@ -98,9 +98,9 @@ class DataWarehouseTaskBuilder: """Task builder for standard data warehouse validation tasks.""" @staticmethod - def _remove_entity_link_specs(specs: Tuple[LinkableInstanceSpecT, ...]) -> Tuple[LinkableInstanceSpecT, ...]: - """For the purposes of data warehouse validation, specs with entity_links are unnecesary.""" - return tuple(spec for spec in specs if not spec.entity_links) + def _remove_group_by_link_specs(specs: Tuple[LinkableInstanceSpecT, ...]) -> Tuple[LinkableInstanceSpecT, ...]: + """For the purposes of data warehouse validation, specs with group_by_links are unnecesary.""" + return tuple(spec for spec in specs if not spec.group_by_links) @staticmethod def _semantic_model_nodes(render_tools: QueryRenderingTools, semantic_model: 
SemanticModel) -> Sequence[BaseOutput]: @@ -177,7 +177,7 @@ def gen_dimension_tasks( semantic_model_sub_tasks: List[DataWarehouseValidationTask] = [] dataset = render_tools.converter.create_sql_source_data_set(semantic_model) - dimension_specs = DataWarehouseTaskBuilder._remove_entity_link_specs( + dimension_specs = DataWarehouseTaskBuilder._remove_group_by_link_specs( dataset.instance_set.spec_set.dimension_specs ) @@ -192,7 +192,7 @@ def gen_dimension_tasks( ) ) - time_dimension_specs = DataWarehouseTaskBuilder._remove_entity_link_specs( + time_dimension_specs = DataWarehouseTaskBuilder._remove_group_by_link_specs( dataset.instance_set.spec_set.time_dimension_specs ) for spec in time_dimension_specs: @@ -272,7 +272,7 @@ def gen_entity_tasks(cls, manifest: SemanticManifest, sql_client: SqlClient) -> semantic_model_sub_tasks: List[DataWarehouseValidationTask] = [] dataset = render_tools.converter.create_sql_source_data_set(semantic_model) - semantic_model_specs = DataWarehouseTaskBuilder._remove_entity_link_specs( + semantic_model_specs = DataWarehouseTaskBuilder._remove_group_by_link_specs( dataset.instance_set.spec_set.entity_specs ) for spec in semantic_model_specs: diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index 739a947458..c6ea0bbb27 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -32,6 +32,7 @@ DimensionSpec, EntityReference, EntitySpec, + GroupByMetricSpec, LinkableSpecSet, TimeDimensionSpec, ) @@ -44,7 +45,7 @@ class ElementPathKey: """A key that can uniquely identify an element and the joins used to realize the element.""" element_name: str - entity_links: Tuple[EntityReference, ...] + group_by_links: Tuple[EntityReference, ...] time_granularity: Optional[TimeGranularity] date_part: Optional[DatePart] @@ -56,7 +57,7 @@ class LinkableDimension: # The semantic model where this dimension was defined. 
semantic_model_origin: Optional[SemanticModelReference] element_name: str - entity_links: Tuple[EntityReference, ...] + group_by_links: Tuple[EntityReference, ...] join_path: Tuple[SemanticModelJoinPathElement, ...] properties: FrozenSet[LinkableElementProperties] time_granularity: Optional[TimeGranularity] @@ -66,7 +67,7 @@ class LinkableDimension: def path_key(self) -> ElementPathKey: # noqa: D return ElementPathKey( element_name=self.element_name, - entity_links=self.entity_links, + group_by_links=self.group_by_links, time_granularity=self.time_granularity, date_part=self.date_part, ) @@ -84,18 +85,48 @@ class LinkableEntity: semantic_model_origin: SemanticModelReference element_name: str properties: FrozenSet[LinkableElementProperties] - entity_links: Tuple[EntityReference, ...] + group_by_links: Tuple[EntityReference, ...] join_path: Tuple[SemanticModelJoinPathElement, ...] @property def path_key(self) -> ElementPathKey: # noqa: D return ElementPathKey( element_name=self.element_name, - entity_links=self.entity_links, + group_by_links=self.group_by_links, time_granularity=None, date_part=None, ) + @property + def reference(self) -> EntityReference: # noqa: D + return EntityReference(element_name=self.element_name) + + +@dataclass(frozen=True) +class LinkableMetric: + """Describes how a metric can be realized by joining based on entity links.""" + + element_name: str + join_by_semantic_model: SemanticModelReference + # TODO: start with joining by entity, add joining by dimension later + group_by_links: Tuple[EntityReference, ...] + # TODO: add group by metric property + properties: FrozenSet[LinkableElementProperties] + join_path: Tuple[SemanticModelJoinPathElement, ...] 
+ + @property + def path_key(self) -> ElementPathKey: # noqa: D + return ElementPathKey( + element_name=self.element_name, + group_by_links=self.group_by_links, + time_granularity=None, + date_part=None, + ) + + @property + def reference(self) -> MetricReference: # noqa: D + return MetricReference(element_name=self.element_name) + @dataclass(frozen=True) class LinkableElementSet: @@ -111,12 +142,13 @@ class LinkableElementSet: # "listing__country_latest": ( # LinkableDimension( # element_name="country_latest", - # entity_links=("listing",), + # group_by_links=("listing",), # semantic_model_origin="listings_latest_source", # ) # } path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] + path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] @staticmethod def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: @@ -126,12 +158,15 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li """ key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list) + key_to_linkable_metrics: Dict[ElementPathKey, List[LinkableMetric]] = defaultdict(list) for linkable_element_set in linkable_element_sets: for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): key_to_linkable_dimensions[path_key].extend(linkable_dimensions) for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): key_to_linkable_entities[path_key].extend(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + key_to_linkable_metrics[path_key].extend(linkable_metrics) # Convert the dictionaries to use tuples instead of lists. 
return LinkableElementSet( @@ -141,6 +176,9 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li path_key_to_linkable_entities={ path_key: tuple(entities) for path_key, entities in key_to_linkable_entities.items() }, + path_key_to_linkable_metrics={ + path_key: tuple(metrics) for path_key, metrics in key_to_linkable_metrics.items() + }, ) @staticmethod @@ -148,12 +186,11 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] """Find the intersection of all elements in the sets by path key. This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would - find the LinakbleSpecSet for each metric in the query, then do an intersection of the sets. + find the LinkableSpecSet for each metric in the query, then do an intersection of the sets. """ if len(linkable_element_sets) == 0: return LinkableElementSet( - path_key_to_linkable_dimensions={}, - path_key_to_linkable_entities={}, + path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}, path_key_to_linkable_metrics={} ) # Find path keys that are common to all LinkableElementSets. @@ -171,9 +208,17 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] ] ) + common_linkable_metric_path_keys: Set[ElementPathKey] = set.intersection( + *[ + set(linkable_element_set.path_key_to_linkable_metrics.keys()) + for linkable_element_set in linkable_element_sets + ] + ) + # Create a new LinkableElementSet that only includes items where the path key is common to all sets. 
join_path_to_linkable_dimensions: Dict[ElementPathKey, Set[LinkableDimension]] = defaultdict(set) join_path_to_linkable_entities: Dict[ElementPathKey, Set[LinkableEntity]] = defaultdict(set) + join_path_to_linkable_metrics: Dict[ElementPathKey, Set[LinkableMetric]] = defaultdict(set) for linkable_element_set in linkable_element_sets: for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): @@ -182,6 +227,9 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): if path_key in common_linkable_entity_path_keys: join_path_to_linkable_entities[path_key].update(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + if path_key in common_linkable_metric_path_keys: + join_path_to_linkable_metrics[path_key].update(linkable_metrics) return LinkableElementSet( path_key_to_linkable_dimensions={ @@ -205,6 +253,14 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] ) for path_key, entities in join_path_to_linkable_entities.items() }, + path_key_to_linkable_metrics={ + path_key: tuple( + sorted( + metrics, key=lambda linkable_metric: linkable_metric.join_by_semantic_model.semantic_model_name + ) + ) + for path_key, metrics in join_path_to_linkable_metrics.items() + }, ) def filter( @@ -221,6 +277,7 @@ def filter( """ key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {} key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {} + key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {} for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items(): filtered_linkable_dimensions = tuple( @@ -250,9 +307,24 @@ def filter( if len(filtered_linkable_entities) > 0: key_to_linkable_entities[path_key] = filtered_linkable_entities + for 
path_key, linkable_metrics in self.path_key_to_linkable_metrics.items(): + filtered_linkable_metrics = tuple( + linkable_metric + for linkable_metric in linkable_metrics + if len(linkable_metric.properties.intersection(with_any_of)) > 0 + and len(linkable_metric.properties.intersection(without_any_of)) == 0 + and ( + len(without_all_of) == 0 + or linkable_metric.properties.intersection(without_all_of) != without_all_of + ) + ) + if len(filtered_linkable_metrics) > 0: + key_to_linkable_metrics[path_key] = filtered_linkable_metrics + return LinkableElementSet( path_key_to_linkable_dimensions=key_to_linkable_dimensions, path_key_to_linkable_entities=key_to_linkable_entities, + path_key_to_linkable_metrics=key_to_linkable_metrics, ) @property @@ -261,7 +333,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D dimension_specs=tuple( DimensionSpec( element_name=path_key.element_name, - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, ) for path_key in self.path_key_to_linkable_dimensions.keys() if not path_key.time_granularity @@ -269,7 +341,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D time_dimension_specs=tuple( TimeDimensionSpec( element_name=path_key.element_name, - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, time_granularity=path_key.time_granularity, date_part=path_key.date_part, ) @@ -279,10 +351,19 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D entity_specs=tuple( EntitySpec( element_name=path_key.element_name, - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, ) for path_key in self.path_key_to_linkable_entities ), + # Need this? Can we just use the metric_specs field and not pass group_by_links through? 
+ # If so, might be able to remove GroupByMetricSpec class entirely + group_by_metric_specs=tuple( + GroupByMetricSpec( + element_name=path_key.element_name, + group_by_links=path_key.group_by_links, + ) + for path_key in self.path_key_to_linkable_metrics + ), ) @property @@ -299,6 +380,11 @@ def only_unique_path_keys(self) -> LinkableElementSet: for path_key, linkable_entities in self.path_key_to_linkable_entities.items() if len(linkable_entities) <= 1 }, + path_key_to_linkable_metrics={ + path_key: linkable_metrics + for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items() + if len(linkable_metrics) <= 1 + }, ) @@ -313,7 +399,7 @@ class SemanticModelJoinPathElement: def _generate_linkable_time_dimensions( semantic_model_origin: SemanticModelReference, dimension: Dimension, - entity_links: Tuple[EntityReference, ...], + group_by_links: Tuple[EntityReference, ...], join_path: Sequence[SemanticModelJoinPathElement], with_properties: FrozenSet[LinkableElementProperties], ) -> Sequence[LinkableDimension]: @@ -334,7 +420,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, join_path=tuple(join_path), time_granularity=time_granularity, date_part=None, @@ -349,7 +435,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, join_path=tuple(join_path), time_granularity=time_granularity, date_part=date_part, @@ -373,73 +459,15 @@ class SemanticModelJoinPath: path_elements: Tuple[SemanticModelJoinPathElement, ...] 
- def create_linkable_element_set( - self, semantic_model_accessor: SemanticModelAccessor, with_properties: FrozenSet[LinkableElementProperties] - ) -> LinkableElementSet: - """Given the current path, generate the respective linkable elements from the last semantic model in the path.""" - entity_links = tuple(x.join_on_entity for x in self.path_elements) - + @property + def last_path_element(self) -> SemanticModelJoinPathElement: # noqa: D assert len(self.path_elements) > 0 - semantic_model = semantic_model_accessor.get_by_reference(self.path_elements[-1].semantic_model_reference) - assert semantic_model - - linkable_dimensions: List[LinkableDimension] = [] - linkable_entities: List[LinkableEntity] = [] - - for dimension in semantic_model.dimensions: - dimension_type = dimension.type - if dimension_type == DimensionType.CATEGORICAL: - linkable_dimensions.append( - LinkableDimension( - semantic_model_origin=semantic_model.reference, - element_name=dimension.reference.element_name, - entity_links=entity_links, - join_path=self.path_elements, - properties=with_properties, - time_granularity=None, - date_part=None, - ) - ) - elif dimension_type == DimensionType.TIME: - linkable_dimensions.extend( - _generate_linkable_time_dimensions( - semantic_model_origin=semantic_model.reference, - dimension=dimension, - entity_links=entity_links, - join_path=(), - with_properties=with_properties, - ) - ) - else: - raise RuntimeError(f"Unhandled type: {dimension_type}") - - for entity in semantic_model.entities: - # Avoid creating "booking_id__booking_id" - if entity.reference != entity_links[-1]: - linkable_entities.append( - LinkableEntity( - semantic_model_origin=semantic_model.reference, - element_name=entity.reference.element_name, - entity_links=entity_links, - join_path=self.path_elements, - properties=with_properties.union({LinkableElementProperties.ENTITY}), - ) - ) - - return LinkableElementSet( - path_key_to_linkable_dimensions={ - linkable_dimension.path_key: 
(linkable_dimension,) for linkable_dimension in linkable_dimensions - }, - path_key_to_linkable_entities={ - linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities - }, - ) + return self.path_elements[-1] @property def last_semantic_model_reference(self) -> SemanticModelReference: """The last semantic model that would be joined in this path.""" - assert len(self.path_elements) > 0 - return self.path_elements[-1].semantic_model_reference + return self.last_path_element.semantic_model_reference class ValidLinkableSpecResolver: @@ -452,14 +480,14 @@ def __init__( self, semantic_manifest: SemanticManifest, semantic_model_lookup: SemanticModelAccessor, - max_entity_links: int, + max_group_by_links: int, ) -> None: """Constructor. Args: semantic_manifest: the model to use. semantic_model_lookup: used to look up entities for a semantic model. - max_entity_links: the maximum number of joins to do when computing valid elements. + max_group_by_links: the maximum number of joins to do when computing valid elements. """ self._semantic_manifest = semantic_manifest self._semantic_model_lookup = semantic_model_lookup @@ -467,8 +495,8 @@ def __init__( self._semantic_models = sorted(self._semantic_manifest.semantic_models, key=lambda x: x.name) self._join_evaluator = SemanticModelJoinEvaluator(semantic_model_lookup) - assert max_entity_links >= 0 - self._max_entity_links = max_entity_links + assert max_group_by_links >= 0 + self._max_group_by_links = max_group_by_links # Map measures / entities to semantic models that contain them. 
self._entity_to_semantic_model: Dict[str, List[SemanticModel]] = defaultdict(list) @@ -479,6 +507,7 @@ def __init__( self._entity_to_semantic_model[entity.reference.element_name].append(semantic_model) self._metric_to_linkable_element_sets: Dict[str, List[LinkableElementSet]] = {} + self._joinable_metrics_for_semantic_models: Dict[SemanticModelReference, Set[MetricReference]] = {} start_time = time.time() for metric in self._semantic_manifest.metrics: @@ -518,6 +547,17 @@ def __init__( self._metric_to_linkable_element_sets[metric.name] = linkable_sets_for_measure + # Linkable element lookup won't work unless this loop happens after the one above. + for metric in self._semantic_manifest.metrics: + metric_reference = MetricReference(metric.name) + linkable_element_set_for_metric = self.get_linkable_elements_for_metrics([metric_reference]) + for linkable_entities in linkable_element_set_for_metric.path_key_to_linkable_entities.values(): + for linkable_entity in linkable_entities: + semantic_model_reference = linkable_entity.semantic_model_origin + metrics = self._joinable_metrics_for_semantic_models.get(semantic_model_reference, set()) + metrics.add(metric_reference) + self._joinable_metrics_for_semantic_models[semantic_model_reference] = metrics + # If no metrics are specified, the query interface supports distinct dimension values from a single semantic # model. 
linkable_element_sets_to_merge: List[LinkableElementSet] = [] @@ -559,23 +599,23 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link LinkableEntity( semantic_model_origin=semantic_model.reference, element_name=entity.reference.element_name, - entity_links=(), + group_by_links=(), join_path=(), properties=frozenset({LinkableElementProperties.LOCAL, LinkableElementProperties.ENTITY}), ) ) - for entity_link in self._semantic_model_lookup.entity_links_for_local_elements(semantic_model): + for group_by_link in self._semantic_model_lookup.group_by_links_for_local_elements(semantic_model): linkable_entities.append( LinkableEntity( semantic_model_origin=semantic_model.reference, element_name=entity.reference.element_name, - entity_links=(entity_link,), + group_by_links=(group_by_link,), join_path=(), properties=frozenset({LinkableElementProperties.LOCAL, LinkableElementProperties.ENTITY}), ) ) - for entity_link in self._semantic_model_lookup.entity_links_for_local_elements(semantic_model): + for group_by_link in self._semantic_model_lookup.group_by_links_for_local_elements(semantic_model): dimension_properties = frozenset({LinkableElementProperties.LOCAL}) for dimension in semantic_model.dimensions: dimension_type = dimension.type @@ -584,7 +624,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link LinkableDimension( semantic_model_origin=semantic_model.reference, element_name=dimension.reference.element_name, - entity_links=(entity_link,), + group_by_links=(group_by_link,), join_path=(), properties=dimension_properties, time_granularity=None, @@ -596,7 +636,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link _generate_linkable_time_dimensions( semantic_model_origin=semantic_model.reference, dimension=dimension, - entity_links=(entity_link,), + group_by_links=(group_by_link,), join_path=(), with_properties=dimension_properties, ) @@ -611,6 +651,7 @@ def 
_get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link path_key_to_linkable_entities={ linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities }, + path_key_to_linkable_metrics={}, ) def _get_semantic_models_with_joinable_entity( @@ -683,7 +724,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference if defined_granularity.is_smaller_than_or_equal(time_granularity) ) - # For each of the possible time granularities, create a LinkableDimension for each one. + # For each of the possible time granularities, create a LinkableDimension. path_key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) for time_granularity in possible_metric_time_granularities: possible_date_parts: Sequence[Optional[DatePart]] = ( @@ -696,7 +737,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference for date_part in possible_date_parts: path_key = ElementPathKey( element_name=DataSet.metric_time_dimension_name(), - entity_links=(), + group_by_links=(), time_granularity=time_granularity, date_part=date_part, ) @@ -704,17 +745,19 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference LinkableDimension( semantic_model_origin=measure_semantic_model.reference if measure_semantic_model else None, element_name=DataSet.metric_time_dimension_name(), - entity_links=(), + group_by_links=(), join_path=(), # Anything that's not at the base time granularity of the measure's aggregation time dimension # should be considered derived. 
- properties=frozenset({LinkableElementProperties.METRIC_TIME}) - if time_granularity is defined_granularity and date_part is None - else frozenset( - { - LinkableElementProperties.METRIC_TIME, - LinkableElementProperties.DERIVED_TIME_GRANULARITY, - } + properties=( + frozenset({LinkableElementProperties.METRIC_TIME}) + if time_granularity is defined_granularity and date_part is None + else frozenset( + { + LinkableElementProperties.METRIC_TIME, + LinkableElementProperties.DERIVED_TIME_GRANULARITY, + } + ) ), time_granularity=time_granularity, date_part=date_part, @@ -727,6 +770,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference for path_key, linkable_dimensions in path_key_to_linkable_dimensions.items() }, path_key_to_linkable_entities={}, + path_key_to_linkable_metrics={}, ) def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> LinkableElementSet: @@ -753,8 +797,8 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl ) single_hop_elements = LinkableElementSet.merge_by_path_key( [ - join_path.create_linkable_element_set( - semantic_model_accessor=self._semantic_model_lookup, + self.create_linkable_element_set_from_join_path( + join_path=join_path, with_properties=frozenset({LinkableElementProperties.JOINED}), ) for join_path in join_paths @@ -763,9 +807,11 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl # Create multi-hop elements. At each iteration, we generate the list of valid elements based on the current join # path, extend all paths to include the next valid semantic model, then repeat. 
- multi_hop_elements = LinkableElementSet(path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}) + multi_hop_elements = LinkableElementSet( + path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}, path_key_to_linkable_metrics={} + ) - for i in range(self._max_entity_links - 1): + for _ in range(self._max_group_by_links - 1): new_join_paths: List[SemanticModelJoinPath] = [] for join_path in join_paths: new_join_paths.extend( @@ -780,8 +826,8 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl multi_hop_elements = LinkableElementSet.merge_by_path_key( (multi_hop_elements,) + tuple( - new_join_path.create_linkable_element_set( - semantic_model_accessor=self._semantic_model_lookup, + self.create_linkable_element_set_from_join_path( + join_path=new_join_path, with_properties=frozenset( {LinkableElementProperties.JOINED, LinkableElementProperties.MULTI_HOP} ), @@ -805,7 +851,6 @@ def _get_linkable_element_set_for_measure( elements_in_semantic_model = self._get_elements_in_semantic_model(measure_semantic_model) metric_time_elements = self._get_metric_time_elements(measure_reference) joined_elements = self._get_joined_elements(measure_semantic_model) - return LinkableElementSet.merge_by_path_key( ( elements_in_semantic_model, @@ -844,8 +889,8 @@ def get_linkable_elements_for_distinct_values_query( def get_linkable_elements_for_metrics( self, metric_references: Sequence[MetricReference], - with_any_of: FrozenSet[LinkableElementProperties], - without_any_of: FrozenSet[LinkableElementProperties], + with_any_of: FrozenSet[LinkableElementProperties] = LinkableElementProperties.all_properties(), + without_any_of: FrozenSet[LinkableElementProperties] = frozenset(), ) -> LinkableElementSet: """Gets the valid linkable elements that are common to all requested metrics.""" linkable_element_sets = [] @@ -909,3 +954,81 @@ def _find_next_possible_paths( new_join_paths.append(new_join_path) return new_join_paths + + def 
create_linkable_element_set_from_join_path( + self, + join_path: SemanticModelJoinPath, + with_properties: FrozenSet[LinkableElementProperties], + ) -> LinkableElementSet: + """Given the current path, generate the respective linkable elements from the last semantic model in the path.""" + group_by_links = tuple(x.join_on_entity for x in join_path.path_elements) + + semantic_model = self._semantic_model_lookup.get_by_reference(join_path.last_semantic_model_reference) + assert semantic_model + + linkable_dimensions: List[LinkableDimension] = [] + linkable_entities: List[LinkableEntity] = [] + linkable_metrics: List[LinkableMetric] = [] + + for dimension in semantic_model.dimensions: + dimension_type = dimension.type + if dimension_type == DimensionType.CATEGORICAL: + linkable_dimensions.append( + LinkableDimension( + semantic_model_origin=semantic_model.reference, + element_name=dimension.reference.element_name, + group_by_links=group_by_links, + join_path=join_path.path_elements, + properties=with_properties, + time_granularity=None, + date_part=None, + ) + ) + elif dimension_type == DimensionType.TIME: + linkable_dimensions.extend( + _generate_linkable_time_dimensions( + semantic_model_origin=semantic_model.reference, + dimension=dimension, + group_by_links=group_by_links, + join_path=(), + with_properties=with_properties, + ) + ) + else: + raise RuntimeError(f"Unhandled type: {dimension_type}") + + for entity in semantic_model.entities: + # Avoid creating "booking_id__booking_id" + if entity.reference != group_by_links[-1]: + linkable_entities.append( + LinkableEntity( + semantic_model_origin=semantic_model.reference, + element_name=entity.reference.element_name, + group_by_links=group_by_links, + join_path=join_path.path_elements, + properties=with_properties.union({LinkableElementProperties.ENTITY}), + ) + ) + + linkable_metrics = [ + LinkableMetric( + element_name=metric.element_name, + group_by_links=group_by_links, + join_path=join_path.path_elements, + 
join_by_semantic_model=semantic_model.reference, + properties=with_properties, + ) + for metric in self._joinable_metrics_for_semantic_models.get(join_path.last_semantic_model_reference, set()) + ] + + return LinkableElementSet( + path_key_to_linkable_dimensions={ + linkable_dimension.path_key: (linkable_dimension,) for linkable_dimension in linkable_dimensions + }, + path_key_to_linkable_entities={ + linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities + }, + path_key_to_linkable_metrics={ + linkable_metric.path_key: (linkable_metric,) for linkable_metric in linkable_metrics + }, + ) diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index 44a7bd3280..883d7930bb 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -18,7 +18,7 @@ from metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from metricflow.protocols.semantics import MetricAccessor -from metricflow.specs.specs import LinkableInstanceSpec, TimeDimensionSpec +from metricflow.specs.specs import InstanceSpec, TimeDimensionSpec logger = logging.getLogger(__name__) @@ -37,7 +37,7 @@ def __init__( # noqa: D self._linkable_spec_resolver = ValidLinkableSpecResolver( semantic_manifest=self._semantic_manifest, semantic_model_lookup=semantic_model_lookup, - max_entity_links=MAX_JOIN_HOPS, + max_group_by_links=MAX_JOIN_HOPS, ) def element_specs_for_metrics( @@ -45,7 +45,7 @@ def element_specs_for_metrics( metric_references: Sequence[MetricReference], with_any_property: FrozenSet[LinkableElementProperties] = LinkableElementProperties.all_properties(), without_any_property: FrozenSet[LinkableElementProperties] = frozenset(), - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Dimensions common to all metrics requested (intersection).""" 
all_linkable_specs = self._linkable_spec_resolver.get_linkable_elements_for_metrics( metric_references=metric_references, @@ -60,7 +60,7 @@ def group_by_item_specs_for_measure( measure_reference: MeasureReference, with_any_of: Optional[Set[LinkableElementProperties]] = None, without_any_of: Optional[Set[LinkableElementProperties]] = None, - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Return group-by-items that are possible for a measure.""" frozen_with_any_of = ( LinkableElementProperties.all_properties() if with_any_of is None else frozenset(with_any_of) @@ -77,7 +77,7 @@ def group_by_item_specs_for_no_metrics_query( self, with_any_of: Optional[Set[LinkableElementProperties]] = None, without_any_of: Optional[Set[LinkableElementProperties]] = None, - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Return the possible group-by-items for a dimension values query with no metrics.""" frozen_with_any_of = ( LinkableElementProperties.all_properties() if with_any_of is None else frozenset(with_any_of) @@ -187,6 +187,6 @@ def get_valid_agg_time_dimensions_for_metric( path_key = agg_time_dimension_element_path_keys[0] valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, ) return valid_agg_time_dimension_specs diff --git a/metricflow/model/semantics/semantic_model_join_evaluator.py b/metricflow/model/semantics/semantic_model_join_evaluator.py index 18b50c6982..a196c72984 100644 --- a/metricflow/model/semantics/semantic_model_join_evaluator.py +++ b/metricflow/model/semantics/semantic_model_join_evaluator.py @@ -194,7 +194,6 @@ def get_valid_semantic_model_entity_join_type( return None join_type = SemanticModelEntityJoinType(left_entity.type, right_entity.type) - if join_type in 
SemanticModelJoinEvaluator._VALID_ENTITY_JOINS: return join_type elif join_type in SemanticModelJoinEvaluator._INVALID_ENTITY_JOINS: @@ -227,7 +226,7 @@ def _semantic_model_of_entity_in_instance_set( matching_instances: List[EntityInstance] = [] for entity_instance in instance_set.entity_instances: assert len(entity_instance.defined_from) == 1 - if len(entity_instance.spec.entity_links) == 0 and entity_instance.spec.reference == entity_reference: + if len(entity_instance.spec.group_by_links) == 0 and entity_instance.spec.reference == entity_reference: matching_instances.append(entity_instance) assert len(matching_instances) == 1, ( diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index c45f349ffc..78d1655955 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -316,18 +316,18 @@ def resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityRef @staticmethod @override - def entity_links_for_local_elements(semantic_model: SemanticModel) -> Sequence[EntityReference]: + def group_by_links_for_local_elements(semantic_model: SemanticModel) -> Sequence[EntityReference]: primary_entity_reference = semantic_model.primary_entity_reference - possible_entity_links = set() + possible_group_by_links = set() if primary_entity_reference is not None: - possible_entity_links.add(primary_entity_reference) + possible_group_by_links.add(primary_entity_reference) for entity in semantic_model.entities: if entity.is_linkable_entity_type: - possible_entity_links.add(entity.reference) + possible_group_by_links.add(entity.reference) - return sorted(possible_entity_links, key=lambda entity_reference: entity_reference.element_name) + return sorted(possible_group_by_links, key=lambda entity_reference: entity_reference.element_name) def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: # noqa: D if 
TimeDimensionReference(element_name=element_name) in self._dimension_ref_to_spec: @@ -351,15 +351,15 @@ def get_agg_time_dimension_path_key_for_measure(self, measure_reference: Measure ), f"Expected exactly one semantic model for measure {measure_reference}, but found semantic models {semantic_models}." semantic_model = semantic_models[0] - entity_link = self.resolved_primary_entity(semantic_model) - assert entity_link is not None, ( + group_by_link = self.resolved_primary_entity(semantic_model) + assert group_by_link is not None, ( f"Expected semantic model {semantic_model} to have a primary entity since it has a " "measure requiring an agg_time_dimension, but found none.", ) return ElementPathKey( element_name=agg_time_dimension.element_name, - entity_links=(entity_link,), + group_by_links=(group_by_link,), time_granularity=None, date_part=None, ) @@ -371,5 +371,5 @@ def get_agg_time_dimension_specs_for_measure( path_key = self.get_agg_time_dimension_path_key_for_measure(measure_reference) return TimeDimensionSpec.generate_possible_specs_for_time_dimension( time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), - entity_links=path_key.entity_links, + group_by_links=path_key.group_by_links, ) diff --git a/metricflow/naming/dunder_scheme.py b/metricflow/naming/dunder_scheme.py index 35eb1f12e9..56ce2f6987 100644 --- a/metricflow/naming/dunder_scheme.py +++ b/metricflow/naming/dunder_scheme.py @@ -73,7 +73,7 @@ def spec_pattern(self, input_str: str) -> EntityLinkPattern: return EntityLinkPattern( parameter_set=EntityLinkPatternParameterSet.from_parameters( element_name=input_str_parts[0], - entity_links=(), + group_by_links=(), time_granularity=time_grain, date_part=None, fields_to_compare=tuple(fields_to_compare), @@ -93,7 +93,7 @@ def spec_pattern(self, input_str: str) -> EntityLinkPattern: return EntityLinkPattern( parameter_set=EntityLinkPatternParameterSet.from_parameters( element_name=input_str_parts[0], - entity_links=(), + 
group_by_links=(), time_granularity=time_grain, date_part=None, fields_to_compare=fields_to_compare, @@ -103,7 +103,7 @@ def spec_pattern(self, input_str: str) -> EntityLinkPattern: return EntityLinkPattern( parameter_set=EntityLinkPatternParameterSet.from_parameters( element_name=input_str_parts[-2], - entity_links=tuple(EntityReference(entity_name) for entity_name in input_str_parts[:-2]), + group_by_links=tuple(EntityReference(entity_name) for entity_name in input_str_parts[:-2]), time_granularity=time_grain, date_part=None, fields_to_compare=fields_to_compare, @@ -114,7 +114,7 @@ def spec_pattern(self, input_str: str) -> EntityLinkPattern: return EntityLinkPattern( parameter_set=EntityLinkPatternParameterSet.from_parameters( element_name=input_str_parts[-1], - entity_links=tuple(EntityReference(entity_name) for entity_name in input_str_parts[:-1]), + group_by_links=tuple(EntityReference(entity_name) for entity_name in input_str_parts[:-1]), time_granularity=None, date_part=None, fields_to_compare=fields_to_compare, @@ -151,7 +151,7 @@ def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]: names_to_return = [] for time_dimension_spec in spec_set.time_dimension_specs: - items = list(entity_link.element_name for entity_link in time_dimension_spec.entity_links) + [ + items = list(group_by_link.element_name for group_by_link in time_dimension_spec.group_by_links) + [ time_dimension_spec.element_name ] if time_dimension_spec.date_part is not None: @@ -161,7 +161,7 @@ def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]: names_to_return.append(DUNDER.join(items)) for other_group_by_item_specs in spec_set.entity_specs + spec_set.dimension_specs: - items = list(entity_link.element_name for entity_link in other_group_by_item_specs.entity_links) + [ + items = list(group_by_link.element_name for group_by_link in other_group_by_item_specs.group_by_links) + [ other_group_by_item_specs.element_name ] names_to_return.append(DUNDER.join(items)) diff --git 
a/metricflow/naming/linkable_spec_name.py b/metricflow/naming/linkable_spec_name.py index bfcd1d6d12..3c11e61539 100644 --- a/metricflow/naming/linkable_spec_name.py +++ b/metricflow/naming/linkable_spec_name.py @@ -17,12 +17,12 @@ class StructuredLinkableSpecName: """Parse a qualified name into different parts. e.g. listing__ds__week -> - entity_links: ["listing"] + group_by_links: ["listing"] element_name: "ds" granularity: TimeGranularity.WEEK """ - entity_link_names: Tuple[str, ...] + group_by_link_names: Tuple[str, ...] element_name: str time_granularity: Optional[TimeGranularity] = None date_part: Optional[DatePart] = None @@ -34,7 +34,7 @@ def from_name(qualified_name: str) -> StructuredLinkableSpecName: # No dunder, e.g. "ds" if len(name_parts) == 1: - return StructuredLinkableSpecName(entity_link_names=(), element_name=name_parts[0]) + return StructuredLinkableSpecName(group_by_link_names=(), element_name=name_parts[0]) for date_part in DatePart: if name_parts[-1] == StructuredLinkableSpecName.date_part_suffix(date_part=date_part): @@ -52,18 +52,18 @@ def from_name(qualified_name: str) -> StructuredLinkableSpecName: # e.g. "ds__month" if len(name_parts) == 2: return StructuredLinkableSpecName( - entity_link_names=(), element_name=name_parts[0], time_granularity=associated_granularity + group_by_link_names=(), element_name=name_parts[0], time_granularity=associated_granularity ) # e.g. "messages__ds__month" return StructuredLinkableSpecName( - entity_link_names=tuple(name_parts[:-2]), + group_by_link_names=tuple(name_parts[:-2]), element_name=name_parts[-2], time_granularity=associated_granularity, ) # e.g. 
"messages__ds" else: - return StructuredLinkableSpecName(entity_link_names=tuple(name_parts[:-1]), element_name=name_parts[-1]) + return StructuredLinkableSpecName(group_by_link_names=tuple(name_parts[:-1]), element_name=name_parts[-1]) @property def qualified_name(self) -> str: @@ -71,7 +71,7 @@ def qualified_name(self) -> str: If date_part is specified, don't include granularity in qualified_name since it will not impact the result. """ - items = list(self.entity_link_names) + [self.element_name] + items = list(self.group_by_link_names) + [self.element_name] if self.date_part: items.append(self.date_part_suffix(date_part=self.date_part)) elif self.time_granularity: @@ -81,8 +81,8 @@ def qualified_name(self) -> str: @property def entity_prefix(self) -> Optional[str]: """Return the entity prefix. e.g. listing__ds__month -> listing.""" - if len(self.entity_link_names) > 0: - return DUNDER.join(self.entity_link_names) + if len(self.group_by_link_names) > 0: + return DUNDER.join(self.group_by_link_names) return None @@ -102,5 +102,5 @@ def granularity_free_qualified_name(self) -> str: in your set for each TimeDimension. 
""" return StructuredLinkableSpecName( - entity_link_names=self.entity_link_names, element_name=self.element_name + group_by_link_names=self.group_by_link_names, element_name=self.element_name ).qualified_name diff --git a/metricflow/naming/object_builder_scheme.py b/metricflow/naming/object_builder_scheme.py index 0ce7ef534b..b97aee79a9 100644 --- a/metricflow/naming/object_builder_scheme.py +++ b/metricflow/naming/object_builder_scheme.py @@ -61,7 +61,7 @@ def spec_pattern(self, input_str: str) -> SpecPattern: return DimensionPattern( EntityLinkPatternParameterSet.from_parameters( element_name=dimension_call_parameter_set.dimension_reference.element_name, - entity_links=dimension_call_parameter_set.entity_path, + group_by_links=dimension_call_parameter_set.entity_path, time_granularity=None, date_part=None, fields_to_compare=( @@ -85,7 +85,7 @@ def spec_pattern(self, input_str: str) -> SpecPattern: return TimeDimensionPattern( EntityLinkPatternParameterSet.from_parameters( element_name=time_dimension_call_parameter_set.time_dimension_reference.element_name, - entity_links=time_dimension_call_parameter_set.entity_path, + group_by_links=time_dimension_call_parameter_set.entity_path, time_granularity=time_dimension_call_parameter_set.time_granularity, date_part=time_dimension_call_parameter_set.date_part, fields_to_compare=tuple(fields_to_compare), @@ -96,7 +96,7 @@ def spec_pattern(self, input_str: str) -> SpecPattern: return EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name=entity_call_parameter_set.entity_reference.element_name, - entity_links=entity_call_parameter_set.entity_path, + group_by_links=entity_call_parameter_set.entity_path, time_granularity=None, date_part=None, fields_to_compare=( diff --git a/metricflow/naming/object_builder_str.py b/metricflow/naming/object_builder_str.py index c0e07a07a6..5ba9d25f65 100644 --- a/metricflow/naming/object_builder_str.py +++ b/metricflow/naming/object_builder_str.py @@ -5,10 +5,11 @@ 
from dbt_semantic_interfaces.call_parameter_sets import ( DimensionCallParameterSet, EntityCallParameterSet, + MetricCallParameterSet, TimeDimensionCallParameterSet, ) from dbt_semantic_interfaces.naming.keywords import DUNDER -from dbt_semantic_interfaces.references import EntityReference +from dbt_semantic_interfaces.references import EntityReference, LinkableElementReference from dbt_semantic_interfaces.type_enums import TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart from typing_extensions import override @@ -28,16 +29,29 @@ class ObjectBuilderNameConverter: def input_str_from_entity_call_parameter_set(parameter_set: EntityCallParameterSet) -> str: # noqa: D initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( element_name=parameter_set.entity_reference.element_name, - entity_links=parameter_set.entity_path, + group_by_links=parameter_set.entity_path, + group_by=(), time_granularity=None, date_part=None, ) return f"Entity({initializer_parameter_str})" + @staticmethod + def input_str_from_metric_call_parameter_set(parameter_set: MetricCallParameterSet) -> str: # noqa: D + initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( + element_name=parameter_set.metric_reference.element_name, + group_by_links=(), + group_by=parameter_set.group_by, + time_granularity=None, + date_part=None, + ) + return f"Metric({initializer_parameter_str})" + @staticmethod def initializer_parameter_str( element_name: str, - entity_links: Sequence[EntityReference], + group_by_links: Sequence[EntityReference], + group_by: Sequence[LinkableElementReference], time_granularity: Optional[TimeGranularity], date_part: Optional[DatePart], ) -> str: @@ -46,9 +60,9 @@ def initializer_parameter_str( e.g. 
`'user__country', time_granularity_name='month'` """ initializer_parameters = [] - entity_link_names = list(entity_link.element_name for entity_link in entity_links) - if len(entity_link_names) > 0: - initializer_parameters.append(repr(entity_link_names[-1] + DUNDER + element_name)) + group_by_link_names = list(group_by_link.element_name for group_by_link in group_by_links) + if len(group_by_link_names) > 0: + initializer_parameters.append(repr(group_by_link_names[-1] + DUNDER + element_name)) else: initializer_parameters.append(repr(element_name)) if time_granularity is not None: @@ -57,8 +71,8 @@ def initializer_parameter_str( ) if date_part is not None: initializer_parameters.append(f"date_part_name={repr(date_part.value)}") - if len(entity_link_names) > 1: - initializer_parameters.append(f"entity_path={repr(entity_link_names[:-1])}") + if len(group_by_link_names) > 1: + initializer_parameters.append(f"entity_path={repr(group_by_link_names[:-1])}") return ", ".join(initializer_parameters) @@ -74,7 +88,8 @@ def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]: for entity_spec in spec_set.entity_specs: initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( element_name=entity_spec.element_name, - entity_links=entity_spec.entity_links, + group_by_links=entity_spec.group_by_links, + group_by=(), time_granularity=None, date_part=None, ) @@ -83,7 +98,8 @@ def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]: for dimension_spec in spec_set.dimension_specs: initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( element_name=dimension_spec.element_name, - entity_links=dimension_spec.entity_links, + group_by_links=dimension_spec.group_by_links, + group_by=(), time_granularity=None, date_part=None, ) @@ -92,12 +108,23 @@ def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]: for time_dimension_spec in spec_set.time_dimension_specs: initializer_parameter_str = 
ObjectBuilderNameConverter.initializer_parameter_str( element_name=time_dimension_spec.element_name, - entity_links=time_dimension_spec.entity_links, + group_by_links=time_dimension_spec.group_by_links, + group_by=(), time_granularity=time_dimension_spec.time_granularity, date_part=time_dimension_spec.date_part, ) names_to_return.append(f"TimeDimension({initializer_parameter_str})") + for metric_spec in spec_set.metric_specs: + initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( + element_name=metric_spec.element_name, + group_by_links=(), + group_by=(), # how to get from spec? + time_granularity=None, + date_part=None, + ) + names_to_return.append(f"Metric({initializer_parameter_str})") + return names_to_return @staticmethod @@ -115,7 +142,8 @@ def input_str_from_spec(instance_spec: InstanceSpec) -> str: # noqa: D def input_str_from_dimension_call_parameter_set(parameter_set: DimensionCallParameterSet) -> str: # noqa: D initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( element_name=parameter_set.dimension_reference.element_name, - entity_links=parameter_set.entity_path, + group_by_links=parameter_set.entity_path, + group_by=(), time_granularity=None, date_part=None, ) @@ -127,7 +155,8 @@ def input_str_from_time_dimension_call_parameter_set( # noqa: D ) -> str: initializer_parameter_str = ObjectBuilderNameConverter.initializer_parameter_str( element_name=parameter_set.time_dimension_reference.element_name, - entity_links=parameter_set.entity_path, + group_by_links=parameter_set.entity_path, + group_by=(), time_granularity=None, date_part=None, ) diff --git a/metricflow/plan_conversion/column_resolver.py b/metricflow/plan_conversion/column_resolver.py index 1cb8ff8bfa..daf33c0474 100644 --- a/metricflow/plan_conversion/column_resolver.py +++ b/metricflow/plan_conversion/column_resolver.py @@ -12,6 +12,7 @@ from metricflow.specs.specs import ( DimensionSpec, EntitySpec, + GroupByMetricSpec, 
InstanceSpec, InstanceSpecVisitor, MeasureSpec, @@ -44,7 +45,7 @@ def visit_measure_spec(self, measure_spec: MeasureSpec) -> ColumnAssociation: # def visit_dimension_spec(self, dimension_spec: DimensionSpec) -> ColumnAssociation: # noqa: D return ColumnAssociation( column_name=StructuredLinkableSpecName( - entity_link_names=tuple(x.element_name for x in dimension_spec.entity_links), + group_by_link_names=tuple(x.element_name for x in dimension_spec.group_by_links), element_name=dimension_spec.element_name, ).qualified_name, single_column_correlation_key=SingleColumnCorrelationKey(), @@ -52,7 +53,7 @@ def visit_dimension_spec(self, dimension_spec: DimensionSpec) -> ColumnAssociati def visit_time_dimension_spec(self, time_dimension_spec: TimeDimensionSpec) -> ColumnAssociation: # noqa: D column_name = StructuredLinkableSpecName( - entity_link_names=tuple(x.element_name for x in time_dimension_spec.entity_links), + group_by_link_names=tuple(x.element_name for x in time_dimension_spec.group_by_links), element_name=time_dimension_spec.element_name, time_granularity=time_dimension_spec.time_granularity, date_part=time_dimension_spec.date_part, @@ -71,7 +72,7 @@ def visit_time_dimension_spec(self, time_dimension_spec: TimeDimensionSpec) -> C def visit_entity_spec(self, entity_spec: EntitySpec) -> ColumnAssociation: # noqa: D return ColumnAssociation( column_name=StructuredLinkableSpecName( - entity_link_names=tuple(x.element_name for x in entity_spec.entity_links), + group_by_link_names=tuple(x.element_name for x in entity_spec.group_by_links), element_name=entity_spec.element_name, ).qualified_name, single_column_correlation_key=SingleColumnCorrelationKey(), @@ -83,13 +84,26 @@ def visit_metadata_spec(self, metadata_spec: MetadataSpec) -> ColumnAssociation: single_column_correlation_key=SingleColumnCorrelationKey(), ) + def visit_group_by_metric_spec(self, group_by_metric_spec: GroupByMetricSpec) -> ColumnAssociation: # noqa: D + return ColumnAssociation( + # TODO: is 
this what we want the column names to look like? group_by__group_by__metric_name + column_name=StructuredLinkableSpecName( + # TODO: rename to group_by_link_names + group_by_link_names=tuple( + group_by_link.element_name for group_by_link in group_by_metric_spec.group_by_links + ), + element_name=group_by_metric_spec.element_name, + ).qualified_name, + single_column_correlation_key=SingleColumnCorrelationKey(), + ) + class DunderColumnAssociationResolver(ColumnAssociationResolver): """Uses a double underscore to map specs to column names. For example: - DimensionSpec(element_name='country', entity_links=['listing']) + DimensionSpec(element_name='country', group_by_links=['listing']) -> diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 12771de95e..6bac356f9f 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -42,7 +42,12 @@ from metricflow.dataset.dataset import DataSet from metricflow.dataset.sql_dataset import SqlDataSet from metricflow.filters.time_constraint import TimeRangeConstraint -from metricflow.instances import InstanceSet, MetadataInstance, MetricInstance, TimeDimensionInstance +from metricflow.instances import ( + InstanceSet, + MetadataInstance, + MetricInstance, + TimeDimensionInstance, +) from metricflow.mf_logging.formatting import indent from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow.plan_conversion.instance_converters import ( @@ -443,7 +448,7 @@ def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> SqlDataS # data set. The next step would create an instance like "listing__listing__country_latest" without this # filter. 
right_data_set_instance_set_filtered = FilterLinkableInstancesWithLeadingLink( - entity_link=join_on_entity, + group_by_link=join_on_entity, ).transform(right_data_set.instance_set) # After the right data set is joined to the "from" data set, we need to change the links for some of the @@ -1064,7 +1069,7 @@ def visit_metric_time_dimension_transform_node(self, node: MetricTimeDimensionTr for time_dimension_instance in input_data_set.instance_set.time_dimension_instances: # The specification for the time dimension to use for aggregation is the local one. if ( - len(time_dimension_instance.spec.entity_links) == 0 + len(time_dimension_instance.spec.group_by_links) == 0 and time_dimension_instance.spec.reference == node.aggregation_time_dimension_reference ): matching_time_dimension_instances.append(time_dimension_instance) @@ -1253,10 +1258,10 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet if node.use_custom_agg_time_dimension: agg_time_dimension = node.requested_agg_time_dimension_specs[0] agg_time_element_name = agg_time_dimension.element_name - agg_time_entity_links: Tuple[EntityReference, ...] = agg_time_dimension.entity_links + agg_time_group_by_links: Tuple[EntityReference, ...] = agg_time_dimension.group_by_links else: agg_time_element_name = METRIC_TIME_ELEMENT_NAME - agg_time_entity_links = () + agg_time_group_by_links = () # Find the time dimension instances in the parent data set that match the one we want to join with. 
agg_time_dimension_instances: List[TimeDimensionInstance] = [] @@ -1264,7 +1269,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet if ( instance.spec.date_part is None # Ensure we don't join using an instance with date part and instance.spec.element_name == agg_time_element_name - and instance.spec.entity_links == agg_time_entity_links + and instance.spec.group_by_links == agg_time_group_by_links ): agg_time_dimension_instances.append(instance) @@ -1304,7 +1309,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet for time_dimension_instance in parent_data_set.instance_set.time_dimension_instances: if ( time_dimension_instance.spec.element_name == agg_time_element_name - and time_dimension_instance.spec.entity_links == agg_time_entity_links + and time_dimension_instance.spec.group_by_links == agg_time_group_by_links ): time_dimensions_to_select_from_time_spine += (time_dimension_instance,) else: @@ -1317,7 +1322,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet for time_dimension_instance in parent_data_set.instance_set.time_dimension_instances if not ( time_dimension_instance.spec.element_name == agg_time_element_name - and time_dimension_instance.spec.entity_links == agg_time_entity_links + and time_dimension_instance.spec.group_by_links == agg_time_group_by_links ) ), entity_instances=parent_data_set.instance_set.entity_instances, @@ -1394,7 +1399,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet select_expr = SqlExtractExpression(date_part=time_dimension_spec.date_part, arg=select_expr) time_dim_spec = TimeDimensionSpec( element_name=original_time_spine_dim_instance.spec.element_name, - entity_links=original_time_spine_dim_instance.spec.entity_links, + group_by_links=original_time_spine_dim_instance.spec.group_by_links, time_granularity=time_dimension_spec.time_granularity, date_part=time_dimension_spec.date_part, 
aggregation_state=original_time_spine_dim_instance.spec.aggregation_state, diff --git a/metricflow/plan_conversion/instance_converters.py b/metricflow/plan_conversion/instance_converters.py index 6dfe71be36..6fbd53647b 100644 --- a/metricflow/plan_conversion/instance_converters.py +++ b/metricflow/plan_conversion/instance_converters.py @@ -20,6 +20,7 @@ from metricflow.instances import ( DimensionInstance, EntityInstance, + GroupByMetricInstance, InstanceSet, InstanceSetTransform, MdoInstance, @@ -35,6 +36,7 @@ DimensionSpec, EntityReference, EntitySpec, + GroupByMetricSpec, InstanceSpec, InstanceSpecSet, LinkableInstanceSpec, @@ -99,6 +101,9 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D entity_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.entity_instances]) ) + group_by_metric_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.group_by_metric_instances]) + ) metadata_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.metadata_instances]) ) @@ -109,6 +114,7 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D time_dimension_columns=time_dimension_cols, entity_columns=entity_cols, metadata_columns=metadata_cols, + group_by_metric_columns=group_by_metric_cols, ) def _make_sql_column_expression( @@ -254,6 +260,9 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D metadata_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.metadata_instances]) ) + group_by_metric_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.group_by_metric_instances]) + ) return SelectColumnSet( metric_columns=metric_cols, measure_columns=measure_cols, @@ -261,6 +270,7 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D time_dimension_columns=time_dimension_cols, 
entity_columns=entity_cols, metadata_columns=metadata_cols, + group_by_metric_columns=group_by_metric_cols, ) @@ -344,8 +354,8 @@ def transform(self, instance_set: InstanceSet) -> Optional[ValidityWindowJoinDes and spec.time_granularity == end_dim.time_granularity and spec.date_part == end_dim.date_part ] - linkless_start_specs = {spec.without_entity_links for spec in start_specs} - linkless_end_specs = {spec.without_entity_links for spec in end_specs} + linkless_start_specs = {spec.without_group_by_links for spec in start_specs} + linkless_end_specs = {spec.without_group_by_links for spec in end_specs} assert len(linkless_start_specs) == 1 and len(linkless_end_specs) == 1, ( f"Did not find exactly one pair of specs from semantic model `{semantic_model_reference}` matching the validity " f"window end points defined in the semantic model. This means we cannot process an SCD join, because we " @@ -357,8 +367,8 @@ def transform(self, instance_set: InstanceSet) -> Optional[ValidityWindowJoinDes # SCD join targets are joined as dimension links in much the same was as partitions are joined. Therefore, # we treat this like a partition time column join and take the dimension spec with the shortest set of # entity links so that the subquery uses the correct reference in the ON statement - start_specs = sorted(start_specs, key=lambda x: len(x.entity_links)) - end_specs = sorted(end_specs, key=lambda x: len(x.entity_links)) + start_specs = sorted(start_specs, key=lambda x: len(x.group_by_links)) + end_specs = sorted(end_specs, key=lambda x: len(x.group_by_links)) semantic_model_to_window[semantic_model_reference] = ValidityWindowJoinDescription( window_start_dimension=start_specs[0], window_end_dimension=end_specs[0] ) @@ -393,7 +403,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D # The new dimension spec should include the join on entity. 
transformed_dimension_spec_from_right = DimensionSpec( element_name=dimension_instance.spec.element_name, - entity_links=self._join_on_entity.as_linkless_prefix + dimension_instance.spec.entity_links, + group_by_links=self._join_on_entity.as_linkless_prefix + dimension_instance.spec.group_by_links, ) dimension_instances_with_additional_link.append( DimensionInstance( @@ -409,9 +419,9 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D # The new dimension spec should include the join on entity. transformed_time_dimension_spec_from_right = TimeDimensionSpec( element_name=time_dimension_instance.spec.element_name, - entity_links=( + group_by_links=( (EntityReference(element_name=self._join_on_entity.element_name),) - + time_dimension_instance.spec.entity_links + + time_dimension_instance.spec.group_by_links ), time_granularity=time_dimension_instance.spec.time_granularity, date_part=time_dimension_instance.spec.date_part, @@ -434,7 +444,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D # The new entity spec should include the join on entity. transformed_entity_spec_from_right = EntitySpec( element_name=entity_instance.spec.element_name, - entity_links=self._join_on_entity.as_linkless_prefix + entity_instance.spec.entity_links, + group_by_links=self._join_on_entity.as_linkless_prefix + entity_instance.spec.group_by_links, ) entity_instances_with_additional_link.append( EntityInstance( @@ -444,11 +454,28 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ) ) + # Handle group by metric instances + group_by_metric_instances_with_additional_link = [] + for group_by_metric_instance in instance_set.group_by_metric_instances: + # The new group by metric spec should include the join on entity. 
+ transformed_group_by_metric_spec_from_right = GroupByMetricSpec( + element_name=group_by_metric_instance.spec.element_name, + group_by_links=self._join_on_entity.as_linkless_prefix + group_by_metric_instance.spec.group_by_links, + ) + group_by_metric_instances_with_additional_link.append( + GroupByMetricInstance( + associated_columns=group_by_metric_instance.associated_columns, + defined_from=group_by_metric_instance.defined_from, + spec=transformed_group_by_metric_spec_from_right, + ) + ) + return InstanceSet( measure_instances=(), dimension_instances=tuple(dimension_instances_with_additional_link), time_dimension_instances=tuple(time_dimension_instances_with_additional_link), entity_instances=tuple(entity_instances_with_additional_link), + group_by_metric_instances=tuple(group_by_metric_instances_with_additional_link), metric_instances=(), metadata_instances=(), ) @@ -462,19 +489,19 @@ class FilterLinkableInstancesWithLeadingLink(InstanceSetTransform[InstanceSet]): def __init__( # noqa: D self, - entity_link: LinklessEntitySpec, + group_by_link: LinklessEntitySpec, ) -> None: """Constructor. 
Args: - entity_link: Remove elements with this link as the first element in "entity_links" + group_by_link: Remove elements with this link as the first element in "group_by_links" """ - self._entity_link = entity_link + self._group_by_link = group_by_link def _should_pass(self, linkable_spec: LinkableInstanceSpec) -> bool: # noqa: D return ( - len(linkable_spec.entity_links) == 0 - or LinklessEntitySpec.from_reference(linkable_spec.entity_links[0]) != self._entity_link + len(linkable_spec.group_by_links) == 0 + or LinklessEntitySpec.from_reference(linkable_spec.group_by_links[0]) != self._group_by_link ) def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D @@ -484,6 +511,9 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D x for x in instance_set.time_dimension_instances if self._should_pass(x.spec) ) filtered_entity_instances = tuple(x for x in instance_set.entity_instances if self._should_pass(x.spec)) + filtered_group_by_metric_instances = tuple( + x for x in instance_set.group_by_metric_instances if self._should_pass(x.spec) + ) output = InstanceSet( measure_instances=instance_set.measure_instances, @@ -492,6 +522,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=filtered_entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=filtered_group_by_metric_instances, ) return output @@ -551,6 +582,9 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=tuple(x for x in instance_set.entity_instances if self._should_pass(x.spec)), metric_instances=tuple(x for x in instance_set.metric_instances if self._should_pass(x.spec)), metadata_instances=tuple(x for x in instance_set.metadata_instances if self._should_pass(x.spec)), + group_by_metric_instances=tuple( + x for x in instance_set.group_by_metric_instances if self._should_pass(x.spec) + ), ) return 
output @@ -590,6 +624,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances, ) @@ -631,6 +666,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances, ) @@ -685,6 +721,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances, ) @@ -702,6 +739,15 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances + tuple(self._metric_instances), metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances + + tuple( + GroupByMetricInstance( + associated_columns=metric_instance.associated_columns, + spec=GroupByMetricSpec(element_name=metric_instance.spec.element_name, group_by_links=()), + defined_from=metric_instance.defined_from, + ) + for metric_instance in self._metric_instances + ), ) @@ -716,6 +762,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances, ) @@ -730,6 +777,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D 
entity_instances=instance_set.entity_instances, metric_instances=(), metadata_instances=instance_set.metadata_instances, + group_by_metric_instances=instance_set.group_by_metric_instances, ) @@ -940,6 +988,18 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ) ) + output_group_by_metric_instances = [] + for input_group_by_metric_instance in instance_set.group_by_metric_instances: + output_group_by_metric_instances.append( + GroupByMetricInstance( + associated_columns=( + self._column_association_resolver.resolve_spec(input_group_by_metric_instance.spec), + ), + spec=input_group_by_metric_instance.spec, + defined_from=input_group_by_metric_instance.defined_from, + ) + ) + return InstanceSet( measure_instances=tuple(output_measure_instances), dimension_instances=tuple(output_dimension_instances), @@ -947,6 +1007,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=tuple(output_entity_instances), metric_instances=tuple(output_metric_instances), metadata_instances=tuple(output_metadata_instances), + group_by_metric_instances=tuple(output_group_by_metric_instances), ) @@ -998,4 +1059,5 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D entity_instances=instance_set.entity_instances, metric_instances=instance_set.metric_instances, metadata_instances=instance_set.metadata_instances + tuple(self._metadata_instances), + group_by_metric_instances=instance_set.group_by_metric_instances, ) diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py index 78eabb7a86..0638bcdf57 100644 --- a/metricflow/plan_conversion/node_processor.py +++ b/metricflow/plan_conversion/node_processor.py @@ -102,7 +102,7 @@ def add_time_range_constraint( for time_dimension_instance in node_output_data_set.instance_set.time_dimension_instances: if ( time_dimension_instance.spec.reference == metric_time_dimension_reference - and 
len(time_dimension_instance.spec.entity_links) == 0 + and len(time_dimension_instance.spec.group_by_links) == 0 ): constrain_time = True break @@ -130,7 +130,7 @@ def _node_contains_entity( if entity_spec_in_first_node.reference != entity_reference: continue - if len(entity_spec_in_first_node.entity_links) > 0: + if len(entity_spec_in_first_node.group_by_links) > 0: continue assert ( @@ -153,12 +153,12 @@ def _get_candidates_nodes_for_multi_hop( self, desired_linkable_spec: LinkableInstanceSpec, nodes: Sequence[BaseOutput], join_type: SqlJoinType ) -> Sequence[MultiHopJoinCandidate]: """Assemble nodes representing all possible one-hop joins.""" - if len(desired_linkable_spec.entity_links) > MAX_JOIN_HOPS: + if len(desired_linkable_spec.group_by_links) > MAX_JOIN_HOPS: raise NotImplementedError( f"Multi-hop joins with more than {MAX_JOIN_HOPS} entity links not yet supported. " f"Got: {desired_linkable_spec}" ) - if len(desired_linkable_spec.entity_links) != 2: + if len(desired_linkable_spec.group_by_links) != 2: return () multi_hop_join_candidates: List[MultiHopJoinCandidate] = [] @@ -173,11 +173,11 @@ def _get_candidates_nodes_for_multi_hop( if not ( self._node_contains_entity( node=first_node_that_could_be_joined, - entity_reference=desired_linkable_spec.entity_links[0], + entity_reference=desired_linkable_spec.group_by_links[0], ) and self._node_contains_entity( node=first_node_that_could_be_joined, - entity_reference=desired_linkable_spec.entity_links[1], + entity_reference=desired_linkable_spec.group_by_links[1], ) ): continue @@ -186,7 +186,7 @@ def _get_candidates_nodes_for_multi_hop( if not ( self._node_contains_entity( node=second_node_that_could_be_joined, - entity_reference=desired_linkable_spec.entity_links[1], + entity_reference=desired_linkable_spec.group_by_links[1], ) ): continue @@ -209,7 +209,7 @@ def _get_candidates_nodes_for_multi_hop( continue # The first and second nodes are joined by this entity - 
entity_reference_to_join_first_and_second_nodes = desired_linkable_spec.entity_links[1] + entity_reference_to_join_first_and_second_nodes = desired_linkable_spec.group_by_links[1] if not self._join_evaluator.is_valid_instance_set_join( left_instance_set=data_set_of_first_node_that_could_be_joined.instance_set, @@ -244,7 +244,7 @@ def _get_candidates_nodes_for_multi_hop( JoinDescription( join_node=filtered_joinable_node, join_on_entity=LinklessEntitySpec.from_reference( - desired_linkable_spec.entity_links[1] + desired_linkable_spec.group_by_links[1] ), join_on_partition_dimensions=join_on_partition_dimensions, join_on_partition_time_dimensions=join_on_partition_time_dimensions, @@ -258,7 +258,7 @@ def _get_candidates_nodes_for_multi_hop( # entity_spec_in_first_node should already not have entity links since we checked # for that, but using this method for type checking. join_second_node_by_entity=LinklessEntitySpec.from_reference( - desired_linkable_spec.entity_links[1] + desired_linkable_spec.group_by_links[1] ), ), ) @@ -309,14 +309,19 @@ def remove_unnecessary_nodes( doesn't mean that the node will be useful, but not having common elements definitely means it's not useful. """ relevant_element_names = {x.element_name for x in desired_linkable_specs}.union( - {y.element_name for x in desired_linkable_specs for y in x.entity_links} + { + y.element_name + for x in desired_linkable_specs + # TODO: fix typing here; use "group_by_links" as name everywhere + for y in x.group_by_links + } ) # The metric time dimension is used everywhere, so don't count it unless specifically desired in linkable spec # that has entity links. 
metric_time_dimension_used_in_linked_spec = any( [ - len(linkable_spec.entity_links) > 0 + len(linkable_spec.group_by_links) > 0 and linkable_spec.element_name == metric_time_dimension_reference.element_name for linkable_spec in desired_linkable_specs ] diff --git a/metricflow/plan_conversion/select_column_gen.py b/metricflow/plan_conversion/select_column_gen.py index eee5fa2087..f4803ca4b6 100644 --- a/metricflow/plan_conversion/select_column_gen.py +++ b/metricflow/plan_conversion/select_column_gen.py @@ -19,6 +19,7 @@ class SelectColumnSet: time_dimension_columns: List[SqlSelectColumn] = field(default_factory=list) entity_columns: List[SqlSelectColumn] = field(default_factory=list) metadata_columns: List[SqlSelectColumn] = field(default_factory=list) + group_by_metric_columns: List[SqlSelectColumn] = field(default_factory=list) def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: """Combine the select columns by type.""" @@ -29,6 +30,7 @@ def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: time_dimension_columns=self.time_dimension_columns + other_set.time_dimension_columns, entity_columns=self.entity_columns + other_set.entity_columns, metadata_columns=self.metadata_columns + other_set.metadata_columns, + group_by_metric_columns=self.group_by_metric_columns + other_set.group_by_metric_columns, ) def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: @@ -40,6 +42,7 @@ def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: + self.dimension_columns + self.metric_columns + self.measure_columns + + self.group_by_metric_columns + self.metadata_columns ) @@ -51,4 +54,5 @@ def without_measure_columns(self) -> SelectColumnSet: time_dimension_columns=self.time_dimension_columns, entity_columns=self.entity_columns, metadata_columns=self.metadata_columns, + group_by_metric_columns=self.group_by_metric_columns, ) diff --git a/metricflow/plan_conversion/sql_join_builder.py b/metricflow/plan_conversion/sql_join_builder.py index a6e4984b99..7502820234 100644 --- 
a/metricflow/plan_conversion/sql_join_builder.py +++ b/metricflow/plan_conversion/sql_join_builder.py @@ -245,7 +245,7 @@ def _make_validity_window_on_conditions( left_data_set_metric_time_dimension_instances = sorted( left_data_set.data_set.metric_time_dimension_instances, - key=lambda x: (x.spec.time_granularity.to_int(), len(x.spec.entity_links)), + key=lambda x: (x.spec.time_granularity.to_int(), len(x.spec.group_by_links)), ) assert left_data_set_metric_time_dimension_instances, ( f"Cannot process join to data set with alias {right_data_set.alias} because it has a validity " diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index f9afb82548..31d970770d 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -28,7 +28,13 @@ from metricflow.model.semantics.element_group import ElementGrouper from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties -from metricflow.specs.specs import LinkableInstanceSpec, MeasureSpec, NonAdditiveDimensionSpec, TimeDimensionSpec +from metricflow.specs.specs import ( + InstanceSpec, + LinkableInstanceSpec, + MeasureSpec, + NonAdditiveDimensionSpec, + TimeDimensionSpec, +) if TYPE_CHECKING: from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey @@ -129,7 +135,7 @@ def get_semantic_models_for_entity(self, entity_reference: EntityReference) -> S @staticmethod @abstractmethod - def entity_links_for_local_elements(semantic_model: SemanticModel) -> Sequence[EntityReference]: + def group_by_links_for_local_elements(semantic_model: SemanticModel) -> Sequence[EntityReference]: """Return the entity prefix that can be used to access dimensions defined in the semantic model.""" raise NotImplementedError @@ -165,7 +171,7 @@ def element_specs_for_metrics( metric_references: Sequence[MetricReference], with_any_property: FrozenSet[LinkableElementProperties] = LinkableElementProperties.all_properties(), without_any_property: 
FrozenSet[LinkableElementProperties] = frozenset(), - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Retrieve the matching set of linkable elements common to all metrics requested (intersection).""" raise NotImplementedError @@ -206,7 +212,7 @@ def group_by_item_specs_for_measure( measure_reference: MeasureReference, with_any_of: Optional[Set[LinkableElementProperties]] = None, without_any_of: Optional[Set[LinkableElementProperties]] = None, - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Return group-by-items that are possible for a measure.""" raise NotImplementedError @@ -215,7 +221,7 @@ def group_by_item_specs_for_no_metrics_query( self, with_any_of: Optional[Set[LinkableElementProperties]] = None, without_any_of: Optional[Set[LinkableElementProperties]] = None, - ) -> Sequence[LinkableInstanceSpec]: + ) -> Sequence[InstanceSpec]: """Return the possible group-by-items for a dimension values query with no metrics.""" raise NotImplementedError diff --git a/metricflow/query/group_by_item/candidate_push_down/group_by_item_candidate.py b/metricflow/query/group_by_item/candidate_push_down/group_by_item_candidate.py index 2deb7f23a3..c01db2308c 100644 --- a/metricflow/query/group_by_item/candidate_push_down/group_by_item_candidate.py +++ b/metricflow/query/group_by_item/candidate_push_down/group_by_item_candidate.py @@ -9,7 +9,7 @@ from metricflow.query.group_by_item.path_prefixable import PathPrefixable from metricflow.query.group_by_item.resolution_path import MetricFlowQueryResolutionPath from metricflow.specs.patterns.spec_pattern import SpecPattern -from metricflow.specs.specs import InstanceSpecSet, LinkableInstanceSpec, LinkableSpecSet +from metricflow.specs.specs import InstanceSpec, InstanceSpecSet, LinkableSpecSet @dataclass(frozen=True) @@ -29,7 +29,7 @@ class GroupByItemCandidateSet(PathPrefixable): error messages, you start analyzing from the leaf node. """ - specs: Tuple[LinkableInstanceSpec, ...] 
+ specs: Tuple[InstanceSpec, ...] measure_paths: Tuple[MetricFlowQueryResolutionPath, ...] path_from_leaf_node: MetricFlowQueryResolutionPath diff --git a/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py index 862b83edd0..c65e98aa86 100644 --- a/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -196,7 +196,6 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu ).linkable_specs matching_specs = specs_available_for_measure_given_child_metric - for source_spec_pattern in self._source_spec_patterns: matching_specs = InstanceSpecSet.from_specs(source_spec_pattern.match(matching_specs)).linkable_specs @@ -220,13 +219,15 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu NoMatchingItemsForMeasure.from_parameters( parent_issues=(), query_resolution_path=current_traversal_path, - input_suggestions=tuple( - self._suggestion_generator.input_suggestions( - specs_available_for_measure_given_child_metric + input_suggestions=( + tuple( + self._suggestion_generator.input_suggestions( + specs_available_for_measure_given_child_metric + ) ) - ) - if self._suggestion_generator is not None - else (), + if self._suggestion_generator is not None + else () + ), ) ), ) @@ -368,9 +369,9 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> PushDownRe return PushDownResult( candidate_set=GroupByItemCandidateSet( specs=tuple(matched_specs), - measure_paths=merged_result_from_parents.candidate_set.measure_paths - if len(matched_specs) > 0 - else (), + measure_paths=( + merged_result_from_parents.candidate_set.measure_paths if len(matched_specs) > 0 else () + ), path_from_leaf_node=current_traversal_path, ), issue_set=MetricFlowQueryResolutionIssueSet.merge_iterable(issue_sets_to_merge), diff --git 
a/metricflow/query/group_by_item/filter_spec_resolution/filter_pattern_factory.py b/metricflow/query/group_by_item/filter_spec_resolution/filter_pattern_factory.py index 92fe217816..d8706798a6 100644 --- a/metricflow/query/group_by_item/filter_spec_resolution/filter_pattern_factory.py +++ b/metricflow/query/group_by_item/filter_spec_resolution/filter_pattern_factory.py @@ -5,12 +5,18 @@ from dbt_semantic_interfaces.call_parameter_sets import ( DimensionCallParameterSet, EntityCallParameterSet, + MetricCallParameterSet, TimeDimensionCallParameterSet, ) from typing_extensions import override from metricflow.specs.patterns.spec_pattern import SpecPattern -from metricflow.specs.patterns.typed_patterns import DimensionPattern, EntityPattern, TimeDimensionPattern +from metricflow.specs.patterns.typed_patterns import ( + DimensionPattern, + EntityPattern, + GroupByMetricPattern, + TimeDimensionPattern, +) class WhereFilterPatternFactory(ABC): @@ -34,6 +40,12 @@ def create_for_entity_call_parameter_set( # noqa: D ) -> SpecPattern: raise NotImplementedError + @abstractmethod + def create_for_metric_call_parameter_set( # noqa: D + self, metric_call_parameter_set: MetricCallParameterSet + ) -> SpecPattern: + raise NotImplementedError + class DefaultWhereFilterPatternFactory(WhereFilterPatternFactory): """Default implementation using patterns derived from EntityLinkPattern.""" @@ -53,3 +65,7 @@ def create_for_time_dimension_call_parameter_set( @override def create_for_entity_call_parameter_set(self, entity_call_parameter_set: EntityCallParameterSet) -> SpecPattern: return EntityPattern.from_call_parameter_set(entity_call_parameter_set) + + @override + def create_for_metric_call_parameter_set(self, metric_call_parameter_set: MetricCallParameterSet) -> SpecPattern: + return GroupByMetricPattern.from_call_parameter_set(metric_call_parameter_set) diff --git a/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_lookup.py 
b/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_lookup.py index 047b34dd8d..6846ce5e95 100644 --- a/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_lookup.py +++ b/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_lookup.py @@ -7,6 +7,7 @@ from dbt_semantic_interfaces.call_parameter_sets import ( DimensionCallParameterSet, EntityCallParameterSet, + MetricCallParameterSet, TimeDimensionCallParameterSet, ) from dbt_semantic_interfaces.protocols import WhereFilterIntersection @@ -177,7 +178,9 @@ class FilterSpecResolution: object_builder_str: str -CallParameterSet = Union[DimensionCallParameterSet, TimeDimensionCallParameterSet, EntityCallParameterSet] +CallParameterSet = Union[ + DimensionCallParameterSet, TimeDimensionCallParameterSet, EntityCallParameterSet, MetricCallParameterSet +] @dataclass(frozen=True) diff --git a/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py b/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py index c55bbf443d..355914cdab 100644 --- a/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py +++ b/metricflow/query/group_by_item/filter_spec_resolution/filter_spec_resolver.py @@ -133,6 +133,14 @@ def _dedupe_filter_call_parameter_sets( ) ) ), + metric_call_parameter_sets=tuple( + dict.fromkeys( + itertools.chain.from_iterable( + filter_call_parameter_sets.metric_call_parameter_sets + for filter_call_parameter_sets in filter_call_parameter_sets_sequence + ) + ) + ), ) def _map_filter_parameter_sets_to_pattern( @@ -183,6 +191,18 @@ def _map_filter_parameter_sets_to_pattern( ), ) ) + for metric_call_parameter_set in filter_call_parameter_sets.metric_call_parameter_sets: + patterns_in_filter.append( + PatternAssociationForWhereFilterGroupByItem( + call_parameter_set=metric_call_parameter_set, + object_builder_str=ObjectBuilderNameConverter.input_str_from_metric_call_parameter_set( + metric_call_parameter_set + ), + 
spec_pattern=self._spec_pattern_factory.create_for_metric_call_parameter_set( + metric_call_parameter_set + ), + ) + ) return patterns_in_filter @@ -201,7 +221,6 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> FilterSpec results_to_merge.append(parent_node.accept(self)) resolved_spec_lookup_so_far = FilterSpecResolutionLookUp.merge_iterable(results_to_merge) - return resolved_spec_lookup_so_far.merge( self._resolve_specs_for_where_filters( current_node=node, @@ -221,7 +240,6 @@ def visit_query_node(self, node: QueryGroupByItemResolutionNode) -> FilterSpecRe # If the same metric is present multiple times in a query - there could be duplicates. resolved_spec_lookup_so_far = FilterSpecResolutionLookUp.merge_iterable(results_to_merge) - return resolved_spec_lookup_so_far.merge( self._resolve_specs_for_where_filters( current_node=node, diff --git a/metricflow/query/group_by_item/group_by_item_resolver.py b/metricflow/query/group_by_item/group_by_item_resolver.py index ace100ecf8..5df66f1693 100644 --- a/metricflow/query/group_by_item/group_by_item_resolver.py +++ b/metricflow/query/group_by_item/group_by_item_resolver.py @@ -137,13 +137,13 @@ def resolve_matching_item_for_filters( manifest_lookup=self._manifest_lookup, source_spec_patterns=( spec_pattern, - BaseTimeGrainPattern(), + BaseTimeGrainPattern(only_apply_for_metric_time=True), ), suggestion_generator=suggestion_generator, ) - + # we make it here push_down_result: PushDownResult = resolution_node.accept(push_down_visitor) - + # but not here if push_down_result.candidate_set.num_candidates == 0: return GroupByItemResolution( spec=None, diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index aeb4693f01..6f78c26de6 100644 --- a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -35,7 +35,8 @@ def __init__(self, 
manifest_lookup: SemanticManifestLookup) -> None: # noqa: D self._metric_time_specs = tuple( TimeDimensionSpec.generate_possible_specs_for_time_dimension( - time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), entity_links=() + time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), + group_by_links=(), ) ) diff --git a/metricflow/specs/dimension_spec_resolver.py b/metricflow/specs/dimension_spec_resolver.py index 25caa0ac6c..06fe2a3fd3 100644 --- a/metricflow/specs/dimension_spec_resolver.py +++ b/metricflow/specs/dimension_spec_resolver.py @@ -32,7 +32,7 @@ def resolve_dimension_spec(self, name: str, entity_path: Sequence[str]) -> Dimen ) return DimensionSpec( element_name=call_parameter_set.dimension_reference.element_name, - entity_links=call_parameter_set.entity_path, + group_by_links=call_parameter_set.entity_path, ) def resolve_time_dimension_spec( @@ -55,7 +55,7 @@ def resolve_time_dimension_spec( assert call_parameter_set in self._call_parameter_sets.time_dimension_call_parameter_sets return TimeDimensionSpec( element_name=call_parameter_set.time_dimension_reference.element_name, - entity_links=call_parameter_set.entity_path, + group_by_links=call_parameter_set.entity_path, time_granularity=( call_parameter_set.time_granularity # TODO: This should be updated once resolution of unspecified grain is supported. 
diff --git a/metricflow/specs/patterns/entity_link_pattern.py b/metricflow/specs/patterns/entity_link_pattern.py index da28745557..66b9975de6 100644 --- a/metricflow/specs/patterns/entity_link_pattern.py +++ b/metricflow/specs/patterns/entity_link_pattern.py @@ -24,9 +24,10 @@ class ParameterSetField(Enum): """ ELEMENT_NAME = "element_name" - ENTITY_LINKS = "entity_links" + ENTITY_LINKS = "group_by_links" TIME_GRANULARITY = "time_granularity" DATE_PART = "date_part" + GROUP_BY_LINKS = "group_by_links" def __lt__(self, other: Any) -> bool: # type: ignore[misc] """Allow for ordering so that a sequence of these can be consistently represented for test snapshots.""" @@ -46,7 +47,7 @@ class EntityLinkPatternParameterSet: # The name of the element in the semantic model element_name: Optional[str] = None # The entities used for joining semantic models. - entity_links: Optional[Tuple[EntityReference, ...]] = None + group_by_links: Optional[Tuple[EntityReference, ...]] = None # Properties of time dimensions to match. 
time_granularity: Optional[TimeGranularity] = None date_part: Optional[DatePart] = None @@ -55,14 +56,14 @@ class EntityLinkPatternParameterSet: def from_parameters( # noqa: D fields_to_compare: Sequence[ParameterSetField], element_name: Optional[str] = None, - entity_links: Optional[Sequence[EntityReference]] = None, + group_by_links: Optional[Sequence[EntityReference]] = None, time_granularity: Optional[TimeGranularity] = None, date_part: Optional[DatePart] = None, ) -> EntityLinkPatternParameterSet: return EntityLinkPatternParameterSet( fields_to_compare=tuple(sorted(fields_to_compare)), element_name=element_name, - entity_links=tuple(entity_links) if entity_links is not None else None, + group_by_links=tuple(group_by_links) if group_by_links is not None else None, time_granularity=time_granularity, date_part=date_part, ) @@ -87,15 +88,15 @@ class EntityLinkPattern(SpecPattern): parameter_set: EntityLinkPatternParameterSet - def _match_entity_links(self, candidate_specs: Sequence[LinkableInstanceSpec]) -> Sequence[LinkableInstanceSpec]: - assert self.parameter_set.entity_links is not None - num_links_to_check = len(self.parameter_set.entity_links) + def _match_group_by_links(self, candidate_specs: Sequence[LinkableInstanceSpec]) -> Sequence[LinkableInstanceSpec]: + assert self.parameter_set.group_by_links is not None + num_links_to_check = len(self.parameter_set.group_by_links) matching_specs: Sequence[LinkableInstanceSpec] = tuple( candidate_spec for candidate_spec in candidate_specs if ( - self.parameter_set.entity_links[-num_links_to_check:] - == candidate_spec.entity_links[-num_links_to_check:] + self.parameter_set.group_by_links[-num_links_to_check:] + == candidate_spec.group_by_links[-num_links_to_check:] ) ) @@ -104,8 +105,8 @@ def _match_entity_links(self, candidate_specs: Sequence[LinkableInstanceSpec]) - # If multiple match, then return only the ones with the shortest entity link path. There could be multiple # e.g. 
booking__listing__country and listing__country will match with listing__country. - shortest_entity_link_length = min(len(matching_spec.entity_links) for matching_spec in matching_specs) - return tuple(spec for spec in matching_specs if len(spec.entity_links) == shortest_entity_link_length) + shortest_group_by_link_length = min(len(matching_spec.group_by_links) for matching_spec in matching_specs) + return tuple(spec for spec in matching_specs if len(spec.group_by_links) == shortest_group_by_link_length) @override def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableInstanceSpec]: @@ -114,7 +115,7 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableIns # Entity links could be a partial match, so it's handled separately. if ParameterSetField.ENTITY_LINKS in self.parameter_set.fields_to_compare: - filtered_candidate_specs = self._match_entity_links(filtered_candidate_specs) + filtered_candidate_specs = self._match_group_by_links(filtered_candidate_specs) other_keys_to_check = set( field_to_compare.value for field_to_compare in self.parameter_set.fields_to_compare diff --git a/metricflow/specs/patterns/metric_time_pattern.py b/metricflow/specs/patterns/metric_time_pattern.py index 85f7dca0fc..e4e7a1b67a 100644 --- a/metricflow/specs/patterns/metric_time_pattern.py +++ b/metricflow/specs/patterns/metric_time_pattern.py @@ -9,7 +9,6 @@ from metricflow.specs.specs import ( InstanceSpec, InstanceSpecSet, - LinkableSpecSet, TimeDimensionSpec, ) @@ -23,9 +22,8 @@ class MetricTimePattern(SpecPattern): @override def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[TimeDimensionSpec]: - spec_set = LinkableSpecSet.from_specs(InstanceSpecSet.from_specs(candidate_specs).linkable_specs) return tuple( time_dimension_spec - for time_dimension_spec in spec_set.time_dimension_specs + for time_dimension_spec in InstanceSpecSet.from_specs(candidate_specs).time_dimension_specs if time_dimension_spec.element_name == 
METRIC_TIME_ELEMENT_NAME ) diff --git a/metricflow/specs/patterns/typed_patterns.py b/metricflow/specs/patterns/typed_patterns.py index aeeb68375a..03fbf5d300 100644 --- a/metricflow/specs/patterns/typed_patterns.py +++ b/metricflow/specs/patterns/typed_patterns.py @@ -6,8 +6,10 @@ from dbt_semantic_interfaces.call_parameter_sets import ( DimensionCallParameterSet, EntityCallParameterSet, + MetricCallParameterSet, TimeDimensionCallParameterSet, ) +from dbt_semantic_interfaces.references import LinkableElementReference from typing_extensions import override from metricflow.specs.patterns.entity_link_pattern import ( @@ -15,6 +17,7 @@ EntityLinkPatternParameterSet, ParameterSetField, ) +from metricflow.specs.patterns.spec_pattern import SpecPattern from metricflow.specs.specs import InstanceSpec, InstanceSpecSet, LinkableInstanceSpec @@ -42,7 +45,7 @@ def from_call_parameter_set( # noqa: D ParameterSetField.ENTITY_LINKS, ), element_name=dimension_call_parameter_set.dimension_reference.element_name, - entity_links=dimension_call_parameter_set.entity_path, + group_by_links=dimension_call_parameter_set.entity_path, ) ) @@ -81,7 +84,7 @@ def from_call_parameter_set( parameter_set=EntityLinkPatternParameterSet.from_parameters( fields_to_compare=tuple(fields_to_compare), element_name=time_dimension_call_parameter_set.time_dimension_reference.element_name, - entity_links=time_dimension_call_parameter_set.entity_path, + group_by_links=time_dimension_call_parameter_set.entity_path, time_granularity=time_dimension_call_parameter_set.time_granularity, date_part=time_dimension_call_parameter_set.date_part, ) @@ -109,6 +112,53 @@ def from_call_parameter_set(entity_call_parameter_set: EntityCallParameterSet) - ParameterSetField.ENTITY_LINKS, ), element_name=entity_call_parameter_set.entity_reference.element_name, - entity_links=entity_call_parameter_set.entity_path, + group_by_links=entity_call_parameter_set.entity_path, + ) + ) + + +@dataclass(frozen=True) +class 
GroupByMetricPatternParameterSet: + """Pattern for joining metrics to semantic models so that they can be used in group bys & filters.""" + + # The name of the metric as defined in the semantic manifest. + element_name: str + # The group bys used for joining metrics to semantic models. + group_by_links: Sequence[LinkableElementReference] + + @staticmethod + def from_parameters( # noqa: D + element_name: str, group_by_links: Sequence[LinkableElementReference] + ) -> GroupByMetricPatternParameterSet: + return GroupByMetricPatternParameterSet(element_name=element_name, group_by_links=group_by_links) + + +@dataclass(frozen=True) +class GroupByMetricPattern(SpecPattern): + """A pattern that matches metrics using the group by specifications.""" + + parameter_set: GroupByMetricPatternParameterSet + + @override + def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableInstanceSpec]: + filtered_candidate_specs = InstanceSpecSet.from_specs(candidate_specs).group_by_metric_specs + + matching_specs: List[InstanceSpec] = [] + for spec in filtered_candidate_specs: + if spec.element_name == self.parameter_set.element_name and { + # TODO: should these be ordered? tuples instead of sets? Does order matter for matching metrics? thinking no. 
+ group_by.element_name + for group_by in spec.group_by_links + } == {group_by.element_name for group_by in self.parameter_set.group_by_links}: + matching_specs.append(spec) + + return matching_specs + + @staticmethod + def from_call_parameter_set(metric_call_parameter_set: MetricCallParameterSet) -> GroupByMetricPattern: # noqa: D + return GroupByMetricPattern( + parameter_set=GroupByMetricPatternParameterSet.from_parameters( + element_name=metric_call_parameter_set.metric_reference.element_name, + group_by_links=metric_call_parameter_set.group_by, ) ) diff --git a/metricflow/specs/python_object.py b/metricflow/specs/python_object.py index 9dc916daea..fff6c4653b 100644 --- a/metricflow/specs/python_object.py +++ b/metricflow/specs/python_object.py @@ -42,7 +42,7 @@ def parse_object_builder_naming_scheme(group_by_item_name: str) -> GroupByParame DimensionOrEntityParameter( name=StructuredLinkableSpecName( element_name=dimension_call_parameter_set.dimension_reference.element_name, - entity_link_names=tuple( + group_by_link_names=tuple( entity_reference.element_name for entity_reference in dimension_call_parameter_set.entity_path ), ).qualified_name @@ -59,7 +59,7 @@ def parse_object_builder_naming_scheme(group_by_item_name: str) -> GroupByParame DimensionOrEntityParameter( name=StructuredLinkableSpecName( element_name=entity_call_parameter_set.entity_reference.element_name, - entity_link_names=tuple( + group_by_link_names=tuple( entity_reference.element_name for entity_reference in entity_call_parameter_set.entity_path ), ).qualified_name @@ -71,7 +71,7 @@ def parse_object_builder_naming_scheme(group_by_item_name: str) -> GroupByParame TimeDimensionParameter( name=StructuredLinkableSpecName( element_name=time_dimension_parameter_set.time_dimension_reference.element_name, - entity_link_names=tuple( + group_by_link_names=tuple( entity_reference.element_name for entity_reference in time_dimension_parameter_set.entity_path ), ).qualified_name, diff --git 
a/metricflow/specs/query_param_implementations.py b/metricflow/specs/query_param_implementations.py index 3ddba66d8d..2d5dd0a6b4 100644 --- a/metricflow/specs/query_param_implementations.py +++ b/metricflow/specs/query_param_implementations.py @@ -65,7 +65,9 @@ def query_resolver_input(self) -> ResolverInputForGroupByItem: # noqa: D EntityLinkPatternParameterSet.from_parameters( fields_to_compare=tuple(fields_to_compare), element_name=name_structure.element_name, - entity_links=tuple(EntityReference(link_name) for link_name in name_structure.entity_link_names), + group_by_links=tuple( + EntityReference(link_name) for link_name in name_structure.group_by_link_names + ), time_granularity=self.grain, date_part=self.date_part, ) @@ -101,7 +103,9 @@ def query_resolver_input(self) -> ResolverInputForGroupByItem: # noqa: D ParameterSetField.DATE_PART, ), element_name=name_structure.element_name, - entity_links=tuple(EntityReference(link_name) for link_name in name_structure.entity_link_names), + group_by_links=tuple( + EntityReference(link_name) for link_name in name_structure.group_by_link_names + ), time_granularity=None, date_part=None, ) diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index a4853690a0..6c450a43cd 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -26,6 +26,7 @@ from dbt_semantic_interfaces.references import ( DimensionReference, EntityReference, + LinkableElementReference, MeasureReference, MetricReference, TimeDimensionReference, @@ -81,6 +82,10 @@ def visit_entity_spec(self, entity_spec: EntitySpec) -> VisitorOutputT: # noqa: def visit_metric_spec(self, metric_spec: MetricSpec) -> VisitorOutputT: # noqa: D raise NotImplementedError + @abstractmethod + def visit_group_by_metric_spec(self, group_by_metric_spec: GroupByMetricSpec) -> VisitorOutputT: # noqa: D + raise NotImplementedError + @abstractmethod def visit_metadata_spec(self, metadata_spec: MetadataSpec) -> VisitorOutputT: # noqa: D raise 
NotImplementedError @@ -154,21 +159,21 @@ def as_spec_set(self) -> InstanceSpecSet: class LinkableInstanceSpec(InstanceSpec, ABC): """Generally a dimension or entity that may be specified using entity links. - For example, user_id__country -> LinkableElementSpec(element_name="country", entity_links=["user_id"] + For example, user_id__country -> LinkableElementSpec(element_name="country", group_by_links=["user_id"] See InstanceSpec for the reason behind "type: ignore" """ """A list representing the join path of entities to get to this element.""" - entity_links: Tuple[EntityReference, ...] + group_by_links: Tuple[EntityReference, ...] @property - def without_first_entity_link(self: SelfTypeT) -> SelfTypeT: + def without_first_group_by_link(self: SelfTypeT) -> SelfTypeT: """e.g. user_id__device_id__platform -> device_id__platform.""" raise NotImplementedError() @property - def without_entity_links(self: SelfTypeT) -> SelfTypeT: # noqa: D + def without_group_by_links(self: SelfTypeT) -> SelfTypeT: # noqa: D """e.g. user_id__device_id__platform -> platform.""" raise NotImplementedError() @@ -184,44 +189,67 @@ def merge_linkable_specs(*specs: Sequence[LinkableInstanceSpec]) -> List[Linkabl def qualified_name(self) -> str: """Return the qualified name of this spec. e.g. "user_id__country".""" return StructuredLinkableSpecName( - entity_link_names=tuple(x.element_name for x in self.entity_links), element_name=self.element_name + group_by_link_names=tuple(x.element_name for x in self.group_by_links), element_name=self.element_name ).qualified_name +@dataclass(frozen=True) +class GroupByMetricSpec(InstanceSpec, SerializableDataclass): # noqa: D + group_by_links: Tuple[LinkableElementReference, ...] 
+ + @property + def as_spec_set(self) -> InstanceSpecSet: + return InstanceSpecSet(group_by_metric_specs=(self,)) + + def accept(self, visitor: InstanceSpecVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D + return visitor.visit_group_by_metric_spec(self) + + @property + def qualified_name(self) -> str: + return StructuredLinkableSpecName( + group_by_link_names=tuple(x.element_name for x in self.group_by_links), element_name=self.element_name + ).qualified_name + + @property + def without_first_group_by_link(self) -> GroupByMetricSpec: # noqa: D + assert len(self.group_by_links) > 0, f"Spec does not have any group by links: {self}" + return GroupByMetricSpec(element_name=self.element_name, group_by_links=self.group_by_links[1:]) + + @dataclass(frozen=True) class EntitySpec(LinkableInstanceSpec, SerializableDataclass): # noqa: D @property - def without_first_entity_link(self) -> EntitySpec: # noqa: D - assert len(self.entity_links) > 0, f"Spec does not have any entity links: {self}" - return EntitySpec(element_name=self.element_name, entity_links=self.entity_links[1:]) + def without_first_group_by_link(self) -> EntitySpec: # noqa: D + assert len(self.group_by_links) > 0, f"Spec does not have any entity links: {self}" + return EntitySpec(element_name=self.element_name, group_by_links=self.group_by_links[1:]) @property - def without_entity_links(self) -> EntitySpec: # noqa: D + def without_group_by_links(self) -> EntitySpec: # noqa: D return LinklessEntitySpec.from_element_name(self.element_name) @property def as_linkless_prefix(self) -> Tuple[EntityReference, ...]: - """Creates tuple of linkless entities that could be included in the entity_links of another spec. + """Creates tuple of linkless entities that could be included in the group_by_links of another spec. 
eg as a prefix to a DimensionSpec's entity links to when a join is occurring via this entity """ - return (EntityReference(element_name=self.element_name),) + self.entity_links + return (EntityReference(element_name=self.element_name),) + self.group_by_links @staticmethod def from_name(name: str) -> EntitySpec: # noqa: D structured_name = StructuredLinkableSpecName.from_name(name) return EntitySpec( - entity_links=tuple(EntityReference(idl) for idl in structured_name.entity_link_names), + group_by_links=tuple(EntityReference(idl) for idl in structured_name.group_by_link_names), element_name=structured_name.element_name, ) def __eq__(self, other: Any) -> bool: # type: ignore[misc] # noqa: D if not isinstance(other, EntitySpec): return False - return self.element_name == other.element_name and self.entity_links == other.entity_links + return self.element_name == other.element_name and self.group_by_links == other.group_by_links def __hash__(self) -> int: # noqa: D - return hash((self.element_name, self.entity_links)) + return hash((self.element_name, self.group_by_links)) @property def reference(self) -> EntityReference: # noqa: D @@ -242,49 +270,49 @@ class LinklessEntitySpec(EntitySpec, SerializableDataclass): @staticmethod def from_element_name(element_name: str) -> LinklessEntitySpec: # noqa: D - return LinklessEntitySpec(element_name=element_name, entity_links=()) + return LinklessEntitySpec(element_name=element_name, group_by_links=()) def __post_init__(self) -> None: # noqa: D - if len(self.entity_links) > 0: + if len(self.group_by_links) > 0: raise RuntimeError(f"{self.__class__.__name__} shouldn't have entity links. 
Got: {self}") def __eq__(self, other: Any) -> bool: # type: ignore[misc] # noqa: D if not isinstance(other, EntitySpec): return False - return self.element_name == other.element_name and self.entity_links == other.entity_links + return self.element_name == other.element_name and self.group_by_links == other.group_by_links def __hash__(self) -> int: # noqa: D - return hash((self.element_name, self.entity_links)) + return hash((self.element_name, self.group_by_links)) @staticmethod def from_reference(entity_reference: EntityReference) -> LinklessEntitySpec: # noqa: D - return LinklessEntitySpec(element_name=entity_reference.element_name, entity_links=()) + return LinklessEntitySpec(element_name=entity_reference.element_name, group_by_links=()) @dataclass(frozen=True) class DimensionSpec(LinkableInstanceSpec, SerializableDataclass): # noqa: D element_name: str - entity_links: Tuple[EntityReference, ...] + group_by_links: Tuple[EntityReference, ...] @property - def without_first_entity_link(self) -> DimensionSpec: # noqa: D - assert len(self.entity_links) > 0, f"Spec does not have any entity links: {self}" - return DimensionSpec(element_name=self.element_name, entity_links=self.entity_links[1:]) + def without_first_group_by_link(self) -> DimensionSpec: # noqa: D + assert len(self.group_by_links) > 0, f"Spec does not have any entity links: {self}" + return DimensionSpec(element_name=self.element_name, group_by_links=self.group_by_links[1:]) @property - def without_entity_links(self) -> DimensionSpec: # noqa: D - return DimensionSpec(element_name=self.element_name, entity_links=()) + def without_group_by_links(self) -> DimensionSpec: # noqa: D + return DimensionSpec(element_name=self.element_name, group_by_links=()) @staticmethod def from_linkable(spec: LinkableInstanceSpec) -> DimensionSpec: # noqa: D - return DimensionSpec(element_name=spec.element_name, entity_links=spec.entity_links) + return DimensionSpec(element_name=spec.element_name, 
group_by_links=spec.group_by_links) @staticmethod def from_name(name: str) -> DimensionSpec: """Construct from a name e.g. listing__ds__month.""" parsed_name = StructuredLinkableSpecName.from_name(name) return DimensionSpec( - entity_links=tuple([EntityReference(idl) for idl in parsed_name.entity_link_names]), + group_by_links=tuple([EntityReference(idl) for idl in parsed_name.group_by_link_names]), element_name=parsed_name.element_name, ) @@ -334,7 +362,7 @@ def __init__(self, source_spec: TimeDimensionSpec, exclude_fields: Sequence[Time # This is a list of field values of TimeDimensionSpec that we should use for comparison. spec_field_values_for_comparison: List[ Union[str, Tuple[EntityReference, ...], TimeGranularity, Optional[DatePart]] - ] = [self._source_spec.element_name, self._source_spec.entity_links] + ] = [self._source_spec.element_name, self._source_spec.group_by_links] if TimeDimensionSpecField.TIME_GRANULARITY not in self._excluded_fields: spec_field_values_for_comparison.append(self._source_spec.time_granularity) @@ -374,24 +402,24 @@ class TimeDimensionSpec(DimensionSpec): # noqa: D aggregation_state: Optional[AggregationState] = None @property - def without_first_entity_link(self) -> TimeDimensionSpec: # noqa: D - assert len(self.entity_links) > 0, f"Spec does not have any entity links: {self}" + def without_first_group_by_link(self) -> TimeDimensionSpec: # noqa: D + assert len(self.group_by_links) > 0, f"Spec does not have any entity links: {self}" return TimeDimensionSpec( element_name=self.element_name, - entity_links=self.entity_links[1:], + group_by_links=self.group_by_links[1:], time_granularity=self.time_granularity, date_part=self.date_part, ) @property - def without_entity_links(self) -> TimeDimensionSpec: # noqa: D + def without_group_by_links(self) -> TimeDimensionSpec: # noqa: D return TimeDimensionSpec.from_name(self.element_name) @staticmethod def from_name(name: str) -> TimeDimensionSpec: # noqa: D structured_name = 
StructuredLinkableSpecName.from_name(name) return TimeDimensionSpec( - entity_links=tuple(EntityReference(idl) for idl in structured_name.entity_link_names), + group_by_links=tuple(EntityReference(idl) for idl in structured_name.group_by_link_names), element_name=structured_name.element_name, time_granularity=structured_name.time_granularity or DEFAULT_TIME_GRANULARITY, ) @@ -407,7 +435,7 @@ def dimension_reference(self) -> DimensionReference: # noqa: D @property def qualified_name(self) -> str: # noqa: D return StructuredLinkableSpecName( - entity_link_names=tuple(x.element_name for x in self.entity_links), + group_by_link_names=tuple(x.element_name for x in self.group_by_links), element_name=self.element_name, time_granularity=self.time_granularity, date_part=self.date_part, @@ -416,7 +444,7 @@ def qualified_name(self) -> str: # noqa: D @staticmethod def from_reference(reference: TimeDimensionReference) -> TimeDimensionSpec: """Initialize from a time dimension reference instance.""" - return TimeDimensionSpec(entity_links=(), element_name=reference.element_name) + return TimeDimensionSpec(group_by_links=(), element_name=reference.element_name) @property @override @@ -429,7 +457,7 @@ def accept(self, visitor: InstanceSpecVisitor[VisitorOutputT]) -> VisitorOutputT def with_grain(self, time_granularity: TimeGranularity) -> TimeDimensionSpec: # noqa: D return TimeDimensionSpec( element_name=self.element_name, - entity_links=self.entity_links, + group_by_links=self.group_by_links, time_granularity=time_granularity, date_part=self.date_part, aggregation_state=self.aggregation_state, @@ -438,7 +466,7 @@ def with_grain(self, time_granularity: TimeGranularity) -> TimeDimensionSpec: # def with_aggregation_state(self, aggregation_state: AggregationState) -> TimeDimensionSpec: # noqa: D return TimeDimensionSpec( element_name=self.element_name, - entity_links=self.entity_links, + group_by_links=self.group_by_links, time_granularity=self.time_granularity, 
date_part=self.date_part, aggregation_state=aggregation_state, @@ -453,7 +481,7 @@ def comparison_key(self, exclude_fields: Sequence[TimeDimensionSpecField] = ()) @staticmethod def generate_possible_specs_for_time_dimension( - time_dimension_reference: TimeDimensionReference, entity_links: Tuple[EntityReference, ...] + time_dimension_reference: TimeDimensionReference, group_by_links: Tuple[EntityReference, ...] ) -> List[TimeDimensionSpec]: """Generate a list of time dimension specs with all combinations of granularity & date part.""" time_dimension_specs: List[TimeDimensionSpec] = [] @@ -461,7 +489,7 @@ def generate_possible_specs_for_time_dimension( time_dimension_specs.append( TimeDimensionSpec( element_name=time_dimension_reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=time_granularity, date_part=None, ) @@ -471,7 +499,7 @@ def generate_possible_specs_for_time_dimension( time_dimension_specs.append( TimeDimensionSpec( element_name=time_dimension_reference.element_name, - entity_links=entity_links, + group_by_links=group_by_links, time_granularity=time_granularity, date_part=date_part, ) @@ -656,6 +684,7 @@ class LinkableSpecSet(Mergeable, SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () entity_specs: Tuple[EntitySpec, ...] = () + group_by_metric_specs: Tuple[GroupByMetricSpec, ...] 
= () @property def contains_metric_time(self) -> bool: @@ -700,8 +729,12 @@ def metric_time_specs(self) -> Sequence[TimeDimensionSpec]: ) @property - def as_tuple(self) -> Tuple[LinkableInstanceSpec, ...]: # noqa: D - return tuple(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) + def as_tuple(self) -> Tuple[InstanceSpec, ...]: # noqa: D + return tuple( + itertools.chain( + self.dimension_specs, self.time_dimension_specs, self.entity_specs, self.group_by_metric_specs + ) + ) @override def merge(self, other: LinkableSpecSet) -> LinkableSpecSet: @@ -709,6 +742,7 @@ def merge(self, other: LinkableSpecSet) -> LinkableSpecSet: dimension_specs=self.dimension_specs + other.dimension_specs, time_dimension_specs=self.time_dimension_specs + other.time_dimension_specs, entity_specs=self.entity_specs + other.entity_specs, + group_by_metric_specs=self.group_by_metric_specs + other.group_by_metric_specs, ) @override @@ -731,10 +765,15 @@ def dedupe(self) -> LinkableSpecSet: # noqa: D for entity_spec in self.entity_specs: entity_spec_dict[entity_spec] = None + group_by_metric_spec_dict: Dict[GroupByMetricSpec, None] = {} + for group_by_metric in self.group_by_metric_specs: + group_by_metric_spec_dict[group_by_metric] = None + return LinkableSpecSet( dimension_specs=tuple(dimension_spec_dict.keys()), time_dimension_specs=tuple(time_dimension_spec_dict.keys()), entity_specs=tuple(entity_spec_dict.keys()), + group_by_metric_specs=tuple(group_by_metric_spec_dict.keys()), ) def is_subset_of(self, other_set: LinkableSpecSet) -> bool: # noqa: D @@ -746,6 +785,7 @@ def as_spec_set(self) -> InstanceSpecSet: # noqa: D dimension_specs=self.dimension_specs, time_dimension_specs=self.time_dimension_specs, entity_specs=self.entity_specs, + group_by_metric_specs=self.group_by_metric_specs, ) def difference(self, other: LinkableSpecSet) -> LinkableSpecSet: # noqa: D @@ -753,6 +793,7 @@ def difference(self, other: LinkableSpecSet) -> LinkableSpecSet: # noqa: D 
dimension_specs=tuple(set(self.dimension_specs) - set(other.dimension_specs)), time_dimension_specs=tuple(set(self.time_dimension_specs) - set(other.time_dimension_specs)), entity_specs=tuple(set(self.entity_specs) - set(other.entity_specs)), + group_by_metric_specs=tuple(set(self.group_by_metric_specs) - set(other.group_by_metric_specs)), ) def __len__(self) -> int: # noqa: D @@ -765,6 +806,7 @@ def from_specs(specs: Sequence[LinkableInstanceSpec]) -> LinkableSpecSet: # noq dimension_specs=instance_spec_set.dimension_specs, time_dimension_specs=instance_spec_set.time_dimension_specs, entity_specs=instance_spec_set.entity_specs, + group_by_metric_specs=instance_spec_set.group_by_metric_specs, ) @@ -825,6 +867,7 @@ class InstanceSpecSet(Mergeable, SerializableDataclass): measure_specs: Tuple[MeasureSpec, ...] = () dimension_specs: Tuple[DimensionSpec, ...] = () entity_specs: Tuple[EntitySpec, ...] = () + group_by_metric_specs: Tuple[GroupByMetricSpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () metadata_specs: Tuple[MetadataSpec, ...] 
= () @@ -835,6 +878,7 @@ def merge(self, other: InstanceSpecSet) -> InstanceSpecSet: measure_specs=self.measure_specs + other.measure_specs, dimension_specs=self.dimension_specs + other.dimension_specs, entity_specs=self.entity_specs + other.entity_specs, + group_by_metric_specs=self.group_by_metric_specs + other.group_by_metric_specs, time_dimension_specs=self.time_dimension_specs + other.time_dimension_specs, metadata_specs=self.metadata_specs + other.metadata_specs, ) @@ -874,18 +918,28 @@ def dedupe(self) -> InstanceSpecSet: if entity_spec not in entity_specs_deduped: entity_specs_deduped.append(entity_spec) + group_by_metric_specs_deduped = [] + for group_by_metric_spec in self.group_by_metric_specs: + if group_by_metric_spec not in group_by_metric_specs_deduped: + group_by_metric_specs_deduped.append(group_by_metric_spec) + return InstanceSpecSet( metric_specs=tuple(metric_specs_deduped), measure_specs=tuple(measure_specs_deduped), dimension_specs=tuple(dimension_specs_deduped), time_dimension_specs=tuple(time_dimension_specs_deduped), entity_specs=tuple(entity_specs_deduped), + group_by_metric_specs=tuple(group_by_metric_specs_deduped), ) @property - def linkable_specs(self) -> Sequence[LinkableInstanceSpec]: + def linkable_specs(self) -> Sequence[InstanceSpec]: """All linkable specs in this set.""" - return list(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) + return list( + itertools.chain( + self.dimension_specs, self.time_dimension_specs, self.entity_specs, self.group_by_metric_specs + ) + ) @property def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D @@ -895,6 +949,7 @@ def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D self.dimension_specs, self.time_dimension_specs, self.entity_specs, + self.group_by_metric_specs, self.metric_specs, self.metadata_specs, ) @@ -940,7 +995,7 @@ class WhereFilterSpec(Mergeable, SerializableDataclass): dimension_specs=( DimensionSpec( element_name='country', - 
entity_links=('listing',), + group_by_links=('listing',), ), ) ) diff --git a/metricflow/specs/where_filter_dimension.py b/metricflow/specs/where_filter_dimension.py index 663c10f871..3b614619f2 100644 --- a/metricflow/specs/where_filter_dimension.py +++ b/metricflow/specs/where_filter_dimension.py @@ -44,7 +44,7 @@ def __init__( # noqa where_filter_location: WhereFilterLocation, rendered_spec_tracker: RenderedSpecTracker, element_name: str, - entity_links: Sequence[EntityReference], + group_by_links: Sequence[EntityReference], time_grain: Optional[TimeGranularity] = None, date_part: Optional[DatePart] = None, ) -> None: @@ -53,7 +53,7 @@ def __init__( # noqa self._where_filter_location = where_filter_location self._rendered_spec_tracker = rendered_spec_tracker self._element_name = element_name - self._entity_links = tuple(entity_links) + self._group_by_links = tuple(group_by_links) self._time_grain = time_grain self._date_part = date_part @@ -65,7 +65,7 @@ def grain(self, time_granularity_name: str) -> QueryInterfaceDimension: where_filter_location=self._where_filter_location, rendered_spec_tracker=self._rendered_spec_tracker, element_name=self._element_name, - entity_links=self._entity_links, + group_by_links=self._group_by_links, time_grain=TimeGranularity(time_granularity_name.lower()), date_part=self._date_part, ) @@ -78,7 +78,7 @@ def date_part(self, date_part_name: str) -> QueryInterfaceDimension: where_filter_location=self._where_filter_location, rendered_spec_tracker=self._rendered_spec_tracker, element_name=self._element_name, - entity_links=self._entity_links, + group_by_links=self._group_by_links, time_grain=self._time_grain, date_part=DatePart(date_part_name.lower()), ) @@ -95,14 +95,14 @@ def __str__(self) -> str: call_parameter_set: Union[TimeDimensionCallParameterSet, DimensionCallParameterSet] if self._time_grain is not None or self._date_part is not None: call_parameter_set = TimeDimensionCallParameterSet( - entity_path=self._entity_links, + 
entity_path=self._group_by_links, time_dimension_reference=TimeDimensionReference(self._element_name), time_granularity=self._time_grain, date_part=self._date_part, ) else: call_parameter_set = DimensionCallParameterSet( - entity_path=self._entity_links, + entity_path=self._group_by_links, dimension_reference=DimensionReference(self._element_name), ) @@ -150,6 +150,6 @@ def create(self, name: str, entity_path: Sequence[str] = ()) -> WhereFilterDimen where_filter_location=self._where_filter_location, rendered_spec_tracker=self._rendered_spec_tracker, element_name=structured_name.element_name, - entity_links=tuple(EntityReference(entity_link_name.lower()) for entity_link_name in entity_path) + group_by_links=tuple(EntityReference(group_by_link_name.lower()) for group_by_link_name in entity_path) + structured_name.entity_links, ) diff --git a/metricflow/specs/where_filter_entity.py b/metricflow/specs/where_filter_entity.py index 507b4ef590..233fd9afaa 100644 --- a/metricflow/specs/where_filter_entity.py +++ b/metricflow/specs/where_filter_entity.py @@ -37,7 +37,7 @@ def __init__( # noqa where_filter_location: WhereFilterLocation, rendered_spec_tracker: RenderedSpecTracker, element_name: str, - entity_links: Sequence[EntityReference], + group_by_links: Sequence[EntityReference], time_grain: Optional[TimeGranularity] = None, date_part: Optional[DatePart] = None, ) -> None: @@ -46,7 +46,7 @@ def __init__( # noqa self._where_filter_location = where_filter_location self._rendered_spec_tracker = rendered_spec_tracker self._element_name = element_name - self._entity_links = tuple(entity_links) + self._group_by_links = tuple(group_by_links) self._time_grain = time_grain self._date_part = date_part @@ -62,7 +62,7 @@ def __str__(self) -> str: Important in the Jinja sandbox. 
""" call_parameter_set = EntityCallParameterSet( - entity_path=self._entity_links, + entity_path=self._group_by_links, entity_reference=EntityReference(self._element_name), ) resolved_spec = self._resolved_spec_lookup.checked_resolved_spec( @@ -109,6 +109,6 @@ def create(self, entity_name: str, entity_path: Sequence[str] = ()) -> WhereFilt where_filter_location=self._where_filter_location, rendered_spec_tracker=self._rendered_spec_tracker, element_name=structured_name.element_name, - entity_links=tuple(EntityReference(entity_link_name.lower()) for entity_link_name in entity_path) + group_by_links=tuple(EntityReference(group_by_link_name.lower()) for group_by_link_name in entity_path) + structured_name.entity_links, ) diff --git a/metricflow/specs/where_filter_metric.py b/metricflow/specs/where_filter_metric.py new file mode 100644 index 0000000000..a5efaa74a1 --- /dev/null +++ b/metricflow/specs/where_filter_metric.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +from typing import Sequence + +from dbt_semantic_interfaces.call_parameter_sets import ( + MetricCallParameterSet, +) +from dbt_semantic_interfaces.protocols.protocol_hint import ProtocolHint +from dbt_semantic_interfaces.protocols.query_interface import QueryInterfaceMetric, QueryInterfaceMetricFactory +from dbt_semantic_interfaces.references import LinkableElementReference, MetricReference +from typing_extensions import override + +from metricflow.errors.errors import InvalidQuerySyntax +from metricflow.query.group_by_item.filter_spec_resolution.filter_location import WhereFilterLocation +from metricflow.query.group_by_item.filter_spec_resolution.filter_spec_lookup import ( + FilterSpecResolutionLookUp, + ResolvedSpecLookUpKey, +) +from metricflow.specs.column_assoc import ColumnAssociationResolver +from metricflow.specs.rendered_spec_tracker import RenderedSpecTracker + + +class WhereFilterMetric(ProtocolHint[QueryInterfaceMetric]): + """A metric that is passed in through the where filter 
parameter.""" + + @override + def _implements_protocol(self) -> QueryInterfaceMetric: + return self + + def __init__( # noqa + self, + column_association_resolver: ColumnAssociationResolver, + resolved_spec_lookup: FilterSpecResolutionLookUp, + where_filter_location: WhereFilterLocation, + rendered_spec_tracker: RenderedSpecTracker, + element_name: str, + group_by: Sequence[LinkableElementReference], + ) -> None: + self._column_association_resolver = column_association_resolver + self._resolved_spec_lookup = resolved_spec_lookup + self._where_filter_location = where_filter_location + self._rendered_spec_tracker = rendered_spec_tracker + self._element_name = element_name + self._group_by = tuple(group_by) + + def descending(self, _is_descending: bool) -> QueryInterfaceMetric: + """Set the sort order for order-by.""" + raise InvalidQuerySyntax( + "Can't set descending in the where clause. Try setting descending in the order_by clause instead" + ) + + def __str__(self) -> str: + """Returns the column name. + + Important in the Jinja sandbox. + """ + call_parameter_set = MetricCallParameterSet( + group_by=self._group_by, + metric_reference=MetricReference(self._element_name), + ) + resolved_spec = self._resolved_spec_lookup.checked_resolved_spec( + ResolvedSpecLookUpKey( + filter_location=self._where_filter_location, + call_parameter_set=call_parameter_set, + ) + ) + self._rendered_spec_tracker.record_rendered_spec(resolved_spec) + column_association = self._column_association_resolver.resolve_spec(resolved_spec) + + return column_association.column_name + + +class WhereFilterMetricFactory(ProtocolHint[QueryInterfaceMetricFactory]): + """Creates a WhereFilterMetric. + + Each call to `create` adds a MetricSpec to metric_specs. 
+ """ + + @override + def _implements_protocol(self) -> QueryInterfaceMetricFactory: + return self + + def __init__( # noqa + self, + column_association_resolver: ColumnAssociationResolver, + spec_resolution_lookup: FilterSpecResolutionLookUp, + where_filter_location: WhereFilterLocation, + rendered_spec_tracker: RenderedSpecTracker, + ): + self._column_association_resolver = column_association_resolver + self._resolved_spec_lookup = spec_resolution_lookup + self._where_filter_location = where_filter_location + self._rendered_spec_tracker = rendered_spec_tracker + + def create(self, metric_name: str, group_by: Sequence[str] = ()) -> WhereFilterMetric: + """Create a WhereFilterMetric.""" + return WhereFilterMetric( + column_association_resolver=self._column_association_resolver, + resolved_spec_lookup=self._resolved_spec_lookup, + where_filter_location=self._where_filter_location, + rendered_spec_tracker=self._rendered_spec_tracker, + element_name=metric_name, + group_by=tuple(LinkableElementReference(group_by_name.lower()) for group_by_name in group_by), + ) diff --git a/metricflow/specs/where_filter_time_dimension.py b/metricflow/specs/where_filter_time_dimension.py index e08ab6ac5f..f35e365153 100644 --- a/metricflow/specs/where_filter_time_dimension.py +++ b/metricflow/specs/where_filter_time_dimension.py @@ -35,7 +35,7 @@ def __str__(self) -> str: Important in the Jinja sandbox. 
""" call_parameter_set = TimeDimensionCallParameterSet( - entity_path=self._entity_links, + entity_path=self._group_by_links, time_dimension_reference=TimeDimensionReference(self._element_name), time_granularity=self._time_grain, date_part=self._date_part, @@ -95,7 +95,7 @@ def create( where_filter_location=self._where_filter_location, rendered_spec_tracker=self._rendered_spec_tracker, element_name=structured_name.element_name, - entity_links=tuple(EntityReference(entity_link_name.lower()) for entity_link_name in entity_path) + group_by_links=tuple(EntityReference(group_by_link_name.lower()) for group_by_link_name in entity_path) + structured_name.entity_links, time_grain=TimeGranularity(time_granularity_name.lower()) if time_granularity_name else None, date_part=DatePart(date_part_name.lower()) if date_part_name else None, diff --git a/metricflow/specs/where_filter_transform.py b/metricflow/specs/where_filter_transform.py index cb38f50614..2420e53838 100644 --- a/metricflow/specs/where_filter_transform.py +++ b/metricflow/specs/where_filter_transform.py @@ -14,6 +14,7 @@ from metricflow.specs.specs import LinkableSpecSet, WhereFilterSpec from metricflow.specs.where_filter_dimension import WhereFilterDimensionFactory from metricflow.specs.where_filter_entity import WhereFilterEntityFactory +from metricflow.specs.where_filter_metric import WhereFilterMetricFactory from metricflow.specs.where_filter_time_dimension import WhereFilterTimeDimensionFactory from metricflow.sql.sql_bind_parameters import SqlBindParameters @@ -75,20 +76,27 @@ def create_from_where_filter_intersection( # noqa: D where_filter_location=filter_location, rendered_spec_tracker=rendered_spec_tracker, ) - try: - # If there was an error with the template, it should have been caught while resolving the specs for - # the filters during query resolution. 
- where_sql = jinja2.Template(where_filter.where_sql_template, undefined=jinja2.StrictUndefined).render( - { - "Dimension": dimension_factory.create, - "TimeDimension": time_dimension_factory.create, - "Entity": entity_factory.create, - } - ) - except (jinja2.exceptions.UndefinedError, jinja2.exceptions.TemplateSyntaxError) as e: - raise RenderSqlTemplateException( - f"Error while rendering Jinja template:\n{where_filter.where_sql_template}" - ) from e + metric_factory = WhereFilterMetricFactory( + column_association_resolver=self._column_association_resolver, + spec_resolution_lookup=self._spec_resolution_lookup, + where_filter_location=filter_location, + rendered_spec_tracker=rendered_spec_tracker, + ) + # try: + # If there was an error with the template, it should have been caught while resolving the specs for + # the filters during query resolution. + where_sql = jinja2.Template(where_filter.where_sql_template, undefined=jinja2.StrictUndefined).render( + { + "Dimension": dimension_factory.create, + "TimeDimension": time_dimension_factory.create, + "Entity": entity_factory.create, + "Metric": metric_factory.create, + } + ) + # except (jinja2.exceptions.UndefinedError, jinja2.exceptions.TemplateSyntaxError) as e: + # raise RenderSqlTemplateException( + # f"Error while rendering Jinja template:\n{where_filter.where_sql_template}" + # ) from e filter_specs.append( WhereFilterSpec( where_sql=where_sql, diff --git a/metricflow/test/collection_helpers/test_pretty_print.py b/metricflow/test/collection_helpers/test_pretty_print.py index 63c7de7edb..747504cee9 100644 --- a/metricflow/test/collection_helpers/test_pretty_print.py +++ b/metricflow/test/collection_helpers/test_pretty_print.py @@ -39,7 +39,7 @@ def test_classes() -> None: # noqa: D """\ TimeDimensionSpec( element_name='metric_time', - entity_links=(), + group_by_links=(), time_granularity=DAY, date_part=None, aggregation_state=None, diff --git a/metricflow/test/dataflow/builder/test_cyclic_join.py 
b/metricflow/test/dataflow/builder/test_cyclic_join.py index a14908f039..9a30b0ed6d 100644 --- a/metricflow/test/dataflow/builder/test_cyclic_join.py +++ b/metricflow/test/dataflow/builder/test_cyclic_join.py @@ -41,7 +41,7 @@ def test_cyclic_join( # noqa: D dimension_specs=( DimensionSpec( element_name="capacity_latest", - entity_links=(EntityReference(element_name="cyclic_entity"),), + group_by_links=(EntityReference(element_name="cyclic_entity"),), ), ), ) diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py index 99eb4da692..bfd8c0dea2 100644 --- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py +++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py @@ -44,7 +44,7 @@ def test_simple_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), ), ), ) @@ -77,7 +77,7 @@ def test_primary_entity_dimension( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), ), ), ) @@ -110,11 +110,11 @@ def test_joined_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), ), DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), ) @@ -214,7 +214,7 @@ def test_multiple_metrics_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), ), ), time_dimension_specs=(MTD_SPEC_DAY,), @@ -248,7 +248,7 @@ def test_single_semantic_model_ratio_metrics_plan( dimension_specs=( DimensionSpec( 
element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), time_dimension_specs=(MTD_SPEC_DAY,), @@ -282,7 +282,7 @@ def test_multi_semantic_model_ratio_metrics_plan( dimension_specs=( DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), time_dimension_specs=(MTD_SPEC_DAY,), @@ -316,7 +316,7 @@ def test_multihop_join_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), EntityReference(element_name="customer_id"), ), @@ -441,7 +441,7 @@ def test_multihop_join_plan_ambiguous_dim( # noqa: D dimension_specs=( DimensionSpec( element_name="home_country", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), @@ -665,7 +665,7 @@ def test_common_semantic_model( # noqa: D metric_specs=(MetricSpec(element_name="bookings"), MetricSpec(element_name="booking_value")), dimension_specs=( DataSet.metric_time_dimension_spec(TimeGranularity.DAY), - DimensionSpec(element_name="country_latest", entity_links=(EntityReference("listing"),)), + DimensionSpec(element_name="country_latest", group_by_links=(EntityReference("listing"),)), ), ) ) @@ -855,7 +855,7 @@ def test_join_to_time_spine_with_non_metric_time( # noqa: D MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_fill_nulls_with_0"),), time_dimension_specs=( - TimeDimensionSpec(element_name="paid_at", entity_links=(EntityReference("booking"),)), + TimeDimensionSpec(element_name="paid_at", group_by_links=(EntityReference("booking"),)), ), ) ) @@ -932,7 +932,7 @@ def test_min_max_only_categorical( dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( query_spec=MetricFlowQuerySpec( dimension_specs=( - 
DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), + DimensionSpec(element_name="country_latest", group_by_links=(EntityReference(element_name="listing"),)), ), min_max_only=True, ) @@ -961,7 +961,7 @@ def test_min_max_only_time( dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( query_spec=MetricFlowQuerySpec( time_dimension_specs=( - TimeDimensionSpec(element_name="paid_at", entity_links=(EntityReference("booking"),)), + TimeDimensionSpec(element_name="paid_at", group_by_links=(EntityReference("booking"),)), ), min_max_only=True, ) @@ -1036,8 +1036,8 @@ def test_metric_time_with_other_dimensions( # noqa: D MetricFlowQuerySpec( time_dimension_specs=(MTD_SPEC_DAY, MTD_SPEC_MONTH), dimension_specs=( - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference("user"),)), - DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference("listing"),)), + DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference("user"),)), + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference("listing"),)), ), ) ) @@ -1064,7 +1064,9 @@ def test_dimensions_with_time_constraint( # noqa: D dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( MetricFlowQuerySpec( time_dimension_specs=(MTD_SPEC_MONTH,), - dimension_specs=(DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference("listing"),)),), + dimension_specs=( + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference("listing"),)), + ), time_range_constraint=TimeRangeConstraint( start_time=datetime.datetime(2020, 1, 1), end_time=datetime.datetime(2020, 1, 3) ), @@ -1096,7 +1098,7 @@ def test_min_max_only_time_year( time_dimension_specs=( TimeDimensionSpec( element_name="paid_at", - entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), time_granularity=TimeGranularity.YEAR, ), ), @@ -1265,3 +1267,29 @@ 
def test_offset_to_grain_metric_filter_and_query_have_different_granularities( mf_test_session_state=mf_test_session_state, dag_graph=dataflow_plan, ) + + +def test_metric_in_where_filter( + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, + query_parser: MetricFlowQueryParser, + create_source_tables: bool, +) -> None: + query_spec = query_parser.parse_and_validate_query( + metric_names=("active_listings",), + ) + dataflow_plan = dataflow_plan_builder.build_plan(query_spec) + + assert_plan_snapshot_text_equal( + request=request, + mf_test_session_state=mf_test_session_state, + plan=dataflow_plan, + plan_snapshot_text=dataflow_plan.text_structure(), + ) + + display_graph_if_requested( + request=request, + mf_test_session_state=mf_test_session_state, + dag_graph=dataflow_plan, + ) diff --git a/metricflow/test/dataflow/builder/test_node_evaluator.py b/metricflow/test/dataflow/builder/test_node_evaluator.py index ec29def401..25ca57d9e4 100644 --- a/metricflow/test/dataflow/builder/test_node_evaluator.py +++ b/metricflow/test/dataflow/builder/test_node_evaluator.py @@ -118,7 +118,7 @@ def test_node_evaluator_with_unjoinable_specs( # noqa: D required_linkable_specs=[ DimensionSpec( element_name="verification_type", - entity_links=(EntityReference(element_name="verification"),), + group_by_links=(EntityReference(element_name="verification"),), ) ], left_node=bookings_source_node, @@ -131,7 +131,7 @@ def test_node_evaluator_with_unjoinable_specs( # noqa: D unjoinable_linkable_specs=( DimensionSpec( element_name="verification_type", - entity_links=(EntityReference(element_name="verification"),), + group_by_links=(EntityReference(element_name="verification"),), ), ), ) @@ -146,12 +146,14 @@ def test_node_evaluator_with_local_spec( # noqa: D "bookings_source" ] evaluation = node_evaluator.evaluate_node( - required_linkable_specs=[DimensionSpec(element_name="is_instant", 
entity_links=(EntityReference("booking"),))], + required_linkable_specs=[ + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)) + ], left_node=bookings_source_node, default_join_type=SqlJoinType.LEFT_OUTER, ) assert evaluation == LinkableInstanceSatisfiabilityEvaluation( - local_linkable_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),), + local_linkable_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),), joinable_linkable_specs=(), join_recipes=(), unjoinable_linkable_specs=(), @@ -168,7 +170,7 @@ def test_node_evaluator_with_local_spec_using_primary_entity( # noqa: D ] evaluation = node_evaluator.evaluate_node( required_linkable_specs=[ - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference(element_name="user"),)) + DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference(element_name="user"),)) ], left_node=bookings_source_node, default_join_type=SqlJoinType.LEFT_OUTER, @@ -179,7 +181,7 @@ def test_node_evaluator_with_local_spec_using_primary_entity( # noqa: D local_linkable_specs=( DimensionSpec( element_name="home_state_latest", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ), joinable_linkable_specs=(), @@ -199,14 +201,14 @@ def test_node_evaluator_with_joined_spec( # noqa: D ] evaluation = node_evaluator.evaluate_node( required_linkable_specs=[ - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), DimensionSpec( element_name="capacity_latest", - entity_links=(EntityReference(element_name="listing"),), + 
group_by_links=(EntityReference(element_name="listing"),), ), ], left_node=bookings_source_node, @@ -214,15 +216,15 @@ def test_node_evaluator_with_joined_spec( # noqa: D ) assert evaluation == LinkableInstanceSatisfiabilityEvaluation( - local_linkable_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),), + local_linkable_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),), joinable_linkable_specs=( DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), DimensionSpec( element_name="capacity_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), join_recipes=( @@ -234,11 +236,11 @@ def test_node_evaluator_with_joined_spec( # noqa: D satisfiable_linkable_specs=[ DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), DimensionSpec( element_name="capacity_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ], join_on_partition_dimensions=(), @@ -262,7 +264,7 @@ def test_node_evaluator_with_joined_spec_on_unique_id( # noqa: D required_linkable_specs=[ DimensionSpec( element_name="company_name", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ], left_node=listings_node, @@ -274,7 +276,7 @@ def test_node_evaluator_with_joined_spec_on_unique_id( # noqa: D joinable_linkable_specs=( DimensionSpec( element_name="company_name", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ), join_recipes=( @@ -286,7 +288,7 @@ def test_node_evaluator_with_joined_spec_on_unique_id( # noqa: D 
satisfiable_linkable_specs=[ DimensionSpec( element_name="company_name", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ], join_on_partition_dimensions=(), @@ -310,11 +312,11 @@ def test_node_evaluator_with_multiple_joined_specs( # noqa: D required_linkable_specs=[ DimensionSpec( element_name="home_state_latest", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ], left_node=views_source, @@ -326,11 +328,11 @@ def test_node_evaluator_with_multiple_joined_specs( # noqa: D joinable_linkable_specs=( EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), DimensionSpec( element_name="home_state_latest", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ), join_recipes=( @@ -342,7 +344,7 @@ def test_node_evaluator_with_multiple_joined_specs( # noqa: D satisfiable_linkable_specs=[ EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ) ], join_on_partition_dimensions=(), @@ -357,7 +359,7 @@ def test_node_evaluator_with_multiple_joined_specs( # noqa: D satisfiable_linkable_specs=[ DimensionSpec( element_name="home_state_latest", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ) ], join_on_partition_dimensions=(), @@ -381,7 +383,7 @@ def test_node_evaluator_with_multihop_joined_spec( # noqa: D linkable_specs = [ DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), 
EntityReference(element_name="customer_id"), ), @@ -405,7 +407,7 @@ def test_node_evaluator_with_multihop_joined_spec( # noqa: D joinable_linkable_specs=( DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), EntityReference(element_name="customer_id"), ), @@ -418,7 +420,7 @@ def test_node_evaluator_with_multihop_joined_spec( # noqa: D satisfiable_linkable_specs=[ DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), EntityReference(element_name="customer_id"), ), @@ -428,11 +430,11 @@ def test_node_evaluator_with_multihop_joined_spec( # noqa: D join_on_partition_time_dimensions=( PartitionTimeDimensionJoinDescription( start_node_time_dimension_spec=TimeDimensionSpec( - element_name="ds_partitioned", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="ds_partitioned", group_by_links=(), time_granularity=TimeGranularity.DAY ), node_to_join_time_dimension_spec=TimeDimensionSpec( element_name="ds_partitioned", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ), ), @@ -453,7 +455,7 @@ def test_node_evaluator_with_partition_joined_spec( # noqa: D required_linkable_specs=[ DimensionSpec( element_name="home_state", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ], left_node=mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ @@ -467,7 +469,7 @@ def test_node_evaluator_with_partition_joined_spec( # noqa: D joinable_linkable_specs=( DimensionSpec( element_name="home_state", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ), join_recipes=( @@ -479,7 +481,7 @@ def test_node_evaluator_with_partition_joined_spec( # noqa: D satisfiable_linkable_specs=[ DimensionSpec( element_name="home_state", - 
entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ], join_on_partition_dimensions=(), @@ -487,11 +489,11 @@ def test_node_evaluator_with_partition_joined_spec( # noqa: D PartitionTimeDimensionJoinDescription( start_node_time_dimension_spec=TimeDimensionSpec( element_name="ds_partitioned", - entity_links=(), + group_by_links=(), ), node_to_join_time_dimension_spec=TimeDimensionSpec( element_name="ds_partitioned", - entity_links=(), + group_by_links=(), ), ), ), @@ -526,7 +528,7 @@ def test_node_evaluator_with_scd_target( required_linkable_specs=[ DimensionSpec( element_name="is_lux", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ) ], left_node=mf_engine_test_fixture_mapping[SemanticManifestSetup.SCD_MANIFEST].read_node_mapping[ @@ -540,7 +542,7 @@ def test_node_evaluator_with_scd_target( joinable_linkable_specs=( DimensionSpec( element_name="is_lux", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), join_recipes=( @@ -552,14 +554,14 @@ def test_node_evaluator_with_scd_target( satisfiable_linkable_specs=[ DimensionSpec( element_name="is_lux", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ], join_on_partition_dimensions=(), join_on_partition_time_dimensions=(), validity_window=ValidityWindowJoinDescription( - window_start_dimension=TimeDimensionSpec(element_name="window_start", entity_links=()), - window_end_dimension=TimeDimensionSpec(element_name="window_end", entity_links=()), + window_start_dimension=TimeDimensionSpec(element_name="window_start", group_by_links=()), + window_end_dimension=TimeDimensionSpec(element_name="window_end", group_by_links=()), ), join_type=SqlJoinType.LEFT_OUTER, ), @@ -597,7 +599,7 @@ def test_node_evaluator_with_multi_hop_scd_target( 
joinable_linkable_specs=( DimensionSpec( element_name="is_confirmed_lux", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="lux_listing"), ), @@ -610,7 +612,7 @@ def test_node_evaluator_with_multi_hop_scd_target( satisfiable_linkable_specs=[ DimensionSpec( element_name="is_confirmed_lux", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="lux_listing"), ), @@ -620,10 +622,10 @@ def test_node_evaluator_with_multi_hop_scd_target( join_on_partition_time_dimensions=(), validity_window=ValidityWindowJoinDescription( window_start_dimension=TimeDimensionSpec( - element_name="window_start", entity_links=(EntityReference(element_name="lux_listing"),) + element_name="window_start", group_by_links=(EntityReference(element_name="lux_listing"),) ), window_end_dimension=TimeDimensionSpec( - element_name="window_end", entity_links=(EntityReference(element_name="lux_listing"),) + element_name="window_end", group_by_links=(EntityReference(element_name="lux_listing"),) ), ), join_type=SqlJoinType.LEFT_OUTER, @@ -662,7 +664,7 @@ def test_node_evaluator_with_multi_hop_through_scd( joinable_linkable_specs=( DimensionSpec( element_name="home_state_latest", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), @@ -675,7 +677,7 @@ def test_node_evaluator_with_multi_hop_through_scd( satisfiable_linkable_specs=[ DimensionSpec( element_name="home_state_latest", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), @@ -684,8 +686,8 @@ def test_node_evaluator_with_multi_hop_through_scd( join_on_partition_dimensions=(), join_on_partition_time_dimensions=(), validity_window=ValidityWindowJoinDescription( - window_start_dimension=TimeDimensionSpec(element_name="window_start", entity_links=()), - window_end_dimension=TimeDimensionSpec(element_name="window_end", 
entity_links=()), + window_start_dimension=TimeDimensionSpec(element_name="window_start", group_by_links=()), + window_end_dimension=TimeDimensionSpec(element_name="window_end", group_by_links=()), ), join_type=SqlJoinType.LEFT_OUTER, ), @@ -724,7 +726,7 @@ def test_node_evaluator_with_invalid_multi_hop_scd( unjoinable_linkable_specs=( DimensionSpec( element_name="account_type", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), diff --git a/metricflow/test/dataflow/optimizer/source_scan/test_source_scan_optimizer.py b/metricflow/test/dataflow/optimizer/source_scan/test_source_scan_optimizer.py index 8bda90dfe1..de05ba9c0f 100644 --- a/metricflow/test/dataflow/optimizer/source_scan/test_source_scan_optimizer.py +++ b/metricflow/test/dataflow/optimizer/source_scan/test_source_scan_optimizer.py @@ -174,7 +174,7 @@ def test_2_metrics_from_1_semantic_model( # noqa: D metric_specs=(MetricSpec(element_name="bookings"), MetricSpec(element_name="booking_value")), dimension_specs=( DataSet.metric_time_dimension_spec(TimeGranularity.DAY), - DimensionSpec(element_name="country_latest", entity_links=(EntityReference("listing"),)), + DimensionSpec(element_name="country_latest", group_by_links=(EntityReference("listing"),)), ), ), expected_num_sources_in_unoptimized=4, diff --git a/metricflow/test/examples/test_node_sql.py b/metricflow/test/examples/test_node_sql.py index e358fb0f89..2dfe2c10aa 100644 --- a/metricflow/test/examples/test_node_sql.py +++ b/metricflow/test/examples/test_node_sql.py @@ -71,7 +71,7 @@ def test_view_sql_generated_at_a_node( parent_node=metric_time_node, include_specs=InstanceSpecSet( time_dimension_specs=( - TimeDimensionSpec(element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY), ), ), ) diff --git a/metricflow/test/fixtures/manifest_fixtures.py 
b/metricflow/test/fixtures/manifest_fixtures.py index c49e1ac81e..b5c186a7be 100644 --- a/metricflow/test/fixtures/manifest_fixtures.py +++ b/metricflow/test/fixtures/manifest_fixtures.py @@ -224,10 +224,10 @@ def mf_engine_test_fixture_mapping( fixture_mapping: Dict[SemanticManifestSetup, MetricFlowEngineTestFixture] = {} for semantic_manifest_setup in SemanticManifestSetup: with patch_id_generators_helper(semantic_manifest_setup.id_number_space.start_value): - try: - build_result = load_semantic_manifest(semantic_manifest_setup.semantic_manifest_name, template_mapping) - except Exception as e: - raise RuntimeError(f"Error while loading semantic manifest: {semantic_manifest_setup}") from e + # try: + build_result = load_semantic_manifest(semantic_manifest_setup.semantic_manifest_name, template_mapping) + # except Exception as e: + # raise RuntimeError(f"Error while loading semantic manifest: {semantic_manifest_setup}") from e fixture_mapping[semantic_manifest_setup] = MetricFlowEngineTestFixture.from_parameters( sql_client, build_result.semantic_manifest diff --git a/metricflow/test/fixtures/semantic_manifest_yamls/simple_manifest/metrics.yaml b/metricflow/test/fixtures/semantic_manifest_yamls/simple_manifest/metrics.yaml index 3648c71ec6..b63b75a6c3 100644 --- a/metricflow/test/fixtures/semantic_manifest_yamls/simple_manifest/metrics.yaml +++ b/metricflow/test/fixtures/semantic_manifest_yamls/simple_manifest/metrics.yaml @@ -753,3 +753,11 @@ metric: window: 7 days entity: user calculation: conversion_rate +--- +metric: + name: active_listings + description: Listings with at least 2 bookings + type: simple + type_params: + measure: listings + filter: "{{ Metric('bookings', ['listing']) }} > 2" diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index 266148b72b..0e4920d26b 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ 
b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1889,3 +1889,11 @@ integration_test: ds ) b ON {{ render_date_sub("a", "ds", 5, TimeGranularity.DAY) }} = b.ds +--- +integration_test: + name: active_listings + description: Query a metric that has a filter containing a metric + model: SIMPLE_MODEL + metrics: ["active_listings"] + check_query: | + SELECT 1 diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index 1eae8106c7..21dece0161 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -230,7 +230,8 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", - CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + # CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + ["itest_metrics.yaml/active_listings"], ids=lambda name: f"name={name}", ) def test_case( @@ -301,35 +302,38 @@ def test_case( limit=case.limit, time_constraint_start=parser.parse(case.time_constraint[0]) if case.time_constraint else None, time_constraint_end=parser.parse(case.time_constraint[1]) if case.time_constraint else None, - where_constraint=jinja2.Template( - case.where_filter, - undefined=jinja2.StrictUndefined, - ).render( - source_schema=mf_test_session_state.mf_source_schema, - render_time_constraint=check_query_helpers.render_time_constraint, - render_between_time_constraint=check_query_helpers.render_between_time_constraint, - TimeGranularity=TimeGranularity, - DatePart=DatePart, - render_date_sub=check_query_helpers.render_date_sub, - render_date_trunc=check_query_helpers.render_date_trunc, - render_extract=check_query_helpers.render_extract, - render_percentile_expr=check_query_helpers.render_percentile_expr, - mf_time_spine_source=semantic_manifest_lookup.time_spine_source.spine_table.sql, - double_data_type_name=check_query_helpers.double_data_type_name, - 
render_dimension_template=check_query_helpers.render_dimension_template, - render_entity_template=check_query_helpers.render_entity_template, - render_time_dimension_template=check_query_helpers.render_time_dimension_template, - generate_random_uuid=check_query_helpers.generate_random_uuid, - cast_to_ts=check_query_helpers.cast_to_ts, - ) - if case.where_filter - else None, + where_constraint=( + jinja2.Template( + case.where_filter, + undefined=jinja2.StrictUndefined, + ).render( + source_schema=mf_test_session_state.mf_source_schema, + render_time_constraint=check_query_helpers.render_time_constraint, + render_between_time_constraint=check_query_helpers.render_between_time_constraint, + TimeGranularity=TimeGranularity, + DatePart=DatePart, + render_date_sub=check_query_helpers.render_date_sub, + render_date_trunc=check_query_helpers.render_date_trunc, + render_extract=check_query_helpers.render_extract, + render_percentile_expr=check_query_helpers.render_percentile_expr, + mf_time_spine_source=semantic_manifest_lookup.time_spine_source.spine_table.sql, + double_data_type_name=check_query_helpers.double_data_type_name, + render_dimension_template=check_query_helpers.render_dimension_template, + render_entity_template=check_query_helpers.render_entity_template, + render_time_dimension_template=check_query_helpers.render_time_dimension_template, + generate_random_uuid=check_query_helpers.generate_random_uuid, + cast_to_ts=check_query_helpers.cast_to_ts, + ) + if case.where_filter + else None + ), order_by_names=case.order_bys, min_max_only=case.min_max_only, ) ) actual = query_result.result_df + assert 0, query_result.sql expected = sql_client.query( jinja2.Template( diff --git a/metricflow/test/model/semantics/test_linkable_spec_resolver.py b/metricflow/test/model/semantics/test_linkable_spec_resolver.py index 42d249fbf6..89134d4457 100644 --- a/metricflow/test/model/semantics/test_linkable_spec_resolver.py +++ 
b/metricflow/test/model/semantics/test_linkable_spec_resolver.py @@ -25,7 +25,7 @@ def simple_model_spec_resolver( # noqa: D return ValidLinkableSpecResolver( semantic_manifest=simple_semantic_manifest_lookup.semantic_manifest, semantic_model_lookup=simple_semantic_manifest_lookup.semantic_model_lookup, - max_entity_links=MAX_JOIN_HOPS, + max_group_by_links=MAX_JOIN_HOPS, ) @@ -36,7 +36,7 @@ def cyclic_join_manifest_spec_resolver( # noqa: D return ValidLinkableSpecResolver( semantic_manifest=cyclic_join_semantic_manifest_lookup.semantic_manifest, semantic_model_lookup=cyclic_join_semantic_manifest_lookup.semantic_model_lookup, - max_entity_links=MAX_JOIN_HOPS, + max_group_by_links=MAX_JOIN_HOPS, ) diff --git a/metricflow/test/model/test_where_filter_spec.py b/metricflow/test/model/test_where_filter_spec.py index f0e978d013..3ee676a10f 100644 --- a/metricflow/test/model/test_where_filter_spec.py +++ b/metricflow/test/model/test_where_filter_spec.py @@ -86,7 +86,7 @@ def test_dimension_in_filter( # noqa: D entity_path=(EntityReference("listing"),), dimension_reference=DimensionReference("country_latest"), ), - resolved_spec=DimensionSpec(element_name="country_latest", entity_links=(EntityReference("listing"),)), + resolved_spec=DimensionSpec(element_name="country_latest", group_by_links=(EntityReference("listing"),)), ), ).create_from_where_filter_intersection( filter_location=EXAMPLE_FILTER_LOCATION, @@ -97,7 +97,7 @@ def test_dimension_in_filter( # noqa: D assert where_filter_spec.where_sql == "listing__country_latest = 'US'" assert where_filter_spec.linkable_spec_set == LinkableSpecSet( dimension_specs=( - DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), + DimensionSpec(element_name="country_latest", group_by_links=(EntityReference(element_name="listing"),)), ), time_dimension_specs=(), entity_specs=(), @@ -117,7 +117,7 @@ def test_dimension_in_filter_with_grain( # noqa: D ), resolved_spec=TimeDimensionSpec( 
element_name="country_latest", - entity_links=(EntityReference("listing"),), + group_by_links=(EntityReference("listing"),), time_granularity=TimeGranularity.WEEK, ), ), @@ -135,7 +135,7 @@ def test_dimension_in_filter_with_grain( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), time_granularity=TimeGranularity.WEEK, ), ), @@ -156,7 +156,7 @@ def test_time_dimension_in_filter( # noqa: D ), resolved_spec=TimeDimensionSpec( element_name="created_at", - entity_links=(EntityReference("listing"),), + group_by_links=(EntityReference("listing"),), time_granularity=TimeGranularity.MONTH, ), ), @@ -174,7 +174,7 @@ def test_time_dimension_in_filter( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="created_at", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), time_granularity=TimeGranularity.MONTH, ), ), @@ -195,7 +195,7 @@ def test_date_part_in_filter( # noqa: D ), resolved_spec=TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, date_part=DatePart.YEAR, ), @@ -214,7 +214,7 @@ def test_date_part_in_filter( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, date_part=DatePart.YEAR, ), @@ -244,7 +244,7 @@ def resolved_spec_lookup() -> FilterSpecResolutionLookUp: ), resolved_spec=TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.WEEK, date_part=DatePart.YEAR, ), @@ -283,7 +283,7 @@ def test_date_part_and_grain_in_filter( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.WEEK, 
date_part=DatePart.YEAR, ), @@ -319,7 +319,7 @@ def test_date_part_less_than_grain_in_filter( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.WEEK, date_part=DatePart.DAY, ), @@ -343,7 +343,7 @@ def test_entity_in_filter( # noqa: D entity_path=(EntityReference("listing"),), entity_reference=EntityReference("user"), ), - resolved_spec=EntitySpec(element_name="user", entity_links=(EntityReference("listing"),)), + resolved_spec=EntitySpec(element_name="user", group_by_links=(EntityReference("listing"),)), ), ).create_from_where_filter(filter_location=EXAMPLE_FILTER_LOCATION, where_filter=where_filter) @@ -351,7 +351,7 @@ def test_entity_in_filter( # noqa: D assert where_filter_spec.linkable_spec_set == LinkableSpecSet( dimension_specs=(), time_dimension_specs=(), - entity_specs=(EntitySpec(element_name="user", entity_links=(EntityReference(element_name="listing"),)),), + entity_specs=(EntitySpec(element_name="user", group_by_links=(EntityReference(element_name="listing"),)),), ) @@ -377,7 +377,7 @@ def get_spec(dimension: str) -> WhereFilterSpec: where_filter_intersection=PydanticWhereFilterIntersection(where_filters=[where_filter]), resolved_spec=TimeDimensionSpec( element_name=METRIC_TIME_ELEMENT_NAME, - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.WEEK, date_part=DatePart.YEAR, ), diff --git a/metricflow/test/naming/conftest.py b/metricflow/test/naming/conftest.py index 546f25c9a0..826e67880f 100644 --- a/metricflow/test/naming/conftest.py +++ b/metricflow/test/naming/conftest.py @@ -20,33 +20,33 @@ def specs() -> Sequence[LinkableInstanceSpec]: # noqa: D MTD_SPEC_YEAR, TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.DAY, ), # 
Dimensions DimensionSpec( element_name="country", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), ), DimensionSpec( element_name="country", - entity_links=( + group_by_links=( EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), ), - DimensionSpec(element_name="is_instant", entity_links=(EntityReference(element_name="booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference(element_name="booking"),)), # Entities EntitySpec( element_name="listing", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), ), EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), ), ) diff --git a/metricflow/test/naming/test_dunder_naming_scheme.py b/metricflow/test/naming/test_dunder_naming_scheme.py index d6d716c975..eaa3b5ec2a 100644 --- a/metricflow/test/naming/test_dunder_naming_scheme.py +++ b/metricflow/test/naming/test_dunder_naming_scheme.py @@ -22,7 +22,7 @@ def test_input_str(dunder_naming_scheme: DunderNamingScheme) -> None: # noqa: D dunder_naming_scheme.input_str( DimensionSpec( element_name="country", - entity_links=( + group_by_links=( EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), @@ -35,7 +35,7 @@ def test_input_str(dunder_naming_scheme: DunderNamingScheme) -> None: # noqa: D dunder_naming_scheme.input_str( TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.DAY, ) @@ -47,7 +47,7 @@ def 
test_input_str(dunder_naming_scheme: DunderNamingScheme) -> None: # noqa: D dunder_naming_scheme.input_str( TimeDimensionSpec( element_name="creation_time", - entity_links=( + group_by_links=( EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), @@ -61,7 +61,7 @@ def test_input_str(dunder_naming_scheme: DunderNamingScheme) -> None: # noqa: D dunder_naming_scheme.input_str( EntitySpec( element_name="user", - entity_links=( + group_by_links=( EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), @@ -86,7 +86,7 @@ def test_spec_pattern( # noqa: D assert tuple(dunder_naming_scheme.spec_pattern("listing__user__country").match(specs)) == ( DimensionSpec( element_name="country", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), @@ -102,7 +102,7 @@ def test_spec_pattern( # noqa: D assert tuple(dunder_naming_scheme.spec_pattern("booking__listing__user").match(specs)) == ( EntitySpec( element_name="user", - entity_links=( + group_by_links=( EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), diff --git a/metricflow/test/naming/test_metric_name_scheme.py b/metricflow/test/naming/test_metric_name_scheme.py index 7084753408..d0c0e72bdd 100644 --- a/metricflow/test/naming/test_metric_name_scheme.py +++ b/metricflow/test/naming/test_metric_name_scheme.py @@ -28,7 +28,7 @@ def test_spec_pattern(metric_naming_scheme: MetricNamingScheme) -> None: # noqa MetricSpec(element_name="metric_0"), MetricSpec(element_name="metric_1"), # Shouldn't happen in practice, but checks to see that only metric specs are matched. 
- DimensionSpec(element_name="metric_0", entity_links=()), + DimensionSpec(element_name="metric_0", group_by_links=()), ) assert (MetricSpec(element_name="metric_0"),) == tuple(spec_pattern.match(specs)) diff --git a/metricflow/test/naming/test_object_builder_naming_scheme.py b/metricflow/test/naming/test_object_builder_naming_scheme.py index ab52412cd1..ee79232a1d 100644 --- a/metricflow/test/naming/test_object_builder_naming_scheme.py +++ b/metricflow/test/naming/test_object_builder_naming_scheme.py @@ -22,7 +22,7 @@ def test_input_str(object_builder_naming_scheme: ObjectBuilderNamingScheme) -> N object_builder_naming_scheme.input_str( DimensionSpec( element_name="country", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), ) ) == "Dimension('listing__country', entity_path=['booking'])" @@ -31,7 +31,7 @@ def test_input_str(object_builder_naming_scheme: ObjectBuilderNamingScheme) -> N assert object_builder_naming_scheme.input_str( TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.DAY, ) @@ -41,7 +41,7 @@ def test_input_str(object_builder_naming_scheme: ObjectBuilderNamingScheme) -> N object_builder_naming_scheme.input_str( EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), ) ) == "Entity('listing__user', entity_path=['booking'])" @@ -72,7 +72,7 @@ def test_spec_pattern( # noqa: D ) == ( DimensionSpec( element_name="country", - entity_links=( + group_by_links=( 
EntityReference(element_name="booking"), EntityReference(element_name="listing"), ), @@ -87,7 +87,7 @@ def test_spec_pattern( # noqa: D ) == ( TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.DAY, ), @@ -104,6 +104,6 @@ def test_spec_pattern( # noqa: D ) == ( EntitySpec( element_name="user", - entity_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), + group_by_links=(EntityReference(element_name="booking"), EntityReference(element_name="listing")), ), ) diff --git a/metricflow/test/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py b/metricflow/test/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py index 8efb771769..b50a62d8fc 100644 --- a/metricflow/test/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py +++ b/metricflow/test/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py @@ -29,7 +29,7 @@ def test_conversion_rate( """Test conversion metric data flow plan rendering.""" dimension_spec = DimensionSpec( element_name="referrer_id", - entity_links=(EntityReference(element_name="visit"),), + group_by_links=(EntityReference(element_name="visit"),), ) metric_spec = MetricSpec(element_name="visit_buy_conversion_rate") @@ -60,10 +60,10 @@ def test_conversion_rate_with_window( """Test conversion metric with a window data flow plan rendering.""" dimension_spec = DimensionSpec( element_name="referrer_id", - entity_links=(EntityReference(element_name="visit"),), + group_by_links=(EntityReference(element_name="visit"),), ) metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) metric_spec = 
MetricSpec(element_name="visit_buy_conversion_rate_7days") @@ -148,10 +148,10 @@ def test_conversion_rate_with_constant_properties( metric_spec = MetricSpec(element_name="visit_buy_conversion_rate_by_session") dimension_spec = DimensionSpec( element_name="referrer_id", - entity_links=(EntityReference(element_name="visit"),), + group_by_links=(EntityReference(element_name="visit"),), ) metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) dataflow_plan = dataflow_plan_builder.build_plan( query_spec=MetricFlowQuerySpec( @@ -181,7 +181,7 @@ def test_conversion_metric_join_to_timespine_and_fill_nulls_with_0( """Test conversion metric that joins to time spine and fills nulls with 0.""" metric_spec = MetricSpec(element_name="visit_buy_conversion_rate_7days_fill_nulls_with_0") metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) dataflow_plan = dataflow_plan_builder.build_plan( query_spec=MetricFlowQuerySpec( diff --git a/metricflow/test/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py b/metricflow/test/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py index 9392944529..309290f150 100644 --- a/metricflow/test/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py +++ b/metricflow/test/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py @@ -25,8 +25,8 @@ def test_dimensions_requiring_join( ) -> None: """Tests querying 2 dimensions that require a join.""" dimension_specs = ( - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference(element_name="user"),)), - DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference(element_name="listing"),)), + 
DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference(element_name="user"),)), + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference(element_name="listing"),)), ) dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( query_spec=MetricFlowQuerySpec(dimension_specs=dimension_specs) diff --git a/metricflow/test/plan_conversion/instance_converters/test_create_validity_window_join_description.py b/metricflow/test/plan_conversion/instance_converters/test_create_validity_window_join_description.py index bb65ced30c..a7025f3afc 100644 --- a/metricflow/test/plan_conversion/instance_converters/test_create_validity_window_join_description.py +++ b/metricflow/test/plan_conversion/instance_converters/test_create_validity_window_join_description.py @@ -42,10 +42,10 @@ def test_validity_window_conversion( window_start_dimension=TimeDimensionSpec( element_name="window_start", time_granularity=TimeGranularity.DAY, - entity_links=(), + group_by_links=(), ), window_end_dimension=TimeDimensionSpec( - element_name="window_end", time_granularity=TimeGranularity.DAY, entity_links=() + element_name="window_end", time_granularity=TimeGranularity.DAY, group_by_links=() ), ) diff --git a/metricflow/test/plan_conversion/test_dataflow_to_execution.py b/metricflow/test/plan_conversion/test_dataflow_to_execution.py index 6be51ffa78..f358d13691 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_execution.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_execution.py @@ -49,11 +49,11 @@ def test_joined_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(), + group_by_links=(), ), DimensionSpec( element_name="country_latest", - entity_links=(EntityReference("listing"),), + group_by_links=(EntityReference("listing"),), ), ), ) @@ -91,7 +91,7 @@ def test_small_combined_metrics_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(), + 
group_by_links=(), ), ), ) @@ -129,10 +129,10 @@ def test_combined_metrics_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="is_instant", - entity_links=(), + group_by_links=(), ), ), - time_dimension_specs=(TimeDimensionSpec(element_name="ds", entity_links=()),), + time_dimension_specs=(TimeDimensionSpec(element_name="ds", group_by_links=()),), ) ) @@ -165,7 +165,7 @@ def test_multihop_joined_plan( # noqa: D dimension_specs=( DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), EntityReference(element_name="customer_id"), ), diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index 3dfe85389f..eb75d36295 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -182,7 +182,7 @@ def test_filter_with_where_constraint_node( # noqa: D "bookings_source" ] - ds_spec = TimeDimensionSpec(element_name="ds", entity_links=(), time_granularity=TimeGranularity.DAY) + ds_spec = TimeDimensionSpec(element_name="ds", group_by_links=(), time_granularity=TimeGranularity.DAY) filter_node = FilterElementsNode( parent_node=source_node, include_specs=InstanceSpecSet(measure_specs=(measure_spec,), time_dimension_specs=(ds_spec,)), @@ -196,7 +196,7 @@ def test_filter_with_where_constraint_node( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), time_granularity=TimeGranularity.DAY, ), ) @@ -287,7 +287,7 @@ def test_single_join_node( # noqa: D dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(EntityReference("listing"),), + group_by_links=(EntityReference("listing"),), ) dimension_source_node = 
mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "listings_latest" @@ -345,7 +345,7 @@ def test_multi_join_node( dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(), + group_by_links=(), ) dimension_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "listings_latest" @@ -414,7 +414,7 @@ def test_compute_metrics_node( dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(), + group_by_links=(), ) dimension_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "listings_latest" @@ -480,7 +480,7 @@ def test_compute_metrics_node_simple_expr( dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(), + group_by_links=(), ) dimension_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "listings_latest" @@ -550,7 +550,7 @@ def test_join_to_time_spine_node_without_offset( # noqa: D entity_spec = LinklessEntitySpec.from_element_name(element_name="listing") metric_input_measure_specs = (MetricInputMeasureSpec(measure_spec=measure_spec),) metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "bookings_source" @@ -618,7 +618,7 @@ def test_join_to_time_spine_node_with_offset_window( # noqa: D entity_spec = LinklessEntitySpec.from_element_name(element_name="listing") metric_input_measure_specs = (MetricInputMeasureSpec(measure_spec=measure_spec),) metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) 
measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "bookings_source" @@ -687,7 +687,7 @@ def test_join_to_time_spine_node_with_offset_to_grain( entity_spec = LinklessEntitySpec.from_element_name(element_name="listing") metric_input_measure_specs = (MetricInputMeasureSpec(measure_spec=measure_spec),) metric_time_spec = TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.DAY + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.DAY ) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "bookings_source" @@ -774,7 +774,7 @@ def test_compute_metrics_node_ratio_from_single_semantic_model( dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(), + group_by_links=(), ) dimension_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "listings_latest" @@ -831,12 +831,12 @@ def test_order_by_node( dimension_spec = DimensionSpec( element_name="is_instant", - entity_links=(), + group_by_links=(), ) time_dimension_spec = TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), ) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "bookings_source" @@ -891,7 +891,7 @@ def test_semi_additive_join_node( ) -> None: """Tests converting a dataflow plan to a SQL query plan using a SemiAdditiveJoinNode.""" non_additive_dimension_spec = NonAdditiveDimensionSpec(name="ds", window_choice=AggregationType.MIN) - time_dimension_spec = TimeDimensionSpec(element_name="ds", entity_links=()) + time_dimension_spec = TimeDimensionSpec(element_name="ds", group_by_links=()) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "accounts_source" @@ -922,9 +922,9 @@ def 
test_semi_additive_join_node_with_queried_group_by( ) -> None: """Tests converting a dataflow plan to a SQL query plan using a SemiAdditiveJoinNode.""" non_additive_dimension_spec = NonAdditiveDimensionSpec(name="ds", window_choice=AggregationType.MIN) - time_dimension_spec = TimeDimensionSpec(element_name="ds", entity_links=()) + time_dimension_spec = TimeDimensionSpec(element_name="ds", group_by_links=()) queried_time_dimension_spec = TimeDimensionSpec( - element_name="ds", entity_links=(), time_granularity=TimeGranularity.WEEK + element_name="ds", group_by_links=(), time_granularity=TimeGranularity.WEEK ) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ @@ -960,8 +960,8 @@ def test_semi_additive_join_node_with_grouping( window_choice=AggregationType.MAX, window_groupings=("user",), ) - entity_spec = LinklessEntitySpec(element_name="user", entity_links=()) - time_dimension_spec = TimeDimensionSpec(element_name="ds", entity_links=()) + entity_spec = LinklessEntitySpec(element_name="user", group_by_links=()) + time_dimension_spec = TimeDimensionSpec(element_name="ds", group_by_links=()) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "accounts_source" @@ -1002,7 +1002,7 @@ def test_constrain_time_range_node( ), ), time_dimension_specs=( - TimeDimensionSpec(element_name="ds", entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name="ds", group_by_links=(), time_granularity=TimeGranularity.DAY), ), ), ) @@ -1039,11 +1039,11 @@ def test_compute_metrics_node_ratio_from_multiple_semantic_models( """Tests the combine metrics node for ratio type metrics.""" dimension_spec = DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ) time_dimension_spec = TimeDimensionSpec( element_name="ds", - entity_links=(), + 
group_by_links=(), ) metric_spec = MetricSpec(element_name="bookings_per_view") @@ -1084,7 +1084,7 @@ def test_combine_output_node( # noqa: D ) dimension_spec = DimensionSpec( element_name="is_instant", - entity_links=(), + group_by_links=(), ) measure_source_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping[ "bookings_source" @@ -1134,8 +1134,8 @@ def test_dimensions_requiring_join( ) -> None: """Tests querying 2 dimensions that require a join.""" dimension_specs = ( - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference(element_name="user"),)), - DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference(element_name="listing"),)), + DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference(element_name="user"),)), + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference(element_name="listing"),)), ) dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( query_spec=MetricFlowQuerySpec(dimension_specs=dimension_specs) diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 77a43838d8..9a5ab62220 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -202,15 +202,15 @@ def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: # assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) assert query_spec.dimension_specs == ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), ) assert query_spec.time_dimension_specs == ( - TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), ) - assert query_spec.entity_specs == 
(EntitySpec(element_name="listing", entity_links=()),) + assert query_spec.entity_specs == (EntitySpec(element_name="listing", group_by_links=()),) assert query_spec.order_by_specs == ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + instance_spec=TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), descending=False, ), OrderBySpec( @@ -230,15 +230,15 @@ def test_query_parser_case_insensitivity(bookings_query_parser: MetricFlowQueryP assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) assert query_spec.dimension_specs == ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), ) assert query_spec.time_dimension_specs == ( - TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), ) - assert query_spec.entity_specs == (EntitySpec(element_name="listing", entity_links=()),) + assert query_spec.entity_specs == (EntitySpec(element_name="listing", group_by_links=()),) assert query_spec.order_by_specs == ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + instance_spec=TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), descending=False, ), OrderBySpec( @@ -261,15 +261,15 @@ def test_query_parser_case_insensitivity(bookings_query_parser: MetricFlowQueryP query_spec = bookings_query_parser.parse_and_validate_query(metrics=[metric], group_by=group_by, order_by=order_by) assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) assert query_spec.dimension_specs == ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + 
DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), ) assert query_spec.time_dimension_specs == ( - TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), ) - assert query_spec.entity_specs == (EntitySpec(element_name="listing", entity_links=()),) + assert query_spec.entity_specs == (EntitySpec(element_name="listing", group_by_links=()),) assert query_spec.order_by_specs == ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + instance_spec=TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), descending=False, ), OrderBySpec( @@ -298,15 +298,15 @@ def test_query_parser_with_object_params(bookings_query_parser: MetricFlowQueryP query_spec = bookings_query_parser.parse_and_validate_query(metrics=[metric], group_by=group_by, order_by=order_by) assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),) assert query_spec.dimension_specs == ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), ) assert query_spec.time_dimension_specs == ( - TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), ) - assert query_spec.entity_specs == (EntitySpec(element_name="listing", entity_links=()),) + assert query_spec.entity_specs == (EntitySpec(element_name="listing", group_by_links=()),) assert query_spec.order_by_specs == ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + instance_spec=TimeDimensionSpec(element_name=MTD, group_by_links=(), 
time_granularity=TimeGranularity.DAY), descending=False, ), OrderBySpec( @@ -336,7 +336,9 @@ def test_order_by_granularity_conversion() -> None: # The lowest common granularity is MONTH, so we expect the PTD in the order by to have that granularity. assert ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.MONTH), + instance_spec=TimeDimensionSpec( + element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.MONTH + ), descending=True, ), ) == query_spec.order_by_specs @@ -350,7 +352,7 @@ def test_order_by_granularity_no_conversion(bookings_query_parser: MetricFlowQue # The only granularity is DAY, so we expect the PTD in the order by to have that granularity. assert ( OrderBySpec( - instance_spec=TimeDimensionSpec(element_name=MTD, entity_links=(), time_granularity=TimeGranularity.DAY), + instance_spec=TimeDimensionSpec(element_name=MTD, group_by_links=(), time_granularity=TimeGranularity.DAY), descending=False, ), ) == query_spec.order_by_specs @@ -407,7 +409,7 @@ def test_parse_and_validate_where_constraint_dims(bookings_query_parser: MetricF where_constraint_str="{{ Dimension('booking__is_instant') }} = '1'", ) assert ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)) + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)) not in query_spec.dimension_specs ) diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index 65b0e8f774..3a5953fde1 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -41,7 +41,7 @@ def test_cumulative_metric( time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ), ), @@ -79,7 +79,7 @@ def 
test_cumulative_metric_with_time_constraint( time_dimension_specs=( TimeDimensionSpec( element_name="metric_time", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ), ), @@ -180,7 +180,7 @@ def test_cumulative_metric_no_window( time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.MONTH, ), ), @@ -243,7 +243,7 @@ def test_cumulative_metric_grain_to_date( time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.MONTH, ), ), @@ -304,7 +304,7 @@ def test_cumulative_metric_with_agg_time_dimension( metric_specs=(MetricSpec(element_name="trailing_2_months_revenue"),), dimension_specs=(), time_dimension_specs=( - TimeDimensionSpec(element_name="ds", entity_links=(EntityReference("revenue_instance"),)), + TimeDimensionSpec(element_name="ds", group_by_links=(EntityReference("revenue_instance"),)), ), ) ) diff --git a/metricflow/test/query_rendering/test_fill_nulls_with_rendering.py b/metricflow/test/query_rendering/test_fill_nulls_with_rendering.py index 192f66fec8..ea53947ce4 100644 --- a/metricflow/test/query_rendering/test_fill_nulls_with_rendering.py +++ b/metricflow/test/query_rendering/test_fill_nulls_with_rendering.py @@ -87,7 +87,7 @@ def test_simple_fill_nulls_with_0_with_non_metric_time( # noqa: D MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_fill_nulls_with_0"),), time_dimension_specs=( - TimeDimensionSpec(element_name="paid_at", entity_links=(EntityReference("booking"),)), + TimeDimensionSpec(element_name="paid_at", group_by_links=(EntityReference("booking"),)), ), ) ) @@ -112,7 +112,7 @@ def test_simple_fill_nulls_with_0_with_categorical_dimension( # noqa: D dataflow_plan = dataflow_plan_builder.build_plan( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_fill_nulls_with_0"),), - dimension_specs=(DimensionSpec(element_name="is_instant", 
entity_links=(EntityReference("booking"),)),), + dimension_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),), ) ) diff --git a/metricflow/test/query_rendering/test_metric_time_without_metrics.py b/metricflow/test/query_rendering/test_metric_time_without_metrics.py index 2b95cd533d..9cfd642527 100644 --- a/metricflow/test/query_rendering/test_metric_time_without_metrics.py +++ b/metricflow/test/query_rendering/test_metric_time_without_metrics.py @@ -28,7 +28,7 @@ def test_metric_time_only( """Tests querying only metric time.""" dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( query_spec=MetricFlowQuerySpec( - time_dimension_specs=(TimeDimensionSpec(element_name="metric_time", entity_links=()),), + time_dimension_specs=(TimeDimensionSpec(element_name="metric_time", group_by_links=()),), ), ) @@ -53,7 +53,7 @@ def test_metric_time_quarter_alone( # noqa:D query_spec=MetricFlowQuerySpec( time_dimension_specs=( TimeDimensionSpec( - element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.QUARTER + element_name="metric_time", group_by_links=(), time_granularity=TimeGranularity.QUARTER ), ), ), @@ -80,8 +80,8 @@ def test_metric_time_with_other_dimensions( # noqa:D MetricFlowQuerySpec( time_dimension_specs=(MTD_SPEC_DAY,), dimension_specs=( - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference("user"),)), - DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference("listing"),)), + DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference("user"),)), + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference("listing"),)), ), ) ) @@ -107,8 +107,8 @@ def test_dimensions_with_time_constraint( # noqa:D MetricFlowQuerySpec( time_dimension_specs=(MTD_SPEC_DAY,), dimension_specs=( - DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference("user"),)), - 
DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference("listing"),)), + DimensionSpec(element_name="home_state_latest", group_by_links=(EntityReference("user"),)), + DimensionSpec(element_name="is_lux_latest", group_by_links=(EntityReference("listing"),)), ), time_range_constraint=TimeRangeConstraint( start_time=datetime.datetime(2020, 1, 1), end_time=datetime.datetime(2020, 1, 3) diff --git a/metricflow/test/query_rendering/test_query_rendering.py b/metricflow/test/query_rendering/test_query_rendering.py index 99c6ad741d..5dc87c9f43 100644 --- a/metricflow/test/query_rendering/test_query_rendering.py +++ b/metricflow/test/query_rendering/test_query_rendering.py @@ -49,7 +49,7 @@ def test_multihop_node( dimension_specs=( DimensionSpec( element_name="customer_name", - entity_links=( + group_by_links=( EntityReference(element_name="account_id"), EntityReference(element_name="customer_id"), ), @@ -110,7 +110,7 @@ def test_partitioned_join( dimension_specs=( DimensionSpec( element_name="home_state", - entity_links=(EntityReference(element_name="user"),), + group_by_links=(EntityReference(element_name="user"),), ), ), ) @@ -140,7 +140,7 @@ def test_limit_rows( # noqa: D time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), ), ), limit=1, @@ -200,7 +200,7 @@ def test_local_dimension_using_local_entity( # noqa: D dimension_specs=( DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), ) @@ -456,7 +456,7 @@ def test_min_max_only_categorical( dimension_specs=( DimensionSpec( element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), + group_by_links=(EntityReference(element_name="listing"),), ), ), min_max_only=True, @@ -486,7 +486,7 @@ def test_min_max_only_time( time_dimension_specs=( TimeDimensionSpec( element_name="paid_at", - 
entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), time_granularity=TimeGranularity.DAY, ), ), @@ -517,7 +517,7 @@ def test_min_max_only_time_quarter( time_dimension_specs=( TimeDimensionSpec( element_name="paid_at", - entity_links=(EntityReference("booking"),), + group_by_links=(EntityReference("booking"),), time_granularity=TimeGranularity.QUARTER, ), ), diff --git a/metricflow/test/snapshot_utils.py b/metricflow/test/snapshot_utils.py index e5aa6106c3..423a0e57ab 100644 --- a/metricflow/test/snapshot_utils.py +++ b/metricflow/test/snapshot_utils.py @@ -350,10 +350,12 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D rows.append( ( # Checking a limited set of fields as the result is large due to the paths in the object. - linkable_dimension.semantic_model_origin.semantic_model_name - if linkable_dimension.semantic_model_origin - else None, - tuple(entity_link.element_name for entity_link in linkable_dimension.entity_links), + ( + linkable_dimension.semantic_model_origin.semantic_model_name + if linkable_dimension.semantic_model_origin + else None + ), + tuple(group_by_link.element_name for group_by_link in linkable_dimension.group_by_links), linkable_dimension.element_name, linkable_dimension.time_granularity.name if linkable_dimension.time_granularity is not None else "", linkable_dimension.date_part.name if linkable_dimension.date_part is not None else "", @@ -369,7 +371,7 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D ( # Checking a limited set of fields as the result is large due to the paths in the object. 
linkable_entity.semantic_model_origin.semantic_model_name, - tuple(entity_link.element_name for entity_link in linkable_entity.entity_links), + tuple(group_by_link.element_name for group_by_link in linkable_entity.group_by_links), linkable_entity.element_name, "", "", diff --git a/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_ambiguous_entity_path_resolves_to_shortest_entity_path_item__result_0.txt b/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_ambiguous_entity_path_resolves_to_shortest_entity_path_item__result_0.txt index 202bece9e0..c5704eb123 100644 --- a/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_ambiguous_entity_path_resolves_to_shortest_entity_path_item__result_0.txt +++ b/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_ambiguous_entity_path_resolves_to_shortest_entity_path_item__result_0.txt @@ -1,6 +1,6 @@ MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name='all_entity_metric'),), - dimension_specs=(DimensionSpec(element_name='country', entity_links=(EntityReference(element_name='entity_1'),)),), + dimension_specs=(DimensionSpec(element_name='country', group_by_links=(EntityReference(element_name='entity_1'),)),), filter_intersection=PydanticWhereFilterIntersection(), filter_spec_resolution_lookup=FilterSpecResolutionLookUp(), min_max_only=False, diff --git a/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_resolvable_ambiguous_entity_path__result_0.txt b/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_resolvable_ambiguous_entity_path__result_0.txt index cd062bfdee..c4aa071a2d 100644 --- a/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_resolvable_ambiguous_entity_path__result_0.txt +++ b/metricflow/test/snapshots/test_ambiguous_entity_path.py/MetricFlowQuerySpec/test_resolvable_ambiguous_entity_path__result_0.txt 
@@ -3,7 +3,7 @@ MetricFlowQuerySpec( dimension_specs=( DimensionSpec( element_name='country', - entity_links=(EntityReference(element_name='entity_1'), EntityReference(element_name='entity_0')), + group_by_links=(EntityReference(element_name='entity_1'), EntityReference(element_name='entity_0')), ), ), filter_intersection=PydanticWhereFilterIntersection(), diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 1ade48ce17..a48452676b 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -16,7 +16,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml index 7b3ec666b1..052e6d6473 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml @@ -18,11 +18,11 @@ - - - - - + + + + + @@ -85,11 +85,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml index 99530dd03e..951a33faba 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml @@ -5,8 +5,8 @@ - - + + diff --git 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index bf661d834b..6d12898b80 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -5,23 +5,23 @@ - - - - - - - - + + + + + + + + - - - - - + + + + + @@ -34,7 +34,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml index da62fd9e41..51e8c86fb3 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml @@ -5,28 +5,28 @@ - - - - - - - - + + + + + + + + - - - - - - - - - - + + + + + + + + + + @@ -39,7 +39,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_non_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_non_metric_time__dfp_0.xml index ea4127768a..b58a0163c7 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_non_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_non_metric_time__dfp_0.xml @@ -13,12 +13,12 @@ - - - - - - + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml index 1c9054d896..d63be04b1c 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml @@ -14,16 +14,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -38,11 +38,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml index ebd82f88a6..2f7522c691 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml @@ -23,7 +23,7 @@ - + @@ -54,7 +54,7 @@ - + @@ -69,11 +69,11 @@ - - - - - + + + + + @@ -146,7 +146,7 @@ - + @@ -177,7 +177,7 @@ - + @@ -192,11 +192,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml index 01e049e74c..82614c7991 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml @@ -23,7 +23,7 @@ - + @@ -55,7 +55,7 @@ - + @@ -70,11 +70,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_where_filter__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_where_filter__dfp_0.xml new file mode 100644 index 0000000000..c679e4bd2e --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_where_filter__dfp_0.xml @@ -0,0 +1,99 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml index 077dec9a99..46081438b8 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml @@ -7,10 +7,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_categorical__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_categorical__dfp_0.xml index e621dafcc3..7c218b4e21 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_categorical__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_categorical__dfp_0.xml @@ -8,11 +8,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time__dfp_0.xml index e179eec6c3..43240c5e73 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time__dfp_0.xml @@ -8,12 +8,12 @@ - - - - - - + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time_year__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time_year__dfp_0.xml index 0a407dfaf1..edb5cd2cb2 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time_year__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_min_max_only_time_year__dfp_0.xml @@ -8,12 +8,12 @@ - - - - - - + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml index a99dea7083..dca7d33ee2 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -21,11 +21,11 @@ - - - - - + + + + + @@ -89,11 +89,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index 1aaf339860..2214abe46d 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -16,7 +16,7 @@ - + @@ -66,11 +66,11 @@ - - - - - + + + + + @@ -134,16 +134,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -190,78 +190,78 @@ - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml index 5b9e047685..ca2bd4d568 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml @@ -16,11 +16,11 @@ - - - - - + + + + + @@ -48,11 +48,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml index 3037578d89..4f66d1dfe3 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml @@ -13,11 +13,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml index 3037578d89..4f66d1dfe3 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml @@ -13,11 +13,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml index b90a880dda..62bc30c4d1 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -21,11 +21,11 @@ - - - - - + + + + + @@ -89,11 +89,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml index b134b228aa..2adf63c3b5 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml @@ -16,7 +16,7 @@ - + @@ -34,11 +34,11 @@ - - - - - + + + + + @@ -51,7 +51,7 @@ - + @@ -65,16 +65,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -90,11 +90,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml index b3ba6c0a1e..a8a90e24e6 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml @@ -30,11 +30,11 @@ - - - - - + + + + + @@ -57,11 +57,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml index 7a9e744ad3..126a4ef736 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml @@ -16,7 +16,7 @@ - + @@ -41,7 +41,7 @@ - + @@ -54,11 +54,11 @@ - - - - - + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0.sql index 0a56dd7299..35079a44d6 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0.sql @@ -111,7 +111,7 @@ FROM ( subq_12.visit__referrer_id , subq_12.buys FROM ( - -- Find conversions for EntitySpec(element_name='user', entity_links=()) within the range of count=7 granularity=TimeGranularity.DAY + -- Find conversions for EntitySpec(element_name='user', group_by_links=()) within the range of count=7 granularity=TimeGranularity.DAY SELECT subq_11.visit__referrer_id , subq_11.buys diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0_optimized.sql index f340be4a39..565e1caefe 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_conversion_metric__plan0_optimized.sql @@ -27,7 +27,7 @@ FROM ( visit__referrer_id 
) subq_19 FULL OUTER JOIN ( - -- Find conversions for EntitySpec(element_name='user', entity_links=()) within the range of count=7 granularity=TimeGranularity.DAY + -- Find conversions for EntitySpec(element_name='user', group_by_links=()) within the range of count=7 granularity=TimeGranularity.DAY -- Pass Only Elements: -- ['buys', 'visit__referrer_id'] -- Aggregate Measures diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_conversion_metric__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_conversion_metric__plan0.xml index ebfbf27b26..22dbd9a346 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_conversion_metric__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_conversion_metric__plan0.xml @@ -400,7 +400,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml index 7b3ec666b1..88609d0539 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml @@ -21,7 +21,7 @@ - + @@ -88,7 +88,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml index c672eced1d..7275286c47 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml @@ -21,7 +21,7 
@@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml index 2171124c84..1c98623c68 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml @@ -55,7 +55,7 @@ - + @@ -72,7 +72,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml index 0ea28d8b15..5ca5bb29ca 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml @@ -55,7 +55,7 @@ - + @@ -72,7 +72,7 @@ - + diff --git a/metricflow/test/specs/patterns/test_entity_link_pattern.py b/metricflow/test/specs/patterns/test_entity_link_pattern.py index f1057c1448..cb2aa46057 100644 --- a/metricflow/test/specs/patterns/test_entity_link_pattern.py +++ b/metricflow/test/specs/patterns/test_entity_link_pattern.py @@ -30,27 +30,27 @@ def specs() -> Sequence[LinkableInstanceSpec]: # noqa: D MTD_SPEC_YEAR, TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.YEAR, ), # Dimensions DimensionSpec( element_name="country", - entity_links=( + group_by_links=( EntityReference(element_name="listing"), EntityReference(element_name="user"), ), ), - 
DimensionSpec(element_name="is_instant", entity_links=(EntityReference(element_name="booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference(element_name="booking"),)), # Entities EntitySpec( element_name="listing", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), ), EntitySpec( element_name="host", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), ), ) @@ -60,7 +60,7 @@ def test_valid_parameter_fields() -> None: parameter_set = EntityLinkPatternParameterSet.from_parameters( fields_to_compare=(), element_name=None, - entity_links=None, + group_by_links=None, time_granularity=None, date_part=None, ) @@ -73,7 +73,7 @@ def test_dimension_match(specs: Sequence[LinkableInstanceSpec]) -> None: # noqa pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name="is_instant", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), time_granularity=None, date_part=None, fields_to_compare=( @@ -84,7 +84,7 @@ def test_dimension_match(specs: Sequence[LinkableInstanceSpec]) -> None: # noqa ) assert tuple(pattern.match(specs)) == ( - DimensionSpec(element_name="is_instant", entity_links=(EntityReference(element_name="booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference(element_name="booking"),)), ) @@ -92,7 +92,7 @@ def test_entity_match(specs: Sequence[LinkableInstanceSpec]) -> None: # noqa: D pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name="listing", - entity_links=(EntityReference(element_name="booking"),), + group_by_links=(EntityReference(element_name="booking"),), time_granularity=None, date_part=None, fields_to_compare=( @@ -103,7 +103,7 @@ def test_entity_match(specs: Sequence[LinkableInstanceSpec]) -> None: # noqa: D ) assert 
tuple(pattern.match(specs)) == ( - EntitySpec(element_name="listing", entity_links=(EntityReference(element_name="booking"),)), + EntitySpec(element_name="listing", group_by_links=(EntityReference(element_name="booking"),)), ) @@ -111,7 +111,7 @@ def test_time_dimension_match(specs: Sequence[LinkableInstanceSpec]) -> None: # pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name=METRIC_TIME_ELEMENT_NAME, - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.WEEK, date_part=None, fields_to_compare=( @@ -129,7 +129,7 @@ def test_time_dimension_match_without_grain_specified(specs: Sequence[LinkableIn pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name=METRIC_TIME_ELEMENT_NAME, - entity_links=(), + group_by_links=(), time_granularity=None, date_part=None, fields_to_compare=( @@ -151,7 +151,7 @@ def test_time_dimension_date_part_mismatch(specs: Sequence[LinkableInstanceSpec] pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name="creation_time", - entity_links=None, + group_by_links=None, time_granularity=None, date_part=None, fields_to_compare=( @@ -169,7 +169,7 @@ def test_time_dimension_date_part_match(specs: Sequence[LinkableInstanceSpec]) - pattern = EntityLinkPattern( EntityLinkPatternParameterSet.from_parameters( element_name="creation_time", - entity_links=None, + group_by_links=None, time_granularity=None, date_part=DatePart.YEAR, fields_to_compare=( @@ -182,7 +182,7 @@ def test_time_dimension_date_part_match(specs: Sequence[LinkableInstanceSpec]) - assert tuple(pattern.match(specs)) == ( TimeDimensionSpec( element_name="creation_time", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.MONTH, date_part=DatePart.YEAR, ), diff --git a/metricflow/test/specs/patterns/test_typed_patterns.py 
b/metricflow/test/specs/patterns/test_typed_patterns.py index 2e53dbc5eb..03cca4054b 100644 --- a/metricflow/test/specs/patterns/test_typed_patterns.py +++ b/metricflow/test/specs/patterns/test_typed_patterns.py @@ -25,25 +25,25 @@ def specs() -> Sequence[LinkableInstanceSpec]: # noqa: D # Time dimensions TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=None, ), TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=DatePart.MONTH, ), # Dimensions DimensionSpec( element_name="common_name", - entity_links=((EntityReference("booking"), EntityReference("listing"))), + group_by_links=((EntityReference("booking"), EntityReference("listing"))), ), # Entities EntitySpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), ), ) @@ -59,17 +59,17 @@ def test_dimension_pattern(specs: Sequence[LinkableInstanceSpec]) -> None: # no assert tuple(pattern.match(specs)) == ( DimensionSpec( element_name="common_name", - entity_links=((EntityReference("booking"), EntityReference("listing"))), + group_by_links=((EntityReference("booking"), EntityReference("listing"))), ), TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=None, ), TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), 
EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=DatePart.MONTH, ), @@ -87,7 +87,7 @@ def test_time_dimension_pattern(specs: Sequence[LinkableInstanceSpec]) -> None: assert tuple(pattern.match(specs)) == ( TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=None, ), @@ -106,7 +106,7 @@ def test_time_dimension_pattern_with_date_part(specs: Sequence[LinkableInstanceS assert tuple(pattern.match(specs)) == ( TimeDimensionSpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), time_granularity=TimeGranularity.DAY, date_part=DatePart.MONTH, ), @@ -124,6 +124,6 @@ def test_entity_pattern(specs: Sequence[LinkableInstanceSpec]) -> None: # noqa: assert tuple(pattern.match(specs)) == ( EntitySpec( element_name="common_name", - entity_links=(EntityReference("booking"), EntityReference("listing")), + group_by_links=(EntityReference("booking"), EntityReference("listing")), ), ) diff --git a/metricflow/test/specs/test_time_dimension_spec.py b/metricflow/test/specs/test_time_dimension_spec.py index 40527f50b7..2840c6f556 100644 --- a/metricflow/test/specs/test_time_dimension_spec.py +++ b/metricflow/test/specs/test_time_dimension_spec.py @@ -13,13 +13,13 @@ def test_comparison_key_excluding_time_grain() -> None: # noqa: D spec0 = TimeDimensionSpec( element_name="element0", - entity_links=(EntityReference("entity0"),), + group_by_links=(EntityReference("entity0"),), time_granularity=TimeGranularity.DAY, ) spec1 = TimeDimensionSpec( element_name="element0", - entity_links=(EntityReference("entity0"),), + group_by_links=(EntityReference("entity0"),), time_granularity=TimeGranularity.MONTH, ) assert spec0.comparison_key(exclude_fields=[]) != 
spec1.comparison_key(exclude_fields=[]) diff --git a/metricflow/test/specs/test_where_filter_entity.py b/metricflow/test/specs/test_where_filter_entity.py index 5682b38e06..8fed47bd7a 100644 --- a/metricflow/test/specs/test_where_filter_entity.py +++ b/metricflow/test/specs/test_where_filter_entity.py @@ -20,5 +20,5 @@ def test_descending_cannot_be_set( # noqa where_filter_location=EXAMPLE_FILTER_LOCATION, rendered_spec_tracker=RenderedSpecTracker(), element_name="customer", - entity_links=(), + group_by_links=(), ).descending(True) diff --git a/metricflow/test/test_specs.py b/metricflow/test/test_specs.py index 8b1aca8109..d807cabcd9 100644 --- a/metricflow/test/test_specs.py +++ b/metricflow/test/test_specs.py @@ -23,7 +23,7 @@ def dimension_spec() -> DimensionSpec: # noqa: D return DimensionSpec( element_name="platform", - entity_links=( + group_by_links=( EntityReference(element_name="user_id"), EntityReference(element_name="device_id"), ), @@ -34,7 +34,7 @@ def dimension_spec() -> DimensionSpec: # noqa: D def time_dimension_spec() -> TimeDimensionSpec: # noqa: D return TimeDimensionSpec( element_name="signup_ts", - entity_links=(EntityReference(element_name="user_id"),), + group_by_links=(EntityReference(element_name="user_id"),), time_granularity=TimeGranularity.DAY, ) @@ -43,7 +43,7 @@ def time_dimension_spec() -> TimeDimensionSpec: # noqa: D def entity_spec() -> EntitySpec: # noqa: D return EntitySpec( element_name="user_id", - entity_links=(EntityReference(element_name="listing_id"),), + group_by_links=(EntityReference(element_name="listing_id"),), ) @@ -52,43 +52,43 @@ def test_merge_specs(dimension_spec: DimensionSpec, entity_spec: EntitySpec) -> assert InstanceSpec.merge([dimension_spec], [entity_spec]) == [dimension_spec, entity_spec] -def test_dimension_without_first_entity_link(dimension_spec: DimensionSpec) -> None: # noqa: D - assert dimension_spec.without_first_entity_link == DimensionSpec( - element_name="platform", 
entity_links=(EntityReference(element_name="device_id"),) +def test_dimension_without_first_group_by_link(dimension_spec: DimensionSpec) -> None: # noqa: D + assert dimension_spec.without_first_group_by_link == DimensionSpec( + element_name="platform", group_by_links=(EntityReference(element_name="device_id"),) ) -def test_dimension_without_entity_links(dimension_spec: DimensionSpec) -> None: # noqa: D - assert dimension_spec.without_entity_links == DimensionSpec(element_name="platform", entity_links=()) +def test_dimension_without_group_by_links(dimension_spec: DimensionSpec) -> None: # noqa: D + assert dimension_spec.without_group_by_links == DimensionSpec(element_name="platform", group_by_links=()) -def test_time_dimension_without_first_entity_link(time_dimension_spec: TimeDimensionSpec) -> None: # noqa: D - assert time_dimension_spec.without_first_entity_link == TimeDimensionSpec( +def test_time_dimension_without_first_group_by_link(time_dimension_spec: TimeDimensionSpec) -> None: # noqa: D + assert time_dimension_spec.without_first_group_by_link == TimeDimensionSpec( element_name="signup_ts", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ) -def test_time_dimension_without_entity_links(time_dimension_spec: TimeDimensionSpec) -> None: # noqa: D - assert time_dimension_spec.without_entity_links == TimeDimensionSpec( +def test_time_dimension_without_group_by_links(time_dimension_spec: TimeDimensionSpec) -> None: # noqa: D + assert time_dimension_spec.without_group_by_links == TimeDimensionSpec( element_name="signup_ts", - entity_links=(), + group_by_links=(), time_granularity=time_dimension_spec.time_granularity, ) -def test_entity_without_first_entity_link(entity_spec: EntitySpec) -> None: # noqa: D - assert entity_spec.without_first_entity_link == EntitySpec( +def test_entity_without_first_group_by_link(entity_spec: EntitySpec) -> None: # noqa: D + assert entity_spec.without_first_group_by_link == EntitySpec( 
element_name="user_id", - entity_links=(), + group_by_links=(), ) -def test_entity_without_entity_links(entity_spec: EntitySpec) -> None: # noqa: D - assert entity_spec.without_entity_links == EntitySpec( +def test_entity_without_group_by_links(entity_spec: EntitySpec) -> None: # noqa: D + assert entity_spec.without_group_by_links == EntitySpec( element_name="user_id", - entity_links=(), + group_by_links=(), ) @@ -100,7 +100,7 @@ def test_merge_linkable_specs(dimension_spec: DimensionSpec, entity_spec: Entity def test_qualified_name() -> None: # noqa: D assert ( - DimensionSpec(element_name="country", entity_links=(EntityReference("listing_id"),)).qualified_name + DimensionSpec(element_name="country", group_by_links=(EntityReference("listing_id"),)).qualified_name == "listing_id__country" ) @@ -108,12 +108,12 @@ def test_qualified_name() -> None: # noqa: D def test_merge_spec_set() -> None: # noqa: D spec_set1 = InstanceSpecSet(metric_specs=(MetricSpec(element_name="bookings"),)) spec_set2 = InstanceSpecSet( - dimension_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),) + dimension_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),) ) assert spec_set1.merge(spec_set2) == InstanceSpecSet( metric_specs=(MetricSpec(element_name="bookings"),), - dimension_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),), + dimension_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),), ) @@ -126,18 +126,18 @@ def spec_set() -> InstanceSpecSet: # noqa: D element_name="bookings", ), ), - dimension_specs=(DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),), + dimension_specs=(DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)),), time_dimension_specs=( TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), 
time_granularity=TimeGranularity.DAY, ), ), entity_specs=( EntitySpec( element_name="user_id", - entity_links=(EntityReference(element_name="listing_id"),), + group_by_links=(EntityReference(element_name="listing_id"),), ), ), ) @@ -145,15 +145,15 @@ def spec_set() -> InstanceSpecSet: # noqa: D def test_spec_set_linkable_specs(spec_set: InstanceSpecSet) -> None: # noqa: D assert set(spec_set.linkable_specs) == { - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ), EntitySpec( element_name="user_id", - entity_links=(EntityReference(element_name="listing_id"),), + group_by_links=(EntityReference(element_name="listing_id"),), ), } @@ -164,22 +164,22 @@ def test_spec_set_all_specs(spec_set: InstanceSpecSet) -> None: # noqa: D MeasureSpec( element_name="bookings", ), - DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)), + DimensionSpec(element_name="is_instant", group_by_links=(EntityReference("booking"),)), TimeDimensionSpec( element_name="ds", - entity_links=(), + group_by_links=(), time_granularity=TimeGranularity.DAY, ), EntitySpec( element_name="user_id", - entity_links=(EntityReference(element_name="listing_id"),), + group_by_links=(EntityReference(element_name="listing_id"),), ), } def test_linkless_entity() -> None: # noqa: D """Check that equals and hash works as expected for the LinklessEntitySpec / EntitySpec.""" - entity_spec = EntitySpec(element_name="user_id", entity_links=()) + entity_spec = EntitySpec(element_name="user_id", group_by_links=()) linkless_entity_spec = LinklessEntitySpec.from_element_name("user_id") # Check equality between the two.