From fcb7dfaf4e83ba411b6f29e199518d46cf899ad9 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 27 Mar 2024 18:36:37 -0700 Subject: [PATCH] Add `GroupByMetricSpec`, `GroupByMetricInstance`, and `LinkableMetric` (#1093) --- metricflow/instances.py | 15 + .../semantics/linkable_element_properties.py | 2 + .../model/semantics/linkable_spec_resolver.py | 281 ++++++++++++------ metricflow/plan_conversion/column_resolver.py | 10 + metricflow/specs/specs.py | 80 ++++- .../semantics/test_linkable_spec_resolver.py | 50 +++- tests/snapshot_utils.py | 15 + ...le_element_set_from_join_path__result0.txt | 108 +++++++ ..._set_from_join_path_multi_hop__result0.txt | 108 +++++++ tests/test_specs.py | 15 + 10 files changed, 596 insertions(+), 88 deletions(-) create mode 100644 tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path__result0.txt create mode 100644 tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path_multi_hop__result0.txt diff --git a/metricflow/instances.py b/metricflow/instances.py index d1accd177c..f1e8164f8c 100644 --- a/metricflow/instances.py +++ b/metricflow/instances.py @@ -14,6 +14,7 @@ from metricflow.specs.specs import ( DimensionSpec, EntitySpec, + GroupByMetricSpec, InstanceSpec, InstanceSpecSet, MeasureSpec, @@ -102,6 +103,13 @@ class EntityInstance(MdoInstance[EntitySpec], SemanticModelElementInstance): # spec: EntitySpec +@dataclass(frozen=True) +class GroupByMetricInstance(MdoInstance[GroupByMetricSpec], SerializableDataclass): # noqa: D101 + associated_columns: Tuple[ColumnAssociation, ...] + spec: GroupByMetricSpec + defined_from: MetricModelReference + + @dataclass(frozen=True) class MetricInstance(MdoInstance[MetricSpec], SerializableDataclass): # noqa: D101 associated_columns: Tuple[ColumnAssociation, ...] @@ -143,6 +151,7 @@ class InstanceSet(SerializableDataclass): dimension_instances: Tuple[DimensionInstance, ...] 
= () time_dimension_instances: Tuple[TimeDimensionInstance, ...] = () entity_instances: Tuple[EntityInstance, ...] = () + group_by_metric_instances: Tuple[GroupByMetricInstance, ...] = () metric_instances: Tuple[MetricInstance, ...] = () metadata_instances: Tuple[MetadataInstance, ...] = () @@ -159,6 +168,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: dimension_instances: List[DimensionInstance] = [] time_dimension_instances: List[TimeDimensionInstance] = [] entity_instances: List[EntityInstance] = [] + group_by_metric_instances: List[GroupByMetricInstance] = [] metric_instances: List[MetricInstance] = [] metadata_instances: List[MetadataInstance] = [] @@ -175,6 +185,9 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: for entity_instance in instance_set.entity_instances: if entity_instance.spec not in {x.spec for x in entity_instances}: entity_instances.append(entity_instance) + for group_by_metric_instance in instance_set.group_by_metric_instances: + if group_by_metric_instance.spec not in {x.spec for x in group_by_metric_instances}: + group_by_metric_instances.append(group_by_metric_instance) for metric_instance in instance_set.metric_instances: if metric_instance.spec not in {x.spec for x in metric_instances}: metric_instances.append(metric_instance) @@ -187,6 +200,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: dimension_instances=tuple(dimension_instances), time_dimension_instances=tuple(time_dimension_instances), entity_instances=tuple(entity_instances), + group_by_metric_instances=tuple(group_by_metric_instances), metric_instances=tuple(metric_instances), metadata_instances=tuple(metadata_instances), ) @@ -198,6 +212,7 @@ def spec_set(self) -> InstanceSpecSet: # noqa: D102 dimension_specs=tuple(x.spec for x in self.dimension_instances), time_dimension_specs=tuple(x.spec for x in self.time_dimension_instances), entity_specs=tuple(x.spec for x in self.entity_instances), + group_by_metric_specs=tuple(x.spec for x 
in self.group_by_metric_instances), metric_specs=tuple(x.spec for x in self.metric_instances), metadata_specs=tuple(x.spec for x in self.metadata_instances), ) diff --git a/metricflow/model/semantics/linkable_element_properties.py b/metricflow/model/semantics/linkable_element_properties.py index 1d5bea88d7..4ba500c5f4 100644 --- a/metricflow/model/semantics/linkable_element_properties.py +++ b/metricflow/model/semantics/linkable_element_properties.py @@ -25,6 +25,8 @@ class LinkableElementProperties(Enum): ENTITY = "entity" # See metric_time in DataSet METRIC_TIME = "metric_time" + # Refers to a metric, not a dimension. + METRIC = "metric" @staticmethod def all_properties() -> FrozenSet[LinkableElementProperties]: # noqa: D102 diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index 0ce2536f1a..777efab43d 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -32,6 +32,7 @@ DimensionSpec, EntityReference, EntitySpec, + GroupByMetricSpec, LinkableSpecSet, TimeDimensionSpec, ) @@ -96,6 +97,35 @@ def path_key(self) -> ElementPathKey: # noqa: D102 date_part=None, ) + @property + def reference(self) -> EntityReference: # noqa: D102 + return EntityReference(element_name=self.element_name) + + +@dataclass(frozen=True) +class LinkableMetric: + """Describes how a metric can be realized by joining based on entity links.""" + + element_name: str + join_by_semantic_model: SemanticModelReference + # TODO: Enable joining by dimension + entity_links: Tuple[EntityReference, ...] + properties: FrozenSet[LinkableElementProperties] + join_path: Tuple[SemanticModelJoinPathElement, ...] 
+ + @property + def path_key(self) -> ElementPathKey: # noqa: D102 + return ElementPathKey( + element_name=self.element_name, + entity_links=self.entity_links, + time_granularity=None, + date_part=None, + ) + + @property + def reference(self) -> MetricReference: # noqa: D102 + return MetricReference(element_name=self.element_name) + @dataclass(frozen=True) class LinkableElementSet: @@ -117,6 +147,7 @@ class LinkableElementSet: # } path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] + path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] @staticmethod def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: @@ -126,12 +157,15 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li """ key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list) + key_to_linkable_metrics: Dict[ElementPathKey, List[LinkableMetric]] = defaultdict(list) for linkable_element_set in linkable_element_sets: for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): key_to_linkable_dimensions[path_key].extend(linkable_dimensions) for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): key_to_linkable_entities[path_key].extend(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + key_to_linkable_metrics[path_key].extend(linkable_metrics) # Convert the dictionaries to use tuples instead of lists. 
return LinkableElementSet( @@ -141,6 +175,9 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li path_key_to_linkable_entities={ path_key: tuple(entities) for path_key, entities in key_to_linkable_entities.items() }, + path_key_to_linkable_metrics={ + path_key: tuple(metrics) for path_key, metrics in key_to_linkable_metrics.items() + }, ) @staticmethod @@ -148,32 +185,31 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] """Find the intersection of all elements in the sets by path key. This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would - find the LinakbleSpecSet for each metric in the query, then do an intersection of the sets. + find the LinkableSpecSet for each metric in the query, then do an intersection of the sets. """ if len(linkable_element_sets) == 0: return LinkableElementSet( path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}, + path_key_to_linkable_metrics={}, ) # Find path keys that are common to all LinkableElementSets. 
- common_linkable_dimension_path_keys: Set[ElementPathKey] = set.intersection( - *[ - set(linkable_element_set.path_key_to_linkable_dimensions.keys()) - for linkable_element_set in linkable_element_sets - ] - ) - - common_linkable_entity_path_keys: Set[ElementPathKey] = set.intersection( - *[ - set(linkable_element_set.path_key_to_linkable_entities.keys()) - for linkable_element_set in linkable_element_sets - ] - ) + dimension_path_keys: List[Set[ElementPathKey]] = [] + entity_path_keys: List[Set[ElementPathKey]] = [] + metric_path_keys: List[Set[ElementPathKey]] = [] + for linkable_element_set in linkable_element_sets: + dimension_path_keys.append(set(linkable_element_set.path_key_to_linkable_dimensions.keys())) + entity_path_keys.append(set(linkable_element_set.path_key_to_linkable_entities.keys())) + metric_path_keys.append(set(linkable_element_set.path_key_to_linkable_metrics.keys())) + common_linkable_dimension_path_keys = set.intersection(*dimension_path_keys) if dimension_path_keys else set() + common_linkable_entity_path_keys = set.intersection(*entity_path_keys) if entity_path_keys else set() + common_linkable_metric_path_keys = set.intersection(*metric_path_keys) if metric_path_keys else set() # Create a new LinkableElementSet that only includes items where the path key is common to all sets. 
join_path_to_linkable_dimensions: Dict[ElementPathKey, Set[LinkableDimension]] = defaultdict(set) join_path_to_linkable_entities: Dict[ElementPathKey, Set[LinkableEntity]] = defaultdict(set) + join_path_to_linkable_metrics: Dict[ElementPathKey, Set[LinkableMetric]] = defaultdict(set) for linkable_element_set in linkable_element_sets: for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): @@ -182,6 +218,9 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): if path_key in common_linkable_entity_path_keys: join_path_to_linkable_entities[path_key].update(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + if path_key in common_linkable_metric_path_keys: + join_path_to_linkable_metrics[path_key].update(linkable_metrics) return LinkableElementSet( path_key_to_linkable_dimensions={ @@ -205,6 +244,14 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet] ) for path_key, entities in join_path_to_linkable_entities.items() }, + path_key_to_linkable_metrics={ + path_key: tuple( + sorted( + metrics, key=lambda linkable_metric: linkable_metric.join_by_semantic_model.semantic_model_name + ) + ) + for path_key, metrics in join_path_to_linkable_metrics.items() + }, ) def filter( @@ -221,6 +268,7 @@ def filter( """ key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {} key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {} + key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {} for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items(): filtered_linkable_dimensions = tuple( @@ -250,9 +298,24 @@ def filter( if len(filtered_linkable_entities) > 0: key_to_linkable_entities[path_key] = filtered_linkable_entities + for 
path_key, linkable_metrics in self.path_key_to_linkable_metrics.items(): + filtered_linkable_metrics = tuple( + linkable_metric + for linkable_metric in linkable_metrics + if len(linkable_metric.properties.intersection(with_any_of)) > 0 + and len(linkable_metric.properties.intersection(without_any_of)) == 0 + and ( + len(without_all_of) == 0 + or linkable_metric.properties.intersection(without_all_of) != without_all_of + ) + ) + if len(filtered_linkable_metrics) > 0: + key_to_linkable_metrics[path_key] = filtered_linkable_metrics + return LinkableElementSet( path_key_to_linkable_dimensions=key_to_linkable_dimensions, path_key_to_linkable_entities=key_to_linkable_entities, + path_key_to_linkable_metrics=key_to_linkable_metrics, ) @property @@ -283,6 +346,13 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 ) for path_key in self.path_key_to_linkable_entities ), + group_by_metric_specs=tuple( + GroupByMetricSpec( + element_name=path_key.element_name, + entity_links=path_key.entity_links, + ) + for path_key in self.path_key_to_linkable_metrics + ), ) @property @@ -299,6 +369,11 @@ def only_unique_path_keys(self) -> LinkableElementSet: for path_key, linkable_entities in self.path_key_to_linkable_entities.items() if len(linkable_entities) <= 1 }, + path_key_to_linkable_metrics={ + path_key: linkable_metrics + for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items() + if len(linkable_metrics) <= 1 + }, ) @@ -373,73 +448,15 @@ class SemanticModelJoinPath: path_elements: Tuple[SemanticModelJoinPathElement, ...] 
- def create_linkable_element_set( - self, semantic_model_accessor: SemanticModelAccessor, with_properties: FrozenSet[LinkableElementProperties] - ) -> LinkableElementSet: - """Given the current path, generate the respective linkable elements from the last semantic model in the path.""" - entity_links = tuple(x.join_on_entity for x in self.path_elements) - + @property + def last_path_element(self) -> SemanticModelJoinPathElement: # noqa: D102 assert len(self.path_elements) > 0 - semantic_model = semantic_model_accessor.get_by_reference(self.path_elements[-1].semantic_model_reference) - assert semantic_model - - linkable_dimensions: List[LinkableDimension] = [] - linkable_entities: List[LinkableEntity] = [] - - for dimension in semantic_model.dimensions: - dimension_type = dimension.type - if dimension_type == DimensionType.CATEGORICAL: - linkable_dimensions.append( - LinkableDimension( - semantic_model_origin=semantic_model.reference, - element_name=dimension.reference.element_name, - entity_links=entity_links, - join_path=self.path_elements, - properties=with_properties, - time_granularity=None, - date_part=None, - ) - ) - elif dimension_type == DimensionType.TIME: - linkable_dimensions.extend( - _generate_linkable_time_dimensions( - semantic_model_origin=semantic_model.reference, - dimension=dimension, - entity_links=entity_links, - join_path=(), - with_properties=with_properties, - ) - ) - else: - raise RuntimeError(f"Unhandled type: {dimension_type}") - - for entity in semantic_model.entities: - # Avoid creating "booking_id__booking_id" - if entity.reference != entity_links[-1]: - linkable_entities.append( - LinkableEntity( - semantic_model_origin=semantic_model.reference, - element_name=entity.reference.element_name, - entity_links=entity_links, - join_path=self.path_elements, - properties=with_properties.union({LinkableElementProperties.ENTITY}), - ) - ) - - return LinkableElementSet( - path_key_to_linkable_dimensions={ - linkable_dimension.path_key: 
(linkable_dimension,) for linkable_dimension in linkable_dimensions - }, - path_key_to_linkable_entities={ - linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities - }, - ) + return self.path_elements[-1] @property def last_semantic_model_reference(self) -> SemanticModelReference: """The last semantic model that would be joined in this path.""" - assert len(self.path_elements) > 0 - return self.path_elements[-1].semantic_model_reference + return self.last_path_element.semantic_model_reference class ValidLinkableSpecResolver: @@ -479,6 +496,7 @@ def __init__( self._entity_to_semantic_model[entity.reference.element_name].append(semantic_model) self._metric_to_linkable_element_sets: Dict[str, List[LinkableElementSet]] = {} + self._joinable_metrics_for_semantic_models: Dict[SemanticModelReference, Set[MetricReference]] = {} start_time = time.time() for metric in self._semantic_manifest.metrics: @@ -518,6 +536,17 @@ def __init__( self._metric_to_linkable_element_sets[metric.name] = linkable_sets_for_measure + # This loop must happen after the one above so that _metric_to_linkable_element_sets is populated. + for metric in self._semantic_manifest.metrics: + metric_reference = MetricReference(metric.name) + linkable_element_set_for_metric = self.get_linkable_elements_for_metrics([metric_reference]) + for linkable_entities in linkable_element_set_for_metric.path_key_to_linkable_entities.values(): + for linkable_entity in linkable_entities: + semantic_model_reference = linkable_entity.semantic_model_origin + metrics = self._joinable_metrics_for_semantic_models.get(semantic_model_reference, set()) + metrics.add(metric_reference) + self._joinable_metrics_for_semantic_models[semantic_model_reference] = metrics + # If no metrics are specified, the query interface supports distinct dimension values from a single semantic # model. 
linkable_element_sets_to_merge: List[LinkableElementSet] = [] @@ -611,6 +640,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link path_key_to_linkable_entities={ linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities }, + path_key_to_linkable_metrics={}, ) def _get_semantic_models_with_joinable_entity( @@ -683,7 +713,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference if defined_granularity.is_smaller_than_or_equal(time_granularity) ) - # For each of the possible time granularities, create a LinkableDimension for each one. + # For each of the possible time granularities, create a LinkableDimension. path_key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) for time_granularity in possible_metric_time_granularities: possible_date_parts: Sequence[Optional[DatePart]] = ( @@ -729,6 +759,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference for path_key, linkable_dimensions in path_key_to_linkable_dimensions.items() }, path_key_to_linkable_entities={}, + path_key_to_linkable_metrics={}, ) def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> LinkableElementSet: @@ -755,8 +786,8 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl ) single_hop_elements = LinkableElementSet.merge_by_path_key( [ - join_path.create_linkable_element_set( - semantic_model_accessor=self._semantic_model_lookup, + self.create_linkable_element_set_from_join_path( + join_path=join_path, with_properties=frozenset({LinkableElementProperties.JOINED}), ) for join_path in join_paths @@ -765,9 +796,11 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl # Create multi-hop elements. At each iteration, we generate the list of valid elements based on the current join # path, extend all paths to include the next valid semantic model, then repeat. 
- multi_hop_elements = LinkableElementSet(path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}) + multi_hop_elements = LinkableElementSet( + path_key_to_linkable_dimensions={}, path_key_to_linkable_entities={}, path_key_to_linkable_metrics={} + ) - for i in range(self._max_entity_links - 1): + for _ in range(self._max_entity_links - 1): new_join_paths: List[SemanticModelJoinPath] = [] for join_path in join_paths: new_join_paths.extend( @@ -782,8 +815,8 @@ def _get_joined_elements(self, measure_semantic_model: SemanticModel) -> Linkabl multi_hop_elements = LinkableElementSet.merge_by_path_key( (multi_hop_elements,) + tuple( - new_join_path.create_linkable_element_set( - semantic_model_accessor=self._semantic_model_lookup, + self.create_linkable_element_set_from_join_path( + join_path=new_join_path, with_properties=frozenset( {LinkableElementProperties.JOINED, LinkableElementProperties.MULTI_HOP} ), @@ -846,8 +879,8 @@ def get_linkable_elements_for_distinct_values_query( def get_linkable_elements_for_metrics( self, metric_references: Sequence[MetricReference], - with_any_of: FrozenSet[LinkableElementProperties], - without_any_of: FrozenSet[LinkableElementProperties], + with_any_of: FrozenSet[LinkableElementProperties] = LinkableElementProperties.all_properties(), + without_any_of: FrozenSet[LinkableElementProperties] = frozenset(), ) -> LinkableElementSet: """Gets the valid linkable elements that are common to all requested metrics.""" linkable_element_sets = [] @@ -911,3 +944,81 @@ def _find_next_possible_paths( new_join_paths.append(new_join_path) return new_join_paths + + def create_linkable_element_set_from_join_path( + self, + join_path: SemanticModelJoinPath, + with_properties: FrozenSet[LinkableElementProperties], + ) -> LinkableElementSet: + """Given the current path, generate the respective linkable elements from the last semantic model in the path.""" + entity_links = tuple(x.join_on_entity for x in join_path.path_elements) + + 
semantic_model = self._semantic_model_lookup.get_by_reference(join_path.last_semantic_model_reference) + assert semantic_model + + linkable_dimensions: List[LinkableDimension] = [] + linkable_entities: List[LinkableEntity] = [] + linkable_metrics: List[LinkableMetric] = [] + + for dimension in semantic_model.dimensions: + dimension_type = dimension.type + if dimension_type == DimensionType.CATEGORICAL: + linkable_dimensions.append( + LinkableDimension( + semantic_model_origin=semantic_model.reference, + element_name=dimension.reference.element_name, + entity_links=entity_links, + join_path=join_path.path_elements, + properties=with_properties, + time_granularity=None, + date_part=None, + ) + ) + elif dimension_type == DimensionType.TIME: + linkable_dimensions.extend( + _generate_linkable_time_dimensions( + semantic_model_origin=semantic_model.reference, + dimension=dimension, + entity_links=entity_links, + join_path=(), + with_properties=with_properties, + ) + ) + else: + raise RuntimeError(f"Unhandled type: {dimension_type}") + + for entity in semantic_model.entities: + # Avoid creating "booking_id__booking_id" + if entity.reference != entity_links[-1]: + linkable_entities.append( + LinkableEntity( + semantic_model_origin=semantic_model.reference, + element_name=entity.reference.element_name, + entity_links=entity_links, + join_path=join_path.path_elements, + properties=with_properties.union({LinkableElementProperties.ENTITY}), + ) + ) + + linkable_metrics = [ + LinkableMetric( + element_name=metric.element_name, + entity_links=entity_links, + join_path=join_path.path_elements, + join_by_semantic_model=semantic_model.reference, + properties=with_properties.union({LinkableElementProperties.METRIC}), + ) + for metric in self._joinable_metrics_for_semantic_models.get(join_path.last_semantic_model_reference, set()) + ] + + return LinkableElementSet( + path_key_to_linkable_dimensions={ + linkable_dimension.path_key: (linkable_dimension,) for linkable_dimension in 
linkable_dimensions + }, + path_key_to_linkable_entities={ + linkable_entity.path_key: (linkable_entity,) for linkable_entity in linkable_entities + }, + path_key_to_linkable_metrics={ + linkable_metric.path_key: (linkable_metric,) for linkable_metric in linkable_metrics + }, + ) diff --git a/metricflow/plan_conversion/column_resolver.py b/metricflow/plan_conversion/column_resolver.py index 2b47fc4afa..323bbf2c83 100644 --- a/metricflow/plan_conversion/column_resolver.py +++ b/metricflow/plan_conversion/column_resolver.py @@ -12,6 +12,7 @@ from metricflow.specs.specs import ( DimensionSpec, EntitySpec, + GroupByMetricSpec, InstanceSpec, InstanceSpecVisitor, MeasureSpec, @@ -77,6 +78,15 @@ def visit_entity_spec(self, entity_spec: EntitySpec) -> ColumnAssociation: # no single_column_correlation_key=SingleColumnCorrelationKey(), ) + def visit_group_by_metric_spec(self, group_by_metric_spec: GroupByMetricSpec) -> ColumnAssociation: # noqa: D102 + return ColumnAssociation( + column_name=StructuredLinkableSpecName( + entity_link_names=tuple(x.element_name for x in group_by_metric_spec.entity_links), + element_name=group_by_metric_spec.element_name, + ).qualified_name, + single_column_correlation_key=SingleColumnCorrelationKey(), + ) + def visit_metadata_spec(self, metadata_spec: MetadataSpec) -> ColumnAssociation: # noqa: D102 return ColumnAssociation( column_name=metadata_spec.qualified_name, diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index 3eeca5a3d1..dfa0399674 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -77,6 +77,10 @@ def visit_time_dimension_spec(self, time_dimension_spec: TimeDimensionSpec) -> V def visit_entity_spec(self, entity_spec: EntitySpec) -> VisitorOutputT: # noqa: D102 raise NotImplementedError + @abstractmethod + def visit_group_by_metric_spec(self, group_by_metric_spec: GroupByMetricSpec) -> VisitorOutputT: # noqa: D102 + raise NotImplementedError + @abstractmethod def visit_metric_spec(self, 
metric_spec: MetricSpec) -> VisitorOutputT: # noqa: D102 raise NotImplementedError @@ -236,6 +240,48 @@ def accept(self, visitor: InstanceSpecVisitor[VisitorOutputT]) -> VisitorOutputT return visitor.visit_entity_spec(self) +@dataclass(frozen=True) +class GroupByMetricSpec(LinkableInstanceSpec, SerializableDataclass): + """Metric used in group by or where filter.""" + + @property + def without_first_entity_link(self) -> GroupByMetricSpec: # noqa: D102 + assert len(self.entity_links) > 0, f"Spec does not have any entity links: {self}" + return GroupByMetricSpec(element_name=self.element_name, entity_links=self.entity_links[1:]) + + @property + def without_entity_links(self) -> GroupByMetricSpec: # noqa: D102 + return GroupByMetricSpec(element_name=self.element_name, entity_links=()) + + @staticmethod + def from_name(name: str) -> GroupByMetricSpec: # noqa: D102 + structured_name = StructuredLinkableSpecName.from_name(name) + return GroupByMetricSpec( + entity_links=tuple(EntityReference(idl) for idl in structured_name.entity_link_names), + element_name=structured_name.element_name, + ) + + def __eq__(self, other: Any) -> bool: # type: ignore[misc] # noqa: D105 + if not isinstance(other, GroupByMetricSpec): + return False + return self.element_name == other.element_name and self.entity_links == other.entity_links + + def __hash__(self) -> int: # noqa: D105 + return hash((self.element_name, self.entity_links)) + + @property + def reference(self) -> MetricReference: # noqa: D102 + return MetricReference(element_name=self.element_name) + + @property + @override + def as_spec_set(self) -> InstanceSpecSet: + return InstanceSpecSet(group_by_metric_specs=(self,)) + + def accept(self, visitor: InstanceSpecVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D102 + return visitor.visit_group_by_metric_spec(self) + + @dataclass(frozen=True) class LinklessEntitySpec(EntitySpec, SerializableDataclass): """Similar to EntitySpec, but requires that it doesn't have entity links.""" 
@@ -656,6 +702,7 @@ class LinkableSpecSet(Mergeable, SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () entity_specs: Tuple[EntitySpec, ...] = () + group_by_metric_specs: Tuple[GroupByMetricSpec, ...] = () @property def contains_metric_time(self) -> bool: @@ -701,7 +748,11 @@ def metric_time_specs(self) -> Sequence[TimeDimensionSpec]: @property def as_tuple(self) -> Tuple[LinkableInstanceSpec, ...]: # noqa: D102 - return tuple(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) + return tuple( + itertools.chain( + self.dimension_specs, self.time_dimension_specs, self.entity_specs, self.group_by_metric_specs + ) + ) @override def merge(self, other: LinkableSpecSet) -> LinkableSpecSet: @@ -709,6 +760,7 @@ def merge(self, other: LinkableSpecSet) -> LinkableSpecSet: dimension_specs=self.dimension_specs + other.dimension_specs, time_dimension_specs=self.time_dimension_specs + other.time_dimension_specs, entity_specs=self.entity_specs + other.entity_specs, + group_by_metric_specs=self.group_by_metric_specs + other.group_by_metric_specs, ) @override @@ -731,10 +783,15 @@ def dedupe(self) -> LinkableSpecSet: # noqa: D102 for entity_spec in self.entity_specs: entity_spec_dict[entity_spec] = None + group_by_metric_spec_dict: Dict[GroupByMetricSpec, None] = {} + for group_by_metric in self.group_by_metric_specs: + group_by_metric_spec_dict[group_by_metric] = None + return LinkableSpecSet( dimension_specs=tuple(dimension_spec_dict.keys()), time_dimension_specs=tuple(time_dimension_spec_dict.keys()), entity_specs=tuple(entity_spec_dict.keys()), + group_by_metric_specs=tuple(group_by_metric_spec_dict.keys()), ) def is_subset_of(self, other_set: LinkableSpecSet) -> bool: # noqa: D102 @@ -746,6 +803,7 @@ def as_spec_set(self) -> InstanceSpecSet: # noqa: D102 dimension_specs=self.dimension_specs, time_dimension_specs=self.time_dimension_specs, 
entity_specs=self.entity_specs, + group_by_metric_specs=self.group_by_metric_specs, ) def difference(self, other: LinkableSpecSet) -> LinkableSpecSet: # noqa: D102 @@ -753,6 +811,7 @@ def difference(self, other: LinkableSpecSet) -> LinkableSpecSet: # noqa: D102 dimension_specs=tuple(set(self.dimension_specs) - set(other.dimension_specs)), time_dimension_specs=tuple(set(self.time_dimension_specs) - set(other.time_dimension_specs)), entity_specs=tuple(set(self.entity_specs) - set(other.entity_specs)), + group_by_metric_specs=tuple(set(self.group_by_metric_specs) - set(other.group_by_metric_specs)), ) def __len__(self) -> int: # noqa: D105 @@ -765,6 +824,7 @@ def from_specs(specs: Sequence[LinkableInstanceSpec]) -> LinkableSpecSet: # noq dimension_specs=instance_spec_set.dimension_specs, time_dimension_specs=instance_spec_set.time_dimension_specs, entity_specs=instance_spec_set.entity_specs, + group_by_metric_specs=instance_spec_set.group_by_metric_specs, ) @@ -776,6 +836,7 @@ class MetricFlowQuerySpec(SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () entity_specs: Tuple[EntitySpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () + group_by_metric_specs: Tuple[GroupByMetricSpec, ...] = () order_by_specs: Tuple[OrderBySpec, ...] 
= () time_range_constraint: Optional[TimeRangeConstraint] = None limit: Optional[int] = None @@ -789,6 +850,7 @@ def linkable_specs(self) -> LinkableSpecSet: # noqa: D102 dimension_specs=self.dimension_specs, time_dimension_specs=self.time_dimension_specs, entity_specs=self.entity_specs, + group_by_metric_specs=self.group_by_metric_specs, ) def with_time_range_constraint(self, time_range_constraint: Optional[TimeRangeConstraint]) -> MetricFlowQuerySpec: @@ -798,6 +860,7 @@ def with_time_range_constraint(self, time_range_constraint: Optional[TimeRangeCo dimension_specs=self.dimension_specs, entity_specs=self.entity_specs, time_dimension_specs=self.time_dimension_specs, + group_by_metric_specs=self.group_by_metric_specs, order_by_specs=self.order_by_specs, time_range_constraint=time_range_constraint, limit=self.limit, @@ -826,6 +889,7 @@ class InstanceSpecSet(Mergeable, SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () entity_specs: Tuple[EntitySpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () + group_by_metric_specs: Tuple[GroupByMetricSpec, ...] = () metadata_specs: Tuple[MetadataSpec, ...] 
= () @override @@ -835,6 +899,7 @@ def merge(self, other: InstanceSpecSet) -> InstanceSpecSet: measure_specs=self.measure_specs + other.measure_specs, dimension_specs=self.dimension_specs + other.dimension_specs, entity_specs=self.entity_specs + other.entity_specs, + group_by_metric_specs=self.group_by_metric_specs + other.group_by_metric_specs, time_dimension_specs=self.time_dimension_specs + other.time_dimension_specs, metadata_specs=self.metadata_specs + other.metadata_specs, ) @@ -874,18 +939,28 @@ def dedupe(self) -> InstanceSpecSet: if entity_spec not in entity_specs_deduped: entity_specs_deduped.append(entity_spec) + group_by_metric_specs_deduped = [] + for group_by_metric_spec in self.group_by_metric_specs: + if group_by_metric_spec not in group_by_metric_specs_deduped: + group_by_metric_specs_deduped.append(group_by_metric_spec) + return InstanceSpecSet( metric_specs=tuple(metric_specs_deduped), measure_specs=tuple(measure_specs_deduped), dimension_specs=tuple(dimension_specs_deduped), time_dimension_specs=tuple(time_dimension_specs_deduped), entity_specs=tuple(entity_specs_deduped), + group_by_metric_specs=tuple(group_by_metric_specs_deduped), ) @property def linkable_specs(self) -> Sequence[LinkableInstanceSpec]: """All linkable specs in this set.""" - return list(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) + return list( + itertools.chain( + self.dimension_specs, self.time_dimension_specs, self.entity_specs, self.group_by_metric_specs + ) + ) @property def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D102 @@ -895,6 +970,7 @@ def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D102 self.dimension_specs, self.time_dimension_specs, self.entity_specs, + self.group_by_metric_specs, self.metric_specs, self.metadata_specs, ) diff --git a/tests/model/semantics/test_linkable_spec_resolver.py b/tests/model/semantics/test_linkable_spec_resolver.py index f02f6948a2..ae8d2eaa36 100644 --- 
a/tests/model/semantics/test_linkable_spec_resolver.py +++ b/tests/model/semantics/test_linkable_spec_resolver.py @@ -4,11 +4,13 @@ import pytest from _pytest.fixtures import FixtureRequest -from dbt_semantic_interfaces.references import MetricReference +from dbt_semantic_interfaces.references import EntityReference, MetricReference, SemanticModelReference from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties from metricflow.model.semantics.linkable_spec_resolver import ( + SemanticModelJoinPath, + SemanticModelJoinPathElement, ValidLinkableSpecResolver, ) from metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS @@ -123,3 +125,49 @@ def test_cyclic_join_manifest( # noqa: D103 without_any_of=frozenset(), ), ) + + +def test_create_linkable_element_set_from_join_path( # noqa: D103 + request: FixtureRequest, + mf_test_configuration: MetricFlowTestConfiguration, + simple_model_spec_resolver: ValidLinkableSpecResolver, +) -> None: + assert_linkable_element_set_snapshot_equal( + request=request, + mf_test_configuration=mf_test_configuration, + set_id="result0", + linkable_element_set=simple_model_spec_resolver.create_linkable_element_set_from_join_path( + join_path=SemanticModelJoinPath( + path_elements=( + SemanticModelJoinPathElement( + semantic_model_reference=SemanticModelReference("listings_latest"), + join_on_entity=EntityReference("listing"), + ), + ) + ), + with_properties=frozenset({LinkableElementProperties.JOINED}), + ), + ) + + +def test_create_linkable_element_set_from_join_path_multi_hop( # noqa: D103 + request: FixtureRequest, + mf_test_configuration: MetricFlowTestConfiguration, + simple_model_spec_resolver: ValidLinkableSpecResolver, +) -> None: + assert_linkable_element_set_snapshot_equal( + request=request, + mf_test_configuration=mf_test_configuration, + set_id="result0", + 
linkable_element_set=simple_model_spec_resolver.create_linkable_element_set_from_join_path( + join_path=SemanticModelJoinPath( + path_elements=( + SemanticModelJoinPathElement( + semantic_model_reference=SemanticModelReference("listings_latest"), + join_on_entity=EntityReference("listing"), + ), + ) + ), + with_properties=frozenset({LinkableElementProperties.JOINED, LinkableElementProperties.MULTI_HOP}), + ), + ) diff --git a/tests/snapshot_utils.py b/tests/snapshot_utils.py index 38a9a76ca6..47808db968 100644 --- a/tests/snapshot_utils.py +++ b/tests/snapshot_utils.py @@ -373,6 +373,21 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D103 sorted(linkable_element_property.name for linkable_element_property in linkable_entity.properties), ) ) + + for linkable_metric_iterable in linkable_element_set.path_key_to_linkable_metrics.values(): + for linkable_metric in linkable_metric_iterable: + rows.append( + ( + # Checking a limited set of fields as the result is large due to the paths in the object. 
+ linkable_metric.join_by_semantic_model.semantic_model_name, + tuple(entity_link.element_name for entity_link in linkable_metric.entity_links), + linkable_metric.element_name, + "", + "", + sorted(linkable_element_property.name for linkable_element_property in linkable_metric.properties), + ) + ) + assert_str_snapshot_equal( request=request, mf_test_configuration=mf_test_configuration, diff --git a/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path__result0.txt b/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path__result0.txt new file mode 100644 index 0000000000..d4e89f8eb8 --- /dev/null +++ b/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path__result0.txt @@ -0,0 +1,108 @@ +Semantic Model Entity Links Name Time Granularity Date Part Properties +---------------- -------------- -------------------------------------------------------- ------------------ ----------- -------------------------------------- +listings_latest ('listing',) approximate_continuous_booking_value_p99 ['JOINED', 'METRIC'] +listings_latest ('listing',) approximate_discrete_booking_value_p99 ['JOINED', 'METRIC'] +listings_latest ('listing',) average_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) average_instant_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) bookers ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_fees ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_fees_last_week_per_booker_this_week ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_fees_per_booker ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_fees_since_start_of_month ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_payments ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_value_for_non_null_listing_id ['JOINED', 'METRIC']
+listings_latest ('listing',) booking_value_p99 ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_value_per_view ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_value_sub_instant ['JOINED', 'METRIC'] +listings_latest ('listing',) booking_value_sub_instant_add_10 ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_5_day_lag ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_at_start_of_month ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_fill_nulls_with_0 ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_growth_2_weeks ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_growth_2_weeks_fill_nulls_with_0 ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_growth_2_weeks_fill_nulls_with_0_for_non_offset ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_growth_since_start_of_month ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_join_to_time_spine ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_month_start_compared_to_1_month_prior ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_offset_once ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_offset_twice ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_per_booker ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_per_dollar ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_per_listing ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_per_lux_listing_derived ['JOINED', 'METRIC'] +listings_latest ('listing',) bookings_per_view ['JOINED', 'METRIC'] +listings_latest ('listing',) capacity_latest ['JOINED'] +listings_latest ('listing',) country_latest ['JOINED'] +listings_latest ('listing',) created_at DAY ['JOINED'] +listings_latest ('listing',) created_at DAY DAY ['JOINED'] +listings_latest ('listing',) 
created_at DAY DOW ['JOINED'] +listings_latest ('listing',) created_at DAY DOY ['JOINED'] +listings_latest ('listing',) created_at DAY MONTH ['JOINED'] +listings_latest ('listing',) created_at DAY QUARTER ['JOINED'] +listings_latest ('listing',) created_at DAY YEAR ['JOINED'] +listings_latest ('listing',) created_at MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at MONTH MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at MONTH QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at MONTH YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at QUARTER QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at QUARTER YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at WEEK ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at WEEK MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at WEEK QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at WEEK YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) created_at YEAR YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) derived_bookings_0 ['JOINED', 'METRIC'] +listings_latest ('listing',) derived_bookings_1 ['JOINED', 'METRIC'] +listings_latest ('listing',) discrete_booking_value_p99 ['JOINED', 'METRIC'] +listings_latest ('listing',) double_counted_delayed_bookings ['JOINED', 'METRIC'] +listings_latest ('listing',) ds DAY ['JOINED'] +listings_latest ('listing',) ds DAY DAY ['JOINED'] +listings_latest ('listing',) ds DAY DOW ['JOINED'] +listings_latest ('listing',) ds DAY DOY ['JOINED'] +listings_latest ('listing',) ds DAY MONTH 
['JOINED'] +listings_latest ('listing',) ds DAY QUARTER ['JOINED'] +listings_latest ('listing',) ds DAY YEAR ['JOINED'] +listings_latest ('listing',) ds MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds MONTH MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds MONTH QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds MONTH YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds QUARTER QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds QUARTER YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds WEEK ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds WEEK MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds WEEK QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds WEEK YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) ds YEAR YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED'] +listings_latest ('listing',) every_2_days_bookers_2_days_ago ['JOINED', 'METRIC'] +listings_latest ('listing',) every_two_days_bookers ['JOINED', 'METRIC'] +listings_latest ('listing',) every_two_days_bookers_fill_nulls_with_0 ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_booking_fraction_of_max_value ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_booking_value_ratio ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_bookings ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_lux_booking_value_rate ['JOINED', 'METRIC'] +listings_latest ('listing',) instant_plus_non_referred_bookings_pct ['JOINED', 'METRIC'] +listings_latest ('listing',) is_lux_latest ['JOINED'] +listings_latest 
('listing',) largest_listing ['JOINED', 'METRIC'] +listings_latest ('listing',) listings ['JOINED', 'METRIC'] +listings_latest ('listing',) lux_booking_fraction_of_max_value ['JOINED', 'METRIC'] +listings_latest ('listing',) lux_booking_value_rate_expr ['JOINED', 'METRIC'] +listings_latest ('listing',) lux_listings ['JOINED', 'METRIC'] +listings_latest ('listing',) max_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) median_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) min_booking_value ['JOINED', 'METRIC'] +listings_latest ('listing',) nested_fill_nulls_without_time_spine ['JOINED', 'METRIC'] +listings_latest ('listing',) non_referred_bookings_pct ['JOINED', 'METRIC'] +listings_latest ('listing',) referred_bookings ['JOINED', 'METRIC'] +listings_latest ('listing',) smallest_listing ['JOINED', 'METRIC'] +listings_latest ('listing',) twice_bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC'] +listings_latest ('listing',) user ['ENTITY', 'JOINED'] +listings_latest ('listing',) views ['JOINED', 'METRIC'] +listings_latest ('listing',) views_times_booking_value ['JOINED', 'METRIC'] diff --git a/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path_multi_hop__result0.txt b/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path_multi_hop__result0.txt new file mode 100644 index 0000000000..27b3ed75ee --- /dev/null +++ b/tests/snapshots/test_linkable_spec_resolver.py/str/test_create_linkable_element_set_from_join_path_multi_hop__result0.txt @@ -0,0 +1,108 @@ +Semantic Model Entity Links Name Time Granularity Date Part Properties +---------------- -------------- -------------------------------------------------------- ------------------ ----------- --------------------------------------------------- +listings_latest ('listing',) approximate_continuous_booking_value_p99 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) 
approximate_discrete_booking_value_p99 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) average_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) average_instant_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookers ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_fees ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_fees_last_week_per_booker_this_week ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_fees_per_booker ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_fees_since_start_of_month ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_payments ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value_for_non_null_listing_id ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value_p99 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value_per_view ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value_sub_instant ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) booking_value_sub_instant_add_10 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_5_day_lag ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_at_start_of_month ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_fill_nulls_with_0 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_growth_2_weeks ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_growth_2_weeks_fill_nulls_with_0 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) 
bookings_growth_2_weeks_fill_nulls_with_0_for_non_offset ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_growth_since_start_of_month ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_join_to_time_spine ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_month_start_compared_to_1_month_prior ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_offset_once ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_offset_twice ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_per_booker ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_per_dollar ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_per_listing ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_per_lux_listing_derived ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) bookings_per_view ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) capacity_latest ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) country_latest ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY DAY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY DOW ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY DOY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY MONTH ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY QUARTER ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at DAY YEAR ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at MONTH MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at MONTH QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) 
created_at MONTH YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at QUARTER QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at QUARTER YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at WEEK ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at WEEK MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at WEEK QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at WEEK YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) created_at YEAR YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) derived_bookings_0 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) derived_bookings_1 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) discrete_booking_value_p99 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) double_counted_delayed_bookings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY DAY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY DOW ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY DOY ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY MONTH ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY QUARTER ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds DAY YEAR ['JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds MONTH MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 
'MULTI_HOP'] +listings_latest ('listing',) ds MONTH QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds MONTH YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds QUARTER QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds QUARTER YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds WEEK ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds WEEK MONTH ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds WEEK QUARTER ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds WEEK YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) ds YEAR YEAR ['DERIVED_TIME_GRANULARITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) every_2_days_bookers_2_days_ago ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) every_two_days_bookers ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) every_two_days_bookers_fill_nulls_with_0 ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_booking_fraction_of_max_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_booking_value_ratio ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_bookings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_lux_booking_value_rate ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) instant_plus_non_referred_bookings_pct ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) is_lux_latest ['JOINED', 'MULTI_HOP'] +listings_latest 
('listing',) largest_listing ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) listings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) lux_booking_fraction_of_max_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) lux_booking_value_rate_expr ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) lux_listings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) max_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) median_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) min_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) nested_fill_nulls_without_time_spine ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) non_referred_bookings_pct ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) referred_bookings ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) smallest_listing ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) twice_bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) user ['ENTITY', 'JOINED', 'MULTI_HOP'] +listings_latest ('listing',) views ['JOINED', 'METRIC', 'MULTI_HOP'] +listings_latest ('listing',) views_times_booking_value ['JOINED', 'METRIC', 'MULTI_HOP'] diff --git a/tests/test_specs.py b/tests/test_specs.py index e43463f505..da922e61c9 100644 --- a/tests/test_specs.py +++ b/tests/test_specs.py @@ -9,6 +9,7 @@ DimensionSpec, EntityReference, EntitySpec, + GroupByMetricSpec, InstanceSpec, InstanceSpecSet, LinkableInstanceSpec, @@ -140,6 +141,12 @@ def spec_set() -> InstanceSpecSet: # noqa: D103 entity_links=(EntityReference(element_name="listing_id"),), ), ), + group_by_metric_specs=( + GroupByMetricSpec( + element_name="bookings", + entity_links=(EntityReference(element_name="listing_id"),), + ), + ), ) @@ -155,6 +162,10 @@ def test_spec_set_linkable_specs(spec_set: InstanceSpecSet) -> 
None: # noqa: D1 element_name="user_id", entity_links=(EntityReference(element_name="listing_id"),), ), + GroupByMetricSpec( + element_name="bookings", + entity_links=(EntityReference(element_name="listing_id"),), + ), } @@ -174,6 +185,10 @@ def test_spec_set_all_specs(spec_set: InstanceSpecSet) -> None: # noqa: D103 element_name="user_id", entity_links=(EntityReference(element_name="listing_id"),), ), + GroupByMetricSpec( + element_name="bookings", + entity_links=(EntityReference(element_name="listing_id"),), + ), }