From 8ffb5f05d80e7ba86ca8cb5d121a31b5e3cbd754 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 22:38:27 -0700 Subject: [PATCH 1/3] Remove inline comment clutter from LinkableElementSet There are some comments that are a bit outdated and confusing, or at the very least not terribly useful, in the first sections of the LinkableElementSet, so we remove them. This was found in a modification to the LinkableDimension class, as the longer comment makes a reference to it. --- metricflow/model/semantics/linkable_element_set.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/metricflow/model/semantics/linkable_element_set.py b/metricflow/model/semantics/linkable_element_set.py index e36dda52f7..444838a4d4 100644 --- a/metricflow/model/semantics/linkable_element_set.py +++ b/metricflow/model/semantics/linkable_element_set.py @@ -21,17 +21,6 @@ class LinkableElementSet: TODO: There are similarities with LinkableSpecSet - consider consolidation. """ - # Dictionaries that map the path key to context on the dimension - # - # For example: - # { - # "listing__country_latest": ( - # LinkableDimension( - # element_name="country_latest", - # entity_links=("listing",), - # semantic_model_origin="listings_latest_source", - # ) - # } path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = field(default_factory=dict) path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = field(default_factory=dict) path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = field(default_factory=dict) @@ -54,7 +43,6 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): key_to_linkable_metrics[path_key].extend(linkable_metrics) - # Convert the dictionaries to use tuples instead of lists. 
return LinkableElementSet( path_key_to_linkable_dimensions={ path_key: tuple(dimensions) for path_key, dimensions in key_to_linkable_dimensions.items() From b1f728be5a775cea2c2bcc0108d8df8ddec0bc10 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 22:48:34 -0700 Subject: [PATCH 2/3] Add LinkableElementType annotation to ElementPathKey Currently the ElementPathKey is agnostic to the type of element it represents. In some cases this will be fairly obvious - if the time granularity is set it is a time dimension. In others we can determine this by inspection - if the name and links resolve to a distinct dimension in a semantic model then it must be a dimension, and similarly for an entity. However, there is the possibility of some ambiguity in rare cases where an entity and dimension might end up with the same identifier/link combinations. This doesn't matter in the current usage of the ElementPathKey, which is always bound to an element subtype, but when we add support for predicate pushdown we'll need to be able to use this key to fetch the specific set of LinkableElement classes, and this means we must be able to easily get a deterministic type. This commit adds the element type annotation we require, and does some additional related work to ensure that time dimensions and dimensions are always classified correctly when the LinkableDimension instance is created. 
--- .../model/semantics/linkable_element.py | 57 ++++++++++++++++++- .../model/semantics/linkable_element_set.py | 5 +- .../model/semantics/linkable_spec_resolver.py | 13 ++++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/metricflow/model/semantics/linkable_element.py b/metricflow/model/semantics/linkable_element.py index 650748685e..9f95b0cdd9 100644 --- a/metricflow/model/semantics/linkable_element.py +++ b/metricflow/model/semantics/linkable_element.py @@ -4,6 +4,8 @@ from enum import Enum from typing import FrozenSet, Optional, Tuple +from dbt_semantic_interfaces.enum_extension import assert_values_exhausted +from dbt_semantic_interfaces.protocols.dimension import DimensionType from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference from dbt_semantic_interfaces.type_enums.date_part import DatePart from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity @@ -11,6 +13,33 @@ from metricflow.specs.specs import EntityReference +class LinkableElementType(Enum): + """Enumeration of the possible types of linkable element we are encountering or expecting. + + LinkableElements effectively map on to LinkableSpecs and queryable semantic manifest elements such + as Metrics, Dimensions, and Entities. This provides the full set of types we might encounter, and is + useful for ensuring that we are always getting the correct LinkableElement from a given part of the + codebase - e.g., to ensure we are not accidentally getting an Entity when we expect a Dimension. 
+ """ + + DIMENSION = "dimension" + ENTITY = "entity" + METRIC = "metric" + TIME_DIMENSION = "time_dimension" + + @property + def is_dimension_type(self) -> bool: + """Property to simplify scenarios where callers need to know whether or not this represents a dimension.""" + # Use a local alias to allow type refinement for the static exhaustive switch assertion + element_type = self + if element_type is LinkableElementType.DIMENSION or element_type is LinkableElementType.TIME_DIMENSION: + return True + elif element_type is LinkableElementType.ENTITY or element_type is LinkableElementType.METRIC: + return False + else: + return assert_values_exhausted(element_type) + + class LinkableElementProperty(Enum): """The properties associated with a valid linkable element. @@ -55,10 +84,23 @@ class ElementPathKey: """A key that can uniquely identify an element and the joins used to realize the element.""" element_name: str + element_type: LinkableElementType entity_links: Tuple[EntityReference, ...] time_granularity: Optional[TimeGranularity] = None date_part: Optional[DatePart] = None + def __post_init__(self) -> None: + """Asserts all requirements associated with the element_type are met.""" + element_type = self.element_type + if element_type is LinkableElementType.TIME_DIMENSION: + assert ( + self.time_granularity + ), "Time granularity must be specified for all ElementPathKeys associated with time dimensions!" + elif element_type is LinkableElementType.DIMENSION or element_type is LinkableElementType.ENTITY or element_type is LinkableElementType.METRIC: + pass + else: + assert_values_exhausted(element_type) + @dataclass(frozen=True) class SemanticModelJoinPathElement: @@ -75,6 +117,7 @@ class LinkableDimension: # The semantic model where this dimension was defined. semantic_model_origin: Optional[SemanticModelReference] element_name: str + dimension_type: DimensionType entity_links: Tuple[EntityReference, ...] join_path: Tuple[SemanticModelJoinPathElement, ...] 
properties: FrozenSet[LinkableElementProperty] @@ -83,8 +126,14 @@ class LinkableDimension: @property def path_key(self) -> ElementPathKey: # noqa: D102 + if self.dimension_type is DimensionType.CATEGORICAL: + element_type = LinkableElementType.DIMENSION + else: + element_type = LinkableElementType.TIME_DIMENSION + return ElementPathKey( element_name=self.element_name, + element_type=element_type, entity_links=self.entity_links, time_granularity=self.time_granularity, date_part=self.date_part, @@ -108,7 +157,9 @@ class LinkableEntity: @property def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + return ElementPathKey( + element_name=self.element_name, element_type=LinkableElementType.ENTITY, entity_links=self.entity_links + ) @property def reference(self) -> EntityReference: # noqa: D102 @@ -128,7 +179,9 @@ class LinkableMetric: @property def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + return ElementPathKey( + element_name=self.element_name, element_type=LinkableElementType.METRIC, entity_links=self.entity_links + ) @property def reference(self) -> MetricReference: # noqa: D102 diff --git a/metricflow/model/semantics/linkable_element_set.py b/metricflow/model/semantics/linkable_element_set.py index 444838a4d4..9d42fdb1d2 100644 --- a/metricflow/model/semantics/linkable_element_set.py +++ b/metricflow/model/semantics/linkable_element_set.py @@ -8,6 +8,7 @@ ElementPathKey, LinkableDimension, LinkableElementProperty, + LinkableElementType, LinkableEntity, LinkableMetric, ) @@ -198,7 +199,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 entity_links=path_key.entity_links, ) for path_key in self.path_key_to_linkable_dimensions.keys() - if not path_key.time_granularity + if path_key.element_type is LinkableElementType.DIMENSION ), time_dimension_specs=tuple( TimeDimensionSpec( @@ 
-208,7 +209,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 date_part=path_key.date_part, ) for path_key in self.path_key_to_linkable_dimensions.keys() - if path_key.time_granularity + if path_key.element_type is LinkableElementType.TIME_DIMENSION and path_key.time_granularity ), entity_specs=tuple( EntitySpec( diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index 767990fa8c..85429eb0d5 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -26,6 +26,7 @@ ElementPathKey, LinkableDimension, LinkableElementProperty, + LinkableElementType, LinkableEntity, LinkableMetric, SemanticModelJoinPath, @@ -69,6 +70,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, + dimension_type=DimensionType.TIME, entity_links=entity_links, join_path=tuple(join_path), time_granularity=time_granularity, @@ -84,6 +86,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, + dimension_type=DimensionType.TIME, entity_links=entity_links, join_path=tuple(join_path), time_granularity=time_granularity, @@ -193,7 +196,11 @@ def __init__( for entity in semantic_model.entities: linkable_metrics_set = LinkableElementSet( path_key_to_linkable_metrics={ - ElementPathKey(element_name=metric.element_name, entity_links=(entity.reference,)): ( + ElementPathKey( + element_name=metric.element_name, + element_type=LinkableElementType.METRIC, + entity_links=(entity.reference,), + ): ( LinkableMetric( element_name=metric.element_name, entity_links=(entity.reference,), @@ -293,6 +300,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link LinkableDimension( semantic_model_origin=semantic_model.reference, 
element_name=dimension.reference.element_name, + dimension_type=DimensionType.CATEGORICAL, entity_links=(entity_link,), join_path=(), properties=dimension_properties, @@ -405,6 +413,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference for date_part in possible_date_parts: path_key = ElementPathKey( element_name=DataSet.metric_time_dimension_name(), + element_type=LinkableElementType.TIME_DIMENSION, entity_links=(), time_granularity=time_granularity, date_part=date_part, @@ -413,6 +422,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference LinkableDimension( semantic_model_origin=measure_semantic_model.reference if measure_semantic_model else None, element_name=DataSet.metric_time_dimension_name(), + dimension_type=DimensionType.TIME, entity_links=(), join_path=(), # Anything that's not at the base time granularity of the measure's aggregation time dimension @@ -642,6 +652,7 @@ def create_linkable_element_set_from_join_path( LinkableDimension( semantic_model_origin=semantic_model.reference, element_name=dimension.reference.element_name, + dimension_type=DimensionType.CATEGORICAL, entity_links=entity_links, join_path=join_path.path_elements, properties=with_properties, From 9ff74651853f087381e4bc05f58b711bdd592317 Mon Sep 17 00:00:00 2001 From: tlento Date: Tue, 23 Apr 2024 11:01:47 -0700 Subject: [PATCH 3/3] Add tests for LinkableElementSet helper methods The LinkableElementSet has some helper methods to assist with certain gnarly operations, but these are largely untested. Since tests are the best documentation of behavior, we add some here. This commit also includes some updates to documentation to clarify behavior under certain common scenarios. 
--- .../model/semantics/linkable_element_set.py | 10 +- .../semantics/test_linkable_element_set.py | 477 ++++++++++++++++++ 2 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 tests/model/semantics/test_linkable_element_set.py diff --git a/metricflow/model/semantics/linkable_element_set.py b/metricflow/model/semantics/linkable_element_set.py index 9d42fdb1d2..424a02d5c4 100644 --- a/metricflow/model/semantics/linkable_element_set.py +++ b/metricflow/model/semantics/linkable_element_set.py @@ -30,7 +30,10 @@ class LinkableElementSet: def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: """Combine multiple sets together by the path key. - If there are elements with the same join key, those elements will be categorized as ambiguous. + If there are elements with the same join key and different element(s) in the tuple of values, + those elements will be categorized as ambiguous. + Note this does not deduplicate values, so there may be unambiguous merged sets that appear to have + multiple values if all one does is a simple length check. """ key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list) @@ -60,6 +63,11 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: """Find the intersection of all elements in the sets by path key. + This will return the intersection of all path keys defined in the sets, but the union of elements associated + with each path key. In other words, it filters out path keys (i.e., linkable specs) that are not referenced + in every set in the input sequence, but it preserves all of the various potentially ambiguous LinkableElement + instances associated with the path keys that remain. 
+ This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would find the LinkableSpecSet for each metric in the query, then do an intersection of the sets. """ diff --git a/tests/model/semantics/test_linkable_element_set.py b/tests/model/semantics/test_linkable_element_set.py new file mode 100644 index 0000000000..a4842b5de4 --- /dev/null +++ b/tests/model/semantics/test_linkable_element_set.py @@ -0,0 +1,477 @@ +"""Module for testing linkable element set operations. + +Note this module departs from our typical approach of defining a bajillion fixtures and wiring them into functions +because the base object types involved here are highly specific to the assertions we want to make in these tests. +Rather than making function calls, we simply initialize these things at module scope. This opens us up to possible +output divergence if someone updates one of these things by reference inside the LinkableElementSet, but since we +are not supposed to be doing that anyway that's actually a reasonably handy feature. 
+""" + +from __future__ import annotations + +import itertools + +from dbt_semantic_interfaces.protocols.dimension import DimensionType +from dbt_semantic_interfaces.references import ( + DimensionReference, + EntityReference, + MetricReference, + SemanticModelReference, + TimeDimensionReference, +) +from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity +from more_itertools import bucket + +from metricflow.model.semantics.linkable_element import ( + LinkableDimension, + LinkableElementProperty, + LinkableEntity, + LinkableMetric, + SemanticModelJoinPathElement, +) +from metricflow.model.semantics.linkable_element_set import LinkableElementSet + +AMBIGUOUS_NAME = "ambiguous" +# Common references +_base_semantic_model = SemanticModelReference(semantic_model_name="base_semantic_model") +_secondary_semantic_model = SemanticModelReference(semantic_model_name="secondary_semantic_model") +_base_entity_reference = EntityReference(element_name="base_entity") +_base_dimension_reference = DimensionReference(element_name="base_dimension") +_time_dimension_reference = TimeDimensionReference(element_name="time_dimension") +_base_metric_reference = MetricReference(element_name="base_metric") + + +# Entities +_base_entity = LinkableEntity( + element_name=_base_entity_reference.element_name, + semantic_model_origin=_base_semantic_model, + entity_links=(), + join_path=(), + properties=frozenset([LinkableElementProperty.ENTITY]), +) +_ambiguous_entity = LinkableEntity( + element_name=AMBIGUOUS_NAME, + semantic_model_origin=_base_semantic_model, + entity_links=(_base_entity_reference,), + join_path=(), + properties=frozenset([LinkableElementProperty.ENTITY, LinkableElementProperty.LOCAL_LINKED]), +) +# For testing deduplication on entities +_ambiguous_entity_with_join_path = LinkableEntity( + element_name=AMBIGUOUS_NAME, + semantic_model_origin=_base_semantic_model, + entity_links=(_base_entity_reference,), + join_path=( + SemanticModelJoinPathElement( + 
+ semantic_model_reference=_secondary_semantic_model, + join_on_entity=EntityReference(element_name="external_entity"), + ), + ), + properties=frozenset([LinkableElementProperty.ENTITY, LinkableElementProperty.JOINED]), +) + +# Dimensions +_categorical_dimension = LinkableDimension( + element_name=_base_dimension_reference.element_name, + entity_links=(_base_entity_reference,), + dimension_type=DimensionType.CATEGORICAL, + semantic_model_origin=_base_semantic_model, + join_path=(), + properties=frozenset([LinkableElementProperty.LOCAL_LINKED]), + time_granularity=None, + date_part=None, +) +_time_dimension = LinkableDimension( + element_name=_time_dimension_reference.element_name, + entity_links=(_base_entity_reference,), + dimension_type=DimensionType.TIME, + semantic_model_origin=_base_semantic_model, + join_path=(), + properties=frozenset([LinkableElementProperty.LOCAL_LINKED]), + time_granularity=TimeGranularity.DAY, + date_part=None, +) +# Resolves to the same local linked name as _ambiguous_entity +_ambiguous_categorical_dimension = LinkableDimension( + element_name=AMBIGUOUS_NAME, + entity_links=(_base_entity_reference,), + dimension_type=DimensionType.CATEGORICAL, + semantic_model_origin=_secondary_semantic_model, + join_path=(), + properties=frozenset([LinkableElementProperty.LOCAL_LINKED]), + time_granularity=None, + date_part=None, +) +# The opposite direction of the join for ambiguous_entity_with_join_path +# For testing deduplication on dimensions +_ambiguous_categorical_dimension_with_join_path = LinkableDimension( + element_name=AMBIGUOUS_NAME, + entity_links=(_base_entity_reference,), + dimension_type=DimensionType.CATEGORICAL, + semantic_model_origin=_secondary_semantic_model, + join_path=( + SemanticModelJoinPathElement( + semantic_model_reference=_base_semantic_model, join_on_entity=_base_entity_reference + ), + ), + properties=frozenset([LinkableElementProperty.JOINED]), + time_granularity=None, + date_part=None, +) + +# Metrics 
+_base_metric = LinkableMetric( + element_name=_base_metric_reference.element_name, + join_by_semantic_model=_base_semantic_model, + entity_links=(_base_entity_reference,), + properties=frozenset([LinkableElementProperty.METRIC]), + join_path=(), +) +_ambiguous_metric = LinkableMetric( + element_name=AMBIGUOUS_NAME, + join_by_semantic_model=_secondary_semantic_model, + entity_links=(_base_entity_reference,), + properties=frozenset([LinkableElementProperty.METRIC]), + join_path=(), +) +# For testing deduplication on metrics +_ambiguous_metric_with_join_path = LinkableMetric( + element_name=AMBIGUOUS_NAME, + join_by_semantic_model=_secondary_semantic_model, + entity_links=(_base_entity_reference,), + properties=frozenset([LinkableElementProperty.METRIC, LinkableElementProperty.JOINED]), + join_path=( + SemanticModelJoinPathElement( + semantic_model_reference=_base_semantic_model, join_on_entity=_base_entity_reference + ), + ), +) + + +def _linkable_set_with_uniques_and_duplicates() -> LinkableElementSet: + """Helper to create a LinkableElementSet including unique and ambiguous items. + + The ambiguous elements will all resolve to the same ElementPathKey. + + For distinct items we'll see entries like: + + {_categorical_dimension.path_key: (_categorical_dimension,)} + + For ambiguous items we'll see entries like: + + {_ambiguous_entity.path_key: (_ambiguous_entity, _ambiguous_entity_with_join_path)} + + This also includes a cross-type ambiguity, where one dimension has the same name and entity link set as one of + the entities. These will NOT resolve to the same ElementPathKey, because ElementPathKey incorporates element type. 
+ """ + dimensions = bucket( + ( + _categorical_dimension, + _time_dimension, + _ambiguous_categorical_dimension, + _ambiguous_categorical_dimension_with_join_path, + ), + lambda x: x.path_key, + ) + entities = bucket((_base_entity, _ambiguous_entity, _ambiguous_entity_with_join_path), lambda x: x.path_key) + metrics = bucket((_base_metric, _ambiguous_metric, _ambiguous_metric_with_join_path), lambda x: x.path_key) + + return LinkableElementSet( + path_key_to_linkable_dimensions={path_key: tuple(dimensions[path_key]) for path_key in list(dimensions)}, + path_key_to_linkable_entities={path_key: tuple(entities[path_key]) for path_key in list(entities)}, + path_key_to_linkable_metrics={path_key: tuple(metrics[path_key]) for path_key in list(metrics)}, + ) + + +def test_filter_with_any_of() -> None: + """Tests behavior of filter method with a `with_any_of` specified.""" + filter_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.ENTITY]) + linkable_set = _linkable_set_with_uniques_and_duplicates() + + filtered_set = linkable_set.filter(with_any_of=filter_properties) + + filtered_dimensions = [ + dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values()) + ] + assert all([LinkableElementProperty.JOINED in dim.properties for dim in filtered_dimensions]), ( + f"Found a filtered dimension that did not match the applied filter properties! " + f"Filter properties: {filter_properties}, dimensions: {filtered_dimensions}" + ) + + filtered_metrics = [ + metric for metric in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_metrics.values()) + ] + assert all([LinkableElementProperty.JOINED in metric.properties for metric in filtered_metrics]), ( + f"Found a filtered metric that did not match the applied filter properties! 
" + f"Filter properties: {filter_properties}, metrics: {filtered_metrics}" + ) + + # These should be untouched so we do a direct comparison in the assertion + filtered_entity_keys = set(filtered_set.path_key_to_linkable_entities.keys()) + original_entity_keys = set(linkable_set.path_key_to_linkable_entities.keys()) + assert filtered_entity_keys == original_entity_keys, ( + f"Found a filter applied to entities despite the filter spec including all elements with the ENTITY property! " + f"Filter properties: {filter_properties}, entities: {linkable_set.path_key_to_linkable_entities}, " + f"filtered_entities: {filtered_set.path_key_to_linkable_entities}" + ) + + +def test_filter_without_any_of() -> None: + """Tests behavior of filter method with a `without_any_of` specified. + + Note the filter conflict - the end result should exclude all metrics. + """ + with_any_of_properties = frozenset( + [LinkableElementProperty.JOINED, LinkableElementProperty.LOCAL_LINKED, LinkableElementProperty.METRIC] + ) + without_any_of_properties = frozenset([LinkableElementProperty.ENTITY, LinkableElementProperty.METRIC]) + linkable_set = _linkable_set_with_uniques_and_duplicates() + + filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_any_of=without_any_of_properties) + + filtered_dimensions = [ + dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values()) + ] + assert all( + [ + LinkableElementProperty.JOINED in dim.properties or LinkableElementProperty.LOCAL_LINKED in dim.properties + for dim in filtered_dimensions + ] + ), ( + f"Found a filtered dimension that did not match the applied filter properties! " + f"Included properties: {with_any_of_properties}, excluded properties: {without_any_of_properties}, " + f"dimensions: {filtered_dimensions}" + ) + assert len(filtered_set.path_key_to_linkable_metrics) == 0, ( + f"Found at least one metric that passed a filter which should have excluded all metric properties! 
" + f"Filter: {without_any_of_properties}. Metrics: {filtered_set.path_key_to_linkable_metrics}" + ) + assert len(filtered_set.path_key_to_linkable_entities) == 0, ( + f"Found at least one entity that passed a filter which should have excluded all entity properties! " + f"Filter: {without_any_of_properties}. Entities: {filtered_set.path_key_to_linkable_entities}" + ) + + +def test_filter_without_all_of() -> None: + """Tests behavior of filter method with a `without_all_of` specified. + + Note the filter overlap. The end result should include metrics, but not if they have JOINED. + """ + with_any_of_properties = frozenset( + [LinkableElementProperty.JOINED, LinkableElementProperty.LOCAL_LINKED, LinkableElementProperty.METRIC] + ) + without_all_of_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.METRIC]) + linkable_set = _linkable_set_with_uniques_and_duplicates() + + filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_all_of=without_all_of_properties) + + filtered_entities = [ + entity for entity in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_entities.values()) + ] + assert any(LinkableElementProperty.JOINED in entity.properties for entity in filtered_entities), ( + f"At least one entity with a JOINED property was expected in the filtered output. " + f"Filter with_any_of: {with_any_of_properties}, filter without_all_of: {without_all_of_properties}. " + f"Entities: {linkable_set.path_key_to_linkable_entities.values()}. Filtered Entities: {filtered_entities}" + ) + filtered_metrics = [ + metric for metric in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_metrics.values()) + ] + assert len(filtered_metrics) > 0, ( + f"At least one metric without a JOINED property was expected in the filtered output. " + f"Filter with_any_of: {with_any_of_properties}, filter without_all_of: {without_all_of_properties}. " + f"Metrics: {linkable_set.path_key_to_linkable_metrics.values()}. 
Filtered Metrics: {filtered_metrics}" + ) + assert all([LinkableElementProperty.JOINED not in metric.properties for metric in filtered_metrics]), ( + f"Found a filtered metric that did not match the applied filter properties! " + f"Filter properties: {without_all_of_properties}, metrics: {filtered_metrics}" + ) + + +def test_intersection_by_path_key() -> None: + """Tests basic intersection operations between LinkableElementSet instances. + + The intersection behavior for the metric type, in particular, illustrates how the base class handles the case + where the path key exists across all sets, but the values associated with it diverge. We expect the union + of all input values in this case, since the intersection is by path key not linkable entity. + """ + final_entities = { + _base_entity.path_key: (_base_entity,), + _ambiguous_entity.path_key: (_ambiguous_entity, _ambiguous_entity_with_join_path), + } + + linkable_set = _linkable_set_with_uniques_and_duplicates() + intermediate_set = LinkableElementSet( + path_key_to_linkable_dimensions={ + _categorical_dimension.path_key: (_categorical_dimension,), + _time_dimension.path_key: (_time_dimension,), + }, + path_key_to_linkable_entities=final_entities, + path_key_to_linkable_metrics={ + _base_metric.path_key: (_base_metric,), + _ambiguous_metric.path_key: (_ambiguous_metric_with_join_path,), + }, + ) + final_set = LinkableElementSet( + path_key_to_linkable_dimensions={_categorical_dimension.path_key: (_categorical_dimension,)}, + path_key_to_linkable_entities=final_entities, + path_key_to_linkable_metrics={_ambiguous_metric.path_key: (_ambiguous_metric,)}, + ) + + intersection = LinkableElementSet.intersection_by_path_key([linkable_set, intermediate_set, final_set]) + + assert { + _categorical_dimension.path_key: (_categorical_dimension,) + } == intersection.path_key_to_linkable_dimensions, ( + "Intersection output did not match expected minimal output for dimension elements!" 
+    )
+
+    # Entity comparisons are more complicated here
+    linkable_entities = intersection.path_key_to_linkable_entities
+    assert (
+        _base_entity.path_key in linkable_entities
+    ), f"Did not find expected base entity in intersected output: {linkable_entities}!"
+    assert (
+        _ambiguous_entity.path_key in linkable_entities
+    ), f"Did not find expected ambiguous entity in intersected output: {linkable_entities}!"
+    assert (
+        len(linkable_entities) == 2
+    ), f"Did not get the expected number of entity entries from intersection: {linkable_entities}!"
+    assert (
+        len(linkable_entities[_ambiguous_entity.path_key]) == 2
+    ), f"Did not get the expected number of ambiguous entity entries from intersection: {linkable_entities}!"
+    assert linkable_entities[_base_entity.path_key] == (
+        _base_entity,
+    ), "Base entity intersection output did not match expected value!"
+
+    # Metric comparisons demonstrate the union behavior within element path key
+    linkable_metrics = intersection.path_key_to_linkable_metrics
+    assert (
+        len(linkable_metrics) == 1
+    ), f"Did not find the expected number of metric entries from intersection: {linkable_metrics}!"
+    assert (
+        _ambiguous_metric.path_key in linkable_metrics
+    ), f"Did not find expected ambiguous metric in intersected output!{linkable_metrics}"
+    assert (
+        len(linkable_metrics[_ambiguous_metric.path_key]) == 2
+    ), f"Did not get the expected number of ambiguous metric entries from intersection: {linkable_metrics}!"
+    expected_metrics = [_ambiguous_metric_with_join_path, _ambiguous_metric]
+    assert all(metric in linkable_metrics[_ambiguous_metric.path_key] for metric in expected_metrics), (
+        f"Did not find all expected metrics in ambiguous metric output. Expected: {expected_metrics}. "
+        f"Actual: {linkable_metrics}."
+    )
+
+
+def test_merge_by_path_key() -> None:
+    """Checks how LinkableElementSet.merge_by_path_key combines two sets.
+
+    Both inputs hold the same categorical dimension under the same path key, so the dimension assertions pin down
+    that the merged value for a shared path key is a tuple of every input element, duplicates included. The entity
+    and metric assertions cover path keys present in only one input and a path key whose elements differ per input.
+    """
+    left_set = LinkableElementSet(
+        path_key_to_linkable_dimensions={_categorical_dimension.path_key: (_categorical_dimension,)},
+        path_key_to_linkable_entities={_base_entity.path_key: (_base_entity,)},
+        path_key_to_linkable_metrics={
+            _base_metric.path_key: (_base_metric,),
+            _ambiguous_metric.path_key: (_ambiguous_metric_with_join_path,),
+        },
+    )
+    right_set = LinkableElementSet(
+        path_key_to_linkable_dimensions={
+            _categorical_dimension.path_key: (_categorical_dimension,),
+            _ambiguous_categorical_dimension.path_key: (_ambiguous_categorical_dimension,),
+        },
+        path_key_to_linkable_entities={
+            _ambiguous_entity.path_key: (_ambiguous_entity, _ambiguous_entity_with_join_path),
+        },
+        path_key_to_linkable_metrics={
+            _ambiguous_metric.path_key: (_ambiguous_metric,),
+        },
+    )
+
+    merged = LinkableElementSet.merge_by_path_key([left_set, right_set])
+
+    # Dimensions: the shared path key must keep both copies of the same element.
+    merged_dimensions = merged.path_key_to_linkable_dimensions
+    assert len(merged_dimensions) == 2, (
+        f"Did not get the expected number of path keys for merged dimensions! Dimensions: {merged_dimensions}"
+    )
+    for dimension in (_categorical_dimension, _ambiguous_categorical_dimension):
+        assert dimension.path_key in merged_dimensions, (
+            f"Did not get expected keys in merged dimensions! Dimension keys: {list(merged_dimensions.keys())}"
+        )
+    categorical_values = merged_dimensions[_categorical_dimension.path_key]
+    assert len(categorical_values) == 2, (
+        f"Did not get the expected number of values for merged categorical dimensions. Duplicate values are expected! "
+        f"Categorical dimensions: {merged_dimensions[_categorical_dimension.path_key]}"
+    )
+    for dim in categorical_values:
+        assert dim == _categorical_dimension, (
+            f"Found unexpected value in categorical dimension set, which should consist only of duplicate values. "
+            f"Categorical dimensions found: {merged_dimensions[_categorical_dimension.path_key]}"
+        )
+    expected_ambiguous_dimensions = (_ambiguous_categorical_dimension,)
+    assert (
+        merged_dimensions[_ambiguous_categorical_dimension.path_key] == expected_ambiguous_dimensions
+    ), "Did not get expected value for merged ambiguous categorical dimension!"
+
+    # Entities: each path key comes from exactly one of the two inputs.
+    merged_entities = merged.path_key_to_linkable_entities
+    assert len(merged_entities) == 2, (
+        f"Did not get the expected number of path keys for merged entities! Entities: {merged_entities}"
+    )
+    for entity in (_base_entity, _ambiguous_entity):
+        assert entity.path_key in merged_entities, (
+            f"Did not get expected keys in merged entities! Entity keys: {list(merged_entities.keys())}"
+        )
+    expected_base_entities = (_base_entity,)
+    assert (
+        merged_entities[_base_entity.path_key] == expected_base_entities
+    ), "Did not get expected value for merged base entity!"
+    expected_ambiguous_entities = (_ambiguous_entity, _ambiguous_entity_with_join_path)
+    assert (
+        merged_entities[_ambiguous_entity.path_key] == expected_ambiguous_entities
+    ), "Did not get expected value for merged ambiguous entity!"
+
+    # Metrics: the ambiguous path key is shared, with a different element contributed by each input.
+    merged_metrics = merged.path_key_to_linkable_metrics
+    assert len(merged_metrics) == 2, (
+        f"Did not get the expected number of path keys for merged metrics! Metrics: {merged_metrics}"
+    )
+    for metric in (_base_metric, _ambiguous_metric):
+        assert metric.path_key in merged_metrics, (
+            f"Did not get expected keys in merged metrics! Metric keys: {list(merged_metrics.keys())}"
+        )
+    expected_base_metrics = (_base_metric,)
+    assert (
+        merged_metrics[_base_metric.path_key] == expected_base_metrics
+    ), "Did not get expected value for merged base metric!"
+    ambiguous_metrics = merged_metrics[_ambiguous_metric.path_key]
+    assert len(ambiguous_metrics) == 2, (
+        f"Did not get expected value for merged ambiguous metrics! Ambiguous metrics: {ambiguous_metrics}"
+    )
+    for metric in (_ambiguous_metric, _ambiguous_metric_with_join_path):
+        assert metric in ambiguous_metrics, (
+            f"Did not get expected value for merged ambiguous metrics! Ambiguous metrics: {ambiguous_metrics}"
+        )
+
+
+def test_only_unique_path_keys() -> None:
+    """Checks which path keys survive the only_unique_path_keys accessor on LinkableElementSet.
+
+    Every path key holding more than one element is expected to be dropped, including the dimension and entity keys
+    whose elements contain exact duplicates, leaving only the keys that map to a single element.
+    """
+    duplicated_set = LinkableElementSet(
+        path_key_to_linkable_dimensions={
+            _categorical_dimension.path_key: (_categorical_dimension, _categorical_dimension),
+            _time_dimension.path_key: (_time_dimension,),
+        },
+        path_key_to_linkable_entities={
+            _base_entity.path_key: (_base_entity,),
+            _ambiguous_entity.path_key: (_ambiguous_entity, _ambiguous_entity, _ambiguous_entity_with_join_path),
+        },
+        path_key_to_linkable_metrics={
+            _ambiguous_metric.path_key: (_ambiguous_metric, _ambiguous_metric_with_join_path),
+        },
+    )
+
+    unique_set = duplicated_set.only_unique_path_keys
+
+    # Only the single-element path keys should remain; the metric mapping should end up empty.
+    expected_dimensions = {_time_dimension.path_key: (_time_dimension,)}
+    assert (
+        unique_set.path_key_to_linkable_dimensions == expected_dimensions
+    ), "Got an unexpected value for unique dimensions sets!"
+    expected_entities = {_base_entity.path_key: (_base_entity,)}
+    assert (
+        unique_set.path_key_to_linkable_entities == expected_entities
+    ), "Got unexpected value for unique entity sets!"
+    assert unique_set.path_key_to_linkable_metrics == {}, "Found unexpected unique values for metric sets!"