Skip to content

Commit

Permalink
Make LinkableElementSet.only_unique_path_keys consider distinct values
Browse files Browse the repository at this point in the history
The only_unique_path_keys property in LinkableElementSet previously
did a simple length check on the value tuple for each element type,
meaning that a path key pointing to a tuple containing a lot of
duplicate elements would not be included.

This is almost certainly counter to expectation - indeed the only callsite
for this property clearly expects consolidation to a unique value.
The issue with this is some, but not all, of our LinkableElementSet
operations do distinct value filtering. For example, the merge_by_path_key
does not do such filtering, while the intersection_by_path_key does.

Furthermore, there is nothing stopping a callsite from creating a
set with duplicate elements, so this should probably apply the deduplication
on each call regardless.

This change adjusts the behavior to be more in line with end user
expectations. If we decide that the intersect and merge methods should
both be brought in line with allowing duplicates we can factor the
deduplication logic out later.
  • Loading branch information
tlento committed Apr 23, 2024
1 parent 9159ec0 commit d977f57
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 8 deletions.
43 changes: 36 additions & 7 deletions metricflow/model/semantics/linkable_element_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,35 @@ class LinkableElementSet:
path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = field(default_factory=dict)
path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = field(default_factory=dict)

def __post_init__(self) -> None:
"""Basic validation for ensuring consistency between path key type and value type."""
mismatched_dimensions = tuple(
path_key
for path_key in self.path_key_to_linkable_dimensions.keys()
if not path_key.element_type.is_dimension_type
)
mismatched_entities = tuple(
path_key
for path_key in self.path_key_to_linkable_entities
if path_key.element_type is not LinkableElementType.ENTITY
)
mismatched_metrics = tuple(
path_key
for path_key in self.path_key_to_linkable_metrics
if path_key.element_type is not LinkableElementType.METRIC
)

mismatched_elements = {
"dimensions": mismatched_dimensions,
"entities": mismatched_entities,
"metrics": mismatched_metrics,
}

assert all(len(mismatches) == 0 for mismatches in mismatched_elements.values()), (
f"Found one or more elements where the element type defined in the path key does not match the value "
f"type! Mismatched elements: {mismatched_elements}"
)

@staticmethod
def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet:
"""Combine multiple sets together by the path key.
Expand Down Expand Up @@ -237,21 +266,21 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102

@property
def only_unique_path_keys(self) -> LinkableElementSet:
"""Returns a set that only includes path keys that map to a single element."""
"""Returns a set that only includes path keys that map to a single distinct element."""
return LinkableElementSet(
path_key_to_linkable_dimensions={
path_key: linkable_dimensions
path_key: tuple(set(linkable_dimensions))
for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items()
if len(linkable_dimensions) <= 1
if len(set(linkable_dimensions)) <= 1
},
path_key_to_linkable_entities={
path_key: linkable_entities
path_key: tuple(set(linkable_entities))
for path_key, linkable_entities in self.path_key_to_linkable_entities.items()
if len(linkable_entities) <= 1
if len(set(linkable_entities)) <= 1
},
path_key_to_linkable_metrics={
path_key: linkable_metrics
path_key: tuple(set(linkable_metrics))
for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items()
if len(linkable_metrics) <= 1
if len(set(linkable_metrics)) <= 1
},
)
3 changes: 2 additions & 1 deletion tests/model/semantics/test_linkable_element_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,8 @@ def test_only_unique_path_keys() -> None:
unique_path_keys = base_set.only_unique_path_keys

assert unique_path_keys.path_key_to_linkable_dimensions == {
_time_dimension.path_key: (_time_dimension,)
_categorical_dimension.path_key: (_categorical_dimension,),
_time_dimension.path_key: (_time_dimension,),
}, "Got an unexpected value for unique dimensions sets!"
assert unique_path_keys.path_key_to_linkable_entities == {
_base_entity.path_key: (_base_entity,)
Expand Down

0 comments on commit d977f57

Please sign in to comment.