From dafccd5afe786de1518a0f79b9a453ffcca20c34 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 29 Aug 2024 10:25:41 -0700 Subject: [PATCH] Allow custom granularity names in string name to struct conversion MetricFlow relies on a class called StructuredLinkableSpecName to manage conversion between structured names and their string representations, e.g., `listing__is_lux` or `listing__created_at__month`. Up until now the time granularity component of this naming struct required an enumeration value, which would then be converted to its string representation at serialization time. With custom granularities we need to explicitly allow for a string name for the input time granularity. This implements that change, along with some added enforcement to ensure consistency in how we conver to/from the string representations of these names. --- .../naming/linkable_spec_name.py | 40 ++++++++++++++++--- .../specs/time_dimension_spec.py | 2 +- .../metricflow_semantics/time/granularity.py | 9 +++++ metricflow/engine/metricflow_engine.py | 5 +-- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/naming/linkable_spec_name.py b/metricflow-semantics/metricflow_semantics/naming/linkable_spec_name.py index e277816674..5d6e5a9ced 100644 --- a/metricflow-semantics/metricflow_semantics/naming/linkable_spec_name.py +++ b/metricflow-semantics/metricflow_semantics/naming/linkable_spec_name.py @@ -7,6 +7,8 @@ from dbt_semantic_interfaces.type_enums.date_part import DatePart from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity +from metricflow_semantics.time.granularity import ExpandedTimeGranularity + DUNDER = "__" logger = logging.getLogger(__name__) @@ -24,9 +26,36 @@ class StructuredLinkableSpecName: entity_link_names: Tuple[str, ...] element_name: str - time_granularity: Optional[TimeGranularity] = None + time_granularity_name: Optional[str] = None date_part: Optional[DatePart] = None + def __post_init__(self) -> None: + """Post init method to ensure all name values are lower-case. + + This implies that all values in the TimeGranularity enumeration must be lower-case. + """ + assert self.element_name.lower() == self.element_name, ( + f"Element name {self.element_name} is not all lower-case. This should have been addressed at model " + "parse time!" + ) + assert all([link_name.lower() == link_name for link_name in self.entity_link_names]), ( + f"Found entity link name that includes upper-case letters. Entity link names: {self.entity_link_names}. " + "This should have been addressed at model parse time!" + ) + + if self.time_granularity_name: + is_standard_name = ExpandedTimeGranularity.is_standard_granularity_name(self.time_granularity_name) + if is_standard_name: + follow_up = ( + "This indicates a bug where someone added a non-lowercase value to the TimeGranularity " + "enum in dbt-semantic-interfaces!" + ) + else: + follow_up = "This should have been addressed at model parse time!" + assert ( + self.time_granularity_name.lower() == self.time_granularity_name + ), f"The time_granularity_name {self.time_granularity_name} includes upper-case letters. {follow_up}" + @staticmethod def from_name(qualified_name: str) -> StructuredLinkableSpecName: """Construct from a name e.g. listing__ds__month.""" @@ -43,6 +72,7 @@ def from_name(qualified_name: str) -> StructuredLinkableSpecName: ) associated_granularity = None + # TODO: [custom granularity] Update parsing to account for custom granularities for granularity in TimeGranularity: if name_parts[-1] == granularity.value: associated_granularity = granularity @@ -52,13 +82,13 @@ def from_name(qualified_name: str) -> StructuredLinkableSpecName: # e.g. "ds__month" if len(name_parts) == 2: return StructuredLinkableSpecName( - entity_link_names=(), element_name=name_parts[0], time_granularity=associated_granularity + entity_link_names=(), element_name=name_parts[0], time_granularity_name=associated_granularity.value ) # e.g. "messages__ds__month" return StructuredLinkableSpecName( entity_link_names=tuple(name_parts[:-2]), element_name=name_parts[-2], - time_granularity=associated_granularity, + time_granularity_name=associated_granularity.value, ) # e.g. "messages__ds" @@ -74,8 +104,8 @@ def qualified_name(self) -> str: items = list(self.entity_link_names) + [self.element_name] if self.date_part: items.append(self.date_part_suffix(date_part=self.date_part)) - elif self.time_granularity: - items.append(self.time_granularity.value) + elif self.time_granularity_name: + items.append(self.time_granularity_name) return DUNDER.join(items) @property diff --git a/metricflow-semantics/metricflow_semantics/specs/time_dimension_spec.py b/metricflow-semantics/metricflow_semantics/specs/time_dimension_spec.py index 8c762e35a2..38aa979733 100644 --- a/metricflow-semantics/metricflow_semantics/specs/time_dimension_spec.py +++ b/metricflow-semantics/metricflow_semantics/specs/time_dimension_spec.py @@ -122,7 +122,7 @@ def qualified_name(self) -> str: # noqa: D102 return StructuredLinkableSpecName( entity_link_names=tuple(x.element_name for x in self.entity_links), element_name=self.element_name, - time_granularity=self.time_granularity, + time_granularity_name=self.time_granularity.value, date_part=self.date_part, ).qualified_name diff --git a/metricflow-semantics/metricflow_semantics/time/granularity.py b/metricflow-semantics/metricflow_semantics/time/granularity.py index 872b7c7ee3..98d14ca00f 100644 --- a/metricflow-semantics/metricflow_semantics/time/granularity.py +++ b/metricflow-semantics/metricflow_semantics/time/granularity.py @@ -30,3 +30,12 @@ def is_custom_granularity(self) -> bool: # noqa: D102 def from_time_granularity(cls, granularity: TimeGranularity) -> ExpandedTimeGranularity: """Factory method for creating an ExpandedTimeGranularity from a standard TimeGranularity enumeration value.""" return ExpandedTimeGranularity(name=granularity.value, base_granularity=granularity) + + @staticmethod + def is_standard_granularity_name(time_granularity_name: str) -> bool: + """Helper for checking if a given time granularity name is part of the standard TimeGranularity enumeration.""" + for granularity in TimeGranularity: + if time_granularity_name == granularity.value: + return True + + return False diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index c30e0e928a..2b25a79b8b 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -595,9 +595,8 @@ def simple_dimensions_for_metrics( # noqa: D102 entity_reference.element_name for entity_reference in linkable_dimension.entity_links ), - # TODO: Update StructuredLinkableSpecName to use ExpandedTimeGranularity - time_granularity=( - linkable_dimension.time_granularity.base_granularity + time_granularity_name=( + linkable_dimension.time_granularity.name if linkable_dimension.time_granularity is not None else None ),