-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Naming Schemes to Represent Different Input Formats #893
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
from __future__ import annotations | ||
|
||
import re | ||
from typing import Optional, Sequence, Tuple | ||
|
||
from dbt_semantic_interfaces.naming.keywords import DUNDER | ||
from dbt_semantic_interfaces.references import EntityReference | ||
from dbt_semantic_interfaces.type_enums import TimeGranularity | ||
from dbt_semantic_interfaces.type_enums.date_part import DatePart | ||
from typing_extensions import override | ||
|
||
from metricflow.naming.naming_scheme import QueryItemNamingScheme | ||
from metricflow.specs.patterns.entity_link_pattern import ( | ||
EntityLinkPattern, | ||
EntityLinkPatternParameterSet, | ||
ParameterSetField, | ||
) | ||
from metricflow.specs.specs import ( | ||
InstanceSpec, | ||
InstanceSpecSet, | ||
InstanceSpecSetTransform, | ||
) | ||
|
||
|
||
class DunderNamingScheme(QueryItemNamingScheme):
    """Naming scheme that joins entity links, the element name, and an optional time grain with a dunder.

    Inputs look like "ds", "ds__month", "messages__ds" or "messages__ds__month".

    TODO: Consolidate with StructuredLinkableSpecName / DunderedNameFormatter.
    """

    _INPUT_REGEX = re.compile(r"\A[a-z]([a-z0-9_])*[a-z0-9]\Z")

    @staticmethod
    def date_part_suffix(date_part: DatePart) -> str:
        """Return the trailing name component used for names that carry a date_part."""
        return f"extract_{date_part.value}"

    @override
    def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
        """Return the dundered name for the spec, or None if the spec has a date_part.

        Raises a RuntimeError if the spec does not map to exactly one name.
        """
        spec_set = InstanceSpecSet.from_specs((instance_spec,))

        # Carried over from StructuredLinkableSpecName: dunder syntax is not supported for querying date_part.
        if any(spec.date_part is not None for spec in spec_set.time_dimension_specs):
            return None

        dundered_names = _DunderNameTransform().transform(spec_set)
        if len(dundered_names) != 1:
            raise RuntimeError(f"Did not get 1 name for {instance_spec}. Got {dundered_names}")

        return dundered_names[0]

    @override
    def spec_pattern(self, input_str: str) -> EntityLinkPattern:
        """Build the pattern described by a dundered input string.

        Raises a ValueError if the input does not follow this scheme.
        """
        if not self.input_str_follows_scheme(input_str):
            raise ValueError(f"{repr(input_str)} does not follow this scheme.")

        name_parts = input_str.lower().split(DUNDER)
        part_count = len(name_parts)

        # The last part is only considered a grain when something precedes it, so a lone "month" stays an
        # element name.
        grain: Optional[TimeGranularity] = None
        if part_count >= 2:
            matching_grains = [candidate for candidate in TimeGranularity if name_parts[-1] == candidate.value]
            if matching_grains:
                grain = matching_grains[-1]

        compared_fields: Tuple[ParameterSetField, ...] = (
            ParameterSetField.ELEMENT_NAME,
            ParameterSetField.ENTITY_LINKS,
            ParameterSetField.DATE_PART,
        )

        if grain is None:
            # e.g. "ds" or "messages__ds": the element name is the last part.
            element_index = part_count - 1
        else:
            # e.g. "ds__month" or "messages__ds__month": the element name precedes the grain.
            element_index = part_count - 2
            compared_fields = compared_fields + (ParameterSetField.TIME_GRANULARITY,)

        # Everything before the element name is an entity link (possibly none).
        return EntityLinkPattern(
            parameter_set=EntityLinkPatternParameterSet.from_parameters(
                element_name=name_parts[element_index],
                entity_links=tuple(EntityReference(link_name) for link_name in name_parts[:element_index]),
                time_granularity=grain,
                date_part=None,
                fields_to_compare=compared_fields,
            )
        )

    @override
    def input_str_follows_scheme(self, input_str: str) -> bool:
        """Return True if the lowercased input matches the name regex and does not end in a date_part suffix."""
        # This naming scheme is case-insensitive.
        lowered = input_str.lower()
        if DunderNamingScheme._INPUT_REGEX.match(lowered) is None:
            return False

        last_part = lowered.split(DUNDER)[-1]

        # From existing message in StructuredLinkableSpecName: "Dunder syntax not supported for querying
        # date_part".
        return all(last_part != DunderNamingScheme.date_part_suffix(date_part=date_part) for date_part in DatePart)

    @override
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(id()={hex(id(self))})"
|
||
|
||
class _DunderNameTransform(InstanceSpecSetTransform[Sequence[str]]):
    """Converts the group-by-item specs in a spec set into their dundered names."""

    @override
    def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]:
        """Return the sorted dundered names for the time dimension, entity, and dimension specs in the set."""
        dundered_names = []

        for time_spec in spec_set.time_dimension_specs:
            name_parts = [link.element_name for link in time_spec.entity_links]
            name_parts.append(time_spec.element_name)
            # A time dimension name ends with either its date_part suffix or its time grain.
            if time_spec.date_part is None:
                name_parts.append(time_spec.time_granularity.value)
            else:
                name_parts.append(DunderNamingScheme.date_part_suffix(date_part=time_spec.date_part))
            dundered_names.append(DUNDER.join(name_parts))

        for group_by_spec in spec_set.entity_specs + spec_set.dimension_specs:
            name_parts = [link.element_name for link in group_by_spec.entity_links]
            name_parts.append(group_by_spec.element_name)
            dundered_names.append(DUNDER.join(name_parts))

        dundered_names.sort()
        return dundered_names
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Optional | ||
|
||
from dbt_semantic_interfaces.references import MetricReference | ||
from typing_extensions import override | ||
|
||
from metricflow.naming.naming_scheme import QueryItemNamingScheme | ||
from metricflow.specs.patterns.metric_pattern import MetricSpecPattern | ||
from metricflow.specs.specs import ( | ||
InstanceSpec, | ||
InstanceSpecSet, | ||
) | ||
|
||
|
||
class MetricNamingScheme(QueryItemNamingScheme):
    """A naming scheme for metrics.

    A metric is named by its element name. Inputs are lowercased before the pattern is built.
    """

    @override
    def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
        """Return the element name of the metric described by the given spec.

        Raises:
            RuntimeError: If the spec does not yield exactly one metric name.
        """
        spec_set = InstanceSpecSet.from_specs((instance_spec,))
        names = tuple(spec.element_name for spec in spec_set.metric_specs)

        if len(names) != 1:
            raise RuntimeError(f"Did not get 1 name for {instance_spec}. Got {names}")

        return names[0]

    @override
    def spec_pattern(self, input_str: str) -> MetricSpecPattern:
        """Return a pattern that matches the metric named by the (lowercased) input string.

        Raises:
            ValueError: If the input does not follow this scheme.
        """
        input_str = input_str.lower()
        if not self.input_str_follows_scheme(input_str):
            # ValueError (not RuntimeError) matches the contract documented on QueryItemNamingScheme.spec_pattern
            # and the behavior of DunderNamingScheme.spec_pattern.
            raise ValueError(f"{repr(input_str)} does not follow this scheme.")
        return MetricSpecPattern(metric_reference=MetricReference(element_name=input_str))

    @override
    def input_str_follows_scheme(self, input_str: str) -> bool:
        """Return True for every input; no validation is performed yet."""
        # TODO: Use regex.
        return True

    @override
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(id()={hex(id(self))})"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from __future__ import annotations | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Optional | ||
|
||
from metricflow.specs.patterns.spec_pattern import SpecPattern | ||
from metricflow.specs.specs import InstanceSpec | ||
|
||
|
||
class QueryItemNamingScheme(ABC):
    """Interface describing how items involved in a MetricFlow query are named.

    Mostly relevant for group-by-items, since the same item can be written in different ways, e.g.
    "user__country" or "TimeDimension('metric_time', 'DAY')".
    """

    @abstractmethod
    def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
        """Return the input string that, under this scheme, would specify the given spec.

        Used to generate suggestions from the available group-by-items when the user specifies an invalid
        group-by-item.

        Return None if this scheme cannot accommodate the spec. That escape hatch exists for unsupported cases in
        DunderNamingScheme, such as DatePart; naming schemes should otherwise be complete.
        """

    @abstractmethod
    def spec_pattern(self, input_str: str) -> SpecPattern:
        """Return a spec pattern matching the item described by an input that follows this scheme.

        Raise a ValueError if input_str does not follow this scheme. In practice, callers should check the input
        with input_str_follows_scheme() beforehand.
        """

    @abstractmethod
    def input_str_follows_scheme(self, input_str: str) -> bool:
        """Return True if the given input string follows this naming scheme.

        Consider adding a structured result that indicates why it does not match the scheme.
        """
        # NOTE(review): A reviewer suggested adding an enforcing variant of this method that raises a consistent
        # error, so implementations would not each build their own exception. The author replied that later
        # commits in this change set avoid raising exceptions in favor of creating query issues, so that all
        # errors can be collected and displayed to the user.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feels like an incredibly roundabout way of getting this value. I guess it's ok for now while we think about how to improve the spec class interfaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I agree. However, I haven't been able to come up with a better one. Have ideas?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had an idea a while back but it pushed too much stuff into the common interface. We'll come up with something.