Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Naming Schemes to Represent Different Input Formats #893

Merged
merged 1 commit into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions metricflow/naming/dunder_scheme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from __future__ import annotations

import re
from typing import Optional, Sequence, Tuple

from dbt_semantic_interfaces.naming.keywords import DUNDER
from dbt_semantic_interfaces.references import EntityReference
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from typing_extensions import override

from metricflow.naming.naming_scheme import QueryItemNamingScheme
from metricflow.specs.patterns.entity_link_pattern import (
EntityLinkPattern,
EntityLinkPatternParameterSet,
ParameterSetField,
)
from metricflow.specs.specs import (
InstanceSpec,
InstanceSpecSet,
InstanceSpecSetTransform,
)


class DunderNamingScheme(QueryItemNamingScheme):
"""A naming scheme using the dundered name syntax.

Names consist of parts joined by DUNDER, e.g. "ds", "ds__month", "messages__ds" or "messages__ds__month".
Input strings are handled case-insensitively (they are lower-cased before being checked / parsed), and names
that end with a date_part suffix are not supported by this scheme.

TODO: Consolidate with StructuredLinkableSpecName / DunderedNameFormatter.
"""

# Matched against the lower-cased input: the string must start with a letter, end with a letter or digit, and
# only contain letters, digits, and underscores in between. Note that this requires at least 2 characters.
_INPUT_REGEX = re.compile(r"\A[a-z]([a-z0-9_])*[a-z0-9]\Z")

@staticmethod
def date_part_suffix(date_part: DatePart) -> str:
    """Return the trailing name part that denotes the given date_part, i.e. "extract_<date_part value>"."""
    return "extract_{}".format(date_part.value)

@override
def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
"""Return the dundered name for the given spec, or None if this scheme can't represent it.

None is returned for a time dimension spec that has a date_part. A RuntimeError is raised if the spec does not
map to exactly one name.
"""
# Wrap the single spec in a spec set so that it can be examined by spec type.
spec_set = InstanceSpecSet.from_specs((instance_spec,))

for time_dimension_spec in spec_set.time_dimension_specs:
# From existing comment in StructuredLinkableSpecName:
#
# Dunder syntax not supported for querying date_part
#
if time_dimension_spec.date_part is not None:
return None
Comment on lines +42 to +48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like an incredibly roundabout way of getting this value. I guess it's ok for now while we think about how to improve the spec class interfaces.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree. However, I haven't been able to come up with a better one. Have ideas?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had an idea a while back but it pushed too much stuff into the common interface. We'll come up with something.

# The spec set was built from a single spec, so the transform is expected to produce exactly one name. Anything
# else is treated as an error instead of silently picking a name.
names = _DunderNameTransform().transform(spec_set)
if len(names) != 1:
raise RuntimeError(f"Did not get 1 name for {instance_spec}. Got {names}")

return names[0]

@override
def spec_pattern(self, input_str: str) -> EntityLinkPattern:
    """Parse a dundered name into a pattern that matches the described group-by-item.

    The name is lower-cased and split on DUNDER. If there are at least two parts and the last part is the value of
    a TimeGranularity, it is taken as the time grain and TIME_GRANULARITY is added to the compared fields. The last
    remaining part is the element name, and all parts before it are entity links. The date_part of the pattern is
    always None.

    Raises a ValueError if the input does not follow this scheme.
    """
    if not self.input_str_follows_scheme(input_str):
        raise ValueError(f"{repr(input_str)} does not follow this scheme.")

    name_parts = input_str.lower().split(DUNDER)
    compared_fields: Tuple[ParameterSetField, ...] = (
        ParameterSetField.ELEMENT_NAME,
        ParameterSetField.ENTITY_LINKS,
        ParameterSetField.DATE_PART,
    )

    # A name without a dunder (e.g. "ds") is never interpreted as a time grain, even if it equals a grain value.
    matched_grain = None
    if len(name_parts) > 1:
        matched_grain = next(
            (granularity for granularity in TimeGranularity if granularity.value == name_parts[-1]),
            None,
        )

    if matched_grain is None:
        # e.g. "ds" or "messages__ds"
        element_name = name_parts[-1]
        link_names = name_parts[:-1]
    else:
        # e.g. "ds__month" or "messages__ds__month"
        compared_fields = compared_fields + (ParameterSetField.TIME_GRANULARITY,)
        element_name = name_parts[-2]
        link_names = name_parts[:-2]

    return EntityLinkPattern(
        parameter_set=EntityLinkPatternParameterSet.from_parameters(
            element_name=element_name,
            entity_links=tuple(EntityReference(link_name) for link_name in link_names),
            time_granularity=matched_grain,
            date_part=None,
            fields_to_compare=compared_fields,
        )
    )

@override
def input_str_follows_scheme(self, input_str: str) -> bool:
    """Return True if the input, compared case-insensitively, is a dundered name that this scheme supports.

    The lower-cased input must match _INPUT_REGEX, and its last DUNDER-separated part must not be a date_part
    suffix.
    """
    normalized_input = input_str.lower()
    if not DunderNamingScheme._INPUT_REGEX.match(normalized_input):
        return False

    last_part = normalized_input.split(DUNDER)[-1]
    # From existing message in StructuredLinkableSpecName: "Dunder syntax not supported for querying date_part".
    return not any(
        last_part == DunderNamingScheme.date_part_suffix(date_part=date_part) for date_part in DatePart
    )

@override
def __repr__(self) -> str:
    """Return the class name together with the hex id of this instance."""
    class_name = self.__class__.__name__
    instance_id = hex(id(self))
    return f"{class_name}(id()={instance_id})"


class _DunderNameTransform(InstanceSpecSetTransform[Sequence[str]]):
    """Transforms group-by-item specs into their dundered names.

    Only the time dimension, entity, and dimension specs of the spec set are named.
    """

    @override
    def transform(self, spec_set: InstanceSpecSet) -> Sequence[str]:
        """Return the sorted dundered names for the group-by-item specs in the given spec set."""
        dundered_names = []

        # Time dimensions get a trailing part: the date_part suffix if a date_part is set, else the time grain.
        for time_dimension_spec in spec_set.time_dimension_specs:
            name_parts = [
                *(entity_link.element_name for entity_link in time_dimension_spec.entity_links),
                time_dimension_spec.element_name,
            ]
            date_part = time_dimension_spec.date_part
            if date_part is None:
                name_parts.append(time_dimension_spec.time_granularity.value)
            else:
                name_parts.append(DunderNamingScheme.date_part_suffix(date_part=date_part))
            dundered_names.append(DUNDER.join(name_parts))

        # Entities and (non-time) dimensions are just the entity links followed by the element name.
        for group_by_item_spec in spec_set.entity_specs + spec_set.dimension_specs:
            name_parts = [
                *(entity_link.element_name for entity_link in group_by_item_spec.entity_links),
                group_by_item_spec.element_name,
            ]
            dundered_names.append(DUNDER.join(name_parts))

        dundered_names.sort()
        return dundered_names
43 changes: 43 additions & 0 deletions metricflow/naming/metric_scheme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from __future__ import annotations

from typing import Optional

from dbt_semantic_interfaces.references import MetricReference
from typing_extensions import override

from metricflow.naming.naming_scheme import QueryItemNamingScheme
from metricflow.specs.patterns.metric_pattern import MetricSpecPattern
from metricflow.specs.specs import (
InstanceSpec,
InstanceSpecSet,
)


class MetricNamingScheme(QueryItemNamingScheme):
    """A naming scheme for metrics.

    A metric is named by its element name. Input strings are lower-cased before a pattern is created from them.
    """

    @override
    def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
        """Return the element name of the given metric spec.

        Raises a RuntimeError if the spec does not map to exactly one metric name, which is the case when the spec
        set created from it has no metric specs.
        """
        spec_set = InstanceSpecSet.from_specs((instance_spec,))
        names = tuple(spec.element_name for spec in spec_set.metric_specs)

        if len(names) != 1:
            raise RuntimeError(f"Did not get 1 name for {instance_spec}. Got {names}")

        return names[0]

    @override
    def spec_pattern(self, input_str: str) -> MetricSpecPattern:
        """Return a pattern that matches the metric with the given (case-insensitive) name.

        Raises a ValueError if the input does not follow this scheme.
        """
        input_str = input_str.lower()
        if not self.input_str_follows_scheme(input_str):
            # ValueError (not RuntimeError) is the error type specified by QueryItemNamingScheme.spec_pattern() and
            # used by the other naming schemes for input that does not follow the scheme.
            raise ValueError(f"{repr(input_str)} does not follow this scheme.")
        return MetricSpecPattern(metric_reference=MetricReference(element_name=input_str))

    @override
    def input_str_follows_scheme(self, input_str: str) -> bool:
        """Return True for any input, as no validation of metric names is done yet."""
        # TODO: Use regex.
        return True

    @override
    def __repr__(self) -> str:
        """Return the class name together with the hex id of this instance."""
        return f"{self.__class__.__name__}(id()={hex(id(self))})"
44 changes: 44 additions & 0 deletions metricflow/naming/naming_scheme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Optional

from metricflow.specs.patterns.spec_pattern import SpecPattern
from metricflow.specs.specs import InstanceSpec


class QueryItemNamingScheme(ABC):
"""Describes how to name items that are involved in a MetricFlow query.

Most useful for group-by-items as there are different ways to name them like "user__country"
or "TimeDimension('metric_time', 'DAY')".

A scheme converts in both directions: input_str() goes from a spec to the string a user could input, and
spec_pattern() goes from an input string to a pattern that matches specs. input_str_follows_scheme() checks
whether an input string can be handled by the scheme.
"""

@abstractmethod
def input_str(self, instance_spec: InstanceSpec) -> Optional[str]:
    """Return the input string that, under this scheme, would specify the given spec.

    The result is used to suggest valid inputs (generated from the available group-by-items) when the user
    specifies a group-by-item that is invalid.

    Return None if this scheme cannot accommodate the spec. This exists to handle unsupported cases in
    DunderNamingScheme, such as DatePart. Naming schemes should otherwise be complete.
    """

@abstractmethod
def spec_pattern(self, input_str: str) -> SpecPattern:
    """Return a spec pattern that matches the item described by an input string that follows this scheme.

    Raise a ValueError if the input_str does not follow this scheme. In practice, callers should check the
    input_str with input_str_follows_scheme() before calling this method.
    """

# Check to run on an input string before passing it to spec_pattern(), which raises a ValueError for strings that
# do not follow the scheme.
@abstractmethod
def input_str_follows_scheme(self, input_str: str) -> bool:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth adding an enforcing version of this that's implemented to raise a consistent error, since mismatches are likely to all share the same root cause and error type/response messaging, something like:

def assert_input_str_follows_scheme(self, input_str: str) -> None:
    if not self.input_str_follows_scheme(input_str):
        raise ....

Then the implementations can just call the assert method when they need it instead of handling the exception info itself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In later commits of this set, we avoid raising exceptions in favor of creating query issues so that all errors can be collected and displayed to the user.

"""Returns true if the given input string follows this naming scheme.

Consider adding a structured result that indicates why it does not match the scheme.
"""
pass
Loading