Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DatePart Support to ValidLinkableSpecResolver #911

Merged
merged 2 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 68 additions & 10 deletions metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow.dataset.dataset import DataSet
Expand Down Expand Up @@ -46,6 +47,7 @@ class ElementPathKey:
element_name: str
entity_links: Tuple[EntityReference, ...]
time_granularity: Optional[TimeGranularity]
date_part: Optional[DatePart]


@dataclass(frozen=True)
Expand All @@ -58,14 +60,16 @@ class LinkableDimension:
entity_links: Tuple[EntityReference, ...]
join_path: Tuple[SemanticModelJoinPathElement, ...]
properties: FrozenSet[LinkableElementProperties]
time_granularity: Optional[TimeGranularity] = None
time_granularity: Optional[TimeGranularity]
date_part: Optional[DatePart]

@property
def path_key(self) -> ElementPathKey:  # noqa: D
    """Identity key for this dimension: name, entity links, grain, and date part."""
    key_fields = {
        "element_name": self.element_name,
        "entity_links": self.entity_links,
        "time_granularity": self.time_granularity,
        "date_part": self.date_part,
    }
    return ElementPathKey(**key_fields)

@property
Expand All @@ -86,7 +90,12 @@ class LinkableEntity:

@property
def path_key(self) -> ElementPathKey:  # noqa: D
    """Identity key for this entity; entities have no grain or date part, so both are None."""
    return ElementPathKey(
        element_name=self.element_name,
        entity_links=self.entity_links,
        # Entities are not time-typed, so the time-dimension-only fields stay unset.
        time_granularity=None,
        date_part=None,
    )


@dataclass(frozen=True)
Expand Down Expand Up @@ -259,6 +268,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D
element_name=path_key.element_name,
entity_links=path_key.entity_links,
time_granularity=path_key.time_granularity,
date_part=path_key.date_part,
)
for path_key in self.path_key_to_linkable_dimensions.keys()
if path_key.time_granularity
Expand Down Expand Up @@ -324,10 +334,26 @@ def _generate_linkable_time_dimensions(
entity_links=entity_links,
join_path=tuple(join_path),
time_granularity=time_granularity,
date_part=None,
properties=frozenset(properties),
)
)

# Add the time dimension aggregated to a different date part.
for date_part in DatePart:
if time_granularity.to_int() <= date_part.to_int():
linkable_dimensions.append(
LinkableDimension(
semantic_model_origin=semantic_model_origin,
element_name=dimension.reference.element_name,
entity_links=entity_links,
join_path=tuple(join_path),
time_granularity=time_granularity,
date_part=date_part,
properties=frozenset(properties),
)
)

return linkable_dimensions


Expand Down Expand Up @@ -367,6 +393,8 @@ def create_linkable_element_set(
entity_links=entity_links,
join_path=self.path_elements,
properties=with_properties,
time_granularity=None,
date_part=None,
)
)
elif dimension_type == DimensionType.TIME:
Expand Down Expand Up @@ -476,7 +504,19 @@ def __init__(
assert_values_exhausted(metric.type)

self._metric_to_linkable_element_sets[metric.name] = linkable_sets_for_measure
logger.info(f"Building the [metric -> valid linkable element] index took: {time.time() - start_time:.2f}s")

# If no metrics are specified, the query interface supports distinct dimension values from a single semantic
# model.
Comment on lines +508 to +509
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this true? Because build_plan_for_distinct_values doesn't behave as if this is true. For that matter, neither does this code. Maybe we just remove this comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well dang, looks like I misread the method. I'll have to update this and a few other parts in the stack.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, it does:

def test_distinct_values( # noqa: D

linkable_element_sets_to_merge: List[LinkableElementSet] = []

for semantic_model in semantic_manifest.semantic_models:
linkable_element_sets_to_merge.append(
ValidLinkableSpecResolver._get_elements_in_semantic_model(semantic_model)
)

self._no_metric_linkable_element_set = LinkableElementSet.merge_by_path_key(linkable_element_sets_to_merge)

logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")

def _get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> SemanticModel: # noqa: D
semantic_models_where_measure_was_found = []
Expand Down Expand Up @@ -534,6 +574,8 @@ def _get_elements_in_semantic_model(semantic_model: SemanticModel) -> LinkableEl
entity_links=(entity_link,),
join_path=(),
properties=dimension_properties,
time_granularity=None,
date_part=None,
)
)
elif dimension_type is DimensionType.TIME:
Expand Down Expand Up @@ -627,13 +669,24 @@ def _get_metric_time_elements(self, measure_reference: MeasureReference) -> Link
)

# For each of the possible time granularities, create a LinkableDimension for each one.
return LinkableElementSet(
path_key_to_linkable_dimensions={
ElementPathKey(

path_key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list)
for time_granularity in possible_metric_time_granularities:
possible_date_parts: Sequence[Optional[DatePart]] = (
# No date part, just the metric time at a different grain.
(None,)
# date part of a metric time at a different grain.
+ tuple(date_part for date_part in DatePart if time_granularity.to_int() <= date_part.to_int())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this granularity/date_part comparison all over the place and it's confusing every time. Maybe we should add a method to DatePart that does this and document the mechanics of the comparison there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we should. The reason this was in place was that there was a bug in DSI (dbt-labs/dbt-semantic-interfaces#212), so I did this while that fix was getting pulled into MF. I'll handle this as a follow-up.

)
Comment on lines +675 to +680
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I find this hard to read because it looks like a tuple of tuples and that leading + gets elided a bit by the commentary. Maybe it's a "Tom, you need reading glasses" problem, but there it is. There are lots of ways to restructure this so it's easier to see. Here's one:

# No date part (i.e., just metric time) plus all date parts for metric time at a different grain
possible_date_parts = (None,) + tuple([date_part for date_part....])

In theory the typechecker should infer that correctly but if it doesn't it's more contained and easier to reorient. Note you can also keep the multi-line commentary by using locals and just doing the assignment at the end, whatever works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will update before merging.


for date_part in possible_date_parts:
path_key = ElementPathKey(
element_name=DataSet.metric_time_dimension_name(),
entity_links=(),
time_granularity=time_granularity,
): (
date_part=date_part,
)
path_key_to_linkable_dimensions[path_key].append(
LinkableDimension(
semantic_model_origin=measure_semantic_model.reference,
element_name=DataSet.metric_time_dimension_name(),
Expand All @@ -642,17 +695,22 @@ def _get_metric_time_elements(self, measure_reference: MeasureReference) -> Link
# Anything that's not at the base time granularity of the measure's aggregation time dimension
# should be considered derived.
properties=frozenset({LinkableElementProperties.METRIC_TIME})
if time_granularity is agg_time_dimension_granularity
if time_granularity is agg_time_dimension_granularity and date_part is None
else frozenset(
{
LinkableElementProperties.METRIC_TIME,
LinkableElementProperties.DERIVED_TIME_GRANULARITY,
}
),
time_granularity=time_granularity,
),
date_part=date_part,
)
)
for time_granularity in possible_metric_time_granularities

return LinkableElementSet(
path_key_to_linkable_dimensions={
path_key: tuple(linkable_dimensions)
for path_key, linkable_dimensions in path_key_to_linkable_dimensions.items()
},
path_key_to_linkable_entities={},
)
Expand Down
4 changes: 3 additions & 1 deletion metricflow/test/snapshot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D
set_id: str,
linkable_element_set: LinkableElementSet,
) -> None:
headers = ("Semantic Model", "Entity Links", "Name", "Time Granularity", "Properties")
headers = ("Semantic Model", "Entity Links", "Name", "Time Granularity", "Date Part", "Properties")
rows = []
for linkable_dimension_iterable in linkable_element_set.path_key_to_linkable_dimensions.values():
for linkable_dimension in linkable_dimension_iterable:
Expand All @@ -306,6 +306,7 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D
tuple(entity_link.element_name for entity_link in linkable_dimension.entity_links),
linkable_dimension.element_name,
linkable_dimension.time_granularity.name if linkable_dimension.time_granularity is not None else "",
linkable_dimension.date_part.name if linkable_dimension.date_part is not None else "",
sorted(
linkable_element_property.name for linkable_element_property in linkable_dimension.properties
),
Expand All @@ -321,6 +322,7 @@ def assert_linkable_element_set_snapshot_equal( # noqa: D
tuple(entity_link.element_name for entity_link in linkable_entity.entity_links),
linkable_entity.element_name,
"",
"",
sorted(linkable_element_property.name for linkable_element_property in linkable_entity.properties),
)
)
Expand Down
Loading