Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test time spines for sub-daily granularity #1358

Merged
merged 6 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240727-081106.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Enable sub-daily queries without metrics.
time: 2024-07-27T08:11:06.357653-07:00
custom:
Author: courtneyholcomb
Issue: "1359"
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet
from metricflow_semantics.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator
from metricflow_semantics.specs.time_dimension_spec import DEFAULT_TIME_GRANULARITY
from metricflow_semantics.time.time_spine_source import TimeSpineSource

if TYPE_CHECKING:
from metricflow_semantics.model.semantics.semantic_model_lookup import SemanticModelLookup
Expand Down Expand Up @@ -124,6 +125,7 @@ def __init__(
# Sort semantic models by name for consistency in building derived objects.
self._semantic_models = sorted(self._semantic_manifest.semantic_models, key=lambda x: x.name)
self._join_evaluator = SemanticModelJoinEvaluator(semantic_model_lookup)
self._time_spine_sources = TimeSpineSource.create_from_manifest(self._semantic_manifest)

assert max_entity_links >= 0
self._max_entity_links = max_entity_links
Expand Down Expand Up @@ -454,6 +456,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
on what aggregation time dimension was used to define the measure.
"""
measure_semantic_model: Optional[SemanticModel] = None
defined_granularity: Optional[TimeGranularity] = None
if measure_reference:
measure_semantic_model = self._get_semantic_model_for_measure(measure_reference)
measure_agg_time_dimension_reference = measure_semantic_model.checked_agg_time_dimension_for_measure(
Expand All @@ -463,15 +466,20 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
semantic_model=measure_semantic_model,
time_dimension_reference=measure_agg_time_dimension_reference,
)
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if defined_granularity.is_smaller_than_or_equal(time_granularity)
)
else:
defined_granularity = DEFAULT_TIME_GRANULARITY

# It's possible to aggregate measures to coarser time granularities (except with cumulative metrics).
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if defined_granularity.is_smaller_than_or_equal(time_granularity)
)
# If querying metric_time without metrics, will query from time spines.
# Defaults to DAY granularity if available in time spines, else smallest available granularity.
min_time_spine_granularity = min(self._time_spine_sources.keys())
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if min_time_spine_granularity.is_smaller_than_or_equal(time_granularity)
)

# For each of the possible time granularities, create a LinkableDimension.
path_key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_set import group_specs_by_type
from metricflow_semantics.specs.time_dimension_spec import (
DEFAULT_TIME_GRANULARITY,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
Expand Down Expand Up @@ -47,10 +48,13 @@ def __init__(self, max_metric_default_time_granularity: Optional[TimeGranularity
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
spec_set = group_specs_by_type(candidate_specs)

# If there are no metrics or metric_time specs in the query, skip this filter.
if not (self._max_metric_default_time_granularity and spec_set.metric_time_specs):
# If there are no metric_time specs in the query, skip this filter.
if not spec_set.metric_time_specs:
return candidate_specs

# If there are metrics in the query, use max metric default. For no-metric queries, use standard default.
default_granularity = self._max_metric_default_time_granularity or DEFAULT_TIME_GRANULARITY

spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set)
spec_key_to_specs: Dict[TimeDimensionSpecComparisonKey, Tuple[TimeDimensionSpec, ...]] = defaultdict(tuple)
for metric_time_spec in spec_set.metric_time_specs:
Expand All @@ -60,10 +64,8 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpe

matched_metric_time_specs: Tuple[TimeDimensionSpec, ...] = ()
for spec_key, time_grains in spec_key_to_grains.items():
if self._max_metric_default_time_granularity in time_grains:
matched_metric_time_specs += (
spec_key_to_specs[spec_key][0].with_grain(self._max_metric_default_time_granularity),
)
if default_granularity in time_grains:
matched_metric_time_specs += (spec_key_to_specs[spec_key][0].with_grain(default_granularity),)
else:
# If default_granularity is not in the available options, then time granularity was specified in the request
# and a default is not needed here. Pass all options through for this spec key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ def without_first_entity_link(self) -> TimeDimensionSpec: # noqa: D102

@property
def without_entity_links(self) -> TimeDimensionSpec: # noqa: D102
return TimeDimensionSpec.from_name(self.element_name)
return TimeDimensionSpec(
element_name=self.element_name,
time_granularity=self.time_granularity,
date_part=self.date_part,
entity_links=(),
)

@staticmethod
def from_name(name: str) -> TimeDimensionSpec: # noqa: D102
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,40 @@ project_configuration:
- location: $source_schema.mf_time_spine
column_name: ds
grain: day
time_spines:
- node_relation:
alias: mf_time_spine_nanosecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: nanosecond
- node_relation:
alias: mf_time_spine_microsecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: microsecond
- node_relation:
alias: mf_time_spine_millisecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: millisecond
- node_relation:
alias: mf_time_spine_second
schema_name: $source_schema
primary_column:
name: ts
time_granularity: second
- node_relation:
alias: mf_time_spine_minute
schema_name: $source_schema
primary_column:
name: ts
time_granularity: minute
- node_relation:
alias: mf_time_spine_hour
schema_name: $source_schema
primary_column:
name: ts
time_granularity: hour
Original file line number Diff line number Diff line change
Expand Up @@ -783,22 +783,3 @@ metric:
name: listings
filter: "{{ Metric('views', ['listing']) }} > 10"
time_granularity: week
---
metric:
name: bookings_before_dec_20_2019
description: Bookings up to the end of 2022
type: simple
type_params:
measure: bookings
filter: "{{ TimeDimension('metric_time') }} < '2012-12-20'"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think I know why this is. Timestamp literals in Trino have to take the form TIMESTAMP <literal> so we'd need to do some substitution somewhere.

If the error is due to the filter it means we have to do custom rendering against the filter expr.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. I couldn't come up with an easy way to fix it here and wasn't sure it was worth doing the hard fix for an engine we barely use.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I think the right way to deal with this is via a more holistic approach to filter expression inputs. In the meantime, having a small gap in Trino test coverage - particularly one where the main difference essentially boils down to how we go about pasting in the user-provided expression at render time - seems fine to me.

---
metric:
name: bookings_between_dec_18_2019_and_dec_20_2019
description: Bookings starting in 2020. Used to test a metric with different types of ambiguous filters in on its input metric.
type: derived
type_params:
expr: bookings_before_dec_20_2019
metrics:
- name: bookings_before_dec_20_2019
filter: "{{ TimeDimension('metric_time') }} > '2019-12-18'"
time_granularity: week
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,23 @@ semantic_model:
time_granularity: day
- name: home_state
type: categorical
- name: last_profile_edit_ts
type: time
type_params:
time_granularity: millisecond
- name: bio_added_ts
type: time
type_params:
time_granularity: second
- name: last_login_ts
type: time
type_params:
time_granularity: minute
- name: archived_at
type: time
type_params:
time_granularity: hour


entities:
- name: user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,55 @@
"TimeDimension('metric_time', 'day', date_part_name='month')",
"TimeDimension('metric_time', 'day', date_part_name='quarter')",
"TimeDimension('metric_time', 'day', date_part_name='year')",
"TimeDimension('metric_time', 'hour')",
"TimeDimension('metric_time', 'hour', date_part_name='day')",
"TimeDimension('metric_time', 'hour', date_part_name='dow')",
"TimeDimension('metric_time', 'hour', date_part_name='doy')",
"TimeDimension('metric_time', 'hour', date_part_name='month')",
"TimeDimension('metric_time', 'hour', date_part_name='quarter')",
"TimeDimension('metric_time', 'hour', date_part_name='year')",
"TimeDimension('metric_time', 'microsecond')",
"TimeDimension('metric_time', 'microsecond', date_part_name='day')",
"TimeDimension('metric_time', 'microsecond', date_part_name='dow')",
"TimeDimension('metric_time', 'microsecond', date_part_name='doy')",
"TimeDimension('metric_time', 'microsecond', date_part_name='month')",
"TimeDimension('metric_time', 'microsecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'microsecond', date_part_name='year')",
"TimeDimension('metric_time', 'millisecond')",
"TimeDimension('metric_time', 'millisecond', date_part_name='day')",
"TimeDimension('metric_time', 'millisecond', date_part_name='dow')",
"TimeDimension('metric_time', 'millisecond', date_part_name='doy')",
"TimeDimension('metric_time', 'millisecond', date_part_name='month')",
"TimeDimension('metric_time', 'millisecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'millisecond', date_part_name='year')",
"TimeDimension('metric_time', 'minute')",
"TimeDimension('metric_time', 'minute', date_part_name='day')",
"TimeDimension('metric_time', 'minute', date_part_name='dow')",
"TimeDimension('metric_time', 'minute', date_part_name='doy')",
"TimeDimension('metric_time', 'minute', date_part_name='month')",
"TimeDimension('metric_time', 'minute', date_part_name='quarter')",
"TimeDimension('metric_time', 'minute', date_part_name='year')",
"TimeDimension('metric_time', 'month')",
"TimeDimension('metric_time', 'month', date_part_name='month')",
"TimeDimension('metric_time', 'month', date_part_name='quarter')",
"TimeDimension('metric_time', 'month', date_part_name='year')",
"TimeDimension('metric_time', 'nanosecond')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='day')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='dow')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='doy')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='month')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='year')",
"TimeDimension('metric_time', 'quarter')",
"TimeDimension('metric_time', 'quarter', date_part_name='quarter')",
"TimeDimension('metric_time', 'quarter', date_part_name='year')",
"TimeDimension('metric_time', 'second')",
"TimeDimension('metric_time', 'second', date_part_name='day')",
"TimeDimension('metric_time', 'second', date_part_name='dow')",
"TimeDimension('metric_time', 'second', date_part_name='doy')",
"TimeDimension('metric_time', 'second', date_part_name='month')",
"TimeDimension('metric_time', 'second', date_part_name='quarter')",
"TimeDimension('metric_time', 'second', date_part_name='year')",
"TimeDimension('metric_time', 'week')",
"TimeDimension('metric_time', 'week', date_part_name='month')",
"TimeDimension('metric_time', 'week', date_part_name='quarter')",
Expand Down
Loading
Loading