Skip to content

Commit

Permalink
Add test time spines for sub-daily granularity (#1358)
Browse files Browse the repository at this point in the history
Add test time spines for sub-daily granularity
I added as much data as seemed reasonable to include manually, and
figure we can focus tests to those time ranges. And we always have the
flexibility to add more data later.
I included one time spine per sub-daily grain so that we could be sure
to test the syntax for each grain against each engine that supports it.
Snapshot changes are separated into isolated commits for easier review.
They only include the following changes:
- Node ID changes due to new time spine source nodes
- Time spine table alias changes due to new time spine source nodes
  • Loading branch information
courtneyholcomb authored Jul 30, 2024
1 parent e467f37 commit b72ac67
Show file tree
Hide file tree
Showing 234 changed files with 5,769 additions and 2,050 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240727-081106.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Enable sub-daily queries without metrics.
time: 2024-07-27T08:11:06.357653-07:00
custom:
Author: courtneyholcomb
Issue: "1359"
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet
from metricflow_semantics.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator
from metricflow_semantics.specs.time_dimension_spec import DEFAULT_TIME_GRANULARITY
from metricflow_semantics.time.time_spine_source import TimeSpineSource

if TYPE_CHECKING:
from metricflow_semantics.model.semantics.semantic_model_lookup import SemanticModelLookup
Expand Down Expand Up @@ -124,6 +125,7 @@ def __init__(
# Sort semantic models by name for consistency in building derived objects.
self._semantic_models = sorted(self._semantic_manifest.semantic_models, key=lambda x: x.name)
self._join_evaluator = SemanticModelJoinEvaluator(semantic_model_lookup)
self._time_spine_sources = TimeSpineSource.create_from_manifest(self._semantic_manifest)

assert max_entity_links >= 0
self._max_entity_links = max_entity_links
Expand Down Expand Up @@ -454,6 +456,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
on what aggregation time dimension was used to define the measure.
"""
measure_semantic_model: Optional[SemanticModel] = None
defined_granularity: Optional[TimeGranularity] = None
if measure_reference:
measure_semantic_model = self._get_semantic_model_for_measure(measure_reference)
measure_agg_time_dimension_reference = measure_semantic_model.checked_agg_time_dimension_for_measure(
Expand All @@ -463,15 +466,20 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
semantic_model=measure_semantic_model,
time_dimension_reference=measure_agg_time_dimension_reference,
)
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if defined_granularity.is_smaller_than_or_equal(time_granularity)
)
else:
defined_granularity = DEFAULT_TIME_GRANULARITY

# It's possible to aggregate measures to coarser time granularities (except with cumulative metrics).
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if defined_granularity.is_smaller_than_or_equal(time_granularity)
)
# If querying metric_time without metrics, will query from time spines.
# Defaults to DAY granularity if available in time spines, else smallest available granularity.
min_time_spine_granularity = min(self._time_spine_sources.keys())
possible_metric_time_granularities = tuple(
time_granularity
for time_granularity in TimeGranularity
if min_time_spine_granularity.is_smaller_than_or_equal(time_granularity)
)

# For each of the possible time granularities, create a LinkableDimension.
path_key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_set import group_specs_by_type
from metricflow_semantics.specs.time_dimension_spec import (
DEFAULT_TIME_GRANULARITY,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
Expand Down Expand Up @@ -47,10 +48,13 @@ def __init__(self, max_metric_default_time_granularity: Optional[TimeGranularity
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpec]:
spec_set = group_specs_by_type(candidate_specs)

# If there are no metrics or metric_time specs in the query, skip this filter.
if not (self._max_metric_default_time_granularity and spec_set.metric_time_specs):
# If there are no metric_time specs in the query, skip this filter.
if not spec_set.metric_time_specs:
return candidate_specs

# If there are metrics in the query, use max metric default. For no-metric queries, use standard default.
default_granularity = self._max_metric_default_time_granularity or DEFAULT_TIME_GRANULARITY

spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set)
spec_key_to_specs: Dict[TimeDimensionSpecComparisonKey, Tuple[TimeDimensionSpec, ...]] = defaultdict(tuple)
for metric_time_spec in spec_set.metric_time_specs:
Expand All @@ -60,10 +64,8 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpe

matched_metric_time_specs: Tuple[TimeDimensionSpec, ...] = ()
for spec_key, time_grains in spec_key_to_grains.items():
if self._max_metric_default_time_granularity in time_grains:
matched_metric_time_specs += (
spec_key_to_specs[spec_key][0].with_grain(self._max_metric_default_time_granularity),
)
if default_granularity in time_grains:
matched_metric_time_specs += (spec_key_to_specs[spec_key][0].with_grain(default_granularity),)
else:
# If default_granularity is not in the available options, then time granularity was specified in the request
# and a default is not needed here. Pass all options through for this spec key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ def without_first_entity_link(self) -> TimeDimensionSpec: # noqa: D102

@property
def without_entity_links(self) -> TimeDimensionSpec: # noqa: D102
return TimeDimensionSpec.from_name(self.element_name)
return TimeDimensionSpec(
element_name=self.element_name,
time_granularity=self.time_granularity,
date_part=self.date_part,
entity_links=(),
)

@staticmethod
def from_name(name: str) -> TimeDimensionSpec: # noqa: D102
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,40 @@ project_configuration:
- location: $source_schema.mf_time_spine
column_name: ds
grain: day
time_spines:
- node_relation:
alias: mf_time_spine_nanosecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: nanosecond
- node_relation:
alias: mf_time_spine_microsecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: microsecond
- node_relation:
alias: mf_time_spine_millisecond
schema_name: $source_schema
primary_column:
name: ts
time_granularity: millisecond
- node_relation:
alias: mf_time_spine_second
schema_name: $source_schema
primary_column:
name: ts
time_granularity: second
- node_relation:
alias: mf_time_spine_minute
schema_name: $source_schema
primary_column:
name: ts
time_granularity: minute
- node_relation:
alias: mf_time_spine_hour
schema_name: $source_schema
primary_column:
name: ts
time_granularity: hour
Original file line number Diff line number Diff line change
Expand Up @@ -783,22 +783,3 @@ metric:
name: listings
filter: "{{ Metric('views', ['listing']) }} > 10"
time_granularity: week
---
metric:
name: bookings_before_dec_20_2019
description: Bookings up to the end of 2022
type: simple
type_params:
measure: bookings
filter: "{{ TimeDimension('metric_time') }} < '2012-12-20'"
---
metric:
name: bookings_between_dec_18_2019_and_dec_20_2019
description: Bookings starting in 2020. Used to test a metric with different types of ambiguous filters in on its input metric.
type: derived
type_params:
expr: bookings_before_dec_20_2019
metrics:
- name: bookings_before_dec_20_2019
filter: "{{ TimeDimension('metric_time') }} > '2019-12-18'"
time_granularity: week
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,23 @@ semantic_model:
time_granularity: day
- name: home_state
type: categorical
- name: last_profile_edit_ts
type: time
type_params:
time_granularity: millisecond
- name: bio_added_ts
type: time
type_params:
time_granularity: second
- name: last_login_ts
type: time
type_params:
time_granularity: minute
- name: archived_at
type: time
type_params:
time_granularity: hour


entities:
- name: user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,55 @@
"TimeDimension('metric_time', 'day', date_part_name='month')",
"TimeDimension('metric_time', 'day', date_part_name='quarter')",
"TimeDimension('metric_time', 'day', date_part_name='year')",
"TimeDimension('metric_time', 'hour')",
"TimeDimension('metric_time', 'hour', date_part_name='day')",
"TimeDimension('metric_time', 'hour', date_part_name='dow')",
"TimeDimension('metric_time', 'hour', date_part_name='doy')",
"TimeDimension('metric_time', 'hour', date_part_name='month')",
"TimeDimension('metric_time', 'hour', date_part_name='quarter')",
"TimeDimension('metric_time', 'hour', date_part_name='year')",
"TimeDimension('metric_time', 'microsecond')",
"TimeDimension('metric_time', 'microsecond', date_part_name='day')",
"TimeDimension('metric_time', 'microsecond', date_part_name='dow')",
"TimeDimension('metric_time', 'microsecond', date_part_name='doy')",
"TimeDimension('metric_time', 'microsecond', date_part_name='month')",
"TimeDimension('metric_time', 'microsecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'microsecond', date_part_name='year')",
"TimeDimension('metric_time', 'millisecond')",
"TimeDimension('metric_time', 'millisecond', date_part_name='day')",
"TimeDimension('metric_time', 'millisecond', date_part_name='dow')",
"TimeDimension('metric_time', 'millisecond', date_part_name='doy')",
"TimeDimension('metric_time', 'millisecond', date_part_name='month')",
"TimeDimension('metric_time', 'millisecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'millisecond', date_part_name='year')",
"TimeDimension('metric_time', 'minute')",
"TimeDimension('metric_time', 'minute', date_part_name='day')",
"TimeDimension('metric_time', 'minute', date_part_name='dow')",
"TimeDimension('metric_time', 'minute', date_part_name='doy')",
"TimeDimension('metric_time', 'minute', date_part_name='month')",
"TimeDimension('metric_time', 'minute', date_part_name='quarter')",
"TimeDimension('metric_time', 'minute', date_part_name='year')",
"TimeDimension('metric_time', 'month')",
"TimeDimension('metric_time', 'month', date_part_name='month')",
"TimeDimension('metric_time', 'month', date_part_name='quarter')",
"TimeDimension('metric_time', 'month', date_part_name='year')",
"TimeDimension('metric_time', 'nanosecond')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='day')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='dow')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='doy')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='month')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='quarter')",
"TimeDimension('metric_time', 'nanosecond', date_part_name='year')",
"TimeDimension('metric_time', 'quarter')",
"TimeDimension('metric_time', 'quarter', date_part_name='quarter')",
"TimeDimension('metric_time', 'quarter', date_part_name='year')",
"TimeDimension('metric_time', 'second')",
"TimeDimension('metric_time', 'second', date_part_name='day')",
"TimeDimension('metric_time', 'second', date_part_name='dow')",
"TimeDimension('metric_time', 'second', date_part_name='doy')",
"TimeDimension('metric_time', 'second', date_part_name='month')",
"TimeDimension('metric_time', 'second', date_part_name='quarter')",
"TimeDimension('metric_time', 'second', date_part_name='year')",
"TimeDimension('metric_time', 'week')",
"TimeDimension('metric_time', 'week', date_part_name='month')",
"TimeDimension('metric_time', 'week', date_part_name='quarter')",
Expand Down
Loading

0 comments on commit b72ac67

Please sign in to comment.