From fc669107a70470a2a38b4b331116439dce9df5c4 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 18 Jan 2024 11:33:48 -0800 Subject: [PATCH 01/22] WIP --- .../dataflow/builder/dataflow_plan_builder.py | 12 ++-- .../model/semantics/semantic_model_lookup.py | 17 ++--- metricflow/protocols/semantics.py | 2 +- .../cumulative_metric_requires_metric_time.py | 4 +- .../metric_time_requirements.py | 71 +++++++++++++++---- .../test_cases/itest_cumulative_metric.yaml | 28 ++++++++ .../test/integration/test_configured_cases.py | 3 +- .../model/test_semantic_model_container.py | 24 +++---- 8 files changed, 117 insertions(+), 44 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index d602c38bb9..8d1e1fbc33 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -704,16 +704,18 @@ def _contains_multihop_linkables(linkable_specs: Sequence[LinkableInstanceSpec]) """Returns true if any of the linkable specs requires a multi-hop join to realize.""" return any(len(x.entity_links) > 1 for x in linkable_specs) - def _get_semantic_model_names_for_measures(self, measure_names: Sequence[MeasureSpec]) -> Set[str]: + def _get_semantic_model_names_for_measures(self, measures: Sequence[MeasureSpec]) -> Set[str]: """Return the names of the semantic models needed to compute the input measures. 
This is a temporary method for use in assertion boundaries while we implement support for multiple semantic models """ semantic_model_names: Set[str] = set() - for measure_name in measure_names: - semantic_model_names = semantic_model_names.union( - {d.name for d in self._semantic_model_lookup.get_semantic_models_for_measure(measure_name.reference)} - ) + for measure in measures: + semantic_model = self._semantic_model_lookup.get_semantic_model_for_measure(measure.reference) + if not semantic_model: + raise ValueError(f"Could not find measure with name {measure.reference} in configured semantic models.") + semantic_model_names.add(semantic_model.name) + return semantic_model_names def _sort_by_suitability(self, nodes: Sequence[BaseOutput]) -> Sequence[BaseOutput]: diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 9aceec4582..3d68d460a8 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -52,12 +52,13 @@ def __init__( # noqa: D model: SemanticManifest, ) -> None: self._model = model - self._measure_index: Dict[MeasureReference, List[SemanticModel]] = defaultdict(list) + self._measure_index: Dict[MeasureReference, SemanticModel] = {} self._measure_aggs: Dict[ MeasureReference, AggregationType ] = {} # maps measures to their one consistent aggregation self._measure_agg_time_dimension: Dict[MeasureReference, TimeDimensionReference] = {} self._measure_non_additive_dimension_specs: Dict[MeasureReference, NonAdditiveDimensionSpec] = {} + # TODO: remove defaultdicts. Will add fake elements to the dict that might get referenced. 
self._dimension_index: Dict[DimensionReference, List[SemanticModel]] = defaultdict(list) self._linkable_reference_index: Dict[LinkableElementReference, List[SemanticModel]] = defaultdict(list) self._entity_index: Dict[Optional[str], List[SemanticModel]] = defaultdict(list) @@ -141,12 +142,10 @@ def get_measure_from_semantic_model(semantic_model: SemanticModel, measure_refer ) def get_measure(self, measure_reference: MeasureReference) -> Measure: # noqa: D - if measure_reference not in self._measure_index: + semantic_model = self._measure_index.get(measure_reference) + if not semantic_model: raise ValueError(f"Could not find measure with name ({measure_reference}) in configured semantic models") - assert len(self._measure_index[measure_reference]) >= 1 - # Measures should be consistent across semantic models, so just use the first one. - semantic_model = list(self._measure_index[measure_reference])[0] return SemanticModelLookup.get_measure_from_semantic_model( semantic_model=semantic_model, measure_reference=measure_reference ) @@ -155,10 +154,8 @@ def get_entity_references(self) -> Sequence[EntityReference]: # noqa: D return list(self._entity_ref_to_entity.keys()) # DSC interface - def get_semantic_models_for_measure( # noqa: D - self, measure_reference: MeasureReference - ) -> Sequence[SemanticModel]: - return self._measure_index[measure_reference] + def get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> Optional[SemanticModel]: # noqa: D + return self._measure_index.get(measure_reference) def get_agg_time_dimension_for_measure( # noqa: D self, measure_reference: MeasureReference @@ -202,7 +199,7 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: for measure in semantic_model.measures: self._measure_aggs[measure.reference] = measure.agg - self._measure_index[measure.reference].append(semantic_model) + self._measure_index[measure.reference] = semantic_model agg_time_dimension_reference = 
semantic_model.checked_agg_time_dimension_for_measure(measure.reference) matching_dimensions = tuple( diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index cdaeeaa9a6..291eb1d25a 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -85,7 +85,7 @@ def get_entity_references(self) -> Sequence[EntityReference]: raise NotImplementedError @abstractmethod - def get_semantic_models_for_measure(self, measure_reference: MeasureReference) -> Sequence[SemanticModel]: + def get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> Optional[SemanticModel]: """Retrieve a list of all semantic model model objects associated with the measure reference.""" raise NotImplementedError diff --git a/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py b/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py index 484cd5131e..60893c6b98 100644 --- a/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py +++ b/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py @@ -24,7 +24,9 @@ class CumulativeMetricRequiresMetricTimeIssue(MetricFlowQueryResolutionIssue): def ui_description(self, associated_input: MetricFlowQueryResolverInput) -> str: return ( f"The query includes a cumulative metric {repr(self.metric_reference.element_name)} that does not " - f"accumulate over all-time, but the group-by items do not include {repr(METRIC_TIME_ELEMENT_NAME)}" + f"accumulate over all-time, but the group-by items do not include {repr(METRIC_TIME_ELEMENT_NAME)} " + "or the metric's agg_time_dimension." + # TODO: add name of agg_time_dim? 
) @override diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index fe75f6854c..61c8e506aa 100644 --- a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import List, Sequence +from typing import List, Sequence, Tuple from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME from dbt_semantic_interfaces.protocols import WhereFilterIntersection -from dbt_semantic_interfaces.references import MetricReference +from dbt_semantic_interfaces.references import EntityReference, MetricReference, TimeDimensionReference from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity from dbt_semantic_interfaces.type_enums.date_part import DatePart from typing_extensions import override @@ -34,29 +34,36 @@ class MetricTimeQueryValidationRule(PostResolutionQueryValidationRule): def __init__(self, manifest_lookup: SemanticManifestLookup) -> None: # noqa: D super().__init__(manifest_lookup=manifest_lookup) - metric_time_specs: List[TimeDimensionSpec] = [] + self._metric_time_specs = tuple( + self._generate_valid_specs_for_time_dimension( + time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), entity_links=() + ) + ) + def _generate_valid_specs_for_time_dimension( + self, time_dimension_reference: TimeDimensionReference, entity_links: Tuple[EntityReference, ...] 
+ ) -> List[TimeDimensionSpec]: + time_dimension_specs: List[TimeDimensionSpec] = [] for time_granularity in TimeGranularity: - metric_time_specs.append( + time_dimension_specs.append( TimeDimensionSpec( - element_name=METRIC_TIME_ELEMENT_NAME, - entity_links=(), + element_name=time_dimension_reference.element_name, + entity_links=entity_links, time_granularity=time_granularity, date_part=None, ) ) for date_part in DatePart: for time_granularity in date_part.compatible_granularities: - metric_time_specs.append( + time_dimension_specs.append( TimeDimensionSpec( - element_name=METRIC_TIME_ELEMENT_NAME, - entity_links=(), + element_name=time_dimension_reference.element_name, + entity_links=entity_links, time_granularity=time_granularity, date_part=date_part, ) ) - - self._metric_time_specs = tuple(metric_time_specs) + return time_dimension_specs def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInputForQuery) -> bool: for group_by_item_input in query_resolver_input.group_by_item_inputs: @@ -65,6 +72,38 @@ def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInpu return False + def _group_by_items_include_agg_time_dimension( + self, query_resolver_input: ResolverInputForQuery, metric_reference: MetricReference + ) -> bool: + metric = self._manifest_lookup.metric_lookup.get_metric(metric_reference=metric_reference) + semantic_model_lookup = self._manifest_lookup.semantic_model_lookup + + valid_agg_time_dimension_specs: List[TimeDimensionSpec] = [] + for measure_reference in metric.measure_references: + agg_time_dimension_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference) + semantic_model = semantic_model_lookup.get_semantic_model_for_measure(measure_reference) + assert semantic_model, f"No semantic model found for measure {measure_reference}." + + # is this too broad? need to narrow entity links? 
+ possible_entity_links = semantic_model_lookup.entity_links_for_local_elements(semantic_model) + for entity_link in possible_entity_links: + valid_agg_time_dimension_specs.extend( + self._generate_valid_specs_for_time_dimension( + time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) + ) + ) + print("valid:::") + for x in valid_agg_time_dimension_specs: + print(f"\n{x}") + + print("requested:::") + for group_by_item_input in query_resolver_input.group_by_item_inputs: + print(print(f"\n{group_by_item_input}")) + if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs): + return True + + return False + @override def validate_metric_in_resolution_dag( self, @@ -73,7 +112,11 @@ def validate_metric_in_resolution_dag( resolution_path: MetricFlowQueryResolutionPath, ) -> MetricFlowQueryResolutionIssueSet: metric = self._get_metric(metric_reference) - query_includes_metric_time = self._group_by_items_include_metric_time(resolver_input_for_query) + query_includes_metric_time_or_agg_time_dimension = self._group_by_items_include_metric_time( + resolver_input_for_query + ) or self._group_by_items_include_agg_time_dimension( + query_resolver_input=resolver_input_for_query, metric_reference=metric_reference + ) if metric.type is MetricType.SIMPLE or metric.type is MetricType.CONVERSION: return MetricFlowQueryResolutionIssueSet.empty_instance() @@ -81,7 +124,7 @@ def validate_metric_in_resolution_dag( if ( metric.type_params is not None and (metric.type_params.window is not None or metric.type_params.grain_to_date is not None) - and not query_includes_metric_time + and not query_includes_metric_time_or_agg_time_dimension ): return MetricFlowQueryResolutionIssueSet.from_issue( CumulativeMetricRequiresMetricTimeIssue.from_parameters( @@ -97,7 +140,7 @@ def validate_metric_in_resolution_dag( for input_metric in metric.input_metrics ) - if has_time_offset and not query_includes_metric_time: + if has_time_offset and not 
query_includes_metric_time_or_agg_time_dimension: return MetricFlowQueryResolutionIssueSet.from_issue( OffsetMetricRequiresMetricTimeIssue.from_parameters( metric_reference=metric_reference, diff --git a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml index a86a9ea838..1f5d8b0eb9 100644 --- a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml +++ b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml @@ -347,3 +347,31 @@ integration_test: OR {{ render_time_constraint("a.ds", "2020-01-04", "2020-01-04") }} GROUP BY a.ds ORDER BY a.ds +--- +integration_test: + name: cumulative_metric_with_agg_time_dimension + description: Query a cumulative metric with its agg_time_dimension. + model: SIMPLE_MODEL + metrics: ["trailing_2_months_revenue"] + group_bys: ["company__ds__day"] + order_bys: ["company__ds__day"] + time_constraint: ["2020-03-05", "2021-01-04"] + check_query: | + SELECT + SUM(b.txn_revenue) as trailing_2_months_revenue + , a.ds AS company__ds__day + FROM ( + SELECT ds + FROM {{ mf_time_spine_source }} + WHERE {{ render_time_constraint("ds", "2020-01-05", "2021-01-04") }} + ) a + INNER JOIN ( + SELECT + revenue as txn_revenue + , created_at AS ds + FROM {{ source_schema }}.fct_revenue + ) b + ON b.ds <= a.ds AND b.ds > {{ render_date_sub("a", "ds", 2, TimeGranularity.MONTH) }} + WHERE {{ render_time_constraint("a.ds", "2020-03-05", "2021-01-04") }} + GROUP BY a.ds + ORDER BY a.ds diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index 92462b515f..6b6e080470 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -230,7 +230,8 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", - CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + # 
CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + ["itest_cumulative_metric.yaml/cumulative_metric_with_agg_time_dimension"], ids=lambda name: f"name={name}", ) def test_case( diff --git a/metricflow/test/model/test_semantic_model_container.py b/metricflow/test/model/test_semantic_model_container.py index 80ec27d5c7..1623eae3cb 100644 --- a/metricflow/test/model/test_semantic_model_container.py +++ b/metricflow/test/model/test_semantic_model_container.py @@ -58,18 +58,18 @@ def test_get_elements(semantic_model_lookup: SemanticModelLookup) -> None: # no assert semantic_model_lookup.get_measure(measure_reference=measure_reference).reference == measure_reference -def test_get_semantic_models_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D - bookings_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="bookings")) - assert len(bookings_sources) == 1 - assert bookings_sources[0].name == "bookings_source" - - views_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="views")) - assert len(views_sources) == 1 - assert views_sources[0].name == "views_source" - - listings_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="listings")) - assert len(listings_sources) == 1 - assert listings_sources[0].name == "listings_latest" +def test_get_semantic_model_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D + bookings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="bookings")) + assert bookings_source + assert bookings_source.name == "bookings_source" + + views_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="views")) + assert views_source + assert views_source.name == "views_source" + + listings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="listings")) + 
assert listings_source + assert listings_source.name == "listings_latest" def test_elements_for_metric( # noqa: D From 25ef666b7e48a2ac6db8aed3045063311dee391f Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 18 Jan 2024 14:49:44 -0800 Subject: [PATCH 02/22] Remove unneeded change --- .../dataflow/builder/dataflow_plan_builder.py | 10 ++++---- .../model/semantics/semantic_model_lookup.py | 17 +++++++------ metricflow/protocols/semantics.py | 2 +- .../model/test_semantic_model_container.py | 24 +++++++++---------- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 8d1e1fbc33..6604dca8bc 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -711,10 +711,12 @@ def _get_semantic_model_names_for_measures(self, measures: Sequence[MeasureSpec] """ semantic_model_names: Set[str] = set() for measure in measures: - semantic_model = self._semantic_model_lookup.get_semantic_model_for_measure(measure.reference) - if not semantic_model: - raise ValueError(f"Could not find measure with name {measure.reference} in configured semantic models.") - semantic_model_names.add(semantic_model.name) + semantic_model_names.update( + { + semantic_model.name + for semantic_model in self._semantic_model_lookup.get_semantic_models_for_measure(measure.reference) + } + ) return semantic_model_names diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 3d68d460a8..9aceec4582 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -52,13 +52,12 @@ def __init__( # noqa: D model: SemanticManifest, ) -> None: self._model = model - self._measure_index: Dict[MeasureReference, SemanticModel] = {} + self._measure_index: Dict[MeasureReference, 
List[SemanticModel]] = defaultdict(list) self._measure_aggs: Dict[ MeasureReference, AggregationType ] = {} # maps measures to their one consistent aggregation self._measure_agg_time_dimension: Dict[MeasureReference, TimeDimensionReference] = {} self._measure_non_additive_dimension_specs: Dict[MeasureReference, NonAdditiveDimensionSpec] = {} - # TODO: remove defaultdicts. Will add fake elements to the dict that might get referenced. self._dimension_index: Dict[DimensionReference, List[SemanticModel]] = defaultdict(list) self._linkable_reference_index: Dict[LinkableElementReference, List[SemanticModel]] = defaultdict(list) self._entity_index: Dict[Optional[str], List[SemanticModel]] = defaultdict(list) @@ -142,10 +141,12 @@ def get_measure_from_semantic_model(semantic_model: SemanticModel, measure_refer ) def get_measure(self, measure_reference: MeasureReference) -> Measure: # noqa: D - semantic_model = self._measure_index.get(measure_reference) - if not semantic_model: + if measure_reference not in self._measure_index: raise ValueError(f"Could not find measure with name ({measure_reference}) in configured semantic models") + assert len(self._measure_index[measure_reference]) >= 1 + # Measures should be consistent across semantic models, so just use the first one. 
+ semantic_model = list(self._measure_index[measure_reference])[0] return SemanticModelLookup.get_measure_from_semantic_model( semantic_model=semantic_model, measure_reference=measure_reference ) @@ -154,8 +155,10 @@ def get_entity_references(self) -> Sequence[EntityReference]: # noqa: D return list(self._entity_ref_to_entity.keys()) # DSC interface - def get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> Optional[SemanticModel]: # noqa: D - return self._measure_index.get(measure_reference) + def get_semantic_models_for_measure( # noqa: D + self, measure_reference: MeasureReference + ) -> Sequence[SemanticModel]: + return self._measure_index[measure_reference] def get_agg_time_dimension_for_measure( # noqa: D self, measure_reference: MeasureReference @@ -199,7 +202,7 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: for measure in semantic_model.measures: self._measure_aggs[measure.reference] = measure.agg - self._measure_index[measure.reference] = semantic_model + self._measure_index[measure.reference].append(semantic_model) agg_time_dimension_reference = semantic_model.checked_agg_time_dimension_for_measure(measure.reference) matching_dimensions = tuple( diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index 291eb1d25a..cdaeeaa9a6 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -85,7 +85,7 @@ def get_entity_references(self) -> Sequence[EntityReference]: raise NotImplementedError @abstractmethod - def get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> Optional[SemanticModel]: + def get_semantic_models_for_measure(self, measure_reference: MeasureReference) -> Sequence[SemanticModel]: """Retrieve a list of all semantic model model objects associated with the measure reference.""" raise NotImplementedError diff --git a/metricflow/test/model/test_semantic_model_container.py 
b/metricflow/test/model/test_semantic_model_container.py index 1623eae3cb..80ec27d5c7 100644 --- a/metricflow/test/model/test_semantic_model_container.py +++ b/metricflow/test/model/test_semantic_model_container.py @@ -58,18 +58,18 @@ def test_get_elements(semantic_model_lookup: SemanticModelLookup) -> None: # no assert semantic_model_lookup.get_measure(measure_reference=measure_reference).reference == measure_reference -def test_get_semantic_model_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D - bookings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="bookings")) - assert bookings_source - assert bookings_source.name == "bookings_source" - - views_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="views")) - assert views_source - assert views_source.name == "views_source" - - listings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="listings")) - assert listings_source - assert listings_source.name == "listings_latest" +def test_get_semantic_models_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D + bookings_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="bookings")) + assert len(bookings_sources) == 1 + assert bookings_sources[0].name == "bookings_source" + + views_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="views")) + assert len(views_sources) == 1 + assert views_sources[0].name == "views_source" + + listings_sources = semantic_model_lookup.get_semantic_models_for_measure(MeasureReference(element_name="listings")) + assert len(listings_sources) == 1 + assert listings_sources[0].name == "listings_latest" def test_elements_for_metric( # noqa: D From 1a471995840224dd61dbfc3eefb499fe94984370 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 18 Jan 2024 15:43:36 -0800 Subject: 
[PATCH 03/22] Get correct primary entity --- .../model/semantics/semantic_model_lookup.py | 8 ++--- metricflow/protocols/semantics.py | 5 +++ .../metric_time_requirements.py | 33 ++++++++++--------- .../test_cases/itest_cumulative_metric.yaml | 6 ++-- metricflow/test/query/test_query_parser.py | 8 ++--- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 9aceec4582..33cb099c48 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -224,7 +224,7 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: f"Aggregation time dimension does not have a time granularity set: {agg_time_dimension}" ) - primary_entity = SemanticModelLookup._resolved_primary_entity(semantic_model) + primary_entity = SemanticModelLookup.resolved_primary_entity(semantic_model) if primary_entity is None: raise RuntimeError( @@ -299,7 +299,7 @@ def get_entity_from_semantic_model( ) @staticmethod - def _resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityReference]: + def resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityReference]: """Return the primary entity for dimensions in the model.""" primary_entity_reference = semantic_model.primary_entity_reference @@ -308,14 +308,12 @@ def _resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityRe ) # This should be caught by the validation, but adding a sanity check. 
- assert len(entities_with_type_primary) <= 1, f"Found >1 primary entity in {semantic_model}" + assert len(entities_with_type_primary) <= 1, f"Found > 1 primary entity in {semantic_model}" if primary_entity_reference is not None: assert len(entities_with_type_primary) == 0, ( f"The primary_entity field was set to {primary_entity_reference}, but there are non-zero entities with " f"type {EntityType.PRIMARY} in {semantic_model}" ) - - if primary_entity_reference is not None: return primary_entity_reference if len(entities_with_type_primary) > 0: diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index cdaeeaa9a6..3266e5fd05 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -98,6 +98,11 @@ def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Op """Retrieve the entity matching the element -> semantic model mapping, if any.""" raise NotImplementedError + @staticmethod + def resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityReference]: + """Return the primary entity for dimensions in the model.""" + raise NotImplementedError + @abstractmethod def get_by_reference(self, semantic_model_reference: SemanticModelReference) -> Optional[SemanticModel]: """Retrieve the semantic model object matching the input semantic model reference, if any.""" diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index 61c8e506aa..8f16508949 100644 --- a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -81,24 +81,27 @@ def _group_by_items_include_agg_time_dimension( valid_agg_time_dimension_specs: List[TimeDimensionSpec] = [] for measure_reference in metric.measure_references: agg_time_dimension_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference) - semantic_model = 
semantic_model_lookup.get_semantic_model_for_measure(measure_reference) - assert semantic_model, f"No semantic model found for measure {measure_reference}." - - # is this too broad? need to narrow entity links? - possible_entity_links = semantic_model_lookup.entity_links_for_local_elements(semantic_model) - for entity_link in possible_entity_links: - valid_agg_time_dimension_specs.extend( - self._generate_valid_specs_for_time_dimension( - time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) - ) + + # A measure's agg_time_dimension is required to be in the same semantic model as the measure, + # so we can assume the same semantic model for both measure and dimension. + semantic_models = semantic_model_lookup.get_semantic_models_for_measure(measure_reference) + assert ( + len(semantic_models) == 1 + ), f"Expected exactly one semantic model for measure {measure_reference}, but found semantic models {semantic_models}." + semantic_model = semantic_models[0] + + entity_link = semantic_model_lookup.resolved_primary_entity(semantic_model) + assert ( + entity_link is not None + ), f"Expected semantic model {semantic_model} to have a primary entity since it contains dimensions, but found none." 
+ + valid_agg_time_dimension_specs.extend( + self._generate_valid_specs_for_time_dimension( + time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) ) - print("valid:::") - for x in valid_agg_time_dimension_specs: - print(f"\n{x}") + ) - print("requested:::") for group_by_item_input in query_resolver_input.group_by_item_inputs: - print(print(f"\n{group_by_item_input}")) if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs): return True diff --git a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml index 1f5d8b0eb9..7ace7d118c 100644 --- a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml +++ b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml @@ -353,13 +353,13 @@ integration_test: description: Query a cumulative metric with its agg_time_dimension. model: SIMPLE_MODEL metrics: ["trailing_2_months_revenue"] - group_bys: ["company__ds__day"] - order_bys: ["company__ds__day"] + group_bys: ["revenue_instance__ds__day"] + order_bys: ["revenue_instance__ds__day"] time_constraint: ["2020-03-05", "2021-01-04"] check_query: | SELECT SUM(b.txn_revenue) as trailing_2_months_revenue - , a.ds AS company__ds__day + , a.ds AS revenue_instance__ds__day FROM ( SELECT ds FROM {{ mf_time_spine_source }} diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 7860d2a66d..ee927e1b85 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -103,7 +103,7 @@ type: categorical expr: country - primary_entity: company + primary_entity: revenue_instance entities: - name: user @@ -455,7 +455,7 @@ def test_cumulative_metric_wrong_time_dimension_validation() -> None: with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"): query_parser.parse_and_validate_query( metric_names=["revenue_cumulative"], 
- group_by_names=["company__loaded_at"], + group_by_names=["revenue_instance__loaded_at"], ) @@ -480,7 +480,7 @@ def test_cumulative_metric_agg_time_dimension_name_validation() -> None: with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"): query_parser.parse_and_validate_query( metric_names=["revenue_cumulative"], - group_by_names=["company__ds"], + group_by_names=["revenue_instance__loaded_at"], ) @@ -531,7 +531,7 @@ def test_derived_metric_with_offset_parsing() -> None: with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"): query_parser.parse_and_validate_query( metric_names=["revenue_growth_2_weeks"], - group_by_names=["company__country"], + group_by_names=["revenue_instance__country"], ) # Query with time dimension From 08d2e0f56977f7c2eb7c254927816836ded19ce4 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 19 Jan 2024 16:01:09 -0800 Subject: [PATCH 04/22] WIP --- .../dataflow/builder/dataflow_plan_builder.py | 95 +++++++++++++------ metricflow/model/semantics/metric_lookup.py | 36 ++++++- .../model/semantics/semantic_model_lookup.py | 23 ++++- metricflow/plan_conversion/dataflow_to_sql.py | 9 +- metricflow/protocols/semantics.py | 8 ++ .../metric_time_requirements.py | 61 ++---------- metricflow/specs/specs.py | 28 +++++- 7 files changed, 171 insertions(+), 89 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 6604dca8bc..1eabbaad81 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -412,10 +412,10 @@ def _build_base_metric_output_node( metric_input_measure_spec = self._build_input_measure_spec_for_base_metric( filter_spec_factory=filter_spec_factory, metric_reference=metric_reference, - query_contains_metric_time=queried_linkable_specs.contains_metric_time, + queried_linkable_specs=queried_linkable_specs, 
child_metric_offset_window=metric_spec.offset_window, child_metric_offset_to_grain=metric_spec.offset_to_grain, - culmination_description=CumulativeMeasureDescription( + cumulation_description=CumulativeMeasureDescription( cumulative_window=metric.type_params.window, cumulative_grain_to_date=metric.type_params.grain_to_date, ) @@ -504,9 +504,14 @@ def _build_derived_metric_output_node( # For nested ratio / derived metrics with time offset, apply offset & where constraint after metric computation. if metric_spec.has_time_offset: + # TODO: if you only query with the agg_time_dimension for the offset metric, should that work? (assuming another input metric uses a diff agg_time_dim) + query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_metric_time_or_agg_time_dimension: + pass # check for agg_time_dimension and update accordingly + # Write a test case for this scenario assert ( - queried_linkable_specs.contains_metric_time - ), "Joining to time spine requires querying with metric_time." + query_contains_metric_time_or_agg_time_dimension + ), "Joining to time spine requires querying with metric_time or the appropriate agg_time_dimension." output_node = JoinToTimeSpineNode( parent_node=output_node, requested_metric_time_dimension_specs=list(queried_linkable_specs.metric_time_specs), @@ -1045,8 +1050,8 @@ def _build_input_measure_spec_for_base_metric( child_metric_offset_window: Optional[MetricTimeWindow], child_metric_offset_to_grain: Optional[TimeGranularity], descendent_filter_specs: Sequence[WhereFilterSpec], - query_contains_metric_time: bool, - culmination_description: Optional[CumulativeMeasureDescription], + queried_linkable_specs: LinkableSpecSet, + cumulation_description: Optional[CumulativeMeasureDescription], ) -> MetricInputMeasureSpec: """Return the input measure spec required to compute the base metric. 
@@ -1090,12 +1095,17 @@ def _build_input_measure_spec_for_base_metric( # Even if the measure is configured to join to time spine, if there's no metric_time in the query, # there's no need to join to the time spine since all metric_time will be aggregated. after_aggregation_time_spine_join_description = None - if input_measure.join_to_timespine and query_contains_metric_time: - after_aggregation_time_spine_join_description = JoinToTimeSpineDescription( - join_type=SqlJoinType.LEFT_OUTER, - offset_window=None, - offset_to_grain=None, - ) + if input_measure.join_to_timespine: + query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_metric_time_or_agg_time_dimension: + pass # check for agg_time_dimension and update accordingly + # Write a test case for this scenario + if query_contains_metric_time_or_agg_time_dimension: + after_aggregation_time_spine_join_description = JoinToTimeSpineDescription( + join_type=SqlJoinType.LEFT_OUTER, + offset_window=None, + offset_to_grain=None, + ) filter_specs: List[WhereFilterSpec] = [] filter_specs.extend( @@ -1116,7 +1126,7 @@ def _build_input_measure_spec_for_base_metric( fill_nulls_with=input_measure.fill_nulls_with, offset_window=child_metric_offset_window, offset_to_grain=child_metric_offset_to_grain, - culmination_description=culmination_description, + cumulation_description=cumulation_description, filter_specs=tuple(filter_specs), alias=input_measure.alias, before_aggregation_time_spine_join_description=before_aggregation_time_spine_join_description, @@ -1211,15 +1221,15 @@ def _build_aggregated_measure_from_measure_source_node( measure_recipe: Optional[DataflowRecipe] = None, ) -> BaseOutput: measure_spec = metric_input_measure_spec.measure_spec - cumulative = metric_input_measure_spec.culmination_description is not None + cumulative = metric_input_measure_spec.cumulation_description is not None cumulative_window = ( - 
metric_input_measure_spec.culmination_description.cumulative_window - if metric_input_measure_spec.culmination_description is not None + metric_input_measure_spec.cumulation_description.cumulative_window + if metric_input_measure_spec.cumulation_description is not None else None ) cumulative_grain_to_date = ( - metric_input_measure_spec.culmination_description.cumulative_grain_to_date - if metric_input_measure_spec.culmination_description + metric_input_measure_spec.cumulation_description.cumulative_grain_to_date + if metric_input_measure_spec.cumulation_description else None ) measure_properties = self._build_measure_spec_properties([measure_spec]) @@ -1278,25 +1288,43 @@ def _build_aggregated_measure_from_measure_source_node( f"Recipe not found for measure spec: {measure_spec} and linkable specs: {required_linkable_specs}" ) - # If a cumulative metric is queried with metric_time, join over time range. + # If a cumulative metric is queried with metric_time or agg_time_dimension, join over time range. # Otherwise, the measure will be aggregated over all time. 
time_range_node: Optional[JoinOverTimeRangeNode] = None - if cumulative and queried_linkable_specs.contains_metric_time: - time_range_node = JoinOverTimeRangeNode( - parent_node=measure_recipe.source_node, - window=cumulative_window, - grain_to_date=cumulative_grain_to_date, - time_range_constraint=time_range_constraint - if not before_aggregation_time_spine_join_description - else None, - ) + if cumulative: + query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_metric_time_or_agg_time_dimension: + # TODO: Write a test case for this scenario + agg_time_dimension_element_path_key = ( + self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure(measure_spec.reference) + ) + for time_dimension_spec in queried_linkable_specs.time_dimension_specs: + if ( + time_dimension_spec.element_name == agg_time_dimension_element_path_key.element_name + and time_dimension_spec.entity_links == agg_time_dimension_element_path_key.entity_links + ): + query_contains_metric_time_or_agg_time_dimension = True + if query_contains_metric_time_or_agg_time_dimension: + time_range_node = JoinOverTimeRangeNode( + parent_node=measure_recipe.source_node, + window=cumulative_window, + grain_to_date=cumulative_grain_to_date, + time_range_constraint=time_range_constraint + if not before_aggregation_time_spine_join_description + else None, + ) # If querying an offset metric, join to time spine before aggregation. join_to_time_spine_node: Optional[JoinToTimeSpineNode] = None if before_aggregation_time_spine_join_description is not None: + query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_metric_time_or_agg_time_dimension: + pass # check for agg_time_dimension and update accordingly + # Write a test case for this scenario assert ( - queried_linkable_specs.contains_metric_time - ), "Joining to time spine requires querying with metric time." 
+ query_contains_metric_time_or_agg_time_dimension + ), "Joining to time spine requires querying with metric time or the appropriate agg_time_dimension." + # Can you use agg_time_dimension if it's a ratio metric? Only if both metrics use same agg time dim? assert before_aggregation_time_spine_join_description.join_type is SqlJoinType.INNER, ( f"Expected {SqlJoinType.INNER} for joining to time spine before aggregation. Remove this if there's a " f"new use case." @@ -1337,11 +1365,16 @@ def _build_aggregated_measure_from_measure_source_node( else: unaggregated_measure_node = filtered_measure_source_node + query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_metric_time_or_agg_time_dimension: + pass # check for agg_time_dimension and update accordingly + # Write a test case for this scenario + cumulative_metric_constrained_node: Optional[ConstrainTimeRangeNode] = None if ( cumulative_metric_adjusted_time_constraint is not None and time_range_constraint is not None - and queried_linkable_specs.contains_metric_time + and query_contains_metric_time_or_agg_time_dimension ): cumulative_metric_constrained_node = ConstrainTimeRangeNode( unaggregated_measure_node, time_range_constraint diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index 4f9df240d7..200e821eab 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -1,16 +1,25 @@ from __future__ import annotations import logging -from typing import Dict, FrozenSet, Optional, Sequence, Set +from typing import Dict, FrozenSet, Optional, Sequence, Set, Tuple from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest -from dbt_semantic_interfaces.references import 
MeasureReference, MetricReference +from dbt_semantic_interfaces.references import ( + EntityReference, + MeasureReference, + MetricReference, + TimeDimensionReference, +) from metricflow.errors.errors import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties -from metricflow.model.semantics.linkable_spec_resolver import LinkableElementSet, ValidLinkableSpecResolver +from metricflow.model.semantics.linkable_spec_resolver import ( + LinkableElementSet, + ValidLinkableSpecResolver, + ElementPathKey, +) from metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from metricflow.protocols.semantics import MetricAccessor @@ -159,3 +168,24 @@ def contains_cumulative_or_time_offset_metric(self, metric_references: Sequence[ if input_metric.offset_window or input_metric.offset_to_grain: return True return False + + def _get_agg_time_dimension_path_keys_for_metric(self, metric_reference: MetricReference) -> Set[ElementPathKey]: + """Retrieves the aggregate time dimensions associated with the metric's measures.""" + metric = self.get_metric(metric_reference) + assert metric.input_measures, f"No input measures found for metric {metric_reference}" + return { + self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure( + measure_reference=input_measure.measure_reference + ) + for input_measure in metric.input_measures + } + + def get_agg_time_dimension_to_replace_metric_time( + self, metric_reference: MetricReference + ) -> Optional[ElementPathKey]: + agg_time_dimension_element_path_keys = self._get_agg_time_dimension_path_keys_for_metric(metric_reference) + if len(agg_time_dimension_element_path_keys) == 1: + return agg_time_dimension_element_path_keys[0] + + # If the metric's input measures have different agg_time_dimensions, user must use metric_time. 
+ return None diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 33cb099c48..bbb26c8a56 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -3,8 +3,10 @@ import logging from collections import defaultdict from copy import deepcopy -from typing import Dict, List, Optional, Sequence, Set +from typing import Dict, List, Optional, Sequence, Set, Tuple +from dbt_semantic_interfaces.type_enums import TimeGranularity +from dbt_semantic_interfaces.type_enums.date_part import DatePart from dbt_semantic_interfaces.pretty_print import pformat_big_objects from dbt_semantic_interfaces.protocols.dimension import Dimension from dbt_semantic_interfaces.protocols.entity import Entity @@ -20,6 +22,7 @@ SemanticModelReference, TimeDimensionReference, ) +from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey from dbt_semantic_interfaces.type_enums import DimensionType, EntityType from dbt_semantic_interfaces.type_enums.aggregation_type import AggregationType from typing_extensions import override @@ -345,3 +348,21 @@ def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: return self._entity_ref_to_spec[EntityReference(element_name=element_name)] else: raise ValueError(f"Unable to find linkable element {element_name} in manifest") + + def get_agg_time_dimension_path_key_for_measure(self, measure_reference: MeasureReference) -> ElementPathKey: + agg_time_dimension = self.get_agg_time_dimension_for_measure(measure_reference) + + # A measure's agg_time_dimension is required to be in the same semantic model as the measure, + # so we can assume the same semantic model for both measure and dimension. 
+ semantic_models = self.get_semantic_models_for_measure(measure_reference) + assert ( + len(semantic_models) == 1 + ), f"Expected exactly one semantic model for measure {measure_reference}, but found semantic models {semantic_models}." + semantic_model = semantic_models[0] + + entity_link = self.resolved_primary_entity(semantic_model) + assert ( + entity_link is not None + ), f"Expected semantic model {semantic_model} to have a primary entity since is contains dimensions, but found none." + + return ElementPathKey(element_name=agg_time_dimension.element_name, entity_links=(entity_link,)) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 407cc01056..8b13b865cd 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -293,8 +293,13 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat metric_time_dimension_instance = instance metric_time_dimension_spec = instance.spec break + # User might refer to the agg_time_dimension by its dimension name instead of 'metric_time'. + if not metric_time_dimension_spec: + pass # finish this - assert metric_time_dimension_spec + assert ( + metric_time_dimension_spec and metric_time_dimension_instance + ), "No metric time dimension or agg_time_dimension found in join over time range query. This should have been caught by validations." time_spine_data_set_alias = self._next_unique_table_alias() metric_time_dimension_column_name = self.column_association_resolver.resolve_spec( @@ -303,7 +308,6 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat # Assemble time_spine dataset with metric_time_dimension to join. # Granularity of time_spine column should match granularity of metric_time column from parent dataset. 
- assert metric_time_dimension_instance time_spine_data_set = self._make_time_spine_data_set( metric_time_dimension_instance=metric_time_dimension_instance, metric_time_dimension_column_name=metric_time_dimension_column_name, @@ -1249,6 +1253,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet parent_alias = self._next_unique_table_alias() # Build time spine dataset + # Here: if no metric time instance, replace this var with agg_time_dim metric_time_dimension_instance: Optional[TimeDimensionInstance] = None for instance in parent_data_set.metric_time_dimension_instances: if len(instance.spec.entity_links) == 0: diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index 3266e5fd05..dc2e1498ed 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -26,6 +26,8 @@ TimeDimensionReference, ) +from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey + from metricflow.model.semantics.element_group import ElementGrouper from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties from metricflow.specs.specs import ( @@ -92,6 +94,7 @@ def get_semantic_models_for_measure(self, measure_reference: MeasureReference) - @abstractmethod def get_agg_time_dimension_for_measure(self, measure_reference: MeasureReference) -> TimeDimensionReference: """Retrieves the aggregate time dimension that is associated with the measure reference.""" + raise NotImplementedError @abstractmethod def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Optional[Entity]: @@ -99,6 +102,7 @@ def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Op raise NotImplementedError @staticmethod + @abstractmethod def resolved_primary_entity(semantic_model: SemanticModel) -> Optional[EntityReference]: """Return the primary entity for dimensions in the model.""" raise NotImplementedError @@ -137,6 +141,10 @@ def 
get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: """Returns the spec for the given name of a linkable element (dimension or entity).""" raise NotImplementedError + @abstractmethod + def get_agg_time_dimension_path_key_for_measure(self, measure_reference: MeasureReference) -> ElementPathKey: + raise NotImplementedError + class MetricAccessor(ABC): """Interface for accessing semantic information about a set of metric objects. diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index 8f16508949..b2f18de814 100644 --- a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -35,36 +35,11 @@ def __init__(self, manifest_lookup: SemanticManifestLookup) -> None: # noqa: D super().__init__(manifest_lookup=manifest_lookup) self._metric_time_specs = tuple( - self._generate_valid_specs_for_time_dimension( + TimeDimensionSpec.generate_possible_specs_for_time_dimension( time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), entity_links=() ) ) - def _generate_valid_specs_for_time_dimension( - self, time_dimension_reference: TimeDimensionReference, entity_links: Tuple[EntityReference, ...] 
- ) -> List[TimeDimensionSpec]: - time_dimension_specs: List[TimeDimensionSpec] = [] - for time_granularity in TimeGranularity: - time_dimension_specs.append( - TimeDimensionSpec( - element_name=time_dimension_reference.element_name, - entity_links=entity_links, - time_granularity=time_granularity, - date_part=None, - ) - ) - for date_part in DatePart: - for time_granularity in date_part.compatible_granularities: - time_dimension_specs.append( - TimeDimensionSpec( - element_name=time_dimension_reference.element_name, - entity_links=entity_links, - time_granularity=time_granularity, - date_part=date_part, - ) - ) - return time_dimension_specs - def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInputForQuery) -> bool: for group_by_item_input in query_resolver_input.group_by_item_inputs: if group_by_item_input.spec_pattern.matches_any(self._metric_time_specs): @@ -75,32 +50,16 @@ def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInpu def _group_by_items_include_agg_time_dimension( self, query_resolver_input: ResolverInputForQuery, metric_reference: MetricReference ) -> bool: - metric = self._manifest_lookup.metric_lookup.get_metric(metric_reference=metric_reference) - semantic_model_lookup = self._manifest_lookup.semantic_model_lookup - - valid_agg_time_dimension_specs: List[TimeDimensionSpec] = [] - for measure_reference in metric.measure_references: - agg_time_dimension_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference) - - # A measure's gg_time_dimension is required to be in the same semantic model as the measure, - # so we can assume the same semantic model for both measure and dimension. - semantic_models = semantic_model_lookup.get_semantic_models_for_measure(measure_reference) - assert ( - len(semantic_models) == 1 - ), f"Expected exactly one semantic model for measure {measure_reference}, but found semantic models {semantic_models}." 
- semantic_model = semantic_models[0] - - entity_link = semantic_model_lookup.resolved_primary_entity(semantic_model) - assert ( - entity_link is not None - ), f"Expected semantic model {semantic_model} to have a primary entity since is contains dimensions, but found none." - - valid_agg_time_dimension_specs.extend( - self._generate_valid_specs_for_time_dimension( - time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) - ) - ) + agg_time_dimension_and_entity_link = ( + self._manifest_lookup.metric_lookup.get_agg_time_dimension_to_replace_metric_time(metric_reference) + ) + if agg_time_dimension_and_entity_link is None: + return False + agg_time_dimension_reference, entity_link = agg_time_dimension_and_entity_link + valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( + time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) + ) for group_by_item_input in query_resolver_input.group_by_item_inputs: if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs): return True diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index d7e74ac01d..ae6668f4e7 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -449,6 +449,32 @@ def comparison_key(self, exclude_fields: Sequence[TimeDimensionSpecField] = ()) exclude_fields=exclude_fields, ) + @staticmethod + def generate_possible_specs_for_time_dimension( + time_dimension_reference: TimeDimensionReference, entity_links: Tuple[EntityReference, ...] 
+ ) -> List[TimeDimensionSpec]: + time_dimension_specs: List[TimeDimensionSpec] = [] + for time_granularity in TimeGranularity: + time_dimension_specs.append( + TimeDimensionSpec( + element_name=time_dimension_reference.element_name, + entity_links=entity_links, + time_granularity=time_granularity, + date_part=None, + ) + ) + for date_part in DatePart: + for time_granularity in date_part.compatible_granularities: + time_dimension_specs.append( + TimeDimensionSpec( + element_name=time_dimension_reference.element_name, + entity_links=entity_links, + time_granularity=time_granularity, + date_part=date_part, + ) + ) + return time_dimension_specs + @dataclass(frozen=True) class NonAdditiveDimensionSpec(SerializableDataclass): @@ -585,7 +611,7 @@ class MetricInputMeasureSpec(SerializableDataclass): fill_nulls_with: Optional[int] = None offset_window: Optional[MetricTimeWindow] = None offset_to_grain: Optional[TimeGranularity] = None - culmination_description: Optional[CumulativeMeasureDescription] = None + cumulation_description: Optional[CumulativeMeasureDescription] = None filter_specs: Tuple[WhereFilterSpec, ...] 
= () alias: Optional[str] = None before_aggregation_time_spine_join_description: Optional[JoinToTimeSpineDescription] = None From 1563c185f0fdb6937c277ab89ad70e7f54337682 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Tue, 23 Jan 2024 16:47:21 -0800 Subject: [PATCH 05/22] WIP --- .../dataflow/builder/dataflow_plan_builder.py | 28 +++++++------ metricflow/dataflow/dataflow_plan.py | 4 ++ .../model/semantics/linkable_spec_resolver.py | 12 ++---- metricflow/model/semantics/metric_lookup.py | 41 +++++++++++-------- .../model/semantics/semantic_model_lookup.py | 18 +++++++- metricflow/plan_conversion/dataflow_to_sql.py | 22 ++++------ metricflow/protocols/semantics.py | 27 ++++++++---- .../metric_time_requirements.py | 20 ++++----- metricflow/specs/specs.py | 8 ++++ .../test/integration/test_configured_cases.py | 3 +- .../test_cumulative_metric_rendering.py | 1 + 11 files changed, 110 insertions(+), 74 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 123e662e1d..bcecb01675 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -1315,21 +1315,25 @@ def _build_aggregated_measure_from_measure_source_node( # Otherwise, the measure will be aggregated over all time. 
time_range_node: Optional[JoinOverTimeRangeNode] = None if cumulative: - query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time - if not query_contains_metric_time_or_agg_time_dimension: - # TODO: Write a test case for this scenario - agg_time_dimension_element_path_key = ( - self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure(measure_spec.reference) + queried_metric_time_spec = queried_linkable_specs.metric_time_spec_with_smallest_granularity + if not queried_metric_time_spec: + valid_agg_time_dimensions = ( + self._semantic_model_lookup.get_agg_time_dimensions_to_replace_metric_time_for_measure( + measure_spec.reference + ) ) - for time_dimension_spec in queried_linkable_specs.time_dimension_specs: - if ( - time_dimension_spec.element_name == agg_time_dimension_element_path_key.element_name - and time_dimension_spec.entity_links == agg_time_dimension_element_path_key.entity_links - ): - query_contains_metric_time_or_agg_time_dimension = True - if query_contains_metric_time_or_agg_time_dimension: + # is there actually only one we allow?? no other granularity/date part? 
+ queried_agg_time_dims = sorted( + set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)), + key=lambda x: x.time_granularity.to_int(), + ) + if queried_agg_time_dims: + queried_metric_time_spec = queried_agg_time_dims[0] + + if queried_metric_time_spec: time_range_node = JoinOverTimeRangeNode( parent_node=measure_recipe.source_node, + metric_time_dimension_spec=queried_metric_time_spec, window=cumulative_window, grain_to_date=cumulative_grain_to_date, time_range_constraint=time_range_constraint diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index ccc55c874d..db506b5bad 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -376,6 +376,7 @@ class JoinOverTimeRangeNode(BaseOutput): def __init__( self, parent_node: BaseOutput, + metric_time_dimension_spec: TimeDimensionSpec, window: Optional[MetricTimeWindow], grain_to_date: Optional[TimeGranularity], node_id: Optional[NodeId] = None, @@ -390,6 +391,7 @@ def __init__( (eg month to day) node_id: Override the node ID with this value time_range_constraint: time range to aggregate over + metric_time_dimension_spec: time dimension spec to use when joining to time spine """ if window and grain_to_date: raise RuntimeError( @@ -400,6 +402,7 @@ def __init__( self._grain_to_date = grain_to_date self._window = window self.time_range_constraint = time_range_constraint + self.metric_time_dimension_spec = metric_time_dimension_spec # Doing a list comprehension throws a type error, so doing it this way. 
parent_nodes: List[DataflowPlanNode] = [self._parent_node] @@ -447,6 +450,7 @@ def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinOverTi window=self.window, grain_to_date=self.grain_to_date, time_range_constraint=self.time_range_constraint, + metric_time_dimension_spec=self.metric_time_dimension_spec, ) diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index bc9ccd43a3..739a947458 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -26,7 +26,6 @@ from metricflow.mf_logging.pretty_print import mf_pformat from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties from metricflow.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator -from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from metricflow.protocols.semantics import SemanticModelAccessor from metricflow.specs.specs import ( DEFAULT_TIME_GRANULARITY, @@ -524,9 +523,7 @@ def __init__( linkable_element_sets_to_merge: List[LinkableElementSet] = [] for semantic_model in semantic_manifest.semantic_models: - linkable_element_sets_to_merge.append( - ValidLinkableSpecResolver._get_elements_in_semantic_model(semantic_model) - ) + linkable_element_sets_to_merge.append(self._get_elements_in_semantic_model(semantic_model)) metric_time_elements_for_no_metrics = self._get_metric_time_elements(measure_reference=None) self._no_metric_linkable_element_set = LinkableElementSet.merge_by_path_key( @@ -550,8 +547,7 @@ def _get_semantic_model_for_measure(self, measure_reference: MeasureReference) - ) return semantic_models_where_measure_was_found[0] - @staticmethod - def _get_elements_in_semantic_model(semantic_model: SemanticModel) -> LinkableElementSet: + def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> LinkableElementSet: """Gets the elements in 
the semantic model, without requiring any joins. Elements related to metric_time are handled separately in _get_metric_time_elements(). @@ -568,7 +564,7 @@ def _get_elements_in_semantic_model(semantic_model: SemanticModel) -> LinkableEl properties=frozenset({LinkableElementProperties.LOCAL, LinkableElementProperties.ENTITY}), ) ) - for entity_link in SemanticModelLookup.entity_links_for_local_elements(semantic_model): + for entity_link in self._semantic_model_lookup.entity_links_for_local_elements(semantic_model): linkable_entities.append( LinkableEntity( semantic_model_origin=semantic_model.reference, @@ -579,7 +575,7 @@ def _get_elements_in_semantic_model(semantic_model: SemanticModel) -> LinkableEl ) ) - for entity_link in SemanticModelLookup.entity_links_for_local_elements(semantic_model): + for entity_link in self._semantic_model_lookup.entity_links_for_local_elements(semantic_model): dimension_properties = frozenset({LinkableElementProperties.LOCAL}) for dimension in semantic_model.dimensions: dimension_type = dimension.type diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index 5fae09f372..e881cff910 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -6,10 +6,7 @@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest -from dbt_semantic_interfaces.references import ( - MeasureReference, - MetricReference, -) +from dbt_semantic_interfaces.references import MeasureReference, MetricReference, TimeDimensionReference from metricflow.errors.errors import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties @@ -21,7 +18,7 @@ from 
metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from metricflow.protocols.semantics import MetricAccessor -from metricflow.specs.specs import LinkableInstanceSpec +from metricflow.specs.specs import LinkableInstanceSpec, TimeDimensionSpec logger = logging.getLogger(__name__) @@ -165,23 +162,33 @@ def contains_cumulative_or_time_offset_metric(self, metric_references: Sequence[ return True return False - def _get_agg_time_dimension_path_keys_for_metric(self, metric_reference: MetricReference) -> Set[ElementPathKey]: + def _get_agg_time_dimension_path_keys_for_metric( + self, metric_reference: MetricReference + ) -> Sequence[ElementPathKey]: """Retrieves the aggregate time dimensions associated with the metric's measures.""" metric = self.get_metric(metric_reference) assert metric.input_measures, f"No input measures found for metric {metric_reference}" - return { - self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure( + + path_keys = set() + for input_measure in metric.input_measures: + path_key = self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure( measure_reference=input_measure.measure_reference ) - for input_measure in metric.input_measures - } + path_keys.add(path_key) + return list(path_keys) - def get_agg_time_dimension_to_replace_metric_time( + def get_agg_time_dimensions_to_replace_metric_time_for_metric( self, metric_reference: MetricReference - ) -> Optional[ElementPathKey]: + ) -> Sequence[TimeDimensionSpec]: + """Get the agg time dimension specs that can be used in place of metric time for this metric, if applicable.""" agg_time_dimension_element_path_keys = self._get_agg_time_dimension_path_keys_for_metric(metric_reference) - if len(agg_time_dimension_element_path_keys) == 1: - return agg_time_dimension_element_path_keys[0] - - # If the metric's input measures have different agg_time_dimensions, user must 
use metric_time. - return None + if len(agg_time_dimension_element_path_keys) != 1: + # If the metric's input measures have different agg_time_dimensions, user must use metric_time. + return [] + + path_key = agg_time_dimension_element_path_keys[0] + valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( + time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), + entity_links=path_key.entity_links, + ) + return valid_agg_time_dimension_specs diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 2e80b5b204..3079eabbd9 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -340,6 +340,7 @@ def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: raise ValueError(f"Unable to find linkable element {element_name} in manifest") def get_agg_time_dimension_path_key_for_measure(self, measure_reference: MeasureReference) -> ElementPathKey: + """Get the agg time dimension associated with the measure.""" agg_time_dimension = self.get_agg_time_dimension_for_measure(measure_reference) # A measure's agg_time_dimension is required to be in the same semantic model as the measure, @@ -355,4 +356,19 @@ def get_agg_time_dimension_path_key_for_measure(self, measure_reference: Measure entity_link is not None ), f"Expected semantic model {semantic_model} to have a primary entity since is contains dimensions, but found none." 
- return ElementPathKey(element_name=agg_time_dimension.element_name, entity_links=(entity_link,)) + return ElementPathKey( + element_name=agg_time_dimension.element_name, + entity_links=(entity_link,), + time_granularity=None, + date_part=None, + ) + + def get_agg_time_dimensions_to_replace_metric_time_for_measure( + self, measure_reference: MeasureReference + ) -> Sequence[TimeDimensionSpec]: + """Get the agg time dimension specs that can be used in place of metric time for this measure.""" + path_key = self.get_agg_time_dimension_path_key_for_measure(measure_reference) + return TimeDimensionSpec.generate_possible_specs_for_time_dimension( + time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), + entity_links=path_key.entity_links, + ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 9d1f32007d..8a31c604c0 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -281,24 +281,19 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat input_data_set = node.parent_node.accept(self) input_data_set_alias = self._next_unique_table_alias() - metric_time_dimension_spec: Optional[TimeDimensionSpec] = None metric_time_dimension_instance: Optional[TimeDimensionInstance] = None - for instance in input_data_set.metric_time_dimension_instances: - if len(instance.spec.entity_links) == 0: + for instance in input_data_set.instance_set.time_dimension_instances: + if instance.spec == node.metric_time_dimension_spec: metric_time_dimension_instance = instance - metric_time_dimension_spec = instance.spec break - # User might refer to the agg_time_dimension by its dimension name instead of 'metric_time'. 
- if not metric_time_dimension_spec: - pass # finish this assert ( - metric_time_dimension_spec and metric_time_dimension_instance - ), "No metric time dimension or agg_time_dimension found in join over time range query. This should have been caught by validations." + metric_time_dimension_instance + ), "Specified metric time spec not found in join over time range query. This should have been caught by validations." time_spine_data_set_alias = self._next_unique_table_alias() metric_time_dimension_column_name = self.column_association_resolver.resolve_spec( - metric_time_dimension_spec + metric_time_dimension_instance.spec ).column_name # Assemble time_spine dataset with metric_time_dimension to join. @@ -313,12 +308,12 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat # Figure out which columns correspond to the time dimension that we want to join on. input_data_set_metric_time_column_association = input_data_set.column_association_for_time_dimension( - metric_time_dimension_spec + metric_time_dimension_instance.spec ) input_data_set_metric_time_col = input_data_set_metric_time_column_association.column_name time_spine_data_set_column_associations = time_spine_data_set.column_association_for_time_dimension( - metric_time_dimension_spec + metric_time_dimension_instance.spec ) time_spine_data_set_time_dimension_col = time_spine_data_set_column_associations.column_name @@ -346,13 +341,14 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat [ time_dimension_instance for time_dimension_instance in input_data_set.instance_set.time_dimension_instances - if time_dimension_instance.spec != metric_time_dimension_spec + if time_dimension_instance != metric_time_dimension_instance ] ), ) table_alias_to_instance_set[input_data_set_alias] = modified_input_instance_set # The output instances are the same as the input instances. + # Shouldn't this just be the selected ones? 
maybe it is (un-optimized) output_instance_set = ChangeAssociatedColumns(self._column_association_resolver).transform( input_data_set.instance_set ) diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index dc2e1498ed..c9b4b0f049 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -9,7 +9,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Dict, FrozenSet, Optional, Sequence, Set +from typing import TYPE_CHECKING, Dict, FrozenSet, Optional, Sequence, Set from dbt_semantic_interfaces.protocols.dimension import Dimension from dbt_semantic_interfaces.protocols.entity import Entity @@ -26,15 +26,12 @@ TimeDimensionReference, ) -from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey - from metricflow.model.semantics.element_group import ElementGrouper from metricflow.model.semantics.linkable_element_properties import LinkableElementProperties -from metricflow.specs.specs import ( - LinkableInstanceSpec, - MeasureSpec, - NonAdditiveDimensionSpec, -) +from metricflow.specs.specs import LinkableInstanceSpec, MeasureSpec, NonAdditiveDimensionSpec, TimeDimensionSpec + +if TYPE_CHECKING: + from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey class SemanticModelAccessor(ABC): @@ -143,6 +140,13 @@ def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: @abstractmethod def get_agg_time_dimension_path_key_for_measure(self, measure_reference: MeasureReference) -> ElementPathKey: + """Get the agg time dimension associated with the measure.""" + raise NotImplementedError + + def get_agg_time_dimensions_to_replace_metric_time_for_measure( + self, measure_reference: MeasureReference + ) -> Sequence[TimeDimensionSpec]: + """Get the agg time dimension specs that can be used in place of metric time for this measure.""" raise NotImplementedError @@ -214,3 +218,10 @@ def 
group_by_item_specs_for_no_metrics_query( ) -> Sequence[LinkableInstanceSpec]: """Return the possible group-by-items for a dimension values query with no metrics.""" raise NotImplementedError + + @abstractmethod + def get_agg_time_dimensions_to_replace_metric_time_for_metric( + self, metric_reference: MetricReference + ) -> Sequence[TimeDimensionSpec]: + """Get the agg time dimension specs that can be used in place of metric time for this metric, if applicable.""" + raise NotImplementedError diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index b2f18de814..569b9c5a41 100644 --- a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -1,13 +1,12 @@ from __future__ import annotations -from typing import List, Sequence, Tuple +from typing import Sequence from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME from dbt_semantic_interfaces.protocols import WhereFilterIntersection -from dbt_semantic_interfaces.references import EntityReference, MetricReference, TimeDimensionReference -from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity -from dbt_semantic_interfaces.type_enums.date_part import DatePart +from dbt_semantic_interfaces.references import MetricReference, TimeDimensionReference +from dbt_semantic_interfaces.type_enums import MetricType from typing_extensions import override from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup @@ -50,15 +49,10 @@ def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInpu def _group_by_items_include_agg_time_dimension( self, query_resolver_input: ResolverInputForQuery, metric_reference: MetricReference ) -> bool: - agg_time_dimension_and_entity_link = ( - 
self._manifest_lookup.metric_lookup.get_agg_time_dimension_to_replace_metric_time(metric_reference) - ) - if agg_time_dimension_and_entity_link is None: - return False - - agg_time_dimension_reference, entity_link = agg_time_dimension_and_entity_link - valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( - time_dimension_reference=agg_time_dimension_reference, entity_links=(entity_link,) + valid_agg_time_dimension_specs = ( + self._manifest_lookup.metric_lookup.get_agg_time_dimensions_to_replace_metric_time_for_metric( + metric_reference + ) ) for group_by_item_input in query_resolver_input.group_by_item_inputs: if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs): diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index ae6668f4e7..bcfc71289e 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -453,6 +453,7 @@ def comparison_key(self, exclude_fields: Sequence[TimeDimensionSpecField] = ()) def generate_possible_specs_for_time_dimension( time_dimension_reference: TimeDimensionReference, entity_links: Tuple[EntityReference, ...] ) -> List[TimeDimensionSpec]: + """Generate a list of time dimension specs with all combinations of granularity & date part.""" time_dimension_specs: List[TimeDimensionSpec] = [] for time_granularity in TimeGranularity: time_dimension_specs.append( @@ -664,6 +665,13 @@ def metric_time_specs(self) -> Sequence[TimeDimensionSpec]: if time_dimension_spec.element_name == METRIC_TIME_ELEMENT_NAME ) + # TODO: what about date part? not allowed for cumulative, right? 
+ @property + def metric_time_spec_with_smallest_granularity(self) -> Optional[TimeDimensionSpec]: + """Get the metric time spec with the smallest granularity, if there are any metric time specs.""" + sorted_specs = sorted(self.metric_time_specs, key=lambda x: x.time_granularity) + return sorted_specs[0] if sorted_specs else None + @property def as_tuple(self) -> Tuple[LinkableInstanceSpec, ...]: # noqa: D return tuple(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index 6b6e080470..92462b515f 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -230,8 +230,7 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", - # CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, - ["itest_cumulative_metric.yaml/cumulative_metric_with_agg_time_dimension"], + CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, ids=lambda name: f"name={name}", ) def test_case( diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index d3e887078b..7d80301f7e 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -81,6 +81,7 @@ def test_cumulative_metric_with_time_constraint( TimeDimensionSpec( element_name="metric_time", entity_links=(), + # TODO: figure out this case. Do we allow querying by month here? 
time_granularity=TimeGranularity.MONTH, ), ), From fde7f07e162013dcb5dff102ba3cd3b5038ba77e Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Tue, 23 Jan 2024 18:02:43 -0800 Subject: [PATCH 06/22] Cleanup --- .../test/query_rendering/test_cumulative_metric_rendering.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index 7d80301f7e..d3e887078b 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -81,7 +81,6 @@ def test_cumulative_metric_with_time_constraint( TimeDimensionSpec( element_name="metric_time", entity_links=(), - # TODO: figure out this case. Do we allow querying by month here? time_granularity=TimeGranularity.MONTH, ), ), From cd8a1367c234926e4f07b2f665e68dbb1d97725c Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Tue, 23 Jan 2024 18:11:21 -0800 Subject: [PATCH 07/22] Cleanup --- metricflow/model/semantics/metric_lookup.py | 1 + metricflow/plan_conversion/dataflow_to_sql.py | 2 +- .../issues/parsing/cumulative_metric_requires_metric_time.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index e881cff910..caea42cbe9 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -187,6 +187,7 @@ def get_agg_time_dimensions_to_replace_metric_time_for_metric( return [] path_key = agg_time_dimension_element_path_keys[0] + # TODO: do we need all these? Or just the one valid granularity? 
Depends what's allowed for time_offset valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), entity_links=path_key.entity_links, diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 8a31c604c0..983ae2293a 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -289,7 +289,7 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat assert ( metric_time_dimension_instance - ), "Specified metric time spec not found in join over time range query. This should have been caught by validations." + ), "Specified metric time spec not found in parent data set. This should have been caught by validations." time_spine_data_set_alias = self._next_unique_table_alias() metric_time_dimension_column_name = self.column_association_resolver.resolve_spec( diff --git a/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py b/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py index 60893c6b98..e135cd7f2c 100644 --- a/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py +++ b/metricflow/query/issues/parsing/cumulative_metric_requires_metric_time.py @@ -26,7 +26,6 @@ def ui_description(self, associated_input: MetricFlowQueryResolverInput) -> str: f"The query includes a cumulative metric {repr(self.metric_reference.element_name)} that does not " f"accumulate over all-time, but the group-by items do not include {repr(METRIC_TIME_ELEMENT_NAME)} " "or the metric's agg_time_dimension." - # TODO: add name of agg_time_dim? 
) @override From a30b4e33c3e24582805dd6cbaf2b41627e4a615a Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 10:52:48 -0800 Subject: [PATCH 08/22] Get time offset working --- .../dataflow/builder/dataflow_plan_builder.py | 34 +- metricflow/model/semantics/metric_lookup.py | 3 +- .../model/semantics/semantic_model_lookup.py | 2 +- metricflow/plan_conversion/dataflow_to_sql.py | 68 ++- metricflow/protocols/semantics.py | 4 +- .../metric_time_requirements.py | 6 +- metricflow/specs/specs.py | 6 +- .../integration/test_cases/itest_metrics.yaml | 71 +++ .../test_derived_metric_rendering.py | 52 ++ ...ive_metric_with_time_constraint__plan0.sql | 12 +- ..._with_time_constraint__plan0_optimized.sql | 17 +- ...fset_to_grain_with_agg_time_dim__plan0.sql | 534 ++++++++++++++++++ ...ain_with_agg_time_dim__plan0_optimized.sql | 55 ++ ...offset_window_with_agg_time_dim__plan0.sql | 534 ++++++++++++++++++ ...dow_with_agg_time_dim__plan0_optimized.sql | 55 ++ 15 files changed, 1393 insertions(+), 60 deletions(-) create mode 100644 metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql create mode 100644 metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0_optimized.sql create mode 100644 metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql create mode 100644 metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0_optimized.sql diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index bcecb01675..ad1a1fa4d8 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -1317,12 +1317,11 @@ def _build_aggregated_measure_from_measure_source_node( if 
cumulative: queried_metric_time_spec = queried_linkable_specs.metric_time_spec_with_smallest_granularity if not queried_metric_time_spec: - valid_agg_time_dimensions = ( - self._semantic_model_lookup.get_agg_time_dimensions_to_replace_metric_time_for_measure( - measure_spec.reference - ) + valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( + measure_spec.reference ) - # is there actually only one we allow?? no other granularity/date part? + # TODO: we only actually allow one granularity for cumulative. Should that be reflected here? + # Definitely shouldn't have date part in here queried_agg_time_dims = sorted( set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)), key=lambda x: x.time_granularity.to_int(), @@ -1344,21 +1343,28 @@ def _build_aggregated_measure_from_measure_source_node( # If querying an offset metric, join to time spine before aggregation. join_to_time_spine_node: Optional[JoinToTimeSpineNode] = None if before_aggregation_time_spine_join_description is not None: - query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time - if not query_contains_metric_time_or_agg_time_dimension: - pass # check for agg_time_dimension and update accordingly - # Write a test case for this scenario - assert ( - query_contains_metric_time_or_agg_time_dimension - ), "Joining to time spine requires querying with metric time or the appropriate agg_time_dimension." - # Can you use agg_time_dimension if it's a ratio metric? Only if both metrics use same agg time dim? 
+ # TODO: below logic is somewhat duplicated + queried_metric_time_specs = list(queried_linkable_specs.metric_time_specs) + if not queried_metric_time_specs: + valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( + measure_spec.reference + ) + queried_metric_time_specs = list( + set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) + ) + + assert queried_metric_time_specs, ( + "Joining to time spine requires querying with metric time or the appropriate agg_time_dimension." + "This should have been caught by validations." + ) + assert before_aggregation_time_spine_join_description.join_type is SqlJoinType.INNER, ( f"Expected {SqlJoinType.INNER} for joining to time spine before aggregation. Remove this if there's a " f"new use case." ) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=time_range_node or measure_recipe.source_node, - requested_metric_time_dimension_specs=list(queried_linkable_specs.metric_time_specs), + requested_metric_time_dimension_specs=queried_metric_time_specs, time_range_constraint=time_range_constraint, offset_window=before_aggregation_time_spine_join_description.offset_window, offset_to_grain=before_aggregation_time_spine_join_description.offset_to_grain, diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index caea42cbe9..4a57090197 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -167,6 +167,7 @@ def _get_agg_time_dimension_path_keys_for_metric( ) -> Sequence[ElementPathKey]: """Retrieves the aggregate time dimensions associated with the metric's measures.""" metric = self.get_metric(metric_reference) + # This should get hit on offset metric, right? 
assert metric.input_measures, f"No input measures found for metric {metric_reference}" path_keys = set() @@ -177,7 +178,7 @@ def _get_agg_time_dimension_path_keys_for_metric( path_keys.add(path_key) return list(path_keys) - def get_agg_time_dimensions_to_replace_metric_time_for_metric( + def get_valid_agg_time_dimensions_for_metric( self, metric_reference: MetricReference ) -> Sequence[TimeDimensionSpec]: """Get the agg time dimension specs that can be used in place of metric time for this metric, if applicable.""" diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 3079eabbd9..edf9f18066 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -363,7 +363,7 @@ def get_agg_time_dimension_path_key_for_measure(self, measure_reference: Measure date_part=None, ) - def get_agg_time_dimensions_to_replace_metric_time_for_measure( + def get_agg_time_dimension_specs_for_measure( self, measure_reference: MeasureReference ) -> Sequence[TimeDimensionSpec]: """Get the agg time dimension specs that can be used in place of metric time for this measure.""" diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 983ae2293a..be222e1e98 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -5,6 +5,7 @@ from typing import List, Optional, Sequence, Tuple, Union from dbt_semantic_interfaces.enum_extension import assert_values_exhausted +from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME from dbt_semantic_interfaces.protocols.metric import MetricInputMeasure, MetricType from dbt_semantic_interfaces.references import MetricModelReference from dbt_semantic_interfaces.type_enums.aggregation_type import AggregationType @@ -1239,57 +1240,76 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> 
SqlDataSet parent_data_set = node.parent_node.accept(self) parent_alias = self._next_unique_table_alias() - # Build time spine dataset - # Here: if no metric time instance, replace this var with agg_time_dim - metric_time_dimension_instance: Optional[TimeDimensionInstance] = None - for instance in parent_data_set.metric_time_dimension_instances: - if len(instance.spec.entity_links) == 0: - # Use the instance with the lowest granularity - if not metric_time_dimension_instance or ( - instance.spec.time_granularity < metric_time_dimension_instance.spec.time_granularity - ): - metric_time_dimension_instance = instance + # TODO: rename requested_metric_time_dimension_specs -> requested_agg_time_dimension_specs assert ( - metric_time_dimension_instance - ), "Can't join to time spine without metric time. Validations should have prevented this." - metric_time_dimension_column_name = self.column_association_resolver.resolve_spec( - metric_time_dimension_instance.spec + len(node.requested_metric_time_dimension_specs) > 0 + ), "Must have at least one value in requested_metric_time_dimension_specs for JoinToTimeSpineNode." + + # Determine if the time spine join should use metric_time or the agg_time_dimension (metric_time takes priority). + agg_time_dimension_for_join = node.requested_metric_time_dimension_specs[0] + for spec in node.requested_metric_time_dimension_specs[1:]: + if spec.element_name == METRIC_TIME_ELEMENT_NAME: + agg_time_dimension_for_join = spec + break + + # Find the time dimension instances in the parent data set that match the one we want to join with. 
+ agg_time_dimension_instances: List[TimeDimensionInstance] = [] + for instance in parent_data_set.instance_set.time_dimension_instances: + if ( + instance.spec.date_part is None # Ensure we don't join using an instance with date part + and instance.spec.element_name == agg_time_dimension_for_join.element_name + and instance.spec.entity_links == agg_time_dimension_for_join.entity_links + ): + agg_time_dimension_instances.append(instance) + + # Choose the instance with the smallest granularity available. + agg_time_dimension_instances.sort(key=lambda instance: instance.spec.time_granularity.to_int()) + assert ( + len(agg_time_dimension_instances) > 0 + ), "Couldn't find requested agg_time_dimension in parent data set. The dataflow plan may have been configured incorrectly." + agg_time_dimension_instance_for_join = agg_time_dimension_instances[0] + + # Build time spine data set using the requested agg_time_dimension name. + agg_time_dimension_column_name = self.column_association_resolver.resolve_spec( + agg_time_dimension_instance_for_join.spec ).column_name time_spine_alias = self._next_unique_table_alias() time_spine_dataset = self._make_time_spine_data_set( - metric_time_dimension_instance=metric_time_dimension_instance, - metric_time_dimension_column_name=metric_time_dimension_column_name, + metric_time_dimension_instance=agg_time_dimension_instance_for_join, + metric_time_dimension_column_name=agg_time_dimension_column_name, time_spine_source=self._time_spine_source, time_range_constraint=node.time_range_constraint, ) - # Build join expression + # Build join expression. 
join_description = SqlQueryPlanJoinBuilder.make_join_to_time_spine_join_description( node=node, time_spine_alias=time_spine_alias, - metric_time_dimension_column_name=metric_time_dimension_column_name, + metric_time_dimension_column_name=agg_time_dimension_column_name, parent_sql_select_node=parent_data_set.sql_select_node, parent_alias=parent_alias, ) - # Use all instances EXCEPT metric_time from parent data set. - non_metric_time_parent_instance_set = InstanceSet( + # Select all instances from the parent data set, EXCEPT the requested agg_time_dimension. + # The agg_time_dimension will be selected from the time spine data set. + parent_instance_set = InstanceSet( measure_instances=parent_data_set.instance_set.measure_instances, dimension_instances=parent_data_set.instance_set.dimension_instances, time_dimension_instances=tuple( time_dimension_instance for time_dimension_instance in parent_data_set.instance_set.time_dimension_instances - if time_dimension_instance.spec.element_name != DataSet.metric_time_dimension_reference().element_name + if time_dimension_instance.spec.element_name != agg_time_dimension_for_join.element_name + # and time_dimension_instance.spec.entity_links != agg_time_dimension_for_join.entity_links ), entity_instances=parent_data_set.instance_set.entity_instances, metric_instances=parent_data_set.instance_set.metric_instances, metadata_instances=parent_data_set.instance_set.metadata_instances, ) parent_select_columns = create_select_columns_for_instance_sets( - self._column_association_resolver, OrderedDict({parent_alias: non_metric_time_parent_instance_set}) + self._column_association_resolver, OrderedDict({parent_alias: parent_instance_set}) ) - # Use time instance from time spine to replace metric_time instances. + # Select agg_time_dimension instance from time spine data set. 
assert ( len(time_spine_dataset.instance_set.time_dimension_instances) == 1 and len(time_spine_dataset.sql_select_node.select_columns) == 1 @@ -1344,7 +1364,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet time_spine_instance_set = InstanceSet(time_dimension_instances=tuple(time_spine_dim_instances)) return SqlDataSet( - instance_set=InstanceSet.merge([time_spine_instance_set, non_metric_time_parent_instance_set]), + instance_set=InstanceSet.merge([time_spine_instance_set, parent_instance_set]), sql_select_node=SqlSelectStatementNode( description=node.description, select_columns=tuple(time_spine_select_columns) + parent_select_columns, diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index c9b4b0f049..f9afb82548 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -143,7 +143,7 @@ def get_agg_time_dimension_path_key_for_measure(self, measure_reference: Measure """Get the agg time dimension associated with the measure.""" raise NotImplementedError - def get_agg_time_dimensions_to_replace_metric_time_for_measure( + def get_agg_time_dimension_specs_for_measure( self, measure_reference: MeasureReference ) -> Sequence[TimeDimensionSpec]: """Get the agg time dimension specs that can be used in place of metric time for this measure.""" @@ -220,7 +220,7 @@ def group_by_item_specs_for_no_metrics_query( raise NotImplementedError @abstractmethod - def get_agg_time_dimensions_to_replace_metric_time_for_metric( + def get_valid_agg_time_dimensions_for_metric( self, metric_reference: MetricReference ) -> Sequence[TimeDimensionSpec]: """Get the agg time dimension specs that can be used in place of metric time for this metric, if applicable.""" diff --git a/metricflow/query/validation_rules/metric_time_requirements.py b/metricflow/query/validation_rules/metric_time_requirements.py index 569b9c5a41..aeb4693f01 100644 --- 
a/metricflow/query/validation_rules/metric_time_requirements.py +++ b/metricflow/query/validation_rules/metric_time_requirements.py @@ -49,10 +49,8 @@ def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInpu def _group_by_items_include_agg_time_dimension( self, query_resolver_input: ResolverInputForQuery, metric_reference: MetricReference ) -> bool: - valid_agg_time_dimension_specs = ( - self._manifest_lookup.metric_lookup.get_agg_time_dimensions_to_replace_metric_time_for_metric( - metric_reference - ) + valid_agg_time_dimension_specs = self._manifest_lookup.metric_lookup.get_valid_agg_time_dimensions_for_metric( + metric_reference ) for group_by_item_input in query_resolver_input.group_by_item_inputs: if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs): diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index bcfc71289e..0c24e97010 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -476,6 +476,10 @@ def generate_possible_specs_for_time_dimension( ) return time_dimension_specs + @property + def is_metric_time(self) -> bool: # noqa: D + return self.element_name == METRIC_TIME_ELEMENT_NAME + @dataclass(frozen=True) class NonAdditiveDimensionSpec(SerializableDataclass): @@ -662,7 +666,7 @@ def metric_time_specs(self) -> Sequence[TimeDimensionSpec]: return tuple( time_dimension_spec for time_dimension_spec in self.time_dimension_specs - if time_dimension_spec.element_name == METRIC_TIME_ELEMENT_NAME + if time_dimension_spec.is_metric_time ) # TODO: what about date part? not allowed for cumulative, right? 
diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index 26ae81116f..7110f22109 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1743,3 +1743,74 @@ integration_test: ) subq_11 WHERE booking__is_instant ) +--- +integration_test: + name: offset_window_with_agg_time_dim + description: Tests a derived metric query with an offset_window queried with agg_time_dimension + model: SIMPLE_MODEL + metrics: ["bookings_growth_2_weeks"] + group_bys: ["booking__ds__day"] + check_query: | + SELECT + COALESCE(a.booking__ds__day, b.booking__ds__day) AS booking__ds__day + , bookings - bookings_2_weeks_ago AS bookings_growth_2_weeks + FROM ( + SELECT + ds AS booking__ds__day + , SUM(1) AS bookings + FROM {{ source_schema }}.fct_bookings + GROUP BY + ds + ) a + FULL OUTER JOIN ( + SELECT + c.ds AS booking__ds__day + , d.bookings_2_weeks_ago AS bookings_2_weeks_ago + FROM {{ mf_time_spine_source }} c + INNER JOIN ( + SELECT + ds AS booking__ds__day + , SUM(1) AS bookings_2_weeks_ago + FROM {{ source_schema }}.fct_bookings + GROUP BY + ds + ) d + ON {{ render_date_sub("C", "ds", 14, TimeGranularity.DAY) }} = d.booking__ds__day + ) b + ON a.booking__ds__day = b.booking__ds__day +--- +integration_test: + name: offset_to_grain_with_agg_time_dim + description: Tests a derived metric query with an offset_to_grain and agg_time_dimension. 
+ model: SIMPLE_MODEL + metrics: ["bookings_growth_since_start_of_month"] + group_bys: ["booking__ds__day"] + check_query: | + SELECT + COALESCE(a.booking__ds__day, b.booking__ds__day) AS booking__ds__day + , bookings - bookings_at_start_of_month AS bookings_growth_since_start_of_month + FROM ( + SELECT + ds AS booking__ds__day + , SUM(1) AS bookings + FROM {{ source_schema }}.fct_bookings + GROUP BY + ds + ) a + FULL OUTER JOIN ( + SELECT + c.ds AS booking__ds__day + , d.bookings_at_start_of_month AS bookings_at_start_of_month + FROM {{ mf_time_spine_source }} c + INNER JOIN ( + SELECT + ds AS booking__ds__day + , SUM(1) AS bookings_at_start_of_month + FROM {{ source_schema }}.fct_bookings + GROUP BY + ds + ) d + ON {{ render_date_trunc("c.ds", TimeGranularity.MONTH) }} = d.booking__ds__day + ) b + ON a.booking__ds__day = b.booking__ds__day +# TODO: tests with granularity, date part, multiple metric time / agg time options diff --git a/metricflow/test/query_rendering/test_derived_metric_rendering.py b/metricflow/test/query_rendering/test_derived_metric_rendering.py index 1bf6204a9d..00f9e79974 100644 --- a/metricflow/test/query_rendering/test_derived_metric_rendering.py +++ b/metricflow/test/query_rendering/test_derived_metric_rendering.py @@ -520,3 +520,55 @@ def test_nested_derived_metric_offset_with_joined_where_constraint_not_selected( sql_client=sql_client, node=dataflow_plan.sink_output_nodes[0].parent_node, ) + + +@pytest.mark.sql_engine_snapshot +def test_offset_window_with_agg_time_dim( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + query_parser: MetricFlowQueryParser, + dataflow_plan_builder: DataflowPlanBuilder, + dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, + sql_client: SqlClient, + create_source_tables: bool, + column_association_resolver: ColumnAssociationResolver, +) -> None: + query_spec = query_parser.parse_and_validate_query( + metric_names=("bookings_growth_2_weeks",), + 
group_by_names=("booking__ds__day",), + ) + + dataflow_plan = dataflow_plan_builder.build_plan(query_spec) + convert_and_check( + request=request, + mf_test_session_state=mf_test_session_state, + dataflow_to_sql_converter=dataflow_to_sql_converter, + sql_client=sql_client, + node=dataflow_plan.sink_output_nodes[0].parent_node, + ) + + +@pytest.mark.sql_engine_snapshot +def test_offset_to_grain_with_agg_time_dim( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + query_parser: MetricFlowQueryParser, + dataflow_plan_builder: DataflowPlanBuilder, + dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, + sql_client: SqlClient, + create_source_tables: bool, + column_association_resolver: ColumnAssociationResolver, +) -> None: + query_spec = query_parser.parse_and_validate_query( + metric_names=("bookings_growth_since_start_of_month",), + group_by_names=("booking__ds__day",), + ) + + dataflow_plan = dataflow_plan_builder.build_plan(query_spec) + convert_and_check( + request=request, + mf_test_session_state=mf_test_session_state, + dataflow_to_sql_converter=dataflow_to_sql_converter, + sql_client=sql_client, + node=dataflow_plan.sink_output_nodes[0].parent_node, + ) diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql index 38870197e4..2927feac1d 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql @@ -20,7 +20,7 @@ FROM ( FROM ( -- Join Self Over Time Range SELECT - subq_3.metric_time__day AS metric_time__day + subq_3.metric_time__month AS metric_time__month 
, subq_2.ds__day AS ds__day , subq_2.ds__week AS ds__week , subq_2.ds__month AS ds__month @@ -43,8 +43,8 @@ FROM ( , subq_2.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day , subq_2.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow , subq_2.revenue_instance__ds__extract_doy AS revenue_instance__ds__extract_doy + , subq_2.metric_time__day AS metric_time__day , subq_2.metric_time__week AS metric_time__week - , subq_2.metric_time__month AS metric_time__month , subq_2.metric_time__quarter AS metric_time__quarter , subq_2.metric_time__year AS metric_time__year , subq_2.metric_time__extract_year AS metric_time__extract_year @@ -59,9 +59,11 @@ FROM ( FROM ( -- Time Spine SELECT - subq_4.ds AS metric_time__day + DATE_TRUNC('month', subq_4.ds) AS metric_time__month FROM ***************************.mf_time_spine subq_4 WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01' + GROUP BY + DATE_TRUNC('month', subq_4.ds) ) subq_3 INNER JOIN ( -- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00] @@ -176,9 +178,9 @@ FROM ( ) subq_2 ON ( - subq_2.metric_time__day <= subq_3.metric_time__day + subq_2.metric_time__month <= subq_3.metric_time__month ) AND ( - subq_2.metric_time__day > subq_3.metric_time__day - INTERVAL 2 month + subq_2.metric_time__month > subq_3.metric_time__month - INTERVAL 2 month ) ) subq_5 ) subq_6 diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql index 933797f685..db55011cab 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql +++ 
b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql @@ -4,32 +4,33 @@ -- Aggregate Measures -- Compute Metrics via Expressions SELECT - subq_11.metric_time__month AS metric_time__month + subq_12.metric_time__month AS metric_time__month , SUM(subq_11.txn_revenue) AS trailing_2_months_revenue FROM ( -- Time Spine SELECT - ds AS metric_time__day + DATE_TRUNC('month', ds) AS metric_time__month FROM ***************************.mf_time_spine subq_13 WHERE ds BETWEEN '2020-01-01' AND '2020-01-01' + GROUP BY + DATE_TRUNC('month', ds) ) subq_12 INNER JOIN ( -- Read Elements From Semantic Model 'revenue' -- Metric Time Dimension 'ds' -- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00] SELECT - DATE_TRUNC('day', created_at) AS metric_time__day - , DATE_TRUNC('month', created_at) AS metric_time__month + DATE_TRUNC('month', created_at) AS metric_time__month , revenue AS txn_revenue FROM ***************************.fct_revenue revenue_src_10007 WHERE DATE_TRUNC('day', created_at) BETWEEN '2019-11-01' AND '2020-01-01' ) subq_11 ON ( - subq_11.metric_time__day <= subq_12.metric_time__day + subq_11.metric_time__month <= subq_12.metric_time__month ) AND ( - subq_11.metric_time__day > subq_12.metric_time__day - INTERVAL 2 month + subq_11.metric_time__month > subq_12.metric_time__month - INTERVAL 2 month ) -WHERE subq_11.metric_time__month BETWEEN '2020-01-01' AND '2020-01-01' +WHERE subq_12.metric_time__month BETWEEN '2020-01-01' AND '2020-01-01' GROUP BY - subq_11.metric_time__month + subq_12.metric_time__month diff --git a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql new file mode 100644 index 0000000000..2c317b7ee1 --- /dev/null +++ 
b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql @@ -0,0 +1,534 @@ +-- Compute Metrics via Expressions +SELECT + subq_13.booking__ds__day + , bookings - bookings_at_start_of_month AS bookings_growth_since_start_of_month +FROM ( + -- Combine Aggregated Outputs + SELECT + COALESCE(subq_4.booking__ds__day, subq_12.booking__ds__day) AS booking__ds__day + , MAX(subq_4.bookings) AS bookings + , MAX(subq_12.bookings_at_start_of_month) AS bookings_at_start_of_month + FROM ( + -- Compute Metrics via Expressions + SELECT + subq_3.booking__ds__day + , subq_3.bookings + FROM ( + -- Aggregate Measures + SELECT + subq_2.booking__ds__day + , SUM(subq_2.bookings) AS bookings + FROM ( + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + subq_1.booking__ds__day + , subq_1.bookings + FROM ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__extract_year + , subq_0.ds__extract_quarter + , subq_0.ds__extract_month + , subq_0.ds__extract_day + , subq_0.ds__extract_dow + , subq_0.ds__extract_doy + , subq_0.ds_partitioned__day + , subq_0.ds_partitioned__week + , subq_0.ds_partitioned__month + , subq_0.ds_partitioned__quarter + , subq_0.ds_partitioned__year + , subq_0.ds_partitioned__extract_year + , subq_0.ds_partitioned__extract_quarter + , subq_0.ds_partitioned__extract_month + , subq_0.ds_partitioned__extract_day + , subq_0.ds_partitioned__extract_dow + , subq_0.ds_partitioned__extract_doy + , subq_0.paid_at__day + , subq_0.paid_at__week + , subq_0.paid_at__month + , subq_0.paid_at__quarter + , subq_0.paid_at__year + , subq_0.paid_at__extract_year + , subq_0.paid_at__extract_quarter + , subq_0.paid_at__extract_month + , subq_0.paid_at__extract_day + , subq_0.paid_at__extract_dow + , subq_0.paid_at__extract_doy + , subq_0.booking__ds__day + , subq_0.booking__ds__week + , 
subq_0.booking__ds__month + , subq_0.booking__ds__quarter + , subq_0.booking__ds__year + , subq_0.booking__ds__extract_year + , subq_0.booking__ds__extract_quarter + , subq_0.booking__ds__extract_month + , subq_0.booking__ds__extract_day + , subq_0.booking__ds__extract_dow + , subq_0.booking__ds__extract_doy + , subq_0.booking__ds_partitioned__day + , subq_0.booking__ds_partitioned__week + , subq_0.booking__ds_partitioned__month + , subq_0.booking__ds_partitioned__quarter + , subq_0.booking__ds_partitioned__year + , subq_0.booking__ds_partitioned__extract_year + , subq_0.booking__ds_partitioned__extract_quarter + , subq_0.booking__ds_partitioned__extract_month + , subq_0.booking__ds_partitioned__extract_day + , subq_0.booking__ds_partitioned__extract_dow + , subq_0.booking__ds_partitioned__extract_doy + , subq_0.booking__paid_at__day + , subq_0.booking__paid_at__week + , subq_0.booking__paid_at__month + , subq_0.booking__paid_at__quarter + , subq_0.booking__paid_at__year + , subq_0.booking__paid_at__extract_year + , subq_0.booking__paid_at__extract_quarter + , subq_0.booking__paid_at__extract_month + , subq_0.booking__paid_at__extract_day + , subq_0.booking__paid_at__extract_dow + , subq_0.booking__paid_at__extract_doy + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + , subq_0.ds__extract_year AS metric_time__extract_year + , subq_0.ds__extract_quarter AS metric_time__extract_quarter + , subq_0.ds__extract_month AS metric_time__extract_month + , subq_0.ds__extract_day AS metric_time__extract_day + , subq_0.ds__extract_dow AS metric_time__extract_dow + , subq_0.ds__extract_doy AS metric_time__extract_doy + , subq_0.listing + , subq_0.guest + , subq_0.host + , subq_0.booking__listing + , subq_0.booking__guest + , subq_0.booking__host + , subq_0.is_instant + , subq_0.booking__is_instant + , subq_0.bookings + 
, subq_0.instant_bookings + , subq_0.booking_value + , subq_0.max_booking_value + , subq_0.min_booking_value + , subq_0.bookers + , subq_0.average_booking_value + , subq_0.referred_bookings + , subq_0.median_booking_value + , subq_0.booking_value_p99 + , subq_0.discrete_booking_value_p99 + , subq_0.approximate_continuous_booking_value_p99 + , subq_0.approximate_discrete_booking_value_p99 + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + SELECT + 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , bookings_source_src_10001.booking_value + , bookings_source_src_10001.booking_value AS max_booking_value + , bookings_source_src_10001.booking_value AS min_booking_value + , bookings_source_src_10001.guest_id AS bookers + , bookings_source_src_10001.booking_value AS average_booking_value + , bookings_source_src_10001.booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings + , bookings_source_src_10001.booking_value AS median_booking_value + , bookings_source_src_10001.booking_value AS booking_value_p99 + , bookings_source_src_10001.booking_value AS discrete_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_continuous_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_discrete_booking_value_p99 + , bookings_source_src_10001.is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS ds__extract_month + , EXTRACT(day FROM 
bookings_source_src_10001.ds) AS ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) 
AS paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS paid_at__extract_doy + , bookings_source_src_10001.is_instant AS booking__is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS booking__ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS booking__ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS booking__ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS booking__ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS booking__ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS booking__ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS booking__ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS booking__ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS booking__ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS booking__ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS booking__ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_day + , EXTRACT(isodow FROM 
bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS booking__paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS booking__paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS booking__paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS booking__paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS booking__paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_doy + , bookings_source_src_10001.listing_id AS listing + , bookings_source_src_10001.guest_id AS guest + , bookings_source_src_10001.host_id AS host + , bookings_source_src_10001.listing_id AS booking__listing + , bookings_source_src_10001.guest_id AS booking__guest + , bookings_source_src_10001.host_id AS booking__host + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_0 + ) subq_1 + ) subq_2 + GROUP BY + subq_2.booking__ds__day + ) subq_3 + ) subq_4 + FULL OUTER JOIN ( + -- Compute Metrics via Expressions + SELECT + subq_11.booking__ds__day + , subq_11.bookings AS bookings_at_start_of_month + FROM ( + -- Aggregate Measures + SELECT + subq_10.booking__ds__day + , SUM(subq_10.bookings) AS bookings + FROM ( + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + subq_9.booking__ds__day 
+ , subq_9.bookings + FROM ( + -- Join to Time Spine Dataset + SELECT + subq_7.booking__ds__day AS booking__ds__day + , subq_6.ds_partitioned__day AS ds_partitioned__day + , subq_6.ds_partitioned__week AS ds_partitioned__week + , subq_6.ds_partitioned__month AS ds_partitioned__month + , subq_6.ds_partitioned__quarter AS ds_partitioned__quarter + , subq_6.ds_partitioned__year AS ds_partitioned__year + , subq_6.ds_partitioned__extract_year AS ds_partitioned__extract_year + , subq_6.ds_partitioned__extract_quarter AS ds_partitioned__extract_quarter + , subq_6.ds_partitioned__extract_month AS ds_partitioned__extract_month + , subq_6.ds_partitioned__extract_day AS ds_partitioned__extract_day + , subq_6.ds_partitioned__extract_dow AS ds_partitioned__extract_dow + , subq_6.ds_partitioned__extract_doy AS ds_partitioned__extract_doy + , subq_6.paid_at__day AS paid_at__day + , subq_6.paid_at__week AS paid_at__week + , subq_6.paid_at__month AS paid_at__month + , subq_6.paid_at__quarter AS paid_at__quarter + , subq_6.paid_at__year AS paid_at__year + , subq_6.paid_at__extract_year AS paid_at__extract_year + , subq_6.paid_at__extract_quarter AS paid_at__extract_quarter + , subq_6.paid_at__extract_month AS paid_at__extract_month + , subq_6.paid_at__extract_day AS paid_at__extract_day + , subq_6.paid_at__extract_dow AS paid_at__extract_dow + , subq_6.paid_at__extract_doy AS paid_at__extract_doy + , subq_6.booking__ds_partitioned__day AS booking__ds_partitioned__day + , subq_6.booking__ds_partitioned__week AS booking__ds_partitioned__week + , subq_6.booking__ds_partitioned__month AS booking__ds_partitioned__month + , subq_6.booking__ds_partitioned__quarter AS booking__ds_partitioned__quarter + , subq_6.booking__ds_partitioned__year AS booking__ds_partitioned__year + , subq_6.booking__ds_partitioned__extract_year AS booking__ds_partitioned__extract_year + , subq_6.booking__ds_partitioned__extract_quarter AS booking__ds_partitioned__extract_quarter + , 
subq_6.booking__ds_partitioned__extract_month AS booking__ds_partitioned__extract_month + , subq_6.booking__ds_partitioned__extract_day AS booking__ds_partitioned__extract_day + , subq_6.booking__ds_partitioned__extract_dow AS booking__ds_partitioned__extract_dow + , subq_6.booking__ds_partitioned__extract_doy AS booking__ds_partitioned__extract_doy + , subq_6.booking__paid_at__day AS booking__paid_at__day + , subq_6.booking__paid_at__week AS booking__paid_at__week + , subq_6.booking__paid_at__month AS booking__paid_at__month + , subq_6.booking__paid_at__quarter AS booking__paid_at__quarter + , subq_6.booking__paid_at__year AS booking__paid_at__year + , subq_6.booking__paid_at__extract_year AS booking__paid_at__extract_year + , subq_6.booking__paid_at__extract_quarter AS booking__paid_at__extract_quarter + , subq_6.booking__paid_at__extract_month AS booking__paid_at__extract_month + , subq_6.booking__paid_at__extract_day AS booking__paid_at__extract_day + , subq_6.booking__paid_at__extract_dow AS booking__paid_at__extract_dow + , subq_6.booking__paid_at__extract_doy AS booking__paid_at__extract_doy + , subq_6.metric_time__day AS metric_time__day + , subq_6.metric_time__week AS metric_time__week + , subq_6.metric_time__month AS metric_time__month + , subq_6.metric_time__quarter AS metric_time__quarter + , subq_6.metric_time__year AS metric_time__year + , subq_6.metric_time__extract_year AS metric_time__extract_year + , subq_6.metric_time__extract_quarter AS metric_time__extract_quarter + , subq_6.metric_time__extract_month AS metric_time__extract_month + , subq_6.metric_time__extract_day AS metric_time__extract_day + , subq_6.metric_time__extract_dow AS metric_time__extract_dow + , subq_6.metric_time__extract_doy AS metric_time__extract_doy + , subq_6.listing AS listing + , subq_6.guest AS guest + , subq_6.host AS host + , subq_6.booking__listing AS booking__listing + , subq_6.booking__guest AS booking__guest + , subq_6.booking__host AS booking__host + , 
subq_6.is_instant AS is_instant + , subq_6.booking__is_instant AS booking__is_instant + , subq_6.bookings AS bookings + , subq_6.instant_bookings AS instant_bookings + , subq_6.booking_value AS booking_value + , subq_6.max_booking_value AS max_booking_value + , subq_6.min_booking_value AS min_booking_value + , subq_6.bookers AS bookers + , subq_6.average_booking_value AS average_booking_value + , subq_6.referred_bookings AS referred_bookings + , subq_6.median_booking_value AS median_booking_value + , subq_6.booking_value_p99 AS booking_value_p99 + , subq_6.discrete_booking_value_p99 AS discrete_booking_value_p99 + , subq_6.approximate_continuous_booking_value_p99 AS approximate_continuous_booking_value_p99 + , subq_6.approximate_discrete_booking_value_p99 AS approximate_discrete_booking_value_p99 + FROM ( + -- Time Spine + SELECT + subq_8.ds AS booking__ds__day + FROM ***************************.mf_time_spine subq_8 + ) subq_7 + INNER JOIN ( + -- Metric Time Dimension 'ds' + SELECT + subq_5.ds__day + , subq_5.ds__week + , subq_5.ds__month + , subq_5.ds__quarter + , subq_5.ds__year + , subq_5.ds__extract_year + , subq_5.ds__extract_quarter + , subq_5.ds__extract_month + , subq_5.ds__extract_day + , subq_5.ds__extract_dow + , subq_5.ds__extract_doy + , subq_5.ds_partitioned__day + , subq_5.ds_partitioned__week + , subq_5.ds_partitioned__month + , subq_5.ds_partitioned__quarter + , subq_5.ds_partitioned__year + , subq_5.ds_partitioned__extract_year + , subq_5.ds_partitioned__extract_quarter + , subq_5.ds_partitioned__extract_month + , subq_5.ds_partitioned__extract_day + , subq_5.ds_partitioned__extract_dow + , subq_5.ds_partitioned__extract_doy + , subq_5.paid_at__day + , subq_5.paid_at__week + , subq_5.paid_at__month + , subq_5.paid_at__quarter + , subq_5.paid_at__year + , subq_5.paid_at__extract_year + , subq_5.paid_at__extract_quarter + , subq_5.paid_at__extract_month + , subq_5.paid_at__extract_day + , subq_5.paid_at__extract_dow + , subq_5.paid_at__extract_doy + 
, subq_5.booking__ds__day + , subq_5.booking__ds__week + , subq_5.booking__ds__month + , subq_5.booking__ds__quarter + , subq_5.booking__ds__year + , subq_5.booking__ds__extract_year + , subq_5.booking__ds__extract_quarter + , subq_5.booking__ds__extract_month + , subq_5.booking__ds__extract_day + , subq_5.booking__ds__extract_dow + , subq_5.booking__ds__extract_doy + , subq_5.booking__ds_partitioned__day + , subq_5.booking__ds_partitioned__week + , subq_5.booking__ds_partitioned__month + , subq_5.booking__ds_partitioned__quarter + , subq_5.booking__ds_partitioned__year + , subq_5.booking__ds_partitioned__extract_year + , subq_5.booking__ds_partitioned__extract_quarter + , subq_5.booking__ds_partitioned__extract_month + , subq_5.booking__ds_partitioned__extract_day + , subq_5.booking__ds_partitioned__extract_dow + , subq_5.booking__ds_partitioned__extract_doy + , subq_5.booking__paid_at__day + , subq_5.booking__paid_at__week + , subq_5.booking__paid_at__month + , subq_5.booking__paid_at__quarter + , subq_5.booking__paid_at__year + , subq_5.booking__paid_at__extract_year + , subq_5.booking__paid_at__extract_quarter + , subq_5.booking__paid_at__extract_month + , subq_5.booking__paid_at__extract_day + , subq_5.booking__paid_at__extract_dow + , subq_5.booking__paid_at__extract_doy + , subq_5.ds__day AS metric_time__day + , subq_5.ds__week AS metric_time__week + , subq_5.ds__month AS metric_time__month + , subq_5.ds__quarter AS metric_time__quarter + , subq_5.ds__year AS metric_time__year + , subq_5.ds__extract_year AS metric_time__extract_year + , subq_5.ds__extract_quarter AS metric_time__extract_quarter + , subq_5.ds__extract_month AS metric_time__extract_month + , subq_5.ds__extract_day AS metric_time__extract_day + , subq_5.ds__extract_dow AS metric_time__extract_dow + , subq_5.ds__extract_doy AS metric_time__extract_doy + , subq_5.listing + , subq_5.guest + , subq_5.host + , subq_5.booking__listing + , subq_5.booking__guest + , subq_5.booking__host + , 
subq_5.is_instant + , subq_5.booking__is_instant + , subq_5.bookings + , subq_5.instant_bookings + , subq_5.booking_value + , subq_5.max_booking_value + , subq_5.min_booking_value + , subq_5.bookers + , subq_5.average_booking_value + , subq_5.referred_bookings + , subq_5.median_booking_value + , subq_5.booking_value_p99 + , subq_5.discrete_booking_value_p99 + , subq_5.approximate_continuous_booking_value_p99 + , subq_5.approximate_discrete_booking_value_p99 + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + SELECT + 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , bookings_source_src_10001.booking_value + , bookings_source_src_10001.booking_value AS max_booking_value + , bookings_source_src_10001.booking_value AS min_booking_value + , bookings_source_src_10001.guest_id AS bookers + , bookings_source_src_10001.booking_value AS average_booking_value + , bookings_source_src_10001.booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings + , bookings_source_src_10001.booking_value AS median_booking_value + , bookings_source_src_10001.booking_value AS booking_value_p99 + , bookings_source_src_10001.booking_value AS discrete_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_continuous_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_discrete_booking_value_p99 + , bookings_source_src_10001.is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS ds__extract_quarter + , EXTRACT(month FROM 
bookings_source_src_10001.ds) AS ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS 
paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS paid_at__extract_doy + , bookings_source_src_10001.is_instant AS booking__is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS booking__ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS booking__ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS booking__ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS booking__ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS booking__ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS booking__ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS booking__ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS booking__ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS booking__ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS booking__ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS booking__ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS 
booking__ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS booking__paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS booking__paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS booking__paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS booking__paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS booking__paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_doy + , bookings_source_src_10001.listing_id AS listing + , bookings_source_src_10001.guest_id AS guest + , bookings_source_src_10001.host_id AS host + , bookings_source_src_10001.listing_id AS booking__listing + , bookings_source_src_10001.guest_id AS booking__guest + , bookings_source_src_10001.host_id AS booking__host + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_5 + ) subq_6 + ON + DATE_TRUNC('month', subq_7.booking__ds__day) = subq_6.booking__ds__day + ) subq_9 + ) subq_10 + GROUP BY + subq_10.booking__ds__day + ) subq_11 + ) subq_12 + ON + subq_4.booking__ds__day = subq_12.booking__ds__day + GROUP BY + COALESCE(subq_4.booking__ds__day, subq_12.booking__ds__day) +) subq_13 diff --git 
a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0_optimized.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0_optimized.sql new file mode 100644 index 0000000000..c75e71d99a --- /dev/null +++ b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0_optimized.sql @@ -0,0 +1,55 @@ +-- Compute Metrics via Expressions +SELECT + booking__ds__day + , bookings - bookings_at_start_of_month AS bookings_growth_since_start_of_month +FROM ( + -- Combine Aggregated Outputs + SELECT + COALESCE(subq_18.booking__ds__day, subq_26.booking__ds__day) AS booking__ds__day + , MAX(subq_18.bookings) AS bookings + , MAX(subq_26.bookings_at_start_of_month) AS bookings_at_start_of_month + FROM ( + -- Aggregate Measures + -- Compute Metrics via Expressions + SELECT + booking__ds__day + , SUM(bookings) AS bookings + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + -- Metric Time Dimension 'ds' + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + DATE_TRUNC('day', ds) AS booking__ds__day + , 1 AS bookings + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_16 + GROUP BY + booking__ds__day + ) subq_18 + FULL OUTER JOIN ( + -- Join to Time Spine Dataset + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + -- Aggregate Measures + -- Compute Metrics via Expressions + SELECT + subq_22.ds AS booking__ds__day + , SUM(subq_20.bookings) AS bookings_at_start_of_month + FROM ***************************.mf_time_spine subq_22 + INNER JOIN ( + -- Read Elements From Semantic Model 'bookings_source' + -- Metric Time Dimension 'ds' + SELECT + DATE_TRUNC('day', ds) AS booking__ds__day + , 1 AS bookings + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_20 + ON + DATE_TRUNC('month', 
subq_22.ds) = subq_20.booking__ds__day + GROUP BY + subq_22.ds + ) subq_26 + ON + subq_18.booking__ds__day = subq_26.booking__ds__day + GROUP BY + COALESCE(subq_18.booking__ds__day, subq_26.booking__ds__day) +) subq_27 diff --git a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql new file mode 100644 index 0000000000..6cdb31c5d2 --- /dev/null +++ b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql @@ -0,0 +1,534 @@ +-- Compute Metrics via Expressions +SELECT + subq_13.booking__ds__day + , bookings - bookings_2_weeks_ago AS bookings_growth_2_weeks +FROM ( + -- Combine Aggregated Outputs + SELECT + COALESCE(subq_4.booking__ds__day, subq_12.booking__ds__day) AS booking__ds__day + , MAX(subq_4.bookings) AS bookings + , MAX(subq_12.bookings_2_weeks_ago) AS bookings_2_weeks_ago + FROM ( + -- Compute Metrics via Expressions + SELECT + subq_3.booking__ds__day + , subq_3.bookings + FROM ( + -- Aggregate Measures + SELECT + subq_2.booking__ds__day + , SUM(subq_2.bookings) AS bookings + FROM ( + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + subq_1.booking__ds__day + , subq_1.bookings + FROM ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__extract_year + , subq_0.ds__extract_quarter + , subq_0.ds__extract_month + , subq_0.ds__extract_day + , subq_0.ds__extract_dow + , subq_0.ds__extract_doy + , subq_0.ds_partitioned__day + , subq_0.ds_partitioned__week + , subq_0.ds_partitioned__month + , subq_0.ds_partitioned__quarter + , subq_0.ds_partitioned__year + , subq_0.ds_partitioned__extract_year + , subq_0.ds_partitioned__extract_quarter + , 
subq_0.ds_partitioned__extract_month + , subq_0.ds_partitioned__extract_day + , subq_0.ds_partitioned__extract_dow + , subq_0.ds_partitioned__extract_doy + , subq_0.paid_at__day + , subq_0.paid_at__week + , subq_0.paid_at__month + , subq_0.paid_at__quarter + , subq_0.paid_at__year + , subq_0.paid_at__extract_year + , subq_0.paid_at__extract_quarter + , subq_0.paid_at__extract_month + , subq_0.paid_at__extract_day + , subq_0.paid_at__extract_dow + , subq_0.paid_at__extract_doy + , subq_0.booking__ds__day + , subq_0.booking__ds__week + , subq_0.booking__ds__month + , subq_0.booking__ds__quarter + , subq_0.booking__ds__year + , subq_0.booking__ds__extract_year + , subq_0.booking__ds__extract_quarter + , subq_0.booking__ds__extract_month + , subq_0.booking__ds__extract_day + , subq_0.booking__ds__extract_dow + , subq_0.booking__ds__extract_doy + , subq_0.booking__ds_partitioned__day + , subq_0.booking__ds_partitioned__week + , subq_0.booking__ds_partitioned__month + , subq_0.booking__ds_partitioned__quarter + , subq_0.booking__ds_partitioned__year + , subq_0.booking__ds_partitioned__extract_year + , subq_0.booking__ds_partitioned__extract_quarter + , subq_0.booking__ds_partitioned__extract_month + , subq_0.booking__ds_partitioned__extract_day + , subq_0.booking__ds_partitioned__extract_dow + , subq_0.booking__ds_partitioned__extract_doy + , subq_0.booking__paid_at__day + , subq_0.booking__paid_at__week + , subq_0.booking__paid_at__month + , subq_0.booking__paid_at__quarter + , subq_0.booking__paid_at__year + , subq_0.booking__paid_at__extract_year + , subq_0.booking__paid_at__extract_quarter + , subq_0.booking__paid_at__extract_month + , subq_0.booking__paid_at__extract_day + , subq_0.booking__paid_at__extract_dow + , subq_0.booking__paid_at__extract_doy + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + , 
subq_0.ds__extract_year AS metric_time__extract_year + , subq_0.ds__extract_quarter AS metric_time__extract_quarter + , subq_0.ds__extract_month AS metric_time__extract_month + , subq_0.ds__extract_day AS metric_time__extract_day + , subq_0.ds__extract_dow AS metric_time__extract_dow + , subq_0.ds__extract_doy AS metric_time__extract_doy + , subq_0.listing + , subq_0.guest + , subq_0.host + , subq_0.booking__listing + , subq_0.booking__guest + , subq_0.booking__host + , subq_0.is_instant + , subq_0.booking__is_instant + , subq_0.bookings + , subq_0.instant_bookings + , subq_0.booking_value + , subq_0.max_booking_value + , subq_0.min_booking_value + , subq_0.bookers + , subq_0.average_booking_value + , subq_0.referred_bookings + , subq_0.median_booking_value + , subq_0.booking_value_p99 + , subq_0.discrete_booking_value_p99 + , subq_0.approximate_continuous_booking_value_p99 + , subq_0.approximate_discrete_booking_value_p99 + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + SELECT + 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , bookings_source_src_10001.booking_value + , bookings_source_src_10001.booking_value AS max_booking_value + , bookings_source_src_10001.booking_value AS min_booking_value + , bookings_source_src_10001.guest_id AS bookers + , bookings_source_src_10001.booking_value AS average_booking_value + , bookings_source_src_10001.booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings + , bookings_source_src_10001.booking_value AS median_booking_value + , bookings_source_src_10001.booking_value AS booking_value_p99 + , bookings_source_src_10001.booking_value AS discrete_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_continuous_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_discrete_booking_value_p99 + , bookings_source_src_10001.is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) 
AS ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS paid_at__month + , DATE_TRUNC('quarter', 
bookings_source_src_10001.paid_at) AS paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS paid_at__extract_doy + , bookings_source_src_10001.is_instant AS booking__is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS booking__ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS booking__ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS booking__ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS booking__ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS booking__ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS booking__ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS booking__ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS booking__ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS booking__ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS booking__ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS booking__ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__quarter + , DATE_TRUNC('year', 
bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS booking__paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS booking__paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS booking__paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS booking__paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS booking__paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_doy + , bookings_source_src_10001.listing_id AS listing + , bookings_source_src_10001.guest_id AS guest + , bookings_source_src_10001.host_id AS host + , bookings_source_src_10001.listing_id AS booking__listing + , bookings_source_src_10001.guest_id AS booking__guest + , bookings_source_src_10001.host_id AS 
booking__host + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_0 + ) subq_1 + ) subq_2 + GROUP BY + subq_2.booking__ds__day + ) subq_3 + ) subq_4 + FULL OUTER JOIN ( + -- Compute Metrics via Expressions + SELECT + subq_11.booking__ds__day + , subq_11.bookings AS bookings_2_weeks_ago + FROM ( + -- Aggregate Measures + SELECT + subq_10.booking__ds__day + , SUM(subq_10.bookings) AS bookings + FROM ( + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + subq_9.booking__ds__day + , subq_9.bookings + FROM ( + -- Join to Time Spine Dataset + SELECT + subq_7.booking__ds__day AS booking__ds__day + , subq_6.ds_partitioned__day AS ds_partitioned__day + , subq_6.ds_partitioned__week AS ds_partitioned__week + , subq_6.ds_partitioned__month AS ds_partitioned__month + , subq_6.ds_partitioned__quarter AS ds_partitioned__quarter + , subq_6.ds_partitioned__year AS ds_partitioned__year + , subq_6.ds_partitioned__extract_year AS ds_partitioned__extract_year + , subq_6.ds_partitioned__extract_quarter AS ds_partitioned__extract_quarter + , subq_6.ds_partitioned__extract_month AS ds_partitioned__extract_month + , subq_6.ds_partitioned__extract_day AS ds_partitioned__extract_day + , subq_6.ds_partitioned__extract_dow AS ds_partitioned__extract_dow + , subq_6.ds_partitioned__extract_doy AS ds_partitioned__extract_doy + , subq_6.paid_at__day AS paid_at__day + , subq_6.paid_at__week AS paid_at__week + , subq_6.paid_at__month AS paid_at__month + , subq_6.paid_at__quarter AS paid_at__quarter + , subq_6.paid_at__year AS paid_at__year + , subq_6.paid_at__extract_year AS paid_at__extract_year + , subq_6.paid_at__extract_quarter AS paid_at__extract_quarter + , subq_6.paid_at__extract_month AS paid_at__extract_month + , subq_6.paid_at__extract_day AS paid_at__extract_day + , subq_6.paid_at__extract_dow AS paid_at__extract_dow + , subq_6.paid_at__extract_doy AS paid_at__extract_doy + , subq_6.booking__ds_partitioned__day AS 
booking__ds_partitioned__day + , subq_6.booking__ds_partitioned__week AS booking__ds_partitioned__week + , subq_6.booking__ds_partitioned__month AS booking__ds_partitioned__month + , subq_6.booking__ds_partitioned__quarter AS booking__ds_partitioned__quarter + , subq_6.booking__ds_partitioned__year AS booking__ds_partitioned__year + , subq_6.booking__ds_partitioned__extract_year AS booking__ds_partitioned__extract_year + , subq_6.booking__ds_partitioned__extract_quarter AS booking__ds_partitioned__extract_quarter + , subq_6.booking__ds_partitioned__extract_month AS booking__ds_partitioned__extract_month + , subq_6.booking__ds_partitioned__extract_day AS booking__ds_partitioned__extract_day + , subq_6.booking__ds_partitioned__extract_dow AS booking__ds_partitioned__extract_dow + , subq_6.booking__ds_partitioned__extract_doy AS booking__ds_partitioned__extract_doy + , subq_6.booking__paid_at__day AS booking__paid_at__day + , subq_6.booking__paid_at__week AS booking__paid_at__week + , subq_6.booking__paid_at__month AS booking__paid_at__month + , subq_6.booking__paid_at__quarter AS booking__paid_at__quarter + , subq_6.booking__paid_at__year AS booking__paid_at__year + , subq_6.booking__paid_at__extract_year AS booking__paid_at__extract_year + , subq_6.booking__paid_at__extract_quarter AS booking__paid_at__extract_quarter + , subq_6.booking__paid_at__extract_month AS booking__paid_at__extract_month + , subq_6.booking__paid_at__extract_day AS booking__paid_at__extract_day + , subq_6.booking__paid_at__extract_dow AS booking__paid_at__extract_dow + , subq_6.booking__paid_at__extract_doy AS booking__paid_at__extract_doy + , subq_6.metric_time__day AS metric_time__day + , subq_6.metric_time__week AS metric_time__week + , subq_6.metric_time__month AS metric_time__month + , subq_6.metric_time__quarter AS metric_time__quarter + , subq_6.metric_time__year AS metric_time__year + , subq_6.metric_time__extract_year AS metric_time__extract_year + , 
subq_6.metric_time__extract_quarter AS metric_time__extract_quarter + , subq_6.metric_time__extract_month AS metric_time__extract_month + , subq_6.metric_time__extract_day AS metric_time__extract_day + , subq_6.metric_time__extract_dow AS metric_time__extract_dow + , subq_6.metric_time__extract_doy AS metric_time__extract_doy + , subq_6.listing AS listing + , subq_6.guest AS guest + , subq_6.host AS host + , subq_6.booking__listing AS booking__listing + , subq_6.booking__guest AS booking__guest + , subq_6.booking__host AS booking__host + , subq_6.is_instant AS is_instant + , subq_6.booking__is_instant AS booking__is_instant + , subq_6.bookings AS bookings + , subq_6.instant_bookings AS instant_bookings + , subq_6.booking_value AS booking_value + , subq_6.max_booking_value AS max_booking_value + , subq_6.min_booking_value AS min_booking_value + , subq_6.bookers AS bookers + , subq_6.average_booking_value AS average_booking_value + , subq_6.referred_bookings AS referred_bookings + , subq_6.median_booking_value AS median_booking_value + , subq_6.booking_value_p99 AS booking_value_p99 + , subq_6.discrete_booking_value_p99 AS discrete_booking_value_p99 + , subq_6.approximate_continuous_booking_value_p99 AS approximate_continuous_booking_value_p99 + , subq_6.approximate_discrete_booking_value_p99 AS approximate_discrete_booking_value_p99 + FROM ( + -- Time Spine + SELECT + subq_8.ds AS booking__ds__day + FROM ***************************.mf_time_spine subq_8 + ) subq_7 + INNER JOIN ( + -- Metric Time Dimension 'ds' + SELECT + subq_5.ds__day + , subq_5.ds__week + , subq_5.ds__month + , subq_5.ds__quarter + , subq_5.ds__year + , subq_5.ds__extract_year + , subq_5.ds__extract_quarter + , subq_5.ds__extract_month + , subq_5.ds__extract_day + , subq_5.ds__extract_dow + , subq_5.ds__extract_doy + , subq_5.ds_partitioned__day + , subq_5.ds_partitioned__week + , subq_5.ds_partitioned__month + , subq_5.ds_partitioned__quarter + , subq_5.ds_partitioned__year + , 
subq_5.ds_partitioned__extract_year + , subq_5.ds_partitioned__extract_quarter + , subq_5.ds_partitioned__extract_month + , subq_5.ds_partitioned__extract_day + , subq_5.ds_partitioned__extract_dow + , subq_5.ds_partitioned__extract_doy + , subq_5.paid_at__day + , subq_5.paid_at__week + , subq_5.paid_at__month + , subq_5.paid_at__quarter + , subq_5.paid_at__year + , subq_5.paid_at__extract_year + , subq_5.paid_at__extract_quarter + , subq_5.paid_at__extract_month + , subq_5.paid_at__extract_day + , subq_5.paid_at__extract_dow + , subq_5.paid_at__extract_doy + , subq_5.booking__ds__day + , subq_5.booking__ds__week + , subq_5.booking__ds__month + , subq_5.booking__ds__quarter + , subq_5.booking__ds__year + , subq_5.booking__ds__extract_year + , subq_5.booking__ds__extract_quarter + , subq_5.booking__ds__extract_month + , subq_5.booking__ds__extract_day + , subq_5.booking__ds__extract_dow + , subq_5.booking__ds__extract_doy + , subq_5.booking__ds_partitioned__day + , subq_5.booking__ds_partitioned__week + , subq_5.booking__ds_partitioned__month + , subq_5.booking__ds_partitioned__quarter + , subq_5.booking__ds_partitioned__year + , subq_5.booking__ds_partitioned__extract_year + , subq_5.booking__ds_partitioned__extract_quarter + , subq_5.booking__ds_partitioned__extract_month + , subq_5.booking__ds_partitioned__extract_day + , subq_5.booking__ds_partitioned__extract_dow + , subq_5.booking__ds_partitioned__extract_doy + , subq_5.booking__paid_at__day + , subq_5.booking__paid_at__week + , subq_5.booking__paid_at__month + , subq_5.booking__paid_at__quarter + , subq_5.booking__paid_at__year + , subq_5.booking__paid_at__extract_year + , subq_5.booking__paid_at__extract_quarter + , subq_5.booking__paid_at__extract_month + , subq_5.booking__paid_at__extract_day + , subq_5.booking__paid_at__extract_dow + , subq_5.booking__paid_at__extract_doy + , subq_5.ds__day AS metric_time__day + , subq_5.ds__week AS metric_time__week + , subq_5.ds__month AS metric_time__month + , 
subq_5.ds__quarter AS metric_time__quarter + , subq_5.ds__year AS metric_time__year + , subq_5.ds__extract_year AS metric_time__extract_year + , subq_5.ds__extract_quarter AS metric_time__extract_quarter + , subq_5.ds__extract_month AS metric_time__extract_month + , subq_5.ds__extract_day AS metric_time__extract_day + , subq_5.ds__extract_dow AS metric_time__extract_dow + , subq_5.ds__extract_doy AS metric_time__extract_doy + , subq_5.listing + , subq_5.guest + , subq_5.host + , subq_5.booking__listing + , subq_5.booking__guest + , subq_5.booking__host + , subq_5.is_instant + , subq_5.booking__is_instant + , subq_5.bookings + , subq_5.instant_bookings + , subq_5.booking_value + , subq_5.max_booking_value + , subq_5.min_booking_value + , subq_5.bookers + , subq_5.average_booking_value + , subq_5.referred_bookings + , subq_5.median_booking_value + , subq_5.booking_value_p99 + , subq_5.discrete_booking_value_p99 + , subq_5.approximate_continuous_booking_value_p99 + , subq_5.approximate_discrete_booking_value_p99 + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + SELECT + 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , bookings_source_src_10001.booking_value + , bookings_source_src_10001.booking_value AS max_booking_value + , bookings_source_src_10001.booking_value AS min_booking_value + , bookings_source_src_10001.guest_id AS bookers + , bookings_source_src_10001.booking_value AS average_booking_value + , bookings_source_src_10001.booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings + , bookings_source_src_10001.booking_value AS median_booking_value + , bookings_source_src_10001.booking_value AS booking_value_p99 + , bookings_source_src_10001.booking_value AS discrete_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_continuous_booking_value_p99 + , bookings_source_src_10001.booking_value AS approximate_discrete_booking_value_p99 + , 
bookings_source_src_10001.is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS paid_at__week + , 
DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS paid_at__extract_doy + , bookings_source_src_10001.is_instant AS booking__is_instant + , DATE_TRUNC('day', bookings_source_src_10001.ds) AS booking__ds__day + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS booking__ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS booking__ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS booking__ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS booking__ds__year + , EXTRACT(year FROM bookings_source_src_10001.ds) AS booking__ds__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds) AS booking__ds__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds) AS booking__ds__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds) AS booking__ds__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds) AS booking__ds__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds) AS booking__ds__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__day + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__month + , DATE_TRUNC('quarter', 
bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__year + , EXTRACT(year FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_month + , EXTRACT(day FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__extract_doy + , DATE_TRUNC('day', bookings_source_src_10001.paid_at) AS booking__paid_at__day + , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS booking__paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS booking__paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS booking__paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS booking__paid_at__year + , EXTRACT(year FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_year + , EXTRACT(quarter FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_quarter + , EXTRACT(month FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_month + , EXTRACT(day FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_day + , EXTRACT(isodow FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_dow + , EXTRACT(doy FROM bookings_source_src_10001.paid_at) AS booking__paid_at__extract_doy + , bookings_source_src_10001.listing_id AS listing + , bookings_source_src_10001.guest_id AS guest + , bookings_source_src_10001.host_id AS host + , bookings_source_src_10001.listing_id AS booking__listing + , 
bookings_source_src_10001.guest_id AS booking__guest + , bookings_source_src_10001.host_id AS booking__host + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_5 + ) subq_6 + ON + subq_7.booking__ds__day - INTERVAL 14 day = subq_6.booking__ds__day + ) subq_9 + ) subq_10 + GROUP BY + subq_10.booking__ds__day + ) subq_11 + ) subq_12 + ON + subq_4.booking__ds__day = subq_12.booking__ds__day + GROUP BY + COALESCE(subq_4.booking__ds__day, subq_12.booking__ds__day) +) subq_13 diff --git a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0_optimized.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0_optimized.sql new file mode 100644 index 0000000000..d972aa60d9 --- /dev/null +++ b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0_optimized.sql @@ -0,0 +1,55 @@ +-- Compute Metrics via Expressions +SELECT + booking__ds__day + , bookings - bookings_2_weeks_ago AS bookings_growth_2_weeks +FROM ( + -- Combine Aggregated Outputs + SELECT + COALESCE(subq_18.booking__ds__day, subq_26.booking__ds__day) AS booking__ds__day + , MAX(subq_18.bookings) AS bookings + , MAX(subq_26.bookings_2_weeks_ago) AS bookings_2_weeks_ago + FROM ( + -- Aggregate Measures + -- Compute Metrics via Expressions + SELECT + booking__ds__day + , SUM(bookings) AS bookings + FROM ( + -- Read Elements From Semantic Model 'bookings_source' + -- Metric Time Dimension 'ds' + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + SELECT + DATE_TRUNC('day', ds) AS booking__ds__day + , 1 AS bookings + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_16 + GROUP BY + booking__ds__day + ) subq_18 + FULL OUTER JOIN ( + -- Join to Time Spine Dataset + -- Pass Only Elements: ['bookings', 'booking__ds__day'] + -- Aggregate Measures + -- 
Compute Metrics via Expressions + SELECT + subq_22.ds AS booking__ds__day + , SUM(subq_20.bookings) AS bookings_2_weeks_ago + FROM ***************************.mf_time_spine subq_22 + INNER JOIN ( + -- Read Elements From Semantic Model 'bookings_source' + -- Metric Time Dimension 'ds' + SELECT + DATE_TRUNC('day', ds) AS booking__ds__day + , 1 AS bookings + FROM ***************************.fct_bookings bookings_source_src_10001 + ) subq_20 + ON + subq_22.ds - INTERVAL 14 day = subq_20.booking__ds__day + GROUP BY + subq_22.ds + ) subq_26 + ON + subq_18.booking__ds__day = subq_26.booking__ds__day + GROUP BY + COALESCE(subq_18.booking__ds__day, subq_26.booking__ds__day) +) subq_27 From fa28dc96f901611a96e7da3bd1d75de493b491a5 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 10:55:18 -0800 Subject: [PATCH 09/22] Quick fix --- metricflow/plan_conversion/dataflow_to_sql.py | 6 ++++-- .../test_offset_to_grain_with_agg_time_dim__plan0.sql | 11 +++++++++++ .../test_offset_window_with_agg_time_dim__plan0.sql | 11 +++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index be222e1e98..dca49699a9 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -1298,8 +1298,10 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet time_dimension_instances=tuple( time_dimension_instance for time_dimension_instance in parent_data_set.instance_set.time_dimension_instances - if time_dimension_instance.spec.element_name != agg_time_dimension_for_join.element_name - # and time_dimension_instance.spec.entity_links != agg_time_dimension_for_join.entity_links + if not ( + time_dimension_instance.spec.element_name == agg_time_dimension_for_join.element_name + and time_dimension_instance.spec.entity_links == agg_time_dimension_for_join.entity_links + ) ), 
entity_instances=parent_data_set.instance_set.entity_instances, metric_instances=parent_data_set.instance_set.metric_instances, diff --git a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql index 2c317b7ee1..1f55db2271 100644 --- a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql +++ b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_to_grain_with_agg_time_dim__plan0.sql @@ -242,6 +242,17 @@ FROM ( -- Join to Time Spine Dataset SELECT subq_7.booking__ds__day AS booking__ds__day + , subq_6.ds__day AS ds__day + , subq_6.ds__week AS ds__week + , subq_6.ds__month AS ds__month + , subq_6.ds__quarter AS ds__quarter + , subq_6.ds__year AS ds__year + , subq_6.ds__extract_year AS ds__extract_year + , subq_6.ds__extract_quarter AS ds__extract_quarter + , subq_6.ds__extract_month AS ds__extract_month + , subq_6.ds__extract_day AS ds__extract_day + , subq_6.ds__extract_dow AS ds__extract_dow + , subq_6.ds__extract_doy AS ds__extract_doy , subq_6.ds_partitioned__day AS ds_partitioned__day , subq_6.ds_partitioned__week AS ds_partitioned__week , subq_6.ds_partitioned__month AS ds_partitioned__month diff --git a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql index 6cdb31c5d2..10afa5e97a 100644 --- a/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql +++ 
b/metricflow/test/snapshots/test_derived_metric_rendering.py/SqlQueryPlan/DuckDB/test_offset_window_with_agg_time_dim__plan0.sql @@ -242,6 +242,17 @@ FROM ( -- Join to Time Spine Dataset SELECT subq_7.booking__ds__day AS booking__ds__day + , subq_6.ds__day AS ds__day + , subq_6.ds__week AS ds__week + , subq_6.ds__month AS ds__month + , subq_6.ds__quarter AS ds__quarter + , subq_6.ds__year AS ds__year + , subq_6.ds__extract_year AS ds__extract_year + , subq_6.ds__extract_quarter AS ds__extract_quarter + , subq_6.ds__extract_month AS ds__extract_month + , subq_6.ds__extract_day AS ds__extract_day + , subq_6.ds__extract_dow AS ds__extract_dow + , subq_6.ds__extract_doy AS ds__extract_doy , subq_6.ds_partitioned__day AS ds_partitioned__day , subq_6.ds_partitioned__week AS ds_partitioned__week , subq_6.ds_partitioned__month AS ds_partitioned__month From 23e5c0b90ce7784ba7ccf697af58be7ebeb74679 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 10:56:55 -0800 Subject: [PATCH 10/22] Comment --- metricflow/test/integration/test_cases/itest_metrics.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index 7110f22109..b874fc104c 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1813,4 +1813,5 @@ integration_test: ON {{ render_date_trunc("c.ds", TimeGranularity.MONTH) }} = d.booking__ds__day ) b ON a.booking__ds__day = b.booking__ds__day -# TODO: tests with granularity, date part, multiple metric time / agg time options +# TODO: offset tests with granularity, date part, multiple metric time / agg time options +# TODO: cumulative tests with multiple metric_time / agg time options From a8018d9dc7b64fdb366ce15c02ceeae8aeeb3ec6 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 11:16:34 -0800 Subject: [PATCH 11/22] 
WIP --- .../dataflow/builder/dataflow_plan_builder.py | 54 +++++++++++-------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index ad1a1fa4d8..f2af2e2c60 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -499,19 +499,23 @@ def _build_derived_metric_output_node( ) output_node: BaseOutput = ComputeMetricsNode(parent_node=parent_node, metric_specs=[metric_spec]) + # TODO: Write a test case for this scenario # For nested ratio / derived metrics with time offset, apply offset & where constraint after metric computation. if metric_spec.has_time_offset: - # TODO: if you only query with the agg_time_dimension for the offset metric, should that work? (assuming another input metric uses a diff agg_time_dim) - query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time - if not query_contains_metric_time_or_agg_time_dimension: - pass # check for agg_time_dimension and update accordingly - # Write a test case for this scenario + queried_agg_time_dimension_specs = list(queried_linkable_specs.metric_time_specs) + if not queried_agg_time_dimension_specs: + valid_agg_time_dimensions = self._metric_lookup.get_valid_agg_time_dimensions_for_metric( + metric_spec.reference + ) + queried_agg_time_dimension_specs = list( + set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) + ) assert ( - query_contains_metric_time_or_agg_time_dimension + queried_agg_time_dimension_specs ), "Joining to time spine requires querying with metric_time or the appropriate agg_time_dimension." 
output_node = JoinToTimeSpineNode( parent_node=output_node, - requested_metric_time_dimension_specs=list(queried_linkable_specs.metric_time_specs), + requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, time_range_constraint=time_range_constraint, offset_window=metric_spec.offset_window, offset_to_grain=metric_spec.offset_to_grain, @@ -1110,15 +1114,22 @@ def _build_input_measure_spec_for_base_metric( offset_to_grain=child_metric_offset_to_grain, ) - # Even if the measure is configured to join to time spine, if there's no metric_time in the query, - # there's no need to join to the time spine since all metric_time will be aggregated. + # Even if the measure is configured to join to time spine, if there's no agg_time_dimension in the query, + # there's no need to join to the time spine since all time will be aggregated. after_aggregation_time_spine_join_description = None if input_measure.join_to_timespine: - query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time - if not query_contains_metric_time_or_agg_time_dimension: - pass # check for agg_time_dimension and update accordingly - # Write a test case for this scenario - if query_contains_metric_time_or_agg_time_dimension: + # TODO: Write a test case for this scenario + query_contains_agg_time_dimension = queried_linkable_specs.contains_metric_time + if not query_contains_agg_time_dimension: + # TODO: should this be checking valid agg time dims for measure or metric? 
+ valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( + measure_spec.reference + ) + query_contains_agg_time_dimension = bool( + set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) + ) + + if query_contains_agg_time_dimension: after_aggregation_time_spine_join_description = JoinToTimeSpineDescription( join_type=SqlJoinType.LEFT_OUTER, offset_window=None, @@ -1311,7 +1322,7 @@ def _build_aggregated_measure_from_measure_source_node( f"Recipe not found for measure spec: {measure_spec} and linkable specs: {required_linkable_specs}" ) - # If a cumulative metric is queried with metric_time or agg_time_dimension, join over time range. + # If a cumulative metric is queried with agg_time_dimension, join over time range. # Otherwise, the measure will be aggregated over all time. time_range_node: Optional[JoinOverTimeRangeNode] = None if cumulative: @@ -1320,8 +1331,7 @@ def _build_aggregated_measure_from_measure_source_node( valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( measure_spec.reference ) - # TODO: we only actually allow one granularity for cumulative. Should that be reflected here? - # Definitely shouldn't have date part in here + # TODO: will it be a problem if we get one with date part or diff granularity? 
queried_agg_time_dims = sorted( set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)), key=lambda x: x.time_granularity.to_int(), @@ -1344,16 +1354,16 @@ def _build_aggregated_measure_from_measure_source_node( join_to_time_spine_node: Optional[JoinToTimeSpineNode] = None if before_aggregation_time_spine_join_description is not None: # TODO: below logic is somewhat duplicated - queried_metric_time_specs = list(queried_linkable_specs.metric_time_specs) - if not queried_metric_time_specs: + queried_agg_time_dimension_specs = list(queried_linkable_specs.metric_time_specs) + if not queried_agg_time_dimension_specs: valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( measure_spec.reference ) - queried_metric_time_specs = list( + queried_agg_time_dimension_specs = list( set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) ) - assert queried_metric_time_specs, ( + assert queried_agg_time_dimension_specs, ( "Joining to time spine requires querying with metric time or the appropriate agg_time_dimension." "This should have been caught by validations." 
) @@ -1364,7 +1374,7 @@ def _build_aggregated_measure_from_measure_source_node( ) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=time_range_node or measure_recipe.source_node, - requested_metric_time_dimension_specs=queried_metric_time_specs, + requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, time_range_constraint=time_range_constraint, offset_window=before_aggregation_time_spine_join_description.offset_window, offset_to_grain=before_aggregation_time_spine_join_description.offset_to_grain, From 3f58e882a649803982955784ddcbf5ae67ba5a05 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 11:58:00 -0800 Subject: [PATCH 12/22] Update snapshots --- ...cumulative_metric_with_time_constraint__plan0.sql | 12 +++++------- ..._metric_with_time_constraint__plan0_optimized.sql | 8 +++----- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql index 63ee35aaad..4faccf1d37 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0.sql @@ -20,7 +20,7 @@ FROM ( FROM ( -- Join Self Over Time Range SELECT - subq_3.metric_time__month AS metric_time__month + subq_3.metric_time__day AS metric_time__day , subq_2.ds__day AS ds__day , subq_2.ds__week AS ds__week , subq_2.ds__month AS ds__month @@ -43,8 +43,8 @@ FROM ( , subq_2.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day , subq_2.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow , subq_2.revenue_instance__ds__extract_doy AS 
revenue_instance__ds__extract_doy - , subq_2.metric_time__day AS metric_time__day , subq_2.metric_time__week AS metric_time__week + , subq_2.metric_time__month AS metric_time__month , subq_2.metric_time__quarter AS metric_time__quarter , subq_2.metric_time__year AS metric_time__year , subq_2.metric_time__extract_year AS metric_time__extract_year @@ -59,11 +59,9 @@ FROM ( FROM ( -- Time Spine SELECT - DATE_TRUNC('month', subq_4.ds) AS metric_time__month + subq_4.ds AS metric_time__day FROM ***************************.mf_time_spine subq_4 WHERE subq_4.ds BETWEEN '2020-01-01' AND '2020-01-01' - GROUP BY - DATE_TRUNC('month', subq_4.ds) ) subq_3 INNER JOIN ( -- Constrain Time Range to [2019-11-01T00:00:00, 2020-01-01T00:00:00] @@ -178,9 +176,9 @@ FROM ( ) subq_2 ON ( - subq_2.metric_time__month <= subq_3.metric_time__month + subq_2.metric_time__day <= subq_3.metric_time__day ) AND ( - subq_2.metric_time__month > subq_3.metric_time__month - INTERVAL 2 month + subq_2.metric_time__day > subq_3.metric_time__day - INTERVAL 2 month ) ) subq_5 ) subq_6 diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql index 1fa1babb3d..ddcc18a49f 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_time_constraint__plan0_optimized.sql @@ -9,11 +9,9 @@ SELECT FROM ( -- Time Spine SELECT - DATE_TRUNC('month', ds) AS metric_time__month + ds AS metric_time__day FROM ***************************.mf_time_spine subq_13 WHERE ds BETWEEN '2020-01-01' AND '2020-01-01' - GROUP BY - DATE_TRUNC('month', ds) ) subq_12 INNER JOIN ( -- 
Read Elements From Semantic Model 'revenue' @@ -27,9 +25,9 @@ INNER JOIN ( ) subq_11 ON ( - subq_11.metric_time__month <= subq_12.metric_time__month + subq_11.metric_time__day <= subq_12.metric_time__day ) AND ( - subq_11.metric_time__month > subq_12.metric_time__month - INTERVAL 2 month + subq_11.metric_time__day > subq_12.metric_time__day - INTERVAL 2 month ) WHERE subq_12.metric_time__day BETWEEN '2020-01-01' AND '2020-01-01' GROUP BY From f0039d2b17e7a80317d6a063f7b9834e6dd1b862 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 13:17:23 -0800 Subject: [PATCH 13/22] Comment --- metricflow/dataflow/builder/dataflow_plan_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 8d1f2f1738..de2458cd6a 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -1331,7 +1331,7 @@ def _build_aggregated_measure_from_measure_source_node( valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( measure_spec.reference ) - # TODO: will it be a problem if we get one with date part or diff granularity? + # TODO: will it be a problem if we get one with date part or diff granularity? 
Write test case queried_agg_time_dims = sorted( set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)), key=lambda x: x.time_granularity.to_int(), From 3fc26652614fc383eec852a5ed2e68b3ab7322ff Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 16:03:24 -0800 Subject: [PATCH 14/22] WIP --- .../test_cases/itest_cumulative_metric.yaml | 2 +- .../test_cumulative_metric_rendering.py | 46 +++-- ..._metric_with_agg_time_dimension__plan0.sql | 189 ++++++++++++++++++ ...th_agg_time_dimension__plan0_optimized.sql | 34 ++++ 4 files changed, 258 insertions(+), 13 deletions(-) create mode 100644 metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql create mode 100644 metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql diff --git a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml index 974296656f..17b9927073 100644 --- a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml +++ b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml @@ -387,7 +387,7 @@ integration_test: --- integration_test: name: cumulative_metric_with_agg_time_dimension - description: Query a cumulative metric with its agg_time_dimension. + description: Query a cumulative metric with its agg_time_dimension and a time constraint. 
model: SIMPLE_MODEL metrics: ["trailing_2_months_revenue"] group_bys: ["revenue_instance__ds__day"] diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index 38a59242fe..873cf79ea8 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -15,11 +15,7 @@ from metricflow.protocols.sql_client import SqlClient from metricflow.query.query_parser import MetricFlowQueryParser from metricflow.specs.column_assoc import ColumnAssociationResolver -from metricflow.specs.specs import ( - MetricFlowQuerySpec, - MetricSpec, - TimeDimensionSpec, -) +from metricflow.specs.specs import EntityReference, MetricFlowQuerySpec, MetricSpec, TimeDimensionSpec from metricflow.test.fixtures.model_fixtures import ConsistentIdObjectRepository from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState from metricflow.test.query_rendering.compare_rendered_query import convert_and_check @@ -212,13 +208,7 @@ def test_cumulative_metric_no_window_with_time_constraint( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="revenue_all_time"),), dimension_specs=(), - time_dimension_specs=( - TimeDimensionSpec( - element_name="ds", - entity_links=(), - time_granularity=TimeGranularity.MONTH, - ), - ), + time_dimension_specs=(MTD_SPEC_MONTH,), time_range_constraint=TimeRangeConstraint( start_time=as_datetime("2020-01-01"), end_time=as_datetime("2020-01-01") ), @@ -295,3 +285,35 @@ def test_cumulative_metric_month( sql_client=sql_client, node=dataflow_plan.sink_output_nodes[0].parent_node, ) + + +@pytest.mark.sql_engine_snapshot +def test_cumulative_metric_with_agg_time_dimension( + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, + dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, + 
consistent_id_object_repository: ConsistentIdObjectRepository, + sql_client: SqlClient, +) -> None: + """Tests rendering a query for a cumulative metric queried with agg time dimension.""" + dataflow_plan = dataflow_plan_builder.build_plan( + MetricFlowQuerySpec( + metric_specs=(MetricSpec(element_name="trailing_2_months_revenue"),), + dimension_specs=(), + time_dimension_specs=( + TimeDimensionSpec(element_name="ds", entity_links=(EntityReference("revenue_instance"),)), + ), + time_range_constraint=TimeRangeConstraint( + start_time=as_datetime("2020-03-05"), end_time=as_datetime("2021-01-04") + ), + ) + ) + + convert_and_check( + request=request, + mf_test_session_state=mf_test_session_state, + dataflow_to_sql_converter=dataflow_to_sql_converter, + sql_client=sql_client, + node=dataflow_plan.sink_output_nodes[0].parent_node, + ) diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql new file mode 100644 index 0000000000..24daa015d5 --- /dev/null +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql @@ -0,0 +1,189 @@ +-- Compute Metrics via Expressions +SELECT + subq_8.revenue_instance__ds__day + , subq_8.txn_revenue AS trailing_2_months_revenue +FROM ( + -- Aggregate Measures + SELECT + subq_7.revenue_instance__ds__day + , SUM(subq_7.txn_revenue) AS txn_revenue + FROM ( + -- Constrain Time Range to [2020-03-05T00:00:00, 2021-01-04T00:00:00] + SELECT + subq_6.revenue_instance__ds__day + , subq_6.txn_revenue + FROM ( + -- Pass Only Elements: ['txn_revenue', 'revenue_instance__ds__day'] + SELECT + subq_5.revenue_instance__ds__day + , subq_5.txn_revenue + FROM ( + -- Join Self Over Time Range + SELECT + subq_3.revenue_instance__ds__day AS 
revenue_instance__ds__day + , subq_2.ds__day AS ds__day + , subq_2.ds__week AS ds__week + , subq_2.ds__month AS ds__month + , subq_2.ds__quarter AS ds__quarter + , subq_2.ds__year AS ds__year + , subq_2.ds__extract_year AS ds__extract_year + , subq_2.ds__extract_quarter AS ds__extract_quarter + , subq_2.ds__extract_month AS ds__extract_month + , subq_2.ds__extract_day AS ds__extract_day + , subq_2.ds__extract_dow AS ds__extract_dow + , subq_2.ds__extract_doy AS ds__extract_doy + , subq_2.revenue_instance__ds__week AS revenue_instance__ds__week + , subq_2.revenue_instance__ds__month AS revenue_instance__ds__month + , subq_2.revenue_instance__ds__quarter AS revenue_instance__ds__quarter + , subq_2.revenue_instance__ds__year AS revenue_instance__ds__year + , subq_2.revenue_instance__ds__extract_year AS revenue_instance__ds__extract_year + , subq_2.revenue_instance__ds__extract_quarter AS revenue_instance__ds__extract_quarter + , subq_2.revenue_instance__ds__extract_month AS revenue_instance__ds__extract_month + , subq_2.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day + , subq_2.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow + , subq_2.revenue_instance__ds__extract_doy AS revenue_instance__ds__extract_doy + , subq_2.metric_time__day AS metric_time__day + , subq_2.metric_time__week AS metric_time__week + , subq_2.metric_time__month AS metric_time__month + , subq_2.metric_time__quarter AS metric_time__quarter + , subq_2.metric_time__year AS metric_time__year + , subq_2.metric_time__extract_year AS metric_time__extract_year + , subq_2.metric_time__extract_quarter AS metric_time__extract_quarter + , subq_2.metric_time__extract_month AS metric_time__extract_month + , subq_2.metric_time__extract_day AS metric_time__extract_day + , subq_2.metric_time__extract_dow AS metric_time__extract_dow + , subq_2.metric_time__extract_doy AS metric_time__extract_doy + , subq_2.user AS user + , subq_2.revenue_instance__user AS 
revenue_instance__user + , subq_2.txn_revenue AS txn_revenue + FROM ( + -- Time Spine + SELECT + subq_4.ds AS revenue_instance__ds__day + FROM ***************************.mf_time_spine subq_4 + WHERE subq_4.ds BETWEEN '2020-03-05' AND '2021-01-04' + ) subq_3 + INNER JOIN ( + -- Constrain Time Range to [2020-01-05T00:00:00, 2021-01-04T00:00:00] + SELECT + subq_1.ds__day + , subq_1.ds__week + , subq_1.ds__month + , subq_1.ds__quarter + , subq_1.ds__year + , subq_1.ds__extract_year + , subq_1.ds__extract_quarter + , subq_1.ds__extract_month + , subq_1.ds__extract_day + , subq_1.ds__extract_dow + , subq_1.ds__extract_doy + , subq_1.revenue_instance__ds__day + , subq_1.revenue_instance__ds__week + , subq_1.revenue_instance__ds__month + , subq_1.revenue_instance__ds__quarter + , subq_1.revenue_instance__ds__year + , subq_1.revenue_instance__ds__extract_year + , subq_1.revenue_instance__ds__extract_quarter + , subq_1.revenue_instance__ds__extract_month + , subq_1.revenue_instance__ds__extract_day + , subq_1.revenue_instance__ds__extract_dow + , subq_1.revenue_instance__ds__extract_doy + , subq_1.metric_time__day + , subq_1.metric_time__week + , subq_1.metric_time__month + , subq_1.metric_time__quarter + , subq_1.metric_time__year + , subq_1.metric_time__extract_year + , subq_1.metric_time__extract_quarter + , subq_1.metric_time__extract_month + , subq_1.metric_time__extract_day + , subq_1.metric_time__extract_dow + , subq_1.metric_time__extract_doy + , subq_1.user + , subq_1.revenue_instance__user + , subq_1.txn_revenue + FROM ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__extract_year + , subq_0.ds__extract_quarter + , subq_0.ds__extract_month + , subq_0.ds__extract_day + , subq_0.ds__extract_dow + , subq_0.ds__extract_doy + , subq_0.revenue_instance__ds__day + , subq_0.revenue_instance__ds__week + , subq_0.revenue_instance__ds__month + , 
subq_0.revenue_instance__ds__quarter + , subq_0.revenue_instance__ds__year + , subq_0.revenue_instance__ds__extract_year + , subq_0.revenue_instance__ds__extract_quarter + , subq_0.revenue_instance__ds__extract_month + , subq_0.revenue_instance__ds__extract_day + , subq_0.revenue_instance__ds__extract_dow + , subq_0.revenue_instance__ds__extract_doy + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + , subq_0.ds__extract_year AS metric_time__extract_year + , subq_0.ds__extract_quarter AS metric_time__extract_quarter + , subq_0.ds__extract_month AS metric_time__extract_month + , subq_0.ds__extract_day AS metric_time__extract_day + , subq_0.ds__extract_dow AS metric_time__extract_dow + , subq_0.ds__extract_doy AS metric_time__extract_doy + , subq_0.user + , subq_0.revenue_instance__user + , subq_0.txn_revenue + FROM ( + -- Read Elements From Semantic Model 'revenue' + SELECT + revenue_src_10007.revenue AS txn_revenue + , DATE_TRUNC('day', revenue_src_10007.created_at) AS ds__day + , DATE_TRUNC('week', revenue_src_10007.created_at) AS ds__week + , DATE_TRUNC('month', revenue_src_10007.created_at) AS ds__month + , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS ds__quarter + , DATE_TRUNC('year', revenue_src_10007.created_at) AS ds__year + , EXTRACT(year FROM revenue_src_10007.created_at) AS ds__extract_year + , EXTRACT(quarter FROM revenue_src_10007.created_at) AS ds__extract_quarter + , EXTRACT(month FROM revenue_src_10007.created_at) AS ds__extract_month + , EXTRACT(day FROM revenue_src_10007.created_at) AS ds__extract_day + , EXTRACT(isodow FROM revenue_src_10007.created_at) AS ds__extract_dow + , EXTRACT(doy FROM revenue_src_10007.created_at) AS ds__extract_doy + , DATE_TRUNC('day', revenue_src_10007.created_at) AS revenue_instance__ds__day + , DATE_TRUNC('week', revenue_src_10007.created_at) AS 
revenue_instance__ds__week + , DATE_TRUNC('month', revenue_src_10007.created_at) AS revenue_instance__ds__month + , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS revenue_instance__ds__quarter + , DATE_TRUNC('year', revenue_src_10007.created_at) AS revenue_instance__ds__year + , EXTRACT(year FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_year + , EXTRACT(quarter FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_quarter + , EXTRACT(month FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_month + , EXTRACT(day FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_day + , EXTRACT(isodow FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_dow + , EXTRACT(doy FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_doy + , revenue_src_10007.user_id AS user + , revenue_src_10007.user_id AS revenue_instance__user + FROM ***************************.fct_revenue revenue_src_10007 + ) subq_0 + ) subq_1 + WHERE subq_1.revenue_instance__ds__day BETWEEN '2020-01-05' AND '2021-01-04' + ) subq_2 + ON + ( + subq_2.revenue_instance__ds__day <= subq_3.revenue_instance__ds__day + ) AND ( + subq_2.revenue_instance__ds__day > subq_3.revenue_instance__ds__day - INTERVAL 2 month + ) + ) subq_5 + ) subq_6 + WHERE subq_6.revenue_instance__ds__day BETWEEN '2020-03-05' AND '2021-01-04' + ) subq_7 + GROUP BY + subq_7.revenue_instance__ds__day +) subq_8 diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql new file mode 100644 index 0000000000..09a0aa5184 --- /dev/null +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql @@ -0,0 +1,34 
@@ +-- Join Self Over Time Range +-- Pass Only Elements: ['txn_revenue', 'revenue_instance__ds__day'] +-- Constrain Time Range to [2020-03-05T00:00:00, 2021-01-04T00:00:00] +-- Aggregate Measures +-- Compute Metrics via Expressions +SELECT + subq_12.revenue_instance__ds__day AS revenue_instance__ds__day + , SUM(subq_11.txn_revenue) AS trailing_2_months_revenue +FROM ( + -- Time Spine + SELECT + ds AS revenue_instance__ds__day + FROM ***************************.mf_time_spine subq_13 + WHERE ds BETWEEN '2020-03-05' AND '2021-01-04' +) subq_12 +INNER JOIN ( + -- Read Elements From Semantic Model 'revenue' + -- Metric Time Dimension 'ds' + -- Constrain Time Range to [2020-01-05T00:00:00, 2021-01-04T00:00:00] + SELECT + DATE_TRUNC('day', created_at) AS revenue_instance__ds__day + , revenue AS txn_revenue + FROM ***************************.fct_revenue revenue_src_10007 + WHERE DATE_TRUNC('day', created_at) BETWEEN '2020-01-05' AND '2021-01-04' +) subq_11 +ON + ( + subq_11.revenue_instance__ds__day <= subq_12.revenue_instance__ds__day + ) AND ( + subq_11.revenue_instance__ds__day > subq_12.revenue_instance__ds__day - INTERVAL 2 month + ) +WHERE subq_12.revenue_instance__ds__day BETWEEN '2020-03-05' AND '2021-01-04' +GROUP BY + subq_12.revenue_instance__ds__day From cf88ff620adbdc4a895da84a2e140639243a3328 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 16:35:39 -0800 Subject: [PATCH 15/22] Undo --- .../query_rendering/test_cumulative_metric_rendering.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index 873cf79ea8..d6de8292cc 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -208,7 +208,13 @@ def test_cumulative_metric_no_window_with_time_constraint( MetricFlowQuerySpec( 
metric_specs=(MetricSpec(element_name="revenue_all_time"),), dimension_specs=(), - time_dimension_specs=(MTD_SPEC_MONTH,), + time_dimension_specs=( + TimeDimensionSpec( + element_name="ds", + entity_links=(), + time_granularity=TimeGranularity.MONTH, + ), + ), time_range_constraint=TimeRangeConstraint( start_time=as_datetime("2020-01-01"), end_time=as_datetime("2020-01-01") ), From 8ea457828ff6cde3160299b6c3d88449dcc6fad7 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 16:47:22 -0800 Subject: [PATCH 16/22] Finish --- .../dataflow/builder/dataflow_plan_builder.py | 77 ++--- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- .../test_cases/itest_cumulative_metric.yaml | 2 +- .../test_cumulative_metric_rendering.py | 3 - ..._metric_with_agg_time_dimension__plan0.sql | 301 ++++++++---------- ...th_agg_time_dimension__plan0_optimized.sql | 32 +- 6 files changed, 168 insertions(+), 249 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index de2458cd6a..e7d7e41bbe 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -796,9 +796,7 @@ def _build_measure_spec_properties(self, measure_specs: Sequence[MeasureSpec]) - f"semantic models: {semantic_models}. This suggests the measure_specs were not correctly filtered." 
) - agg_time_dimension = agg_time_dimension = self._semantic_model_lookup.get_agg_time_dimension_for_measure( - measure_specs[0].reference - ) + agg_time_dimension = self._semantic_model_lookup.get_agg_time_dimension_for_measure(measure_specs[0].reference) non_additive_dimension_spec = measure_specs[0].non_additive_dimension_spec for measure_spec in measure_specs: if non_additive_dimension_spec != measure_spec.non_additive_dimension_spec: @@ -1322,47 +1320,38 @@ def _build_aggregated_measure_from_measure_source_node( f"Recipe not found for measure spec: {measure_spec} and linkable specs: {required_linkable_specs}" ) + queried_agg_time_dimension_specs = list(queried_linkable_specs.metric_time_specs) + if not queried_agg_time_dimension_specs: + valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( + measure_spec.reference + ) + queried_agg_time_dimension_specs = list( + set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) + ) + # If a cumulative metric is queried with agg_time_dimension, join over time range. # Otherwise, the measure will be aggregated over all time. time_range_node: Optional[JoinOverTimeRangeNode] = None - if cumulative: - queried_metric_time_spec = queried_linkable_specs.metric_time_spec_with_smallest_granularity - if not queried_metric_time_spec: - valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( - measure_spec.reference - ) - # TODO: will it be a problem if we get one with date part or diff granularity? 
Write test case - queried_agg_time_dims = sorted( - set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)), - key=lambda x: x.time_granularity.to_int(), - ) - if queried_agg_time_dims: - queried_metric_time_spec = queried_agg_time_dims[0] - - if queried_metric_time_spec: - time_range_node = JoinOverTimeRangeNode( - parent_node=measure_recipe.source_node, - metric_time_dimension_spec=queried_metric_time_spec, - window=cumulative_window, - grain_to_date=cumulative_grain_to_date, - time_range_constraint=time_range_constraint - if not before_aggregation_time_spine_join_description - else None, - ) + if cumulative and queried_agg_time_dimension_specs: + # TODO: will it be a problem if we get one with date part or diff granularity? Write test case to confirm + # Use the time dimension spec with the smallest granularity. + agg_time_dimension_spec_for_join = sorted( + queried_agg_time_dimension_specs, key=lambda spec: spec.time_granularity.to_int() + )[0] + time_range_node = JoinOverTimeRangeNode( + parent_node=measure_recipe.source_node, + # TODO: rename param + metric_time_dimension_spec=agg_time_dimension_spec_for_join, + window=cumulative_window, + grain_to_date=cumulative_grain_to_date, + time_range_constraint=time_range_constraint + if not before_aggregation_time_spine_join_description + else None, + ) # If querying an offset metric, join to time spine before aggregation. 
join_to_time_spine_node: Optional[JoinToTimeSpineNode] = None if before_aggregation_time_spine_join_description is not None: - # TODO: below logic is somewhat duplicated - queried_agg_time_dimension_specs = list(queried_linkable_specs.metric_time_specs) - if not queried_agg_time_dimension_specs: - valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( - measure_spec.reference - ) - queried_agg_time_dimension_specs = list( - set(queried_linkable_specs.time_dimension_specs).intersection(set(valid_agg_time_dimensions)) - ) - assert queried_agg_time_dimension_specs, ( "Joining to time spine requires querying with metric time or the appropriate agg_time_dimension." "This should have been caught by validations." @@ -1408,19 +1397,17 @@ def _build_aggregated_measure_from_measure_source_node( else: unaggregated_measure_node = filtered_measure_source_node - query_contains_metric_time_or_agg_time_dimension = queried_linkable_specs.contains_metric_time - if not query_contains_metric_time_or_agg_time_dimension: - pass # check for agg_time_dimension and update accordingly - # Write a test case for this scenario - + # If time constraint was previously adjusted for cumulative window or grain, apply original time constraint + # here. Can skip if metric is being aggregated over all time. 
cumulative_metric_constrained_node: Optional[ConstrainTimeRangeNode] = None if ( cumulative_metric_adjusted_time_constraint is not None and time_range_constraint is not None - and query_contains_metric_time_or_agg_time_dimension + and queried_agg_time_dimension_specs ): cumulative_metric_constrained_node = ConstrainTimeRangeNode( - unaggregated_measure_node, time_range_constraint + parent_node=unaggregated_measure_node, + time_range_constraint=time_range_constraint, ) pre_aggregate_node: BaseOutput = cumulative_metric_constrained_node or unaggregated_measure_node @@ -1439,7 +1426,7 @@ def _build_aggregated_measure_from_measure_source_node( queried_time_dimension_spec: Optional[ TimeDimensionSpec ] = self._find_non_additive_dimension_in_linkable_specs( - agg_time_dimension=agg_time_dimension, + agg_time_dimension=TimeDimensionReference(agg_time_dimension.element_name), linkable_specs=queried_linkable_specs.as_tuple, non_additive_dimension_spec=non_additive_dimension_spec, ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index dca49699a9..607a4e277d 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -958,7 +958,7 @@ def visit_constrain_time_range_node(self, node: ConstrainTimeRangeNode) -> SqlDa instead of this: DATE_TRUNC('month', ds) >= '2020-01-01' AND DATE_TRUNC('month', ds <= '2020-02-01') """ - from_data_set: SqlDataSet = node.parent_node.accept(self) + from_data_set = node.parent_node.accept(self) from_data_set_alias = self._next_unique_table_alias() time_dimension_instances_for_metric_time = sorted( diff --git a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml index 17b9927073..a854eb2846 100644 --- a/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml +++ b/metricflow/test/integration/test_cases/itest_cumulative_metric.yaml @@ -392,7 
+392,7 @@ integration_test: metrics: ["trailing_2_months_revenue"] group_bys: ["revenue_instance__ds__day"] order_bys: ["revenue_instance__ds__day"] - time_constraint: ["2020-03-05", "2021-01-04"] + where_filter: '{{ render_time_constraint("revenue_instance__ds__day", "2020-03-05", "2021-01-04") }}' check_query: | SELECT SUM(b.txn_revenue) as trailing_2_months_revenue diff --git a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py index d6de8292cc..52dd131c20 100644 --- a/metricflow/test/query_rendering/test_cumulative_metric_rendering.py +++ b/metricflow/test/query_rendering/test_cumulative_metric_rendering.py @@ -310,9 +310,6 @@ def test_cumulative_metric_with_agg_time_dimension( time_dimension_specs=( TimeDimensionSpec(element_name="ds", entity_links=(EntityReference("revenue_instance"),)), ), - time_range_constraint=TimeRangeConstraint( - start_time=as_datetime("2020-03-05"), end_time=as_datetime("2021-01-04") - ), ) ) diff --git a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql index 24daa015d5..2d7d48e429 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0.sql @@ -1,189 +1,140 @@ -- Compute Metrics via Expressions SELECT - subq_8.revenue_instance__ds__day - , subq_8.txn_revenue AS trailing_2_months_revenue + subq_6.revenue_instance__ds__day + , subq_6.txn_revenue AS trailing_2_months_revenue FROM ( -- Aggregate Measures SELECT - subq_7.revenue_instance__ds__day - , SUM(subq_7.txn_revenue) AS txn_revenue + 
subq_5.revenue_instance__ds__day + , SUM(subq_5.txn_revenue) AS txn_revenue FROM ( - -- Constrain Time Range to [2020-03-05T00:00:00, 2021-01-04T00:00:00] + -- Pass Only Elements: ['txn_revenue', 'revenue_instance__ds__day'] SELECT - subq_6.revenue_instance__ds__day - , subq_6.txn_revenue + subq_4.revenue_instance__ds__day + , subq_4.txn_revenue FROM ( - -- Pass Only Elements: ['txn_revenue', 'revenue_instance__ds__day'] + -- Join Self Over Time Range SELECT - subq_5.revenue_instance__ds__day - , subq_5.txn_revenue + subq_2.revenue_instance__ds__day AS revenue_instance__ds__day + , subq_1.ds__day AS ds__day + , subq_1.ds__week AS ds__week + , subq_1.ds__month AS ds__month + , subq_1.ds__quarter AS ds__quarter + , subq_1.ds__year AS ds__year + , subq_1.ds__extract_year AS ds__extract_year + , subq_1.ds__extract_quarter AS ds__extract_quarter + , subq_1.ds__extract_month AS ds__extract_month + , subq_1.ds__extract_day AS ds__extract_day + , subq_1.ds__extract_dow AS ds__extract_dow + , subq_1.ds__extract_doy AS ds__extract_doy + , subq_1.revenue_instance__ds__week AS revenue_instance__ds__week + , subq_1.revenue_instance__ds__month AS revenue_instance__ds__month + , subq_1.revenue_instance__ds__quarter AS revenue_instance__ds__quarter + , subq_1.revenue_instance__ds__year AS revenue_instance__ds__year + , subq_1.revenue_instance__ds__extract_year AS revenue_instance__ds__extract_year + , subq_1.revenue_instance__ds__extract_quarter AS revenue_instance__ds__extract_quarter + , subq_1.revenue_instance__ds__extract_month AS revenue_instance__ds__extract_month + , subq_1.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day + , subq_1.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow + , subq_1.revenue_instance__ds__extract_doy AS revenue_instance__ds__extract_doy + , subq_1.metric_time__day AS metric_time__day + , subq_1.metric_time__week AS metric_time__week + , subq_1.metric_time__month AS metric_time__month + , 
subq_1.metric_time__quarter AS metric_time__quarter + , subq_1.metric_time__year AS metric_time__year + , subq_1.metric_time__extract_year AS metric_time__extract_year + , subq_1.metric_time__extract_quarter AS metric_time__extract_quarter + , subq_1.metric_time__extract_month AS metric_time__extract_month + , subq_1.metric_time__extract_day AS metric_time__extract_day + , subq_1.metric_time__extract_dow AS metric_time__extract_dow + , subq_1.metric_time__extract_doy AS metric_time__extract_doy + , subq_1.user AS user + , subq_1.revenue_instance__user AS revenue_instance__user + , subq_1.txn_revenue AS txn_revenue FROM ( - -- Join Self Over Time Range + -- Time Spine SELECT - subq_3.revenue_instance__ds__day AS revenue_instance__ds__day - , subq_2.ds__day AS ds__day - , subq_2.ds__week AS ds__week - , subq_2.ds__month AS ds__month - , subq_2.ds__quarter AS ds__quarter - , subq_2.ds__year AS ds__year - , subq_2.ds__extract_year AS ds__extract_year - , subq_2.ds__extract_quarter AS ds__extract_quarter - , subq_2.ds__extract_month AS ds__extract_month - , subq_2.ds__extract_day AS ds__extract_day - , subq_2.ds__extract_dow AS ds__extract_dow - , subq_2.ds__extract_doy AS ds__extract_doy - , subq_2.revenue_instance__ds__week AS revenue_instance__ds__week - , subq_2.revenue_instance__ds__month AS revenue_instance__ds__month - , subq_2.revenue_instance__ds__quarter AS revenue_instance__ds__quarter - , subq_2.revenue_instance__ds__year AS revenue_instance__ds__year - , subq_2.revenue_instance__ds__extract_year AS revenue_instance__ds__extract_year - , subq_2.revenue_instance__ds__extract_quarter AS revenue_instance__ds__extract_quarter - , subq_2.revenue_instance__ds__extract_month AS revenue_instance__ds__extract_month - , subq_2.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day - , subq_2.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow - , subq_2.revenue_instance__ds__extract_doy AS revenue_instance__ds__extract_doy - , 
subq_2.metric_time__day AS metric_time__day - , subq_2.metric_time__week AS metric_time__week - , subq_2.metric_time__month AS metric_time__month - , subq_2.metric_time__quarter AS metric_time__quarter - , subq_2.metric_time__year AS metric_time__year - , subq_2.metric_time__extract_year AS metric_time__extract_year - , subq_2.metric_time__extract_quarter AS metric_time__extract_quarter - , subq_2.metric_time__extract_month AS metric_time__extract_month - , subq_2.metric_time__extract_day AS metric_time__extract_day - , subq_2.metric_time__extract_dow AS metric_time__extract_dow - , subq_2.metric_time__extract_doy AS metric_time__extract_doy - , subq_2.user AS user - , subq_2.revenue_instance__user AS revenue_instance__user - , subq_2.txn_revenue AS txn_revenue + subq_3.ds AS revenue_instance__ds__day + FROM ***************************.mf_time_spine subq_3 + ) subq_2 + INNER JOIN ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__extract_year + , subq_0.ds__extract_quarter + , subq_0.ds__extract_month + , subq_0.ds__extract_day + , subq_0.ds__extract_dow + , subq_0.ds__extract_doy + , subq_0.revenue_instance__ds__day + , subq_0.revenue_instance__ds__week + , subq_0.revenue_instance__ds__month + , subq_0.revenue_instance__ds__quarter + , subq_0.revenue_instance__ds__year + , subq_0.revenue_instance__ds__extract_year + , subq_0.revenue_instance__ds__extract_quarter + , subq_0.revenue_instance__ds__extract_month + , subq_0.revenue_instance__ds__extract_day + , subq_0.revenue_instance__ds__extract_dow + , subq_0.revenue_instance__ds__extract_doy + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + , subq_0.ds__extract_year AS metric_time__extract_year + , subq_0.ds__extract_quarter AS 
metric_time__extract_quarter + , subq_0.ds__extract_month AS metric_time__extract_month + , subq_0.ds__extract_day AS metric_time__extract_day + , subq_0.ds__extract_dow AS metric_time__extract_dow + , subq_0.ds__extract_doy AS metric_time__extract_doy + , subq_0.user + , subq_0.revenue_instance__user + , subq_0.txn_revenue FROM ( - -- Time Spine - SELECT - subq_4.ds AS revenue_instance__ds__day - FROM ***************************.mf_time_spine subq_4 - WHERE subq_4.ds BETWEEN '2020-03-05' AND '2021-01-04' - ) subq_3 - INNER JOIN ( - -- Constrain Time Range to [2020-01-05T00:00:00, 2021-01-04T00:00:00] + -- Read Elements From Semantic Model 'revenue' SELECT - subq_1.ds__day - , subq_1.ds__week - , subq_1.ds__month - , subq_1.ds__quarter - , subq_1.ds__year - , subq_1.ds__extract_year - , subq_1.ds__extract_quarter - , subq_1.ds__extract_month - , subq_1.ds__extract_day - , subq_1.ds__extract_dow - , subq_1.ds__extract_doy - , subq_1.revenue_instance__ds__day - , subq_1.revenue_instance__ds__week - , subq_1.revenue_instance__ds__month - , subq_1.revenue_instance__ds__quarter - , subq_1.revenue_instance__ds__year - , subq_1.revenue_instance__ds__extract_year - , subq_1.revenue_instance__ds__extract_quarter - , subq_1.revenue_instance__ds__extract_month - , subq_1.revenue_instance__ds__extract_day - , subq_1.revenue_instance__ds__extract_dow - , subq_1.revenue_instance__ds__extract_doy - , subq_1.metric_time__day - , subq_1.metric_time__week - , subq_1.metric_time__month - , subq_1.metric_time__quarter - , subq_1.metric_time__year - , subq_1.metric_time__extract_year - , subq_1.metric_time__extract_quarter - , subq_1.metric_time__extract_month - , subq_1.metric_time__extract_day - , subq_1.metric_time__extract_dow - , subq_1.metric_time__extract_doy - , subq_1.user - , subq_1.revenue_instance__user - , subq_1.txn_revenue - FROM ( - -- Metric Time Dimension 'ds' - SELECT - subq_0.ds__day - , subq_0.ds__week - , subq_0.ds__month - , subq_0.ds__quarter - , subq_0.ds__year 
- , subq_0.ds__extract_year - , subq_0.ds__extract_quarter - , subq_0.ds__extract_month - , subq_0.ds__extract_day - , subq_0.ds__extract_dow - , subq_0.ds__extract_doy - , subq_0.revenue_instance__ds__day - , subq_0.revenue_instance__ds__week - , subq_0.revenue_instance__ds__month - , subq_0.revenue_instance__ds__quarter - , subq_0.revenue_instance__ds__year - , subq_0.revenue_instance__ds__extract_year - , subq_0.revenue_instance__ds__extract_quarter - , subq_0.revenue_instance__ds__extract_month - , subq_0.revenue_instance__ds__extract_day - , subq_0.revenue_instance__ds__extract_dow - , subq_0.revenue_instance__ds__extract_doy - , subq_0.ds__day AS metric_time__day - , subq_0.ds__week AS metric_time__week - , subq_0.ds__month AS metric_time__month - , subq_0.ds__quarter AS metric_time__quarter - , subq_0.ds__year AS metric_time__year - , subq_0.ds__extract_year AS metric_time__extract_year - , subq_0.ds__extract_quarter AS metric_time__extract_quarter - , subq_0.ds__extract_month AS metric_time__extract_month - , subq_0.ds__extract_day AS metric_time__extract_day - , subq_0.ds__extract_dow AS metric_time__extract_dow - , subq_0.ds__extract_doy AS metric_time__extract_doy - , subq_0.user - , subq_0.revenue_instance__user - , subq_0.txn_revenue - FROM ( - -- Read Elements From Semantic Model 'revenue' - SELECT - revenue_src_10007.revenue AS txn_revenue - , DATE_TRUNC('day', revenue_src_10007.created_at) AS ds__day - , DATE_TRUNC('week', revenue_src_10007.created_at) AS ds__week - , DATE_TRUNC('month', revenue_src_10007.created_at) AS ds__month - , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS ds__quarter - , DATE_TRUNC('year', revenue_src_10007.created_at) AS ds__year - , EXTRACT(year FROM revenue_src_10007.created_at) AS ds__extract_year - , EXTRACT(quarter FROM revenue_src_10007.created_at) AS ds__extract_quarter - , EXTRACT(month FROM revenue_src_10007.created_at) AS ds__extract_month - , EXTRACT(day FROM revenue_src_10007.created_at) AS 
ds__extract_day - , EXTRACT(isodow FROM revenue_src_10007.created_at) AS ds__extract_dow - , EXTRACT(doy FROM revenue_src_10007.created_at) AS ds__extract_doy - , DATE_TRUNC('day', revenue_src_10007.created_at) AS revenue_instance__ds__day - , DATE_TRUNC('week', revenue_src_10007.created_at) AS revenue_instance__ds__week - , DATE_TRUNC('month', revenue_src_10007.created_at) AS revenue_instance__ds__month - , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS revenue_instance__ds__quarter - , DATE_TRUNC('year', revenue_src_10007.created_at) AS revenue_instance__ds__year - , EXTRACT(year FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_year - , EXTRACT(quarter FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_quarter - , EXTRACT(month FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_month - , EXTRACT(day FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_day - , EXTRACT(isodow FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_dow - , EXTRACT(doy FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_doy - , revenue_src_10007.user_id AS user - , revenue_src_10007.user_id AS revenue_instance__user - FROM ***************************.fct_revenue revenue_src_10007 - ) subq_0 - ) subq_1 - WHERE subq_1.revenue_instance__ds__day BETWEEN '2020-01-05' AND '2021-01-04' - ) subq_2 - ON - ( - subq_2.revenue_instance__ds__day <= subq_3.revenue_instance__ds__day - ) AND ( - subq_2.revenue_instance__ds__day > subq_3.revenue_instance__ds__day - INTERVAL 2 month - ) - ) subq_5 - ) subq_6 - WHERE subq_6.revenue_instance__ds__day BETWEEN '2020-03-05' AND '2021-01-04' - ) subq_7 + revenue_src_10007.revenue AS txn_revenue + , DATE_TRUNC('day', revenue_src_10007.created_at) AS ds__day + , DATE_TRUNC('week', revenue_src_10007.created_at) AS ds__week + , DATE_TRUNC('month', revenue_src_10007.created_at) AS ds__month + , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS ds__quarter 
+ , DATE_TRUNC('year', revenue_src_10007.created_at) AS ds__year + , EXTRACT(year FROM revenue_src_10007.created_at) AS ds__extract_year + , EXTRACT(quarter FROM revenue_src_10007.created_at) AS ds__extract_quarter + , EXTRACT(month FROM revenue_src_10007.created_at) AS ds__extract_month + , EXTRACT(day FROM revenue_src_10007.created_at) AS ds__extract_day + , EXTRACT(isodow FROM revenue_src_10007.created_at) AS ds__extract_dow + , EXTRACT(doy FROM revenue_src_10007.created_at) AS ds__extract_doy + , DATE_TRUNC('day', revenue_src_10007.created_at) AS revenue_instance__ds__day + , DATE_TRUNC('week', revenue_src_10007.created_at) AS revenue_instance__ds__week + , DATE_TRUNC('month', revenue_src_10007.created_at) AS revenue_instance__ds__month + , DATE_TRUNC('quarter', revenue_src_10007.created_at) AS revenue_instance__ds__quarter + , DATE_TRUNC('year', revenue_src_10007.created_at) AS revenue_instance__ds__year + , EXTRACT(year FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_year + , EXTRACT(quarter FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_quarter + , EXTRACT(month FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_month + , EXTRACT(day FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_day + , EXTRACT(isodow FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_dow + , EXTRACT(doy FROM revenue_src_10007.created_at) AS revenue_instance__ds__extract_doy + , revenue_src_10007.user_id AS user + , revenue_src_10007.user_id AS revenue_instance__user + FROM ***************************.fct_revenue revenue_src_10007 + ) subq_0 + ) subq_1 + ON + ( + subq_1.revenue_instance__ds__day <= subq_2.revenue_instance__ds__day + ) AND ( + subq_1.revenue_instance__ds__day > subq_2.revenue_instance__ds__day - INTERVAL 2 month + ) + ) subq_4 + ) subq_5 GROUP BY - subq_7.revenue_instance__ds__day -) subq_8 + subq_5.revenue_instance__ds__day +) subq_6 diff --git 
a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql index 09a0aa5184..80e5ab0f65 100644 --- a/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_cumulative_metric_rendering.py/SqlQueryPlan/DuckDB/test_cumulative_metric_with_agg_time_dimension__plan0_optimized.sql @@ -1,34 +1,18 @@ -- Join Self Over Time Range -- Pass Only Elements: ['txn_revenue', 'revenue_instance__ds__day'] --- Constrain Time Range to [2020-03-05T00:00:00, 2021-01-04T00:00:00] -- Aggregate Measures -- Compute Metrics via Expressions SELECT - subq_12.revenue_instance__ds__day AS revenue_instance__ds__day - , SUM(subq_11.txn_revenue) AS trailing_2_months_revenue -FROM ( - -- Time Spine - SELECT - ds AS revenue_instance__ds__day - FROM ***************************.mf_time_spine subq_13 - WHERE ds BETWEEN '2020-03-05' AND '2021-01-04' -) subq_12 -INNER JOIN ( - -- Read Elements From Semantic Model 'revenue' - -- Metric Time Dimension 'ds' - -- Constrain Time Range to [2020-01-05T00:00:00, 2021-01-04T00:00:00] - SELECT - DATE_TRUNC('day', created_at) AS revenue_instance__ds__day - , revenue AS txn_revenue - FROM ***************************.fct_revenue revenue_src_10007 - WHERE DATE_TRUNC('day', created_at) BETWEEN '2020-01-05' AND '2021-01-04' -) subq_11 + subq_10.ds AS revenue_instance__ds__day + , SUM(revenue_src_10007.revenue) AS trailing_2_months_revenue +FROM ***************************.mf_time_spine subq_10 +INNER JOIN + ***************************.fct_revenue revenue_src_10007 ON ( - subq_11.revenue_instance__ds__day <= subq_12.revenue_instance__ds__day + DATE_TRUNC('day', revenue_src_10007.created_at) <= 
subq_10.ds ) AND ( - subq_11.revenue_instance__ds__day > subq_12.revenue_instance__ds__day - INTERVAL 2 month + DATE_TRUNC('day', revenue_src_10007.created_at) > subq_10.ds - INTERVAL 2 month ) -WHERE subq_12.revenue_instance__ds__day BETWEEN '2020-03-05' AND '2021-01-04' GROUP BY - subq_12.revenue_instance__ds__day + subq_10.ds From a24700e432a9135916047d2dacbe9eb1bb31c6ef Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 16:51:32 -0800 Subject: [PATCH 17/22] Add nested test case --- .../dataflow/builder/dataflow_plan_builder.py | 1 - .../integration/test_cases/itest_metrics.yaml | 35 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index e7d7e41bbe..6a2cfe6369 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -499,7 +499,6 @@ def _build_derived_metric_output_node( ) output_node: BaseOutput = ComputeMetricsNode(parent_node=parent_node, metric_specs=[metric_spec]) - # TODO: Write a test case for this scenario # For nested ratio / derived metrics with time offset, apply offset & where constraint after metric computation. 
if metric_spec.has_time_offset: queried_agg_time_dimension_specs = list(queried_linkable_specs.metric_time_specs) diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index b874fc104c..14cd01a667 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1813,5 +1813,40 @@ integration_test: ON {{ render_date_trunc("c.ds", TimeGranularity.MONTH) }} = d.booking__ds__day ) b ON a.booking__ds__day = b.booking__ds__day +--- +integration_test: + name: nested_derived_metric_outer_offset_with_agg_time_dimension + description: Tests a nested derived metric where the outer metric has an input metric with offset_window, queried with agg_time_dimension. + model: SIMPLE_MODEL + metrics: ["bookings_offset_twice"] + group_by_objs: [{"name": "booking__ds__day"}] + check_query: | + SELECT + subq_9.ds AS booking__ds__day + , 2 * bookings_offset_once AS bookings_offset_twice + FROM {{ source_schema }}.mf_time_spine subq_9 + INNER JOIN ( + SELECT + booking__ds__day + , 2 * bookings AS bookings_offset_once + FROM ( + SELECT + subq_3.ds AS booking__ds__day + , SUM(subq_1.bookings) AS bookings + FROM {{ source_schema }}.mf_time_spine subq_3 + INNER JOIN ( + SELECT + {{ render_date_trunc("ds", TimeGranularity.DAY) }} AS booking__ds__day + , 1 AS bookings + FROM {{ source_schema }}.fct_bookings bookings_source_src_1 + ) subq_1 + ON + {{ render_date_sub("subq_3", "ds", 5, TimeGranularity.DAY) }} = subq_1.booking__ds__day + GROUP BY + subq_3.ds + ) subq_7 + ) subq_8 + ON + {{ render_date_sub("subq_9", "ds", 2, TimeGranularity.DAY) }} = subq_8.booking__ds__day # TODO: offset tests with granularity, date part, multiple metric time / agg time options # TODO: cumulative tests with multiple metric_time / agg time options From 2ff2eb1dface34183f356bc393c2ca9e3d912a06 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 
17:08:16 -0800 Subject: [PATCH 18/22] Add another test case --- .../dataflow/builder/dataflow_plan_builder.py | 3 +-- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- .../integration/test_cases/itest_metrics.yaml | 20 +++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 6a2cfe6369..d5828048b5 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -1115,7 +1115,6 @@ def _build_input_measure_spec_for_base_metric( # there's no need to join to the time spine since all time will be aggregated. after_aggregation_time_spine_join_description = None if input_measure.join_to_timespine: - # TODO: Write a test case for this scenario query_contains_agg_time_dimension = queried_linkable_specs.contains_metric_time if not query_contains_agg_time_dimension: # TODO: should this be checking valid agg time dims for measure or metric? 
@@ -1468,7 +1467,7 @@ def _build_aggregated_measure_from_measure_source_node( ) return JoinToTimeSpineNode( parent_node=aggregate_measures_node, - requested_metric_time_dimension_specs=list(queried_linkable_specs.metric_time_specs), + requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, join_type=after_aggregation_time_spine_join_description.join_type, time_range_constraint=time_range_constraint, offset_window=after_aggregation_time_spine_join_description.offset_window, diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 607a4e277d..83487278a1 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -1243,7 +1243,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet # TODO: rename requested_metric_time_dimension_specs -> requested_agg_time_dimension_specs assert ( len(node.requested_metric_time_dimension_specs) > 0 - ), "Must have at least one value in requested_metric_time_dimension_specs for JoinToTimeSpineNode." + ), "Must have at least one value in requested_metric_time_dimension_specs for JoinToTimeSpineNode." # Determine if the time spine join should use metric_time or the agg_time_dimension (metric_time takes priority). 
agg_time_dimension_for_join = node.requested_metric_time_dimension_specs[0] diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index 14cd01a667..7424477d05 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1180,6 +1180,26 @@ integration_test: ) subq_3 ON subq_4.metric_time__month = subq_3.metric_time__month --- +integration_test: + name: simple_fill_nulls_with_0_agg_time_dim + description: Test a simple query that joins to time spine and fills nulls, with agg_time_dimension + model: SIMPLE_MODEL + metrics: ["bookings_fill_nulls_with_0"] + group_by_objs: [{"name": "booking__ds__day"}] + check_query: | + SELECT + subq_5.ds AS booking__ds__day + , COALESCE(subq_3.bookings, 0) AS bookings_fill_nulls_with_0 + FROM {{ source_schema }}.mf_time_spine subq_5 + LEFT OUTER JOIN ( + SELECT + {{ render_date_trunc("ds", TimeGranularity.DAY) }} AS booking__ds__day + , SUM(1) AS bookings + FROM {{ source_schema }}.fct_bookings bookings_source_src_1 + GROUP BY 1 + ) subq_3 + ON subq_5.ds = subq_3.booking__ds__day +--- integration_test: name: simple_fill_nulls_with_0_with_non_metric_time description: Test simple query that fills nulls but doesn't join to time spine (non-metric time dimension) From feae64d87d6dc4cd24f9e3391f059dd7969151ae Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 17:11:46 -0800 Subject: [PATCH 19/22] Rename param --- metricflow/dataflow/builder/dataflow_plan_builder.py | 4 +--- metricflow/dataflow/dataflow_plan.py | 8 ++++---- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index d5828048b5..1550fc5b87 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ 
b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -1117,7 +1117,6 @@ def _build_input_measure_spec_for_base_metric( if input_measure.join_to_timespine: query_contains_agg_time_dimension = queried_linkable_specs.contains_metric_time if not query_contains_agg_time_dimension: - # TODO: should this be checking valid agg time dims for measure or metric? valid_agg_time_dimensions = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure( measure_spec.reference ) @@ -1338,8 +1337,7 @@ def _build_aggregated_measure_from_measure_source_node( )[0] time_range_node = JoinOverTimeRangeNode( parent_node=measure_recipe.source_node, - # TODO: rename param - metric_time_dimension_spec=agg_time_dimension_spec_for_join, + time_dimension_spec_for_join=agg_time_dimension_spec_for_join, window=cumulative_window, grain_to_date=cumulative_grain_to_date, time_range_constraint=time_range_constraint diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index db506b5bad..1cfb79b2d8 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -376,7 +376,7 @@ class JoinOverTimeRangeNode(BaseOutput): def __init__( self, parent_node: BaseOutput, - metric_time_dimension_spec: TimeDimensionSpec, + time_dimension_spec_for_join: TimeDimensionSpec, window: Optional[MetricTimeWindow], grain_to_date: Optional[TimeGranularity], node_id: Optional[NodeId] = None, @@ -391,7 +391,7 @@ def __init__( (eg month to day) node_id: Override the node ID with this value time_range_constraint: time range to aggregate over - metric_time_dimension_spec: time dimension spec to use when joining to time spine + time_dimension_spec_for_join: time dimension spec to use when joining to time spine """ if window and grain_to_date: raise RuntimeError( @@ -402,7 +402,7 @@ def __init__( self._grain_to_date = grain_to_date self._window = window self.time_range_constraint = time_range_constraint - self.metric_time_dimension_spec = 
metric_time_dimension_spec + self.time_dimension_spec_for_join = time_dimension_spec_for_join # Doing a list comprehension throws a type error, so doing it this way. parent_nodes: List[DataflowPlanNode] = [self._parent_node] @@ -450,7 +450,7 @@ def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinOverTi window=self.window, grain_to_date=self.grain_to_date, time_range_constraint=self.time_range_constraint, - metric_time_dimension_spec=self.metric_time_dimension_spec, + time_dimension_spec_for_join=self.time_dimension_spec_for_join, ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 83487278a1..76762e0aa6 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -284,7 +284,7 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat metric_time_dimension_instance: Optional[TimeDimensionInstance] = None for instance in input_data_set.instance_set.time_dimension_instances: - if instance.spec == node.metric_time_dimension_spec: + if instance.spec == node.time_dimension_spec_for_join: metric_time_dimension_instance = instance break From 930098efecfe957f96699cc0a277e0d01d31fe90 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 17:56:39 -0800 Subject: [PATCH 20/22] Rename param --- .../dataflow/builder/dataflow_plan_builder.py | 7 +++---- metricflow/dataflow/dataflow_plan.py | 16 ++++++++-------- metricflow/model/semantics/metric_lookup.py | 1 - metricflow/plan_conversion/dataflow_to_sql.py | 11 +++++------ .../plan_conversion/test_dataflow_to_sql_plan.py | 6 +++--- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 1550fc5b87..05d9b10801 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ 
-514,7 +514,7 @@ def _build_derived_metric_output_node( ), "Joining to time spine requires querying with metric_time or the appropriate agg_time_dimension." output_node = JoinToTimeSpineNode( parent_node=output_node, - requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, + requested_agg_time_dimension_specs=queried_agg_time_dimension_specs, time_range_constraint=time_range_constraint, offset_window=metric_spec.offset_window, offset_to_grain=metric_spec.offset_to_grain, @@ -1330,7 +1330,6 @@ def _build_aggregated_measure_from_measure_source_node( # Otherwise, the measure will be aggregated over all time. time_range_node: Optional[JoinOverTimeRangeNode] = None if cumulative and queried_agg_time_dimension_specs: - # TODO: will it be a problem if we get one with date part or diff granularity? Write test case to confirm # Use the time dimension spec with the smallest granularity. agg_time_dimension_spec_for_join = sorted( queried_agg_time_dimension_specs, key=lambda spec: spec.time_granularity.to_int() @@ -1359,7 +1358,7 @@ def _build_aggregated_measure_from_measure_source_node( ) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=time_range_node or measure_recipe.source_node, - requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, + requested_agg_time_dimension_specs=queried_agg_time_dimension_specs, time_range_constraint=time_range_constraint, offset_window=before_aggregation_time_spine_join_description.offset_window, offset_to_grain=before_aggregation_time_spine_join_description.offset_to_grain, @@ -1465,7 +1464,7 @@ def _build_aggregated_measure_from_measure_source_node( ) return JoinToTimeSpineNode( parent_node=aggregate_measures_node, - requested_metric_time_dimension_specs=queried_agg_time_dimension_specs, + requested_agg_time_dimension_specs=queried_agg_time_dimension_specs, join_type=after_aggregation_time_spine_join_description.join_type, time_range_constraint=time_range_constraint, 
offset_window=after_aggregation_time_spine_join_description.offset_window, diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 1cfb79b2d8..3867a1b131 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -672,7 +672,7 @@ class JoinToTimeSpineNode(BaseOutput, ABC): def __init__( self, parent_node: BaseOutput, - requested_metric_time_dimension_specs: List[TimeDimensionSpec], + requested_agg_time_dimension_specs: List[TimeDimensionSpec], join_type: SqlJoinType, time_range_constraint: Optional[TimeRangeConstraint] = None, offset_window: Optional[MetricTimeWindow] = None, @@ -682,7 +682,7 @@ def __init__( Args: parent_node: Node that returns desired dataset to join to time spine. - requested_metric_time_dimension_specs: Time dimensions requested in query. Used to determine granularities. + requested_agg_time_dimension_specs: Time dimensions requested in query. Used to determine granularities. time_range_constraint: Time range to constrain the time spine to. offset_window: Time window to offset the parent dataset by when joining to time spine. offset_to_grain: Granularity period to offset the parent dataset to when joining to time spine. @@ -693,7 +693,7 @@ def __init__( offset_window and offset_to_grain ), "Can't set both offset_window and offset_to_grain when joining to time spine. Choose one or the other." 
self._parent_node = parent_node - self._requested_metric_time_dimension_specs = requested_metric_time_dimension_specs + self._requested_agg_time_dimension_specs = requested_agg_time_dimension_specs self._offset_window = offset_window self._offset_to_grain = offset_to_grain self._time_range_constraint = time_range_constraint @@ -706,9 +706,9 @@ def id_prefix(cls) -> str: # noqa: D return DATAFLOW_NODE_JOIN_TO_TIME_SPINE_ID_PREFIX @property - def requested_metric_time_dimension_specs(self) -> List[TimeDimensionSpec]: + def requested_agg_time_dimension_specs(self) -> List[TimeDimensionSpec]: """Time dimension specs to use when creating time spine table.""" - return self._requested_metric_time_dimension_specs + return self._requested_agg_time_dimension_specs @property def time_range_constraint(self) -> Optional[TimeRangeConstraint]: @@ -740,7 +740,7 @@ def description(self) -> str: # noqa: D @property def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D return super().displayed_properties + [ - DisplayedProperty("requested_metric_time_dimension_specs", self._requested_metric_time_dimension_specs), + DisplayedProperty("requested_agg_time_dimension_specs", self._requested_agg_time_dimension_specs), DisplayedProperty("time_range_constraint", self._time_range_constraint), DisplayedProperty("offset_window", self._offset_window), DisplayedProperty("offset_to_grain", self._offset_to_grain), @@ -757,7 +757,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.time_range_constraint == self.time_range_constraint and other_node.offset_window == self.offset_window and other_node.offset_to_grain == self.offset_to_grain - and other_node.requested_metric_time_dimension_specs == self.requested_metric_time_dimension_specs + and other_node.requested_agg_time_dimension_specs == self.requested_agg_time_dimension_specs and other_node.join_type == self.join_type ) @@ -765,7 +765,7 @@ def with_new_parents(self, new_parent_nodes: 
Sequence[BaseOutput]) -> JoinToTime assert len(new_parent_nodes) == 1 return JoinToTimeSpineNode( parent_node=new_parent_nodes[0], - requested_metric_time_dimension_specs=self.requested_metric_time_dimension_specs, + requested_agg_time_dimension_specs=self.requested_agg_time_dimension_specs, time_range_constraint=self.time_range_constraint, offset_window=self.offset_window, offset_to_grain=self.offset_to_grain, diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index 4a57090197..f57b5206a1 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -188,7 +188,6 @@ def get_valid_agg_time_dimensions_for_metric( return [] path_key = agg_time_dimension_element_path_keys[0] - # TODO: do we need all these? Or just the one valid granularity? Depends what's allowed for time_offset valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension( time_dimension_reference=TimeDimensionReference(element_name=path_key.element_name), entity_links=path_key.entity_links, diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 76762e0aa6..9b572a4f77 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -1240,14 +1240,13 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet parent_data_set = node.parent_node.accept(self) parent_alias = self._next_unique_table_alias() - # TODO: rename requested_metric_time_dimension_specs -> requested_agg_time_dimension_specs assert ( - len(node.requested_metric_time_dimension_specs) > 0 - ), "Must have at least one value in requested_metric_time_dimension_specs for JoinToTimeSpineNode." + len(node.requested_agg_time_dimension_specs) > 0 + ), "Must have at least one value in requested_agg_time_dimension_specs for JoinToTimeSpineNode." 
# Determine if the time spine join should use metric_time or the agg_time_dimension (metric_time takes priority). - agg_time_dimension_for_join = node.requested_metric_time_dimension_specs[0] - for spec in node.requested_metric_time_dimension_specs[1:]: + agg_time_dimension_for_join = node.requested_agg_time_dimension_specs[0] + for spec in node.requested_agg_time_dimension_specs[1:]: if spec.element_name == METRIC_TIME_ELEMENT_NAME: agg_time_dimension_for_join = spec break @@ -1327,7 +1326,7 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet time_spine_select_columns = [] time_spine_dim_instances = [] where: Optional[SqlExpressionNode] = None - for requested_time_dimension_spec in node.requested_metric_time_dimension_specs: + for requested_time_dimension_spec in node.requested_agg_time_dimension_specs: # Apply granularity to time spine column select expression. if requested_time_dimension_spec.time_granularity == time_spine_dim_instance.spec.time_granularity: select_expr: SqlExpressionNode = time_spine_column_select_expr diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index 4c8d80ac6b..58c983b2c7 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -548,7 +548,7 @@ def test_join_to_time_spine_node_without_offset( # noqa: D compute_metrics_node = ComputeMetricsNode(parent_node=aggregated_measures_node, metric_specs=[metric_spec]) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=compute_metrics_node, - requested_metric_time_dimension_specs=[MTD_SPEC_DAY], + requested_agg_time_dimension_specs=[MTD_SPEC_DAY], time_range_constraint=TimeRangeConstraint( start_time=as_datetime("2020-01-01"), end_time=as_datetime("2021-01-01") ), @@ -612,7 +612,7 @@ def test_join_to_time_spine_node_with_offset_window( # noqa: D compute_metrics_node = 
ComputeMetricsNode(parent_node=aggregated_measures_node, metric_specs=[metric_spec]) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=compute_metrics_node, - requested_metric_time_dimension_specs=[MTD_SPEC_DAY], + requested_agg_time_dimension_specs=[MTD_SPEC_DAY], time_range_constraint=TimeRangeConstraint( start_time=as_datetime("2020-01-01"), end_time=as_datetime("2021-01-01") ), @@ -678,7 +678,7 @@ def test_join_to_time_spine_node_with_offset_to_grain( compute_metrics_node = ComputeMetricsNode(parent_node=aggregated_measures_node, metric_specs=[metric_spec]) join_to_time_spine_node = JoinToTimeSpineNode( parent_node=compute_metrics_node, - requested_metric_time_dimension_specs=[MTD_SPEC_DAY], + requested_agg_time_dimension_specs=[MTD_SPEC_DAY], time_range_constraint=TimeRangeConstraint( start_time=as_datetime("2020-01-01"), end_time=as_datetime("2021-01-01") ), From ff28ecbae34e315166ee8746fe6111b8de2482c9 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 18:18:16 -0800 Subject: [PATCH 21/22] Updates --- metricflow/specs/specs.py | 7 ------ .../integration/test_cases/itest_metrics.yaml | 23 +++++++++++++++++-- ..._derived_metric_offset_to_grain__dfp_0.xml | 2 +- ...st_derived_metric_offset_window__dfp_0.xml | 2 +- ..._metric_offset_with_granularity__dfp_0.xml | 2 +- ...erived_offset_cumulative_metric__dfp_0.xml | 2 +- ...in_to_time_spine_derived_metric__dfp_0.xml | 6 ++--- ..._to_time_spine_with_metric_time__dfp_0.xml | 2 +- ...erived_metric_with_outer_offset__dfp_0.xml | 4 ++-- ...spine_node_with_offset_to_grain__plan0.xml | 2 +- ...e_spine_node_with_offset_window__plan0.xml | 2 +- ..._time_spine_node_without_offset__plan0.xml | 2 +- 12 files changed, 34 insertions(+), 22 deletions(-) diff --git a/metricflow/specs/specs.py b/metricflow/specs/specs.py index ec44c556cf..0705fb0a60 100644 --- a/metricflow/specs/specs.py +++ b/metricflow/specs/specs.py @@ -669,13 +669,6 @@ def metric_time_specs(self) -> Sequence[TimeDimensionSpec]: if 
time_dimension_spec.is_metric_time ) - # TODO: what about date part? not allowed for cumulative, right? - @property - def metric_time_spec_with_smallest_granularity(self) -> Optional[TimeDimensionSpec]: - """Get the metric time spec with the smallest granularity, if there are any metric time specs.""" - sorted_specs = sorted(self.metric_time_specs, key=lambda x: x.time_granularity) - return sorted_specs[0] if sorted_specs else None - @property def as_tuple(self) -> Tuple[LinkableInstanceSpec, ...]: # noqa: D return tuple(itertools.chain(self.dimension_specs, self.time_dimension_specs, self.entity_specs)) diff --git a/metricflow/test/integration/test_cases/itest_metrics.yaml b/metricflow/test/integration/test_cases/itest_metrics.yaml index 7424477d05..29760a7fbc 100644 --- a/metricflow/test/integration/test_cases/itest_metrics.yaml +++ b/metricflow/test/integration/test_cases/itest_metrics.yaml @@ -1868,5 +1868,24 @@ integration_test: ) subq_8 ON {{ render_date_sub("subq_9", "ds", 2, TimeGranularity.DAY) }} = subq_8.booking__ds__day -# TODO: offset tests with granularity, date part, multiple metric time / agg time options -# TODO: cumulative tests with multiple metric_time / agg time options +--- +integration_test: + name: offset_metric_with_agg_time_dim_date_part + description: Tests a derived metric query with an offset and agg_time_dimension with date part. 
+ model: SIMPLE_MODEL + metrics: ["bookings_5_day_lag"] + group_by_objs: [{"name": "booking__ds", "date_part": "doy"}] + check_query: | + SELECT + {{ render_extract("a.ds", DatePart.DOY) }} AS booking__ds__extract_doy + , b.bookings_5_day_lag + FROM {{ mf_time_spine_source }} a + INNER JOIN ( + SELECT + ds + , SUM(1) AS bookings_5_day_lag + FROM {{ source_schema }}.fct_bookings + GROUP BY + ds + ) b + ON {{ render_date_sub("a", "ds", 5, TimeGranularity.DAY) }} = b.ds diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml index 8129aca791..e86eb5a59d 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml @@ -60,7 +60,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml index 3c35caec5f..b5a332709f 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml @@ -30,7 +30,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml index ed5c63701e..841524a925 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml @@ -30,7 +30,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml index ffd5457f7a..a6d584a4d8 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml @@ -30,7 +30,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml index 3cf58ceada..d43e414776 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml @@ -18,7 +18,7 @@ - + @@ -63,7 +63,7 @@ - + @@ -83,7 +83,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_metric_time__dfp_0.xml index 7edb223370..ab704033a4 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_metric_time__dfp_0.xml @@ -10,7 +10,7 @@ - + diff --git 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_nested_derived_metric_with_outer_offset__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_nested_derived_metric_with_outer_offset__dfp_0.xml index b8a78ed26c..32fd5e1e64 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_nested_derived_metric_with_outer_offset__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_nested_derived_metric_with_outer_offset__dfp_0.xml @@ -10,7 +10,7 @@ - + @@ -40,7 +40,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml index f83b57688b..9a5d81695e 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml @@ -5,7 +5,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml index 75794f74d9..e951eebd3f 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml @@ -5,7 +5,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml index 9bf1e08fa3..19602e3e91 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml @@ -5,7 +5,7 @@ - + From 86fa33ad601119a4b30f818093f3c9a09c823b69 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 24 Jan 2024 18:20:24 -0800 Subject: [PATCH 22/22] Cleanup --- metricflow/model/semantics/metric_lookup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index f57b5206a1..44a7bd3280 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -167,9 +167,6 @@ def _get_agg_time_dimension_path_keys_for_metric( ) -> Sequence[ElementPathKey]: """Retrieves the aggregate time dimensions associated with the metric's measures.""" metric = self.get_metric(metric_reference) - # This should get hit on offset metric, right? - assert metric.input_measures, f"No input measures found for metric {metric_reference}" - path_keys = set() for input_measure in metric.input_measures: path_key = self._semantic_model_lookup.get_agg_time_dimension_path_key_for_measure(