From 46f9943b65695a761b50af2b87d1164b01fd4b24 Mon Sep 17 00:00:00 2001
From: tlento <tom@transformdata.io>
Date: Wed, 8 Nov 2023 14:22:39 -0800
Subject: [PATCH] Fix query validation for metric_time requirements

As of right now we require metric_time in the group by expression
for any metric query against cumulative metrics or derived metrics
with time offset windows or an offset_to_grain parameter set.

The current query validation was only asserting that some time
dimension was included in the group by. In the case of derived
metrics, this simply failed later in the query building phase for
any query with a different time dimension in the group by expression.

In the case of cumulative metrics, this would run an incorrectly
rendered query and then return incorrect results.

This validation change ensures that users have the proper
configuration on queries based on our current limitations.
---
 metricflow/query/query_parser.py           | 14 ++--
 metricflow/test/query/test_query_parser.py | 74 ++++++++++++++++++++++
 2 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py
index f302a48365..2fd3036e3f 100644
--- a/metricflow/query/query_parser.py
+++ b/metricflow/query/query_parser.py
@@ -270,8 +270,13 @@ def parse_and_validate_query(
         finally:
             logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")
 
-    def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None:
-        """Validate if all requested metrics are queryable without a time dimension."""
+    def _validate_no_metric_time_dimension_query(
+        self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec]
+    ) -> None:
+        """Validate if all requested metrics are queryable without grouping by metric_time."""
+        if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]):
+            return
+
         for metric_reference in metric_references:
             metric = self._metric_lookup.get_metric(metric_reference)
             if metric.type == MetricType.CUMULATIVE:
@@ -471,8 +476,9 @@ def _parse_and_validate_query(
         time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple(
             time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items()
         )
-        if len(time_dimension_specs) == 0:
-            self._validate_no_time_dimension_query(metric_references=metric_references)
+        self._validate_no_metric_time_dimension_query(
+            metric_references=metric_references, time_dimension_specs=time_dimension_specs
+        )
 
         self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
         self._validate_date_part(metric_references, time_dimension_specs)
diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py
index 03e5ab9ccb..a340f0dc8e 100644
--- a/metricflow/test/query/test_query_parser.py
+++ b/metricflow/test/query/test_query_parser.py
@@ -97,6 +97,11 @@
           expr: created_at
           type_params:
             time_granularity: month
+        - name: loaded_at
+          type: time
+          expr: created_at
+          type_params:
+            time_granularity: day
         - name: country
           type: categorical
           expr: country
@@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None:
         )
 
 
+def test_cumulative_metric_no_time_dimension_validation() -> None:
+    """Test that queries for cumulative metrics fail if no time dimensions are selected.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+        )
+
+
+def test_cumulative_metric_wrong_time_dimension_validation() -> None:
+    """Test that queries for cumulative metrics fail if the agg_time_dimension is not selected.
+
+    Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is
+    not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric
+    computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is
+    restricted.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+            group_by_names=["company__loaded_at"],
+        )
+
+
+def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
+    """Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name.
+
+    Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual
+    dimension. In many cases the underlying agg_time_dimension is a single column and users will use it
+    directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix
+    the query rendering issues that prevent this from working correctly.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+            group_by_names=["company__ds"],
+        )
+
+
 def test_derived_metric_query_parsing() -> None:
     """Test derived metric inputs are properly validated."""
     bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)