Fix query validation for metric_time requirements

As of right now we require metric_time in the group by expression for any metric query against cumulative metrics or derived metrics with time offset windows or an offset_to_grain parameter set. The current query validation was only asserting that some time dimension was included in the group by. In the case of derived metrics, this simply failed later in the query building phase for any query with a different time dimension in the group by expression. In the case of cumulative metrics, this would run an incorrectly rendered query and then return incorrect results. This validation change ensures that users have the proper configuration on queries based on our current limitations.
dbt-labs · Nov 8, 2023 · 46f9943 · 46f9943
1 parent 5e6ea1f
commit 46f9943
Show file tree

Hide file tree

Showing 2 changed files with 84 additions and 4 deletions.
diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py
@@ -270,8 +270,13 @@ def parse_and_validate_query(
         finally:
             logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")
 
-    def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None:
-        """Validate if all requested metrics are queryable without a time dimension."""
+    def _validate_no_metric_time_dimension_query(
+        self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec]
+    ) -> None:
+        """Validate if all requested metrics are queryable without grouping by metric_time."""
+        if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]):
+            return
+
         for metric_reference in metric_references:
             metric = self._metric_lookup.get_metric(metric_reference)
             if metric.type == MetricType.CUMULATIVE:
@@ -471,8 +476,9 @@ def _parse_and_validate_query(
         time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple(
             time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items()
         )
-        if len(time_dimension_specs) == 0:
-            self._validate_no_time_dimension_query(metric_references=metric_references)
+        self._validate_no_metric_time_dimension_query(
+            metric_references=metric_references, time_dimension_specs=time_dimension_specs
+        )
 
         self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
         self._validate_date_part(metric_references, time_dimension_specs)

diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py
@@ -97,6 +97,11 @@
           expr: created_at
           type_params:
             time_granularity: month
+        - name: loaded_at
+          type: time
+          expr: created_at
+          type_params:
+            time_granularity: day
         - name: country
           type: categorical
           expr: country
@@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None:
         )
 
 
+def test_cumulative_metric_no_time_dimension_validation() -> None:
+    """Test that queries for cumulative metrics fail if no time dimensions are selected.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+        )
+
+
+def test_cumulative_metric_wrong_time_dimension_validation() -> None:
+    """Test that queries for cumulative metrics fail if the agg_time_dimension is not selected.
+
+    Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is
+    not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric
+    computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is
+    restricted.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+            group_by_names=["company__loaded_at"],
+        )
+
+
+def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
+    """Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name.
+
+    Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual
+    dimension. In many cases the underlying agg_time_dimension is a single column and users will use it
+    directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix
+    the query rendering issues that prevent this from working correctly.
+
+    This is a test of validation enforcement to ensure users don't get incorrect results due to current
+    limitations, and should be deleted or updated when this restriction is lifted.
+    """
+    bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
+    metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
+    revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
+    query_parser = query_parser_from_yaml(
+        [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
+    )
+
+    with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
+        query_parser.parse_and_validate_query(
+            metric_names=["revenue_cumulative"],
+            group_by_names=["company__ds"],
+        )
+
+
 def test_derived_metric_query_parsing() -> None:
     """Test derived metric inputs are properly validated."""
     bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)