diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index f302a48365..2fd3036e3f 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -270,8 +270,13 @@ def parse_and_validate_query( finally: logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s") - def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None: - """Validate if all requested metrics are queryable without a time dimension.""" + def _validate_no_metric_time_dimension_query( + self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec] + ) -> None: + """Validate if all requested metrics are queryable without grouping by metric_time.""" + if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]): + return + for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.type == MetricType.CUMULATIVE: @@ -471,8 +476,9 @@ def _parse_and_validate_query( time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple( time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items() ) - if len(time_dimension_specs) == 0: - self._validate_no_time_dimension_query(metric_references=metric_references) + self._validate_no_metric_time_dimension_query( + metric_references=metric_references, time_dimension_specs=time_dimension_specs + ) self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) self._validate_date_part(metric_references, time_dimension_specs) diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 03e5ab9ccb..a340f0dc8e 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -97,6 +97,11 @@ expr: created_at type_params: time_granularity: month + - name: loaded_at + type: time + expr: created_at + type_params: + time_granularity: day - name: country type: categorical expr: country @@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None: ) +def test_cumulative_metric_no_time_dimension_validation() -> None: + """Test that queries for cumulative metrics fail if no time dimensions are selected. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + ) + + +def test_cumulative_metric_wrong_time_dimension_validation() -> None: + """Test that queries for cumulative metrics fail if the agg_time_dimension is not selected. + + Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is + not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric + computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is + restricted. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + group_by_names=["company__loaded_at"], + ) + + +def test_cumulative_metric_agg_time_dimension_name_validation() -> None: + """Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name. + + Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual + dimension. In many cases the underlying agg_time_dimension is a single column and users will use it + directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix + the query rendering issues that prevent this from working correctly. + + This is a test of validation enforcement to ensure users don't get incorrect results due to current + limitations, and should be deleted or updated when this restriction is lifted. + """ + bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML) + metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML) + revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML) + query_parser = query_parser_from_yaml( + [EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file] + ) + + with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + group_by_names=["company__ds"], + ) + + def test_derived_metric_query_parsing() -> None: """Test derived metric inputs are properly validated.""" bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)