Skip to content

Commit

Permalink
Fix query validation for metric_time requirements
Browse files Browse the repository at this point in the history
As of right now we require metric_time in the group by expression
for any metric query against cumulative metrics or derived metrics
with time offset windows or an offset_to_grain parameter set.

The current query validation was only asserting that some time
dimension was included in the group by. In the case of derived
metrics, this simply failed later in the query building phase for
any query with a different time dimension in the group by expression.

In the case of cumulative metrics, this would run an incorrectly
rendered query and then return incorrect results.

This validation change ensures that users have the proper
configuration on queries based on our current limitations.
  • Loading branch information
tlento committed Nov 8, 2023
1 parent 5e6ea1f commit 46f9943
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 4 deletions.
14 changes: 10 additions & 4 deletions metricflow/query/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,13 @@ def parse_and_validate_query(
finally:
logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")

def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None:
"""Validate if all requested metrics are queryable without a time dimension."""
def _validate_no_metric_time_dimension_query(
self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec]
) -> None:
"""Validate if all requested metrics are queryable without grouping by metric_time."""
if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]):
return

for metric_reference in metric_references:
metric = self._metric_lookup.get_metric(metric_reference)
if metric.type == MetricType.CUMULATIVE:
Expand Down Expand Up @@ -471,8 +476,9 @@ def _parse_and_validate_query(
time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple(
time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items()
)
if len(time_dimension_specs) == 0:
self._validate_no_time_dimension_query(metric_references=metric_references)
self._validate_no_metric_time_dimension_query(
metric_references=metric_references, time_dimension_specs=time_dimension_specs
)

self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
self._validate_date_part(metric_references, time_dimension_specs)
Expand Down
74 changes: 74 additions & 0 deletions metricflow/test/query/test_query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@
expr: created_at
type_params:
time_granularity: month
- name: loaded_at
type: time
expr: created_at
type_params:
time_granularity: day
- name: country
type: categorical
expr: country
Expand Down Expand Up @@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None:
)


def test_cumulative_metric_no_time_dimension_validation() -> None:
"""Test that queries for cumulative metrics fail if no time dimensions are selected.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
)


def test_cumulative_metric_wrong_time_dimension_validation() -> None:
"""Test that queries for cumulative metrics fail if the agg_time_dimension is not selected.
Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is
not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric
computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is
restricted.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["company__loaded_at"],
)


def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
"""Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name.
Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual
dimension. In many cases the underlying agg_time_dimension is a single column and users will use it
directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix
the query rendering issues that prevent this from working correctly.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["company__ds"],
)


def test_derived_metric_query_parsing() -> None:
"""Test derived metric inputs are properly validated."""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
Expand Down

0 comments on commit 46f9943

Please sign in to comment.