Skip to content

Commit

Permalink
Merge pull request #857 from dbt-labs/fix-metric-time-requirement-val…
Browse files Browse the repository at this point in the history
…idation

Fix query validation for metric_time requirements
  • Loading branch information
tlento authored Nov 8, 2023
2 parents b812e7a + 362d934 commit 1410c46
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231108-150708.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix query validation for metric_time requirements
time: 2023-11-08T15:07:08.681102-08:00
custom:
Author: tlento
Issue: "825"
14 changes: 10 additions & 4 deletions metricflow/query/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,13 @@ def parse_and_validate_query(
finally:
logger.info(f"Parsing the query took: {time.time() - start_time:.2f}s")

def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricReference]) -> None:
"""Validate if all requested metrics are queryable without a time dimension."""
def _validate_no_metric_time_dimension_query(
self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec]
) -> None:
"""Validate if all requested metrics are queryable without grouping by metric_time."""
if any([spec.reference == DataSet.metric_time_dimension_reference() for spec in time_dimension_specs]):
return

for metric_reference in metric_references:
metric = self._metric_lookup.get_metric(metric_reference)
if metric.type == MetricType.CUMULATIVE:
Expand Down Expand Up @@ -471,8 +476,9 @@ def _parse_and_validate_query(
time_dimension_specs = requested_linkable_specs.time_dimension_specs + tuple(
time_dimension_spec for _, time_dimension_spec in partial_time_dimension_spec_replacements.items()
)
if len(time_dimension_specs) == 0:
self._validate_no_time_dimension_query(metric_references=metric_references)
self._validate_no_metric_time_dimension_query(
metric_references=metric_references, time_dimension_specs=time_dimension_specs
)

self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs)
self._validate_date_part(metric_references, time_dimension_specs)
Expand Down
74 changes: 74 additions & 0 deletions metricflow/test/query/test_query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@
expr: created_at
type_params:
time_granularity: month
- name: loaded_at
type: time
expr: created_at
type_params:
time_granularity: day
- name: country
type: categorical
expr: country
Expand Down Expand Up @@ -412,6 +417,75 @@ def test_parse_and_validate_metric_constraint_dims() -> None:
)


def test_cumulative_metric_no_time_dimension_validation() -> None:
"""Test that queries for cumulative metrics fail if no time dimensions are selected.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
)


def test_cumulative_metric_wrong_time_dimension_validation() -> None:
"""Test that queries for cumulative metrics fail if the agg_time_dimension is not selected.
Our current behavior for cases where a different time dimension is selected by the agg_time_dimension is
not is undefined. Until we add support for grouping by a different time dimension for a cumulative metric
computed against metric_time, overriding the agg_time_dimension at query time, or both, this query is
restricted.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["company__loaded_at"],
)


def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
"""Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name.
Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual
dimension. In many cases the underlying agg_time_dimension is a single column and users will use it
directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix
the query rendering issues that prevent this from working correctly.
This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(UnableToSatisfyQueryError, match="must be queried with the dimension 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["company__ds"],
)


def test_derived_metric_query_parsing() -> None:
"""Test derived metric inputs are properly validated."""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
Expand Down

0 comments on commit 1410c46

Please sign in to comment.