Skip to content

Commit

Permalink
Change MetricFlowQueryParser.parse_and_validate_query() to return `…
Browse files Browse the repository at this point in the history
…ParseQueryResult` (#1202)

### Description

Similar to the parser update for saved queries, this PR updates
`MetricFlowQueryParser.parse_and_validate_query()` to return a result
object so that more context can be returned to the caller.

Note: `.query_spec` was added to get tests to pass in this PR. In a
later PR, snapshots are used to check the parser results, so those go
away.
<!--- 
  Before requesting review, please make sure you have:
1. read [the contributing
guide](https://github.com/dbt-labs/metricflow/blob/main/CONTRIBUTING.md),
2. signed the
[CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
3. run `changie new` to [create a changelog
entry](https://github.com/dbt-labs/metricflow/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)
-->
  • Loading branch information
plypaul authored May 15, 2024
1 parent 8a4d342 commit a55c402
Show file tree
Hide file tree
Showing 15 changed files with 104 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def parse_and_validate_query(
order_by_names: Optional[Sequence[str]] = None,
order_by: Optional[Sequence[OrderByQueryParameter]] = None,
min_max_only: bool = False,
) -> MetricFlowQuerySpec:
) -> ParseQueryResult:
"""Parse the query into spec objects, validating them in the process.
e.g. make sure that the given metric is a valid metric name.
Expand All @@ -341,7 +341,7 @@ def parse_and_validate_query(
order_by_names=order_by_names,
order_by=order_by,
min_max_only=min_max_only,
).query_spec
)

@log_runtime()
def _parse_and_validate_query(
Expand Down Expand Up @@ -533,7 +533,7 @@ def build_query_spec_for_group_by_metric_source_node(
return self.parse_and_validate_query(
metrics=(MetricParameter(group_by_metric_spec.reference.element_name),),
group_by=(DimensionOrEntityParameter(group_by_metric_spec.metric_subquery_entity_spec.qualified_name),),
)
).query_spec


@dataclass(frozen=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_resolvable_ambiguous_entity_path( # noqa: D103
query_spec = multi_hop_query_parser.parse_and_validate_query(
metric_names=["entity_1_metric"],
group_by_names=["entity_0__country"],
)
).query_spec

assert_object_snapshot_equal(
request=request,
Expand All @@ -47,7 +47,7 @@ def test_ambiguous_entity_path_resolves_to_shortest_entity_path_item(
query_spec = multi_hop_query_parser.parse_and_validate_query(
metric_names=["all_entity_metric"],
group_by_names=["entity_1__country"],
)
).query_spec

assert_object_snapshot_equal(
request=request,
Expand All @@ -70,7 +70,7 @@ def test_non_resolvable_ambiguous_entity_path_due_to_multiple_matches(
multi_hop_query_parser.parse_and_validate_query(
metric_names=["entity_1_and_entity_2_metric"],
group_by_names=["entity_0__country"],
)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand All @@ -93,7 +93,7 @@ def test_non_resolvable_ambiguous_entity_path_due_to_mismatch(
multi_hop_query_parser.parse_and_validate_query(
metric_names=["entity_0_metric", "entity_1_metric"],
group_by_names=["entity_0__country"],
)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: #
metric_names=["bookings"],
group_by_names=["booking__is_instant", "listing", MTD],
order_by_names=[MTD, "-bookings"],
)
).query_spec

assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
assert query_spec.dimension_specs == (
Expand Down Expand Up @@ -228,7 +228,7 @@ def test_query_parser_case_insensitivity(bookings_query_parser: MetricFlowQueryP
metric_names=["BOOKINGS"],
group_by_names=["BOOKING__IS_INSTANT", "LISTING", MTD.upper()],
order_by_names=[MTD.upper(), "-BOOKINGS"],
)
).query_spec

assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
assert query_spec.dimension_specs == (
Expand Down Expand Up @@ -260,7 +260,9 @@ def test_query_parser_case_insensitivity(bookings_query_parser: MetricFlowQueryP
OrderByParameter(order_by=TimeDimensionParameter(MTD.upper())),
OrderByParameter(order_by=MetricParameter("BOOKINGS"), descending=True),
)
query_spec = bookings_query_parser.parse_and_validate_query(metrics=[metric], group_by=group_by, order_by=order_by)
query_spec = bookings_query_parser.parse_and_validate_query(
metrics=[metric], group_by=group_by, order_by=order_by
).query_spec
assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
assert query_spec.dimension_specs == (
DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),
Expand All @@ -283,7 +285,7 @@ def test_query_parser_case_insensitivity(bookings_query_parser: MetricFlowQueryP

def test_query_parser_invalid_group_by(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D103
with pytest.raises(InvalidQueryException):
bookings_query_parser.parse_and_validate_query(group_by_names=["random_stuff"])
bookings_query_parser.parse_and_validate_query(group_by_names=["random_stuff"]).query_spec


def test_query_parser_with_object_params(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D103
Expand All @@ -297,7 +299,9 @@ def test_query_parser_with_object_params(bookings_query_parser: MetricFlowQueryP
OrderByParameter(order_by=TimeDimensionParameter(MTD)),
OrderByParameter(order_by=MetricParameter("bookings"), descending=True),
)
query_spec = bookings_query_parser.parse_and_validate_query(metrics=[metric], group_by=group_by, order_by=order_by)
query_spec = bookings_query_parser.parse_and_validate_query(
metrics=[metric], group_by=group_by, order_by=order_by
).query_spec
assert query_spec.metric_specs == (MetricSpec(element_name="bookings"),)
assert query_spec.dimension_specs == (
DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),)),
Expand Down Expand Up @@ -333,7 +337,7 @@ def test_order_by_granularity_conversion() -> None:
)
query_spec = query_parser.parse_and_validate_query(
metric_names=["bookings", "revenue"], group_by_names=[MTD], order_by_names=[f"-{MTD}"]
)
).query_spec

# The lowest common granularity is MONTH, so we expect the PTD in the order by to have that granularity.
assert (
Expand All @@ -347,7 +351,7 @@ def test_order_by_granularity_conversion() -> None:
def test_order_by_granularity_no_conversion(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D103
query_spec = bookings_query_parser.parse_and_validate_query(
metric_names=["bookings"], group_by_names=[MTD], order_by_names=[MTD]
)
).query_spec

# The only granularity is DAY, so we expect the PTD in the order by to have that granularity.
assert (
Expand All @@ -373,7 +377,7 @@ def test_time_range_constraint_conversion() -> None:
group_by_names=[MTD],
time_constraint_start=as_datetime("2020-01-15"),
time_constraint_end=as_datetime("2020-02-15"),
)
).query_spec

assert (
TimeRangeConstraint(start_time=as_datetime("2020-01-01"), end_time=as_datetime("2020-02-29"))
Expand All @@ -390,7 +394,7 @@ def test_parse_and_validate_where_constraint_dims(bookings_query_parser: MetricF
time_constraint_start=as_datetime("2020-01-15"),
time_constraint_end=as_datetime("2020-02-15"),
where_constraint_str="{{ Dimension('booking__invalid_dim') }} = '1'",
)
).query_spec

with pytest.raises(InvalidQueryException, match="Error parsing where filter"):
bookings_query_parser.parse_and_validate_query(
Expand All @@ -399,15 +403,15 @@ def test_parse_and_validate_where_constraint_dims(bookings_query_parser: MetricF
time_constraint_start=as_datetime("2020-01-15"),
time_constraint_end=as_datetime("2020-02-15"),
where_constraint_str="{{ Dimension('invalid_format') }} = '1'",
)
).query_spec

query_spec = bookings_query_parser.parse_and_validate_query(
metric_names=["bookings"],
group_by_names=[MTD],
time_constraint_start=as_datetime("2020-01-15"),
time_constraint_end=as_datetime("2020-02-15"),
where_constraint_str="{{ Dimension('booking__is_instant') }} = '1'",
)
).query_spec
assert (
DimensionSpec(element_name="is_instant", entity_links=(EntityReference("booking"),))
not in query_spec.dimension_specs
Expand All @@ -424,7 +428,7 @@ def test_parse_and_validate_where_constraint_metric_time() -> None:
metric_names=["revenue"],
group_by_names=[MTD],
where_constraint_str="{{ TimeDimension('metric_time', 'day') }} > '2020-01-15'",
)
).query_spec


def test_parse_and_validate_metric_constraint_dims() -> None:
Expand All @@ -442,7 +446,7 @@ def test_parse_and_validate_metric_constraint_dims() -> None:
group_by_names=[MTD],
time_constraint_start=as_datetime("2020-01-15"),
time_constraint_end=as_datetime("2020-02-15"),
)
).query_spec


def test_cumulative_metric_no_time_dimension_validation() -> None:
Expand All @@ -461,7 +465,7 @@ def test_cumulative_metric_no_time_dimension_validation() -> None:
with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
)
).query_spec


def test_cumulative_metric_wrong_time_dimension_validation() -> None:
Expand All @@ -486,7 +490,7 @@ def test_cumulative_metric_wrong_time_dimension_validation() -> None:
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["revenue_instance__loaded_at"],
)
).query_spec


def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
Expand All @@ -498,7 +502,9 @@ def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

query_parser.parse_and_validate_query(metric_names=["revenue_cumulative"], group_by_names=["revenue_instance__ds"])
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"], group_by_names=["revenue_instance__ds"]
).query_spec


def test_derived_metric_query_parsing() -> None:
Expand All @@ -514,20 +520,20 @@ def test_derived_metric_query_parsing() -> None:
query_parser.parse_and_validate_query(
metric_names=["revenue_sub_10"],
group_by_names=[],
)
).query_spec

# Attempt to query with non-time dimension
with pytest.raises(InvalidQueryException, match="does not match any of the available"):
query_parser.parse_and_validate_query(
metric_names=["revenue_sub_10"],
group_by_names=["country"],
)
).query_spec

# Query with time dimension
query_parser.parse_and_validate_query(
metric_names=["revenue_sub_10"],
group_by_names=[MTD],
)
).query_spec


def test_derived_metric_with_offset_parsing() -> None:
Expand All @@ -542,20 +548,20 @@ def test_derived_metric_with_offset_parsing() -> None:
query_parser.parse_and_validate_query(
metric_names=["revenue_growth_2_weeks"],
group_by_names=[],
)
).query_spec

# Attempt to query with non-time dimension
with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_growth_2_weeks"],
group_by_names=["revenue_instance__country"],
)
).query_spec

# Query with time dimension
query_parser.parse_and_validate_query(
metric_names=["revenue_growth_2_weeks"],
group_by_names=[MTD],
)
).query_spec


def test_date_part_parsing() -> None:
Expand All @@ -571,21 +577,21 @@ def test_date_part_parsing() -> None:
query_parser.parse_and_validate_query(
metric_names=["revenue"],
group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.DOW),),
)
).query_spec

# Can't query date part for cumulative metrics
with pytest.raises(InvalidQueryException, match="does not match any of the available"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.YEAR),),
)
).query_spec

# Can't query date part for metrics with offset to grain
with pytest.raises(InvalidQueryException, match="does not allow group-by-items with a date part in the query"):
query_parser.parse_and_validate_query(
metric_names=["revenue_since_start_of_year"],
group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.MONTH),),
)
).query_spec

# Requested granularity doesn't match resolved granularity
with pytest.raises(InvalidQueryException, match="does not match any of the available"):
Expand All @@ -594,26 +600,26 @@ def test_date_part_parsing() -> None:
group_by=(
TimeDimensionParameter(name="metric_time", grain=TimeGranularity.YEAR, date_part=DatePart.MONTH),
),
)
).query_spec

# Date part is compatible
query_parser.parse_and_validate_query(
metric_names=["revenue"],
group_by=(TimeDimensionParameter(name="metric_time", date_part=DatePart.MONTH),),
)
).query_spec


def test_duplicate_metric_query(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D103
with pytest.raises(InvalidQueryException, match="duplicate metrics"):
bookings_query_parser.parse_and_validate_query(
metric_names=["bookings", "bookings"],
group_by_names=[MTD],
)
).query_spec


def test_no_metrics_or_group_by(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D103
with pytest.raises(InvalidQueryException, match="no metrics or group by items"):
bookings_query_parser.parse_and_validate_query()
bookings_query_parser.parse_and_validate_query().query_spec


def test_offset_metric_with_diff_agg_time_dims_error() -> None: # noqa: D103
Expand All @@ -626,7 +632,7 @@ def test_offset_metric_with_diff_agg_time_dims_error() -> None: # noqa: D103
query_parser.parse_and_validate_query(
metric_names=["monthly_revenue_last_7_days"],
group_by_names=["revenue___ds"],
)
).query_spec


def query_parser_from_yaml(yaml_contents: List[YamlConfigFile]) -> MetricFlowQueryParser:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def test_suggestions_for_group_by_item( # noqa: D103
request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, query_parser: MetricFlowQueryParser
) -> None:
with pytest.raises(InvalidQueryException) as e:
query_parser.parse_and_validate_query(metric_names=("bookings",), group_by_names=("booking__instant",))
query_parser.parse_and_validate_query(
metric_names=("bookings",), group_by_names=("booking__instant",)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand All @@ -46,7 +48,9 @@ def test_suggestions_for_metric( # noqa: D103
request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, query_parser: MetricFlowQueryParser
) -> None:
with pytest.raises(InvalidQueryException) as e:
query_parser.parse_and_validate_query(metric_names=("booking",), group_by_names=(METRIC_TIME_ELEMENT_NAME,))
query_parser.parse_and_validate_query(
metric_names=("booking",), group_by_names=(METRIC_TIME_ELEMENT_NAME,)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand All @@ -60,7 +64,9 @@ def test_suggestions_for_multiple_metrics( # noqa: D103
request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, query_parser: MetricFlowQueryParser
) -> None:
with pytest.raises(InvalidQueryException) as e:
query_parser.parse_and_validate_query(metric_names=("bookings", "listings"), group_by_names=("booking__ds",))
query_parser.parse_and_validate_query(
metric_names=("bookings", "listings"), group_by_names=("booking__ds",)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand Down Expand Up @@ -93,7 +99,9 @@ def test_suggestions_for_defined_where_filter( # noqa: D103
semantic_manifest_lookup=semantic_manifest_lookup,
)
with pytest.raises(InvalidQueryException) as e:
query_parser.parse_and_validate_query(metric_names=("listings",), group_by_names=(METRIC_TIME_ELEMENT_NAME,))
query_parser.parse_and_validate_query(
metric_names=("listings",), group_by_names=(METRIC_TIME_ELEMENT_NAME,)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand Down Expand Up @@ -145,7 +153,7 @@ def test_suggestions_for_defined_filters_in_multi_metric_query(
"listings",
),
group_by_names=(METRIC_TIME_ELEMENT_NAME,),
)
).query_spec

assert_str_snapshot_equal(
request=request,
Expand Down
2 changes: 1 addition & 1 deletion metricflow/engine/metricflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
order_by_names=mf_query_request.order_by_names,
order_by=mf_query_request.order_by,
min_max_only=mf_query_request.min_max_only,
)
).query_spec
logger.info(f"Query spec is:\n{mf_pformat(query_spec)}")

output_selection_specs: Optional[InstanceSpecSet] = None
Expand Down
2 changes: 1 addition & 1 deletion scripts/ci_tests/metricflow_semantics_package_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def log_query_spec() -> None: # noqa: D103
query_parser = MetricFlowQueryParser(SemanticManifestLookup(semantic_manifest))
query_spec = query_parser.parse_and_validate_query(
metric_names=["bookings"], group_by_names=["booking__is_instant"]
)
).query_spec

logger.info(f"{query_spec.__class__.__name__}:\n{mf_pformat(query_spec)}")

Expand Down
Loading

0 comments on commit a55c402

Please sign in to comment.