Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metric filters for distinct values queries #1107

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240329-182759.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Enable metric filters for distinct values queries.
time: 2024-03-29T18:27:59.807712-07:00
custom:
Author: courtneyholcomb
Issue: "1107"
4 changes: 1 addition & 3 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,9 +640,7 @@ def _build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Da
)

query_level_filter_specs = filter_spec_factory.create_from_where_filter_intersection(
filter_location=WhereFilterLocation.for_query(
tuple(metric_spec.reference for metric_spec in query_spec.metric_specs)
),
filter_location=WhereFilterLocation.for_query(metric_references=tuple()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You know, it took me way too long to figure out why there were no metric specs in this query. 🤦

filter_intersection=query_spec.filter_intersection,
)

Expand Down
40 changes: 34 additions & 6 deletions metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,16 +548,44 @@ def __init__(
metrics.add(metric_reference)
self._joinable_metrics_for_semantic_models[semantic_model_reference] = metrics

# If no metrics are specified, the query interface supports distinct dimension values from a single semantic
# model.
linkable_element_sets_to_merge: List[LinkableElementSet] = []

# If no metrics are specified, the query interface supports querying distinct values for dimensions, entities,
# and group by metrics.
linkable_element_sets_for_no_metrics_queries: List[LinkableElementSet] = []
for semantic_model in semantic_manifest.semantic_models:
linkable_element_sets_to_merge.append(self._get_elements_in_semantic_model(semantic_model))
linkable_element_sets_for_no_metrics_queries.append(self._get_elements_in_semantic_model(semantic_model))
joinable_metrics = self._joinable_metrics_for_semantic_models.get(semantic_model.reference, set())
for entity in semantic_model.entities:
linkable_metrics_set = LinkableElementSet(
path_key_to_linkable_dimensions={},
path_key_to_linkable_entities={},
path_key_to_linkable_metrics={
ElementPathKey(
element_name=metric.element_name,
entity_links=(entity.reference,),
time_granularity=None,
date_part=None,
): (
LinkableMetric(
element_name=metric.element_name,
entity_links=(entity.reference,),
join_path=(
SemanticModelJoinPathElement(
semantic_model_reference=semantic_model.reference,
join_on_entity=entity.reference,
),
),
join_by_semantic_model=semantic_model.reference,
properties=frozenset({LinkableElementProperties.METRIC}),
),
)
for metric in joinable_metrics
},
)
linkable_element_sets_for_no_metrics_queries.append(linkable_metrics_set)

metric_time_elements_for_no_metrics = self._get_metric_time_elements(measure_reference=None)
self._no_metric_linkable_element_set = LinkableElementSet.merge_by_path_key(
linkable_element_sets_to_merge + [metric_time_elements_for_no_metrics]
linkable_element_sets_for_no_metrics_queries + [metric_time_elements_for_no_metrics]
)

logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")
Expand Down
26 changes: 26 additions & 0 deletions tests/integration/test_cases/itest_dimensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,29 @@ integration_test:
MIN({{ render_date_trunc('ds', TimeGranularity.DAY) }}) AS metric_time__day__min
, MAX({{ render_date_trunc('ds', TimeGranularity.DAY) }}) AS metric_time__day__max
FROM {{ source_schema }}.mf_time_spine
---
# Distinct-values query (no metrics requested) grouped by `listing`, with a where filter
# that references the `bookings` metric grouped by `listing`.
integration_test:
name: distinct_values_query_with_metric_filter
description: Query without metrics using a metric filter
model: SIMPLE_MODEL
group_bys: ["listing"]
# The filter compares the rendered metric reference against a constant threshold.
where_filter: "{{ render_metric_template('bookings', ['listing']) }} > 2"
# Expected result: bookings are counted per listing_id in a subquery, full-outer-joined to
# dim_lux_listing_id_mapping, filtered on that count, and de-duplicated with GROUP BY so
# only the `listing` column is returned.
check_query: |
SELECT
listing
FROM (
SELECT
l.listing_id AS listing
, a.bookings AS listing__bookings
FROM {{ source_schema }}.dim_lux_listing_id_mapping l
FULL OUTER JOIN (
SELECT
listing_id AS listing
, SUM(1) AS bookings
FROM {{ source_schema }}.fct_bookings
GROUP BY listing_id
) a
ON l.listing_id = a.listing
)
WHERE listing__bookings > 2
GROUP BY listing
27 changes: 27 additions & 0 deletions tests/query_rendering/test_metric_filter_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,4 +202,31 @@ def test_filter_by_metric_in_same_semantic_model_as_queried_metric(
)


@pytest.mark.sql_engine_snapshot
def test_distinct_values_query_with_metric_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    sql_client: SqlClient,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Check a metric-less (distinct values) query whose query-level where filter uses a metric.

    The query groups by `listing` only and filters on the `bookings` metric grouped by
    `listing`. The plan is built through the distinct-values entry point of the dataflow
    plan builder, and the parent of its first sink node is handed to `convert_and_check`.
    """
    # Build the filter first so the query spec construction below stays easy to scan.
    metric_filter = PydanticWhereFilter(
        where_sql_template="{{ Metric('bookings', ['listing']) }} > 2",
    )
    distinct_values_spec = query_parser.parse_and_validate_query(
        group_by_names=("listing",),
        where_constraint=metric_filter,
    )

    plan = dataflow_plan_builder.build_plan_for_distinct_values(distinct_values_spec)
    # The node under test is the parent of the plan's first sink node.
    node_under_test = plan.sink_output_nodes[0].parent_node

    convert_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        node=node_under_test,
    )


# TODO: tests for filters with conversion metrics
Loading
Loading