Skip to content

Commit

Permalink
Metric filters for distinct values queries (#1107)
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb authored Apr 9, 2024
1 parent 72f7d6e commit 94ead4c
Show file tree
Hide file tree
Showing 19 changed files with 2,110 additions and 9 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240329-182759.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Enable metric filters for distinct values queries.
time: 2024-03-29T18:27:59.807712-07:00
custom:
  Author: courtneyholcomb
  Issue: "1107"
4 changes: 1 addition & 3 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,9 +640,7 @@ def _build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Da
)

query_level_filter_specs = filter_spec_factory.create_from_where_filter_intersection(
filter_location=WhereFilterLocation.for_query(
tuple(metric_spec.reference for metric_spec in query_spec.metric_specs)
),
filter_location=WhereFilterLocation.for_query(metric_references=tuple()),
filter_intersection=query_spec.filter_intersection,
)

Expand Down
40 changes: 34 additions & 6 deletions metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,16 +548,44 @@ def __init__(
metrics.add(metric_reference)
self._joinable_metrics_for_semantic_models[semantic_model_reference] = metrics

# If no metrics are specified, the query interface supports distinct dimension values from a single semantic
# model.
linkable_element_sets_to_merge: List[LinkableElementSet] = []

# If no metrics are specified, the query interface supports querying distinct values for dimensions, entities,
# and group by metrics.
linkable_element_sets_for_no_metrics_queries: List[LinkableElementSet] = []
for semantic_model in semantic_manifest.semantic_models:
linkable_element_sets_to_merge.append(self._get_elements_in_semantic_model(semantic_model))
linkable_element_sets_for_no_metrics_queries.append(self._get_elements_in_semantic_model(semantic_model))
joinable_metrics = self._joinable_metrics_for_semantic_models.get(semantic_model.reference, set())
for entity in semantic_model.entities:
linkable_metrics_set = LinkableElementSet(
path_key_to_linkable_dimensions={},
path_key_to_linkable_entities={},
path_key_to_linkable_metrics={
ElementPathKey(
element_name=metric.element_name,
entity_links=(entity.reference,),
time_granularity=None,
date_part=None,
): (
LinkableMetric(
element_name=metric.element_name,
entity_links=(entity.reference,),
join_path=(
SemanticModelJoinPathElement(
semantic_model_reference=semantic_model.reference,
join_on_entity=entity.reference,
),
),
join_by_semantic_model=semantic_model.reference,
properties=frozenset({LinkableElementProperties.METRIC}),
),
)
for metric in joinable_metrics
},
)
linkable_element_sets_for_no_metrics_queries.append(linkable_metrics_set)

metric_time_elements_for_no_metrics = self._get_metric_time_elements(measure_reference=None)
self._no_metric_linkable_element_set = LinkableElementSet.merge_by_path_key(
linkable_element_sets_to_merge + [metric_time_elements_for_no_metrics]
linkable_element_sets_for_no_metrics_queries + [metric_time_elements_for_no_metrics]
)

logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")
Expand Down
26 changes: 26 additions & 0 deletions tests/integration/test_cases/itest_dimensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,29 @@ integration_test:
MIN({{ render_date_trunc('ds', TimeGranularity.DAY) }}) AS metric_time__day__min
, MAX({{ render_date_trunc('ds', TimeGranularity.DAY) }}) AS metric_time__day__max
FROM {{ source_schema }}.mf_time_spine
---
integration_test:
  name: distinct_values_query_with_metric_filter
  description: Query without metrics using a metric filter
  model: SIMPLE_MODEL
  group_bys: ["listing"]
  where_filter: "{{ render_metric_template('bookings', ['listing']) }} > 2"
  check_query: |
    SELECT
      listing
    FROM (
      SELECT
        l.listing_id AS listing
        , a.bookings AS listing__bookings
      FROM {{ source_schema }}.dim_lux_listing_id_mapping l
      FULL OUTER JOIN (
        SELECT
          listing_id AS listing
          , SUM(1) AS bookings
        FROM {{ source_schema }}.fct_bookings
        GROUP BY listing_id
      ) a
      ON l.listing_id = a.listing
    )
    WHERE listing__bookings > 2
    GROUP BY listing
27 changes: 27 additions & 0 deletions tests/query_rendering/test_metric_filter_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,4 +202,31 @@ def test_filter_by_metric_in_same_semantic_model_as_queried_metric(
)


@pytest.mark.sql_engine_snapshot
def test_distinct_values_query_with_metric_filter(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    sql_client: SqlClient,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    query_parser: MetricFlowQueryParser,
) -> None:
    """Check the rendered SQL for a metric-less query whose where filter references a metric.

    The query groups by ``listing`` only (no metrics requested) and applies a
    query-level filter on the ``bookings`` metric grouped by ``listing``. The
    plan is built through the distinct-values path and the node feeding the
    first sink is handed to ``convert_and_check``.
    """
    # Query-level filter that references a metric rather than a dimension or entity.
    metric_filter = PydanticWhereFilter(
        where_sql_template="{{ Metric('bookings', ['listing']) }} > 2",
    )
    parsed_spec = query_parser.parse_and_validate_query(
        group_by_names=("listing",),
        where_constraint=metric_filter,
    )

    distinct_values_plan = dataflow_plan_builder.build_plan_for_distinct_values(parsed_spec)
    # The check runs against the parent of the first sink node, not the sink itself.
    node_under_test = distinct_values_plan.sink_output_nodes[0].parent_node

    convert_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        node=node_under_test,
    )


# TODO: tests for filters with conversion metrics
Loading

0 comments on commit 94ead4c

Please sign in to comment.