Skip to content

Commit

Permalink
WIP — testing: separate out logic for deduplicating path keys
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Apr 26, 2024
1 parent e6b9b7b commit f640e2c
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 9 deletions.
62 changes: 53 additions & 9 deletions metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,14 @@ def element_name(self) -> str: # noqa: D102
return self.reference.element_name

@property
def path_key(self) -> ElementPathKey: # noqa: D102
return ElementPathKey(element_name=self.element_name, entity_links=self.join_path.entity_links)
def path_key(self) -> ElementPathKey:
"""Key that can uniquely identify an element and the joins used to realize the element.
Use qualified name to guarantee uniqueness.
"""
return ElementPathKey(
element_name=self.as_group_by_metric_spec.qualified_name, entity_links=self.join_path.entity_links
)

@property
def reference(self) -> MetricReference: # noqa: D102
Expand All @@ -141,6 +147,14 @@ def metric_subquery_entity_links(self) -> Tuple[EntityReference, ...]:
"""Entity links used to join the metric to the entity it's grouped by in the metric subquery."""
return self.join_path.metric_subquery_join_path_element.entity_links

@property
def as_group_by_metric_spec(self) -> GroupByMetricSpec:
    """Return the GroupByMetricSpec equivalent of this linkable metric.

    Built from the element name, the entity links in the join path, and the entity links used to
    join the metric to the entity it's grouped by in the metric subquery
    (see `metric_subquery_entity_links`).
    """
    return GroupByMetricSpec(
        element_name=self.element_name,
        entity_links=self.join_path.entity_links,
        metric_subquery_entity_links=self.metric_subquery_entity_links,
    )


@dataclass(frozen=True)
class LinkableElementSet:
Expand Down Expand Up @@ -256,7 +270,9 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]
for path_key, entities in join_path_to_linkable_entities.items()
},
path_key_to_linkable_metrics={
path_key: tuple(sorted(metrics, key=lambda linkable_metric: linkable_metric.element_name))
path_key: tuple(
sorted(metrics, key=lambda linkable_metric: linkable_metric.as_group_by_metric_spec.qualified_name)
)
for path_key, metrics in join_path_to_linkable_metrics.items()
},
)
Expand Down Expand Up @@ -354,11 +370,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102
for path_key in self.path_key_to_linkable_entities
),
group_by_metric_specs=tuple(
GroupByMetricSpec(
element_name=linkable_metric.element_name,
entity_links=linkable_metric.join_path.entity_links,
metric_subquery_entity_links=linkable_metric.metric_subquery_entity_links,
)
linkable_metric.as_group_by_metric_spec
for linkable_metrics in self.path_key_to_linkable_metrics.values()
for linkable_metric in linkable_metrics
),
Expand Down Expand Up @@ -627,10 +639,17 @@ def __init__(
linkable_element_set_for_metric = self.get_linkable_elements_for_metrics([metric_reference])
for linkable_entities in linkable_element_set_for_metric.path_key_to_linkable_entities.values():
for linkable_entity in linkable_entities:
if (
metric_reference.element_name == "bookings_per_listing"
and linkable_entity.element_name == "user"
):
print("metric bookings_per_listing can be queried with:", linkable_entity)
metric_subquery_join_path_element = MetricSubqueryJoinPathElement(
metric_reference=metric_reference,
join_on_entity=linkable_entity.reference,
metric_to_entity_join_path=SemanticModelJoinPath(linkable_entity.join_path),
metric_to_entity_join_path=(
SemanticModelJoinPath(linkable_entity.join_path) if linkable_entity.join_path else None
),
)
self._joinable_metrics_for_entities[linkable_entity.reference].add(
metric_subquery_join_path_element
Expand All @@ -653,6 +672,23 @@ def __init__(

logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")

print(
[
subq
for subq in self._joinable_metrics_for_entities[EntityReference("user")]
if subq.metric_reference.element_name == "bookings_per_listing"
]
)
# one of these has an empty join path. why? would expect None in that case.

# when succeeding, we have:
# 1. MetricSubqueryJoinPathElement(metric_reference=MetricReference(element_name='bookings_per_listing'), join_on_entity=EntityReference(element_name='user'), metric_to_entity_join_path=SemanticModelJoinPath(path_elements=()))
# 2. MetricSubqueryJoinPathElement(metric_reference=MetricReference(element_name='bookings_per_listing'), join_on_entity=EntityReference(element_name='user'), metric_to_entity_join_path=SemanticModelJoinPath(path_elements=(SemanticModelJoinPathElement(semantic_model_reference=SemanticModelReference(semantic_model_name='listings_latest'), join_on_entity=EntityReference(element_name='listing')),)))
# DIFFERENT ORDER FAILS!
# 1. Why does order matter? -> are linkable specs overriding each other?
# 2. Why is there no metric_to_entity_join_path in the first? is that expected + does that matter?
# 3. Am I misreading the error? is it on the subquery?

def _metric_requires_metric_time(self, metric: Metric) -> bool:
"""Checks if the metric can only be queried with metric_time. Also checks input metrics.
Expand Down Expand Up @@ -1008,6 +1044,14 @@ def get_linkable_elements_for_metrics(
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
) -> LinkableElementSet:
"""Gets the valid linkable elements that are common to all requested metrics."""

# Is this working? seems like maybe not for derived metrics?
# It's broken!!! UGHHHHH. only used in metadata queries! but it's wrong!!!
# Start a new PR to fix this
# High level: we need to align these things:
# 1. DFP uses same logic as group by resolution
# 2. Metadata queries - test that if you list dimensions + list entities for a given metric, you can successfully query each of them
# THat's a good place to start the next PR!
linkable_element_sets = []
for metric_reference in metric_references:
element_sets = self._metric_to_linkable_element_sets.get(metric_reference.element_name)
Expand Down
54 changes: 54 additions & 0 deletions tests/dataflow/builder/test_dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@

import datetime
import logging
import string

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
from dbt_semantic_interfaces.references import MeasureReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder
from metricflow.dataset.dataset import DataSet
from metricflow.errors.errors import UnableToSatisfyQueryError
from metricflow.filters.time_constraint import TimeRangeConstraint
from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty
from metricflow.query.query_parser import MetricFlowQueryParser
from metricflow.specs.column_assoc import ColumnAssociationResolver
from metricflow.specs.specs import (
Expand Down Expand Up @@ -1319,3 +1322,54 @@ def test_metric_in_metric_where_filter(
mf_test_configuration=mf_test_configuration,
dag_graph=dataflow_plan,
)


def test_all_available_single_join_metric_filters(
    dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser
) -> None:
    """Checks that all allowed metric filters do not error when used in a dataflow plan.

    Covers single-hop joins for both the inner and outer query: iterates over every group-by-metric
    that is valid for the 'listings' measure (excluding multi-hop joins) and builds a dataflow plan
    for a query filtering on it. Any resolution or planning error surfaces as a test failure.
    """
    for group_by_metric in dataflow_plan_builder._metric_lookup.linkable_elements_for_measure(
        MeasureReference("listings"), without_any_of={LinkableElementProperty.MULTI_HOP}
    ).as_spec_set.group_by_metric_specs:
        entity_spec = group_by_metric.metric_subquery_entity_spec
        if entity_spec.entity_links:
            # TODO: Will handle entity specs with entity links in a different test.
            continue
        query_spec = query_parser.parse_and_validate_query(
            metric_names=("listings",),
            where_constraint=PydanticWhereFilter(
                where_sql_template=string.Template("{{ Metric('$metric_name', ['$entity_name']) }} > 2").substitute(
                    metric_name=group_by_metric.element_name, entity_name=entity_spec.element_name
                ),
            ),
        )
        # Should not raise: every listed group-by-metric must be usable in a filter.
        dataflow_plan_builder.build_plan(query_spec)


def test_temp(dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser) -> None:
    """WIP repro: query 'listings' filtered by metric 'bookings_per_listing' grouped by entity 'user'.

    NOTE(review): this scenario was observed to fail intermittently. Open questions to resolve
    before finalizing this test:
    1. This metric/entity combo may not be intended to be allowed at all — confirm expected behavior.
    2. If disallowed, why does it only fail sometimes? Ordering of linkable specs may matter — verify
       whether specs with the same path key are overriding each other.
    """
    query_spec = query_parser.parse_and_validate_query(
        metric_names=("listings",),
        where_constraint=PydanticWhereFilter(
            where_sql_template=string.Template("{{ Metric('$metric_name', ['$entity_name']) }} > 2").substitute(
                metric_name="bookings_per_listing", entity_name="user"
            ),
        ),
    )
    # Should not raise if the combo is valid; flips to pytest.raises once expected behavior is confirmed.
    dataflow_plan_builder.build_plan(query_spec)


def test_temp2(dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser) -> None:
    """WIP repro: query metric 'bookings_per_listing' grouped directly by entity 'user'.

    NOTE(review): observed to fail consistently — this metric/entity combo appears to be disallowed.
    If that is the intended behavior, rewrite this as a pytest.raises(...) assertion on the specific
    error type; otherwise fix the resolver so the plan builds.
    """
    query_spec = query_parser.parse_and_validate_query(metric_names=("bookings_per_listing",), group_by_names=("user",))
    dataflow_plan_builder.build_plan(query_spec)

0 comments on commit f640e2c

Please sign in to comment.