Skip to content

Commit

Permalink
WIP — testing: separate out logic for deduplicating path keys
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Apr 26, 2024
1 parent e6b9b7b commit f640e2c
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 9 deletions.
62 changes: 53 additions & 9 deletions metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,14 @@ def element_name(self) -> str: # noqa: D102
return self.reference.element_name

@property
def path_key(self) -> ElementPathKey: # noqa: D102
return ElementPathKey(element_name=self.element_name, entity_links=self.join_path.entity_links)
def path_key(self) -> ElementPathKey:
"""Key that can uniquely identify an element and the joins used to realize the element.
Use qualified name to guarantee uniqueness.
"""
return ElementPathKey(
element_name=self.as_group_by_metric_spec.qualified_name, entity_links=self.join_path.entity_links
)

@property
def reference(self) -> MetricReference: # noqa: D102
Expand All @@ -141,6 +147,14 @@ def metric_subquery_entity_links(self) -> Tuple[EntityReference, ...]:
"""Entity links used to join the metric to the entity it's grouped by in the metric subquery."""
return self.join_path.metric_subquery_join_path_element.entity_links

@property
def as_group_by_metric_spec(self) -> GroupByMetricSpec:
    """Return the GroupByMetricSpec equivalent of this linkable metric.

    Built from the element name, the entity links in the join path, and the entity links used to
    join the metric to the entity it's grouped by in the metric subquery
    (see `metric_subquery_entity_links`).
    """
    return GroupByMetricSpec(
        element_name=self.element_name,
        entity_links=self.join_path.entity_links,
        metric_subquery_entity_links=self.metric_subquery_entity_links,
    )


@dataclass(frozen=True)
class LinkableElementSet:
Expand Down Expand Up @@ -256,7 +270,9 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]
for path_key, entities in join_path_to_linkable_entities.items()
},
path_key_to_linkable_metrics={
path_key: tuple(sorted(metrics, key=lambda linkable_metric: linkable_metric.element_name))
path_key: tuple(
sorted(metrics, key=lambda linkable_metric: linkable_metric.as_group_by_metric_spec.qualified_name)
)
for path_key, metrics in join_path_to_linkable_metrics.items()
},
)
Expand Down Expand Up @@ -354,11 +370,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102
for path_key in self.path_key_to_linkable_entities
),
group_by_metric_specs=tuple(
GroupByMetricSpec(
element_name=linkable_metric.element_name,
entity_links=linkable_metric.join_path.entity_links,
metric_subquery_entity_links=linkable_metric.metric_subquery_entity_links,
)
linkable_metric.as_group_by_metric_spec
for linkable_metrics in self.path_key_to_linkable_metrics.values()
for linkable_metric in linkable_metrics
),
Expand Down Expand Up @@ -627,10 +639,17 @@ def __init__(
linkable_element_set_for_metric = self.get_linkable_elements_for_metrics([metric_reference])
for linkable_entities in linkable_element_set_for_metric.path_key_to_linkable_entities.values():
for linkable_entity in linkable_entities:
if (
metric_reference.element_name == "bookings_per_listing"
and linkable_entity.element_name == "user"
):
print("metric bookings_per_listing can be queried with:", linkable_entity)
metric_subquery_join_path_element = MetricSubqueryJoinPathElement(
metric_reference=metric_reference,
join_on_entity=linkable_entity.reference,
metric_to_entity_join_path=SemanticModelJoinPath(linkable_entity.join_path),
metric_to_entity_join_path=(
SemanticModelJoinPath(linkable_entity.join_path) if linkable_entity.join_path else None
),
)
self._joinable_metrics_for_entities[linkable_entity.reference].add(
metric_subquery_join_path_element
Expand All @@ -653,6 +672,23 @@ def __init__(

logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")

print(
[
subq
for subq in self._joinable_metrics_for_entities[EntityReference("user")]
if subq.metric_reference.element_name == "bookings_per_listing"
]
)
# one of these has an empty join path. why? would expect None in that case.

# when succeeding, we have:
# 1. MetricSubqueryJoinPathElement(metric_reference=MetricReference(element_name='bookings_per_listing'), join_on_entity=EntityReference(element_name='user'), metric_to_entity_join_path=SemanticModelJoinPath(path_elements=()))
# 2. MetricSubqueryJoinPathElement(metric_reference=MetricReference(element_name='bookings_per_listing'), join_on_entity=EntityReference(element_name='user'), metric_to_entity_join_path=SemanticModelJoinPath(path_elements=(SemanticModelJoinPathElement(semantic_model_reference=SemanticModelReference(semantic_model_name='listings_latest'), join_on_entity=EntityReference(element_name='listing')),)))
# DIFFERENT ORDER FAILS!
# 1. Why does order matter? -> are linkable specs overriding each other?
# 2. Why is there no metric_to_entity_join_path in the first? is that expected + does that matter?
# 3. Am I misreading the error? is it on the subquery?

def _metric_requires_metric_time(self, metric: Metric) -> bool:
"""Checks if the metric can only be queried with metric_time. Also checks input metrics.
Expand Down Expand Up @@ -1008,6 +1044,14 @@ def get_linkable_elements_for_metrics(
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
) -> LinkableElementSet:
"""Gets the valid linkable elements that are common to all requested metrics."""

# Is this working? seems like maybe not for derived metrics?
# It's broken!!! UGHHHHH. only used in metadata queries! but it's wrong!!!
# Start a new PR to fix this
# High level: we need to align these things:
# 1. DFP uses same logic as group by resolution
# 2. Metadata queries - test that if you list dimensions + list entities for a given metric, you can successfully query each of them
# THat's a good place to start the next PR!
linkable_element_sets = []
for metric_reference in metric_references:
element_sets = self._metric_to_linkable_element_sets.get(metric_reference.element_name)
Expand Down
54 changes: 54 additions & 0 deletions tests/dataflow/builder/test_dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@

import datetime
import logging
import string

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.filters.where_filter import PydanticWhereFilter
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
from dbt_semantic_interfaces.references import MeasureReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder
from metricflow.dataset.dataset import DataSet
from metricflow.errors.errors import UnableToSatisfyQueryError
from metricflow.filters.time_constraint import TimeRangeConstraint
from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty
from metricflow.query.query_parser import MetricFlowQueryParser
from metricflow.specs.column_assoc import ColumnAssociationResolver
from metricflow.specs.specs import (
Expand Down Expand Up @@ -1319,3 +1322,54 @@ def test_metric_in_metric_where_filter(
mf_test_configuration=mf_test_configuration,
dag_graph=dataflow_plan,
)


def test_all_available_single_join_metric_filters(
    dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser
) -> None:
    """Checks that all allowed metric filters do not error when used in a dataflow plan.

    Covers single-hop joins for both the inner and outer query: iterates over every group-by-metric
    that is valid for the 'listings' measure (excluding multi-hop joins) and builds a dataflow plan
    for a query filtering on it. Any resolution or planning error surfaces as a test failure.
    """
    for group_by_metric in dataflow_plan_builder._metric_lookup.linkable_elements_for_measure(
        MeasureReference("listings"), without_any_of={LinkableElementProperty.MULTI_HOP}
    ).as_spec_set.group_by_metric_specs:
        entity_spec = group_by_metric.metric_subquery_entity_spec
        if entity_spec.entity_links:
            # TODO: Will handle entity specs with entity links in a different test.
            continue
        query_spec = query_parser.parse_and_validate_query(
            metric_names=("listings",),
            where_constraint=PydanticWhereFilter(
                where_sql_template=string.Template("{{ Metric('$metric_name', ['$entity_name']) }} > 2").substitute(
                    metric_name=group_by_metric.element_name, entity_name=entity_spec.element_name
                ),
            ),
        )
        # Should not raise: every listed group-by-metric must be usable in a filter.
        dataflow_plan_builder.build_plan(query_spec)


def test_temp(dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser) -> None:
    """WIP repro: query 'listings' filtered by metric 'bookings_per_listing' grouped by entity 'user'.

    NOTE(review): this scenario was observed to fail intermittently. Open questions to resolve
    before finalizing this test:
    1. This metric/entity combo may not be intended to be allowed at all — confirm expected behavior.
    2. If disallowed, why does it only fail sometimes? Ordering of linkable specs may matter — verify
       whether specs with the same path key are overriding each other.
    """
    query_spec = query_parser.parse_and_validate_query(
        metric_names=("listings",),
        where_constraint=PydanticWhereFilter(
            where_sql_template=string.Template("{{ Metric('$metric_name', ['$entity_name']) }} > 2").substitute(
                metric_name="bookings_per_listing", entity_name="user"
            ),
        ),
    )
    # Should not raise if the combo is valid; flips to pytest.raises once expected behavior is confirmed.
    dataflow_plan_builder.build_plan(query_spec)


def test_temp2(dataflow_plan_builder: DataflowPlanBuilder, query_parser: MetricFlowQueryParser) -> None:
    """WIP repro: query metric 'bookings_per_listing' grouped directly by entity 'user'.

    NOTE(review): observed to fail consistently — this metric/entity combo appears to be disallowed.
    If that is the intended behavior, rewrite this as a pytest.raises(...) assertion on the specific
    error type; otherwise fix the resolver so the plan builds.
    """
    query_spec = query_parser.parse_and_validate_query(metric_names=("bookings_per_listing",), group_by_names=("user",))
    dataflow_plan_builder.build_plan(query_spec)

0 comments on commit f640e2c

Please sign in to comment.