From bb4eda44e0a38c219ed34e365e6f7047fc7c58f1 Mon Sep 17 00:00:00 2001 From: tlento Date: Tue, 25 Jun 2024 13:19:20 -0700 Subject: [PATCH] Fix bug with functionally identical comparisons on WhereConstraintNode Due to an oversight the functionally_identical and with_new_parents logic was not updated on the WhereConstraintNode to account for the always_apply property. In isolation this doesn't affect the predicate pushdown optimizer, but in conjunction with the source scan optimizer this causes problems. Rather than merge something broken I'm amending this PR. --- metricflow/dataflow/nodes/where_filter.py | 14 ++++++++++---- .../optimizer/predicate_pushdown_optimizer.py | 7 +++++++ ...test_join_to_time_spine_with_filters__dfp_0.xml | 1 + ...ith_post_agg_join_predicate_pushdown__dfp_0.xml | 2 ++ ...th_post_agg_join_predicate_pushdown__dfpo_0.xml | 2 ++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/metricflow/dataflow/nodes/where_filter.py b/metricflow/dataflow/nodes/where_filter.py index 60dcc9a1f6..04240a9029 100644 --- a/metricflow/dataflow/nodes/where_filter.py +++ b/metricflow/dataflow/nodes/where_filter.py @@ -66,14 +66,20 @@ def description(self) -> str: # noqa: D102 @property def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 - return tuple(super().displayed_properties) + (DisplayedProperty("where_condition", self.where),) + properties = tuple(super().displayed_properties) + (DisplayedProperty("where_condition", self.where),) + if self.always_apply: + properties = properties + (DisplayedProperty("All filters always applied:", self.always_apply),) + return properties def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 - return isinstance(other_node, self.__class__) and other_node.where == self.where + return ( + isinstance(other_node, self.__class__) + and other_node.where == self.where + and other_node.always_apply == self.always_apply + ) def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> WhereConstraintNode: # noqa: D102 assert len(new_parent_nodes) == 1 return WhereConstraintNode( - parent_node=new_parent_nodes[0], - where_specs=self.input_where_specs, + parent_node=new_parent_nodes[0], where_specs=self.input_where_specs, always_apply=self.always_apply ) diff --git a/metricflow/dataflow/optimizer/predicate_pushdown_optimizer.py b/metricflow/dataflow/optimizer/predicate_pushdown_optimizer.py index 39bb0c8259..aed67dea7e 100644 --- a/metricflow/dataflow/optimizer/predicate_pushdown_optimizer.py +++ b/metricflow/dataflow/optimizer/predicate_pushdown_optimizer.py @@ -385,6 +385,13 @@ def visit_where_constraint_node(self, node: WhereConstraintNode) -> OptimizeBran ) if node.always_apply: + logger.log( + level=self._log_level, + msg=( + "Applying original filter spec set based on node-level override directive. Additional specs " + + f"appled:\n{[spec for spec in node.input_where_specs if spec not in filter_specs_to_apply]}" + ), + ) optimized_node = OptimizeBranchResult( optimized_branch=node.with_new_parents((optimized_parent.optimized_branch,)) ) diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_filters__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_filters__dfp_0.xml index 21371744ac..d358d42813 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_filters__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_with_filters__dfp_0.xml @@ -64,6 +64,7 @@ + diff --git a/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfp_0.xml b/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfp_0.xml index 0ee0ae8bc5..079fa45fed 100644 --- a/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfp_0.xml +++ b/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfp_0.xml @@ -107,6 +107,7 @@ + @@ -299,6 +300,7 @@ + diff --git a/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfpo_0.xml b/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfpo_0.xml index 3f397a5c82..a63d20297c 100644 --- a/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfpo_0.xml +++ b/tests_metricflow/snapshots/test_predicate_pushdown_optimizer.py/DataflowPlan/test_fill_nulls_time_spine_metric_with_post_agg_join_predicate_pushdown__dfpo_0.xml @@ -107,6 +107,7 @@ + @@ -307,6 +308,7 @@ +