From 06d08cef5a5d9b750b571a71c8b3c3f049ec2e5a Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 20:20:39 -0700 Subject: [PATCH 1/3] Changelog --- .changes/unreleased/Fixes-20231009-195312.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20231009-195312.yaml diff --git a/.changes/unreleased/Fixes-20231009-195312.yaml b/.changes/unreleased/Fixes-20231009-195312.yaml new file mode 100644 index 0000000000..3f50551bb4 --- /dev/null +++ b/.changes/unreleased/Fixes-20231009-195312.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Prioritize source nodes based on correct cost +time: 2023-10-09T19:53:12.491719-07:00 +custom: + Author: courtneyholcomb + Issue: "801" From c2740455f29cc768bd997743527fb1f653ef4606 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 20:21:53 -0700 Subject: [PATCH 2/3] Bug fix: prioritize nodes based on evaluation cost --- .../dataflow/builder/dataflow_plan_builder.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 8235a87c8b..324acb2477 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -557,16 +557,17 @@ def _find_measure_recipe( logger.info(f"Found {len(node_to_evaluation)} candidate measure nodes.") if len(node_to_evaluation) > 0: - cost_function = DefaultCostFunction() - - node_with_lowest_cost = min(node_to_evaluation, key=cost_function.calculate_cost) - evaluation = node_to_evaluation[node_with_lowest_cost] + # All source nodes cost the same. Find evaluation with lowest number of joins. 
+ node_with_lowest_cost_plan = min( + node_to_evaluation, key=lambda node: len(node_to_evaluation[node].join_recipes) + ) + evaluation = node_to_evaluation[node_with_lowest_cost_plan] logger.info( - "Lowest cost node is:\n" + "Lowest cost plan is:\n" + pformat_big_objects( - lowest_cost_node=dataflow_dag_as_text(node_with_lowest_cost), + node=dataflow_dag_as_text(node_with_lowest_cost_plan), evaluation=evaluation, - cost=cost_function.calculate_cost(node_with_lowest_cost), + joins=len(node_to_evaluation[node_with_lowest_cost_plan].join_recipes), ) ) @@ -584,14 +585,14 @@ def _find_measure_recipe( ) return MeasureRecipe( - measure_node=node_with_lowest_cost, + measure_node=node_with_lowest_cost_plan, required_local_linkable_specs=( evaluation.local_linkable_specs + required_local_entity_specs + required_local_dimension_specs + required_local_time_dimension_specs ), - join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_cost].join_recipes, + join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_cost_plan].join_recipes, ) logger.error("No recipe could be constructed.") From 34f4d4f138813f5ea3591e3bf33bd1ffe11297f0 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 20:22:25 -0700 Subject: [PATCH 3/3] Stop evaluating nodes if you find one with 0 joins --- metricflow/dataflow/builder/dataflow_plan_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 324acb2477..48e3c743a7 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -553,6 +553,7 @@ def _find_measure_recipe( # this is going to be the lowest cost solution. if len(evaluation.join_recipes) == 0: logger.info("Not evaluating other nodes since we found one that doesn't require joins") + break logger.info(f"Found {len(node_to_evaluation)} candidate measure nodes.")