From 4c4f44021ba0b5881c78a4bd5ff54668c88095d0 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 3 Nov 2023 17:53:10 -0700 Subject: [PATCH] Remove join_type param --- .../dataflow/builder/dataflow_plan_builder.py | 8 +----- metricflow/dataflow/dataflow_plan.py | 25 ++----------------- .../source_scan/source_scan_optimizer.py | 4 +-- metricflow/plan_conversion/dataflow_to_sql.py | 6 ++--- .../test_common_semantic_model__dfp_0.xml | 2 -- ..._derived_metric_offset_to_grain__dfp_0.xml | 2 -- ...in_to_time_spine_derived_metric__dfp_0.xml | 2 -- .../test_measure_constraint_plan__dfp_0.xml | 2 -- ...traint_with_reused_measure_plan__dfp_0.xml | 2 -- ...mantic_model_ratio_metrics_plan__dfp_0.xml | 2 -- .../test_multiple_metrics_plan__dfp_0.xml | 2 -- ...mantic_model_ratio_metrics_plan__dfp_0.xml | 2 -- ...2_metrics_from_1_semantic_model__dfp_0.xml | 2 -- ..._metrics_from_2_semantic_models__dfp_0.xml | 2 -- ...metrics_from_2_semantic_models__dfpo_0.xml | 2 -- ...o_metrics_from_1_semantic_model__dfp_0.xml | 6 ----- ..._metrics_from_2_semantic_models__dfp_0.xml | 2 -- ...metrics_from_2_semantic_models__dfpo_0.xml | 2 -- ...constrained_metric_not_combined__dfp_0.xml | 2 -- ...onstrained_metric_not_combined__dfpo_0.xml | 2 -- .../test_derived_metric__dfp_0.xml | 2 -- ..._metric_with_non_derived_metric__dfp_0.xml | 4 --- ...metric_with_non_derived_metric__dfpo_0.xml | 2 -- .../test_nested_derived_metric__dfp_0.xml | 4 --- .../test_nested_derived_metric__dfpo_0.xml | 2 -- 25 files changed, 7 insertions(+), 86 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index dfa50f7e33..c0b5c146bb 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -213,7 +213,6 @@ def _build_metrics_output_node( queried_linkable_specs: LinkableSpecSet, where_constraint: Optional[WhereFilterSpec] = None, time_range_constraint: Optional[TimeRangeConstraint] = None, - combine_metrics_join_type: SqlJoinType = SqlJoinType.FULL_OUTER, ) -> BaseOutput: """Builds a computed metrics output node. @@ -222,7 +221,6 @@ def _build_metrics_output_node( queried_linkable_specs: Dimensions/entities that were queried for. where_constraint: Where constraint used to compute the metric. time_range_constraint: Time range constraint used to compute the metric. - combine_metrics_join_type: The join used when combining the computed metrics. """ output_nodes: List[BaseOutput] = [] compute_metrics_node: Optional[ComputeMetricsNode] = None @@ -247,7 +245,6 @@ def _build_metrics_output_node( queried_linkable_specs=queried_linkable_specs, where_constraint=where_constraint, time_range_constraint=time_range_constraint, - combine_metrics_join_type=SqlJoinType.FULL_OUTER, ), metric_specs=[metric_spec], ) @@ -294,10 +291,7 @@ def _build_metrics_output_node( if len(output_nodes) == 1: return output_nodes[0] - return CombineMetricsNode( - parent_nodes=output_nodes, - join_type=combine_metrics_join_type, - ) + return CombineMetricsNode(parent_nodes=output_nodes) def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> DataflowPlan: """Generate a plan that would get the distinct values of a linkable instance. diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 0a2140aedb..1449b88723 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -1225,9 +1225,7 @@ class CombineMetricsNode(ComputedMetricsOutput): def __init__( # noqa: D self, parent_nodes: Sequence[Union[BaseOutput, ComputedMetricsOutput]], - join_type: SqlJoinType = SqlJoinType.FULL_OUTER, ) -> None: - self._join_type = join_type super().__init__(node_id=self.create_unique_id(), parent_nodes=list(parent_nodes)) @classmethod @@ -1241,31 +1239,12 @@ def accept(self, visitor: DataflowPlanNodeVisitor[VisitorOutputT]) -> VisitorOut def description(self) -> str: # noqa: D return "Combine Metrics" - @property - def displayed_properties(self) -> List[DisplayedProperty]: - """Prints details about the join types and how the node will behave.""" - custom_properties = [DisplayedProperty("join type", self.join_type)] - if self.join_type is SqlJoinType.FULL_OUTER: - custom_properties.append( - DisplayedProperty("de-duplication method", "post-join aggregation across all dimensions") - ) - - return super().displayed_properties + custom_properties - - @property - def join_type(self) -> SqlJoinType: - """The type of join used for combining metrics.""" - return self._join_type - def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D - return isinstance(other_node, self.__class__) and other_node.join_type == self.join_type + return isinstance(other_node, self.__class__) def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> CombineMetricsNode: # noqa: D assert len(new_parent_nodes) == 1 - return CombineMetricsNode( - parent_nodes=new_parent_nodes, - join_type=self.join_type, - ) + return CombineMetricsNode(parent_nodes=new_parent_nodes) class ConstrainTimeRangeNode(AggregatedMeasuresOutput, BaseOutput): diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index 5938c63836..26beacef35 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -287,9 +287,7 @@ def visit_combine_metrics_node(self, node: CombineMetricsNode) -> OptimizeBranch if len(combined_parent_branches) == 1: return OptimizeBranchResult(base_output_node=combined_parent_branches[0]) - return OptimizeBranchResult( - base_output_node=CombineMetricsNode(parent_nodes=combined_parent_branches, join_type=node.join_type) - ) + return OptimizeBranchResult(base_output_node=CombineMetricsNode(parent_nodes=combined_parent_branches)) def visit_constrain_time_range_node(self, node: ConstrainTimeRangeNode) -> OptimizeBranchResult: # noqa: D self._log_visit_node_type(node) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 87b850b937..e403f3648e 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -959,7 +959,7 @@ def visit_combine_metrics_node(self, node: CombineMetricsNode) -> SqlDataSet: ), "All parent nodes should have the same set of linkable instances since all values are coalesced." linkable_spec_set = from_data_set.data_set.instance_set.spec_set.transform(SelectOnlyLinkableSpecs()) - join_type = SqlJoinType.CROSS_JOIN if len(linkable_spec_set.all_specs) == 0 else node.join_type + join_type = SqlJoinType.CROSS_JOIN if len(linkable_spec_set.all_specs) == 0 else SqlJoinType.FULL_OUTER joins_descriptions: List[SqlJoinDescription] = [] # TODO: refactor this loop into SqlQueryPlanJoinBuilder @@ -984,7 +984,7 @@ def visit_combine_metrics_node(self, node: CombineMetricsNode) -> SqlDataSet: output_instance_set = InstanceSet.merge([x.data_set.instance_set for x in parent_data_sets]) output_instance_set = output_instance_set.transform(ChangeAssociatedColumns(self._column_association_resolver)) - metric_aggregation_type = AggregationType.MAX if node.join_type is SqlJoinType.FULL_OUTER else None + metric_aggregation_type = AggregationType.MAX metric_select_column_set = SelectColumnSet( metric_columns=self._make_select_columns_for_metrics( table_alias_to_metric_specs, aggregation_type=metric_aggregation_type @@ -1006,7 +1006,7 @@ def visit_combine_metrics_node(self, node: CombineMetricsNode) -> SqlDataSet: from_source=from_data_set.data_set.sql_select_node, from_source_alias=from_data_set.alias, joins_descs=tuple(joins_descriptions), - group_bys=linkable_select_column_set.as_tuple() if node.join_type is SqlJoinType.FULL_OUTER else (), + group_bys=linkable_select_column_set.as_tuple(), where=None, order_bys=(), ), diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml index ff9c8b4707..995d2264c8 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml index acfd39d3dd..6e7a05c15d 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml index 374e7da04d..ba0faa72af 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_join_to_time_spine_derived_metric__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml index 0e45438be4..c284514433 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml index ae67a800bb..37e1a3123e 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml index 8bdcd7e080..2688100204 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml index 7234390739..455487089d 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml index 26a114d041..ad2e199a63 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml index ff9c8b4707..995d2264c8 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml index cb3913a1e9..4349bbc86d 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml index 29f009e284..ccbea73132 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml index 31896ee893..b70b76fb45 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml @@ -5,8 +5,6 @@ - - @@ -20,8 +18,6 @@ - - @@ -127,8 +123,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml index af1adfb56f..c577d49dac 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml index 1536e6c8f9..4d3f2c07ad 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml index ce2766cc9f..75c50ed00d 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml index aa11bdf2c4..92b366667f 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml index e10bd4a7a0..29526bca43 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml @@ -15,8 +15,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml index e1d61a2ec1..f9e04cae8b 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml @@ -5,8 +5,6 @@ - - @@ -65,8 +63,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml index b23dd5f90b..95b7f60916 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml @@ -5,8 +5,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml index 949bae2ee7..1c4e8d0e42 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml @@ -15,8 +15,6 @@ - - @@ -30,8 +28,6 @@ - - diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml index a4137fb8ab..09232c4bd3 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml @@ -15,8 +15,6 @@ - -