From 04f2e90cfc58ab635b7482af1123c3feb7daab4f Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 8 May 2024 20:02:56 -0700 Subject: [PATCH 1/8] /* PR_START p--smr 04 */ Remove DataflowPlanNode output types / sink nodes. --- .../dataflow/builder/dataflow_plan_builder.py | 61 +++++++------ metricflow/dataflow/builder/node_evaluator.py | 8 +- metricflow/dataflow/builder/source_node.py | 12 +-- metricflow/dataflow/dataflow_plan.py | 72 +++------------ .../dataflow/nodes/add_generated_uuid.py | 10 ++- .../dataflow/nodes/aggregate_measures.py | 20 ++--- .../nodes/combine_aggregated_outputs.py | 10 +-- metricflow/dataflow/nodes/compute_metrics.py | 10 +-- metricflow/dataflow/nodes/constrain_time.py | 10 +-- metricflow/dataflow/nodes/filter_elements.py | 10 +-- .../dataflow/nodes/join_conversion_events.py | 10 +-- metricflow/dataflow/nodes/join_over_time.py | 10 +-- metricflow/dataflow/nodes/join_to_base.py | 12 +-- .../dataflow/nodes/join_to_time_spine.py | 10 +-- .../dataflow/nodes/metric_time_transform.py | 10 +-- metricflow/dataflow/nodes/min_max.py | 10 +-- metricflow/dataflow/nodes/order_by_limit.py | 10 +-- metricflow/dataflow/nodes/read_sql_source.py | 6 +- .../dataflow/nodes/semi_additive_join.py | 10 +-- metricflow/dataflow/nodes/where_filter.py | 9 +- .../dataflow/nodes/write_to_dataframe.py | 16 ++-- metricflow/dataflow/nodes/write_to_table.py | 14 +-- .../source_scan/cm_branch_combiner.py | 13 ++- .../source_scan/source_scan_optimizer.py | 36 ++++---- metricflow/execution/dataflow_to_execution.py | 88 ++++++++++++++++++- metricflow/plan_conversion/dataflow_to_sql.py | 3 +- metricflow/plan_conversion/node_processor.py | 26 +++--- .../data_warehouse_model_validator.py | 10 ++- .../source_scan/test_cm_branch_combiner.py | 4 +- .../test_conversion_metrics_to_sql.py | 12 +-- .../test_distinct_values_to_sql.py | 4 +- .../test_metric_time_dimension_to_sql.py | 2 +- .../test_dataflow_to_sql_plan.py | 10 +-- .../query_rendering/compare_rendered_query.py | 6 +- 
.../test_cumulative_metric_rendering.py | 18 ++-- .../test_derived_metric_rendering.py | 60 ++++++------- .../test_fill_nulls_with_rendering.py | 16 ++-- .../test_granularity_date_part_rendering.py | 6 +- .../test_metric_filter_rendering.py | 22 ++--- .../test_metric_time_without_metrics.py | 8 +- .../query_rendering/test_query_rendering.py | 40 ++++----- .../test_time_spine_join_rendering.py | 2 +- 42 files changed, 373 insertions(+), 363 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index a1f11aaf33..dc05e1760a 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -61,9 +61,8 @@ ) from metricflow.dataflow.builder.source_node import SourceNodeBuilder, SourceNodeSet from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlan, - SinkOutput, + DataflowPlanNode, ) from metricflow.dataflow.nodes.add_generated_uuid import AddGeneratedUuidColumnNode from metricflow.dataflow.nodes.aggregate_measures import AggregateMeasuresNode @@ -93,7 +92,7 @@ class DataflowRecipe: """Get a recipe for how to build a dataflow plan node that outputs measures and linkable instances as needed.""" - source_node: BaseOutput + source_node: DataflowPlanNode required_local_linkable_specs: Tuple[LinkableInstanceSpec, ...] join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...] @@ -151,7 +150,7 @@ def build_plan( def _build_query_output_node( self, query_spec: MetricFlowQuerySpec, for_group_by_source_node: bool = False - ) -> BaseOutput: + ) -> DataflowPlanNode: """Build SQL output node from query inputs. 
May be used to build query DFP or source node.""" for metric_spec in query_spec.metric_specs: if ( @@ -234,7 +233,7 @@ def _build_aggregated_conversion_node( queried_linkable_specs: LinkableSpecSet, time_range_constraint: Optional[TimeRangeConstraint] = None, constant_properties: Optional[Sequence[ConstantPropertyInput]] = None, - ) -> BaseOutput: + ) -> DataflowPlanNode: """Builds a node that contains aggregated values of conversions and opportunities.""" # Build measure recipes base_required_linkable_specs, _ = self.__get_required_and_extraneous_linkable_specs( @@ -455,7 +454,7 @@ def _build_derived_metric_output_node( filter_spec_factory: WhereSpecFactory, time_range_constraint: Optional[TimeRangeConstraint] = None, for_group_by_source_node: bool = False, - ) -> BaseOutput: + ) -> DataflowPlanNode: """Builds a node to compute a metric defined from other metrics.""" metric = self._metric_lookup.get_metric(metric_spec.reference) metric_input_specs = self._build_input_metric_specs_for_derived_metric( @@ -470,7 +469,7 @@ def _build_derived_metric_output_node( queried_linkable_specs=queried_linkable_specs, filter_specs=metric_spec.filter_specs ) - parent_nodes: List[BaseOutput] = [] + parent_nodes: List[DataflowPlanNode] = [] # This is the filter that's defined for the metric in the configs. 
metric_definition_filter_specs = filter_spec_factory.create_from_where_filter_intersection( @@ -509,7 +508,7 @@ def _build_derived_metric_output_node( parent_node = ( parent_nodes[0] if len(parent_nodes) == 1 else CombineAggregatedOutputsNode(parent_nodes=parent_nodes) ) - output_node: BaseOutput = ComputeMetricsNode( + output_node: DataflowPlanNode = ComputeMetricsNode( parent_node=parent_node, metric_specs=[metric_spec], for_group_by_source_node=for_group_by_source_node, @@ -553,7 +552,7 @@ def _build_any_metric_output_node( filter_spec_factory: WhereSpecFactory, time_range_constraint: Optional[TimeRangeConstraint] = None, for_group_by_source_node: bool = False, - ) -> BaseOutput: + ) -> DataflowPlanNode: """Builds a node to compute a metric of any type.""" metric = self._metric_lookup.get_metric(metric_spec.reference) @@ -592,7 +591,7 @@ def _build_metrics_output_node( filter_spec_factory: WhereSpecFactory, time_range_constraint: Optional[TimeRangeConstraint] = None, for_group_by_source_node: bool = False, - ) -> BaseOutput: + ) -> DataflowPlanNode: """Builds a node that computes all requested metrics. Args: @@ -602,7 +601,7 @@ def _build_metrics_output_node( filter_spec_factory: Constructs WhereFilterSpecs with the resolved ambiguous group-by-items in the filter. time_range_constraint: Time range constraint used to compute the metric. 
""" - output_nodes: List[BaseOutput] = [] + output_nodes: List[DataflowPlanNode] = [] for metric_spec in metric_specs: logger.info(f"Generating compute metrics node for:\n{indent(mf_pformat(metric_spec))}") @@ -687,14 +686,14 @@ def _build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Da @staticmethod def build_sink_node( - parent_node: BaseOutput, + parent_node: DataflowPlanNode, order_by_specs: Sequence[OrderBySpec], output_sql_table: Optional[SqlTable] = None, limit: Optional[int] = None, output_selection_specs: Optional[InstanceSpecSet] = None, - ) -> SinkOutput: + ) -> DataflowPlanNode: """Adds order by / limit / write nodes.""" - pre_result_node: Optional[BaseOutput] = None + pre_result_node: Optional[DataflowPlanNode] = None if order_by_specs or limit: pre_result_node = OrderByLimitNode( @@ -706,7 +705,7 @@ def build_sink_node( parent_node=pre_result_node or parent_node, include_specs=output_selection_specs ) - write_result_node: SinkOutput + write_result_node: DataflowPlanNode if not output_sql_table: write_result_node = WriteToResultDataframeNode(parent_node=pre_result_node or parent_node) else: @@ -721,21 +720,21 @@ def _contains_multihop_linkables(linkable_specs: Sequence[LinkableInstanceSpec]) """Returns true if any of the linkable specs requires a multi-hop join to realize.""" return any(len(x.entity_links) > 1 for x in linkable_specs) - def _sort_by_suitability(self, nodes: Sequence[BaseOutput]) -> Sequence[BaseOutput]: + def _sort_by_suitability(self, nodes: Sequence[DataflowPlanNode]) -> Sequence[DataflowPlanNode]: """Sort nodes by the number of linkable specs. The lower the number of linkable specs means less aggregation required. 
""" - def sort_function(node: BaseOutput) -> int: + def sort_function(node: DataflowPlanNode) -> int: data_set = self._node_data_set_resolver.get_output_data_set(node) return len(data_set.instance_set.spec_set.linkable_specs) return sorted(nodes, key=sort_function) def _select_source_nodes_with_measures( - self, measure_specs: Set[MeasureSpec], source_nodes: Sequence[BaseOutput] - ) -> Sequence[BaseOutput]: + self, measure_specs: Set[MeasureSpec], source_nodes: Sequence[DataflowPlanNode] + ) -> Sequence[DataflowPlanNode]: nodes = [] measure_specs_set = set(measure_specs) for source_node in source_nodes: @@ -747,11 +746,11 @@ def _select_source_nodes_with_measures( return nodes def _select_source_nodes_with_linkable_specs( - self, linkable_specs: LinkableSpecSet, source_nodes: Sequence[BaseOutput] - ) -> Sequence[BaseOutput]: + self, linkable_specs: LinkableSpecSet, source_nodes: Sequence[DataflowPlanNode] + ) -> Sequence[DataflowPlanNode]: """Find source nodes with requested linkable specs and no measures.""" # Use a dictionary to dedupe for consistent ordering. 
- selected_nodes: Dict[BaseOutput, None] = {} + selected_nodes: Dict[DataflowPlanNode, None] = {} requested_linkable_specs_set = set(linkable_specs.as_tuple) for source_node in source_nodes: output_spec_set = self._node_data_set_resolver.get_output_data_set(source_node).instance_set.spec_set @@ -823,8 +822,8 @@ def _find_dataflow_recipe( time_range_constraint: Optional[TimeRangeConstraint] = None, ) -> Optional[DataflowRecipe]: linkable_specs = linkable_spec_set.as_tuple - candidate_nodes_for_left_side_of_join: List[BaseOutput] = [] - candidate_nodes_for_right_side_of_join: List[BaseOutput] = [] + candidate_nodes_for_left_side_of_join: List[DataflowPlanNode] = [] + candidate_nodes_for_right_side_of_join: List[DataflowPlanNode] = [] if measure_spec_properties: candidate_nodes_for_right_side_of_join += self._source_node_set.source_nodes_for_metric_queries @@ -909,7 +908,7 @@ def _find_dataflow_recipe( ) # Dict from the node that contains the source node to the evaluation results. - node_to_evaluation: Dict[BaseOutput, LinkableInstanceSatisfiabilityEvaluation] = {} + node_to_evaluation: Dict[DataflowPlanNode, LinkableInstanceSatisfiabilityEvaluation] = {} for node in self._sort_by_suitability(candidate_nodes_for_left_side_of_join): data_set = self._node_data_set_resolver.get_output_data_set(node) @@ -1009,7 +1008,7 @@ def _find_dataflow_recipe( def build_computed_metrics_node( self, metric_spec: MetricSpec, - aggregated_measures_node: Union[AggregateMeasuresNode, BaseOutput], + aggregated_measures_node: Union[AggregateMeasuresNode, DataflowPlanNode], aggregated_to_elements: Set[LinkableInstanceSpec], for_group_by_source_node: bool = False, ) -> ComputeMetricsNode: @@ -1182,7 +1181,7 @@ def build_aggregated_measure( queried_linkable_specs: LinkableSpecSet, time_range_constraint: Optional[TimeRangeConstraint] = None, measure_recipe: Optional[DataflowRecipe] = None, - ) -> BaseOutput: + ) -> DataflowPlanNode: """Returns a node where the measures are aggregated by the 
linkable specs and constrained appropriately. This might be a node representing a single aggregation over one semantic model, or a node representing @@ -1234,7 +1233,7 @@ def _build_aggregated_measure_from_measure_source_node( queried_linkable_specs: LinkableSpecSet, time_range_constraint: Optional[TimeRangeConstraint] = None, measure_recipe: Optional[DataflowRecipe] = None, - ) -> BaseOutput: + ) -> DataflowPlanNode: measure_spec = metric_input_measure_spec.measure_spec cumulative = metric_input_measure_spec.cumulative_description is not None cumulative_window = ( @@ -1359,7 +1358,7 @@ def _build_aggregated_measure_from_measure_source_node( ) join_targets = measure_recipe.join_targets - unaggregated_measure_node: BaseOutput + unaggregated_measure_node: DataflowPlanNode if len(join_targets) > 0: filtered_measures_with_joined_elements = JoinToBaseOutputNode( left_node=filtered_measure_source_node, @@ -1388,7 +1387,7 @@ def _build_aggregated_measure_from_measure_source_node( unaggregated_measure_node, time_range_constraint ) - pre_aggregate_node: BaseOutput = cumulative_metric_constrained_node or unaggregated_measure_node + pre_aggregate_node: DataflowPlanNode = cumulative_metric_constrained_node or unaggregated_measure_node merged_where_filter_spec = WhereFilterSpec.merge_iterable(metric_input_measure_spec.filter_specs) if len(metric_input_measure_spec.filter_specs) > 0: # Apply where constraint on the node @@ -1444,7 +1443,7 @@ def _build_aggregated_measure_from_measure_source_node( f"Expected {SqlJoinType.LEFT_OUTER} for joining to time spine after aggregation. Remove this if " f"there's a new use case." 
) - output_node: BaseOutput = JoinToTimeSpineNode( + output_node: DataflowPlanNode = JoinToTimeSpineNode( parent_node=aggregate_measures_node, requested_agg_time_dimension_specs=queried_agg_time_dimension_specs, use_custom_agg_time_dimension=not queried_linkable_specs.contains_metric_time, diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index 984d12c8e0..262d26b854 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -39,7 +39,7 @@ PartitionJoinResolver, PartitionTimeDimensionJoinDescription, ) -from metricflow.dataflow.dataflow_plan import BaseOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_to_base import JoinDescription, ValidityWindowJoinDescription from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode @@ -59,7 +59,7 @@ class JoinLinkableInstancesRecipe: satisfiable_linkable_specs. """ - node_to_join: BaseOutput + node_to_join: DataflowPlanNode # The entity to join "node_to_join" on. Only nullable if using CROSS JOIN. join_on_entity: Optional[LinklessEntitySpec] # The linkable instances from the query that can be satisfied if we join this node. 
Note that this is different from @@ -168,7 +168,7 @@ class NodeEvaluatorForLinkableInstances: def __init__( self, semantic_model_lookup: SemanticModelLookup, - nodes_available_for_joins: Sequence[BaseOutput], + nodes_available_for_joins: Sequence[DataflowPlanNode], node_data_set_resolver: DataflowPlanNodeOutputDataSetResolver, time_spine_node: MetricTimeDimensionTransformNode, ) -> None: @@ -387,7 +387,7 @@ def _update_candidates_that_can_satisfy_linkable_specs( def evaluate_node( self, - left_node: BaseOutput, + left_node: DataflowPlanNode, required_linkable_specs: Sequence[LinkableInstanceSpec], default_join_type: SqlJoinType, ) -> LinkableInstanceSatisfiabilityEvaluation: diff --git a/metricflow/dataflow/builder/source_node.py b/metricflow/dataflow/builder/source_node.py index 5bd439405e..7b23d58cbf 100644 --- a/metricflow/dataflow/builder/source_node.py +++ b/metricflow/dataflow/builder/source_node.py @@ -10,7 +10,7 @@ from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec from metricflow_semantics.specs.spec_classes import GroupByMetricSpec -from metricflow.dataflow.dataflow_plan import BaseOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.read_sql_source import ReadSqlSourceNode from metricflow.dataset.convert_semantic_model import SemanticModelToDataSetConverter @@ -30,17 +30,17 @@ class SourceNodeSet: # mapped to components with a transformation node to add `metric_time` / to support multiple aggregation time # dimensions. Each semantic model containing measures with k different aggregation time dimensions is mapped to k # components. - source_nodes_for_metric_queries: Tuple[BaseOutput, ...] + source_nodes_for_metric_queries: Tuple[DataflowPlanNode, ...] # Semantic models are 1:1 mapped to a ReadSqlSourceNode. The tuple also contains the same `time_spine_node` as # below. 
See usage in `DataflowPlanBuilder`. - source_nodes_for_group_by_item_queries: Tuple[BaseOutput, ...] + source_nodes_for_group_by_item_queries: Tuple[DataflowPlanNode, ...] # Provides the time spine. time_spine_node: MetricTimeDimensionTransformNode @property - def all_nodes(self) -> Sequence[BaseOutput]: # noqa: D102 + def all_nodes(self) -> Sequence[DataflowPlanNode]: # noqa: D102 return ( self.source_nodes_for_metric_queries + self.source_nodes_for_group_by_item_queries + (self.time_spine_node,) ) @@ -67,8 +67,8 @@ def __init__( # noqa: D107 def create_from_data_sets(self, data_sets: Sequence[SemanticModelDataSet]) -> SourceNodeSet: """Creates a `SourceNodeSet` from SemanticModelDataSets.""" - group_by_item_source_nodes: List[BaseOutput] = [] - source_nodes_for_metric_queries: List[BaseOutput] = [] + group_by_item_source_nodes: List[DataflowPlanNode] = [] + source_nodes_for_metric_queries: List[DataflowPlanNode] = [] for data_set in data_sets: read_node = ReadSqlSourceNode(data_set) diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 34ba99fee7..ef1c54edc3 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -9,11 +9,10 @@ from metricflow_semantics.dag.id_prefix import StaticIdPrefix from metricflow_semantics.dag.mf_dag import DagId, DagNode, MetricFlowDag, NodeId +from metricflow_semantics.specs.spec_classes import LinkableInstanceSpec from metricflow_semantics.visitor import Visitable, VisitorOutputT if typing.TYPE_CHECKING: - from metricflow_semantics.specs.spec_classes import LinkableInstanceSpec - from metricflow.dataflow.nodes.add_generated_uuid import AddGeneratedUuidColumnNode from metricflow.dataflow.nodes.aggregate_measures import AggregateMeasuresNode from metricflow.dataflow.nodes.combine_aggregated_outputs import CombineAggregatedOutputsNode @@ -75,7 +74,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: raise NotImplementedError 
@abstractmethod - def with_new_parents(self: NodeSelfT, new_parent_nodes: Sequence[BaseOutput]) -> NodeSelfT: + def with_new_parents(self: NodeSelfT, new_parent_nodes: Sequence[DataflowPlanNode]) -> NodeSelfT: """Creates a node with the same behavior as this node, but with a different set of parents. typing.Self would be useful here, but not available in Python 3.8. @@ -87,6 +86,11 @@ def node_type(self) -> Type: # noqa: D102 # TODO: Remove. return self.__class__ + @property + def aggregated_to_elements(self) -> Set[LinkableInstanceSpec]: + """Indicates that the node has been aggregated to these specs, guaranteeing uniqueness in all combinations.""" + return set() + class DataflowPlanNodeVisitor(Generic[VisitorOutputT], ABC): """An object that can be used to visit the nodes of a dataflow plan. @@ -171,62 +175,12 @@ def visit_join_conversion_events_node(self, node: JoinConversionEventsNode) -> V pass -class BaseOutput(DataflowPlanNode, ABC): - """A node that outputs data in a "base" format. - - The base format is where the columns represent un-aggregated measures, dimensions, and entities. - """ - - @property - def aggregated_to_elements(self) -> Set[LinkableInstanceSpec]: - """Indicates that the node has been aggregated to these specs, guaranteeing uniqueness in each combination of them.""" - return set() - - -class AggregatedMeasuresOutput(BaseOutput, ABC): - """A node that outputs data where the measures are aggregated. - - The measures are aggregated with respect to the present entities and dimensions. 
- """ - - pass - - -class ComputedMetricsOutput(BaseOutput, ABC): - """A node that outputs data that contains metrics computed from measures.""" - - pass - - -class SinkNodeVisitor(Generic[VisitorOutputT], ABC): - """Similar to DataflowPlanNodeVisitor, but only for sink nodes.""" - - @abstractmethod - def visit_write_to_result_dataframe_node(self, node: WriteToResultDataframeNode) -> VisitorOutputT: # noqa: D102 - pass - - @abstractmethod - def visit_write_to_result_table_node(self, node: WriteToResultTableNode) -> VisitorOutputT: # noqa: D102 - pass - - -class SinkOutput(DataflowPlanNode, ABC): - """A node where incoming data goes out of the graph.""" - - @abstractmethod - def accept_sink_node_visitor(self, visitor: SinkNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D102 - pass - - @property - @abstractmethod - def parent_node(self) -> BaseOutput: # noqa: D102 - pass - - -class DataflowPlan(MetricFlowDag[SinkOutput]): +class DataflowPlan(MetricFlowDag[DataflowPlanNode]): """Describes the flow of metric data as it goes from source nodes to sink nodes in the graph.""" - def __init__(self, sink_output_nodes: Sequence[SinkOutput], plan_id: Optional[DagId] = None) -> None: # noqa: D107 + def __init__( # noqa: D107 + self, sink_output_nodes: Sequence[DataflowPlanNode], plan_id: Optional[DagId] = None + ) -> None: if len(sink_output_nodes) == 0: raise RuntimeError("Can't create a dataflow plan without sink node(s).") self._sink_output_nodes = tuple(sink_output_nodes) @@ -236,10 +190,10 @@ def __init__(self, sink_output_nodes: Sequence[SinkOutput], plan_id: Optional[Da ) @property - def sink_output_nodes(self) -> Sequence[SinkOutput]: # noqa: D102 + def sink_output_nodes(self) -> Sequence[DataflowPlanNode]: # noqa: D102 return self._sink_output_nodes @property - def sink_output_node(self) -> SinkOutput: # noqa: D102 + def sink_output_node(self) -> DataflowPlanNode: # noqa: D102 assert len(self._sink_output_nodes) == 1, f"Only 1 sink node supported. 
Got: {self._sink_output_nodes}" return self._sink_output_nodes[0] diff --git a/metricflow/dataflow/nodes/add_generated_uuid.py b/metricflow/dataflow/nodes/add_generated_uuid.py index eb39bed52f..c3832819ab 100644 --- a/metricflow/dataflow/nodes/add_generated_uuid.py +++ b/metricflow/dataflow/nodes/add_generated_uuid.py @@ -6,13 +6,13 @@ from metricflow_semantics.dag.mf_dag import DisplayedProperty from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class AddGeneratedUuidColumnNode(BaseOutput): +class AddGeneratedUuidColumnNode(DataflowPlanNode): """Adds a UUID column.""" - def __init__(self, parent_node: BaseOutput) -> None: # noqa: D107 + def __init__(self, parent_node: DataflowPlanNode) -> None: # noqa: D107 super().__init__(node_id=self.create_unique_id(), parent_nodes=[parent_node]) @classmethod @@ -38,6 +38,8 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> AddGeneratedUuidColumnNode: # noqa: D102 + def with_new_parents( # noqa: D102 + self, new_parent_nodes: Sequence[DataflowPlanNode] + ) -> AddGeneratedUuidColumnNode: assert len(new_parent_nodes) == 1 return AddGeneratedUuidColumnNode(parent_node=new_parent_nodes[0]) diff --git a/metricflow/dataflow/nodes/aggregate_measures.py b/metricflow/dataflow/nodes/aggregate_measures.py index ea567fb16b..619986a093 100644 --- a/metricflow/dataflow/nodes/aggregate_measures.py +++ b/metricflow/dataflow/nodes/aggregate_measures.py @@ -1,25 +1,15 @@ from __future__ import annotations -from abc import ABC from typing import Sequence, Tuple from metricflow_semantics.dag.id_prefix import IdPrefix, 
StaticIdPrefix from metricflow_semantics.specs.spec_classes import MetricInputMeasureSpec from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class AggregatedMeasuresOutput(BaseOutput, ABC): - """A node that outputs data where the measures are aggregated. - - The measures are aggregated with respect to the present entities and dimensions. - """ - - pass - - -class AggregateMeasuresNode(AggregatedMeasuresOutput): +class AggregateMeasuresNode(DataflowPlanNode): """A node that aggregates the measures by the associated group by elements. In the event that one or more of the aggregated input measures has an alias assigned to it, any output query @@ -30,7 +20,7 @@ class AggregateMeasuresNode(AggregatedMeasuresOutput): def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, metric_input_measure_specs: Sequence[MetricInputMeasureSpec], ) -> None: """Initializer for AggregateMeasuresNode. 
@@ -56,7 +46,7 @@ def description(self) -> str: # noqa: D102 return """Aggregate Measures""" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node @property @@ -73,7 +63,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.metric_input_measure_specs == self.metric_input_measure_specs ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> AggregateMeasuresNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> AggregateMeasuresNode: # noqa: D102 assert len(new_parent_nodes) == 1 return AggregateMeasuresNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/combine_aggregated_outputs.py b/metricflow/dataflow/nodes/combine_aggregated_outputs.py index e9fc415e42..df414b1329 100644 --- a/metricflow/dataflow/nodes/combine_aggregated_outputs.py +++ b/metricflow/dataflow/nodes/combine_aggregated_outputs.py @@ -6,19 +6,17 @@ from metricflow_semantics.visitor import VisitorOutputT from metricflow.dataflow.dataflow_plan import ( - BaseOutput, - ComputedMetricsOutput, DataflowPlanNode, DataflowPlanNodeVisitor, ) -class CombineAggregatedOutputsNode(ComputedMetricsOutput): +class CombineAggregatedOutputsNode(DataflowPlanNode): """Combines metrics from different nodes into a single output.""" def __init__( # noqa: D107 self, - parent_nodes: Sequence[Union[BaseOutput, ComputedMetricsOutput]], + parent_nodes: Sequence[Union[DataflowPlanNode, DataflowPlanNode]], ) -> None: super().__init__(node_id=self.create_unique_id(), parent_nodes=parent_nodes) @@ -36,6 +34,8 @@ def description(self) -> str: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> CombineAggregatedOutputsNode: # noqa: D102 + def 
with_new_parents( # noqa: D102 + self, new_parent_nodes: Sequence[DataflowPlanNode] + ) -> CombineAggregatedOutputsNode: assert len(new_parent_nodes) == 1 return CombineAggregatedOutputsNode(parent_nodes=new_parent_nodes) diff --git a/metricflow/dataflow/nodes/compute_metrics.py b/metricflow/dataflow/nodes/compute_metrics.py index 84199dac48..8461021445 100644 --- a/metricflow/dataflow/nodes/compute_metrics.py +++ b/metricflow/dataflow/nodes/compute_metrics.py @@ -8,19 +8,17 @@ from metricflow_semantics.visitor import VisitorOutputT from metricflow.dataflow.dataflow_plan import ( - BaseOutput, - ComputedMetricsOutput, DataflowPlanNode, DataflowPlanNodeVisitor, ) -class ComputeMetricsNode(ComputedMetricsOutput): +class ComputeMetricsNode(DataflowPlanNode): """A node that computes metrics from input measures. Dimensions / entities are passed through.""" def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, metric_specs: Sequence[MetricSpec], aggregated_to_elements: Set[LinkableInstanceSpec], for_group_by_source_node: bool = False, @@ -69,7 +67,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 return displayed_properties @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 @@ -100,7 +98,7 @@ def can_combine(self, other_node: ComputeMetricsNode) -> Tuple[bool, str]: return True, "" - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> ComputeMetricsNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> ComputeMetricsNode: # noqa: D102 assert len(new_parent_nodes) == 1 return ComputeMetricsNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/constrain_time.py b/metricflow/dataflow/nodes/constrain_time.py index 66711a1b14..2fd840212f 100644 --- 
a/metricflow/dataflow/nodes/constrain_time.py +++ b/metricflow/dataflow/nodes/constrain_time.py @@ -7,11 +7,11 @@ from metricflow_semantics.filters.time_constraint import TimeRangeConstraint from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor -from metricflow.dataflow.nodes.aggregate_measures import AggregatedMeasuresOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNodeVisitor +from metricflow.dataflow.nodes.aggregate_measures import DataflowPlanNode -class ConstrainTimeRangeNode(AggregatedMeasuresOutput, BaseOutput): +class ConstrainTimeRangeNode(DataflowPlanNode): """Constrains the time range of the input data set. For example, if the input data set had "sales by date", then this would restrict the data set so that it only @@ -20,7 +20,7 @@ class ConstrainTimeRangeNode(AggregatedMeasuresOutput, BaseOutput): def __init__( # noqa: D107 self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, time_range_constraint: TimeRangeConstraint, ) -> None: self._time_range_constraint = time_range_constraint @@ -59,7 +59,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) and self.time_range_constraint == other_node.time_range_constraint - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> ConstrainTimeRangeNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> ConstrainTimeRangeNode: # noqa: D102 assert len(new_parent_nodes) == 1 return ConstrainTimeRangeNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/filter_elements.py b/metricflow/dataflow/nodes/filter_elements.py index a20ef5c146..f250d1db52 100644 --- a/metricflow/dataflow/nodes/filter_elements.py +++ b/metricflow/dataflow/nodes/filter_elements.py 
@@ -8,15 +8,15 @@ from metricflow_semantics.specs.spec_set import InstanceSpecSet from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class FilterElementsNode(BaseOutput): +class FilterElementsNode(DataflowPlanNode): """Only passes the listed elements.""" def __init__( # noqa: D107 self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, include_specs: InstanceSpecSet, replace_description: Optional[str] = None, distinct: bool = False, @@ -63,7 +63,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 return tuple(super().displayed_properties) + additional_properties @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 @@ -73,7 +73,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.distinct == self.distinct ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> FilterElementsNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> FilterElementsNode: # noqa: D102 assert len(new_parent_nodes) == 1 return FilterElementsNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/join_conversion_events.py b/metricflow/dataflow/nodes/join_conversion_events.py index fe029a4b05..62474ec8a3 100644 --- a/metricflow/dataflow/nodes/join_conversion_events.py +++ b/metricflow/dataflow/nodes/join_conversion_events.py @@ -14,17 +14,17 @@ ) from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, 
DataflowPlanNodeVisitor -class JoinConversionEventsNode(BaseOutput): +class JoinConversionEventsNode(DataflowPlanNode): """Builds a data set containing successful conversion events.""" def __init__( self, - base_node: BaseOutput, + base_node: DataflowPlanNode, base_time_dimension_spec: TimeDimensionSpec, - conversion_node: BaseOutput, + conversion_node: DataflowPlanNode, conversion_measure_spec: MeasureSpec, conversion_time_dimension_spec: TimeDimensionSpec, unique_identifier_keys: Sequence[InstanceSpec], @@ -133,7 +133,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.constant_properties == self.constant_properties ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinConversionEventsNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinConversionEventsNode: # noqa: D102 assert len(new_parent_nodes) == 2 return JoinConversionEventsNode( base_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/join_over_time.py b/metricflow/dataflow/nodes/join_over_time.py index 3b2b51f0b2..ce77996364 100644 --- a/metricflow/dataflow/nodes/join_over_time.py +++ b/metricflow/dataflow/nodes/join_over_time.py @@ -10,15 +10,15 @@ from metricflow_semantics.specs.spec_classes import TimeDimensionSpec from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class JoinOverTimeRangeNode(BaseOutput): +class JoinOverTimeRangeNode(DataflowPlanNode): """A node that allows for cumulative metric computation by doing a self join across a cumulative date range.""" def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, time_dimension_spec_for_join: TimeDimensionSpec, window: Optional[MetricTimeWindow], grain_to_date: Optional[TimeGranularity], @@ -67,7 
+67,7 @@ def description(self) -> str: # noqa: D102 return """Join Self Over Time Range""" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node @property @@ -86,7 +86,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.time_range_constraint == self.time_range_constraint ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinOverTimeRangeNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinOverTimeRangeNode: # noqa: D102 assert len(new_parent_nodes) == 1 return JoinOverTimeRangeNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/join_to_base.py b/metricflow/dataflow/nodes/join_to_base.py index 870fe3fb38..b30e9006dd 100644 --- a/metricflow/dataflow/nodes/join_to_base.py +++ b/metricflow/dataflow/nodes/join_to_base.py @@ -13,7 +13,7 @@ PartitionDimensionJoinDescription, PartitionTimeDimensionJoinDescription, ) -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor @dataclass(frozen=True) @@ -28,7 +28,7 @@ class ValidityWindowJoinDescription: class JoinDescription: """Describes how data from a node should be joined to data from another node.""" - join_node: BaseOutput + join_node: DataflowPlanNode join_on_entity: Optional[LinklessEntitySpec] join_type: SqlJoinType @@ -42,12 +42,12 @@ def __post_init__(self) -> None: # noqa: D105 raise RuntimeError("`join_on_entity` is required unless using CROSS JOIN.") -class JoinToBaseOutputNode(BaseOutput): +class JoinToBaseOutputNode(DataflowPlanNode): """A node that joins data from other nodes to a standard output node, one by one via entity.""" def __init__( self, - left_node: BaseOutput, + left_node: DataflowPlanNode, join_targets: Sequence[JoinDescription], 
node_id: Optional[NodeId] = None, ) -> None: @@ -79,7 +79,7 @@ def description(self) -> str: # noqa: D102 return """Join Standard Outputs""" @property - def left_node(self) -> BaseOutput: # noqa: D102 + def left_node(self) -> DataflowPlanNode: # noqa: D102 return self._left_node @property @@ -109,7 +109,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: return False return True - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinToBaseOutputNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinToBaseOutputNode: # noqa: D102 assert len(new_parent_nodes) > 1 new_left_node = new_parent_nodes[0] new_join_nodes = new_parent_nodes[1:] diff --git a/metricflow/dataflow/nodes/join_to_time_spine.py b/metricflow/dataflow/nodes/join_to_time_spine.py index 309320a1d3..0d8a9a298a 100644 --- a/metricflow/dataflow/nodes/join_to_time_spine.py +++ b/metricflow/dataflow/nodes/join_to_time_spine.py @@ -12,15 +12,15 @@ from metricflow_semantics.sql.sql_join_type import SqlJoinType from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class JoinToTimeSpineNode(BaseOutput, ABC): +class JoinToTimeSpineNode(DataflowPlanNode, ABC): """Join parent dataset to time spine dataset.""" def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, requested_agg_time_dimension_specs: Sequence[TimeDimensionSpec], use_custom_agg_time_dimension: bool, join_type: SqlJoinType, @@ -110,7 +110,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 ) @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: 
D102 @@ -124,7 +124,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.join_type == self.join_type ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> JoinToTimeSpineNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinToTimeSpineNode: # noqa: D102 assert len(new_parent_nodes) == 1 return JoinToTimeSpineNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/metric_time_transform.py b/metricflow/dataflow/nodes/metric_time_transform.py index 08e46336e3..1dd5389f16 100644 --- a/metricflow/dataflow/nodes/metric_time_transform.py +++ b/metricflow/dataflow/nodes/metric_time_transform.py @@ -7,10 +7,10 @@ from metricflow_semantics.dag.mf_dag import DisplayedProperty from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class MetricTimeDimensionTransformNode(BaseOutput): +class MetricTimeDimensionTransformNode(DataflowPlanNode): """A node transforms the input data set so that it contains the metric time dimension and relevant measures. The metric time dimension is used later to aggregate all measures in the data set. 
@@ -23,7 +23,7 @@ class MetricTimeDimensionTransformNode(BaseOutput): def __init__( # noqa: D107 self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, aggregation_time_dimension_reference: TimeDimensionReference, ) -> None: self._aggregation_time_dimension_reference = aggregation_time_dimension_reference @@ -53,7 +53,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 ) @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 @@ -63,7 +63,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: ) def with_new_parents( # noqa: D102 - self, new_parent_nodes: Sequence[BaseOutput] + self, new_parent_nodes: Sequence[DataflowPlanNode] ) -> MetricTimeDimensionTransformNode: # noqa: D102 assert len(new_parent_nodes) == 1 return MetricTimeDimensionTransformNode( diff --git a/metricflow/dataflow/nodes/min_max.py b/metricflow/dataflow/nodes/min_max.py index 6825175e2a..1f6268b76c 100644 --- a/metricflow/dataflow/nodes/min_max.py +++ b/metricflow/dataflow/nodes/min_max.py @@ -5,13 +5,13 @@ from metricflow_semantics.dag.id_prefix import IdPrefix, StaticIdPrefix from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class MinMaxNode(BaseOutput): +class MinMaxNode(DataflowPlanNode): """Calculate the min and max of a single instance data set.""" - def __init__(self, parent_node: BaseOutput) -> None: # noqa: D107 + def __init__(self, parent_node: DataflowPlanNode) -> None: # noqa: D107 self._parent_node = parent_node super().__init__(node_id=self.create_unique_id(), parent_nodes=(parent_node,)) @@ -27,12 +27,12 @@ def description(self) -> str: # noqa: D102 
return "Calculate min and max" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> MinMaxNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> MinMaxNode: # noqa: D102 assert len(new_parent_nodes) == 1 return MinMaxNode(parent_node=new_parent_nodes[0]) diff --git a/metricflow/dataflow/nodes/order_by_limit.py b/metricflow/dataflow/nodes/order_by_limit.py index be4643e773..7319618954 100644 --- a/metricflow/dataflow/nodes/order_by_limit.py +++ b/metricflow/dataflow/nodes/order_by_limit.py @@ -8,20 +8,18 @@ from metricflow_semantics.visitor import VisitorOutputT from metricflow.dataflow.dataflow_plan import ( - BaseOutput, - ComputedMetricsOutput, DataflowPlanNode, DataflowPlanNodeVisitor, ) -class OrderByLimitNode(ComputedMetricsOutput): +class OrderByLimitNode(DataflowPlanNode): """A node that re-orders the input data with a limit.""" def __init__( self, order_by_specs: Sequence[OrderBySpec], - parent_node: Union[BaseOutput, ComputedMetricsOutput], + parent_node: Union[DataflowPlanNode, DataflowPlanNode], limit: Optional[int] = None, ) -> None: """Constructor. 
@@ -68,7 +66,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 ) @property - def parent_node(self) -> Union[BaseOutput, ComputedMetricsOutput]: # noqa: D102 + def parent_node(self) -> Union[DataflowPlanNode, DataflowPlanNode]: # noqa: D102 return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 @@ -78,7 +76,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.limit == self.limit ) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> OrderByLimitNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> OrderByLimitNode: # noqa: D102 assert len(new_parent_nodes) == 1 return OrderByLimitNode( diff --git a/metricflow/dataflow/nodes/read_sql_source.py b/metricflow/dataflow/nodes/read_sql_source.py index 8c0ce2cc5d..a8285db5a2 100644 --- a/metricflow/dataflow/nodes/read_sql_source.py +++ b/metricflow/dataflow/nodes/read_sql_source.py @@ -8,11 +8,11 @@ from metricflow_semantics.dag.mf_dag import DisplayedProperty from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor from metricflow.dataset.sql_dataset import SqlDataSet -class ReadSqlSourceNode(BaseOutput): +class ReadSqlSourceNode(DataflowPlanNode): """A source node where data from an SQL table or SQL query is read and output.""" def __init__(self, data_set: SqlDataSet) -> None: @@ -56,6 +56,6 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) and other_node.data_set == self.data_set - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> ReadSqlSourceNode: # noqa: D102 + def 
with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> ReadSqlSourceNode: # noqa: D102 assert len(new_parent_nodes) == 0 return ReadSqlSourceNode(data_set=self.data_set) diff --git a/metricflow/dataflow/nodes/semi_additive_join.py b/metricflow/dataflow/nodes/semi_additive_join.py index 3641e7f813..f68d2e3765 100644 --- a/metricflow/dataflow/nodes/semi_additive_join.py +++ b/metricflow/dataflow/nodes/semi_additive_join.py @@ -8,10 +8,10 @@ from metricflow_semantics.specs.spec_classes import LinklessEntitySpec, TimeDimensionSpec from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class SemiAdditiveJoinNode(BaseOutput): +class SemiAdditiveJoinNode(DataflowPlanNode): """A node that performs a row filter by aggregating a given non-additive dimension. This is designed to filter a dataset down to singular non-additive time dimension values by aggregating @@ -62,7 +62,7 @@ class SemiAdditiveJoinNode(BaseOutput): def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, entity_specs: Sequence[LinklessEntitySpec], time_dimension_spec: TimeDimensionSpec, agg_by_function: AggregationType, @@ -99,7 +99,7 @@ def description(self) -> str: # noqa: D102 return f"""Join on {self.agg_by_function.name}({self.time_dimension_spec.element_name}) and {[i.element_name for i in self.entity_specs]} grouping by {self.queried_time_dimension_spec.element_name if self.queried_time_dimension_spec else None}""" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 return self._parent_node @property @@ -134,7 +134,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: and other_node.queried_time_dimension_spec == self.queried_time_dimension_spec ) - def 
with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> SemiAdditiveJoinNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> SemiAdditiveJoinNode: # noqa: D102 assert len(new_parent_nodes) == 1 return SemiAdditiveJoinNode( diff --git a/metricflow/dataflow/nodes/where_filter.py b/metricflow/dataflow/nodes/where_filter.py index e76d67bca4..fbed72fa2e 100644 --- a/metricflow/dataflow/nodes/where_filter.py +++ b/metricflow/dataflow/nodes/where_filter.py @@ -7,16 +7,15 @@ from metricflow_semantics.specs.spec_classes import WhereFilterSpec from metricflow_semantics.visitor import VisitorOutputT -from metricflow.dataflow.dataflow_plan import BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor -from metricflow.dataflow.nodes.aggregate_measures import AggregatedMeasuresOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNode, DataflowPlanNodeVisitor -class WhereConstraintNode(AggregatedMeasuresOutput): +class WhereConstraintNode(DataflowPlanNode): """Remove rows using a WHERE clause.""" def __init__( # noqa: D107 self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, where_constraint: WhereFilterSpec, ) -> None: self._where = where_constraint @@ -48,7 +47,7 @@ def displayed_properties(self) -> Sequence[DisplayedProperty]: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) and other_node.where == self.where - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> WhereConstraintNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> WhereConstraintNode: # noqa: D102 assert len(new_parent_nodes) == 1 return WhereConstraintNode( parent_node=new_parent_nodes[0], diff --git a/metricflow/dataflow/nodes/write_to_dataframe.py b/metricflow/dataflow/nodes/write_to_dataframe.py index f3de8050c4..7585385838 100644 --- 
a/metricflow/dataflow/nodes/write_to_dataframe.py +++ b/metricflow/dataflow/nodes/write_to_dataframe.py @@ -6,18 +6,15 @@ from metricflow_semantics.visitor import VisitorOutputT from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor, - SinkNodeVisitor, - SinkOutput, ) -class WriteToResultDataframeNode(SinkOutput): +class WriteToResultDataframeNode(DataflowPlanNode): """A node where incoming data gets written to a dataframe.""" - def __init__(self, parent_node: BaseOutput) -> None: # noqa: D107 + def __init__(self, parent_node: DataflowPlanNode) -> None: # noqa: D107 self._parent_node = parent_node super().__init__(node_id=self.create_unique_id(), parent_nodes=(parent_node,)) @@ -33,16 +30,15 @@ def description(self) -> str: # noqa: D102 return """Write to Dataframe""" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 assert len(self.parent_nodes) == 1 return self._parent_node - def accept_sink_node_visitor(self, visitor: SinkNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D102 - return visitor.visit_write_to_result_dataframe_node(self) - def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> WriteToResultDataframeNode: # noqa: D102 + def with_new_parents( # noqa: D102 + self, new_parent_nodes: Sequence[DataflowPlanNode] + ) -> WriteToResultDataframeNode: assert len(new_parent_nodes) == 1 return WriteToResultDataframeNode(parent_node=new_parent_nodes[0]) diff --git a/metricflow/dataflow/nodes/write_to_table.py b/metricflow/dataflow/nodes/write_to_table.py index 72860e946e..fb5ebd4caf 100644 --- a/metricflow/dataflow/nodes/write_to_table.py +++ b/metricflow/dataflow/nodes/write_to_table.py @@ -6,21 +6,18 @@ from metricflow_semantics.visitor import VisitorOutputT from 
metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor, - SinkNodeVisitor, - SinkOutput, ) from metricflow.sql.sql_table import SqlTable -class WriteToResultTableNode(SinkOutput): +class WriteToResultTableNode(DataflowPlanNode): """A node where incoming data gets written to a table.""" def __init__( self, - parent_node: BaseOutput, + parent_node: DataflowPlanNode, output_sql_table: SqlTable, ) -> None: """Constructor. @@ -45,13 +42,10 @@ def description(self) -> str: # noqa: D102 return """Write to Table""" @property - def parent_node(self) -> BaseOutput: # noqa: D102 + def parent_node(self) -> DataflowPlanNode: # noqa: D102 assert len(self.parent_nodes) == 1 return self._parent_node - def accept_sink_node_visitor(self, visitor: SinkNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D102 - return visitor.visit_write_to_result_table_node(self) - @property def output_sql_table(self) -> SqlTable: # noqa: D102 return self._output_sql_table @@ -59,7 +53,7 @@ def output_sql_table(self) -> SqlTable: # noqa: D102 def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D102 return isinstance(other_node, self.__class__) and other_node.output_sql_table == self.output_sql_table - def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> WriteToResultTableNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> WriteToResultTableNode: # noqa: D102 return WriteToResultTableNode( parent_node=new_parent_nodes[0], output_sql_table=self.output_sql_table, diff --git a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py index a5da4de58d..e8f4a26abc 100644 --- a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py +++ b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py @@ -7,7 +7,6 @@ from metricflow_semantics.specs.spec_classes import MetricSpec from 
metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor, ) @@ -38,7 +37,7 @@ class ComputeMetricsBranchCombinerResult: # noqa: D101 # Perhaps adding more metadata about how nodes got combined would be useful. # If combined_branch is None, it means combination could not occur. - combined_branch: Optional[BaseOutput] = None + combined_branch: Optional[DataflowPlanNode] = None @property def combined(self) -> bool: @@ -46,7 +45,7 @@ def combined(self) -> bool: return self.combined_branch is not None @property - def checked_combined_branch(self) -> BaseOutput: # noqa: D102 + def checked_combined_branch(self) -> DataflowPlanNode: # noqa: D102 assert self.combined_branch is not None return self.combined_branch @@ -126,7 +125,7 @@ class ComputeMetricsBranchCombiner(DataflowPlanNodeVisitor[ComputeMetricsBranchC is propagated up to the result at the root node. """ - def __init__(self, left_branch_node: BaseOutput) -> None: # noqa: D107 + def __init__(self, left_branch_node: DataflowPlanNode) -> None: # noqa: D107 self._current_left_node: DataflowPlanNode = left_branch_node self._log_level = logging.DEBUG @@ -156,7 +155,7 @@ def _log_combine_success( msg=f"Combined left_node={left_node} right_node={right_node} combined_node: {combined_node}", ) - def _combine_parent_branches(self, current_right_node: BaseOutput) -> Optional[Sequence[BaseOutput]]: + def _combine_parent_branches(self, current_right_node: DataflowPlanNode) -> Optional[Sequence[DataflowPlanNode]]: if len(self._current_left_node.parent_nodes) != len(current_right_node.parent_nodes): self._log_combine_failure( left_node=self._current_left_node, @@ -173,7 +172,7 @@ def _combine_parent_branches(self, current_right_node: BaseOutput) -> Optional[S results_of_visiting_parent_nodes.append(right_node_parent_node.accept(self)) self._current_left_node = left_position_before_recursion - combined_parents: List[BaseOutput] = [] + combined_parents: List[DataflowPlanNode] = [] for 
result in results_of_visiting_parent_nodes: if result.combined_branch is None: self._log_combine_failure( @@ -186,7 +185,7 @@ def _combine_parent_branches(self, current_right_node: BaseOutput) -> Optional[S return combined_parents - def _default_handler(self, current_right_node: BaseOutput) -> ComputeMetricsBranchCombinerResult: + def _default_handler(self, current_right_node: DataflowPlanNode) -> ComputeMetricsBranchCombinerResult: combined_parent_nodes = self._combine_parent_branches(current_right_node) if combined_parent_nodes is None: return ComputeMetricsBranchCombinerResult() diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index fbeaf4341b..aa13773ee0 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -8,11 +8,9 @@ from metricflow_semantics.dag.mf_dag import DagId from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlan, DataflowPlanNode, DataflowPlanNodeVisitor, - SinkOutput, ) from metricflow.dataflow.nodes.add_generated_uuid import AddGeneratedUuidColumnNode from metricflow.dataflow.nodes.aggregate_measures import AggregateMeasuresNode @@ -43,17 +41,17 @@ @dataclass(frozen=True) class OptimizeBranchResult: # noqa: D101 - base_output_node: Optional[BaseOutput] = None - sink_node: Optional[SinkOutput] = None + base_output_node: Optional[DataflowPlanNode] = None + sink_node: Optional[DataflowPlanNode] = None @property - def checked_base_output(self) -> BaseOutput: # noqa: D102 - assert self.base_output_node, f"Expected the result of traversal to produce a {BaseOutput}" + def checked_base_output(self) -> DataflowPlanNode: # noqa: D102 + assert self.base_output_node, f"Expected the result of traversal to produce a {DataflowPlanNode}" return self.base_output_node @property - def checked_sink_node(self) -> SinkOutput: # noqa: D102 - assert 
self.sink_node, f"Expected the result of traversal to produce a {SinkOutput}" + def checked_sink_node(self) -> DataflowPlanNode: # noqa: D102 + assert self.sink_node, f"Expected the result of traversal to produce a {DataflowPlanNode}" return self.sink_node @@ -61,9 +59,9 @@ def checked_sink_node(self) -> SinkOutput: # noqa: D102 class BranchCombinationResult: """Holds the results of combining a branch (right_branch) with one of the branches in a list (left_branch).""" - left_branch: BaseOutput - right_branch: BaseOutput - combined_branch: Optional[BaseOutput] = None + left_branch: DataflowPlanNode + right_branch: DataflowPlanNode + combined_branch: Optional[DataflowPlanNode] = None class SourceScanOptimizer( @@ -128,24 +126,24 @@ def _log_visit_node_type(self, node: DataflowPlanNode) -> None: def _default_base_output_handler( self, - node: BaseOutput, + node: DataflowPlanNode, ) -> OptimizeBranchResult: optimized_parents: Sequence[OptimizeBranchResult] = tuple( parent_node.accept(self) for parent_node in node.parent_nodes ) - # Parents should always be BaseOutput + # Parents should always be DataflowPlanNode return OptimizeBranchResult( base_output_node=node.with_new_parents(tuple(x.checked_base_output for x in optimized_parents)) ) def _default_sink_node_handler( self, - node: SinkOutput, + node: DataflowPlanNode, ) -> OptimizeBranchResult: optimized_parents: Sequence[OptimizeBranchResult] = tuple( parent_node.accept(self) for parent_node in node.parent_nodes ) - # Parents should always be BaseOutput + # Parents should always be DataflowPlanNode return OptimizeBranchResult( sink_node=node.with_new_parents(tuple(x.checked_base_output for x in optimized_parents)) ) @@ -202,7 +200,7 @@ def visit_filter_elements_node(self, node: FilterElementsNode) -> OptimizeBranch @staticmethod def _combine_branches( - left_branches: Sequence[BaseOutput], right_branch: BaseOutput + left_branches: Sequence[DataflowPlanNode], right_branch: DataflowPlanNode ) -> 
Sequence[BranchCombinationResult]: """Combine the right branch with one of the left branches. @@ -253,18 +251,18 @@ def visit_combine_aggregated_outputs_node( # noqa: D102 assert result.sink_node is None, ( f"Traversing the parents of of {node.__class__.__name__} should not have produced any " - f"{SinkOutput.__class__.__name__} nodes" + f"{DataflowPlanNode.__class__.__name__} nodes" ) assert ( result.base_output_node is not None - ), f"Traversing the parents of a CombineAggregatedOutputsNode should always produce a BaseOutput. Got: {result}" + ), f"Traversing the parents of a CombineAggregatedOutputsNode should always produce a DataflowPlanNode. Got: {result}" optimized_parent_branches.append(result.base_output_node) # Try to combine (using ComputeMetricsBranchCombiner) as many parent branches as possible in a # greedy N^2 approach. The optimality of this approach needs more thought to prove conclusively, but given # the seemingly transitive properties of the combination operation, this seems reasonable. 
- combined_parent_branches: List[BaseOutput] = [] + combined_parent_branches: List[DataflowPlanNode] = [] for optimized_parent_branch in optimized_parent_branches: combination_results = SourceScanOptimizer._combine_branches( left_branches=combined_parent_branches, right_branch=optimized_parent_branch diff --git a/metricflow/execution/dataflow_to_execution.py b/metricflow/execution/dataflow_to_execution.py index 1773e0ce39..1f079a566d 100644 --- a/metricflow/execution/dataflow_to_execution.py +++ b/metricflow/execution/dataflow_to_execution.py @@ -7,8 +7,24 @@ from metricflow.dataflow.dataflow_plan import ( DataflowPlan, DataflowPlanNode, - SinkNodeVisitor, + DataflowPlanNodeVisitor, ) +from metricflow.dataflow.nodes.add_generated_uuid import AddGeneratedUuidColumnNode +from metricflow.dataflow.nodes.aggregate_measures import AggregateMeasuresNode +from metricflow.dataflow.nodes.combine_aggregated_outputs import CombineAggregatedOutputsNode +from metricflow.dataflow.nodes.compute_metrics import ComputeMetricsNode +from metricflow.dataflow.nodes.constrain_time import ConstrainTimeRangeNode +from metricflow.dataflow.nodes.filter_elements import FilterElementsNode +from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode +from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode +from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode +from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode +from metricflow.dataflow.nodes.min_max import MinMaxNode +from metricflow.dataflow.nodes.order_by_limit import OrderByLimitNode +from metricflow.dataflow.nodes.read_sql_source import ReadSqlSourceNode +from metricflow.dataflow.nodes.semi_additive_join import SemiAdditiveJoinNode +from metricflow.dataflow.nodes.where_filter import WhereConstraintNode from metricflow.dataflow.nodes.write_to_dataframe import 
WriteToResultDataframeNode from metricflow.dataflow.nodes.write_to_table import WriteToResultTableNode from metricflow.execution.convert_to_execution_plan import ConvertToExecutionPlanResult @@ -25,7 +41,7 @@ logger = logging.getLogger(__name__) -class DataflowToExecutionPlanConverter(SinkNodeVisitor[ConvertToExecutionPlanResult]): +class DataflowToExecutionPlanConverter(DataflowPlanNodeVisitor[ConvertToExecutionPlanResult]): """Converts a dataflow plan to an execution plan.""" def __init__( @@ -99,4 +115,70 @@ def visit_write_to_result_table_node(self, node: WriteToResultTableNode) -> Conv def convert_to_execution_plan(self, dataflow_plan: DataflowPlan) -> ConvertToExecutionPlanResult: """Convert the dataflow plan to an execution plan.""" assert len(dataflow_plan.sink_output_nodes) == 1, "Only 1 sink node in the plan is currently supported." - return dataflow_plan.sink_output_nodes[0].accept_sink_node_visitor(self) + return dataflow_plan.sink_output_nodes[0].accept(self) + + @override + def visit_source_node(self, node: ReadSqlSourceNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_aggregate_measures_node(self, node: AggregateMeasuresNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_compute_metrics_node(self, node: ComputeMetricsNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_order_by_limit_node(self, node: OrderByLimitNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_where_constraint_node(self, node: WhereConstraintNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_filter_elements_node(self, node: FilterElementsNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def 
visit_combine_aggregated_outputs_node(self, node: CombineAggregatedOutputsNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_constrain_time_range_node(self, node: ConstrainTimeRangeNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_metric_time_dimension_transform_node( + self, node: MetricTimeDimensionTransformNode + ) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_min_max_node(self, node: MinMaxNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_add_generated_uuid_column_node(self, node: AddGeneratedUuidColumnNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError + + @override + def visit_join_conversion_events_node(self, node: JoinConversionEventsNode) -> ConvertToExecutionPlanResult: + raise NotImplementedError diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index a290b487f6..77b5bc1cd4 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -42,7 +42,6 @@ from metricflow_semantics.time.time_constants import ISO8601_PYTHON_FORMAT from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlanNode, DataflowPlanNodeVisitor, ) @@ -442,7 +441,7 @@ def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> SqlDataS for join_description in node.join_targets: join_on_entity = join_description.join_on_entity - right_node_to_join: 
BaseOutput = join_description.join_node + right_node_to_join: DataflowPlanNode = join_description.join_node right_data_set: SqlDataSet = right_node_to_join.accept(self) right_data_set_alias = self._next_unique_table_alias() diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py index 3133b9db31..5ad52973c9 100644 --- a/metricflow/plan_conversion/node_processor.py +++ b/metricflow/plan_conversion/node_processor.py @@ -17,7 +17,7 @@ from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver from metricflow.dataflow.builder.partitions import PartitionJoinResolver from metricflow.dataflow.dataflow_plan import ( - BaseOutput, + DataflowPlanNode, ) from metricflow.dataflow.nodes.constrain_time import ConstrainTimeRangeNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode @@ -43,8 +43,8 @@ class MultiHopJoinCandidateLineage: to get the country dimension. """ - first_node_to_join: BaseOutput - second_node_to_join: BaseOutput + first_node_to_join: DataflowPlanNode + second_node_to_join: DataflowPlanNode join_second_node_by_entity: LinklessEntitySpec @@ -55,7 +55,7 @@ class MultiHopJoinCandidate: Also see MultiHopJoinCandidateLineage. """ - node_with_multi_hop_elements: BaseOutput + node_with_multi_hop_elements: DataflowPlanNode lineage: MultiHopJoinCandidateLineage @@ -90,12 +90,12 @@ def __init__( # noqa: D107 def add_time_range_constraint( self, - source_nodes: Sequence[BaseOutput], + source_nodes: Sequence[DataflowPlanNode], metric_time_dimension_reference: TimeDimensionReference, time_range_constraint: Optional[TimeRangeConstraint] = None, - ) -> Sequence[BaseOutput]: + ) -> Sequence[DataflowPlanNode]: """Adds a time range constraint node to the input nodes.""" - processed_nodes: List[BaseOutput] = [] + processed_nodes: List[DataflowPlanNode] = [] for source_node in source_nodes: # Constrain the time range if specified. 
if time_range_constraint: @@ -120,7 +120,7 @@ def add_time_range_constraint( def _node_contains_entity( self, - node: BaseOutput, + node: DataflowPlanNode, entity_reference: EntityReference, ) -> bool: """Returns true if the output of the node contains an entity of the given types.""" @@ -149,7 +149,7 @@ def _node_contains_entity( return False def _get_candidates_nodes_for_multi_hop( - self, desired_linkable_spec: LinkableInstanceSpec, nodes: Sequence[BaseOutput], join_type: SqlJoinType + self, desired_linkable_spec: LinkableInstanceSpec, nodes: Sequence[DataflowPlanNode], join_type: SqlJoinType ) -> Sequence[MultiHopJoinCandidate]: """Assemble nodes representing all possible one-hop joins.""" if len(desired_linkable_spec.entity_links) > MAX_JOIN_HOPS: @@ -282,9 +282,9 @@ def _get_candidates_nodes_for_multi_hop( def add_multi_hop_joins( self, desired_linkable_specs: Sequence[LinkableInstanceSpec], - nodes: Sequence[BaseOutput], + nodes: Sequence[DataflowPlanNode], join_type: SqlJoinType, - ) -> Sequence[BaseOutput]: + ) -> Sequence[DataflowPlanNode]: """Assemble nodes representing all possible one-hop joins.""" all_multi_hop_join_candidates: List[MultiHopJoinCandidate] = [] lineage_for_all_multi_hop_join_candidates: Set[MultiHopJoinCandidateLineage] = set() @@ -303,10 +303,10 @@ def add_multi_hop_joins( def remove_unnecessary_nodes( self, desired_linkable_specs: Sequence[LinkableInstanceSpec], - nodes: Sequence[BaseOutput], + nodes: Sequence[DataflowPlanNode], metric_time_dimension_reference: TimeDimensionReference, time_spine_node: MetricTimeDimensionTransformNode, - ) -> List[BaseOutput]: + ) -> List[DataflowPlanNode]: """Filters out many of the nodes that can't possibly be useful for joins to obtain the desired linkable specs. A simple filter is to remove any nodes that don't share a common element with the query. 
Having a common element diff --git a/metricflow/validation/data_warehouse_model_validator.py b/metricflow/validation/data_warehouse_model_validator.py index 99a35829c2..2e1833b67e 100644 --- a/metricflow/validation/data_warehouse_model_validator.py +++ b/metricflow/validation/data_warehouse_model_validator.py @@ -36,7 +36,7 @@ from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver from metricflow.dataflow.builder.source_node import SourceNodeBuilder -from metricflow.dataflow.dataflow_plan import BaseOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataset.convert_semantic_model import SemanticModelToDataSetConverter from metricflow.dataset.dataset_classes import DataSet @@ -104,7 +104,9 @@ def _remove_entity_link_specs(specs: Tuple[LinkableInstanceSpecT, ...]) -> Tuple return tuple(spec for spec in specs if not spec.entity_links) @staticmethod - def _semantic_model_nodes(render_tools: QueryRenderingTools, semantic_model: SemanticModel) -> Sequence[BaseOutput]: + def _semantic_model_nodes( + render_tools: QueryRenderingTools, semantic_model: SemanticModel + ) -> Sequence[DataflowPlanNode]: """Builds and returns the SemanticModelDataSet node for the given semantic model.""" fetched_semantic_model = render_tools.semantic_manifest_lookup.semantic_model_lookup.get_by_reference( SemanticModelReference(semantic_model_name=semantic_model.name) @@ -347,7 +349,7 @@ def gen_measure_tasks(cls, manifest: SemanticManifest, sql_client: SqlClient) -> dataset = render_tools.converter.create_sql_source_data_set(semantic_model) semantic_model_specs = dataset.instance_set.spec_set.measure_specs - source_node_by_measure_spec: Dict[MeasureSpec, BaseOutput] = {} + source_node_by_measure_spec: Dict[MeasureSpec, DataflowPlanNode] = {} measure_specs_source_node_pair = [] for source_node in source_nodes: measure_specs = 
render_tools.node_resolver.get_output_data_set( @@ -357,7 +359,7 @@ def gen_measure_tasks(cls, manifest: SemanticManifest, sql_client: SqlClient) -> measure_specs_source_node_pair.append((measure_specs, source_node)) source_node_to_sub_task: DefaultDict[ - BaseOutput, List[DataWarehouseValidationTask] + DataflowPlanNode, List[DataWarehouseValidationTask] ] = collections.defaultdict(list) for spec in semantic_model_specs: obtained_source_node = source_node_by_measure_spec.get(spec) diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py b/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py index 88f959e50c..5313d58297 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py @@ -12,8 +12,8 @@ from metricflow_semantics.test_helpers.snapshot_helpers import assert_plan_snapshot_text_equal from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlan, + DataflowPlanNode, ) from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.write_to_dataframe import WriteToResultDataframeNode @@ -25,7 +25,7 @@ from tests_metricflow.fixtures.manifest_fixtures import MetricFlowEngineTestFixture, SemanticManifestSetup -def make_dataflow_plan(node: BaseOutput) -> DataflowPlan: # noqa: D103 +def make_dataflow_plan(node: DataflowPlanNode) -> DataflowPlan: # noqa: D103 return DataflowPlan( sink_output_nodes=[WriteToResultDataframeNode(node)], plan_id=DagId.from_id_prefix(StaticIdPrefix.OPTIMIZED_DATAFLOW_PLAN_PREFIX), diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py index 2531bcd295..6d6a23ff00 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py +++ 
b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py @@ -45,7 +45,7 @@ def test_conversion_rate( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -80,7 +80,7 @@ def test_conversion_rate_with_window( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -106,7 +106,7 @@ def test_conversion_rate_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -132,7 +132,7 @@ def test_conversion_count_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -166,7 +166,7 @@ def test_conversion_rate_with_constant_properties( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -195,5 +195,5 @@ def test_conversion_metric_join_to_timespine_and_fill_nulls_with_0( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py index 1da3d464aa..c846f25fde 100644 --- 
a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py @@ -38,7 +38,7 @@ def test_dimensions_requiring_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -66,5 +66,5 @@ def test_dimension_values_with_a_join_and_a_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py index 065e19eb02..e71ccbdb9b 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py @@ -93,5 +93,5 @@ def test_simple_query_with_metric_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node.parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py index e0c75aa5bd..21482d8bac 100644 --- a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py +++ b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py @@ -36,8 +36,8 @@ from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder from metricflow.dataflow.dataflow_plan import ( - BaseOutput, DataflowPlan, + DataflowPlanNode, ) from metricflow.dataflow.nodes.aggregate_measures import 
AggregateMeasuresNode from metricflow.dataflow.nodes.combine_aggregated_outputs import CombineAggregatedOutputsNode @@ -64,7 +64,7 @@ def convert_and_check( mf_test_configuration: MetricFlowTestConfiguration, dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, sql_client: SqlClient, - node: BaseOutput, + node: DataflowPlanNode, ) -> None: """Convert the dataflow plan to SQL and compare with snapshots.""" # Generate plans w/o optimizers @@ -1101,7 +1101,7 @@ def test_compute_metrics_node_ratio_from_multiple_semantic_models( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -1187,7 +1187,7 @@ def test_dimensions_requiring_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -1213,5 +1213,5 @@ def test_dimension_with_joined_where_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/compare_rendered_query.py b/tests_metricflow/query_rendering/compare_rendered_query.py index 3647ab12fc..861a7b8271 100644 --- a/tests_metricflow/query_rendering/compare_rendered_query.py +++ b/tests_metricflow/query_rendering/compare_rendered_query.py @@ -6,7 +6,7 @@ from metricflow_semantics.dag.mf_dag import DagId from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration -from metricflow.dataflow.dataflow_plan import BaseOutput +from metricflow.dataflow.dataflow_plan import DataflowPlanNode from metricflow.plan_conversion.dataflow_to_sql import DataflowToSqlQueryPlanConverter from 
metricflow.protocols.sql_client import SqlClient from metricflow.sql.optimizer.optimization_levels import SqlQueryOptimizationLevel @@ -19,9 +19,9 @@ def convert_and_check( mf_test_configuration: MetricFlowTestConfiguration, dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, sql_client: SqlClient, - node: BaseOutput, + node: DataflowPlanNode, ) -> None: - """Renders an engine-specific query output from a BaseOutput DataFlowPlan node. + """Renders an engine-specific query output from a DataflowPlanNode. TODO: refine interface once file move operations are complete. """ diff --git a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py index e44a0f878d..24b4f7fd29 100644 --- a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py @@ -54,7 +54,7 @@ def test_cumulative_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -95,7 +95,7 @@ def test_cumulative_metric_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -133,7 +133,7 @@ def test_cumulative_metric_with_non_adjustable_time_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -160,7 +160,7 @@ def test_cumulative_metric_no_ds( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -193,7 +193,7 @@ def test_cumulative_metric_no_window( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -223,7 +223,7 @@ def test_cumulative_metric_no_window_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -256,7 +256,7 @@ def test_cumulative_metric_grain_to_date( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -286,7 +286,7 @@ def test_cumulative_metric_month( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -315,5 +315,5 @@ def test_cumulative_metric_with_agg_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_derived_metric_rendering.py b/tests_metricflow/query_rendering/test_derived_metric_rendering.py index 65939fa56f..0b29902b02 100644 --- a/tests_metricflow/query_rendering/test_derived_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_derived_metric_rendering.py @@ -51,7 +51,7 @@ def test_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, 
sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -75,7 +75,7 @@ def test_nested_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -99,7 +99,7 @@ def test_derived_metric_with_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -130,7 +130,7 @@ def test_derived_metric_with_offset_window_and_time_filter( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -154,7 +154,7 @@ def test_derived_metric_with_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -178,7 +178,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -202,7 +202,7 @@ def test_derived_offset_metric_with_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -226,7 +226,7 @@ def test_derived_metric_with_offset_window_and_granularity( # 
noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -250,7 +250,7 @@ def test_derived_metric_with_month_dimension_and_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -274,7 +274,7 @@ def test_derived_metric_with_offset_to_grain_and_granularity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -298,7 +298,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain_and_granularity( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -322,7 +322,7 @@ def test_derived_offset_cumulative_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -347,7 +347,7 @@ def test_nested_offsets( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -372,7 +372,7 @@ def test_nested_derived_metric_with_offset_multiple_input_metrics( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -404,7 +404,7 @@ def test_nested_offsets_with_where_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -432,7 +432,7 @@ def test_nested_offsets_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -459,7 +459,7 @@ def test_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -482,7 +482,7 @@ def test_nested_filters( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -510,7 +510,7 @@ def test_cumulative_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -540,7 +540,7 @@ def test_nested_derived_metric_offset_with_joined_where_constraint_not_selected( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -566,7 +566,7 @@ def test_offset_window_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -592,7 +592,7 @@ def test_offset_to_grain_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -618,7 +618,7 @@ def test_derived_offset_metric_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -646,7 +646,7 @@ def test_multi_metric_fill_null( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -671,7 +671,7 @@ def test_nested_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -699,7 +699,7 @@ def test_nested_fill_nulls_without_time_spine_multi_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -725,7 +725,7 @@ def test_offset_window_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -751,7 +751,7 @@ def 
test_offset_to_grain_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -780,7 +780,7 @@ def test_offset_window_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -809,5 +809,5 @@ def test_offset_to_grain_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py index 5490338441..685e12e695 100644 --- a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py +++ b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py @@ -47,7 +47,7 @@ def test_simple_fill_nulls_with_0_metric_time( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -71,7 +71,7 @@ def test_simple_fill_nulls_with_0_month( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -97,7 +97,7 @@ def test_simple_fill_nulls_with_0_with_non_metric_time( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, 
sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -121,7 +121,7 @@ def test_simple_fill_nulls_with_0_with_categorical_dimension( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -145,7 +145,7 @@ def test_simple_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -169,7 +169,7 @@ def test_cumulative_fill_nulls( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -193,7 +193,7 @@ def test_derived_fill_nulls_for_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -222,5 +222,5 @@ def test_join_to_time_spine_with_filters( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py index b40622a7b0..25fab161cc 100644 --- a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py +++ b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py @@ -45,7 +45,7 @@ def 
test_simple_query_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -76,7 +76,7 @@ def test_simple_query_with_multiple_date_parts( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -102,5 +102,5 @@ def test_offset_window_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_filter_rendering.py b/tests_metricflow/query_rendering/test_metric_filter_rendering.py index b4e3940d24..248aec2566 100644 --- a/tests_metricflow/query_rendering/test_metric_filter_rendering.py +++ b/tests_metricflow/query_rendering/test_metric_filter_rendering.py @@ -35,7 +35,7 @@ def test_query_with_simple_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -60,7 +60,7 @@ def test_metric_with_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -87,7 +87,7 @@ def test_query_with_derived_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + 
node=dataflow_plan.sink_output_node, ) @@ -114,7 +114,7 @@ def test_query_with_ratio_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -144,7 +144,7 @@ def test_query_with_cumulative_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -171,7 +171,7 @@ def test_query_with_multiple_metrics_in_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -198,7 +198,7 @@ def test_filter_by_metric_in_same_semantic_model_as_queried_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -225,7 +225,7 @@ def test_distinct_values_query_with_metric_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -252,7 +252,7 @@ def test_metric_filtered_by_itself( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -278,7 +278,7 @@ def test_group_by_has_local_entity_prefix( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -304,5 +304,5 @@ def test_filter_with_conversion_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py index e28988d435..e6ea451682 100644 --- a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py +++ b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py @@ -38,7 +38,7 @@ def test_metric_time_only( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -65,7 +65,7 @@ def test_metric_time_quarter_alone( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -92,7 +92,7 @@ def test_metric_time_with_other_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -122,5 +122,5 @@ def test_dimensions_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_query_rendering.py b/tests_metricflow/query_rendering/test_query_rendering.py index 
927ad92e07..20f1fa3a71 100644 --- a/tests_metricflow/query_rendering/test_query_rendering.py +++ b/tests_metricflow/query_rendering/test_query_rendering.py @@ -63,7 +63,7 @@ def test_multihop_node( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=multihop_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -91,7 +91,7 @@ def test_filter_with_where_constraint_on_join_dim( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -121,7 +121,7 @@ def test_partitioned_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -152,7 +152,7 @@ def test_limit_rows( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -182,7 +182,7 @@ def test_distinct_values( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -211,7 +211,7 @@ def test_local_dimension_using_local_entity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -235,7 +235,7 @@ def test_measure_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -259,7 +259,7 @@ def test_measure_constraint_with_reused_measure( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -284,7 +284,7 @@ def test_measure_constraint_with_single_expr_and_alias( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -313,7 +313,7 @@ def test_join_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -339,7 +339,7 @@ def test_multi_hop_through_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -365,7 +365,7 @@ def test_multi_hop_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -391,7 +391,7 @@ def test_multiple_metrics_no_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -414,7 +414,7 @@ def test_metric_with_measures_from_multiple_sources_no_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -438,7 +438,7 @@ def test_common_semantic_model( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -468,7 +468,7 @@ def test_min_max_only_categorical( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -499,7 +499,7 @@ def test_min_max_only_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -530,7 +530,7 @@ def test_min_max_only_time_quarter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -555,7 +555,7 @@ def test_min_max_metric_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) @@ -580,5 +580,5 @@ def test_min_max_metric_time_week( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) diff --git a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py index 774e0095d8..9ec92c3e73 100644 --- 
a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py +++ b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py @@ -44,5 +44,5 @@ def test_simple_join_to_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_nodes[0].parent_node, + node=dataflow_plan.sink_output_node, ) From f9462807abfce6a254d5a7edd6671e741e082a5c Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 15 May 2024 17:07:55 -0700 Subject: [PATCH 2/8] Rename `JoinToBaseOutputNode`. --- metricflow/dataflow/builder/dataflow_plan_builder.py | 8 ++++---- metricflow/dataflow/builder/node_data_set.py | 4 ++-- metricflow/dataflow/dataflow_plan.py | 4 ++-- metricflow/dataflow/nodes/join_to_base.py | 8 ++++---- .../optimizer/source_scan/cm_branch_combiner.py | 4 ++-- .../optimizer/source_scan/source_scan_optimizer.py | 4 ++-- metricflow/execution/dataflow_to_execution.py | 4 ++-- metricflow/plan_conversion/dataflow_to_sql.py | 4 ++-- metricflow/plan_conversion/node_processor.py | 4 ++-- .../dataflow/builder/test_node_data_set.py | 4 ++-- .../source_scan/test_source_scan_optimizer.py | 4 ++-- .../plan_conversion/test_dataflow_to_sql_plan.py | 12 ++++++------ 12 files changed, 32 insertions(+), 32 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index dc05e1760a..91f6f81335 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -72,7 +72,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base 
import JoinDescription, JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.min_max import MinMaxNode from metricflow.dataflow.nodes.order_by_limit import OrderByLimitNode @@ -301,7 +301,7 @@ def _build_aggregated_conversion_node( # Build the unaggregated base measure node for computing conversions unaggregated_base_measure_node = base_measure_recipe.source_node if base_measure_recipe.join_targets: - unaggregated_base_measure_node = JoinToBaseOutputNode( + unaggregated_base_measure_node = JoinOnEntitiesNode( left_node=unaggregated_base_measure_node, join_targets=base_measure_recipe.join_targets ) filtered_unaggregated_base_node = FilterElementsNode( @@ -660,7 +660,7 @@ def _build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Da output_node = dataflow_recipe.source_node if dataflow_recipe.join_targets: - output_node = JoinToBaseOutputNode(left_node=output_node, join_targets=dataflow_recipe.join_targets) + output_node = JoinOnEntitiesNode(left_node=output_node, join_targets=dataflow_recipe.join_targets) if len(query_level_filter_specs) > 0: output_node = WhereConstraintNode( @@ -1360,7 +1360,7 @@ def _build_aggregated_measure_from_measure_source_node( join_targets = measure_recipe.join_targets unaggregated_measure_node: DataflowPlanNode if len(join_targets) > 0: - filtered_measures_with_joined_elements = JoinToBaseOutputNode( + filtered_measures_with_joined_elements = JoinOnEntitiesNode( left_node=filtered_measure_source_node, join_targets=join_targets, ) diff --git a/metricflow/dataflow/builder/node_data_set.py b/metricflow/dataflow/builder/node_data_set.py index 46e7d80921..ff5cf26677 100644 --- a/metricflow/dataflow/builder/node_data_set.py +++ b/metricflow/dataflow/builder/node_data_set.py @@ -41,13 +41,13 @@ class DataflowPlanNodeOutputDataSetResolver(DataflowToSqlQueryPlanConverter): generate a set of nodes that already include the multi-hop dimensions available, the 
join resolution logic becomes much simpler. For example, a node like: - + - + would have the dimension user_id__device_id__platform available, so to NodeEvaluatorForLinkableInstances, it's the same problem as doing a single-hop join. This simplifies the join resolution logic, though now the input diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index ef1c54edc3..4fa70a36e7 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -21,7 +21,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode - from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode + from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -105,7 +105,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> VisitorOutputT: # noqa: pass @abstractmethod - def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> VisitorOutputT: # noqa: D102 + def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> VisitorOutputT: # noqa: D102 pass @abstractmethod diff --git a/metricflow/dataflow/nodes/join_to_base.py b/metricflow/dataflow/nodes/join_to_base.py index b30e9006dd..af059beb33 100644 --- a/metricflow/dataflow/nodes/join_to_base.py +++ b/metricflow/dataflow/nodes/join_to_base.py @@ -42,8 +42,8 @@ def __post_init__(self) -> None: # noqa: D105 raise RuntimeError("`join_on_entity` is required unless using CROSS JOIN.") -class JoinToBaseOutputNode(DataflowPlanNode): - """A node that joins data from other nodes to a standard output node, one by one via entity.""" 
+class JoinOnEntitiesNode(DataflowPlanNode): + """A node that joins data from other nodes via the entities in the inputs.""" def __init__( self, @@ -109,13 +109,13 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: return False return True - def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinToBaseOutputNode: # noqa: D102 + def with_new_parents(self, new_parent_nodes: Sequence[DataflowPlanNode]) -> JoinOnEntitiesNode: # noqa: D102 assert len(new_parent_nodes) > 1 new_left_node = new_parent_nodes[0] new_join_nodes = new_parent_nodes[1:] assert len(new_join_nodes) == len(self._join_targets) - return JoinToBaseOutputNode( + return JoinOnEntitiesNode( left_node=new_left_node, join_targets=[ JoinDescription( diff --git a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py index e8f4a26abc..61dc101d7e 100644 --- a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py +++ b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py @@ -18,7 +18,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -213,7 +213,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> ComputeMetricsBranchComb return self._default_handler(node) def visit_join_to_base_output_node( # noqa: D102 - self, node: JoinToBaseOutputNode + self, node: JoinOnEntitiesNode ) -> 
ComputeMetricsBranchCombinerResult: # noqa: D102 self._log_visit_node_type(node) return self._default_handler(node) diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index aa13773ee0..d2c2cadd8c 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -20,7 +20,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -152,7 +152,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> OptimizeBranchResult: # self._log_visit_node_type(node) return self._default_base_output_handler(node) - def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> OptimizeBranchResult: # noqa: D102 + def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) return self._default_base_output_handler(node) diff --git a/metricflow/execution/dataflow_to_execution.py b/metricflow/execution/dataflow_to_execution.py index 1f079a566d..df12c65594 100644 --- a/metricflow/execution/dataflow_to_execution.py +++ b/metricflow/execution/dataflow_to_execution.py @@ -17,7 +17,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from 
metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -122,7 +122,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> ConvertToExecutionPlanRe raise NotImplementedError @override - def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> ConvertToExecutionPlanResult: + def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> ConvertToExecutionPlanResult: raise NotImplementedError @override diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 77b5bc1cd4..4367b66e44 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -53,7 +53,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -419,7 +419,7 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat ), ) - def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> SqlDataSet: + def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> SqlDataSet: """Generates the query that 
realizes the behavior of the JoinToStandardOutputNode.""" # Keep a mapping between the table aliases that would be used in the query and the MDO instances in that source. # e.g. when building "FROM from_table a JOIN right_table b", the value for key "a" would be the instances in diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py index 5ad52973c9..88904e3cbc 100644 --- a/metricflow/plan_conversion/node_processor.py +++ b/metricflow/plan_conversion/node_processor.py @@ -21,7 +21,7 @@ ) from metricflow.dataflow.nodes.constrain_time import ConstrainTimeRangeNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode -from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinOnEntitiesNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.validation.dataflow_join_validator import JoinDataflowOutputValidator @@ -242,7 +242,7 @@ def _get_candidates_nodes_for_multi_hop( multi_hop_join_candidates.append( MultiHopJoinCandidate( - node_with_multi_hop_elements=JoinToBaseOutputNode( + node_with_multi_hop_elements=JoinOnEntitiesNode( left_node=first_node_that_could_be_joined, join_targets=[ JoinDescription( diff --git a/tests_metricflow/dataflow/builder/test_node_data_set.py b/tests_metricflow/dataflow/builder/test_node_data_set.py index 30267070a5..8e392b01d3 100644 --- a/tests_metricflow/dataflow/builder/test_node_data_set.py +++ b/tests_metricflow/dataflow/builder/test_node_data_set.py @@ -22,7 +22,7 @@ from metricflow_semantics.test_helpers.snapshot_helpers import assert_spec_set_snapshot_equal from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver -from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinDescription, 
JoinOnEntitiesNode from metricflow.dataflow.nodes.read_sql_source import ReadSqlSourceNode from metricflow.dataset.sql_dataset import SqlDataSet from metricflow.plan_conversion.time_spine import TimeSpineSource @@ -110,7 +110,7 @@ def test_joined_node_data_set( # Join "revenue" with "users_latest" to get "user__home_state_latest" revenue_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping["revenue"] users_node = mf_engine_test_fixture_mapping[SemanticManifestSetup.SIMPLE_MANIFEST].read_node_mapping["users_latest"] - join_node = JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=revenue_node, join_targets=[ JoinDescription( diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py index 859cb4fd64..659a943585 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py @@ -31,7 +31,7 @@ from metricflow.dataflow.nodes.filter_elements import FilterElementsNode from metricflow.dataflow.nodes.join_conversion_events import JoinConversionEventsNode from metricflow.dataflow.nodes.join_over_time import JoinOverTimeRangeNode -from metricflow.dataflow.nodes.join_to_base import JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.min_max import MinMaxNode @@ -57,7 +57,7 @@ def _sum_parents(self, node: DataflowPlanNode) -> int: def visit_source_node(self, node: ReadSqlSourceNode) -> int: # noqa: D102 return 1 - def visit_join_to_base_output_node(self, node: JoinToBaseOutputNode) -> int: # noqa: D102 + def visit_join_to_base_output_node(self, node: 
JoinOnEntitiesNode) -> int: # noqa: D102 return self._sum_parents(node) def visit_aggregate_measures_node(self, node: AggregateMeasuresNode) -> int: # noqa: D102 diff --git a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py index 21482d8bac..8fc0e0bb13 100644 --- a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py +++ b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py @@ -44,7 +44,7 @@ from metricflow.dataflow.nodes.compute_metrics import ComputeMetricsNode from metricflow.dataflow.nodes.constrain_time import ConstrainTimeRangeNode from metricflow.dataflow.nodes.filter_elements import FilterElementsNode -from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinToBaseOutputNode +from metricflow.dataflow.nodes.join_to_base import JoinDescription, JoinOnEntitiesNode from metricflow.dataflow.nodes.join_to_time_spine import JoinToTimeSpineNode from metricflow.dataflow.nodes.metric_time_transform import MetricTimeDimensionTransformNode from metricflow.dataflow.nodes.order_by_limit import OrderByLimitNode @@ -313,7 +313,7 @@ def test_single_join_node( ), ) - join_node = JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=filtered_measure_node, join_targets=[ JoinDescription( @@ -371,7 +371,7 @@ def test_multi_join_node( ), ) - join_node = JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=filtered_measure_node, join_targets=[ JoinDescription( @@ -440,7 +440,7 @@ def test_compute_metrics_node( ), ) - join_node = JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=filtered_measure_node, join_targets=[ JoinDescription( @@ -510,7 +510,7 @@ def test_compute_metrics_node_simple_expr( ), ) - join_node = JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=filtered_measure_node, join_targets=[ JoinDescription( @@ -820,7 +820,7 @@ def test_compute_metrics_node_ratio_from_single_semantic_model( ), ) - join_node 
= JoinToBaseOutputNode( + join_node = JoinOnEntitiesNode( left_node=filtered_measures_node, join_targets=[ JoinDescription( From e881c0a84976a13c86ad2f00ef811b9ed29e0d07 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 15 May 2024 17:21:40 -0700 Subject: [PATCH 3/8] Rename `visit_join_to_base_output_node` -> `visit_join_on_entities_node`. --- metricflow/dataflow/dataflow_plan.py | 2 +- metricflow/dataflow/nodes/join_to_base.py | 2 +- metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py | 2 +- .../dataflow/optimizer/source_scan/source_scan_optimizer.py | 2 +- metricflow/execution/dataflow_to_execution.py | 2 +- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- .../optimizer/source_scan/test_source_scan_optimizer.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 4fa70a36e7..884fb012f4 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -105,7 +105,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> VisitorOutputT: # noqa: pass @abstractmethod - def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> VisitorOutputT: # noqa: D102 + def visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> VisitorOutputT: # noqa: D102 pass @abstractmethod diff --git a/metricflow/dataflow/nodes/join_to_base.py b/metricflow/dataflow/nodes/join_to_base.py index af059beb33..4894bcc459 100644 --- a/metricflow/dataflow/nodes/join_to_base.py +++ b/metricflow/dataflow/nodes/join_to_base.py @@ -72,7 +72,7 @@ def id_prefix(cls) -> IdPrefix: # noqa: D102 return StaticIdPrefix.DATAFLOW_NODE_JOIN_TO_STANDARD_OUTPUT_ID_PREFIX def accept(self, visitor: DataflowPlanNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D102 - return visitor.visit_join_to_base_output_node(self) + return visitor.visit_join_on_entities_node(self) @property def description(self) -> str: # noqa: D102 diff --git 
a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py index 61dc101d7e..e14e8a1ee7 100644 --- a/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py +++ b/metricflow/dataflow/optimizer/source_scan/cm_branch_combiner.py @@ -212,7 +212,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> ComputeMetricsBranchComb self._log_visit_node_type(node) return self._default_handler(node) - def visit_join_to_base_output_node( # noqa: D102 + def visit_join_on_entities_node( # noqa: D102 self, node: JoinOnEntitiesNode ) -> ComputeMetricsBranchCombinerResult: # noqa: D102 self._log_visit_node_type(node) diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index d2c2cadd8c..5097a7182d 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -152,7 +152,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> OptimizeBranchResult: # self._log_visit_node_type(node) return self._default_base_output_handler(node) - def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> OptimizeBranchResult: # noqa: D102 + def visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) return self._default_base_output_handler(node) diff --git a/metricflow/execution/dataflow_to_execution.py b/metricflow/execution/dataflow_to_execution.py index df12c65594..3ec2943615 100644 --- a/metricflow/execution/dataflow_to_execution.py +++ b/metricflow/execution/dataflow_to_execution.py @@ -122,7 +122,7 @@ def visit_source_node(self, node: ReadSqlSourceNode) -> ConvertToExecutionPlanRe raise NotImplementedError @override - def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> ConvertToExecutionPlanResult: + def 
visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> ConvertToExecutionPlanResult: raise NotImplementedError @override diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 4367b66e44..b7b51ee9a1 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -419,7 +419,7 @@ def visit_join_over_time_range_node(self, node: JoinOverTimeRangeNode) -> SqlDat ), ) - def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> SqlDataSet: + def visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> SqlDataSet: """Generates the query that realizes the behavior of the JoinToStandardOutputNode.""" # Keep a mapping between the table aliases that would be used in the query and the MDO instances in that source. # e.g. when building "FROM from_table a JOIN right_table b", the value for key "a" would be the instances in diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py index 659a943585..ce0c2be8d0 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py @@ -57,7 +57,7 @@ def _sum_parents(self, node: DataflowPlanNode) -> int: def visit_source_node(self, node: ReadSqlSourceNode) -> int: # noqa: D102 return 1 - def visit_join_to_base_output_node(self, node: JoinOnEntitiesNode) -> int: # noqa: D102 + def visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> int: # noqa: D102 return self._sum_parents(node) def visit_aggregate_measures_node(self, node: AggregateMeasuresNode) -> int: # noqa: D102 From b13603e4782b1501da745b33913797f967dc633e Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 8 May 2024 20:10:37 -0700 Subject: [PATCH 4/8] Remove references to `sink_output_node` from `DataflowPlan`. 
--- .../dataflow/builder/dataflow_plan_builder.py | 4 +- metricflow/dataflow/dataflow_plan.py | 21 +++---- .../source_scan/source_scan_optimizer.py | 8 +-- metricflow/engine/metricflow_engine.py | 4 +- metricflow/execution/dataflow_to_execution.py | 4 +- .../source_scan/test_cm_branch_combiner.py | 2 +- .../source_scan/test_source_scan_optimizer.py | 2 +- .../test_conversion_metrics_to_sql.py | 12 ++-- .../test_distinct_values_to_sql.py | 4 +- .../test_metric_time_dimension_to_sql.py | 2 +- .../test_dataflow_to_sql_plan.py | 14 ++--- .../test_cumulative_metric_rendering.py | 18 +++--- .../test_derived_metric_rendering.py | 60 +++++++++---------- .../test_fill_nulls_with_rendering.py | 16 ++--- .../test_granularity_date_part_rendering.py | 6 +- .../test_metric_filter_rendering.py | 22 +++---- .../test_metric_time_without_metrics.py | 8 +-- .../query_rendering/test_query_rendering.py | 40 ++++++------- .../test_time_spine_join_rendering.py | 2 +- 19 files changed, 121 insertions(+), 128 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 91f6f81335..0d7cc19297 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -210,7 +210,7 @@ def _build_plan( ) plan_id = DagId.from_id_prefix(StaticIdPrefix.DATAFLOW_PLAN_PREFIX) - plan = DataflowPlan(sink_output_nodes=[sink_node], plan_id=plan_id) + plan = DataflowPlan(sink_nodes=[sink_node], plan_id=plan_id) for optimizer in optimizers: logger.info(f"Applying {optimizer.__class__.__name__}") try: @@ -682,7 +682,7 @@ def _build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Da parent_node=output_node, order_by_specs=query_spec.order_by_specs, limit=query_spec.limit ) - return DataflowPlan(sink_output_nodes=[sink_node]) + return DataflowPlan(sink_nodes=[sink_node]) @staticmethod def build_sink_node( diff --git a/metricflow/dataflow/dataflow_plan.py 
b/metricflow/dataflow/dataflow_plan.py index 884fb012f4..808c481da2 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -178,22 +178,15 @@ def visit_join_conversion_events_node(self, node: JoinConversionEventsNode) -> V class DataflowPlan(MetricFlowDag[DataflowPlanNode]): """Describes the flow of metric data as it goes from source nodes to sink nodes in the graph.""" - def __init__( # noqa: D107 - self, sink_output_nodes: Sequence[DataflowPlanNode], plan_id: Optional[DagId] = None - ) -> None: - if len(sink_output_nodes) == 0: - raise RuntimeError("Can't create a dataflow plan without sink node(s).") - self._sink_output_nodes = tuple(sink_output_nodes) + def __init__(self, sink_nodes: Sequence[DataflowPlanNode], plan_id: Optional[DagId] = None) -> None: # noqa: D107 + assert len(sink_nodes) == 1, f"Exactly 1 sink node is supported. Got: {sink_nodes}" super().__init__( dag_id=plan_id or DagId.from_id_prefix(StaticIdPrefix.DATAFLOW_PLAN_PREFIX), - sink_nodes=tuple(sink_output_nodes), + sink_nodes=tuple(sink_nodes), ) @property - def sink_output_nodes(self) -> Sequence[DataflowPlanNode]: # noqa: D102 - return self._sink_output_nodes - - @property - def sink_output_node(self) -> DataflowPlanNode: # noqa: D102 - assert len(self._sink_output_nodes) == 1, f"Only 1 sink node supported. Got: {self._sink_output_nodes}" - return self._sink_output_nodes[0] + def checked_sink_node(self) -> DataflowPlanNode: + """If this has a single sink node, return it. Otherwise, raise an exception.""" + assert len(self._sink_nodes) == 1, f"Exactly 1 sink node is supported. 
Got: {self._sink_nodes}" + return self._sink_nodes[0] diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index 5097a7182d..7bc82d5fd2 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -313,12 +313,12 @@ def visit_metric_time_dimension_transform_node( # noqa: D102 return self._default_base_output_handler(node) def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan: # noqa: D102 - optimized_result: OptimizeBranchResult = dataflow_plan.sink_output_node.accept(self) + optimized_result: OptimizeBranchResult = dataflow_plan.checked_sink_node.accept(self) logger.log( level=self._log_level, msg=f"Optimized:\n\n" - f"{dataflow_plan.sink_output_node.structure_text()}\n\n" + f"{dataflow_plan.checked_sink_node.structure_text()}\n\n" f"to:\n\n" f"{optimized_result.checked_sink_node.structure_text()}", ) @@ -326,11 +326,11 @@ def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan: # noqa: D102 if optimized_result.sink_node: return DataflowPlan( plan_id=DagId.from_id_prefix(StaticIdPrefix.OPTIMIZED_DATAFLOW_PLAN_PREFIX), - sink_output_nodes=[optimized_result.sink_node], + sink_nodes=[optimized_result.sink_node], ) logger.log(level=self._log_level, msg="Optimizer didn't produce a result, so returning the same plan") return DataflowPlan( - sink_output_nodes=[dataflow_plan.sink_output_node], + sink_nodes=[dataflow_plan.checked_sink_node], ) def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> OptimizeBranchResult: # noqa: D102 diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index 79a6aea9cd..f3d70d860c 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -507,10 +507,10 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me else: 
dataflow_plan = self._dataflow_plan_builder.build_plan_for_distinct_values(query_spec=query_spec) - if len(dataflow_plan.sink_output_nodes) > 1: + if len(dataflow_plan.sink_nodes) > 1: raise NotImplementedError( f"Multiple output nodes in the dataflow plan not yet supported. " - f"Got tasks: {dataflow_plan.sink_output_nodes}" + f"Got tasks: {dataflow_plan.sink_nodes}" ) convert_to_execution_plan_result = self._to_execution_plan_converter.convert_to_execution_plan(dataflow_plan) diff --git a/metricflow/execution/dataflow_to_execution.py b/metricflow/execution/dataflow_to_execution.py index 3ec2943615..534a7a94c2 100644 --- a/metricflow/execution/dataflow_to_execution.py +++ b/metricflow/execution/dataflow_to_execution.py @@ -114,8 +114,8 @@ def visit_write_to_result_table_node(self, node: WriteToResultTableNode) -> Conv def convert_to_execution_plan(self, dataflow_plan: DataflowPlan) -> ConvertToExecutionPlanResult: """Convert the dataflow plan to an execution plan.""" - assert len(dataflow_plan.sink_output_nodes) == 1, "Only 1 sink node in the plan is currently supported." - return dataflow_plan.sink_output_nodes[0].accept(self) + assert len(dataflow_plan.sink_nodes) == 1, "Only 1 sink node in the plan is currently supported." 
+ return dataflow_plan.sink_nodes[0].accept(self) @override def visit_source_node(self, node: ReadSqlSourceNode) -> ConvertToExecutionPlanResult: diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py b/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py index 5313d58297..f08d917117 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_cm_branch_combiner.py @@ -27,7 +27,7 @@ def make_dataflow_plan(node: DataflowPlanNode) -> DataflowPlan: # noqa: D103 return DataflowPlan( - sink_output_nodes=[WriteToResultDataframeNode(node)], + sink_nodes=[WriteToResultDataframeNode(node)], plan_id=DagId.from_id_prefix(StaticIdPrefix.OPTIMIZED_DATAFLOW_PLAN_PREFIX), ) diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py index ce0c2be8d0..169bd44253 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py @@ -109,7 +109,7 @@ def visit_join_conversion_events_node(self, node: JoinConversionEventsNode) -> i return self._sum_parents(node) def count_source_nodes(self, dataflow_plan: DataflowPlan) -> int: # noqa: D102 - return dataflow_plan.sink_output_node.accept(self) + return dataflow_plan.checked_sink_node.accept(self) def check_optimization( # noqa: D103 diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py index 6d6a23ff00..ee3fc5404d 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py @@ -45,7 +45,7 @@ def test_conversion_rate( 
mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -80,7 +80,7 @@ def test_conversion_rate_with_window( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -106,7 +106,7 @@ def test_conversion_rate_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -132,7 +132,7 @@ def test_conversion_count_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -166,7 +166,7 @@ def test_conversion_rate_with_constant_properties( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -195,5 +195,5 @@ def test_conversion_metric_join_to_timespine_and_fill_nulls_with_0( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py index c846f25fde..8319f4a84e 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py @@ -38,7 +38,7 @@ def test_dimensions_requiring_join( 
mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -66,5 +66,5 @@ def test_dimension_values_with_a_join_and_a_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py index e71ccbdb9b..30f91f213c 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py @@ -93,5 +93,5 @@ def test_simple_query_with_metric_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py index 8fc0e0bb13..4fa68e8d74 100644 --- a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py +++ b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py @@ -534,7 +534,7 @@ def test_compute_metrics_node_simple_expr( ) sink_node = WriteToResultDataframeNode(compute_metrics_node) - dataflow_plan = DataflowPlan(sink_output_nodes=[sink_node], plan_id=DagId.from_str("plan0")) + dataflow_plan = DataflowPlan(sink_nodes=[sink_node], plan_id=DagId.from_str("plan0")) assert_plan_snapshot_text_equal( request=request, @@ -606,7 +606,7 @@ def test_join_to_time_spine_node_without_offset( join_type=SqlJoinType.INNER, ) sink_node = WriteToResultDataframeNode(join_to_time_spine_node) - 
dataflow_plan = DataflowPlan(sink_output_nodes=[sink_node], plan_id=DagId.from_str("plan0")) + dataflow_plan = DataflowPlan(sink_nodes=[sink_node], plan_id=DagId.from_str("plan0")) assert_plan_snapshot_text_equal( request=request, @@ -679,7 +679,7 @@ def test_join_to_time_spine_node_with_offset_window( ) sink_node = WriteToResultDataframeNode(join_to_time_spine_node) - dataflow_plan = DataflowPlan(sink_output_nodes=[sink_node], plan_id=DagId.from_str("plan0")) + dataflow_plan = DataflowPlan(sink_nodes=[sink_node], plan_id=DagId.from_str("plan0")) assert_plan_snapshot_text_equal( request=request, @@ -753,7 +753,7 @@ def test_join_to_time_spine_node_with_offset_to_grain( ) sink_node = WriteToResultDataframeNode(join_to_time_spine_node) - dataflow_plan = DataflowPlan(sink_output_nodes=[sink_node], plan_id=DagId.from_str("plan0")) + dataflow_plan = DataflowPlan(sink_nodes=[sink_node], plan_id=DagId.from_str("plan0")) assert_plan_snapshot_text_equal( request=request, @@ -1101,7 +1101,7 @@ def test_compute_metrics_node_ratio_from_multiple_semantic_models( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -1187,7 +1187,7 @@ def test_dimensions_requiring_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -1213,5 +1213,5 @@ def test_dimension_with_joined_where_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py index 24b4f7fd29..6fb5bece6d 100644 --- 
a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py @@ -54,7 +54,7 @@ def test_cumulative_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -95,7 +95,7 @@ def test_cumulative_metric_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -133,7 +133,7 @@ def test_cumulative_metric_with_non_adjustable_time_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -160,7 +160,7 @@ def test_cumulative_metric_no_ds( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -193,7 +193,7 @@ def test_cumulative_metric_no_window( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -223,7 +223,7 @@ def test_cumulative_metric_no_window_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -256,7 +256,7 @@ def test_cumulative_metric_grain_to_date( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + 
node=dataflow_plan.checked_sink_node, ) @@ -286,7 +286,7 @@ def test_cumulative_metric_month( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -315,5 +315,5 @@ def test_cumulative_metric_with_agg_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_derived_metric_rendering.py b/tests_metricflow/query_rendering/test_derived_metric_rendering.py index 0b29902b02..6551fa1a11 100644 --- a/tests_metricflow/query_rendering/test_derived_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_derived_metric_rendering.py @@ -51,7 +51,7 @@ def test_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -75,7 +75,7 @@ def test_nested_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -99,7 +99,7 @@ def test_derived_metric_with_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -130,7 +130,7 @@ def test_derived_metric_with_offset_window_and_time_filter( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + 
node=dataflow_plan.checked_sink_node, ) @@ -154,7 +154,7 @@ def test_derived_metric_with_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -178,7 +178,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -202,7 +202,7 @@ def test_derived_offset_metric_with_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -226,7 +226,7 @@ def test_derived_metric_with_offset_window_and_granularity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -250,7 +250,7 @@ def test_derived_metric_with_month_dimension_and_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -274,7 +274,7 @@ def test_derived_metric_with_offset_to_grain_and_granularity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -298,7 +298,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain_and_granularity( mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -322,7 +322,7 @@ def test_derived_offset_cumulative_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -347,7 +347,7 @@ def test_nested_offsets( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -372,7 +372,7 @@ def test_nested_derived_metric_with_offset_multiple_input_metrics( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -404,7 +404,7 @@ def test_nested_offsets_with_where_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -432,7 +432,7 @@ def test_nested_offsets_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -459,7 +459,7 @@ def test_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -482,7 +482,7 @@ def test_nested_filters( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, 
sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -510,7 +510,7 @@ def test_cumulative_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -540,7 +540,7 @@ def test_nested_derived_metric_offset_with_joined_where_constraint_not_selected( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -566,7 +566,7 @@ def test_offset_window_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -592,7 +592,7 @@ def test_offset_to_grain_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -618,7 +618,7 @@ def test_derived_offset_metric_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -646,7 +646,7 @@ def test_multi_metric_fill_null( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -671,7 +671,7 @@ def test_nested_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, 
sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -699,7 +699,7 @@ def test_nested_fill_nulls_without_time_spine_multi_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -725,7 +725,7 @@ def test_offset_window_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -751,7 +751,7 @@ def test_offset_to_grain_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -780,7 +780,7 @@ def test_offset_window_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -809,5 +809,5 @@ def test_offset_to_grain_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py index 685e12e695..25192e89ed 100644 --- a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py +++ b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py @@ -47,7 +47,7 @@ def test_simple_fill_nulls_with_0_metric_time( # noqa: D103 
mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -71,7 +71,7 @@ def test_simple_fill_nulls_with_0_month( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -97,7 +97,7 @@ def test_simple_fill_nulls_with_0_with_non_metric_time( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -121,7 +121,7 @@ def test_simple_fill_nulls_with_0_with_categorical_dimension( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -145,7 +145,7 @@ def test_simple_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -169,7 +169,7 @@ def test_cumulative_fill_nulls( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -193,7 +193,7 @@ def test_derived_fill_nulls_for_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -222,5 +222,5 @@ def test_join_to_time_spine_with_filters( # noqa: D103 
mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py index 25fab161cc..c845140021 100644 --- a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py +++ b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py @@ -45,7 +45,7 @@ def test_simple_query_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -76,7 +76,7 @@ def test_simple_query_with_multiple_date_parts( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -102,5 +102,5 @@ def test_offset_window_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_filter_rendering.py b/tests_metricflow/query_rendering/test_metric_filter_rendering.py index 248aec2566..57498b7ed4 100644 --- a/tests_metricflow/query_rendering/test_metric_filter_rendering.py +++ b/tests_metricflow/query_rendering/test_metric_filter_rendering.py @@ -35,7 +35,7 @@ def test_query_with_simple_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -60,7 +60,7 @@ def 
test_metric_with_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -87,7 +87,7 @@ def test_query_with_derived_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -114,7 +114,7 @@ def test_query_with_ratio_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -144,7 +144,7 @@ def test_query_with_cumulative_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -171,7 +171,7 @@ def test_query_with_multiple_metrics_in_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -198,7 +198,7 @@ def test_filter_by_metric_in_same_semantic_model_as_queried_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -225,7 +225,7 @@ def test_distinct_values_query_with_metric_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -252,7 +252,7 @@ def test_metric_filtered_by_itself( mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -278,7 +278,7 @@ def test_group_by_has_local_entity_prefix( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -304,5 +304,5 @@ def test_filter_with_conversion_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py index e6ea451682..2f5fd1a917 100644 --- a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py +++ b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py @@ -38,7 +38,7 @@ def test_metric_time_only( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -65,7 +65,7 @@ def test_metric_time_quarter_alone( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -92,7 +92,7 @@ def test_metric_time_with_other_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -122,5 +122,5 @@ def test_dimensions_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_query_rendering.py b/tests_metricflow/query_rendering/test_query_rendering.py index 20f1fa3a71..3200e0c8da 100644 --- a/tests_metricflow/query_rendering/test_query_rendering.py +++ b/tests_metricflow/query_rendering/test_query_rendering.py @@ -63,7 +63,7 @@ def test_multihop_node( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=multihop_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -91,7 +91,7 @@ def test_filter_with_where_constraint_on_join_dim( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -121,7 +121,7 @@ def test_partitioned_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -152,7 +152,7 @@ def test_limit_rows( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -182,7 +182,7 @@ def test_distinct_values( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -211,7 +211,7 @@ def test_local_dimension_using_local_entity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ 
-235,7 +235,7 @@ def test_measure_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -259,7 +259,7 @@ def test_measure_constraint_with_reused_measure( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -284,7 +284,7 @@ def test_measure_constraint_with_single_expr_and_alias( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -313,7 +313,7 @@ def test_join_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -339,7 +339,7 @@ def test_multi_hop_through_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -365,7 +365,7 @@ def test_multi_hop_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -391,7 +391,7 @@ def test_multiple_metrics_no_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -414,7 +414,7 @@ def test_metric_with_measures_from_multiple_sources_no_dimensions( # noqa: 
D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -438,7 +438,7 @@ def test_common_semantic_model( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -468,7 +468,7 @@ def test_min_max_only_categorical( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -499,7 +499,7 @@ def test_min_max_only_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -530,7 +530,7 @@ def test_min_max_only_time_quarter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -555,7 +555,7 @@ def test_min_max_metric_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) @@ -580,5 +580,5 @@ def test_min_max_metric_time_week( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) diff --git a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py index 9ec92c3e73..797f762f70 100644 --- 
a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py +++ b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py @@ -44,5 +44,5 @@ def test_simple_join_to_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.sink_output_node, + node=dataflow_plan.checked_sink_node, ) From 80f5af9787d24366a6fc50541b2d5e1e76225283 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Thu, 9 May 2024 00:09:46 -0700 Subject: [PATCH 5/8] Rename `base_output_node` -> `optimized_branch`. --- .../source_scan/source_scan_optimizer.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index 7bc82d5fd2..81673ae9fa 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -41,13 +41,13 @@ @dataclass(frozen=True) class OptimizeBranchResult: # noqa: D101 - base_output_node: Optional[DataflowPlanNode] = None + optimized_branch: Optional[DataflowPlanNode] = None sink_node: Optional[DataflowPlanNode] = None @property def checked_base_output(self) -> DataflowPlanNode: # noqa: D102 - assert self.base_output_node, f"Expected the result of traversal to produce a {DataflowPlanNode}" - return self.base_output_node + assert self.optimized_branch, f"Expected the result of traversal to produce a {DataflowPlanNode}" + return self.optimized_branch @property def checked_sink_node(self) -> DataflowPlanNode: # noqa: D102 @@ -133,7 +133,7 @@ def _default_base_output_handler( ) # Parents should always be DataflowPlanNode return OptimizeBranchResult( - base_output_node=node.with_new_parents(tuple(x.checked_base_output for x in optimized_parents)) + optimized_branch=node.with_new_parents(tuple(x.checked_base_output 
for x in optimized_parents)) ) def _default_sink_node_handler( @@ -164,17 +164,17 @@ def visit_compute_metrics_node(self, node: ComputeMetricsNode) -> OptimizeBranch self._log_visit_node_type(node) # Run the optimizer on the parent branch to handle derived metrics, which are defined recursively in the DAG. optimized_parent_result: OptimizeBranchResult = node.parent_node.accept(self) - if optimized_parent_result.base_output_node is not None: + if optimized_parent_result.optimized_branch is not None: return OptimizeBranchResult( - base_output_node=ComputeMetricsNode( - parent_node=optimized_parent_result.base_output_node, + optimized_branch=ComputeMetricsNode( + parent_node=optimized_parent_result.optimized_branch, metric_specs=node.metric_specs, for_group_by_source_node=node.for_group_by_source_node, aggregated_to_elements=node.aggregated_to_elements, ) ) - return OptimizeBranchResult(base_output_node=node) + return OptimizeBranchResult(optimized_branch=node) def visit_order_by_limit_node(self, node: OrderByLimitNode) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) @@ -255,9 +255,9 @@ def visit_combine_aggregated_outputs_node( # noqa: D102 ) assert ( - result.base_output_node is not None + result.optimized_branch is not None ), f"Traversing the parents of a CombineAggregatedOutputsNode should always produce a DataflowPlanNode. Got: {result}" - optimized_parent_branches.append(result.base_output_node) + optimized_parent_branches.append(result.optimized_branch) # Try to combine (using ComputeMetricsBranchCombiner) as many parent branches as possible in a # greedy N^2 approach. The optimality of this approach needs more thought to prove conclusively, but given @@ -288,10 +288,10 @@ def visit_combine_aggregated_outputs_node( # noqa: D102 # If we were able to reduce the parent branches of the CombineAggregatedOutputsNode into a single one, there's no need # for a CombineAggregatedOutputsNode. 
if len(combined_parent_branches) == 1: - return OptimizeBranchResult(base_output_node=combined_parent_branches[0]) + return OptimizeBranchResult(optimized_branch=combined_parent_branches[0]) return OptimizeBranchResult( - base_output_node=CombineAggregatedOutputsNode(parent_nodes=combined_parent_branches) + optimized_branch=CombineAggregatedOutputsNode(parent_nodes=combined_parent_branches) ) def visit_constrain_time_range_node(self, node: ConstrainTimeRangeNode) -> OptimizeBranchResult: # noqa: D102 From 3c942fc14d42e4ff8bc8c32f17932268902e0b24 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Thu, 9 May 2024 00:15:42 -0700 Subject: [PATCH 6/8] Simplify `OptimizeBranchResult`. --- .../source_scan/source_scan_optimizer.py | 51 +++---------------- 1 file changed, 7 insertions(+), 44 deletions(-) diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index 81673ae9fa..d669a7c8dc 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -41,18 +41,7 @@ @dataclass(frozen=True) class OptimizeBranchResult: # noqa: D101 - optimized_branch: Optional[DataflowPlanNode] = None - sink_node: Optional[DataflowPlanNode] = None - - @property - def checked_base_output(self) -> DataflowPlanNode: # noqa: D102 - assert self.optimized_branch, f"Expected the result of traversal to produce a {DataflowPlanNode}" - return self.optimized_branch - - @property - def checked_sink_node(self) -> DataflowPlanNode: # noqa: D102 - assert self.sink_node, f"Expected the result of traversal to produce a {DataflowPlanNode}" - return self.sink_node + optimized_branch: DataflowPlanNode @dataclass(frozen=True) @@ -133,19 +122,7 @@ def _default_base_output_handler( ) # Parents should always be DataflowPlanNode return OptimizeBranchResult( - optimized_branch=node.with_new_parents(tuple(x.checked_base_output for x in 
optimized_parents)) - ) - - def _default_sink_node_handler( - self, - node: DataflowPlanNode, - ) -> OptimizeBranchResult: - optimized_parents: Sequence[OptimizeBranchResult] = tuple( - parent_node.accept(self) for parent_node in node.parent_nodes - ) - # Parents should always be DataflowPlanNode - return OptimizeBranchResult( - sink_node=node.with_new_parents(tuple(x.checked_base_output for x in optimized_parents)) + optimized_branch=node.with_new_parents(tuple(x.optimized_branch for x in optimized_parents)) ) def visit_source_node(self, node: ReadSqlSourceNode) -> OptimizeBranchResult: # noqa: D102 @@ -188,11 +165,11 @@ def visit_write_to_result_dataframe_node( # noqa: D102 self, node: WriteToResultDataframeNode ) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) - return self._default_sink_node_handler(node) + return self._default_base_output_handler(node) def visit_write_to_result_table_node(self, node: WriteToResultTableNode) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) - return self._default_sink_node_handler(node) + return self._default_base_output_handler(node) def visit_filter_elements_node(self, node: FilterElementsNode) -> OptimizeBranchResult: # noqa: D102 self._log_visit_node_type(node) @@ -248,15 +225,6 @@ def visit_combine_aggregated_outputs_node( # noqa: D102 # Run the optimizer on the parent branch to handle derived metrics, which are defined recursively in the DAG. for parent_branch in node.parent_nodes: result: OptimizeBranchResult = parent_branch.accept(self) - - assert result.sink_node is None, ( - f"Traversing the parents of of {node.__class__.__name__} should not have produced any " - f"{DataflowPlanNode.__class__.__name__} nodes" - ) - - assert ( - result.optimized_branch is not None - ), f"Traversing the parents of a CombineAggregatedOutputsNode should always produce a DataflowPlanNode. 
Got: {result}" optimized_parent_branches.append(result.optimized_branch) # Try to combine (using ComputeMetricsBranchCombiner) as many parent branches as possible in a @@ -320,17 +288,12 @@ def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan: # noqa: D102 msg=f"Optimized:\n\n" f"{dataflow_plan.checked_sink_node.structure_text()}\n\n" f"to:\n\n" - f"{optimized_result.checked_sink_node.structure_text()}", + f"{optimized_result.optimized_branch.structure_text()}", ) - if optimized_result.sink_node: - return DataflowPlan( - plan_id=DagId.from_id_prefix(StaticIdPrefix.OPTIMIZED_DATAFLOW_PLAN_PREFIX), - sink_nodes=[optimized_result.sink_node], - ) - logger.log(level=self._log_level, msg="Optimizer didn't produce a result, so returning the same plan") return DataflowPlan( - sink_nodes=[dataflow_plan.checked_sink_node], + plan_id=DagId.from_id_prefix(StaticIdPrefix.OPTIMIZED_DATAFLOW_PLAN_PREFIX), + sink_nodes=[optimized_result.optimized_branch], ) def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> OptimizeBranchResult: # noqa: D102 From 6f5dcce4b5e5f6dac8e45b4703af48420abebd9a Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 8 May 2024 20:03:01 -0700 Subject: [PATCH 7/8] Update snapshots. 
--- .../DataflowPlan/test_cyclic_join__dfp_0.xml | 4 ++-- .../DataflowPlan/test_common_semantic_model__dfp_0.xml | 8 ++++---- .../test_dimensions_with_time_constraint__dfp_0.xml | 4 ++-- .../test_distinct_values_plan_with_join__dfp_0.xml | 4 ++-- .../DataflowPlan/test_joined_plan__dfp_0.xml | 4 ++-- .../DataflowPlan/test_measure_constraint_plan__dfp_0.xml | 8 ++++---- .../test_metric_in_metric_where_filter__dfp_0.xml | 4 ++-- .../test_metric_in_query_where_filter__dfp_0.xml | 4 ++-- .../test_metric_time_with_other_dimensions__dfp_0.xml | 4 ++-- ...est_multi_semantic_model_ratio_metrics_plan__dfp_0.xml | 8 ++++---- .../DataflowPlan/test_multihop_join_plan__dfp_0.xml | 8 ++++---- ...st_single_semantic_model_ratio_metrics_plan__dfp_0.xml | 8 ++++---- .../DataflowPlan/test_where_constrained_plan__dfp_0.xml | 4 ++-- ...where_constrained_with_common_linkable_plan__dfp_0.xml | 4 ++-- .../test_compute_metrics_node_simple_expr__plan0.xml | 4 ++-- .../test_2_metrics_from_1_semantic_model__dfp_0.xml | 8 ++++---- .../test_2_metrics_from_1_semantic_model__dfpo_0.xml | 4 ++-- 17 files changed, 46 insertions(+), 46 deletions(-) diff --git a/tests_metricflow/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/tests_metricflow/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 1ade48ce17..45c1d57b39 100644 --- a/tests_metricflow/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/tests_metricflow/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -19,7 +19,7 @@ - + @@ -57,7 +57,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml index 7b3ec666b1..b90e88c3b1 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml +++ 
b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml @@ -24,7 +24,7 @@ - + @@ -68,7 +68,7 @@ - + @@ -91,7 +91,7 @@ - + @@ -136,7 +136,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml index 99530dd03e..c83d58ff9a 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_dimensions_with_time_constraint__dfp_0.xml @@ -14,7 +14,7 @@ - + @@ -40,7 +40,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml index 37d092fc5a..4993c6a9ef 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml @@ -58,7 +58,7 @@ - + @@ -84,7 +84,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml index 1c9054d896..65069397ea 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml @@ -25,7 +25,7 @@ - + @@ -73,7 +73,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml 
b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml index b7e705562b..c6ff54aff1 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml @@ -119,7 +119,7 @@ - + @@ -168,7 +168,7 @@ - + @@ -284,7 +284,7 @@ - + @@ -332,7 +332,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_metric_where_filter__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_metric_where_filter__dfp_0.xml index 3630a0079d..7c57dc644e 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_metric_where_filter__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_metric_where_filter__dfp_0.xml @@ -42,7 +42,7 @@ - + @@ -107,7 +107,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_query_where_filter__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_query_where_filter__dfp_0.xml index 2eb712ec67..2ad14bb3e6 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_query_where_filter__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_in_query_where_filter__dfp_0.xml @@ -62,7 +62,7 @@ - + @@ -127,7 +127,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml index 077dec9a99..18c22f048b 100644 --- 
a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_with_other_dimensions__dfp_0.xml @@ -14,7 +14,7 @@ - + @@ -59,7 +59,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml index a99dea7083..73f42dfb84 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -28,7 +28,7 @@ - + @@ -73,7 +73,7 @@ - + @@ -96,7 +96,7 @@ - + @@ -141,7 +141,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index 1aaf339860..5f40f7ab69 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -22,7 +22,7 @@ - + @@ -74,7 +74,7 @@ - + @@ -270,9 +270,9 @@ - + - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml index b90a880dda..f939bcdf1b 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml +++ 
b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -28,7 +28,7 @@ - + @@ -73,7 +73,7 @@ - + @@ -96,7 +96,7 @@ - + @@ -141,7 +141,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml index f885245449..159ef536f6 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml @@ -116,7 +116,7 @@ - + @@ -165,7 +165,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml index 783785f5cb..9a9099768e 100644 --- a/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml +++ b/tests_metricflow/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml @@ -100,7 +100,7 @@ - + @@ -143,7 +143,7 @@ - + diff --git a/tests_metricflow/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml b/tests_metricflow/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml index 3e12b66d6e..ae6b453518 100644 --- a/tests_metricflow/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml +++ b/tests_metricflow/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml @@ -9,7 +9,7 @@ - + @@ -42,7 +42,7 @@ - + diff --git 
a/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml b/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml index 7b3ec666b1..b90e88c3b1 100644 --- a/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml +++ b/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml @@ -24,7 +24,7 @@ - + @@ -68,7 +68,7 @@ - + @@ -91,7 +91,7 @@ - + @@ -136,7 +136,7 @@ - + diff --git a/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml b/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml index c672eced1d..3d6bb14838 100644 --- a/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml +++ b/tests_metricflow/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml @@ -24,7 +24,7 @@ - + @@ -70,7 +70,7 @@ - + From 73d7566c4d369f6096043ecaab64dfec08f0e026 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Wed, 15 May 2024 19:06:22 -0700 Subject: [PATCH 8/8] Rename `checked_sink_node`. 
--- metricflow/dataflow/dataflow_plan.py | 4 +- .../source_scan/source_scan_optimizer.py | 4 +- .../source_scan/test_source_scan_optimizer.py | 2 +- .../test_conversion_metrics_to_sql.py | 12 ++-- .../test_distinct_values_to_sql.py | 4 +- .../test_metric_time_dimension_to_sql.py | 2 +- .../test_dataflow_to_sql_plan.py | 6 +- .../test_cumulative_metric_rendering.py | 18 +++--- .../test_derived_metric_rendering.py | 60 +++++++++---------- .../test_fill_nulls_with_rendering.py | 16 ++--- .../test_granularity_date_part_rendering.py | 6 +- .../test_metric_filter_rendering.py | 22 +++---- .../test_metric_time_without_metrics.py | 8 +-- .../query_rendering/test_query_rendering.py | 40 ++++++------- .../test_time_spine_join_rendering.py | 2 +- 15 files changed, 102 insertions(+), 104 deletions(-) diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 808c481da2..640a50be3f 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -186,7 +186,5 @@ def __init__(self, sink_nodes: Sequence[DataflowPlanNode], plan_id: Optional[Dag ) @property - def checked_sink_node(self) -> DataflowPlanNode: - """If this has a single sink node, return it. Otherwise, raise an exception.""" - assert len(self._sink_nodes) == 1, f"Exactly 1 sink node is supported. 
Got: {self._sink_nodes}" + def sink_node(self) -> DataflowPlanNode: # noqa: D102 return self._sink_nodes[0] diff --git a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py index d669a7c8dc..38bc6c4a56 100644 --- a/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py +++ b/metricflow/dataflow/optimizer/source_scan/source_scan_optimizer.py @@ -281,12 +281,12 @@ def visit_metric_time_dimension_transform_node( # noqa: D102 return self._default_base_output_handler(node) def optimize(self, dataflow_plan: DataflowPlan) -> DataflowPlan: # noqa: D102 - optimized_result: OptimizeBranchResult = dataflow_plan.checked_sink_node.accept(self) + optimized_result: OptimizeBranchResult = dataflow_plan.sink_node.accept(self) logger.log( level=self._log_level, msg=f"Optimized:\n\n" - f"{dataflow_plan.checked_sink_node.structure_text()}\n\n" + f"{dataflow_plan.sink_node.structure_text()}\n\n" f"to:\n\n" f"{optimized_result.optimized_branch.structure_text()}", ) diff --git a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py index 169bd44253..c83b2fe80b 100644 --- a/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py +++ b/tests_metricflow/dataflow/optimizer/source_scan/test_source_scan_optimizer.py @@ -109,7 +109,7 @@ def visit_join_conversion_events_node(self, node: JoinConversionEventsNode) -> i return self._sum_parents(node) def count_source_nodes(self, dataflow_plan: DataflowPlan) -> int: # noqa: D102 - return dataflow_plan.checked_sink_node.accept(self) + return dataflow_plan.sink_node.accept(self) def check_optimization( # noqa: D103 diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py index ee3fc5404d..074d13951e 100644 --- 
a/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_conversion_metrics_to_sql.py @@ -45,7 +45,7 @@ def test_conversion_rate( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -80,7 +80,7 @@ def test_conversion_rate_with_window( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -106,7 +106,7 @@ def test_conversion_rate_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -132,7 +132,7 @@ def test_conversion_count_with_no_group_by( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -166,7 +166,7 @@ def test_conversion_rate_with_constant_properties( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -195,5 +195,5 @@ def test_conversion_metric_join_to_timespine_and_fill_nulls_with_0( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py index 8319f4a84e..2898dedfa9 100644 --- 
a/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_distinct_values_to_sql.py @@ -38,7 +38,7 @@ def test_dimensions_requiring_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -66,5 +66,5 @@ def test_dimension_values_with_a_join_and_a_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py index 30f91f213c..15e8da4a9f 100644 --- a/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py +++ b/tests_metricflow/plan_conversion/dataflow_to_sql/test_metric_time_dimension_to_sql.py @@ -93,5 +93,5 @@ def test_simple_query_with_metric_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py index 4fa68e8d74..3bf34c2236 100644 --- a/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py +++ b/tests_metricflow/plan_conversion/test_dataflow_to_sql_plan.py @@ -1101,7 +1101,7 @@ def test_compute_metrics_node_ratio_from_multiple_semantic_models( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -1187,7 +1187,7 @@ def 
test_dimensions_requiring_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -1213,5 +1213,5 @@ def test_dimension_with_joined_where_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py index 6fb5bece6d..153fca4727 100644 --- a/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_cumulative_metric_rendering.py @@ -54,7 +54,7 @@ def test_cumulative_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -95,7 +95,7 @@ def test_cumulative_metric_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -133,7 +133,7 @@ def test_cumulative_metric_with_non_adjustable_time_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -160,7 +160,7 @@ def test_cumulative_metric_no_ds( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -193,7 +193,7 @@ def test_cumulative_metric_no_window( mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -223,7 +223,7 @@ def test_cumulative_metric_no_window_with_time_constraint( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -256,7 +256,7 @@ def test_cumulative_metric_grain_to_date( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -286,7 +286,7 @@ def test_cumulative_metric_month( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -315,5 +315,5 @@ def test_cumulative_metric_with_agg_time_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_derived_metric_rendering.py b/tests_metricflow/query_rendering/test_derived_metric_rendering.py index 6551fa1a11..f19c13af67 100644 --- a/tests_metricflow/query_rendering/test_derived_metric_rendering.py +++ b/tests_metricflow/query_rendering/test_derived_metric_rendering.py @@ -51,7 +51,7 @@ def test_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -75,7 +75,7 @@ def test_nested_derived_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -99,7 +99,7 @@ def test_derived_metric_with_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -130,7 +130,7 @@ def test_derived_metric_with_offset_window_and_time_filter( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -154,7 +154,7 @@ def test_derived_metric_with_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -178,7 +178,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -202,7 +202,7 @@ def test_derived_offset_metric_with_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -226,7 +226,7 @@ def test_derived_metric_with_offset_window_and_granularity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -250,7 +250,7 @@ def test_derived_metric_with_month_dimension_and_offset_window( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=extended_date_dataflow_to_sql_converter, sql_client=sql_client, - 
node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -274,7 +274,7 @@ def test_derived_metric_with_offset_to_grain_and_granularity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -298,7 +298,7 @@ def test_derived_metric_with_offset_window_and_offset_to_grain_and_granularity( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -322,7 +322,7 @@ def test_derived_offset_cumulative_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -347,7 +347,7 @@ def test_nested_offsets( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -372,7 +372,7 @@ def test_nested_derived_metric_with_offset_multiple_input_metrics( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -404,7 +404,7 @@ def test_nested_offsets_with_where_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -432,7 +432,7 @@ def test_nested_offsets_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + 
node=dataflow_plan.sink_node, ) @@ -459,7 +459,7 @@ def test_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -482,7 +482,7 @@ def test_nested_filters( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -510,7 +510,7 @@ def test_cumulative_time_offset_metric_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -540,7 +540,7 @@ def test_nested_derived_metric_offset_with_joined_where_constraint_not_selected( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -566,7 +566,7 @@ def test_offset_window_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -592,7 +592,7 @@ def test_offset_to_grain_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -618,7 +618,7 @@ def test_derived_offset_metric_with_agg_time_dim( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -646,7 +646,7 @@ def 
test_multi_metric_fill_null( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -671,7 +671,7 @@ def test_nested_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -699,7 +699,7 @@ def test_nested_fill_nulls_without_time_spine_multi_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -725,7 +725,7 @@ def test_offset_window_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -751,7 +751,7 @@ def test_offset_to_grain_metric_multiple_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -780,7 +780,7 @@ def test_offset_window_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -809,5 +809,5 @@ def test_offset_to_grain_metric_filter_and_query_have_different_granularities( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git 
a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py index 25192e89ed..5adb4e2148 100644 --- a/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py +++ b/tests_metricflow/query_rendering/test_fill_nulls_with_rendering.py @@ -47,7 +47,7 @@ def test_simple_fill_nulls_with_0_metric_time( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -71,7 +71,7 @@ def test_simple_fill_nulls_with_0_month( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -97,7 +97,7 @@ def test_simple_fill_nulls_with_0_with_non_metric_time( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -121,7 +121,7 @@ def test_simple_fill_nulls_with_0_with_categorical_dimension( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -145,7 +145,7 @@ def test_simple_fill_nulls_without_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -169,7 +169,7 @@ def test_cumulative_fill_nulls( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -193,7 +193,7 @@ def 
test_derived_fill_nulls_for_one_input_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -222,5 +222,5 @@ def test_join_to_time_spine_with_filters( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py index c845140021..8a853fe3e7 100644 --- a/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py +++ b/tests_metricflow/query_rendering/test_granularity_date_part_rendering.py @@ -45,7 +45,7 @@ def test_simple_query_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -76,7 +76,7 @@ def test_simple_query_with_multiple_date_parts( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -102,5 +102,5 @@ def test_offset_window_with_date_part( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_filter_rendering.py b/tests_metricflow/query_rendering/test_metric_filter_rendering.py index 57498b7ed4..96f78e07dd 100644 --- a/tests_metricflow/query_rendering/test_metric_filter_rendering.py +++ 
b/tests_metricflow/query_rendering/test_metric_filter_rendering.py @@ -35,7 +35,7 @@ def test_query_with_simple_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -60,7 +60,7 @@ def test_metric_with_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -87,7 +87,7 @@ def test_query_with_derived_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -114,7 +114,7 @@ def test_query_with_ratio_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -144,7 +144,7 @@ def test_query_with_cumulative_metric_in_where_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -171,7 +171,7 @@ def test_query_with_multiple_metrics_in_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -198,7 +198,7 @@ def test_filter_by_metric_in_same_semantic_model_as_queried_metric( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -225,7 +225,7 @@ def 
test_distinct_values_query_with_metric_filter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -252,7 +252,7 @@ def test_metric_filtered_by_itself( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -278,7 +278,7 @@ def test_group_by_has_local_entity_prefix( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -304,5 +304,5 @@ def test_filter_with_conversion_metric( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py index 2f5fd1a917..e83fade1b8 100644 --- a/tests_metricflow/query_rendering/test_metric_time_without_metrics.py +++ b/tests_metricflow/query_rendering/test_metric_time_without_metrics.py @@ -38,7 +38,7 @@ def test_metric_time_only( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -65,7 +65,7 @@ def test_metric_time_quarter_alone( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -92,7 +92,7 @@ def test_metric_time_with_other_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -122,5 +122,5 @@ def test_dimensions_with_time_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git a/tests_metricflow/query_rendering/test_query_rendering.py b/tests_metricflow/query_rendering/test_query_rendering.py index 3200e0c8da..af13901435 100644 --- a/tests_metricflow/query_rendering/test_query_rendering.py +++ b/tests_metricflow/query_rendering/test_query_rendering.py @@ -63,7 +63,7 @@ def test_multihop_node( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=multihop_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -91,7 +91,7 @@ def test_filter_with_where_constraint_on_join_dim( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -121,7 +121,7 @@ def test_partitioned_join( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -152,7 +152,7 @@ def test_limit_rows( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -182,7 +182,7 @@ def test_distinct_values( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -211,7 +211,7 @@ def 
test_local_dimension_using_local_entity( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -235,7 +235,7 @@ def test_measure_constraint( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -259,7 +259,7 @@ def test_measure_constraint_with_reused_measure( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -284,7 +284,7 @@ def test_measure_constraint_with_single_expr_and_alias( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -313,7 +313,7 @@ def test_join_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -339,7 +339,7 @@ def test_multi_hop_through_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -365,7 +365,7 @@ def test_multi_hop_to_scd_dimension( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=scd_dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -391,7 +391,7 @@ def test_multiple_metrics_no_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, 
dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -414,7 +414,7 @@ def test_metric_with_measures_from_multiple_sources_no_dimensions( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -438,7 +438,7 @@ def test_common_semantic_model( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -468,7 +468,7 @@ def test_min_max_only_categorical( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -499,7 +499,7 @@ def test_min_max_only_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -530,7 +530,7 @@ def test_min_max_only_time_quarter( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -555,7 +555,7 @@ def test_min_max_metric_time( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) @@ -580,5 +580,5 @@ def test_min_max_metric_time_week( mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, ) diff --git 
a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py index 797f762f70..16b7a5610e 100644 --- a/tests_metricflow/query_rendering/test_time_spine_join_rendering.py +++ b/tests_metricflow/query_rendering/test_time_spine_join_rendering.py @@ -44,5 +44,5 @@ def test_simple_join_to_time_spine( # noqa: D103 mf_test_configuration=mf_test_configuration, dataflow_to_sql_converter=dataflow_to_sql_converter, sql_client=sql_client, - node=dataflow_plan.checked_sink_node, + node=dataflow_plan.sink_node, )