From 05292000bac3029757962a0b9d424a07483bb913 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 18 Sep 2023 11:29:58 -0700 Subject: [PATCH 01/19] WIP --- .../dataflow/builder/dataflow_plan_builder.py | 208 ++++++++---------- metricflow/dataflow/builder/source_node.py | 39 ++-- metricflow/engine/metricflow_engine.py | 19 +- .../test/dataflow/builder/test_cyclic_join.py | 1 + .../builder/test_dataflow_plan_builder.py | 12 +- metricflow/test/fixtures/dataflow_fixtures.py | 3 + metricflow/test/fixtures/model_fixtures.py | 10 +- .../test_dataflow_to_sql_plan.py | 12 +- 8 files changed, 152 insertions(+), 152 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 004283db1e..916c87f10e 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -12,7 +12,6 @@ from dbt_semantic_interfaces.references import TimeDimensionReference from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity -from metricflow.assert_one_arg import assert_exactly_one_arg_set from metricflow.dag.id_generation import DATAFLOW_PLAN_PREFIX, IdGeneratorRegistry from metricflow.dataflow.builder.costing import DataflowPlanNodeCostFunction, DefaultCostFunction from metricflow.dataflow.builder.measure_additiveness import group_measure_specs_by_additiveness @@ -53,8 +52,6 @@ from metricflow.plan_conversion.node_processor import PreDimensionJoinNodeProcessor from metricflow.specs.column_assoc import ColumnAssociationResolver from metricflow.specs.specs import ( - DimensionSpec, - EntitySpec, InstanceSpecSet, LinkableInstanceSpec, LinkableSpecSet, @@ -74,7 +71,23 @@ @dataclass(frozen=True) -class MeasureRecipe: +class DataflowRecipe: + """Get a recipe for how to build a dataflow plan node that outputs measures and linkable instances as needed.""" + + source_node: BaseOutput + required_local_linkable_specs: 
Tuple[LinkableInstanceSpec, ...] + join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...] + + def to_measure_recipe(self) -> MeasureRecipe: + return MeasureRecipe( + source_node=self.source_node, + required_local_linkable_specs=self.required_local_linkable_specs, + join_linkable_instances_recipes=self.join_linkable_instances_recipes, + ) + + +@dataclass(frozen=True) +class MeasureRecipe(DataflowRecipe): """Get a recipe for how to build a dataflow plan node that outputs measures and the needed linkable instances. The recipe involves filtering the measure node so that it only outputs the measures and the instances associated with @@ -82,9 +95,9 @@ class MeasureRecipe: in join_linkable_instances_recipes. """ - measure_node: BaseOutput - required_local_linkable_specs: Tuple[LinkableInstanceSpec, ...] - join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...] + @property + def measure_node(self) -> BaseOutput: + return self.source_node @dataclass(frozen=True) @@ -103,6 +116,7 @@ class DataflowPlanBuilder: def __init__( # noqa: D self, source_nodes: Sequence[BaseOutput], + source_nodes_without_measures: Sequence[BaseOutput], semantic_manifest_lookup: SemanticManifestLookup, cost_function: DataflowPlanNodeCostFunction = DefaultCostFunction(), node_output_resolver: Optional[DataflowPlanNodeOutputDataSetResolver] = None, @@ -113,6 +127,7 @@ def __init__( # noqa: D self._metric_time_dimension_reference = DataSet.metric_time_dimension_reference() self._cost_function = cost_function self._source_nodes = source_nodes + self._source_nodes_without_measures = source_nodes_without_measures self._column_association_resolver = ( DunderColumnAssociationResolver(semantic_manifest_lookup) if not column_association_resolver @@ -146,8 +161,8 @@ def build_plan( time_range_constraint=query_spec.time_range_constraint, ) - sink_node = DataflowPlanBuilder.build_sink_node_from_metrics_output_node( - computed_metrics_output=metrics_output_node, + sink_node = 
DataflowPlanBuilder.build_sink_node( + parent_node=metrics_output_node, order_by_specs=query_spec.order_by_specs, output_sql_table=output_sql_table, limit=query_spec.limit, @@ -259,69 +274,35 @@ def _build_metrics_output_node( join_type=combine_metrics_join_type, ) - def build_plan_for_distinct_values( - self, - metric_specs: Sequence[MetricSpec], - dimension_spec: Optional[DimensionSpec] = None, - time_dimension_spec: Optional[TimeDimensionSpec] = None, - entity_spec: Optional[EntitySpec] = None, - time_range_constraint: Optional[TimeRangeConstraint] = None, - limit: Optional[int] = None, - ) -> DataflowPlan: + def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> DataflowPlan: """Generate a plan that would get the distinct values of a linkable instance. e.g. distinct listing__country_latest for bookings by listing__country_latest """ - assert_exactly_one_arg_set( - dimension_spec=dimension_spec, time_dimension_spec=time_dimension_spec, entity_spec=entity_spec - ) + assert not query_spec.metric_specs, "Can't build distinct values plan with metrics." - # Doing this to keep the type checker happy, but assert_exactly_one_arg_set should ensure this. 
- linkable_spec: Optional[LinkableInstanceSpec] = dimension_spec or time_dimension_spec or entity_spec - assert linkable_spec - - query_spec = MetricFlowQuerySpec( - metric_specs=tuple(metric_specs), - dimension_specs=(dimension_spec,) if dimension_spec else (), - time_dimension_specs=(time_dimension_spec,) if time_dimension_spec else (), - entity_specs=(entity_spec,) if entity_spec else (), - time_range_constraint=time_range_constraint, - ) - metrics_output_node = self._build_metrics_output_node( - metric_specs=query_spec.metric_specs, - queried_linkable_specs=query_spec.linkable_specs, - where_constraint=query_spec.where_constraint, - time_range_constraint=query_spec.time_range_constraint, - ) + linkable_specs = query_spec.linkable_specs + dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=linkable_specs) + if not dataflow_recipe: + raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {linkable_specs}.") + source_node = dataflow_recipe.source_node distinct_values_node = FilterElementsNode( - parent_node=metrics_output_node, - include_specs=InstanceSpecSet.create_from_linkable_specs((linkable_spec,)), + parent_node=source_node, include_specs=InstanceSpecSet.create_from_linkable_specs(linkable_specs.as_tuple) ) - sink_node = self.build_sink_node_from_metrics_output_node( - computed_metrics_output=distinct_values_node, - order_by_specs=( - OrderBySpec( - dimension_spec=dimension_spec, - time_dimension_spec=time_dimension_spec, - entity_spec=entity_spec, - descending=False, - ), - ), - limit=limit, + # Need to apply group by since that normally gets applied in aggregate measures node? 
+ sink_node = self.build_sink_node( + parent_node=distinct_values_node, order_by_specs=query_spec.order_by_specs, limit=query_spec.limit ) plan_id = IdGeneratorRegistry.for_class(DataflowPlanBuilder).create_id(DATAFLOW_PLAN_PREFIX) - return DataflowPlan( - plan_id=plan_id, - sink_output_nodes=[sink_node], - ) + return DataflowPlan(plan_id=plan_id, sink_output_nodes=[sink_node]) @staticmethod - def build_sink_node_from_metrics_output_node( - computed_metrics_output: BaseOutput, + def build_sink_node( + parent_node: BaseOutput, order_by_specs: Sequence[OrderBySpec], output_sql_table: Optional[SqlTable] = None, limit: Optional[int] = None, @@ -332,26 +313,20 @@ def build_sink_node_from_metrics_output_node( if order_by_specs or limit: pre_result_node = OrderByLimitNode( - order_by_specs=list(order_by_specs), - limit=limit, - parent_node=computed_metrics_output, + order_by_specs=list(order_by_specs), limit=limit, parent_node=parent_node ) if output_selection_specs: pre_result_node = FilterElementsNode( - parent_node=pre_result_node or computed_metrics_output, - include_specs=output_selection_specs, + parent_node=pre_result_node or parent_node, include_specs=output_selection_specs ) write_result_node: SinkOutput if not output_sql_table: - write_result_node = WriteToResultDataframeNode( - parent_node=pre_result_node or computed_metrics_output, - ) + write_result_node = WriteToResultDataframeNode(parent_node=pre_result_node or parent_node) else: write_result_node = WriteToResultTableNode( - parent_node=pre_result_node or computed_metrics_output, - output_sql_table=output_sql_table, + parent_node=pre_result_node or parent_node, output_sql_table=output_sql_table ) return write_result_node @@ -399,6 +374,21 @@ def _select_source_nodes_with_measures( nodes.append(source_node) return nodes + def _select_source_nodes_with_linkable_specs( + self, linkable_specs: LinkableSpecSet, source_nodes: Sequence[BaseOutput] + ) -> Sequence[BaseOutput]: + """Find source nodes with requested 
linkable specs and no measures.""" + nodes = [] + linkable_specs_set = set(linkable_specs.as_tuple) + for source_node in source_nodes: + output_spec_set = self._node_data_set_resolver.get_output_data_set(source_node).instance_set.spec_set + if output_spec_set.measure_specs: + continue + linkable_specs_in_node = output_spec_set.linkable_specs + if linkable_specs_set.intersection(set(linkable_specs_in_node)) == linkable_specs_set: + nodes.append(source_node) + return nodes + def _find_non_additive_dimension_in_linkable_specs( self, agg_time_dimension: TimeDimensionReference, @@ -451,39 +441,36 @@ def _build_measure_spec_properties(self, measure_specs: Sequence[MeasureSpec]) - non_additive_dimension_spec=non_additive_dimension_spec, ) - def _find_measure_recipe( + def _find_dataflow_recipe( self, - measure_spec_properties: MeasureSpecProperties, - linkable_specs: Sequence[LinkableInstanceSpec], + linkable_spec_set: LinkableSpecSet, + measure_spec_properties: Optional[MeasureSpecProperties] = None, time_range_constraint: Optional[TimeRangeConstraint] = None, - ) -> Optional[MeasureRecipe]: - """Find a recipe for getting measure_specs along with the linkable specs. 
+ ) -> Optional[DataflowRecipe]: + linkable_specs = linkable_spec_set.as_tuple + if measure_spec_properties: + source_nodes = self._source_nodes + potential_source_nodes: Sequence[BaseOutput] = self._select_source_nodes_with_measures( + measure_specs=set(measure_spec_properties.measure_specs), source_nodes=source_nodes + ) + else: + source_nodes = self._source_nodes_without_measures + potential_source_nodes = self._select_source_nodes_with_linkable_specs( + linkable_specs=linkable_spec_set, source_nodes=source_nodes + ) + + logger.info(f"There are {len(potential_source_nodes)} potential source nodes") + + logger.info(f"Starting search with {len(source_nodes)} source nodes") + start_time = time.time() - Prior to calling this method we should always be checking that all input measure specs come from - the same base semantic model, otherwise the internal conditions here will be impossible to satisfy - """ - measure_specs = measure_spec_properties.measure_specs node_processor = PreDimensionJoinNodeProcessor( semantic_model_lookup=self._semantic_model_lookup, node_data_set_resolver=self._node_data_set_resolver, ) - - source_nodes: Sequence[BaseOutput] = self._source_nodes - - # We only care about nodes that have all required measures - potential_measure_nodes: Sequence[BaseOutput] = self._select_source_nodes_with_measures( - measure_specs=set(measure_specs), source_nodes=source_nodes - ) - - logger.info(f"There are {len(potential_measure_nodes)} potential measure source nodes") - - logger.info(f"Starting search with {len(source_nodes)} source nodes") - start_time = time.time() - # Only apply the time constraint to nodes that will be used for measures because some dimensional sources have - # measures in them, and time constraining those would result in incomplete joins. 
if time_range_constraint: - potential_measure_nodes = node_processor.add_time_range_constraint( - source_nodes=potential_measure_nodes, + potential_source_nodes = node_processor.add_time_range_constraint( + source_nodes=potential_source_nodes, metric_time_dimension_reference=self._metric_time_dimension_reference, time_range_constraint=time_range_constraint, ) @@ -511,21 +498,11 @@ def _find_measure_recipe( node_data_set_resolver=self._node_data_set_resolver, ) - # Dict from the node that contains the measure spec to the evaluation results. + # Dict from the node that contains the source node to the evaluation results. node_to_evaluation: Dict[BaseOutput, LinkableInstanceSatisfiabilityEvaluation] = {} - for node in self._sort_by_suitability(potential_measure_nodes): - data_set = self._node_data_set_resolver.get_output_data_set(node) - - missing_specs = [spec for spec in measure_specs if spec not in data_set.instance_set.spec_set.measure_specs] - if missing_specs: - logger.debug( - f"Skipping evaluation for node since it does not have all of the measure specs {missing_specs}:" - f"\n\n{dataflow_dag_as_text(node)}" - ) - continue - - logger.debug(f"Evaluating measure node:\n{pformat_big_objects(measure_node=dataflow_dag_as_text(node))}") + for node in self._sort_by_suitability(potential_source_nodes): + logger.debug(f"Evaluating source node:\n{pformat_big_objects(source_node=dataflow_dag_as_text(node))}") start_time = time.time() evaluation = node_evaluator.evaluate_node( @@ -535,7 +512,7 @@ def _find_measure_recipe( logger.info(f"Evaluation of {node} took {time.time() - start_time:.2f}s") logger.debug( - f"Evaluation for measure node is:\n" + f"Evaluation for source node is:\n" f"{pformat_big_objects(node=dataflow_dag_as_text(node), evaluation=evaluation)}" ) @@ -556,7 +533,7 @@ def _find_measure_recipe( if len(evaluation.join_recipes) == 0: logger.info("Not evaluating other nodes since we found one that doesn't require joins") - logger.info(f"Found 
{len(node_to_evaluation)} candidate measure nodes.") + logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") if len(node_to_evaluation) > 0: cost_function = DefaultCostFunction() @@ -572,8 +549,8 @@ def _find_measure_recipe( ) ) - # Nodes containing the linkable instances will be joined to the node containing the measure, so these - # entities will need to be present in the measure node. + # Nodes containing the linkable instances will be joined to the source node, so these + # entities will need to be present in the source node. required_local_entity_specs = tuple(x.join_on_entity for x in evaluation.join_recipes) # Same thing with partitions. required_local_dimension_specs = tuple( @@ -585,8 +562,8 @@ def _find_measure_recipe( for y in x.join_on_partition_time_dimensions ) - return MeasureRecipe( - measure_node=node_with_lowest_cost, + return DataflowRecipe( + source_node=node_with_lowest_cost, required_local_linkable_specs=( evaluation.local_linkable_specs + required_local_entity_specs @@ -756,24 +733,25 @@ def _build_aggregated_measures_from_measure_source_node( ) find_recipe_start_time = time.time() - measure_recipe = self._find_measure_recipe( + dataflow_recipe = self._find_dataflow_recipe( measure_spec_properties=measure_properties, time_range_constraint=cumulative_metric_adjusted_time_constraint or time_range_constraint, - linkable_specs=required_linkable_specs.as_tuple, + linkable_spec_set=required_linkable_specs, ) logger.info( f"With {len(self._source_nodes)} source nodes, finding a recipe took " f"{time.time() - find_recipe_start_time:.2f}s" ) - logger.info(f"Using recipe:\n{pformat_big_objects(measure_recipe=measure_recipe)}") + logger.info(f"Using recipe:\n{pformat_big_objects(dataflow_recipe=dataflow_recipe)}") - if not measure_recipe: + if not dataflow_recipe: # TODO: Improve for better user understandability. 
raise UnableToSatisfyQueryError( f"Recipe not found for measure specs: {measure_specs} and linkable specs: {required_linkable_specs}" ) + measure_recipe = dataflow_recipe.to_measure_recipe() # If a cumulative metric is queried with metric_time, join over time range. # Otherwise, the measure will be aggregated over all time. time_range_node: Optional[JoinOverTimeRangeNode] = None diff --git a/metricflow/dataflow/builder/source_node.py b/metricflow/dataflow/builder/source_node.py index e2f2da7790..a345273dd3 100644 --- a/metricflow/dataflow/builder/source_node.py +++ b/metricflow/dataflow/builder/source_node.py @@ -21,28 +21,33 @@ class SourceNodeBuilder: def __init__(self, semantic_manifest_lookup: SemanticManifestLookup) -> None: # noqa: D self._semantic_manifest_lookup = semantic_manifest_lookup - def create_from_data_sets(self, data_sets: Sequence[SemanticModelDataSet]) -> Sequence[BaseOutput]: + def create_from_data_sets( + self, data_sets: Sequence[SemanticModelDataSet], with_measures: bool = True + ) -> Sequence[BaseOutput]: """Creates source nodes from SemanticModelDataSets.""" source_nodes: List[BaseOutput] = [] for data_set in data_sets: read_node = ReadSqlSourceNode(data_set) - agg_time_dim_to_measures_grouper = ( - self._semantic_manifest_lookup.semantic_model_lookup.get_aggregation_time_dimensions_with_measures( - data_set.semantic_model_reference - ) - ) - - # Dimension sources may not have any measures -> no aggregation time dimensions. - time_dimension_references = agg_time_dim_to_measures_grouper.keys - if len(time_dimension_references) == 0: + if not with_measures: source_nodes.append(read_node) else: - # Splits the measures by distinct aggregate time dimension. 
- for time_dimension_reference in time_dimension_references: - source_nodes.append( - MetricTimeDimensionTransformNode( - parent_node=read_node, - aggregation_time_dimension_reference=time_dimension_reference, - ) + agg_time_dim_to_measures_grouper = ( + self._semantic_manifest_lookup.semantic_model_lookup.get_aggregation_time_dimensions_with_measures( + data_set.semantic_model_reference ) + ) + + # Dimension sources may not have any measures -> no aggregation time dimensions. + time_dimension_references = agg_time_dim_to_measures_grouper.keys + if len(time_dimension_references) == 0: + source_nodes.append(read_node) + else: + # Splits the measures by distinct aggregate time dimension. + for time_dimension_reference in time_dimension_references: + source_nodes.append( + MetricTimeDimensionTransformNode( + parent_node=read_node, + aggregation_time_dimension_reference=time_dimension_reference, + ) + ) return source_nodes diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index 1d459bca78..31d2d8a95f 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -347,6 +347,9 @@ def __init__( source_node_builder = SourceNodeBuilder(self._semantic_manifest_lookup) source_nodes = source_node_builder.create_from_data_sets(self._source_data_sets) + source_nodes_without_measures = source_node_builder.create_from_data_sets( + self._source_data_sets, with_measures=False + ) node_output_resolver = DataflowPlanNodeOutputDataSetResolver( column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup), @@ -355,6 +358,7 @@ def __init__( self._dataflow_plan_builder = DataflowPlanBuilder( source_nodes=source_nodes, + source_nodes_without_measures=source_nodes_without_measures, semantic_manifest_lookup=self._semantic_manifest_lookup, ) self._to_sql_query_plan_converter = DataflowToSqlQueryPlanConverter( @@ -478,12 +482,15 @@ def _create_execution_plan(self, mf_query_request: 
MetricFlowQueryRequest) -> Me time_dimension_specs=query_spec.time_dimension_specs, ) - dataflow_plan = self._dataflow_plan_builder.build_plan( - query_spec=query_spec, - output_sql_table=output_table, - output_selection_specs=output_selection_specs, - optimizers=(SourceScanOptimizer(),), - ) + if query_spec.metric_specs: + dataflow_plan = self._dataflow_plan_builder.build_plan( + query_spec=query_spec, + output_sql_table=output_table, + output_selection_specs=output_selection_specs, + optimizers=(SourceScanOptimizer(),), + ) + else: + dataflow_plan = self._dataflow_plan_builder.build_plan_for_distinct_values(query_spec=query_spec) if len(dataflow_plan.sink_output_nodes) > 1: raise NotImplementedError( diff --git a/metricflow/test/dataflow/builder/test_cyclic_join.py b/metricflow/test/dataflow/builder/test_cyclic_join.py index 8762e45ce3..73a9aed2ec 100644 --- a/metricflow/test/dataflow/builder/test_cyclic_join.py +++ b/metricflow/test/dataflow/builder/test_cyclic_join.py @@ -34,6 +34,7 @@ def cyclic_join_manifest_dataflow_plan_builder( # noqa: D return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.cyclic_join_source_nodes, + source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=cyclic_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py index 625226cddf..43bb1de17a 100644 --- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py +++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py @@ -565,12 +565,12 @@ def test_distinct_values_plan( # noqa: D ) -> None: """Tests a plan to get distinct values of a dimension.""" dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( - metric_specs=(MetricSpec(element_name="bookings"),), - dimension_spec=DimensionSpec( - 
element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), - ), - limit=100, + query_spec=MetricFlowQuerySpec( + dimension_specs=( + DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), + ), + limit=100, + ) ) assert_plan_snapshot_text_equal( diff --git a/metricflow/test/fixtures/dataflow_fixtures.py b/metricflow/test/fixtures/dataflow_fixtures.py index 06616aa7f1..a87324b146 100644 --- a/metricflow/test/fixtures/dataflow_fixtures.py +++ b/metricflow/test/fixtures/dataflow_fixtures.py @@ -34,6 +34,7 @@ def dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.simple_model_source_nodes, + source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=simple_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -47,6 +48,7 @@ def multihop_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.multihop_model_source_nodes, + source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=multi_hop_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -68,6 +70,7 @@ def scd_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.scd_model_source_nodes, + source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=scd_semantic_manifest_lookup, cost_function=DefaultCostFunction(), column_association_resolver=scd_column_association_resolver, diff --git a/metricflow/test/fixtures/model_fixtures.py b/metricflow/test/fixtures/model_fixtures.py index 6d7fcea2ce..9fd2044dd9 100644 --- a/metricflow/test/fixtures/model_fixtures.py +++ 
b/metricflow/test/fixtures/model_fixtures.py @@ -45,10 +45,12 @@ def _data_set_to_read_nodes(data_sets: OrderedDict[str, SemanticModelDataSet]) - def _data_set_to_source_nodes( - semantic_manifest_lookup: SemanticManifestLookup, data_sets: OrderedDict[str, SemanticModelDataSet] + semantic_manifest_lookup: SemanticManifestLookup, + data_sets: OrderedDict[str, SemanticModelDataSet], + with_measures: bool = True, ) -> Sequence[BaseOutput]: source_node_builder = SourceNodeBuilder(semantic_manifest_lookup) - return source_node_builder.create_from_data_sets(list(data_sets.values())) + return source_node_builder.create_from_data_sets(list(data_sets.values()), with_measures=with_measures) def query_parser_from_yaml(yaml_contents: List[YamlConfigFile]) -> MetricFlowQueryParser: @@ -78,6 +80,7 @@ class ConsistentIdObjectRepository: simple_model_data_sets: OrderedDict[str, SemanticModelDataSet] simple_model_read_nodes: OrderedDict[str, ReadSqlSourceNode] simple_model_source_nodes: Sequence[BaseOutput] + simple_model_source_nodes_without_measures: Sequence[BaseOutput] multihop_model_read_nodes: OrderedDict[str, ReadSqlSourceNode] multihop_model_source_nodes: Sequence[BaseOutput] @@ -111,6 +114,9 @@ def consistent_id_object_repository( simple_model_data_sets=sm_data_sets, simple_model_read_nodes=_data_set_to_read_nodes(sm_data_sets), simple_model_source_nodes=_data_set_to_source_nodes(simple_semantic_manifest_lookup, sm_data_sets), + simple_model_source_nodes_without_measures=_data_set_to_source_nodes( + simple_semantic_manifest_lookup, sm_data_sets, with_measures=False + ), multihop_model_read_nodes=_data_set_to_read_nodes(multihop_data_sets), multihop_model_source_nodes=_data_set_to_source_nodes( multi_hop_join_semantic_manifest_lookup, multihop_data_sets diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index a387521655..ea8a1e0539 100644 --- 
a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -1252,12 +1252,12 @@ def test_distinct_values( # noqa: D ) -> None: """Tests a plan to get distinct values for a dimension.""" dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( - metric_specs=(MetricSpec(element_name="bookings"),), - dimension_spec=DimensionSpec( - element_name="country_latest", - entity_links=(EntityReference(element_name="listing"),), - ), - limit=100, + query_spec=MetricFlowQuerySpec( + dimension_specs=( + DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), + ), + limit=100, + ) ) convert_and_check( From 2f984b1d20b86c277c7b996d487c3f92031ed842 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 20 Sep 2023 17:41:02 -0700 Subject: [PATCH 02/19] Update snapshot node_ids --- .../DataflowPlan/test_cyclic_join__dfp_0.xml | 4 ++-- .../DataflowPlan/test_multihop_join_plan__dfp_0.xml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index e5b3d39969..24159db194 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -61,7 +61,7 @@ - + @@ -83,7 +83,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index 0513c678ae..dcfd0e527e 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -77,7 +77,7 @@ - + @@ -127,7 +127,7 @@ - + @@ -227,7 +227,7 @@ - + From 310f7bb97ce6b4fb8fe7c48ede1846609fd0d170 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 20 Sep 2023 18:47:39 -0700 Subject: [PATCH 03/19] Get dataflow plan working --- .../dataflow/builder/dataflow_plan_builder.py | 23 +- metricflow/dataflow/dataflow_plan.py | 16 +- metricflow/plan_conversion/dataflow_to_sql.py | 14 +- metricflow/query/query_parser.py | 11 +- .../builder/test_dataflow_plan_builder.py | 18 + .../test/integration/configured_test_case.py | 2 +- .../test_cases/itest_dimensions.yaml | 43 + .../test_dataflow_to_sql_plan.py | 18 + .../test_filter_combination__dfpo_0.xml | 1 + .../DataflowPlan/test_cyclic_join__dfp_0.xml | 3 + .../test_common_semantic_model__dfp_0.xml | 6 + ...indow_or_grain_with_metric_time__dfp_0.xml | 1 + ...ow_or_grain_without_metric_time__dfp_0.xml | 1 + ...t_cumulative_metric_with_window__dfp_0.xml | 1 + ..._derived_metric_offset_to_grain__dfp_0.xml | 2 + ...st_derived_metric_offset_window__dfp_0.xml | 1 + ..._metric_offset_with_granularity__dfp_0.xml | 1 + ...erived_offset_cumulative_metric__dfp_0.xml | 1 + .../test_distinct_values_plan__dfp_0.xml | 131 +-- .../DataflowPlan/test_joined_plan__dfp_0.xml | 3 + .../test_limit_rows_plan__dfp_0.xml | 1 + .../test_measure_constraint_plan__dfp_0.xml | 9 + ...traint_with_reused_measure_plan__dfp_0.xml | 3 + ...mantic_model_ratio_metrics_plan__dfp_0.xml | 6 + .../test_multihop_join_plan__dfp_0.xml | 4 + .../test_multiple_metrics_plan__dfp_0.xml | 2 + .../test_order_by_plan__dfp_0.xml | 1 + .../test_primary_entity_dimension__dfp_0.xml | 1 + .../DataflowPlan/test_simple_plan__dfp_0.xml | 1 + ...mantic_model_ratio_metrics_plan__dfp_0.xml | 6 + .../test_where_constrained_plan__dfp_0.xml | 4 + ...constrained_plan_time_dimension__dfp_0.xml | 2 + ...ained_with_common_linkable_plan__dfp_0.xml | 3 + 
...ompute_metrics_node_simple_expr__plan0.xml | 2 + ...spine_node_with_offset_to_grain__plan0.xml | 1 + ...e_spine_node_with_offset_window__plan0.xml | 1 + ..._time_spine_node_without_offset__plan0.xml | 1 + .../DuckDB/test_distinct_values__plan0.sql | 288 +---- .../test_distinct_values__plan0_optimized.sql | 30 +- .../test_distinct_values__plan0.xml | 1028 +++-------------- ...2_metrics_from_1_semantic_model__dfp_0.xml | 6 + ..._metrics_from_1_semantic_model__dfpo_0.xml | 3 + ..._metrics_from_2_semantic_models__dfp_0.xml | 2 + ...metrics_from_2_semantic_models__dfpo_0.xml | 2 + ...o_metrics_from_1_semantic_model__dfp_0.xml | 4 + ..._metrics_from_1_semantic_model__dfpo_0.xml | 1 + ..._metrics_from_2_semantic_models__dfp_0.xml | 3 + ...metrics_from_2_semantic_models__dfpo_0.xml | 2 + ...constrained_metric_not_combined__dfp_0.xml | 3 + ...onstrained_metric_not_combined__dfpo_0.xml | 3 + .../test_derived_metric__dfp_0.xml | 2 + .../test_derived_metric__dfpo_0.xml | 1 + ..._metric_with_non_derived_metric__dfp_0.xml | 3 + ...metric_with_non_derived_metric__dfpo_0.xml | 2 + .../test_nested_derived_metric__dfp_0.xml | 4 + .../test_nested_derived_metric__dfpo_0.xml | 2 + 56 files changed, 500 insertions(+), 1234 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 916c87f10e..82beb89282 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -78,7 +78,7 @@ class DataflowRecipe: required_local_linkable_specs: Tuple[LinkableInstanceSpec, ...] join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...] 
- def to_measure_recipe(self) -> MeasureRecipe: + def to_measure_recipe(self) -> MeasureRecipe: # noqa: D return MeasureRecipe( source_node=self.source_node, required_local_linkable_specs=self.required_local_linkable_specs, @@ -96,7 +96,7 @@ class MeasureRecipe(DataflowRecipe): """ @property - def measure_node(self) -> BaseOutput: + def measure_node(self) -> BaseOutput: # noqa: D return self.source_node @@ -288,12 +288,22 @@ def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Dat source_node = dataflow_recipe.source_node distinct_values_node = FilterElementsNode( - parent_node=source_node, include_specs=InstanceSpecSet.create_from_linkable_specs(linkable_specs.as_tuple) + parent_node=source_node, + include_specs=InstanceSpecSet.create_from_linkable_specs(linkable_specs.as_tuple), + distinct_values=True, ) - # Need to apply group by since that normally gets applied in aggregate measures node? + where_constraint_node: Optional[WhereConstraintNode] = None + if query_spec.where_constraint: + where_constraint_node = WhereConstraintNode( + parent_node=distinct_values_node, + where_constraint=query_spec.where_constraint, + ) + sink_node = self.build_sink_node( - parent_node=distinct_values_node, order_by_specs=query_spec.order_by_specs, limit=query_spec.limit + parent_node=where_constraint_node or distinct_values_node, + order_by_specs=query_spec.order_by_specs, + limit=query_spec.limit, ) plan_id = IdGeneratorRegistry.for_class(DataflowPlanBuilder).create_id(DATAFLOW_PLAN_PREFIX) @@ -382,11 +392,10 @@ def _select_source_nodes_with_linkable_specs( linkable_specs_set = set(linkable_specs.as_tuple) for source_node in source_nodes: output_spec_set = self._node_data_set_resolver.get_output_data_set(source_node).instance_set.spec_set - if output_spec_set.measure_specs: - continue linkable_specs_in_node = output_spec_set.linkable_specs if linkable_specs_set.intersection(set(linkable_specs_in_node)) == linkable_specs_set: nodes.append(source_node) + 
return nodes def _find_non_additive_dimension_in_linkable_specs( diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 89aef6c18c..b96d427863 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -1098,10 +1098,12 @@ def __init__( # noqa: D parent_node: BaseOutput, include_specs: InstanceSpecSet, replace_description: Optional[str] = None, + distinct_values: bool = False, ) -> None: self._include_specs = include_specs self._replace_description = replace_description self._parent_node = parent_node + self._distinct_values = distinct_values super().__init__(node_id=self.create_unique_id(), parent_nodes=[parent_node]) @classmethod @@ -1113,6 +1115,11 @@ def include_specs(self) -> InstanceSpecSet: """Returns the specs for the elements that it should pass.""" return self._include_specs + @property + def distinct_values(self) -> bool: + """True if you only want the distinct values for the selected specs.""" + return self._distinct_values + def accept(self, visitor: DataflowPlanNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D return visitor.visit_pass_elements_filter_node(self) @@ -1132,7 +1139,7 @@ def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D if not self._replace_description: additional_properties = [ DisplayedProperty("include_spec", include_spec) for include_spec in self._include_specs.all_specs - ] + ] + [DisplayedProperty("distinct_values", self._distinct_values)] return super().displayed_properties + additional_properties @property @@ -1140,13 +1147,18 @@ def parent_node(self) -> BaseOutput: # noqa: D return self._parent_node def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: D - return isinstance(other_node, self.__class__) and other_node.include_specs == self.include_specs + return ( + isinstance(other_node, self.__class__) + and other_node.include_specs == self.include_specs + and other_node.distinct_values == 
self.distinct_values + ) def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> FilterElementsNode: # noqa: D assert len(new_parent_nodes) == 1 return FilterElementsNode( parent_node=new_parent_nodes[0], include_specs=self.include_specs, + distinct_values=self.distinct_values, replace_description=self._replace_description, ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 27b6efe4e1..34b41d579d 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -793,18 +793,22 @@ def visit_pass_elements_filter_node(self, node: FilterElementsNode) -> SqlDataSe # Also, the output columns should always follow the resolver format. output_instance_set = output_instance_set.transform(ChangeAssociatedColumns(self._column_association_resolver)) + # This creates select expressions for all columns referenced in the instance set. + select_columns = output_instance_set.transform( + CreateSelectColumnsForInstances(from_data_set_alias, self._column_association_resolver) + ).as_tuple() + + # If no measures are passed, group by all columns. + group_bys = select_columns if node.distinct_values else () return SqlDataSet( instance_set=output_instance_set, sql_select_node=SqlSelectStatementNode( description=node.description, - # This creates select expressions for all columns referenced in the instance set. 
- select_columns=output_instance_set.transform( - CreateSelectColumnsForInstances(from_data_set_alias, self._column_association_resolver) - ).as_tuple(), + select_columns=select_columns, from_source=from_data_set.sql_select_node, from_source_alias=from_data_set_alias, joins_descs=(), - group_bys=(), + group_bys=group_bys, where=None, order_bys=(), ), diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 0027e68322..23686aa457 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -461,11 +461,12 @@ def _parse_and_validate_query( ) # Validate all of them together. - self._validate_linkable_specs( - metric_references=metric_references, - all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, - time_dimension_specs=time_dimension_specs, - ) + if metric_references: + self._validate_linkable_specs( + metric_references=metric_references, + all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, + time_dimension_specs=time_dimension_specs, + ) self._validate_order_by_specs( order_by_specs=order_by_specs, diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py index 43bb1de17a..f063bf9fd3 100644 --- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py +++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py @@ -562,6 +562,7 @@ def test_distinct_values_plan( # noqa: D request: FixtureRequest, mf_test_session_state: MetricFlowTestSessionState, dataflow_plan_builder: DataflowPlanBuilder, + column_association_resolver: ColumnAssociationResolver, ) -> None: """Tests a plan to get distinct values of a dimension.""" dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( @@ -569,6 +570,23 @@ def test_distinct_values_plan( # noqa: D dimension_specs=( DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), ), + 
where_constraint=( + WhereSpecFactory( + column_association_resolver=column_association_resolver, + ).create_from_where_filter( + PydanticWhereFilter( + where_sql_template="{{ Dimension('listing__country_latest') }} = 'us'", + ) + ) + ), + order_by_specs=( + OrderBySpec( + dimension_spec=DimensionSpec( + element_name="country_latest", entity_links=(EntityReference(element_name="listing"),) + ), + descending=True, + ), + ), limit=100, ) ) diff --git a/metricflow/test/integration/configured_test_case.py b/metricflow/test/integration/configured_test_case.py index a6c0ac8e20..3994e93849 100644 --- a/metricflow/test/integration/configured_test_case.py +++ b/metricflow/test/integration/configured_test_case.py @@ -48,10 +48,10 @@ class Config: # noqa: D name: str # Name of the semantic model to use. model: IntegrationTestModel - metrics: Tuple[str, ...] # The SQL query that can be run to obtain the expected results. check_query: str file_path: str + metrics: Tuple[str, ...] = () group_bys: Tuple[str, ...] = () order_bys: Tuple[str, ...] = () # The required features in the DW engine for the test to complete. 
diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index ea5affa47c..787c9f4ca3 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -102,3 +102,46 @@ integration_test: GROUP BY v.ds , u.home_state +--- +integration_test: + name: query_dimension_only + description: Query dimension only + model: SIMPLE_MODEL + group_bys: ["user__home_state"] + check_query: | + SELECT + u.home_state AS user__home_state + FROM {{ source_schema }}.dim_users u + GROUP BY + u.home_state +--- +integration_test: + name: query_dimensions_only + description: Query multiple dimensions without metrics + model: SIMPLE_MODEL + group_bys: ["ds", "user__home_state"] + check_query: | + SELECT + u.home_state AS user__home_state + , v.ds + FROM {{ source_schema }}.fct_id_verifications v + LEFT OUTER JOIN {{ source_schema }}.dim_users u + ON u.user_id = v.user_id + AND u.ds = v.ds + GROUP BY + v.ds + , u.home_state +--- +integration_test: + name: query_dimension_only_with_constraint + description: Query dimension only + model: SIMPLE_MODEL + group_bys: ["user__home_state"] + where_filter: "{{ render_dimension_template('user__home_state') }} = 'CA'" + check_query: | + SELECT + u.home_state AS user__home_state + FROM {{ source_schema }}.dim_users u + GROUP BY + u.home_state + WHERE user__home_state = 'CA' diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index ea8a1e0539..b34a8b0b46 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -1248,6 +1248,7 @@ def test_distinct_values( # noqa: D mf_test_session_state: MetricFlowTestSessionState, dataflow_plan_builder: DataflowPlanBuilder, dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter, +
column_association_resolver: ColumnAssociationResolver, sql_client: SqlClient, ) -> None: """Tests a plan to get distinct values for a dimension.""" @@ -1256,6 +1257,23 @@ def test_distinct_values( # noqa: D dimension_specs=( DimensionSpec(element_name="country_latest", entity_links=(EntityReference(element_name="listing"),)), ), + where_constraint=( + WhereSpecFactory( + column_association_resolver=column_association_resolver, + ).create_from_where_filter( + PydanticWhereFilter( + where_sql_template="{{ Dimension('listing__country_latest') }} = 'us'", + ) + ) + ), + order_by_specs=( + OrderBySpec( + dimension_spec=DimensionSpec( + element_name="country_latest", entity_links=(EntityReference(element_name="listing"),) + ), + descending=True, + ), + ), limit=100, ) ) diff --git a/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml b/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml index a3e9b689d1..a5abebf496 100644 --- a/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml +++ b/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml @@ -15,6 +15,7 @@ + diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 24159db194..65a577898a 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -29,6 +29,7 @@ + @@ -54,6 +55,7 @@ + @@ -80,6 +82,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml index ff4e3095e1..bbec2125ae 
100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml @@ -39,6 +39,7 @@ + @@ -67,6 +68,7 @@ + @@ -88,6 +90,7 @@ + @@ -137,6 +140,7 @@ + @@ -165,6 +169,7 @@ + @@ -186,6 +191,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml index 4aac7f8a75..c6623ff87a 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml @@ -30,6 +30,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml index bd50f72a90..da36eaeb4e 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml @@ -24,6 +24,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml index 3d6d6e3630..3dcdd3a256 100644 --- 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml @@ -30,6 +30,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml index babc67f7c8..92e62eb15c 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml @@ -44,6 +44,7 @@ + @@ -87,6 +88,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml index a8ecff962b..55f887cbe4 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml @@ -42,6 +42,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml index 0cedbbee96..c478472782 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml @@ -42,6 +42,7 @@ + diff --git 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml index a2ee6ee189..807baea1a9 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml @@ -42,6 +42,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index 387eb97250..4ae25b6615 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -7,7 +7,7 @@ - + @@ -16,102 +16,39 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml index 55f0470165..be5cbe3041 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml @@ -32,6 +32,7 @@ + @@ -58,6 +59,7 @@ + @@ -79,6 +81,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml index 6c68f6700e..01f1a95ce3 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml @@ -34,6 +34,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml index 9a2f8e0984..bfc0ddce55 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml @@ -54,6 +54,7 @@ + @@ -87,6 +88,7 @@ + @@ -115,6 +117,7 @@ + @@ -135,6 +138,7 @@ + @@ -192,6 +196,7 @@ + @@ -225,6 +230,7 @@ + @@ -253,6 +259,7 @@ + @@ -273,6 +280,7 @@ + @@ -320,6 +328,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml index 814ba44eff..d7c5184d04 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml @@ -54,6 +54,7 @@ + @@ -87,6 +88,7 @@ + @@ -132,6 +134,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml index 
de477d1c55..e5aa8040d9 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -48,6 +48,7 @@ + @@ -76,6 +77,7 @@ + @@ -97,6 +99,7 @@ + @@ -146,6 +149,7 @@ + @@ -174,6 +178,7 @@ + @@ -195,6 +200,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index dcfd0e527e..924e93ff83 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -29,6 +29,7 @@ + @@ -70,6 +71,7 @@ + @@ -102,6 +104,7 @@ + @@ -224,6 +227,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml index 7db1f469eb..f4e81782c5 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml @@ -39,6 +39,7 @@ + @@ -86,6 +87,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml index 4e848370c1..143d3c0dc8 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml @@ -57,6 +57,7 @@ + diff 
--git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml index 75695a7288..ea5b947419 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml @@ -28,6 +28,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml index 75695a7288..ea5b947419 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml @@ -28,6 +28,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml index 3469a47a37..92e38e973a 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -48,6 +48,7 @@ + @@ -76,6 +77,7 @@ + @@ -97,6 +99,7 @@ + @@ -146,6 +149,7 @@ + @@ -174,6 +178,7 @@ + @@ -195,6 +200,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml index 347e702ec1..1b6a1fd180 100644 --- 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml @@ -28,6 +28,7 @@ + @@ -59,6 +60,7 @@ + @@ -85,6 +87,7 @@ + @@ -106,6 +109,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml index 84cda54d21..e7be93e8c0 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml @@ -28,6 +28,7 @@ + @@ -62,6 +63,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml index d6b05b3cd9..b4d89f4c64 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml @@ -42,6 +42,7 @@ + @@ -64,6 +65,7 @@ + @@ -85,6 +87,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml index 1b06389d8f..fa850673ed 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml @@ -37,6 +37,7 @@ + @@ -53,6 +54,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml index d5d1074893..60acbaeec9 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml @@ -37,6 +37,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml index 2ead8da01d..0e7296501e 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml @@ -37,6 +37,7 @@ + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml index 6abd3f4209..712f32308b 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml @@ -37,6 +37,7 @@ + diff --git 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql index 870b2c80ea..334e8afddf 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql @@ -1,250 +1,56 @@ -- Order By ['listing__country_latest'] Limit 100 SELECT - subq_10.listing__country_latest + subq_2.listing__country_latest FROM ( - -- Pass Only Elements: - -- ['listing__country_latest'] + -- Constrain Output with WHERE SELECT - subq_9.listing__country_latest + subq_1.listing__country_latest FROM ( - -- Compute Metrics via Expressions + -- Pass Only Elements: + -- ['listing__country_latest'] SELECT - subq_8.listing__country_latest - , subq_8.bookings + subq_0.listing__country_latest FROM ( - -- Aggregate Measures + -- Read Elements From Semantic Model 'listings_latest' SELECT - subq_7.listing__country_latest - , SUM(subq_7.bookings) AS bookings - FROM ( - -- Pass Only Elements: - -- ['bookings', 'listing__country_latest'] - SELECT - subq_6.listing__country_latest - , subq_6.bookings - FROM ( - -- Join Standard Outputs - SELECT - subq_2.listing AS listing - , subq_5.country_latest AS listing__country_latest - , subq_2.bookings AS bookings - FROM ( - -- Pass Only Elements: - -- ['bookings', 'listing'] - SELECT - subq_1.listing - , subq_1.bookings - FROM ( - -- Metric Time Dimension 'ds' - SELECT - subq_0.ds__day - , subq_0.ds__week - , subq_0.ds__month - , subq_0.ds__quarter - , subq_0.ds__year - , subq_0.ds_partitioned__day - , subq_0.ds_partitioned__week - , subq_0.ds_partitioned__month - , subq_0.ds_partitioned__quarter - , subq_0.ds_partitioned__year - , subq_0.paid_at__day - , subq_0.paid_at__week - , subq_0.paid_at__month - , subq_0.paid_at__quarter - , 
subq_0.paid_at__year - , subq_0.booking__ds__day - , subq_0.booking__ds__week - , subq_0.booking__ds__month - , subq_0.booking__ds__quarter - , subq_0.booking__ds__year - , subq_0.booking__ds_partitioned__day - , subq_0.booking__ds_partitioned__week - , subq_0.booking__ds_partitioned__month - , subq_0.booking__ds_partitioned__quarter - , subq_0.booking__ds_partitioned__year - , subq_0.booking__paid_at__day - , subq_0.booking__paid_at__week - , subq_0.booking__paid_at__month - , subq_0.booking__paid_at__quarter - , subq_0.booking__paid_at__year - , subq_0.ds__day AS metric_time__day - , subq_0.ds__week AS metric_time__week - , subq_0.ds__month AS metric_time__month - , subq_0.ds__quarter AS metric_time__quarter - , subq_0.ds__year AS metric_time__year - , subq_0.listing - , subq_0.guest - , subq_0.host - , subq_0.booking__listing - , subq_0.booking__guest - , subq_0.booking__host - , subq_0.is_instant - , subq_0.booking__is_instant - , subq_0.bookings - , subq_0.instant_bookings - , subq_0.booking_value - , subq_0.max_booking_value - , subq_0.min_booking_value - , subq_0.bookers - , subq_0.average_booking_value - , subq_0.referred_bookings - , subq_0.median_booking_value - , subq_0.booking_value_p99 - , subq_0.discrete_booking_value_p99 - , subq_0.approximate_continuous_booking_value_p99 - , subq_0.approximate_discrete_booking_value_p99 - FROM ( - -- Read Elements From Semantic Model 'bookings_source' - SELECT - 1 AS bookings - , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings - , bookings_source_src_10001.booking_value - , bookings_source_src_10001.booking_value AS max_booking_value - , bookings_source_src_10001.booking_value AS min_booking_value - , bookings_source_src_10001.guest_id AS bookers - , bookings_source_src_10001.booking_value AS average_booking_value - , bookings_source_src_10001.booking_value AS booking_payments - , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings - , bookings_source_src_10001.booking_value AS 
median_booking_value - , bookings_source_src_10001.booking_value AS booking_value_p99 - , bookings_source_src_10001.booking_value AS discrete_booking_value_p99 - , bookings_source_src_10001.booking_value AS approximate_continuous_booking_value_p99 - , bookings_source_src_10001.booking_value AS approximate_discrete_booking_value_p99 - , bookings_source_src_10001.is_instant - , bookings_source_src_10001.ds AS ds__day - , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week - , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year - , bookings_source_src_10001.ds_partitioned AS ds_partitioned__day - , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week - , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year - , bookings_source_src_10001.paid_at AS paid_at__day - , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS paid_at__week - , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS paid_at__month - , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS paid_at__quarter - , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS paid_at__year - , bookings_source_src_10001.is_instant AS booking__is_instant - , bookings_source_src_10001.ds AS booking__ds__day - , DATE_TRUNC('week', bookings_source_src_10001.ds) AS booking__ds__week - , DATE_TRUNC('month', bookings_source_src_10001.ds) AS booking__ds__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS booking__ds__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds) AS booking__ds__year - , bookings_source_src_10001.ds_partitioned AS booking__ds_partitioned__day - , 
DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__week - , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS booking__ds_partitioned__year - , bookings_source_src_10001.paid_at AS booking__paid_at__day - , DATE_TRUNC('week', bookings_source_src_10001.paid_at) AS booking__paid_at__week - , DATE_TRUNC('month', bookings_source_src_10001.paid_at) AS booking__paid_at__month - , DATE_TRUNC('quarter', bookings_source_src_10001.paid_at) AS booking__paid_at__quarter - , DATE_TRUNC('year', bookings_source_src_10001.paid_at) AS booking__paid_at__year - , bookings_source_src_10001.listing_id AS listing - , bookings_source_src_10001.guest_id AS guest - , bookings_source_src_10001.host_id AS host - , bookings_source_src_10001.listing_id AS booking__listing - , bookings_source_src_10001.guest_id AS booking__guest - , bookings_source_src_10001.host_id AS booking__host - FROM ***************************.fct_bookings bookings_source_src_10001 - ) subq_0 - ) subq_1 - ) subq_2 - LEFT OUTER JOIN ( - -- Pass Only Elements: - -- ['country_latest', 'listing'] - SELECT - subq_4.listing - , subq_4.country_latest - FROM ( - -- Metric Time Dimension 'ds' - SELECT - subq_3.ds__day - , subq_3.ds__week - , subq_3.ds__month - , subq_3.ds__quarter - , subq_3.ds__year - , subq_3.created_at__day - , subq_3.created_at__week - , subq_3.created_at__month - , subq_3.created_at__quarter - , subq_3.created_at__year - , subq_3.listing__ds__day - , subq_3.listing__ds__week - , subq_3.listing__ds__month - , subq_3.listing__ds__quarter - , subq_3.listing__ds__year - , subq_3.listing__created_at__day - , subq_3.listing__created_at__week - , subq_3.listing__created_at__month - , subq_3.listing__created_at__quarter - , subq_3.listing__created_at__year - , 
subq_3.ds__day AS metric_time__day - , subq_3.ds__week AS metric_time__week - , subq_3.ds__month AS metric_time__month - , subq_3.ds__quarter AS metric_time__quarter - , subq_3.ds__year AS metric_time__year - , subq_3.listing - , subq_3.user - , subq_3.listing__user - , subq_3.country_latest - , subq_3.is_lux_latest - , subq_3.capacity_latest - , subq_3.listing__country_latest - , subq_3.listing__is_lux_latest - , subq_3.listing__capacity_latest - , subq_3.listings - , subq_3.largest_listing - , subq_3.smallest_listing - FROM ( - -- Read Elements From Semantic Model 'listings_latest' - SELECT - 1 AS listings - , listings_latest_src_10004.capacity AS largest_listing - , listings_latest_src_10004.capacity AS smallest_listing - , listings_latest_src_10004.created_at AS ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year - , listings_latest_src_10004.created_at AS created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS created_at__year - , listings_latest_src_10004.country AS country_latest - , listings_latest_src_10004.is_lux AS is_lux_latest - , listings_latest_src_10004.capacity AS capacity_latest - , listings_latest_src_10004.created_at AS listing__ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter - , DATE_TRUNC('year', 
listings_latest_src_10004.created_at) AS listing__ds__year - , listings_latest_src_10004.created_at AS listing__created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year - , listings_latest_src_10004.country AS listing__country_latest - , listings_latest_src_10004.is_lux AS listing__is_lux_latest - , listings_latest_src_10004.capacity AS listing__capacity_latest - , listings_latest_src_10004.listing_id AS listing - , listings_latest_src_10004.user_id AS user - , listings_latest_src_10004.user_id AS listing__user - FROM ***************************.dim_listings_latest listings_latest_src_10004 - ) subq_3 - ) subq_4 - ) subq_5 - ON - subq_2.listing = subq_5.listing - ) subq_6 - ) subq_7 - GROUP BY - subq_7.listing__country_latest - ) subq_8 - ) subq_9 -) subq_10 -ORDER BY subq_10.listing__country_latest + 1 AS listings + , listings_latest_src_10004.capacity AS largest_listing + , listings_latest_src_10004.capacity AS smallest_listing + , listings_latest_src_10004.created_at AS ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year + , listings_latest_src_10004.created_at AS created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS 
created_at__year + , listings_latest_src_10004.country AS country_latest + , listings_latest_src_10004.is_lux AS is_lux_latest + , listings_latest_src_10004.capacity AS capacity_latest + , listings_latest_src_10004.created_at AS listing__ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__ds__year + , listings_latest_src_10004.created_at AS listing__created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year + , listings_latest_src_10004.country AS listing__country_latest + , listings_latest_src_10004.is_lux AS listing__is_lux_latest + , listings_latest_src_10004.capacity AS listing__capacity_latest + , listings_latest_src_10004.listing_id AS listing + , listings_latest_src_10004.user_id AS user + , listings_latest_src_10004.user_id AS listing__user + FROM ***************************.dim_listings_latest listings_latest_src_10004 + ) subq_0 + GROUP BY + subq_0.listing__country_latest + ) subq_1 + WHERE listing__country_latest = 'us' +) subq_2 +ORDER BY subq_2.listing__country_latest DESC LIMIT 100 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql index 328d6a2d79..592773c228 100644 --- 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql @@ -1,19 +1,17 @@ --- Join Standard Outputs --- Pass Only Elements: --- ['bookings', 'listing__country_latest'] --- Aggregate Measures --- Compute Metrics via Expressions --- Pass Only Elements: --- ['listing__country_latest'] +-- Constrain Output with WHERE -- Order By ['listing__country_latest'] Limit 100 SELECT - listings_latest_src_10004.country AS listing__country_latest -FROM ***************************.fct_bookings bookings_source_src_10001 -LEFT OUTER JOIN - ***************************.dim_listings_latest listings_latest_src_10004 -ON - bookings_source_src_10001.listing_id = listings_latest_src_10004.listing_id -GROUP BY - listings_latest_src_10004.country -ORDER BY listing__country_latest + listing__country_latest +FROM ( + -- Read Elements From Semantic Model 'listings_latest' + -- Pass Only Elements: + -- ['listing__country_latest'] + SELECT + country AS listing__country_latest + FROM ***************************.dim_listings_latest listings_latest_src_10004 + GROUP BY + country +) subq_4 +WHERE listing__country_latest = 'us' +ORDER BY listing__country_latest DESC LIMIT 100 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml index 0a98623c4d..050261f56d 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml @@ -1,872 +1,180 @@ - - - - - - + + + + + + - - - - + + + + - - - - - - - - - - + + + + + + + + + - - - - - - - - - - - + + + + + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + diff --git 
a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml index ff4e3095e1..bbec2125ae 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml @@ -39,6 +39,7 @@ + @@ -67,6 +68,7 @@ + @@ -88,6 +90,7 @@ + @@ -137,6 +140,7 @@ + @@ -165,6 +169,7 @@ + @@ -186,6 +191,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml index 567f1b3146..e39985abe2 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml @@ -45,6 +45,7 @@ + @@ -77,6 +78,7 @@ + @@ -98,6 +100,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml index 6e1d6b4714..a7fd9bc79c 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml @@ -35,6 +35,7 @@ + @@ -78,6 +79,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml 
b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml index 34e73d06a2..1cc411fa9e 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml @@ -35,6 +35,7 @@ + @@ -78,6 +79,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml index 1f42c277f8..5cac4c6502 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml @@ -49,6 +49,7 @@ + @@ -92,6 +93,7 @@ + @@ -151,6 +153,7 @@ + @@ -194,6 +197,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml index d8a77edaeb..2bbabe3a4e 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml @@ -76,6 +76,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml index 9475198ca2..698fb9716b 100644 --- 
a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml @@ -35,6 +35,7 @@ + @@ -78,6 +79,7 @@ + @@ -121,6 +123,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml index 0b0f3c1d06..97cc8dfb6c 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml @@ -46,6 +46,7 @@ + @@ -89,6 +90,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml index 8b2fc3bdcb..db07c19cd2 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml @@ -35,6 +35,7 @@ + @@ -88,6 +89,7 @@ + @@ -121,6 +123,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml index 5985e50ac7..2be09b7929 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml +++ 
b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml @@ -35,6 +35,7 @@ + @@ -88,6 +89,7 @@ + @@ -121,6 +123,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml index 8f72acf134..82f90ea0ca 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml @@ -44,6 +44,7 @@ + @@ -87,6 +88,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml index 0892b0a651..fbbb11c578 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml @@ -51,6 +51,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml index fe405e35b4..159a17cbef 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml @@ -35,6 +35,7 @@ + @@ -92,6 +93,7 @@ + @@ -135,6 +137,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml 
b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml index 4236d7a342..716cc3cc9e 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml @@ -35,6 +35,7 @@ + @@ -99,6 +100,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml index 94e4d36375..1c326d36ba 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml @@ -58,6 +58,7 @@ + @@ -101,6 +102,7 @@ + @@ -146,6 +148,7 @@ + @@ -189,6 +192,7 @@ + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml index 294f42931e..b8d0a40614 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml @@ -65,6 +65,7 @@ + @@ -120,6 +121,7 @@ + From 57498ecd09e3a02f61bd1df7b90c01370e48f502 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 21 Sep 2023 13:06:51 -0700 Subject: [PATCH 04/19] Test fix --- .../test/integration/test_cases/itest_dimensions.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml 
index 787c9f4ca3..6645a8974a 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -119,11 +119,11 @@ integration_test: name: query_dimensions_only description: Query multiple dimensions without metrics model: SIMPLE_MODEL - group_bys: ["ds", "user__home_state"] + group_bys: ["ds__day", "user__home_state"] check_query: | SELECT u.home_state AS user__home_state - , v.ds + , v.ds AS ds__day FROM {{ source_schema }}.fct_id_verifications v LEFT OUTER JOIN {{ source_schema }}.dim_users u ON u.user_id = v.user_id @@ -142,6 +142,6 @@ integration_test: SELECT u.home_state AS user__home_state FROM {{ source_schema }}.dim_users u + WHERE user__home_state = 'CA' GROUP BY u.home_state - WHERE user__home_state = 'CA' From 13bd280c0e54dcadb847c14f36cf1b681c4a518d Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 21 Sep 2023 16:58:12 -0700 Subject: [PATCH 05/19] WIP --- metricflow/dataflow/builder/source_node.py | 35 ++-- metricflow/query/query_parser.py | 169 +++++++++++++----- .../test_cases/itest_dimensions.yaml | 31 +++- metricflow/time/time_granularity_solver.py | 64 ++++--- 4 files changed, 201 insertions(+), 98 deletions(-) diff --git a/metricflow/dataflow/builder/source_node.py b/metricflow/dataflow/builder/source_node.py index a345273dd3..6c930c4fdb 100644 --- a/metricflow/dataflow/builder/source_node.py +++ b/metricflow/dataflow/builder/source_node.py @@ -28,26 +28,23 @@ def create_from_data_sets( source_nodes: List[BaseOutput] = [] for data_set in data_sets: read_node = ReadSqlSourceNode(data_set) - if not with_measures: - source_nodes.append(read_node) - else: - agg_time_dim_to_measures_grouper = ( - self._semantic_manifest_lookup.semantic_model_lookup.get_aggregation_time_dimensions_with_measures( - data_set.semantic_model_reference - ) + agg_time_dim_to_measures_grouper = ( + 
self._semantic_manifest_lookup.semantic_model_lookup.get_aggregation_time_dimensions_with_measures( + data_set.semantic_model_reference ) + ) - # Dimension sources may not have any measures -> no aggregation time dimensions. - time_dimension_references = agg_time_dim_to_measures_grouper.keys - if len(time_dimension_references) == 0: - source_nodes.append(read_node) - else: - # Splits the measures by distinct aggregate time dimension. - for time_dimension_reference in time_dimension_references: - source_nodes.append( - MetricTimeDimensionTransformNode( - parent_node=read_node, - aggregation_time_dimension_reference=time_dimension_reference, - ) + # Dimension sources may not have any measures -> no aggregation time dimensions. + time_dimension_references = agg_time_dim_to_measures_grouper.keys + if len(time_dimension_references) == 0: + source_nodes.append(read_node) + else: + # Splits the measures by distinct aggregate time dimension. + for time_dimension_reference in time_dimension_references: + source_nodes.append( + MetricTimeDimensionTransformNode( + parent_node=read_node, + aggregation_time_dimension_reference=time_dimension_reference, ) + ) return source_nodes diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 23686aa457..ddf12f0727 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -30,7 +30,6 @@ from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.query.query_exceptions import InvalidQueryException from metricflow.specs.column_assoc import ColumnAssociationResolver -from metricflow.specs.query_interface import QueryInterfaceMetric, QueryParameter from metricflow.specs.specs import ( DimensionSpec, EntitySpec, @@ -169,16 +168,16 @@ def _top_fuzzy_matches( def parse_and_validate_query( self, metric_names: Optional[Sequence[str]] = None, - metrics: Optional[Sequence[QueryInterfaceMetric]] = None, + metrics: Optional[Sequence[QueryParameterMetric]] = 
None, group_by_names: Optional[Sequence[str]] = None, - group_by: Optional[Sequence[QueryParameter]] = None, + group_by: Optional[Sequence[QueryParameterDimension]] = None, limit: Optional[int] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, where_constraint: Optional[WhereFilter] = None, where_constraint_str: Optional[str] = None, order: Optional[Sequence[str]] = None, - order_by: Optional[Sequence[QueryParameter]] = None, + order_by: Optional[Sequence[QueryParameterDimension]] = None, time_granularity: Optional[TimeGranularity] = None, ) -> MetricFlowQuerySpec: """Parse the query into spec objects, validating them in the process. @@ -289,22 +288,8 @@ def _construct_metric_specs_for_query( ) return tuple(metric_specs) - def _get_group_by_names( - self, group_by_names: Optional[Sequence[str]], group_by: Optional[Sequence[QueryParameter]] - ) -> Sequence[str]: - assert not ( - group_by_names and group_by - ), "Both group_by_names and group_by were set, but if a group by is specified you should only use one of these!" 
- return ( - group_by_names - if group_by_names - else [f"{g.name}__{g.grain}" if g.grain else g.name for g in group_by] - if group_by - else [] - ) - def _get_metric_names( - self, metric_names: Optional[Sequence[str]], metrics: Optional[Sequence[QueryInterfaceMetric]] + self, metric_names: Optional[Sequence[str]], metrics: Optional[Sequence[QueryParameterMetric]] ) -> Sequence[str]: assert_exactly_one_arg_set(metric_names=metric_names, metrics=metrics) return metric_names if metric_names else [m.name for m in metrics] if metrics else [] @@ -321,7 +306,9 @@ def _get_where_filter( PydanticWhereFilter(where_sql_template=where_constraint_str) if where_constraint_str else where_constraint ) - def _get_order(self, order: Optional[Sequence[str]], order_by: Optional[Sequence[QueryParameter]]) -> Sequence[str]: + def _get_order( + self, order: Optional[Sequence[str]], order_by: Optional[Sequence[QueryParameterDimension]] + ) -> Sequence[str]: assert not ( order and order_by ), "Both order_by_names and order_by were set, but if an order by is specified you should only use one of these!" 
@@ -330,20 +317,19 @@ def _get_order(self, order: Optional[Sequence[str]], order_by: Optional[Sequence def _parse_and_validate_query( self, metric_names: Optional[Sequence[str]] = None, - metrics: Optional[Sequence[QueryInterfaceMetric]] = None, + metrics: Optional[Sequence[QueryParameterMetric]] = None, group_by_names: Optional[Sequence[str]] = None, - group_by: Optional[Sequence[QueryParameter]] = None, + group_by: Optional[Sequence[QueryParameterDimension]] = None, limit: Optional[int] = None, time_constraint_start: Optional[datetime.datetime] = None, time_constraint_end: Optional[datetime.datetime] = None, where_constraint: Optional[WhereFilter] = None, where_constraint_str: Optional[str] = None, order: Optional[Sequence[str]] = None, - order_by: Optional[Sequence[QueryParameter]] = None, + order_by: Optional[Sequence[QueryParameterDimension]] = None, time_granularity: Optional[TimeGranularity] = None, ) -> MetricFlowQuerySpec: metric_names = self._get_metric_names(metric_names, metrics) - group_by_names = self._get_group_by_names(group_by_names, group_by) where_filter = self._get_where_filter(where_constraint, where_constraint_str) order = self._get_order(order, order_by) @@ -393,7 +379,9 @@ def _parse_and_validate_query( # If the time constraint is all time, just ignore and not render time_constraint = None - requested_linkable_specs = self._parse_linkable_element_names(group_by_names, metric_references) + requested_linkable_specs = self._parse_linkable_elements( + qualified_linkable_names=group_by_names, linkable_elements=group_by, metric_references=metric_references + ) where_filter_spec: Optional[WhereFilterSpec] = None if where_filter is not None: try: @@ -427,6 +415,7 @@ def _parse_and_validate_query( self._validate_no_time_dimension_query(metric_references=metric_references) self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) + self._validate_date_part(metric_references, time_dimension_specs) order_by_specs = 
self._parse_order_by(order or [], partial_time_dimension_spec_replacements) @@ -436,8 +425,9 @@ def _parse_and_validate_query( for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.filter is not None: - group_by_specs_for_one_metric = self._parse_linkable_element_names( + group_by_specs_for_one_metric = self._parse_linkable_elements( qualified_linkable_names=group_by_names, + linkable_elements=group_by, metric_references=(metric_reference,), ) @@ -461,12 +451,11 @@ def _parse_and_validate_query( ) # Validate all of them together. - if metric_references: - self._validate_linkable_specs( - metric_references=metric_references, - all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, - time_dimension_specs=time_dimension_specs, - ) + self._validate_linkable_specs( + metric_references=metric_references, + all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, + time_dimension_specs=time_dimension_specs, + ) self._validate_order_by_specs( order_by_specs=order_by_specs, @@ -530,6 +519,35 @@ def _validate_order_by_specs( ): raise InvalidQueryException(f"Order by item {order_by_spec} not in the query") + def _validate_date_part( + self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec] + ) -> None: + """Validate that date parts can be used for metrics. + + TODO: figure out expected behavior for date part with these types of metrics. + """ + date_part_requested = False + for time_dimension_spec in time_dimension_specs: + if time_dimension_spec.date_part: + date_part_requested = True + if time_dimension_spec.date_part.to_int() < time_dimension_spec.time_granularity.to_int(): + raise RequestTimeGranularityException( + f"Date part {time_dimension_spec.date_part.name} is not compatible with time granularity " + f"{time_dimension_spec.time_granularity.name}. 
Compatible granularities include: " + f"{[granularity.name for granularity in time_dimension_spec.date_part.compatible_granularities]}" + ) + if date_part_requested: + for metric_reference in metric_references: + metric = self._metric_lookup.get_metric(metric_reference) + if metric.type == MetricType.CUMULATIVE: + raise UnableToSatisfyQueryError("Cannot extract date part for cumulative metrics.") + elif metric.type == MetricType.DERIVED: + for input_metric in metric.type_params.metrics or []: + if input_metric.offset_to_grain: + raise UnableToSatisfyQueryError( + "Cannot extract date part for metrics with offset_to_grain." + ) + def _adjust_time_range_constraint( self, metric_references: Sequence[MetricReference], @@ -644,26 +662,44 @@ def _parse_metric_names( metric_references.extend(list(input_metrics)) return tuple(metric_references) - def _parse_linkable_element_names( + def _parse_linkable_elements( self, - qualified_linkable_names: Sequence[str], metric_references: Sequence[MetricReference], + qualified_linkable_names: Optional[Sequence[str]] = None, + linkable_elements: Optional[Sequence[QueryParameterDimension]] = None, ) -> QueryTimeLinkableSpecSet: """Convert the linkable spec names into the respective specification objects.""" - qualified_linkable_names = [x.lower() for x in qualified_linkable_names] + # TODO: refactor to only support group_by object inputs (removing group_by_names param) + assert not ( + qualified_linkable_names and linkable_elements + ), "Both group_by_names and group_by were set, but if a group by is specified you should only use one of these!" 
+ + structured_names: List[StructuredLinkableSpecName] = [] + if qualified_linkable_names: + qualified_linkable_names = [x.lower() for x in qualified_linkable_names] + structured_names = [StructuredLinkableSpecName.from_name(name) for name in qualified_linkable_names] + elif linkable_elements: + for linkable_element in linkable_elements: + parsed_name = StructuredLinkableSpecName.from_name(linkable_element.name) + structured_name = StructuredLinkableSpecName( + entity_link_names=parsed_name.entity_link_names, + element_name=parsed_name.element_name, + time_granularity=linkable_element.grain, + date_part=linkable_element.date_part, + ) + structured_names.append(structured_name) dimension_specs = [] time_dimension_specs = [] partial_time_dimension_specs = [] entity_specs = [] - for qualified_name in qualified_linkable_names: - structured_name = StructuredLinkableSpecName.from_name(qualified_name) + for structured_name in structured_names: element_name = structured_name.element_name entity_links = tuple(EntityReference(element_name=x) for x in structured_name.entity_link_names) # Create the spec based on the type of element referenced. if TimeDimensionReference(element_name=element_name) in self._known_time_dimension_element_references: - if structured_name.time_granularity: + if structured_name.time_granularity and not structured_name.date_part: time_dimension_specs.append( TimeDimensionSpec( element_name=element_name, @@ -672,31 +708,47 @@ def _parse_linkable_element_names( ) ) else: - partial_time_dimension_specs.append( - PartialTimeDimensionSpec( - element_name=element_name, - entity_links=entity_links, - ) + partial_time_dimension_spec = PartialTimeDimensionSpec( + element_name=element_name, entity_links=entity_links, date_part=structured_name.date_part ) + # If both granularity & date_part are requested, verify requested & resolved granularities match. 
+ if structured_name.time_granularity and structured_name.date_part: + self._verify_resolved_granularity_for_date_part( + requested_dimension_structured_name=structured_name, + partial_time_dimension_spec=partial_time_dimension_spec, + metric_references=metric_references, + ) + partial_time_dimension_specs.append(partial_time_dimension_spec) + elif DimensionReference(element_name=element_name) in self._known_dimension_element_references: dimension_specs.append(DimensionSpec(element_name=element_name, entity_links=entity_links)) elif EntityReference(element_name=element_name) in self._known_entity_element_references: entity_specs.append(EntitySpec(element_name=element_name, entity_links=entity_links)) else: + valid_group_bys_for_metrics = self._metric_lookup.element_specs_for_metrics(list(metric_references)) valid_group_by_names_for_metrics = sorted( - x.qualified_name for x in self._metric_lookup.element_specs_for_metrics(list(metric_references)) + list( + set( + x.qualified_name if qualified_linkable_names else x.element_name + for x in valid_group_bys_for_metrics + ) + ) ) + # If requested by name, show qualified name. If requested as object, show element name. 
+ display_name = structured_name.qualified_name if qualified_linkable_names else element_name suggestions = { - f"Suggestions for '{qualified_name}'": pformat_big_objects( + f"Suggestions for '{display_name}'": pformat_big_objects( MetricFlowQueryParser._top_fuzzy_matches( - item=qualified_name, + item=display_name, candidate_items=valid_group_by_names_for_metrics, ) ) } raise UnableToSatisfyQueryError( - f"Unknown element name '{element_name}' in dimension name '{qualified_name}'", + f"Unknown element name '{element_name}' in dimension name '{display_name}'" + if qualified_linkable_names + else f"Unknown dimension {element_name}", context=suggestions, ) @@ -707,6 +759,28 @@ def _parse_linkable_element_names( entity_specs=tuple(entity_specs), ) + def _verify_resolved_granularity_for_date_part( + self, + requested_dimension_structured_name: StructuredLinkableSpecName, + partial_time_dimension_spec: PartialTimeDimensionSpec, + metric_references: Sequence[MetricReference], + ) -> None: + """Enforce that any granularity value associated with a date part query is the minimum. + + By default, we will always ensure that a date_part query request uses the minimum granularity. + However, there are some interfaces where the user must pass in a granularity, so we need a check to + ensure that the correct value was passed in. + """ + resolved_granularity = self._time_granularity_solver.find_minimum_granularity_for_partial_time_dimension_spec( + partial_time_dimension_spec=partial_time_dimension_spec, metric_references=metric_references + ) + if resolved_granularity != requested_dimension_structured_name.time_granularity: + raise RequestTimeGranularityException( + f"When applying a date part to dimension '{requested_dimension_structured_name.qualified_name}' with " + f"metrics {[metric.element_name for metric in metric_references]}, only {resolved_granularity.name} " + "granularity can be used." 
+ ) + def _get_invalid_linkable_specs( self, metric_references: Tuple[MetricReference, ...], @@ -796,6 +870,7 @@ def _parse_order_by( element_name=parsed_name.element_name, entity_links=entity_links, time_granularity=parsed_name.time_granularity, + date_part=parsed_name.date_part, ), descending=descending, ) diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index 6645a8974a..269682b5b1 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -123,14 +123,39 @@ integration_test: check_query: | SELECT u.home_state AS user__home_state - , v.ds AS ds__day + , u.ds AS ds__day + FROM {{ source_schema }}.dim_users u + GROUP BY + u.ds + , u.home_state +--- +integration_test: + name: query_dimensions_from_different_tables + description: Query multiple dimensions without metrics, requiring a join + model: SIMPLE_MODEL + group_bys: ["user__home_state", "verification__ds__day"] + check_query: | + SELECT + u.home_state AS user__home_state + , v.ds AS verification__ds__day FROM {{ source_schema }}.fct_id_verifications v LEFT OUTER JOIN {{ source_schema }}.dim_users u ON u.user_id = v.user_id - AND u.ds = v.ds + GROUP BY + u.home_state + , v.ds +--- +integration_test: + name: query_time_dimension_without_granularity + description: Query just a time dimension, no granularity specified + model: SIMPLE_MODEL + group_bys: [ "verification__ds"] + check_query: | + SELECT + v.ds + FROM {{ source_schema }}.fct_id_verifications v GROUP BY v.ds - , u.home_state --- integration_test: name: query_dimension_only_with_constraint diff --git a/metricflow/time/time_granularity_solver.py b/metricflow/time/time_granularity_solver.py index 8fbf95c601..fa13733f37 100644 --- a/metricflow/time/time_granularity_solver.py +++ b/metricflow/time/time_granularity_solver.py @@ -72,6 +72,9 @@ def validate_time_granularity( e.g. 
throw an error if "ds__week" is specified for a metric with a time granularity of MONTH. """ + if not metric_references: + return None + valid_group_by_elements = self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( metric_references=metric_references, ) @@ -100,36 +103,39 @@ def resolve_granularity_for_partial_time_dimension_specs( Returns a dictionary that maps how the partial time dimension spec should be turned into a time dimension spec. """ - valid_group_by_elements = self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( - metric_references=metric_references, - ) - result: Dict[PartialTimeDimensionSpec, TimeDimensionSpec] = {} - for partial_time_dimension_spec in partial_time_dimension_specs: - minimum_time_granularity: Optional[TimeGranularity] = None - for path_key in valid_group_by_elements.path_key_to_linkable_dimensions: - if ( - path_key.element_name == partial_time_dimension_spec.element_name - and path_key.entity_links == partial_time_dimension_spec.entity_links - and path_key.time_granularity is not None - ): - minimum_time_granularity = ( - path_key.time_granularity - if minimum_time_granularity is None - else min(minimum_time_granularity, path_key.time_granularity) + if metric_references: + valid_group_by_elements = self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( + metric_references=metric_references, + ) + result: Dict[PartialTimeDimensionSpec, TimeDimensionSpec] = {} + for partial_time_dimension_spec in partial_time_dimension_specs: + minimum_time_granularity: Optional[TimeGranularity] = None + for path_key in valid_group_by_elements.path_key_to_linkable_dimensions: + if ( + path_key.element_name == partial_time_dimension_spec.element_name + and path_key.entity_links == partial_time_dimension_spec.entity_links + and path_key.time_granularity is not None + ): + minimum_time_granularity = ( + path_key.time_granularity + if minimum_time_granularity is None + else min(minimum_time_granularity, 
path_key.time_granularity) + ) + + if minimum_time_granularity is not None: + result[partial_time_dimension_spec] = TimeDimensionSpec( + element_name=partial_time_dimension_spec.element_name, + entity_links=partial_time_dimension_spec.entity_links, + time_granularity=minimum_time_granularity, ) - - if minimum_time_granularity is not None: - result[partial_time_dimension_spec] = TimeDimensionSpec( - element_name=partial_time_dimension_spec.element_name, - entity_links=partial_time_dimension_spec.entity_links, - time_granularity=minimum_time_granularity, - ) - else: - raise RequestTimeGranularityException( - f"Unable to resolve the time dimension spec for {partial_time_dimension_spec}. " - f"Valid group by elements are:\n" - f"{pformat_big_objects([spec.qualified_name for spec in valid_group_by_elements.as_spec_set.as_tuple])}" - ) + else: + raise RequestTimeGranularityException( + f"Unable to resolve the time dimension spec for {partial_time_dimension_spec}. " + f"Valid group by elements are:\n" + f"{pformat_big_objects([spec.qualified_name for spec in valid_group_by_elements.as_spec_set.as_tuple])}" + ) + else: + raise NotImplementedError # find minimum granularity for time dimension return result def adjust_time_range_to_granularity( From 8067f24fb112c84974a88116173ccb9201e26606 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 21 Sep 2023 21:21:05 -0700 Subject: [PATCH 06/19] Fix source node --- metricflow/query/query_parser.py | 3 + .../DataflowPlan/test_cyclic_join__dfp_0.xml | 2 +- .../test_distinct_values_plan__dfp_0.xml | 19 +- .../test_multihop_join_plan__dfp_0.xml | 2 +- .../DuckDB/test_distinct_values__plan0.sql | 188 ++++- .../test_distinct_values__plan0_optimized.sql | 3 +- .../test_distinct_values__plan0.xml | 728 ++++++++++++++---- metricflow/time/time_granularity_solver.py | 3 + 8 files changed, 735 insertions(+), 213 deletions(-) diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index a8e7deda0b..d63b94b711 
100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -230,6 +230,9 @@ def _validate_linkable_specs( all_linkable_specs: QueryTimeLinkableSpecSet, time_dimension_specs: Tuple[TimeDimensionSpec, ...], ) -> None: + if not metric_references: + return None + invalid_group_bys = self._get_invalid_linkable_specs( metric_references=metric_references, dimension_specs=all_linkable_specs.dimension_specs, diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 65a577898a..30f00fecf5 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -58,7 +58,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index 4ae25b6615..06f25732e1 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -40,13 +40,18 @@ - - - - - - - + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index 0e33993ef5..434a4c173a 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -77,7 +77,7 @@ - + diff --git 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql index 334e8afddf..5e955f5039 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql @@ -1,56 +1,160 @@ -- Order By ['listing__country_latest'] Limit 100 SELECT - subq_2.listing__country_latest + subq_3.listing__country_latest FROM ( -- Constrain Output with WHERE SELECT - subq_1.listing__country_latest + subq_2.listing__country_latest FROM ( -- Pass Only Elements: -- ['listing__country_latest'] SELECT - subq_0.listing__country_latest + subq_1.listing__country_latest FROM ( - -- Read Elements From Semantic Model 'listings_latest' + -- Metric Time Dimension 'ds' SELECT - 1 AS listings - , listings_latest_src_10004.capacity AS largest_listing - , listings_latest_src_10004.capacity AS smallest_listing - , listings_latest_src_10004.created_at AS ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year - , listings_latest_src_10004.created_at AS created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS created_at__year - , listings_latest_src_10004.country AS country_latest - , listings_latest_src_10004.is_lux AS is_lux_latest - , listings_latest_src_10004.capacity AS 
capacity_latest - , listings_latest_src_10004.created_at AS listing__ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__ds__year - , listings_latest_src_10004.created_at AS listing__created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year - , listings_latest_src_10004.country AS listing__country_latest - , listings_latest_src_10004.is_lux AS listing__is_lux_latest - , listings_latest_src_10004.capacity AS listing__capacity_latest - , listings_latest_src_10004.listing_id AS listing - , listings_latest_src_10004.user_id AS user - , listings_latest_src_10004.user_id AS listing__user - FROM ***************************.dim_listings_latest listings_latest_src_10004 - ) subq_0 + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__extract_year + , subq_0.ds__extract_quarter + , subq_0.ds__extract_month + , subq_0.ds__extract_week + , subq_0.ds__extract_day + , subq_0.ds__extract_dow + , subq_0.ds__extract_doy + , subq_0.created_at__day + , subq_0.created_at__week + , subq_0.created_at__month + , subq_0.created_at__quarter + , subq_0.created_at__year + , subq_0.created_at__extract_year + , subq_0.created_at__extract_quarter + , subq_0.created_at__extract_month + , subq_0.created_at__extract_week + , subq_0.created_at__extract_day + , subq_0.created_at__extract_dow + , subq_0.created_at__extract_doy + , 
subq_0.listing__ds__day + , subq_0.listing__ds__week + , subq_0.listing__ds__month + , subq_0.listing__ds__quarter + , subq_0.listing__ds__year + , subq_0.listing__ds__extract_year + , subq_0.listing__ds__extract_quarter + , subq_0.listing__ds__extract_month + , subq_0.listing__ds__extract_week + , subq_0.listing__ds__extract_day + , subq_0.listing__ds__extract_dow + , subq_0.listing__ds__extract_doy + , subq_0.listing__created_at__day + , subq_0.listing__created_at__week + , subq_0.listing__created_at__month + , subq_0.listing__created_at__quarter + , subq_0.listing__created_at__year + , subq_0.listing__created_at__extract_year + , subq_0.listing__created_at__extract_quarter + , subq_0.listing__created_at__extract_month + , subq_0.listing__created_at__extract_week + , subq_0.listing__created_at__extract_day + , subq_0.listing__created_at__extract_dow + , subq_0.listing__created_at__extract_doy + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + , subq_0.ds__extract_year AS metric_time__extract_year + , subq_0.ds__extract_quarter AS metric_time__extract_quarter + , subq_0.ds__extract_month AS metric_time__extract_month + , subq_0.ds__extract_week AS metric_time__extract_week + , subq_0.ds__extract_day AS metric_time__extract_day + , subq_0.ds__extract_dow AS metric_time__extract_dow + , subq_0.ds__extract_doy AS metric_time__extract_doy + , subq_0.listing + , subq_0.user + , subq_0.listing__user + , subq_0.country_latest + , subq_0.is_lux_latest + , subq_0.capacity_latest + , subq_0.listing__country_latest + , subq_0.listing__is_lux_latest + , subq_0.listing__capacity_latest + , subq_0.listings + , subq_0.largest_listing + , subq_0.smallest_listing + FROM ( + -- Read Elements From Semantic Model 'listings_latest' + SELECT + 1 AS listings + , listings_latest_src_10004.capacity AS largest_listing + , 
listings_latest_src_10004.capacity AS smallest_listing + , listings_latest_src_10004.created_at AS ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS ds__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS ds__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS ds__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS ds__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS ds__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS ds__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS ds__extract_doy + , listings_latest_src_10004.created_at AS created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS created_at__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS created_at__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS created_at__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS created_at__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS created_at__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS created_at__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS created_at__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS created_at__extract_doy + , 
listings_latest_src_10004.country AS country_latest + , listings_latest_src_10004.is_lux AS is_lux_latest + , listings_latest_src_10004.capacity AS capacity_latest + , listings_latest_src_10004.created_at AS listing__ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__ds__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__ds__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__ds__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__ds__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS listing__ds__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__ds__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__ds__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__ds__extract_doy + , listings_latest_src_10004.created_at AS listing__created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS 
listing__created_at__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_doy + , listings_latest_src_10004.country AS listing__country_latest + , listings_latest_src_10004.is_lux AS listing__is_lux_latest + , listings_latest_src_10004.capacity AS listing__capacity_latest + , listings_latest_src_10004.listing_id AS listing + , listings_latest_src_10004.user_id AS user + , listings_latest_src_10004.user_id AS listing__user + FROM ***************************.dim_listings_latest listings_latest_src_10004 + ) subq_0 + ) subq_1 GROUP BY - subq_0.listing__country_latest - ) subq_1 + subq_1.listing__country_latest + ) subq_2 WHERE listing__country_latest = 'us' -) subq_2 -ORDER BY subq_2.listing__country_latest DESC +) subq_3 +ORDER BY subq_3.listing__country_latest DESC LIMIT 100 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql index 592773c228..8417e18d52 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql @@ -4,6 +4,7 @@ SELECT listing__country_latest FROM ( -- Read Elements From Semantic Model 'listings_latest' + -- Metric Time Dimension 'ds' -- Pass Only Elements: -- ['listing__country_latest'] SELECT @@ -11,7 +12,7 @@ FROM ( FROM ***************************.dim_listings_latest listings_latest_src_10004 GROUP BY country -) subq_4 +) subq_6 WHERE listing__country_latest = 'us' ORDER BY listing__country_latest DESC LIMIT 100 diff --git 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml index 050261f56d..9beb3baaeb 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml @@ -1,180 +1,586 @@ - - - - - - + + + + + + - - - - + + + + - - - - - - + + + + + + - - - - - - - - - - + + + + + + + + + + - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/time/time_granularity_solver.py 
b/metricflow/time/time_granularity_solver.py index f3df8ceb21..004e476f35 100644 --- a/metricflow/time/time_granularity_solver.py +++ b/metricflow/time/time_granularity_solver.py @@ -105,6 +105,9 @@ def resolve_granularity_for_partial_time_dimension_specs( Returns a dictionary that maps how the partial time dimension spec should be turned into a time dimension spec. """ + if not partial_time_dimension_specs: + return {} + if metric_references: result: Dict[PartialTimeDimensionSpec, TimeDimensionSpec] = {} for partial_time_dimension_spec in partial_time_dimension_specs: From 2074663430ce2adb639829a63f0c9c98f716af90 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 21 Sep 2023 21:33:40 -0700 Subject: [PATCH 07/19] Remove unneeded changes --- metricflow/dataflow/builder/dataflow_plan_builder.py | 5 +---- metricflow/engine/metricflow_engine.py | 4 ---- metricflow/test/dataflow/builder/test_cyclic_join.py | 1 - metricflow/test/fixtures/dataflow_fixtures.py | 3 --- metricflow/test/fixtures/model_fixtures.py | 4 ---- .../DataflowPlan/test_cyclic_join__dfp_0.xml | 6 +++--- .../DataflowPlan/test_distinct_values_plan__dfp_0.xml | 4 ++-- .../DataflowPlan/test_multihop_join_plan__dfp_0.xml | 8 ++++---- 8 files changed, 10 insertions(+), 25 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 82beb89282..d1c91139b3 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -116,7 +116,6 @@ class DataflowPlanBuilder: def __init__( # noqa: D self, source_nodes: Sequence[BaseOutput], - source_nodes_without_measures: Sequence[BaseOutput], semantic_manifest_lookup: SemanticManifestLookup, cost_function: DataflowPlanNodeCostFunction = DefaultCostFunction(), node_output_resolver: Optional[DataflowPlanNodeOutputDataSetResolver] = None, @@ -127,7 +126,6 @@ def __init__( # noqa: D self._metric_time_dimension_reference = 
DataSet.metric_time_dimension_reference() self._cost_function = cost_function self._source_nodes = source_nodes - self._source_nodes_without_measures = source_nodes_without_measures self._column_association_resolver = ( DunderColumnAssociationResolver(semantic_manifest_lookup) if not column_association_resolver @@ -457,13 +455,12 @@ def _find_dataflow_recipe( time_range_constraint: Optional[TimeRangeConstraint] = None, ) -> Optional[DataflowRecipe]: linkable_specs = linkable_spec_set.as_tuple + source_nodes = self._source_nodes if measure_spec_properties: - source_nodes = self._source_nodes potential_source_nodes: Sequence[BaseOutput] = self._select_source_nodes_with_measures( measure_specs=set(measure_spec_properties.measure_specs), source_nodes=source_nodes ) else: - source_nodes = self._source_nodes_without_measures potential_source_nodes = self._select_source_nodes_with_linkable_specs( linkable_specs=linkable_spec_set, source_nodes=source_nodes ) diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index bd160b7d37..5039d3dee6 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -347,9 +347,6 @@ def __init__( source_node_builder = SourceNodeBuilder(self._semantic_manifest_lookup) source_nodes = source_node_builder.create_from_data_sets(self._source_data_sets) - source_nodes_without_measures = source_node_builder.create_from_data_sets( - self._source_data_sets, with_measures=False - ) node_output_resolver = DataflowPlanNodeOutputDataSetResolver( column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup), @@ -358,7 +355,6 @@ def __init__( self._dataflow_plan_builder = DataflowPlanBuilder( source_nodes=source_nodes, - source_nodes_without_measures=source_nodes_without_measures, semantic_manifest_lookup=self._semantic_manifest_lookup, ) self._to_sql_query_plan_converter = DataflowToSqlQueryPlanConverter( diff --git 
a/metricflow/test/dataflow/builder/test_cyclic_join.py b/metricflow/test/dataflow/builder/test_cyclic_join.py index 73a9aed2ec..8762e45ce3 100644 --- a/metricflow/test/dataflow/builder/test_cyclic_join.py +++ b/metricflow/test/dataflow/builder/test_cyclic_join.py @@ -34,7 +34,6 @@ def cyclic_join_manifest_dataflow_plan_builder( # noqa: D return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.cyclic_join_source_nodes, - source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=cyclic_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) diff --git a/metricflow/test/fixtures/dataflow_fixtures.py b/metricflow/test/fixtures/dataflow_fixtures.py index a87324b146..06616aa7f1 100644 --- a/metricflow/test/fixtures/dataflow_fixtures.py +++ b/metricflow/test/fixtures/dataflow_fixtures.py @@ -34,7 +34,6 @@ def dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.simple_model_source_nodes, - source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=simple_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -48,7 +47,6 @@ def multihop_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.multihop_model_source_nodes, - source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, semantic_manifest_lookup=multi_hop_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -70,7 +68,6 @@ def scd_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.scd_model_source_nodes, - source_nodes_without_measures=consistent_id_object_repository.simple_model_source_nodes_without_measures, 
semantic_manifest_lookup=scd_semantic_manifest_lookup, cost_function=DefaultCostFunction(), column_association_resolver=scd_column_association_resolver, diff --git a/metricflow/test/fixtures/model_fixtures.py b/metricflow/test/fixtures/model_fixtures.py index c9f7021182..e4cf809b12 100644 --- a/metricflow/test/fixtures/model_fixtures.py +++ b/metricflow/test/fixtures/model_fixtures.py @@ -82,7 +82,6 @@ class ConsistentIdObjectRepository: simple_model_data_sets: OrderedDict[str, SemanticModelDataSet] simple_model_read_nodes: OrderedDict[str, ReadSqlSourceNode] simple_model_source_nodes: Sequence[BaseOutput] - simple_model_source_nodes_without_measures: Sequence[BaseOutput] multihop_model_read_nodes: OrderedDict[str, ReadSqlSourceNode] multihop_model_source_nodes: Sequence[BaseOutput] @@ -116,9 +115,6 @@ def consistent_id_object_repository( simple_model_data_sets=sm_data_sets, simple_model_read_nodes=_data_set_to_read_nodes(sm_data_sets), simple_model_source_nodes=_data_set_to_source_nodes(simple_semantic_manifest_lookup, sm_data_sets), - simple_model_source_nodes_without_measures=_data_set_to_source_nodes( - simple_semantic_manifest_lookup, sm_data_sets, with_measures=False - ), multihop_model_read_nodes=_data_set_to_read_nodes(multihop_data_sets), multihop_model_source_nodes=_data_set_to_source_nodes( multi_hop_join_semantic_manifest_lookup, multihop_data_sets diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 30f00fecf5..1d4bea6e1e 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -58,12 +58,12 @@ - + - + @@ -86,7 +86,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index 06f25732e1..9a9b6b09f4 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -42,12 +42,12 @@ - + - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index 434a4c173a..baa48cf4f7 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -77,12 +77,12 @@ - + - + @@ -136,7 +136,7 @@ - + @@ -359,7 +359,7 @@ - + From e700c966bfa4566b305a6b099e8c2de732bfd421 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 22 Sep 2023 19:03:52 -0700 Subject: [PATCH 08/19] Resolve some bugs --- .../dataflow/builder/dataflow_plan_builder.py | 87 ++++++++++++++++--- .../model/semantics/semantic_model_lookup.py | 6 ++ metricflow/protocols/semantics.py | 7 ++ .../test_cases/itest_dimensions.yaml | 11 +-- .../test/integration/test_configured_cases.py | 1 + 5 files changed, 95 insertions(+), 17 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index d1c91139b3..2bf53a5bb5 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -9,7 +9,12 @@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.pretty_print import pformat_big_objects from dbt_semantic_interfaces.protocols.metric import MetricTimeWindow, MetricType -from dbt_semantic_interfaces.references 
import TimeDimensionReference +from dbt_semantic_interfaces.protocols.semantic_model import SemanticModel +from dbt_semantic_interfaces.references import ( + DimensionReference, + EntityReference, + TimeDimensionReference, +) from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity from metricflow.dag.id_generation import DATAFLOW_PLAN_PREFIX, IdGeneratorRegistry @@ -272,24 +277,82 @@ def _build_metrics_output_node( join_type=combine_metrics_join_type, ) + def __get_semantic_models_for_linkable_specs( + self, linkable_specs: LinkableSpecSet + ) -> Dict[SemanticModel, LinkableSpecSet]: + """Build dict of semantic models to associated linkable specs.""" + semantic_models_to_linkable_specs: Dict[SemanticModel, LinkableSpecSet] = {} + + # Dimensions + for dimension_spec in linkable_specs.dimension_specs: + semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( + linkable_element=DimensionReference(element_name=dimension_spec.element_name) + ) + for semantic_model in semantic_models: + new_linkable_spec_set = LinkableSpecSet(dimension_specs=(dimension_spec,)) + linkable_specs_for_semantic_model = semantic_models_to_linkable_specs.get(semantic_model) + semantic_models_to_linkable_specs[semantic_model] = ( + LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) + if linkable_specs_for_semantic_model + else new_linkable_spec_set + ) + # Time dimensions + for time_dimension_spec in linkable_specs.time_dimension_specs: + semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( + linkable_element=TimeDimensionReference(element_name=time_dimension_spec.element_name) + ) + for semantic_model in semantic_models: + new_linkable_spec_set = LinkableSpecSet(time_dimension_specs=(time_dimension_spec,)) + semantic_models_to_linkable_specs[semantic_model] = ( + LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) + if 
linkable_specs_for_semantic_model + else new_linkable_spec_set + ) + # Entities + for entity_spec in linkable_specs.entity_specs: + semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( + linkable_element=EntityReference(element_name=entity_spec.element_name) + ) + for semantic_model in semantic_models: + new_linkable_spec_set = LinkableSpecSet(entity_specs=(entity_spec,)) + semantic_models_to_linkable_specs[semantic_model] = ( + LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) + if linkable_specs_for_semantic_model + else new_linkable_spec_set + ) + + return semantic_models_to_linkable_specs + def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> DataflowPlan: """Generate a plan that would get the distinct values of a linkable instance. e.g. distinct listing__country_latest for bookings by listing__country_latest """ assert not query_spec.metric_specs, "Can't build distinct values plan with metrics." 
+ output_nodes = [] + for linkable_specs in self.__get_semantic_models_for_linkable_specs( + linkable_specs=query_spec.linkable_specs + ).values(): + dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=linkable_specs) + if not dataflow_recipe: + raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {linkable_specs}.") + output_nodes.append(dataflow_recipe.source_node) + + if not output_nodes: + raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}") - linkable_specs = query_spec.linkable_specs - dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=linkable_specs) - if not dataflow_recipe: - raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {linkable_specs}.") - - source_node = dataflow_recipe.source_node - distinct_values_node = FilterElementsNode( - parent_node=source_node, - include_specs=InstanceSpecSet.create_from_linkable_specs(linkable_specs.as_tuple), - distinct_values=True, - ) + if len(output_nodes) == 1: + distinct_values_node = FilterElementsNode( + parent_node=output_nodes[0], + include_specs=InstanceSpecSet.create_from_linkable_specs(query_spec.linkable_specs.as_tuple), + distinct_values=True, + ) + else: + distinct_values_node = FilterElementsNode( + parent_node=JoinAggregatedMeasuresByGroupByColumnsNode(parent_nodes=output_nodes), + include_specs=query_spec.linkable_specs.as_spec_set, + distinct_values=True, + ) where_constraint_node: Optional[WhereConstraintNode] = None if query_spec.where_constraint: diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 7c459949cd..44d8ba0a03 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -268,6 +268,12 @@ def get_semantic_models_for_entity(self, entity_reference: EntityReference) -> S entity = self._entity_ref_to_entity[entity_reference] return 
set(self._entity_index[entity]) + def get_semantic_models_for_linkable_element( + self, linkable_element: LinkableElementReference + ) -> Set[SemanticModel]: + """Return all semantic models associated with a linkable element reference.""" + return set(self._linkable_reference_index[linkable_element]) + @staticmethod def get_entity_from_semantic_model( semantic_model: SemanticModel, entity_reference: LinkableElementReference diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index 6fc4e00a39..8ba4833d4e 100644 --- a/metricflow/protocols/semantics.py +++ b/metricflow/protocols/semantics.py @@ -19,6 +19,7 @@ from dbt_semantic_interfaces.references import ( DimensionReference, EntityReference, + LinkableElementReference, MeasureReference, MetricReference, SemanticModelElementReference, @@ -101,6 +102,12 @@ def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Op """Retrieve the entity matching the element -> semantic model mapping, if any.""" raise NotImplementedError + def get_semantic_models_for_linkable_element( + self, linkable_element: LinkableElementReference + ) -> Set[SemanticModel]: + """Return all semantic models associated with a linkable element reference.""" + raise NotImplementedError + @abstractmethod def get_by_reference(self, semantic_model_reference: SemanticModelReference) -> Optional[SemanticModel]: """Retrieve the semantic model object matching the input semantic model reference, if any.""" diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index 269682b5b1..0a20739a78 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -133,17 +133,18 @@ integration_test: name: query_dimensions_from_different_tables description: Query multiple dimensions without metrics, requiring a join model: SIMPLE_MODEL - group_bys: 
["user__home_state", "verification__ds__day"] + group_bys: ["user__home_state", "listing__is_lux_latest"] check_query: | SELECT u.home_state AS user__home_state - , v.ds AS verification__ds__day - FROM {{ source_schema }}.fct_id_verifications v - LEFT OUTER JOIN {{ source_schema }}.dim_users u + , l.is_lux AS listing__is_lux_latest + FROM {{ source_schema }}.dim_listings_latest l + ON l.listing_id = v.listing_id + LEFT OUTER JOIN {{ source_schema }}.dim_users_latest u ON u.user_id = v.user_id GROUP BY u.home_state - , v.ds + , l.is_lux --- integration_test: name: query_time_dimension_without_granularity diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index af897edcaa..b18c5588d8 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -206,6 +206,7 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + # ["itest_dimensions.yaml/query_dimensions_from_different_tables"], ids=lambda name: f"name={name}", ) def test_case( From e5739762f232a74f8a4bd66238b68f1ada1c96fc Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 27 Sep 2023 12:11:23 -0700 Subject: [PATCH 09/19] Cleanup --- metricflow/query/query_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 1571020e74..17b4139eb1 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -671,7 +671,6 @@ def _parse_group_by( group_by: Optional[Tuple[GroupByParameter, ...]] = None, ) -> QueryTimeLinkableSpecSet: """Convert the linkable spec names into the respective specification objects.""" - # TODO: refactor to only support group_by object inputs (removing group_by_names param) assert not ( group_by_names and group_by ), "Both group_by_names and group_by were set, but if a group by is 
specified you should only use one of these!" From bfc95439c770704d6840691e1c4a70c4cec6b7f2 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 27 Sep 2023 12:20:11 -0700 Subject: [PATCH 10/19] Cleanup --- .../dataflow/builder/dataflow_plan_builder.py | 19 +++++++------------ metricflow/dataflow/dataflow_plan.py | 14 +++++++------- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- .../test/integration/test_configured_cases.py | 1 - .../test_filter_combination__dfpo_0.xml | 2 +- .../DataflowPlan/test_cyclic_join__dfp_0.xml | 6 +++--- .../test_common_semantic_model__dfp_0.xml | 12 ++++++------ ...indow_or_grain_with_metric_time__dfp_0.xml | 2 +- ...ow_or_grain_without_metric_time__dfp_0.xml | 2 +- ...t_cumulative_metric_with_window__dfp_0.xml | 2 +- ..._derived_metric_offset_to_grain__dfp_0.xml | 4 ++-- ...st_derived_metric_offset_window__dfp_0.xml | 2 +- ..._metric_offset_with_granularity__dfp_0.xml | 2 +- ...erived_offset_cumulative_metric__dfp_0.xml | 2 +- .../test_distinct_values_plan__dfp_0.xml | 2 +- .../DataflowPlan/test_joined_plan__dfp_0.xml | 6 +++--- .../test_limit_rows_plan__dfp_0.xml | 2 +- .../test_measure_constraint_plan__dfp_0.xml | 18 +++++++++--------- ...traint_with_reused_measure_plan__dfp_0.xml | 6 +++--- ...mantic_model_ratio_metrics_plan__dfp_0.xml | 12 ++++++------ .../test_multihop_join_plan__dfp_0.xml | 8 ++++---- .../test_multiple_metrics_plan__dfp_0.xml | 4 ++-- .../test_order_by_plan__dfp_0.xml | 2 +- .../test_primary_entity_dimension__dfp_0.xml | 2 +- .../DataflowPlan/test_simple_plan__dfp_0.xml | 2 +- ...mantic_model_ratio_metrics_plan__dfp_0.xml | 12 ++++++------ .../test_where_constrained_plan__dfp_0.xml | 8 ++++---- ...constrained_plan_time_dimension__dfp_0.xml | 4 ++-- ...ained_with_common_linkable_plan__dfp_0.xml | 6 +++--- ...ompute_metrics_node_simple_expr__plan0.xml | 4 ++-- ...spine_node_with_offset_to_grain__plan0.xml | 2 +- ...e_spine_node_with_offset_window__plan0.xml | 2 +- 
..._time_spine_node_without_offset__plan0.xml | 2 +- ...2_metrics_from_1_semantic_model__dfp_0.xml | 12 ++++++------ ..._metrics_from_1_semantic_model__dfpo_0.xml | 6 +++--- ..._metrics_from_2_semantic_models__dfp_0.xml | 4 ++-- ...metrics_from_2_semantic_models__dfpo_0.xml | 4 ++-- ...o_metrics_from_1_semantic_model__dfp_0.xml | 8 ++++---- ..._metrics_from_1_semantic_model__dfpo_0.xml | 2 +- ..._metrics_from_2_semantic_models__dfp_0.xml | 6 +++--- ...metrics_from_2_semantic_models__dfpo_0.xml | 4 ++-- ...constrained_metric_not_combined__dfp_0.xml | 6 +++--- ...onstrained_metric_not_combined__dfpo_0.xml | 6 +++--- .../test_derived_metric__dfp_0.xml | 4 ++-- .../test_derived_metric__dfpo_0.xml | 2 +- ..._metric_with_non_derived_metric__dfp_0.xml | 6 +++--- ...metric_with_non_derived_metric__dfpo_0.xml | 4 ++-- .../test_nested_derived_metric__dfp_0.xml | 8 ++++---- .../test_nested_derived_metric__dfpo_0.xml | 4 ++-- 49 files changed, 128 insertions(+), 134 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 2bf53a5bb5..deb5a14ee7 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -341,18 +341,13 @@ def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Dat if not output_nodes: raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}") - if len(output_nodes) == 1: - distinct_values_node = FilterElementsNode( - parent_node=output_nodes[0], - include_specs=InstanceSpecSet.create_from_linkable_specs(query_spec.linkable_specs.as_tuple), - distinct_values=True, - ) - else: - distinct_values_node = FilterElementsNode( - parent_node=JoinAggregatedMeasuresByGroupByColumnsNode(parent_nodes=output_nodes), - include_specs=query_spec.linkable_specs.as_spec_set, - distinct_values=True, - ) + distinct_values_node = FilterElementsNode( + 
parent_node=output_nodes[0] + if len(output_nodes) == 1 + else JoinAggregatedMeasuresByGroupByColumnsNode(parent_nodes=output_nodes), + include_specs=query_spec.linkable_specs.as_spec_set, + distinct=True, + ) where_constraint_node: Optional[WhereConstraintNode] = None if query_spec.where_constraint: diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index b96d427863..b8913f56d0 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -1098,12 +1098,12 @@ def __init__( # noqa: D parent_node: BaseOutput, include_specs: InstanceSpecSet, replace_description: Optional[str] = None, - distinct_values: bool = False, + distinct: bool = False, ) -> None: self._include_specs = include_specs self._replace_description = replace_description self._parent_node = parent_node - self._distinct_values = distinct_values + self._distinct = distinct super().__init__(node_id=self.create_unique_id(), parent_nodes=[parent_node]) @classmethod @@ -1116,9 +1116,9 @@ def include_specs(self) -> InstanceSpecSet: return self._include_specs @property - def distinct_values(self) -> bool: + def distinct(self) -> bool: """True if you only want the distinct values for the selected specs.""" - return self._distinct_values + return self._distinct def accept(self, visitor: DataflowPlanNodeVisitor[VisitorOutputT]) -> VisitorOutputT: # noqa: D return visitor.visit_pass_elements_filter_node(self) @@ -1139,7 +1139,7 @@ def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D if not self._replace_description: additional_properties = [ DisplayedProperty("include_spec", include_spec) for include_spec in self._include_specs.all_specs - ] + [DisplayedProperty("distinct_values", self._distinct_values)] + ] + [DisplayedProperty("distinct", self._distinct)] return super().displayed_properties + additional_properties @property @@ -1150,7 +1150,7 @@ def functionally_identical(self, other_node: DataflowPlanNode) -> bool: # noqa: 
return ( isinstance(other_node, self.__class__) and other_node.include_specs == self.include_specs - and other_node.distinct_values == self.distinct_values + and other_node.distinct == self.distinct ) def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> FilterElementsNode: # noqa: D @@ -1158,7 +1158,7 @@ def with_new_parents(self, new_parent_nodes: Sequence[BaseOutput]) -> FilterElem return FilterElementsNode( parent_node=new_parent_nodes[0], include_specs=self.include_specs, - distinct_values=self.distinct_values, + distinct=self.distinct, replace_description=self._replace_description, ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index e348880fd6..179bc14f51 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -801,7 +801,7 @@ def visit_pass_elements_filter_node(self, node: FilterElementsNode) -> SqlDataSe ).as_tuple() # If no measures are passed, group by all columns. 
- group_bys = select_columns if node.distinct_values else () + group_bys = select_columns if node.distinct else () return SqlDataSet( instance_set=output_instance_set, sql_select_node=SqlSelectStatementNode( diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index 3d7e45cc7f..eec106d555 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -207,7 +207,6 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, - # ["itest_dimensions.yaml/query_dimensions_from_different_tables"], ids=lambda name: f"name={name}", ) def test_case( diff --git a/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml b/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml index a5abebf496..8b44ecbdd0 100644 --- a/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml +++ b/metricflow/test/snapshots/test_cm_branch_combiner.py/DataflowPlan/test_filter_combination__dfpo_0.xml @@ -15,7 +15,7 @@ - + diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 1d4bea6e1e..7f186e865b 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -29,7 +29,7 @@ - + @@ -55,7 +55,7 @@ - + @@ -82,7 +82,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml index a92a634e0d..ff9c8b4707 100644 --- 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_common_semantic_model__dfp_0.xml @@ -40,7 +40,7 @@ - + @@ -70,7 +70,7 @@ - + @@ -92,7 +92,7 @@ - + @@ -143,7 +143,7 @@ - + @@ -173,7 +173,7 @@ - + @@ -195,7 +195,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml index 550582d4ff..b25be80860 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_with_metric_time__dfp_0.xml @@ -31,7 +31,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml index da36eaeb4e..3b58bd51b7 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_no_window_or_grain_without_metric_time__dfp_0.xml @@ -24,7 +24,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml index 4e42eb8f5e..12efb1df0d 100644 --- 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_cumulative_metric_with_window__dfp_0.xml @@ -31,7 +31,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml index 0182c76329..7704ab793b 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_to_grain__dfp_0.xml @@ -45,7 +45,7 @@ - + @@ -90,7 +90,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml index 603be13d50..bedaa4a5af 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_window__dfp_0.xml @@ -43,7 +43,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml index daa412d958..72074aa37a 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_metric_offset_with_granularity__dfp_0.xml @@ -43,7 +43,7 @@ - + diff --git 
a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml index 859882507b..1f10a03b3f 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_derived_offset_cumulative_metric__dfp_0.xml @@ -43,7 +43,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index 9a9b6b09f4..10f2780c9b 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -39,7 +39,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml index be5cbe3041..07ef1d04f2 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_joined_plan__dfp_0.xml @@ -32,7 +32,7 @@ - + @@ -59,7 +59,7 @@ - + @@ -81,7 +81,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml index a76c27f2f8..b4fd8b7143 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_limit_rows_plan__dfp_0.xml @@ -35,7 +35,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml index 7625fc9dd0..14e3100122 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_plan__dfp_0.xml @@ -55,7 +55,7 @@ - + @@ -90,7 +90,7 @@ - + @@ -120,7 +120,7 @@ - + @@ -141,7 +141,7 @@ - + @@ -200,7 +200,7 @@ - + @@ -235,7 +235,7 @@ - + @@ -265,7 +265,7 @@ - + @@ -286,7 +286,7 @@ - + @@ -335,7 +335,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml index 6bc2e6391d..f25d9f6748 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_measure_constraint_with_reused_measure_plan__dfp_0.xml @@ -55,7 +55,7 @@ - + @@ -90,7 +90,7 @@ - + @@ -137,7 +137,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml index ac0e1427b9..7de1511904 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multi_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -49,7 +49,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -101,7 +101,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -182,7 +182,7 @@ - + @@ -204,7 +204,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml index baa48cf4f7..a98cafb501 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multihop_join_plan__dfp_0.xml @@ -29,7 +29,7 @@ - + @@ -74,7 +74,7 @@ - + @@ -108,7 +108,7 @@ - + @@ -355,7 +355,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml index f2043b5bf5..7234390739 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_multiple_metrics_plan__dfp_0.xml @@ -40,7 +40,7 @@ - + @@ -89,7 +89,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml index 82d59d2779..59709aac25 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_order_by_plan__dfp_0.xml @@ -59,7 +59,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml index ea5b947419..ccb2dd4669 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_primary_entity_dimension__dfp_0.xml @@ -28,7 +28,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml index ea5b947419..ccb2dd4669 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_simple_plan__dfp_0.xml @@ -28,7 +28,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml index 7c1a6947cd..22856f8489 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_single_semantic_model_ratio_metrics_plan__dfp_0.xml @@ -49,7 +49,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -101,7 +101,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -182,7 +182,7 @@ - + @@ -204,7 +204,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml index 1b6a1fd180..062689be39 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan__dfp_0.xml @@ -28,7 +28,7 @@ - + @@ -60,7 +60,7 @@ - + @@ -87,7 +87,7 @@ - + @@ -109,7 +109,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml index 2c2674d4cf..b8248be3ce 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_plan_time_dimension__dfp_0.xml @@ -28,7 +28,7 @@ - + @@ -65,7 +65,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml index b4d89f4c64..1ef34a8a04 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_where_constrained_with_common_linkable_plan__dfp_0.xml @@ -42,7 +42,7 @@ - + @@ -65,7 +65,7 @@ - + @@ -87,7 +87,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml index fa850673ed..d1662fcdda 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_compute_metrics_node_simple_expr__plan0.xml @@ -37,7 +37,7 @@ - + @@ 
-54,7 +54,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml index f7bf794432..4a4e2a24f4 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_to_grain__plan0.xml @@ -38,7 +38,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml index e2d4ff2eb6..b0c2d678ce 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_with_offset_window__plan0.xml @@ -38,7 +38,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml index 4b9f42957f..5a31f12687 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/DataflowPlan/test_join_to_time_spine_node_without_offset__plan0.xml @@ -38,7 +38,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml 
b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml index a92a634e0d..ff9c8b4707 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfp_0.xml @@ -40,7 +40,7 @@ - + @@ -70,7 +70,7 @@ - + @@ -92,7 +92,7 @@ - + @@ -143,7 +143,7 @@ - + @@ -173,7 +173,7 @@ - + @@ -195,7 +195,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml index f1614b642c..878492035e 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_1_semantic_model__dfpo_0.xml @@ -46,7 +46,7 @@ - + @@ -80,7 +80,7 @@ - + @@ -102,7 +102,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml index 8276f207bc..cb3913a1e9 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfp_0.xml @@ -36,7 +36,7 @@ - + @@ -81,7 +81,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml index 
38de18c96f..29f009e284 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_metrics_from_2_semantic_models__dfpo_0.xml @@ -36,7 +36,7 @@ - + @@ -81,7 +81,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml index af2f2e07be..59b7d47c75 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfp_0.xml @@ -50,7 +50,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -156,7 +156,7 @@ - + @@ -201,7 +201,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml index 260935e20a..443bab0657 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_2_ratio_metrics_from_1_semantic_model__dfpo_0.xml @@ -77,7 +77,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml index 27b2b60760..af1adfb56f 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml +++ 
b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfp_0.xml @@ -36,7 +36,7 @@ - + @@ -81,7 +81,7 @@ - + @@ -126,7 +126,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml index 7feff7021e..1536e6c8f9 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_3_metrics_from_2_semantic_models__dfpo_0.xml @@ -47,7 +47,7 @@ - + @@ -92,7 +92,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml index 48b4d980af..ce2766cc9f 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfp_0.xml @@ -36,7 +36,7 @@ - + @@ -91,7 +91,7 @@ - + @@ -126,7 +126,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml index a688554b4d..aa11bdf2c4 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_constrained_metric_not_combined__dfpo_0.xml @@ -36,7 +36,7 @@ - + @@ -91,7 +91,7 @@ - + @@ -126,7 +126,7 @@ - + diff --git 
a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml index 91ae9219df..4d72662222 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfp_0.xml @@ -45,7 +45,7 @@ - + @@ -90,7 +90,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml index 368d0d919c..aa3dfef91a 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric__dfpo_0.xml @@ -52,7 +52,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml index c87d90e56f..a26d3cb22b 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfp_0.xml @@ -36,7 +36,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -140,7 +140,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml index 1e942e91ad..b23dd5f90b 100644 --- 
a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_derived_metric_with_non_derived_metric__dfpo_0.xml @@ -36,7 +36,7 @@ - + @@ -102,7 +102,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml index 72ad5dcc41..5af98e9f24 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfp_0.xml @@ -59,7 +59,7 @@ - + @@ -104,7 +104,7 @@ - + @@ -151,7 +151,7 @@ - + @@ -196,7 +196,7 @@ - + diff --git a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml index efc1b11a20..dcccd29791 100644 --- a/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml +++ b/metricflow/test/snapshots/test_source_scan_optimizer.py/DataflowPlan/test_nested_derived_metric__dfpo_0.xml @@ -66,7 +66,7 @@ - + @@ -123,7 +123,7 @@ - + From bd491ee06a354800840e1c11449fa82d8fe12ad0 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 17:21:54 -0700 Subject: [PATCH 11/19] WIP --- metricflow/dataflow/builder/costing.py | 6 +- .../dataflow/builder/dataflow_plan_builder.py | 254 +++++++++--------- metricflow/dataflow/builder/node_evaluator.py | 10 +- metricflow/dataflow/builder/source_node.py | 8 +- metricflow/engine/metricflow_engine.py | 2 + metricflow/plan_conversion/node_processor.py | 5 +- .../test/dataflow/builder/test_cyclic_join.py | 1 + 
.../dataflow/builder/test_node_evaluator.py | 4 +- metricflow/test/fixtures/dataflow_fixtures.py | 3 + metricflow/test/fixtures/model_fixtures.py | 8 +- .../test_cases/itest_dimensions.yaml | 9 +- .../test/integration/test_configured_cases.py | 4 +- 12 files changed, 159 insertions(+), 155 deletions(-) diff --git a/metricflow/dataflow/builder/costing.py b/metricflow/dataflow/builder/costing.py index 981bf406d5..b7c47a0faf 100644 --- a/metricflow/dataflow/builder/costing.py +++ b/metricflow/dataflow/builder/costing.py @@ -132,7 +132,11 @@ def visit_write_to_result_table_node(self, node: WriteToResultTableNode) -> Defa return DefaultCost.sum([x.accept(self) for x in node.parent_nodes]) def visit_pass_elements_filter_node(self, node: FilterElementsNode) -> DefaultCost: # noqa: D - return DefaultCost.sum([x.accept(self) for x in node.parent_nodes]) + parent_costs = [x.accept(self) for x in node.parent_nodes] + + # 1 aggregation if grouping by distinct values + node_cost = DefaultCost(num_aggregations=1 if node.distinct else 0) + return DefaultCost.sum(parent_costs + [node_cost]) def visit_combine_metrics_node(self, node: CombineMetricsNode) -> DefaultCost: # noqa: D return DefaultCost.sum([x.accept(self) for x in node.parent_nodes]) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index deb5a14ee7..42ad16b8dc 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -18,7 +18,7 @@ from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity from metricflow.dag.id_generation import DATAFLOW_PLAN_PREFIX, IdGeneratorRegistry -from metricflow.dataflow.builder.costing import DataflowPlanNodeCostFunction, DefaultCostFunction +from metricflow.dataflow.builder.costing import DataflowPlanNodeCostFunction, DefaultCost, DefaultCostFunction from metricflow.dataflow.builder.measure_additiveness import 
group_measure_specs_by_additiveness from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver from metricflow.dataflow.builder.node_evaluator import ( @@ -54,7 +54,7 @@ from metricflow.filters.time_constraint import TimeRangeConstraint from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow.plan_conversion.column_resolver import DunderColumnAssociationResolver -from metricflow.plan_conversion.node_processor import PreDimensionJoinNodeProcessor +from metricflow.plan_conversion.node_processor import PreJoinNodeProcessor from metricflow.specs.column_assoc import ColumnAssociationResolver from metricflow.specs.specs import ( InstanceSpecSet, @@ -83,26 +83,52 @@ class DataflowRecipe: required_local_linkable_specs: Tuple[LinkableInstanceSpec, ...] join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...] - def to_measure_recipe(self) -> MeasureRecipe: # noqa: D - return MeasureRecipe( - source_node=self.source_node, - required_local_linkable_specs=self.required_local_linkable_specs, - join_linkable_instances_recipes=self.join_linkable_instances_recipes, - ) - + @property + def join_targets(self) -> List[JoinDescription]: + """Joins to be made to source node.""" + join_targets = [] + for join_recipe in self.join_linkable_instances_recipes: + # Figure out what elements to filter from the joined node. -@dataclass(frozen=True) -class MeasureRecipe(DataflowRecipe): - """Get a recipe for how to build a dataflow plan node that outputs measures and the needed linkable instances. + # Sanity check - all linkable specs should have a link, or else why would we be joining them. 
+ assert all([len(x.entity_links) > 0 for x in join_recipe.satisfiable_linkable_specs]) - The recipe involves filtering the measure node so that it only outputs the measures and the instances associated with - required_local_linkable_specs, then joining the nodes containing the linkable instances according to the recipes - in join_linkable_instances_recipes. - """ + # If we're joining something in, then we need the associated entity, partitions, and time dimension + # specs defining the validity window (if necessary) + include_specs: List[LinkableInstanceSpec] = [ + LinklessEntitySpec.from_reference(x.entity_links[0]) for x in join_recipe.satisfiable_linkable_specs + ] + include_specs.extend([x.node_to_join_dimension_spec for x in join_recipe.join_on_partition_dimensions]) + include_specs.extend( + [x.node_to_join_time_dimension_spec for x in join_recipe.join_on_partition_time_dimensions] + ) + if join_recipe.validity_window: + include_specs.extend( + [ + join_recipe.validity_window.window_start_dimension, + join_recipe.validity_window.window_end_dimension, + ] + ) - @property - def measure_node(self) -> BaseOutput: # noqa: D - return self.source_node + # satisfiable_linkable_specs describes what can be satisfied after the join, so remove the entity + # link when filtering before the join. + # e.g. if the node is used to satisfy "user_id__country", then the node must have the entity + # "user_id" and the "country" dimension so that it can be joined to the measure node. 
+ include_specs.extend([x.without_first_entity_link for x in join_recipe.satisfiable_linkable_specs]) + filtered_node_to_join = FilterElementsNode( + parent_node=join_recipe.node_to_join, + include_specs=InstanceSpecSet.create_from_linkable_specs(include_specs), + ) + join_targets.append( + JoinDescription( + join_node=filtered_node_to_join, + join_on_entity=join_recipe.join_on_entity, + join_on_partition_dimensions=join_recipe.join_on_partition_dimensions, + join_on_partition_time_dimensions=join_recipe.join_on_partition_time_dimensions, + validity_window=join_recipe.validity_window, + ) + ) + return join_targets @dataclass(frozen=True) @@ -121,6 +147,7 @@ class DataflowPlanBuilder: def __init__( # noqa: D self, source_nodes: Sequence[BaseOutput], + read_nodes: Sequence[BaseOutput], semantic_manifest_lookup: SemanticManifestLookup, cost_function: DataflowPlanNodeCostFunction = DefaultCostFunction(), node_output_resolver: Optional[DataflowPlanNodeOutputDataSetResolver] = None, @@ -131,6 +158,7 @@ def __init__( # noqa: D self._metric_time_dimension_reference = DataSet.metric_time_dimension_reference() self._cost_function = cost_function self._source_nodes = source_nodes + self._read_nodes = read_nodes self._column_association_resolver = ( DunderColumnAssociationResolver(semantic_manifest_lookup) if not column_association_resolver @@ -176,7 +204,7 @@ def build_plan( plan = DataflowPlan(plan_id=plan_id, sink_output_nodes=[sink_node]) for optimizer in optimizers: - logger.info(f"Applying {optimizer.__class__.__name__}") + print(f"Applying {optimizer.__class__.__name__}") try: plan = optimizer.optimize(plan) except Exception: @@ -205,7 +233,7 @@ def _build_metrics_output_node( compute_metrics_node: Optional[ComputeMetricsNode] = None for metric_spec in metric_specs: - logger.info(f"Generating compute metrics node for {metric_spec}") + print(f"Generating compute metrics node for {metric_spec}") metric_reference = metric_spec.as_reference metric = 
self._metric_lookup.get_metric(metric_reference) @@ -214,7 +242,7 @@ def _build_metrics_output_node( metric_reference=metric_reference, column_association_resolver=self._column_association_resolver, ) - logger.info( + print( f"For {metric.type} metric: {metric_spec}, needed metrics are:\n" f"{pformat_big_objects(metric_input_specs=metric_input_specs)}" ) @@ -235,7 +263,7 @@ def _build_metrics_output_node( column_association_resolver=self._column_association_resolver, ) - logger.info( + print( f"For {metric_spec}, needed measures are:\n" f"{pformat_big_objects(metric_input_measure_specs=metric_input_measure_specs)}" ) @@ -329,25 +357,30 @@ def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> Dat e.g. distinct listing__country_latest for bookings by listing__country_latest """ assert not query_spec.metric_specs, "Can't build distinct values plan with metrics." - output_nodes = [] - for linkable_specs in self.__get_semantic_models_for_linkable_specs( - linkable_specs=query_spec.linkable_specs - ).values(): - dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=linkable_specs) - if not dataflow_recipe: - raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {linkable_specs}.") - output_nodes.append(dataflow_recipe.source_node) - - if not output_nodes: + # linkable_specs_to_dataflow_recipes: Dict[LinkableSpecSet, DataflowRecipe] = {} + # for linkable_specs in self.__get_semantic_models_for_linkable_specs( + # linkable_specs=query_spec.linkable_specs + # ).values(): + dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=query_spec.linkable_specs) + if not dataflow_recipe: + raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}.") + # linkable_specs_to_dataflow_recipes[linkable_specs] = dataflow_recipe + + if not dataflow_recipe: raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}") - distinct_values_node = 
FilterElementsNode( - parent_node=output_nodes[0] - if len(output_nodes) == 1 - else JoinAggregatedMeasuresByGroupByColumnsNode(parent_nodes=output_nodes), - include_specs=query_spec.linkable_specs.as_spec_set, - distinct=True, - ) + join_targets = dataflow_recipe.join_targets + if join_targets: + joined_node = JoinToBaseOutputNode(left_node=dataflow_recipe.source_node, join_targets=join_targets) + distinct_values_node = FilterElementsNode( + parent_node=joined_node, include_specs=query_spec.linkable_specs.as_spec_set, distinct=True + ) + else: + distinct_values_node = FilterElementsNode( + parent_node=dataflow_recipe.source_node, + include_specs=query_spec.linkable_specs.as_spec_set, + distinct=True, + ) where_constraint_node: Optional[WhereConstraintNode] = None if query_spec.where_constraint: @@ -440,19 +473,20 @@ def _select_source_nodes_with_measures( nodes.append(source_node) return nodes - def _select_source_nodes_with_linkable_specs( - self, linkable_specs: LinkableSpecSet, source_nodes: Sequence[BaseOutput] - ) -> Sequence[BaseOutput]: + def _select_read_nodes_with_linkable_specs( + self, linkable_specs: LinkableSpecSet, read_nodes: Sequence[BaseOutput] + ) -> Dict[BaseOutput, Set[LinkableInstanceSpec]]: """Find source nodes with requested linkable specs and no measures.""" - nodes = [] + nodes_to_linkable_specs: Dict[BaseOutput, Set[LinkableInstanceSpec]] = {} linkable_specs_set = set(linkable_specs.as_tuple) - for source_node in source_nodes: - output_spec_set = self._node_data_set_resolver.get_output_data_set(source_node).instance_set.spec_set - linkable_specs_in_node = output_spec_set.linkable_specs - if linkable_specs_set.intersection(set(linkable_specs_in_node)) == linkable_specs_set: - nodes.append(source_node) + for read_node in read_nodes: + output_spec_set = self._node_data_set_resolver.get_output_data_set(read_node).instance_set.spec_set + linkable_specs_in_node = set(output_spec_set.linkable_specs) + requested_linkable_specs_in_node = 
linkable_specs_set.intersection(linkable_specs_in_node) + if requested_linkable_specs_in_node: + nodes_to_linkable_specs[read_node] = requested_linkable_specs_in_node - return nodes + return nodes_to_linkable_specs def _find_non_additive_dimension_in_linkable_specs( self, @@ -513,22 +547,25 @@ def _find_dataflow_recipe( time_range_constraint: Optional[TimeRangeConstraint] = None, ) -> Optional[DataflowRecipe]: linkable_specs = linkable_spec_set.as_tuple - source_nodes = self._source_nodes if measure_spec_properties: + source_nodes = self._source_nodes potential_source_nodes: Sequence[BaseOutput] = self._select_source_nodes_with_measures( measure_specs=set(measure_spec_properties.measure_specs), source_nodes=source_nodes ) else: - potential_source_nodes = self._select_source_nodes_with_linkable_specs( - linkable_specs=linkable_spec_set, source_nodes=source_nodes + # Only read nodes can be source nodes for queries without measures + source_nodes = self._read_nodes + source_nodes_to_linkable_specs = self._select_read_nodes_with_linkable_specs( + linkable_specs=linkable_spec_set, read_nodes=source_nodes ) + potential_source_nodes = list(source_nodes_to_linkable_specs.keys()) + # issue: getting ds__day from the wrong table + print(f"There are {len(potential_source_nodes)} potential source nodes") - logger.info(f"There are {len(potential_source_nodes)} potential source nodes") - - logger.info(f"Starting search with {len(source_nodes)} source nodes") + print(f"Starting search with {len(source_nodes)} source nodes") start_time = time.time() - node_processor = PreDimensionJoinNodeProcessor( + node_processor = PreJoinNodeProcessor( semantic_model_lookup=self._semantic_model_lookup, node_data_set_resolver=self._node_data_set_resolver, ) @@ -544,17 +581,14 @@ def _find_dataflow_recipe( nodes=source_nodes, metric_time_dimension_reference=self._metric_time_dimension_reference, ) - logger.info( - f"After removing unnecessary nodes, there are {len(nodes_available_for_joins)} nodes 
available for joins" - ) + print(f"After removing unnecessary nodes, there are {len(nodes_available_for_joins)} nodes available for joins") if DataflowPlanBuilder._contains_multihop_linkables(linkable_specs): nodes_available_for_joins = node_processor.add_multi_hop_joins(linkable_specs, source_nodes) - logger.info( + print( f"After adding multi-hop nodes, there are {len(nodes_available_for_joins)} nodes available for joins:\n" f"{pformat_big_objects(nodes_available_for_joins)}" ) - - logger.info(f"Processing nodes took: {time.time()-start_time:.2f}s") + print(f"Processing nodes took: {time.time()-start_time:.2f}s") node_evaluator = NodeEvaluatorForLinkableInstances( semantic_model_lookup=self._semantic_model_lookup, @@ -566,45 +600,44 @@ def _find_dataflow_recipe( node_to_evaluation: Dict[BaseOutput, LinkableInstanceSatisfiabilityEvaluation] = {} for node in self._sort_by_suitability(potential_source_nodes): - logger.debug(f"Evaluating source node:\n{pformat_big_objects(source_node=dataflow_dag_as_text(node))}") + print(f"\n\n\nEvaluating source node:\n{pformat_big_objects(source_node=dataflow_dag_as_text(node))}") start_time = time.time() - evaluation = node_evaluator.evaluate_node( - start_node=node, - required_linkable_specs=list(linkable_specs), - ) - logger.info(f"Evaluation of {node} took {time.time() - start_time:.2f}s") + evaluation = node_evaluator.evaluate_node(start_node=node, required_linkable_specs=list(linkable_specs)) + print(f"Evaluation of {node} took {time.time() - start_time:.2f}s") - logger.debug( + print( f"Evaluation for source node is:\n" f"{pformat_big_objects(node=dataflow_dag_as_text(node), evaluation=evaluation)}" ) if len(evaluation.unjoinable_linkable_specs) > 0: - logger.debug( + print( f"Skipping {node.node_id} since it contains un-joinable specs: " f"{evaluation.unjoinable_linkable_specs}" ) continue num_joins_required = len(evaluation.join_recipes) - logger.info(f"Found candidate with node ID '{node.node_id}' with 
{num_joins_required} joins required.") + print(f"Found candidate with node ID '{node.node_id}' with {num_joins_required} joins required.") node_to_evaluation[node] = evaluation # Since are evaluating nodes with the lowest cost first, if we find one without requiring any joins, then # this is going to be the lowest cost solution. if len(evaluation.join_recipes) == 0: - logger.info("Not evaluating other nodes since we found one that doesn't require joins") + print("Not evaluating other nodes since we found one that doesn't require joins") + # But we don't break the loop here? why not? - logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") + print(f"Found {len(node_to_evaluation)} candidate source nodes.") if len(node_to_evaluation) > 0: cost_function = DefaultCostFunction() - + for node in node_to_evaluation: + assert cost_function.calculate_cost(node) == DefaultCost(num_joins=0, num_aggregations=0) node_with_lowest_cost = min(node_to_evaluation, key=cost_function.calculate_cost) evaluation = node_to_evaluation[node_with_lowest_cost] - logger.info( + print( "Lowest cost node is:\n" + pformat_big_objects( lowest_cost_node=dataflow_dag_as_text(node_with_lowest_cost), @@ -687,7 +720,7 @@ def build_aggregated_measures( ) for (semantic_model, measure_constraint), measures in semantic_models_and_constraints_to_measures.items(): - logger.info( + print( f"Building aggregated measures for {semantic_model}. 
" f" Input measures: {measures} with constraints: {measure_constraint}" ) @@ -710,7 +743,7 @@ def build_aggregated_measures( if non_additive_spec is not None: non_additive_message = f" with non-additive dimension spec: {non_additive_spec}" - logger.info(f"Building aggregated measures for {semantic_model}{non_additive_message}") + print(f"Building aggregated measures for {semantic_model}{non_additive_message}") input_specs = tuple(input_specs_by_measure_spec[measure_spec] for measure_spec in measure_specs) output_nodes.append( self._build_aggregated_measures_from_measure_source_node( @@ -763,7 +796,7 @@ def _build_aggregated_measures_from_measure_source_node( cumulative_metric_adjusted_time_constraint: Optional[TimeRangeConstraint] = None if cumulative and time_range_constraint is not None: - logger.info(f"Time range constraint before adjustment is {time_range_constraint}") + print(f"Time range constraint before adjustment is {time_range_constraint}") granularity: Optional[TimeGranularity] = None count = 0 if cumulative_window is not None: @@ -776,7 +809,7 @@ def _build_aggregated_measures_from_measure_source_node( cumulative_metric_adjusted_time_constraint = ( time_range_constraint.adjust_time_constraint_for_cumulative_metric(granularity, count) ) - logger.info(f"Adjusted time range constraint {cumulative_metric_adjusted_time_constraint}") + print(f"Adjusted time range constraint {cumulative_metric_adjusted_time_constraint}") # Extraneous linkable specs are specs that are used in this phase that should not show up in the final result # unless it was already a requested spec in the query @@ -791,37 +824,36 @@ def _build_aggregated_measures_from_measure_source_node( ) required_linkable_specs = LinkableSpecSet.merge((queried_linkable_specs, extraneous_linkable_specs)) - logger.info( + print( f"Looking for a recipe to get:\n" f"{pformat_big_objects(measure_specs=measure_specs, required_linkable_set=required_linkable_specs)}" ) find_recipe_start_time = time.time() - 
dataflow_recipe = self._find_dataflow_recipe( + measure_recipe = self._find_dataflow_recipe( measure_spec_properties=measure_properties, time_range_constraint=cumulative_metric_adjusted_time_constraint or time_range_constraint, linkable_spec_set=required_linkable_specs, ) - logger.info( + print( f"With {len(self._source_nodes)} source nodes, finding a recipe took " f"{time.time() - find_recipe_start_time:.2f}s" ) - logger.info(f"Using recipe:\n{pformat_big_objects(dataflow_recipe=dataflow_recipe)}") + print(f"Using recipe:\n{pformat_big_objects(measure_recipe=measure_recipe)}") - if not dataflow_recipe: + if not measure_recipe: # TODO: Improve for better user understandability. raise UnableToSatisfyQueryError( f"Recipe not found for measure specs: {measure_specs} and linkable specs: {required_linkable_specs}" ) - measure_recipe = dataflow_recipe.to_measure_recipe() # If a cumulative metric is queried with metric_time, join over time range. # Otherwise, the measure will be aggregated over all time. time_range_node: Optional[JoinOverTimeRangeNode] = None if cumulative and metric_time_dimension_requested: time_range_node = JoinOverTimeRangeNode( - parent_node=measure_recipe.measure_node, + parent_node=measure_recipe.source_node, window=cumulative_window, grain_to_date=cumulative_grain_to_date, time_range_constraint=time_range_constraint, @@ -832,7 +864,7 @@ def _build_aggregated_measures_from_measure_source_node( if metric_spec.offset_window or metric_spec.offset_to_grain: assert metric_time_dimension_specs, "Joining to time spine requires querying with metric time." 
join_to_time_spine_node = JoinToTimeSpineNode( - parent_node=time_range_node or measure_recipe.measure_node, + parent_node=time_range_node or measure_recipe.source_node, metric_time_dimension_specs=metric_time_dimension_specs, time_range_constraint=time_range_constraint, offset_window=metric_spec.offset_window, @@ -841,7 +873,7 @@ def _build_aggregated_measures_from_measure_source_node( # Only get the required measure and the local linkable instances so that aggregations work correctly. filtered_measure_source_node = FilterElementsNode( - parent_node=join_to_time_spine_node or time_range_node or measure_recipe.measure_node, + parent_node=join_to_time_spine_node or time_range_node or measure_recipe.source_node, include_specs=InstanceSpecSet.merge( ( InstanceSpecSet(measure_specs=measure_specs), @@ -850,49 +882,7 @@ def _build_aggregated_measures_from_measure_source_node( ), ) - join_targets = [] - for join_recipe in measure_recipe.join_linkable_instances_recipes: - # Figure out what elements to filter from the joined node. - - # Sanity check - all linkable specs should have a link, or else why would we be joining them. 
- assert all([len(x.entity_links) > 0 for x in join_recipe.satisfiable_linkable_specs]) - - # If we're joining something in, then we need the associated entity, partitions, and time dimension - # specs defining the validity window (if necessary) - include_specs: List[LinkableInstanceSpec] = [ - LinklessEntitySpec.from_reference(x.entity_links[0]) for x in join_recipe.satisfiable_linkable_specs - ] - include_specs.extend([x.node_to_join_dimension_spec for x in join_recipe.join_on_partition_dimensions]) - include_specs.extend( - [x.node_to_join_time_dimension_spec for x in join_recipe.join_on_partition_time_dimensions] - ) - if join_recipe.validity_window: - include_specs.extend( - [ - join_recipe.validity_window.window_start_dimension, - join_recipe.validity_window.window_end_dimension, - ] - ) - - # satisfiable_linkable_specs describes what can be satisfied after the join, so remove the entity - # link when filtering before the join. - # e.g. if the node is used to satisfy "user_id__country", then the node must have the entity - # "user_id" and the "country" dimension so that it can be joined to the measure node. 
- include_specs.extend([x.without_first_entity_link for x in join_recipe.satisfiable_linkable_specs]) - filtered_node_to_join = FilterElementsNode( - parent_node=join_recipe.node_to_join, - include_specs=InstanceSpecSet.create_from_linkable_specs(include_specs), - ) - join_targets.append( - JoinDescription( - join_node=filtered_node_to_join, - join_on_entity=join_recipe.join_on_entity, - join_on_partition_dimensions=join_recipe.join_on_partition_dimensions, - join_on_partition_time_dimensions=join_recipe.join_on_partition_time_dimensions, - validity_window=join_recipe.validity_window, - ) - ) - + join_targets = measure_recipe.join_targets unaggregated_measure_node: BaseOutput if len(join_targets) > 0: filtered_measures_with_joined_elements = JoinToBaseOutputNode( diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index 5ab12bed9f..1a1c8a6c2e 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -349,18 +349,18 @@ def evaluate_node( ) join_candidates: List[JoinLinkableInstancesRecipe] = [] - logger.info("Looping over nodes that can be joined to get the required linkable specs") + print("Looping over nodes that can be joined to get the required linkable specs") # Using a greedy approach, try to get the "possibly_joinable_linkable_specs" by iteratively joining nodes with # the most matching linkable specs. We try to join nodes with the most matching specs to minimize the number of # joins that we have to do to. A knapsack solution is ideal, but punting on that for simplicity. while len(possibly_joinable_linkable_specs) > 0: - logger.info(f"Looking for linkable specs:\n{pformat_big_objects(possibly_joinable_linkable_specs)}") + print(f"Looking for linkable specs:\n{pformat_big_objects(possibly_joinable_linkable_specs)}") # We've run out of candidate data sets, but there are more linkable specs that we need. 
That means the # rest of the linkable specs can't be joined in, and we're left with unjoinable specs remaining. if len(candidates_for_join) == 0: - logger.info( + print( "There are no more candidate nodes that can be joined, but not all linkable specs have " "been acquired." ) @@ -369,7 +369,7 @@ def evaluate_node( # Join the best candidate to realize the linkable specs next_candidate = candidates_for_join.pop(0) - logger.info(f"The next candidate node to be joined is:\n{pformat_big_objects(next_candidate)}") + print(f"The next candidate node to be joined is:\n{pformat_big_objects(next_candidate)}") join_candidates.append(next_candidate) # Update the candidates. Since we'll be joined/ing the previously selected candidate, we no longer need @@ -386,7 +386,7 @@ def evaluate_node( x for x in possibly_joinable_linkable_specs if x not in next_candidate.satisfiable_linkable_specs ] - logger.info("Done evaluating possible joins") + print("Done evaluating possible joins") return LinkableInstanceSatisfiabilityEvaluation( local_linkable_specs=tuple(local_linkable_specs), joinable_linkable_specs=tuple( diff --git a/metricflow/dataflow/builder/source_node.py b/metricflow/dataflow/builder/source_node.py index 6c930c4fdb..b77c2ac69f 100644 --- a/metricflow/dataflow/builder/source_node.py +++ b/metricflow/dataflow/builder/source_node.py @@ -21,9 +21,7 @@ class SourceNodeBuilder: def __init__(self, semantic_manifest_lookup: SemanticManifestLookup) -> None: # noqa: D self._semantic_manifest_lookup = semantic_manifest_lookup - def create_from_data_sets( - self, data_sets: Sequence[SemanticModelDataSet], with_measures: bool = True - ) -> Sequence[BaseOutput]: + def create_from_data_sets(self, data_sets: Sequence[SemanticModelDataSet]) -> Sequence[BaseOutput]: """Creates source nodes from SemanticModelDataSets.""" source_nodes: List[BaseOutput] = [] for data_set in data_sets: @@ -48,3 +46,7 @@ def create_from_data_sets( ) ) return source_nodes + + def 
create_read_nodes_from_data_sets(self, data_sets: Sequence[SemanticModelDataSet]) -> Sequence[BaseOutput]: + """Creates read nodes from SemanticModelDataSets.""" + return [ReadSqlSourceNode(data_set) for data_set in data_sets] diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index a601f3feff..afad49e9ae 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -350,6 +350,7 @@ def __init__( source_node_builder = SourceNodeBuilder(self._semantic_manifest_lookup) source_nodes = source_node_builder.create_from_data_sets(self._source_data_sets) + read_nodes = source_node_builder.create_read_nodes_from_data_sets(self._source_data_sets) node_output_resolver = DataflowPlanNodeOutputDataSetResolver( column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup), @@ -358,6 +359,7 @@ def __init__( self._dataflow_plan_builder = DataflowPlanBuilder( source_nodes=source_nodes, + read_nodes=read_nodes, semantic_manifest_lookup=self._semantic_manifest_lookup, ) self._to_sql_query_plan_converter = DataflowToSqlQueryPlanConverter( diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py index febcb3d5ac..ac66c35737 100644 --- a/metricflow/plan_conversion/node_processor.py +++ b/metricflow/plan_conversion/node_processor.py @@ -56,8 +56,8 @@ class MultiHopJoinCandidate: lineage: MultiHopJoinCandidateLineage -class PreDimensionJoinNodeProcessor: - """Processes source nodes before measures are joined to dimensions. +class PreJoinNodeProcessor: + """Processes source nodes before other nodes are joined. Generally, the source nodes will be combined with other dataflow plan nodes to produce a new set of nodes to realize a condition of the query. 
For example, to realize a time range constraint, a ConstrainTimeRangeNode will be added @@ -85,6 +85,7 @@ def __init__( # noqa: D self._semantic_model_lookup = semantic_model_lookup self._join_evaluator = SemanticModelJoinEvaluator(semantic_model_lookup) + # TODO: add test with time constraint def add_time_range_constraint( self, source_nodes: Sequence[BaseOutput], diff --git a/metricflow/test/dataflow/builder/test_cyclic_join.py b/metricflow/test/dataflow/builder/test_cyclic_join.py index 8762e45ce3..d35a2b3987 100644 --- a/metricflow/test/dataflow/builder/test_cyclic_join.py +++ b/metricflow/test/dataflow/builder/test_cyclic_join.py @@ -34,6 +34,7 @@ def cyclic_join_manifest_dataflow_plan_builder( # noqa: D return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.cyclic_join_source_nodes, + read_nodes=list(consistent_id_object_repository.cyclic_join_read_nodes.values()), semantic_manifest_lookup=cyclic_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) diff --git a/metricflow/test/dataflow/builder/test_node_evaluator.py b/metricflow/test/dataflow/builder/test_node_evaluator.py index 523cb416ed..c88fa7319e 100644 --- a/metricflow/test/dataflow/builder/test_node_evaluator.py +++ b/metricflow/test/dataflow/builder/test_node_evaluator.py @@ -18,7 +18,7 @@ from metricflow.dataset.dataset import DataSet from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow.plan_conversion.column_resolver import DunderColumnAssociationResolver -from metricflow.plan_conversion.node_processor import PreDimensionJoinNodeProcessor +from metricflow.plan_conversion.node_processor import PreJoinNodeProcessor from metricflow.specs.specs import ( DimensionSpec, EntityReference, @@ -65,7 +65,7 @@ def make_multihop_node_evaluator( semantic_manifest_lookup=semantic_manifest_lookup_with_multihop_links, ) - node_processor = PreDimensionJoinNodeProcessor( + node_processor = PreJoinNodeProcessor( 
semantic_model_lookup=semantic_manifest_lookup_with_multihop_links.semantic_model_lookup, node_data_set_resolver=node_data_set_resolver, ) diff --git a/metricflow/test/fixtures/dataflow_fixtures.py b/metricflow/test/fixtures/dataflow_fixtures.py index 06616aa7f1..e1af9103d2 100644 --- a/metricflow/test/fixtures/dataflow_fixtures.py +++ b/metricflow/test/fixtures/dataflow_fixtures.py @@ -34,6 +34,7 @@ def dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.simple_model_source_nodes, + read_nodes=list(consistent_id_object_repository.simple_model_read_nodes.values()), semantic_manifest_lookup=simple_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -47,6 +48,7 @@ def multihop_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.multihop_model_source_nodes, + read_nodes=list(consistent_id_object_repository.multihop_model_read_nodes.values()), semantic_manifest_lookup=multi_hop_join_semantic_manifest_lookup, cost_function=DefaultCostFunction(), ) @@ -68,6 +70,7 @@ def scd_dataflow_plan_builder( # noqa: D ) -> DataflowPlanBuilder: return DataflowPlanBuilder( source_nodes=consistent_id_object_repository.scd_model_source_nodes, + read_nodes=list(consistent_id_object_repository.scd_model_read_nodes.values()), semantic_manifest_lookup=scd_semantic_manifest_lookup, cost_function=DefaultCostFunction(), column_association_resolver=scd_column_association_resolver, diff --git a/metricflow/test/fixtures/model_fixtures.py b/metricflow/test/fixtures/model_fixtures.py index e4cf809b12..60e69bdb53 100644 --- a/metricflow/test/fixtures/model_fixtures.py +++ b/metricflow/test/fixtures/model_fixtures.py @@ -47,12 +47,10 @@ def _data_set_to_read_nodes(data_sets: OrderedDict[str, SemanticModelDataSet]) - def _data_set_to_source_nodes( - semantic_manifest_lookup: SemanticManifestLookup, - data_sets: OrderedDict[str, 
SemanticModelDataSet], - with_measures: bool = True, + semantic_manifest_lookup: SemanticManifestLookup, data_sets: OrderedDict[str, SemanticModelDataSet] ) -> Sequence[BaseOutput]: source_node_builder = SourceNodeBuilder(semantic_manifest_lookup) - return source_node_builder.create_from_data_sets(list(data_sets.values()), with_measures=with_measures) + return source_node_builder.create_from_data_sets(list(data_sets.values())) def query_parser_from_yaml(yaml_contents: List[YamlConfigFile]) -> MetricFlowQueryParser: @@ -90,6 +88,7 @@ class ConsistentIdObjectRepository: scd_model_read_nodes: OrderedDict[str, ReadSqlSourceNode] scd_model_source_nodes: Sequence[BaseOutput] + cyclic_join_read_nodes: OrderedDict[str, ReadSqlSourceNode] cyclic_join_source_nodes: Sequence[BaseOutput] @@ -124,6 +123,7 @@ def consistent_id_object_repository( scd_model_source_nodes=_data_set_to_source_nodes( semantic_manifest_lookup=scd_semantic_manifest_lookup, data_sets=scd_data_sets ), + cyclic_join_read_nodes=_data_set_to_read_nodes(cyclic_join_data_sets), cyclic_join_source_nodes=_data_set_to_source_nodes( semantic_manifest_lookup=cyclic_join_semantic_manifest_lookup, data_sets=cyclic_join_data_sets ), diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index 0a20739a78..a4b42d42db 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -133,17 +133,16 @@ integration_test: name: query_dimensions_from_different_tables description: Query multiple dimensions without metrics, requiring a join model: SIMPLE_MODEL - group_bys: ["user__home_state", "listing__is_lux_latest"] + group_bys: ["user__home_state_latest", "listing__is_lux_latest"] check_query: | SELECT - u.home_state AS user__home_state + u.home_state_latest AS user__home_state_latest , l.is_lux AS listing__is_lux_latest FROM {{ source_schema }}.dim_listings_latest 
l - ON l.listing_id = v.listing_id LEFT OUTER JOIN {{ source_schema }}.dim_users_latest u - ON u.user_id = v.user_id + ON u.user_id = l.user_id GROUP BY - u.home_state + u.home_state_latest , l.is_lux --- integration_test: diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index eec106d555..e5c33bc2ab 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -206,7 +206,8 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", - CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + # CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, + ["itest_dimensions.yaml/query_dimensions_only"], ids=lambda name: f"name={name}", ) def test_case( @@ -320,5 +321,6 @@ def test_case( double_data_type_name=check_query_helpers.double_data_type_name, ) ) + # If we sort, it's effectively not checking the order whatever order that the output was would be overwritten. 
assert_dataframes_equal(actual, expected, sort_columns=not case.check_order, allow_empty=case.allow_empty) From 33082b91eeff78438780d68f76709d419244d1a5 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 17:30:16 -0700 Subject: [PATCH 12/19] Update snapshots & remove prints --- .../dataflow/builder/dataflow_plan_builder.py | 50 +- metricflow/dataflow/builder/node_evaluator.py | 10 +- .../DataflowPlan/test_cyclic_join__dfp_0.xml | 4 +- .../test_distinct_values_plan__dfp_0.xml | 19 +- .../DuckDB/test_distinct_values__plan0.sql | 216 ++--- .../test_distinct_values__plan0_optimized.sql | 3 +- .../test_distinct_values__plan0.xml | 832 ++++++------------ 7 files changed, 380 insertions(+), 754 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 42ad16b8dc..4352bbeb41 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -204,7 +204,7 @@ def build_plan( plan = DataflowPlan(plan_id=plan_id, sink_output_nodes=[sink_node]) for optimizer in optimizers: - print(f"Applying {optimizer.__class__.__name__}") + logger.info(f"Applying {optimizer.__class__.__name__}") try: plan = optimizer.optimize(plan) except Exception: @@ -233,7 +233,7 @@ def _build_metrics_output_node( compute_metrics_node: Optional[ComputeMetricsNode] = None for metric_spec in metric_specs: - print(f"Generating compute metrics node for {metric_spec}") + logger.info(f"Generating compute metrics node for {metric_spec}") metric_reference = metric_spec.as_reference metric = self._metric_lookup.get_metric(metric_reference) @@ -242,7 +242,7 @@ def _build_metrics_output_node( metric_reference=metric_reference, column_association_resolver=self._column_association_resolver, ) - print( + logger.info( f"For {metric.type} metric: {metric_spec}, needed metrics are:\n" f"{pformat_big_objects(metric_input_specs=metric_input_specs)}" ) @@ -263,7 
+263,7 @@ def _build_metrics_output_node( column_association_resolver=self._column_association_resolver, ) - print( + logger.info( f"For {metric_spec}, needed measures are:\n" f"{pformat_big_objects(metric_input_measure_specs=metric_input_measure_specs)}" ) @@ -560,9 +560,9 @@ def _find_dataflow_recipe( ) potential_source_nodes = list(source_nodes_to_linkable_specs.keys()) # issue: getting ds__day from the wrong table - print(f"There are {len(potential_source_nodes)} potential source nodes") + logger.info(f"There are {len(potential_source_nodes)} potential source nodes") - print(f"Starting search with {len(source_nodes)} source nodes") + logger.info(f"Starting search with {len(source_nodes)} source nodes") start_time = time.time() node_processor = PreJoinNodeProcessor( @@ -581,14 +581,16 @@ def _find_dataflow_recipe( nodes=source_nodes, metric_time_dimension_reference=self._metric_time_dimension_reference, ) - print(f"After removing unnecessary nodes, there are {len(nodes_available_for_joins)} nodes available for joins") + logger.info( + f"After removing unnecessary nodes, there are {len(nodes_available_for_joins)} nodes available for joins" + ) if DataflowPlanBuilder._contains_multihop_linkables(linkable_specs): nodes_available_for_joins = node_processor.add_multi_hop_joins(linkable_specs, source_nodes) - print( + logger.info( f"After adding multi-hop nodes, there are {len(nodes_available_for_joins)} nodes available for joins:\n" f"{pformat_big_objects(nodes_available_for_joins)}" ) - print(f"Processing nodes took: {time.time()-start_time:.2f}s") + logger.info(f"Processing nodes took: {time.time()-start_time:.2f}s") node_evaluator = NodeEvaluatorForLinkableInstances( semantic_model_lookup=self._semantic_model_lookup, @@ -600,36 +602,36 @@ def _find_dataflow_recipe( node_to_evaluation: Dict[BaseOutput, LinkableInstanceSatisfiabilityEvaluation] = {} for node in self._sort_by_suitability(potential_source_nodes): - print(f"\n\n\nEvaluating source 
node:\n{pformat_big_objects(source_node=dataflow_dag_as_text(node))}") + logger.debug(f"Evaluating source node:\n{pformat_big_objects(source_node=dataflow_dag_as_text(node))}") start_time = time.time() evaluation = node_evaluator.evaluate_node(start_node=node, required_linkable_specs=list(linkable_specs)) - print(f"Evaluation of {node} took {time.time() - start_time:.2f}s") + logger.info(f"Evaluation of {node} took {time.time() - start_time:.2f}s") - print( + logger.debug( f"Evaluation for source node is:\n" f"{pformat_big_objects(node=dataflow_dag_as_text(node), evaluation=evaluation)}" ) if len(evaluation.unjoinable_linkable_specs) > 0: - print( + logger.info( f"Skipping {node.node_id} since it contains un-joinable specs: " f"{evaluation.unjoinable_linkable_specs}" ) continue num_joins_required = len(evaluation.join_recipes) - print(f"Found candidate with node ID '{node.node_id}' with {num_joins_required} joins required.") + logger.info(f"Found candidate with node ID '{node.node_id}' with {num_joins_required} joins required.") node_to_evaluation[node] = evaluation # Since are evaluating nodes with the lowest cost first, if we find one without requiring any joins, then # this is going to be the lowest cost solution. if len(evaluation.join_recipes) == 0: - print("Not evaluating other nodes since we found one that doesn't require joins") + logger.info("Not evaluating other nodes since we found one that doesn't require joins") # But we don't break the loop here? why not? 
- print(f"Found {len(node_to_evaluation)} candidate source nodes.") + logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") if len(node_to_evaluation) > 0: cost_function = DefaultCostFunction() @@ -637,7 +639,7 @@ def _find_dataflow_recipe( assert cost_function.calculate_cost(node) == DefaultCost(num_joins=0, num_aggregations=0) node_with_lowest_cost = min(node_to_evaluation, key=cost_function.calculate_cost) evaluation = node_to_evaluation[node_with_lowest_cost] - print( + logger.info( "Lowest cost node is:\n" + pformat_big_objects( lowest_cost_node=dataflow_dag_as_text(node_with_lowest_cost), @@ -720,7 +722,7 @@ def build_aggregated_measures( ) for (semantic_model, measure_constraint), measures in semantic_models_and_constraints_to_measures.items(): - print( + logger.info( f"Building aggregated measures for {semantic_model}. " f" Input measures: {measures} with constraints: {measure_constraint}" ) @@ -743,7 +745,7 @@ def build_aggregated_measures( if non_additive_spec is not None: non_additive_message = f" with non-additive dimension spec: {non_additive_spec}" - print(f"Building aggregated measures for {semantic_model}{non_additive_message}") + logger.info(f"Building aggregated measures for {semantic_model}{non_additive_message}") input_specs = tuple(input_specs_by_measure_spec[measure_spec] for measure_spec in measure_specs) output_nodes.append( self._build_aggregated_measures_from_measure_source_node( @@ -796,7 +798,7 @@ def _build_aggregated_measures_from_measure_source_node( cumulative_metric_adjusted_time_constraint: Optional[TimeRangeConstraint] = None if cumulative and time_range_constraint is not None: - print(f"Time range constraint before adjustment is {time_range_constraint}") + logger.info(f"Time range constraint before adjustment is {time_range_constraint}") granularity: Optional[TimeGranularity] = None count = 0 if cumulative_window is not None: @@ -809,7 +811,7 @@ def _build_aggregated_measures_from_measure_source_node( 
cumulative_metric_adjusted_time_constraint = ( time_range_constraint.adjust_time_constraint_for_cumulative_metric(granularity, count) ) - print(f"Adjusted time range constraint {cumulative_metric_adjusted_time_constraint}") + logger.info(f"Adjusted time range constraint {cumulative_metric_adjusted_time_constraint}") # Extraneous linkable specs are specs that are used in this phase that should not show up in the final result # unless it was already a requested spec in the query @@ -824,7 +826,7 @@ def _build_aggregated_measures_from_measure_source_node( ) required_linkable_specs = LinkableSpecSet.merge((queried_linkable_specs, extraneous_linkable_specs)) - print( + logger.info( f"Looking for a recipe to get:\n" f"{pformat_big_objects(measure_specs=measure_specs, required_linkable_set=required_linkable_specs)}" ) @@ -835,12 +837,12 @@ def _build_aggregated_measures_from_measure_source_node( time_range_constraint=cumulative_metric_adjusted_time_constraint or time_range_constraint, linkable_spec_set=required_linkable_specs, ) - print( + logger.info( f"With {len(self._source_nodes)} source nodes, finding a recipe took " f"{time.time() - find_recipe_start_time:.2f}s" ) - print(f"Using recipe:\n{pformat_big_objects(measure_recipe=measure_recipe)}") + logger.info(f"Using recipe:\n{pformat_big_objects(measure_recipe=measure_recipe)}") if not measure_recipe: # TODO: Improve for better user understandability. 
diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index 1a1c8a6c2e..5ab12bed9f 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -349,18 +349,18 @@ def evaluate_node( ) join_candidates: List[JoinLinkableInstancesRecipe] = [] - print("Looping over nodes that can be joined to get the required linkable specs") + logger.info("Looping over nodes that can be joined to get the required linkable specs") # Using a greedy approach, try to get the "possibly_joinable_linkable_specs" by iteratively joining nodes with # the most matching linkable specs. We try to join nodes with the most matching specs to minimize the number of # joins that we have to do to. A knapsack solution is ideal, but punting on that for simplicity. while len(possibly_joinable_linkable_specs) > 0: - print(f"Looking for linkable specs:\n{pformat_big_objects(possibly_joinable_linkable_specs)}") + logger.info(f"Looking for linkable specs:\n{pformat_big_objects(possibly_joinable_linkable_specs)}") # We've run out of candidate data sets, but there are more linkable specs that we need. That means the # rest of the linkable specs can't be joined in, and we're left with unjoinable specs remaining. if len(candidates_for_join) == 0: - print( + logger.info( "There are no more candidate nodes that can be joined, but not all linkable specs have " "been acquired." ) @@ -369,7 +369,7 @@ def evaluate_node( # Join the best candidate to realize the linkable specs next_candidate = candidates_for_join.pop(0) - print(f"The next candidate node to be joined is:\n{pformat_big_objects(next_candidate)}") + logger.info(f"The next candidate node to be joined is:\n{pformat_big_objects(next_candidate)}") join_candidates.append(next_candidate) # Update the candidates. 
Since we'll be joined/ing the previously selected candidate, we no longer need @@ -386,7 +386,7 @@ def evaluate_node( x for x in possibly_joinable_linkable_specs if x not in next_candidate.satisfiable_linkable_specs ] - print("Done evaluating possible joins") + logger.info("Done evaluating possible joins") return LinkableInstanceSatisfiabilityEvaluation( local_linkable_specs=tuple(local_linkable_specs), joinable_linkable_specs=tuple( diff --git a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml index 7f186e865b..2382b41493 100644 --- a/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml +++ b/metricflow/test/snapshots/test_cyclic_join.py/DataflowPlan/test_cyclic_join__dfp_0.xml @@ -63,7 +63,7 @@ - + @@ -86,7 +86,7 @@ - + diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml index 10f2780c9b..f5ce7094e6 100644 --- a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan__dfp_0.xml @@ -40,18 +40,13 @@ - - - - - - - - - - - - + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql index 5e955f5039..1e80d3e9c5 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0.sql @@ -1,160 +1,84 @@ -- Order By ['listing__country_latest'] Limit 100 
SELECT - subq_3.listing__country_latest + subq_2.listing__country_latest FROM ( -- Constrain Output with WHERE SELECT - subq_2.listing__country_latest + subq_1.listing__country_latest FROM ( -- Pass Only Elements: -- ['listing__country_latest'] SELECT - subq_1.listing__country_latest + subq_0.listing__country_latest FROM ( - -- Metric Time Dimension 'ds' + -- Read Elements From Semantic Model 'listings_latest' SELECT - subq_0.ds__day - , subq_0.ds__week - , subq_0.ds__month - , subq_0.ds__quarter - , subq_0.ds__year - , subq_0.ds__extract_year - , subq_0.ds__extract_quarter - , subq_0.ds__extract_month - , subq_0.ds__extract_week - , subq_0.ds__extract_day - , subq_0.ds__extract_dow - , subq_0.ds__extract_doy - , subq_0.created_at__day - , subq_0.created_at__week - , subq_0.created_at__month - , subq_0.created_at__quarter - , subq_0.created_at__year - , subq_0.created_at__extract_year - , subq_0.created_at__extract_quarter - , subq_0.created_at__extract_month - , subq_0.created_at__extract_week - , subq_0.created_at__extract_day - , subq_0.created_at__extract_dow - , subq_0.created_at__extract_doy - , subq_0.listing__ds__day - , subq_0.listing__ds__week - , subq_0.listing__ds__month - , subq_0.listing__ds__quarter - , subq_0.listing__ds__year - , subq_0.listing__ds__extract_year - , subq_0.listing__ds__extract_quarter - , subq_0.listing__ds__extract_month - , subq_0.listing__ds__extract_week - , subq_0.listing__ds__extract_day - , subq_0.listing__ds__extract_dow - , subq_0.listing__ds__extract_doy - , subq_0.listing__created_at__day - , subq_0.listing__created_at__week - , subq_0.listing__created_at__month - , subq_0.listing__created_at__quarter - , subq_0.listing__created_at__year - , subq_0.listing__created_at__extract_year - , subq_0.listing__created_at__extract_quarter - , subq_0.listing__created_at__extract_month - , subq_0.listing__created_at__extract_week - , subq_0.listing__created_at__extract_day - , subq_0.listing__created_at__extract_dow - , 
subq_0.listing__created_at__extract_doy - , subq_0.ds__day AS metric_time__day - , subq_0.ds__week AS metric_time__week - , subq_0.ds__month AS metric_time__month - , subq_0.ds__quarter AS metric_time__quarter - , subq_0.ds__year AS metric_time__year - , subq_0.ds__extract_year AS metric_time__extract_year - , subq_0.ds__extract_quarter AS metric_time__extract_quarter - , subq_0.ds__extract_month AS metric_time__extract_month - , subq_0.ds__extract_week AS metric_time__extract_week - , subq_0.ds__extract_day AS metric_time__extract_day - , subq_0.ds__extract_dow AS metric_time__extract_dow - , subq_0.ds__extract_doy AS metric_time__extract_doy - , subq_0.listing - , subq_0.user - , subq_0.listing__user - , subq_0.country_latest - , subq_0.is_lux_latest - , subq_0.capacity_latest - , subq_0.listing__country_latest - , subq_0.listing__is_lux_latest - , subq_0.listing__capacity_latest - , subq_0.listings - , subq_0.largest_listing - , subq_0.smallest_listing - FROM ( - -- Read Elements From Semantic Model 'listings_latest' - SELECT - 1 AS listings - , listings_latest_src_10004.capacity AS largest_listing - , listings_latest_src_10004.capacity AS smallest_listing - , listings_latest_src_10004.created_at AS ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year - , EXTRACT(year FROM listings_latest_src_10004.created_at) AS ds__extract_year - , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS ds__extract_quarter - , EXTRACT(month FROM listings_latest_src_10004.created_at) AS ds__extract_month - , EXTRACT(week FROM listings_latest_src_10004.created_at) AS ds__extract_week - , EXTRACT(day FROM listings_latest_src_10004.created_at) AS ds__extract_day - , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS 
ds__extract_dow - , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS ds__extract_doy - , listings_latest_src_10004.created_at AS created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS created_at__year - , EXTRACT(year FROM listings_latest_src_10004.created_at) AS created_at__extract_year - , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS created_at__extract_quarter - , EXTRACT(month FROM listings_latest_src_10004.created_at) AS created_at__extract_month - , EXTRACT(week FROM listings_latest_src_10004.created_at) AS created_at__extract_week - , EXTRACT(day FROM listings_latest_src_10004.created_at) AS created_at__extract_day - , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS created_at__extract_dow - , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS created_at__extract_doy - , listings_latest_src_10004.country AS country_latest - , listings_latest_src_10004.is_lux AS is_lux_latest - , listings_latest_src_10004.capacity AS capacity_latest - , listings_latest_src_10004.created_at AS listing__ds__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__ds__year - , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__ds__extract_year - , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__ds__extract_quarter - , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__ds__extract_month - , EXTRACT(week FROM 
listings_latest_src_10004.created_at) AS listing__ds__extract_week - , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__ds__extract_day - , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__ds__extract_dow - , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__ds__extract_doy - , listings_latest_src_10004.created_at AS listing__created_at__day - , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week - , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month - , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter - , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year - , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_year - , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_quarter - , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_month - , EXTRACT(week FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_week - , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_day - , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_dow - , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_doy - , listings_latest_src_10004.country AS listing__country_latest - , listings_latest_src_10004.is_lux AS listing__is_lux_latest - , listings_latest_src_10004.capacity AS listing__capacity_latest - , listings_latest_src_10004.listing_id AS listing - , listings_latest_src_10004.user_id AS user - , listings_latest_src_10004.user_id AS listing__user - FROM ***************************.dim_listings_latest listings_latest_src_10004 - ) subq_0 - ) subq_1 + 1 AS listings + , listings_latest_src_10004.capacity AS largest_listing + , 
listings_latest_src_10004.capacity AS smallest_listing + , listings_latest_src_10004.created_at AS ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS ds__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS ds__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS ds__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS ds__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS ds__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS ds__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS ds__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS ds__extract_doy + , listings_latest_src_10004.created_at AS created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS created_at__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS created_at__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS created_at__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS created_at__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS created_at__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS created_at__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS created_at__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS created_at__extract_doy + , 
listings_latest_src_10004.country AS country_latest + , listings_latest_src_10004.is_lux AS is_lux_latest + , listings_latest_src_10004.capacity AS capacity_latest + , listings_latest_src_10004.created_at AS listing__ds__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__ds__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__ds__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__ds__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__ds__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__ds__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__ds__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__ds__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS listing__ds__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__ds__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__ds__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__ds__extract_doy + , listings_latest_src_10004.created_at AS listing__created_at__day + , DATE_TRUNC('week', listings_latest_src_10004.created_at) AS listing__created_at__week + , DATE_TRUNC('month', listings_latest_src_10004.created_at) AS listing__created_at__month + , DATE_TRUNC('quarter', listings_latest_src_10004.created_at) AS listing__created_at__quarter + , DATE_TRUNC('year', listings_latest_src_10004.created_at) AS listing__created_at__year + , EXTRACT(year FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_year + , EXTRACT(quarter FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_quarter + , EXTRACT(month FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_month + , EXTRACT(week FROM listings_latest_src_10004.created_at) AS 
listing__created_at__extract_week + , EXTRACT(day FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_day + , EXTRACT(dow FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_dow + , EXTRACT(doy FROM listings_latest_src_10004.created_at) AS listing__created_at__extract_doy + , listings_latest_src_10004.country AS listing__country_latest + , listings_latest_src_10004.is_lux AS listing__is_lux_latest + , listings_latest_src_10004.capacity AS listing__capacity_latest + , listings_latest_src_10004.listing_id AS listing + , listings_latest_src_10004.user_id AS user + , listings_latest_src_10004.user_id AS listing__user + FROM ***************************.dim_listings_latest listings_latest_src_10004 + ) subq_0 GROUP BY - subq_1.listing__country_latest - ) subq_2 + subq_0.listing__country_latest + ) subq_1 WHERE listing__country_latest = 'us' -) subq_3 -ORDER BY subq_3.listing__country_latest DESC +) subq_2 +ORDER BY subq_2.listing__country_latest DESC LIMIT 100 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql index 8417e18d52..592773c228 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_distinct_values__plan0_optimized.sql @@ -4,7 +4,6 @@ SELECT listing__country_latest FROM ( -- Read Elements From Semantic Model 'listings_latest' - -- Metric Time Dimension 'ds' -- Pass Only Elements: -- ['listing__country_latest'] SELECT @@ -12,7 +11,7 @@ FROM ( FROM ***************************.dim_listings_latest listings_latest_src_10004 GROUP BY country -) subq_6 +) subq_4 WHERE listing__country_latest = 'us' ORDER BY listing__country_latest DESC LIMIT 100 diff --git 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml index 9beb3baaeb..f092918942 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml @@ -1,586 +1,292 @@ - - - - - - + + + + + + - - - - + + + + - - - - - - + + + + + + - - - - - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + From 1d27dff004240e19456410f130966ab687d38daa Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 18:09:04 -0700 Subject: [PATCH 13/19] Fix node evaluation logic --- .../dataflow/builder/dataflow_plan_builder.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 4352bbeb41..5b4a7c945e 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -18,7 +18,7 @@ from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity from metricflow.dag.id_generation import DATAFLOW_PLAN_PREFIX, IdGeneratorRegistry -from metricflow.dataflow.builder.costing import DataflowPlanNodeCostFunction, DefaultCost, DefaultCostFunction +from metricflow.dataflow.builder.costing import DataflowPlanNodeCostFunction, DefaultCostFunction from metricflow.dataflow.builder.measure_additiveness import group_measure_specs_by_additiveness from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver from metricflow.dataflow.builder.node_evaluator import ( @@ -629,22 +629,23 @@ def _find_dataflow_recipe( # this is going to be the lowest cost solution. if len(evaluation.join_recipes) == 0: logger.info("Not evaluating other nodes since we found one that doesn't require joins") - # But we don't break the loop here? why not? 
+ break logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") if len(node_to_evaluation) > 0: cost_function = DefaultCostFunction() - for node in node_to_evaluation: - assert cost_function.calculate_cost(node) == DefaultCost(num_joins=0, num_aggregations=0) - node_with_lowest_cost = min(node_to_evaluation, key=cost_function.calculate_cost) - evaluation = node_to_evaluation[node_with_lowest_cost] + # All source nodes cost 0. Get evaluation with lowest cost. + node_with_lowest_evaluation_cost = min( + node_to_evaluation, key=lambda x: len(node_to_evaluation[x].join_recipes) + ) + evaluation = node_to_evaluation[node_with_lowest_evaluation_cost] logger.info( "Lowest cost node is:\n" + pformat_big_objects( - lowest_cost_node=dataflow_dag_as_text(node_with_lowest_cost), + lowest_cost_node=dataflow_dag_as_text(node_with_lowest_evaluation_cost), evaluation=evaluation, - cost=cost_function.calculate_cost(node_with_lowest_cost), + cost=cost_function.calculate_cost(node_with_lowest_evaluation_cost), ) ) @@ -662,14 +663,14 @@ def _find_dataflow_recipe( ) return DataflowRecipe( - source_node=node_with_lowest_cost, + source_node=node_with_lowest_evaluation_cost, required_local_linkable_specs=( evaluation.local_linkable_specs + required_local_entity_specs + required_local_dimension_specs + required_local_time_dimension_specs ), - join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_cost].join_recipes, + join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_evaluation_cost].join_recipes, ) logger.error("No recipe could be constructed.") From f5bc0bce2a642456cbbc36aace14c53c03c83f03 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 18:09:57 -0700 Subject: [PATCH 14/19] Remove test change --- metricflow/test/integration/test_configured_cases.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metricflow/test/integration/test_configured_cases.py 
b/metricflow/test/integration/test_configured_cases.py index e5c33bc2ab..d6d8e1e729 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -206,8 +206,7 @@ def filter_not_supported_features( @pytest.mark.parametrize( "name", - # CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, - ["itest_dimensions.yaml/query_dimensions_only"], + CONFIGURED_INTEGRATION_TESTS_REPOSITORY.all_test_case_names, ids=lambda name: f"name={name}", ) def test_case( From b986d608cc617133182272168e29deb4ada863ae Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 18:14:45 -0700 Subject: [PATCH 15/19] Display correct cost for evaluation --- metricflow/dataflow/builder/dataflow_plan_builder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 5b4a7c945e..3099bac0fc 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -634,7 +634,6 @@ def _find_dataflow_recipe( logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") if len(node_to_evaluation) > 0: - cost_function = DefaultCostFunction() # All source nodes cost 0. Get evaluation with lowest cost. 
node_with_lowest_evaluation_cost = min( node_to_evaluation, key=lambda x: len(node_to_evaluation[x].join_recipes) @@ -645,7 +644,7 @@ def _find_dataflow_recipe( + pformat_big_objects( lowest_cost_node=dataflow_dag_as_text(node_with_lowest_evaluation_cost), evaluation=evaluation, - cost=cost_function.calculate_cost(node_with_lowest_evaluation_cost), + joins=len(node_to_evaluation[node_with_lowest_evaluation_cost].join_recipes), ) ) From cc61b1cff74f627b11b75b0f7ce2193d9a1240c5 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 18:19:55 -0700 Subject: [PATCH 16/19] Delete unneeded changes --- .../dataflow/builder/dataflow_plan_builder.py | 58 +------------------ .../model/semantics/semantic_model_lookup.py | 6 -- metricflow/protocols/semantics.py | 7 --- 3 files changed, 1 insertion(+), 70 deletions(-) diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 3099bac0fc..145783ff5f 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -9,10 +9,7 @@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.pretty_print import pformat_big_objects from dbt_semantic_interfaces.protocols.metric import MetricTimeWindow, MetricType -from dbt_semantic_interfaces.protocols.semantic_model import SemanticModel from dbt_semantic_interfaces.references import ( - DimensionReference, - EntityReference, TimeDimensionReference, ) from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity @@ -305,67 +302,14 @@ def _build_metrics_output_node( join_type=combine_metrics_join_type, ) - def __get_semantic_models_for_linkable_specs( - self, linkable_specs: LinkableSpecSet - ) -> Dict[SemanticModel, LinkableSpecSet]: - """Build dict of semantic models to associated linkable specs.""" - semantic_models_to_linkable_specs: Dict[SemanticModel, LinkableSpecSet] = {} - - # 
Dimensions - for dimension_spec in linkable_specs.dimension_specs: - semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( - linkable_element=DimensionReference(element_name=dimension_spec.element_name) - ) - for semantic_model in semantic_models: - new_linkable_spec_set = LinkableSpecSet(dimension_specs=(dimension_spec,)) - linkable_specs_for_semantic_model = semantic_models_to_linkable_specs.get(semantic_model) - semantic_models_to_linkable_specs[semantic_model] = ( - LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) - if linkable_specs_for_semantic_model - else new_linkable_spec_set - ) - # Time dimensions - for time_dimension_spec in linkable_specs.time_dimension_specs: - semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( - linkable_element=TimeDimensionReference(element_name=time_dimension_spec.element_name) - ) - for semantic_model in semantic_models: - new_linkable_spec_set = LinkableSpecSet(time_dimension_specs=(time_dimension_spec,)) - semantic_models_to_linkable_specs[semantic_model] = ( - LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) - if linkable_specs_for_semantic_model - else new_linkable_spec_set - ) - # Entities - for entity_spec in linkable_specs.entity_specs: - semantic_models = self._semantic_model_lookup.get_semantic_models_for_linkable_element( - linkable_element=EntityReference(element_name=entity_spec.element_name) - ) - for semantic_model in semantic_models: - new_linkable_spec_set = LinkableSpecSet(entity_specs=(entity_spec,)) - semantic_models_to_linkable_specs[semantic_model] = ( - LinkableSpecSet.merge([linkable_specs_for_semantic_model, new_linkable_spec_set]) - if linkable_specs_for_semantic_model - else new_linkable_spec_set - ) - - return semantic_models_to_linkable_specs - def build_plan_for_distinct_values(self, query_spec: MetricFlowQuerySpec) -> DataflowPlan: """Generate a plan that would get 
the distinct values of a linkable instance. e.g. distinct listing__country_latest for bookings by listing__country_latest """ assert not query_spec.metric_specs, "Can't build distinct values plan with metrics." - # linkable_specs_to_dataflow_recipes: Dict[LinkableSpecSet, DataflowRecipe] = {} - # for linkable_specs in self.__get_semantic_models_for_linkable_specs( - # linkable_specs=query_spec.linkable_specs - # ).values(): - dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=query_spec.linkable_specs) - if not dataflow_recipe: - raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}.") - # linkable_specs_to_dataflow_recipes[linkable_specs] = dataflow_recipe + dataflow_recipe = self._find_dataflow_recipe(linkable_spec_set=query_spec.linkable_specs) if not dataflow_recipe: raise UnableToSatisfyQueryError(f"Recipe not found for linkable specs: {query_spec.linkable_specs}") diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 44d8ba0a03..7c459949cd 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -268,12 +268,6 @@ def get_semantic_models_for_entity(self, entity_reference: EntityReference) -> S entity = self._entity_ref_to_entity[entity_reference] return set(self._entity_index[entity]) - def get_semantic_models_for_linkable_element( - self, linkable_element: LinkableElementReference - ) -> Set[SemanticModel]: - """Return all semantic models associated with a linkable element reference.""" - return set(self._linkable_reference_index[linkable_element]) - @staticmethod def get_entity_from_semantic_model( semantic_model: SemanticModel, entity_reference: LinkableElementReference diff --git a/metricflow/protocols/semantics.py b/metricflow/protocols/semantics.py index 8ba4833d4e..6fc4e00a39 100644 --- a/metricflow/protocols/semantics.py +++ 
b/metricflow/protocols/semantics.py @@ -19,7 +19,6 @@ from dbt_semantic_interfaces.references import ( DimensionReference, EntityReference, - LinkableElementReference, MeasureReference, MetricReference, SemanticModelElementReference, @@ -102,12 +101,6 @@ def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Op """Retrieve the entity matching the element -> semantic model mapping, if any.""" raise NotImplementedError - def get_semantic_models_for_linkable_element( - self, linkable_element: LinkableElementReference - ) -> Set[SemanticModel]: - """Return all semantic models associated with a linkable element reference.""" - raise NotImplementedError - @abstractmethod def get_by_reference(self, semantic_model_reference: SemanticModelReference) -> Optional[SemanticModel]: """Retrieve the semantic model object matching the input semantic model reference, if any.""" From cda9a5230a073217378df061cde3d80dc95a1c21 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 19:29:42 -0700 Subject: [PATCH 17/19] Cleanup --- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- metricflow/query/query_parser.py | 9 ++-- .../builder/test_dataflow_plan_builder.py | 48 +++++++++++++++++++ .../test_cases/itest_dimensions.yaml | 7 +-- .../test/integration/test_configured_cases.py | 1 - 5 files changed, 58 insertions(+), 9 deletions(-) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 179bc14f51..d79c30a678 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -800,7 +800,7 @@ def visit_pass_elements_filter_node(self, node: FilterElementsNode) -> SqlDataSe CreateSelectColumnsForInstances(from_data_set_alias, self._column_association_resolver) ).as_tuple() - # If no measures are passed, group by all columns. + # If distinct values requested, group by all select columns. 
group_bys = select_columns if node.distinct else () return SqlDataSet( instance_set=output_instance_set, diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 17b4139eb1..4ae7b16822 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -228,7 +228,8 @@ def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricRe "dimension 'metric_time'." ) - def _validate_linkable_specs( + # TODO: write tests for invalid linkable specs - should error + def _validate_linkable_specs_for_metrics( self, metric_references: Tuple[MetricReference, ...], all_linkable_specs: QueryTimeLinkableSpecSet, @@ -423,7 +424,7 @@ def _parse_and_validate_query( # For each metric, verify that it's possible to retrieve all group by elements, including the ones as required # by the filters. - # TODO: Consider moving this logic into _validate_linkable_specs(). + # TODO: Consider moving this logic into _validate_linkable_specs_for_metrics(). for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.filter is not None: @@ -435,7 +436,7 @@ def _parse_and_validate_query( # Combine the group by elements from the query with the group by elements that are required by the # metric filter to see if that's a valid set that could be queried. - self._validate_linkable_specs( + self._validate_linkable_specs_for_metrics( metric_references=(metric_reference,), all_linkable_specs=QueryTimeLinkableSpecSet.combine( ( @@ -453,7 +454,7 @@ def _parse_and_validate_query( ) # Validate all of them together. 
- self._validate_linkable_specs( + self._validate_linkable_specs_for_metrics( metric_references=metric_references, all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, time_dimension_specs=time_dimension_specs, diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py index f063bf9fd3..1a9f1b5726 100644 --- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py +++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py @@ -605,6 +605,54 @@ def test_distinct_values_plan( # noqa: D ) +def test_distinct_values_plan_with_join( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, + column_association_resolver: ColumnAssociationResolver, +) -> None: + """Tests a plan to get distinct values of 2 dimensions, where a join is required.""" + dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( + query_spec=MetricFlowQuerySpec( + dimension_specs=( + DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference(element_name="user"),)), + DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference(element_name="listing"),)), + ), + where_constraint=( + WhereSpecFactory( + column_association_resolver=column_association_resolver, + ).create_from_where_filter( + PydanticWhereFilter( + where_sql_template="{{ Dimension('listing__country_latest') }} = 'us'", + ) + ) + ), + order_by_specs=( + OrderBySpec( + dimension_spec=DimensionSpec( + element_name="country_latest", entity_links=(EntityReference(element_name="listing"),) + ), + descending=True, + ), + ), + limit=100, + ) + ) + + assert_plan_snapshot_text_equal( + request=request, + mf_test_session_state=mf_test_session_state, + plan=dataflow_plan, + plan_snapshot_text=dataflow_plan_as_text(dataflow_plan), + ) + + display_graph_if_requested( + request=request, + 
mf_test_session_state=mf_test_session_state, + dag_graph=dataflow_plan, + ) + + def test_measure_constraint_plan( request: FixtureRequest, mf_test_session_state: MetricFlowTestSessionState, diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index a4b42d42db..33ea65c039 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -145,17 +145,18 @@ integration_test: u.home_state_latest , l.is_lux --- +# TODO: test for dimension with non-day granularity integration_test: name: query_time_dimension_without_granularity - description: Query just a time dimension, no granularity specified + description: Query just a time dimension, no granularity specified. Should assume default granularity for dimension. model: SIMPLE_MODEL group_bys: [ "verification__ds"] check_query: | SELECT - v.ds + v.ds__day FROM {{ source_schema }}.fct_id_verifications v GROUP BY - v.ds + v.ds__day --- integration_test: name: query_dimension_only_with_constraint diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index d6d8e1e729..eec106d555 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -320,6 +320,5 @@ def test_case( double_data_type_name=check_query_helpers.double_data_type_name, ) ) - # If we sort, it's effectively not checking the order whatever order that the output was would be overwritten. 
assert_dataframes_equal(actual, expected, sort_columns=not case.check_order, allow_empty=case.allow_empty) From 5f331f1e297f068ef6d24bf9a4c296ba965a23ad Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 23:00:05 -0700 Subject: [PATCH 18/19] Resolve granularity for partial time dimensions --- metricflow/engine/metricflow_engine.py | 2 +- metricflow/query/query_parser.py | 13 +- metricflow/test/fixtures/model_fixtures.py | 13 +- .../test_cases/itest_dimensions.yaml | 17 ++- ..._distinct_values_plan_with_join__dfp_0.xml | 90 +++++++++++++ .../test/time/test_time_granularity_solver.py | 22 +++- metricflow/time/time_granularity_solver.py | 121 ++++++++++++------ 7 files changed, 229 insertions(+), 49 deletions(-) create mode 100644 metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index afad49e9ae..666821cf07 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -376,7 +376,7 @@ def __init__( self._query_parser = MetricFlowQueryParser( column_association_resolver=self._column_association_resolver, model=self._semantic_manifest_lookup, - source_nodes=source_nodes, + read_nodes=read_nodes, node_output_resolver=node_output_resolver, ) diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 4ae7b16822..1e1827b613 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -118,13 +118,15 @@ def __init__( # noqa: D self, column_association_resolver: ColumnAssociationResolver, model: SemanticManifestLookup, - source_nodes: Sequence[BaseOutput], + read_nodes: Sequence[BaseOutput], node_output_resolver: DataflowPlanNodeOutputDataSetResolver, ) -> None: self._column_association_resolver = column_association_resolver self._model = model self._metric_lookup = model.metric_lookup 
self._semantic_model_lookup = model.semantic_model_lookup + self._node_output_resolver = node_output_resolver + self._read_nodes = read_nodes # Set up containers for known element names self._known_entity_element_references = self._semantic_model_lookup.get_entity_references() @@ -404,6 +406,8 @@ def _parse_and_validate_query( self._time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs( metric_references=metric_references, partial_time_dimension_specs=requested_linkable_specs.partial_time_dimension_specs, + read_nodes=self._read_nodes, + node_output_resolver=self._node_output_resolver, ) ) @@ -575,6 +579,8 @@ def _adjust_time_range_constraint( self._time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs( metric_references=metric_references, partial_time_dimension_specs=(partial_metric_time_spec,), + read_nodes=self._read_nodes, + node_output_resolver=self._node_output_resolver, ) ) adjust_to_granularity = partial_time_dimension_spec_to_time_dimension_spec[ @@ -773,7 +779,10 @@ def _verify_resolved_granularity_for_date_part( ensure that the correct value was passed in. 
""" resolved_granularity = self._time_granularity_solver.find_minimum_granularity_for_partial_time_dimension_spec( - partial_time_dimension_spec=partial_time_dimension_spec, metric_references=metric_references + partial_time_dimension_spec=partial_time_dimension_spec, + metric_references=metric_references, + read_nodes=self._read_nodes, + node_output_resolver=self._node_output_resolver, ) if resolved_granularity != requested_dimension_structured_name.time_granularity: raise RequestTimeGranularityException( diff --git a/metricflow/test/fixtures/model_fixtures.py b/metricflow/test/fixtures/model_fixtures.py index 60e69bdb53..afcf7bf38e 100644 --- a/metricflow/test/fixtures/model_fixtures.py +++ b/metricflow/test/fixtures/model_fixtures.py @@ -61,11 +61,10 @@ def query_parser_from_yaml(yaml_contents: List[YamlConfigFile]) -> MetricFlowQue ).semantic_manifest ) SemanticManifestValidator[SemanticManifest]().checked_validations(semantic_manifest_lookup.semantic_manifest) - source_nodes = _data_set_to_source_nodes(semantic_manifest_lookup, create_data_sets(semantic_manifest_lookup)) return MetricFlowQueryParser( model=semantic_manifest_lookup, column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup), - source_nodes=source_nodes, + read_nodes=list(_data_set_to_read_nodes(create_data_sets(semantic_manifest_lookup)).values()), node_output_resolver=DataflowPlanNodeOutputDataSetResolver( column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup), semantic_manifest_lookup=semantic_manifest_lookup, @@ -241,3 +240,13 @@ def cyclic_join_semantic_manifest_lookup(template_mapping: Dict[str, str]) -> Se """Manifest that contains a potential cycle in the join graph (if not handled properly).""" build_result = load_semantic_manifest("cyclic_join_manifest", template_mapping) return SemanticManifestLookup(build_result.semantic_manifest) + + +@pytest.fixture(scope="session") +def node_output_resolver( # noqa:D + 
simple_semantic_manifest_lookup: SemanticManifestLookup, +) -> DataflowPlanNodeOutputDataSetResolver: + return DataflowPlanNodeOutputDataSetResolver( + column_association_resolver=DunderColumnAssociationResolver(simple_semantic_manifest_lookup), + semantic_manifest_lookup=simple_semantic_manifest_lookup, + ) diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index 33ea65c039..c6d5912629 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -145,7 +145,6 @@ integration_test: u.home_state_latest , l.is_lux --- -# TODO: test for dimension with non-day granularity integration_test: name: query_time_dimension_without_granularity description: Query just a time dimension, no granularity specified. Should assume default granularity for dimension. @@ -153,10 +152,22 @@ integration_test: group_bys: [ "verification__ds"] check_query: | SELECT - v.ds__day + v.ds as verification__ds__day FROM {{ source_schema }}.fct_id_verifications v GROUP BY - v.ds__day + v.ds +--- +integration_test: + name: query_non_default_time_dimension_without_granularity + description: Query just a time dimension, no granularity specified. Should assume default granularity for dimension. 
+ model: EXTENDED_DATE_MODEL + group_bys: [ "monthly_ds"] + check_query: | + SELECT + ds AS monthly_ds__month + FROM {{ source_schema }}.fct_bookings_extended_monthly + GROUP BY + ds --- integration_test: name: query_dimension_only_with_constraint diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml new file mode 100644 index 0000000000..7b51c3e7e6 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_distinct_values_plan_with_join__dfp_0.xml @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/time/test_time_granularity_solver.py b/metricflow/test/time/test_time_granularity_solver.py index 025778df10..0fcdf0ce28 100644 --- a/metricflow/test/time/test_time_granularity_solver.py +++ b/metricflow/test/time/test_time_granularity_solver.py @@ -6,9 +6,11 @@ from dbt_semantic_interfaces.references import MetricReference from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity +from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver from metricflow.dataset.dataset import DataSet from metricflow.filters.time_constraint import TimeRangeConstraint from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup +from metricflow.test.fixtures.model_fixtures import ConsistentIdObjectRepository from metricflow.test.time.metric_time_dimension import MTD_SPEC_DAY, MTD_SPEC_MONTH from metricflow.time.time_granularity_solver import ( PartialTimeDimensionSpec, @@ -89,19 +91,31 @@ def test_validate_day_granularity_for_day_and_month_metric( # noqa: D PARTIAL_PTD_SPEC = 
PartialTimeDimensionSpec(element_name=DataSet.metric_time_dimension_name(), entity_links=()) -def test_granularity_solution_for_day_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D +def test_granularity_solution_for_day_metric( # noqa: D + time_granularity_solver: TimeGranularitySolver, + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, + consistent_id_object_repository: ConsistentIdObjectRepository, +) -> None: assert time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs( metric_references=[MetricReference(element_name="bookings")], partial_time_dimension_specs=[PARTIAL_PTD_SPEC], + node_output_resolver=node_output_resolver, + read_nodes=list(consistent_id_object_repository.simple_model_read_nodes.values()), ) == { PARTIAL_PTD_SPEC: MTD_SPEC_DAY, } -def test_granularity_solution_for_month_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D +def test_granularity_solution_for_month_metric( # noqa: D + time_granularity_solver: TimeGranularitySolver, + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, + consistent_id_object_repository: ConsistentIdObjectRepository, +) -> None: assert time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs( metric_references=[MetricReference(element_name="bookings_monthly")], partial_time_dimension_specs=[PARTIAL_PTD_SPEC], + node_output_resolver=node_output_resolver, + read_nodes=list(consistent_id_object_repository.simple_model_read_nodes.values()), ) == { PARTIAL_PTD_SPEC: MTD_SPEC_MONTH, } @@ -109,10 +123,14 @@ def test_granularity_solution_for_month_metric(time_granularity_solver: TimeGran def test_granularity_solution_for_day_and_month_metrics( # noqa: D time_granularity_solver: TimeGranularitySolver, + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, + consistent_id_object_repository: ConsistentIdObjectRepository, ) -> None: assert 
time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs( metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="bookings_monthly")], partial_time_dimension_specs=[PARTIAL_PTD_SPEC], + node_output_resolver=node_output_resolver, + read_nodes=list(consistent_id_object_repository.simple_model_read_nodes.values()), ) == {PARTIAL_PTD_SPEC: MTD_SPEC_MONTH} diff --git a/metricflow/time/time_granularity_solver.py b/metricflow/time/time_granularity_solver.py index 004e476f35..1a3bbe62d4 100644 --- a/metricflow/time/time_granularity_solver.py +++ b/metricflow/time/time_granularity_solver.py @@ -14,8 +14,11 @@ ) from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity +from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver +from metricflow.dataflow.dataflow_plan import BaseOutput from metricflow.filters.time_constraint import TimeRangeConstraint from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup +from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.specs.specs import ( TimeDimensionSpec, ) @@ -100,60 +103,100 @@ def resolve_granularity_for_partial_time_dimension_specs( self, metric_references: Sequence[MetricReference], partial_time_dimension_specs: Sequence[PartialTimeDimensionSpec], + read_nodes: Sequence[BaseOutput], + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, ) -> Dict[PartialTimeDimensionSpec, TimeDimensionSpec]: """Figure out the lowest granularity possible for the partially specified time dimension specs. Returns a dictionary that maps how the partial time dimension spec should be turned into a time dimension spec. 
""" - if not partial_time_dimension_specs: - return {} - - if metric_references: - result: Dict[PartialTimeDimensionSpec, TimeDimensionSpec] = {} - for partial_time_dimension_spec in partial_time_dimension_specs: - minimum_time_granularity = self.find_minimum_granularity_for_partial_time_dimension_spec( - partial_time_dimension_spec=partial_time_dimension_spec, metric_references=metric_references - ) - result[partial_time_dimension_spec] = TimeDimensionSpec( - element_name=partial_time_dimension_spec.element_name, - entity_links=partial_time_dimension_spec.entity_links, - time_granularity=minimum_time_granularity, - date_part=partial_time_dimension_spec.date_part, - ) - return result - else: - raise NotImplementedError # find minimum granularity for time dimension + result: Dict[PartialTimeDimensionSpec, TimeDimensionSpec] = {} + + for partial_time_dimension_spec in partial_time_dimension_specs: + minimum_time_granularity = self.find_minimum_granularity_for_partial_time_dimension_spec( + partial_time_dimension_spec=partial_time_dimension_spec, + metric_references=metric_references, + read_nodes=read_nodes, + node_output_resolver=node_output_resolver, + ) + result[partial_time_dimension_spec] = TimeDimensionSpec( + element_name=partial_time_dimension_spec.element_name, + entity_links=partial_time_dimension_spec.entity_links, + time_granularity=minimum_time_granularity, + date_part=partial_time_dimension_spec.date_part, + ) + return result def find_minimum_granularity_for_partial_time_dimension_spec( - self, partial_time_dimension_spec: PartialTimeDimensionSpec, metric_references: Sequence[MetricReference] + self, + partial_time_dimension_spec: PartialTimeDimensionSpec, + metric_references: Sequence[MetricReference], + read_nodes: Sequence[BaseOutput], + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, ) -> TimeGranularity: """Find minimum granularity allowed for time dimension when queried with given metrics.""" - valid_group_by_elements = 
self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( - metric_references=metric_references, - ) - minimum_time_granularity: Optional[TimeGranularity] = None - for path_key in valid_group_by_elements.path_key_to_linkable_dimensions: - if ( - path_key.element_name == partial_time_dimension_spec.element_name - and path_key.entity_links == partial_time_dimension_spec.entity_links - and path_key.time_granularity is not None - ): - minimum_time_granularity = ( - path_key.time_granularity - if minimum_time_granularity is None - else min(minimum_time_granularity, path_key.time_granularity) - ) - if not minimum_time_granularity: - raise RequestTimeGranularityException( - f"Unable to resolve the time dimension spec for {partial_time_dimension_spec}. " - f"Valid group by elements are:\n" - f"{pformat_big_objects([spec.qualified_name for spec in valid_group_by_elements.as_spec_set.as_tuple])}" + if metric_references: + valid_group_by_elements = self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( + metric_references=metric_references, ) + for path_key in valid_group_by_elements.path_key_to_linkable_dimensions: + if ( + path_key.element_name == partial_time_dimension_spec.element_name + and path_key.entity_links == partial_time_dimension_spec.entity_links + and path_key.time_granularity is not None + ): + minimum_time_granularity = ( + path_key.time_granularity + if minimum_time_granularity is None + else min(minimum_time_granularity, path_key.time_granularity) + ) + if not minimum_time_granularity: + raise RequestTimeGranularityException( + f"Unable to resolve the time dimension spec for {partial_time_dimension_spec}. 
" + f"Valid group by elements are:\n" + f"{pformat_big_objects([spec.qualified_name for spec in valid_group_by_elements.as_spec_set.as_tuple])}" + ) + else: + minimum_time_granularity = self.get_min_granularity_for_partial_time_dimension_without_metrics( + read_nodes=read_nodes, + node_output_resolver=node_output_resolver, + partial_time_dimension_spec=partial_time_dimension_spec, + ) + if not minimum_time_granularity: + raise RequestTimeGranularityException( + f"Unable to resolve the time dimension spec for {partial_time_dimension_spec}. " + ) return minimum_time_granularity + def get_min_granularity_for_partial_time_dimension_without_metrics( + self, + read_nodes: Sequence[BaseOutput], + node_output_resolver: DataflowPlanNodeOutputDataSetResolver, + partial_time_dimension_spec: PartialTimeDimensionSpec, + ) -> Optional[TimeGranularity]: + """Find the minimum.""" + granularity_free_qualified_name = StructuredLinkableSpecName( + entity_link_names=tuple( + [entity_link.element_name for entity_link in partial_time_dimension_spec.entity_links] + ), + element_name=partial_time_dimension_spec.element_name, + ).granularity_free_qualified_name + for read_node in read_nodes: + output_data_set = node_output_resolver.get_output_data_set(read_node) + for time_dimension_instance in output_data_set.instance_set.time_dimension_instances: + if time_dimension_instance.spec.date_part: + continue + time_dim_name_without_granularity = StructuredLinkableSpecName.from_name( + time_dimension_instance.spec.qualified_name + ).granularity_free_qualified_name + if time_dim_name_without_granularity == granularity_free_qualified_name: + return time_dimension_instance.spec.time_granularity + + return None + def adjust_time_range_to_granularity( self, time_range_constraint: TimeRangeConstraint, time_granularity: TimeGranularity ) -> TimeRangeConstraint: From 42d4e270892a83eb1054f04aa5b620bf99853186 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 23:10:14 -0700 Subject: 
[PATCH 19/19] Handle TODOs --- metricflow/plan_conversion/node_processor.py | 1 - metricflow/query/query_parser.py | 4 +++- metricflow/test/query/test_query_parser.py | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py index ac66c35737..1b94e4ceac 100644 --- a/metricflow/plan_conversion/node_processor.py +++ b/metricflow/plan_conversion/node_processor.py @@ -85,7 +85,6 @@ def __init__( # noqa: D self._semantic_model_lookup = semantic_model_lookup self._join_evaluator = SemanticModelJoinEvaluator(semantic_model_lookup) - # TODO: add test with time constraint def add_time_range_constraint( self, source_nodes: Sequence[BaseOutput], diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 1e1827b613..0192bed0e9 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -230,7 +230,6 @@ def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricRe "dimension 'metric_time'." 
) - # TODO: write tests for invalid linkable specs - should error def _validate_linkable_specs_for_metrics( self, metric_references: Tuple[MetricReference, ...], @@ -302,6 +301,9 @@ def _construct_metric_specs_for_query( def _get_metric_names( self, metric_names: Optional[Sequence[str]], metrics: Optional[Sequence[MetricQueryParameter]] ) -> Sequence[str]: + if not (metric_names or metrics): + return [] + assert_exactly_one_arg_set(metric_names=metric_names, metrics=metrics) return metric_names if metric_names else [m.name for m in metrics] if metrics else [] diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 51f272879c..481e25ca1e 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -191,6 +191,9 @@ def test_query_parser(bookings_query_parser: MetricFlowQueryParser) -> None: # ), ) + with pytest.raises(UnableToSatisfyQueryError): + bookings_query_parser.parse_and_validate_query(group_by_names=["random_stuff"]) + def test_query_parser_with_object_params(bookings_query_parser: MetricFlowQueryParser) -> None: # noqa: D Metric = namedtuple("Metric", ["name", "descending"])