diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index 61ac8811d8..8a29e1fd7a 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -32,6 +32,7 @@ JoinOverTimeRangeNode, JoinToBaseOutputNode, JoinToTimeSpineNode, + MetricTimeDimensionTransformNode, OrderByLimitNode, ReadSqlSourceNode, SemiAdditiveJoinNode, @@ -44,12 +45,16 @@ from metricflow.dataflow.optimizer.dataflow_plan_optimizer import DataflowPlanOptimizer from metricflow.dataflow.sql_table import SqlTable from metricflow.dataset.dataset import DataSet +from metricflow.dataset.sql_dataset import SqlDataSet from metricflow.errors.errors import UnableToSatisfyQueryError from metricflow.filters.time_constraint import TimeRangeConstraint +from metricflow.instances import InstanceSet, TimeDimensionInstance from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup +from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.plan_conversion.column_resolver import DunderColumnAssociationResolver from metricflow.plan_conversion.node_processor import PreJoinNodeProcessor -from metricflow.specs.column_assoc import ColumnAssociationResolver +from metricflow.plan_conversion.time_spine import TIME_SPINE_DATA_SET_DESCRIPTION +from metricflow.specs.column_assoc import ColumnAssociation, ColumnAssociationResolver, SingleColumnCorrelationKey from metricflow.specs.specs import ( InstanceSpecSet, LinkableInstanceSpec, @@ -64,7 +69,8 @@ TimeDimensionSpec, WhereFilterSpec, ) -from metricflow.sql.sql_plan import SqlJoinType +from metricflow.sql.sql_exprs import SqlColumnReference, SqlColumnReferenceExpression, SqlDateTruncExpression +from metricflow.sql.sql_plan import SqlJoinType, SqlSelectColumn, SqlSelectStatementNode, SqlTableFromClauseNode logger = logging.getLogger(__name__) @@ -148,6 +154,7 @@ def __init__( # noqa: D ) -> None: self._semantic_model_lookup = semantic_manifest_lookup.semantic_model_lookup self._metric_lookup = semantic_manifest_lookup.metric_lookup + self._time_spine_source = semantic_manifest_lookup.time_spine_source self._metric_time_dimension_reference = DataSet.metric_time_dimension_reference() self._source_nodes = source_nodes self._read_nodes = read_nodes @@ -408,18 +415,18 @@ def _select_source_nodes_with_measures( def _select_read_nodes_with_linkable_specs( self, linkable_specs: LinkableSpecSet, read_nodes: Sequence[ReadSqlSourceNode] - ) -> Dict[BaseOutput, Set[LinkableInstanceSpec]]: + ) -> List[ReadSqlSourceNode]: """Find source nodes with requested linkable specs and no measures.""" - nodes_to_linkable_specs: Dict[BaseOutput, Set[LinkableInstanceSpec]] = {} - linkable_specs_set = set(linkable_specs.as_tuple) + selected_nodes: List[ReadSqlSourceNode] = [] + requested_linkable_specs_set = set(linkable_specs.as_tuple) for read_node in read_nodes: output_spec_set = self._node_data_set_resolver.get_output_data_set(read_node).instance_set.spec_set - linkable_specs_in_node = set(output_spec_set.linkable_specs) - requested_linkable_specs_in_node = linkable_specs_set.intersection(linkable_specs_in_node) + all_linkable_specs_in_node = set(output_spec_set.linkable_specs) + requested_linkable_specs_in_node = requested_linkable_specs_set.intersection(all_linkable_specs_in_node) if requested_linkable_specs_in_node: - nodes_to_linkable_specs[read_node] = requested_linkable_specs_in_node + selected_nodes.append(read_node) - return 
nodes_to_linkable_specs + return selected_nodes def _find_non_additive_dimension_in_linkable_specs( self, @@ -473,6 +480,70 @@ def _build_measure_spec_properties(self, measure_specs: Sequence[MeasureSpec]) - non_additive_dimension_spec=non_additive_dimension_spec, ) + # TODO: this should live somewhere else. Figure out where makes sense + def _create_metric_time_node_from_time_spine(self) -> MetricTimeDimensionTransformNode: + """Build a ReadSqlSourceNode that represents reading from the time spine table.""" + time_spine_source = self._time_spine_source + from_source_alias = IdGeneratorRegistry.for_class(self.__class__).create_id("time_spine_src") + + # TODO: add date part to instances & select columns. Can we use the same logic as elsewhere?? + time_spine_instances: List[TimeDimensionInstance] = [] + select_columns: List[SqlSelectColumn] = [] + for granularity in TimeGranularity: + if granularity.to_int() >= time_spine_source.time_column_granularity.to_int(): + column_alias = StructuredLinkableSpecName( + entity_link_names=(), + element_name=time_spine_source.time_column_name, + time_granularity=granularity, + ).qualified_name + time_spine_instance = TimeDimensionInstance( + defined_from=(), + associated_columns=( + ColumnAssociation( + column_name=column_alias, + single_column_correlation_key=SingleColumnCorrelationKey(), + ), + ), + spec=TimeDimensionSpec( + element_name=time_spine_source.time_column_name, entity_links=(), time_granularity=granularity + ), + ) + time_spine_instances.append(time_spine_instance) + select_column = SqlSelectColumn( + SqlDateTruncExpression( + time_granularity=granularity, + arg=SqlColumnReferenceExpression( + SqlColumnReference( + table_alias=from_source_alias, + column_name=time_spine_source.time_column_name, + ), + ), + ), + column_alias=column_alias, + ) + select_columns.append(select_column) + + time_spine_instance_set = InstanceSet(time_dimension_instances=tuple(time_spine_instances)) + time_spine_data_set = SqlDataSet( + instance_set=time_spine_instance_set, + sql_select_node=SqlSelectStatementNode( + description=TIME_SPINE_DATA_SET_DESCRIPTION, + select_columns=tuple(select_columns), + from_source=SqlTableFromClauseNode(sql_table=time_spine_source.spine_table), + from_source_alias=from_source_alias, + joins_descs=(), + group_bys=(), + order_bys=(), + ), + ) + # need this if we have above?? 
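+        # Wrapping the time spine read node in a MetricTimeDimensionTransformNode re-labels the
+        # spine's `ds` time-dimension instances as `metric_time` ones (see the "Metric Time
+        # Dimension 'ds'" step in the snapshot SQL), so downstream plan building can treat this
+        # data set like any other source that provides the metric time dimension.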
+ return MetricTimeDimensionTransformNode( + parent_node=ReadSqlSourceNode(data_set=time_spine_data_set), + aggregation_time_dimension_reference=TimeDimensionReference( + element_name=time_spine_source.time_column_name + ), + ) + def _find_dataflow_recipe( self, linkable_spec_set: LinkableSpecSet, @@ -480,24 +551,32 @@ def _find_dataflow_recipe( time_range_constraint: Optional[TimeRangeConstraint] = None, ) -> Optional[DataflowRecipe]: linkable_specs = linkable_spec_set.as_tuple + potential_source_nodes: Sequence[BaseOutput] if measure_spec_properties: source_nodes = self._source_nodes - potential_source_nodes: Sequence[BaseOutput] = self._select_source_nodes_with_measures( + potential_source_nodes = self._select_source_nodes_with_measures( measure_specs=set(measure_spec_properties.measure_specs), source_nodes=source_nodes ) else: # Only read nodes can be source nodes for queries without measures source_nodes = list(self._read_nodes) - source_nodes_to_linkable_specs = self._select_read_nodes_with_linkable_specs( - linkable_specs=linkable_spec_set, read_nodes=source_nodes - ) - # Add time_spine to potential source nodes w/ metric_time as linkable spec - # Maybe only do this if requested - potential_source_nodes = list(source_nodes_to_linkable_specs.keys()) + potential_source_nodes = self._select_read_nodes_with_linkable_specs( + linkable_specs=linkable_spec_set, read_nodes=self._read_nodes + ) + # `metric_time` does not exist if there is no metric in the query. + # In that case, we'll use the time spine table to represent `metric_time` values. + requested_metric_time_specs = [ + time_dimension_spec + for time_dimension_spec in linkable_spec_set.time_dimension_specs + if time_dimension_spec.element_name == self._metric_time_dimension_reference.element_name + ] + if requested_metric_time_specs: + # Add time_spine to potential source nodes for requested metric_time specs + time_spine_node = self._create_metric_time_node_from_time_spine() + potential_source_nodes = list(potential_source_nodes) + [time_spine_node] logger.info(f"There are {len(potential_source_nodes)} potential source nodes") - logger.info(f"Starting search with {len(source_nodes)} source nodes") start_time = time.time() node_processor = PreJoinNodeProcessor( diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index 5ab12bed9f..7490b05eb6 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -41,6 +41,7 @@ LinkableInstanceSpec, LinklessEntitySpec, ) +from metricflow.test.time.metric_time_dimension import MTD logger = logging.getLogger(__name__) @@ -322,11 +323,11 @@ def evaluate_node( data_set_linkable_specs = candidate_spec_set.linkable_specs - # These are linkable specs in the same data set as the measure. Those are considered "local". - local_linkable_specs = [] + # These are linkable specs in the start node data set. Those are considered "local". + local_linkable_specs: List[LinkableInstanceSpec] = [] # These are linkable specs that aren't in the data set, but they might be able to be joined in. - possibly_joinable_linkable_specs = [] + possibly_joinable_linkable_specs: List[LinkableInstanceSpec] = [] # Group required_linkable_specs into local / un-joinable / or possibly joinable. unjoinable_linkable_specs = [] @@ -364,7 +365,10 @@ def evaluate_node( "There are no more candidate nodes that can be joined, but not all linkable specs have " "been acquired." 
                )
-                unjoinable_linkable_specs.extend(possibly_joinable_linkable_specs)
+                # `metric_time` can still be satisfied via the time spine even when no candidate
+                # node provides it, so only non-metric_time specs are marked unjoinable here.
+                if not all(spec.element_name == MTD for spec in possibly_joinable_linkable_specs):
+                    unjoinable_linkable_specs.extend(possibly_joinable_linkable_specs)
                break

        # Join the best candidate to realize the linkable specs
diff --git a/metricflow/dataset/convert_semantic_model.py b/metricflow/dataset/convert_semantic_model.py
index cc29f45bca..5f713c7bf6 100644
--- a/metricflow/dataset/convert_semantic_model.py
+++ b/metricflow/dataset/convert_semantic_model.py
@@ -16,7 +16,7 @@
 from metricflow.aggregation_properties import AggregationState
 from metricflow.dag.id_generation import IdGeneratorRegistry
 from metricflow.dataflow.sql_table import SqlTable
-from metricflow.dataset.semantic_model_adapter import SemanticModelDataSet
+from metricflow.dataset.semantic_model_adapter import SemanticModelDataSet, SqlDataSet
 from metricflow.instances import (
     DimensionInstance,
     EntityInstance,
@@ -26,6 +26,7 @@
 )
 from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup
 from metricflow.model.spec_converters import MeasureConverter
+from metricflow.plan_conversion.time_spine import TimeSpineSource
 from metricflow.specs.column_assoc import ColumnAssociationResolver
 from metricflow.specs.specs import (
     DEFAULT_TIME_GRANULARITY,
@@ -478,3 +479,8 @@ def create_sql_source_data_set(self, semantic_model: SemanticModel) -> SemanticM
             ),
             sql_select_node=select_statement_node,
         )
+
+    # move logic here?
+    def create_data_set_from_time_spine(self, time_spine_source: TimeSpineSource) -> SqlDataSet:
+        """Create a SQL source data set from time spine table."""
+        pass
diff --git a/metricflow/dataset/sql_dataset.py b/metricflow/dataset/sql_dataset.py
index 4d054834a1..437faad279 100644
--- a/metricflow/dataset/sql_dataset.py
+++ b/metricflow/dataset/sql_dataset.py
@@ -28,6 +28,8 @@ def __init__(self, instance_set: InstanceSet, sql_select_node: SqlSelectStatemen
         self._sql_select_node = sql_select_node
         super().__init__(instance_set=instance_set)

+    # TODO: add optional __repr__ to display the table name or pass in custom name (time spine)
+
     @property
     def sql_select_node(self) -> SqlSelectStatementNode:
         """Return a SELECT node that can be used to read data from the given SQL table or SQL query."""
diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py
index e7f238c686..31b4b360e0 100644
--- a/metricflow/plan_conversion/dataflow_to_sql.py
+++ b/metricflow/plan_conversion/dataflow_to_sql.py
@@ -67,7 +67,7 @@
     ColumnEqualityDescription,
     SqlQueryPlanJoinBuilder,
 )
-from metricflow.plan_conversion.time_spine import TimeSpineSource
+from metricflow.plan_conversion.time_spine import TIME_SPINE_DATA_SET_DESCRIPTION, TimeSpineSource
 from metricflow.protocols.sql_client import SqlEngine
 from metricflow.specs.column_assoc import ColumnAssociation, ColumnAssociationResolver, SingleColumnCorrelationKey
 from metricflow.specs.specs import (
@@ -185,10 +185,7 @@ def _make_time_spine_data_set(
                     spec=metric_time_dimension_instance.spec,
                 ),
             )
-        time_spine_instance_set = InstanceSet(
-            time_dimension_instances=time_spine_instance,
-        )
-        description = "Date Spine"
+        time_spine_instance_set = InstanceSet(time_dimension_instances=time_spine_instance)
         time_spine_table_alias = self._next_unique_table_alias()

        # If the requested granularity is the same as the granularity of the spine, do a direct select.
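[Review note] Both `_create_metric_time_node_from_time_spine` (above) and `_make_time_spine_data_set` here derive one select column per granularity from the spine's base time column, skipping granularities finer than the spine's grain. A minimal sketch of that derivation using plain SQL strings instead of MetricFlow's expression nodes; the enum and alias scheme below are illustrative stand-ins that mirror the `to_int()` ordering and the `ds__day`-style aliases visible in the snapshots:

```python
from enum import Enum
from typing import List


class TimeGranularity(Enum):
    """Illustrative stand-in for MetricFlow's TimeGranularity; values mirror to_int() ordering."""

    DAY = 1
    WEEK = 2
    MONTH = 3
    QUARTER = 4
    YEAR = 5

    def to_int(self) -> int:
        return self.value


def time_spine_select_columns(time_column: str, spine_granularity: TimeGranularity) -> List[str]:
    """Build one DATE_TRUNC select column per granularity at or coarser than the spine's grain.

    Finer granularities cannot be derived from the spine's base column, so they are skipped,
    matching the `granularity.to_int() >= time_column_granularity.to_int()` filter in the builder.
    """
    columns: List[str] = []
    for granularity in TimeGranularity:
        if granularity.to_int() >= spine_granularity.to_int():
            name = granularity.name.lower()
            columns.append(f"DATE_TRUNC('{name}', {time_column}) AS {time_column}__{name}")
    return columns


# Produces the same shape as the "Date Spine" subquery in the DuckDB snapshots:
#   DATE_TRUNC('day', ds) AS ds__day, DATE_TRUNC('week', ds) AS ds__week, ...
print(time_spine_select_columns("ds", TimeGranularity.DAY))
```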
@@ -196,8 +193,7 @@ def _make_time_spine_data_set(
             return SqlDataSet(
                 instance_set=time_spine_instance_set,
                 sql_select_node=SqlSelectStatementNode(
-                    description=description,
-                    # This creates select expressions for all columns referenced in the instance set.
+                    description=TIME_SPINE_DATA_SET_DESCRIPTION,
                     select_columns=(
                         SqlSelectColumn(
                             expr=SqlColumnReferenceExpression(
@@ -242,8 +238,7 @@
         return SqlDataSet(
             instance_set=time_spine_instance_set,
             sql_select_node=SqlSelectStatementNode(
-                description=description,
-                # This creates select expressions for all columns referenced in the instance set.
+                description=TIME_SPINE_DATA_SET_DESCRIPTION,
                 select_columns=select_columns,
                 from_source=SqlTableFromClauseNode(sql_table=time_spine_source.spine_table),
                 from_source_alias=time_spine_table_alias,
@@ -1035,11 +1030,8 @@ def visit_metric_time_dimension_transform_node(self, node: MetricTimeDimensionTr
                 if aggregation_time_dimension_for_measure == node.aggregation_time_dimension_reference:
                     output_measure_instances.append(measure_instance)

-        if len(output_measure_instances) == 0:
-            raise RuntimeError(
-                f"No measure instances in the input source match the aggregation time dimension "
-                f"{node.aggregation_time_dimension_reference}. Check if the dataflow plan was constructed correctly."
-            )
+        # A time spine source contains no measures, so an empty match here is now a valid
+        # state rather than a sign that the dataflow plan was constructed incorrectly.

         # Find time dimension instances that refer to the same dimension as the one specified in the node.
         matching_time_dimension_instances = []
diff --git a/metricflow/plan_conversion/time_spine.py b/metricflow/plan_conversion/time_spine.py
index 4e8830b226..71a5d773d8 100644
--- a/metricflow/plan_conversion/time_spine.py
+++ b/metricflow/plan_conversion/time_spine.py
@@ -9,6 +9,8 @@
 logger = logging.getLogger(__name__)

+TIME_SPINE_DATA_SET_DESCRIPTION = "Date Spine"
+

 @dataclass(frozen=True)
 class TimeSpineSource:
diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py
index fdbe59fcbe..a307981c22 100644
--- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py
+++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py
@@ -99,7 +99,6 @@ def test_primary_entity_dimension(  # noqa: D
 def test_joined_plan(  # noqa: D
     request: FixtureRequest,
     mf_test_session_state: MetricFlowTestSessionState,
-    column_association_resolver: ColumnAssociationResolver,
     dataflow_plan_builder: DataflowPlanBuilder,
 ) -> None:
     """Tests a plan getting a measure and a joined dimension."""
@@ -741,7 +740,7 @@ def test_common_semantic_model(  # noqa: D
         MetricFlowQuerySpec(
             metric_specs=(MetricSpec(element_name="bookings"), MetricSpec(element_name="booking_value")),
             dimension_specs=(
-                DataSet.metric_time_dimension_spec(TimeGranularity.DAY),
+                MTD_SPEC_DAY,
                 DimensionSpec(element_name="country_latest", entity_links=(EntityReference("listing"),)),
             ),
         )
@@ -771,7 +770,7 @@ def test_derived_metric_offset_window(  # noqa: D
     dataflow_plan = dataflow_plan_builder.build_plan(
         MetricFlowQuerySpec(
             metric_specs=(MetricSpec(element_name="bookings_5_day_lag"),),
-            time_dimension_specs=(DataSet.metric_time_dimension_spec(TimeGranularity.DAY),),
+            time_dimension_specs=(MTD_SPEC_DAY,),
         )
     )
@@ -799,7 +798,7 @@ def test_derived_metric_offset_to_grain(  # noqa: D
     dataflow_plan = 
dataflow_plan_builder.build_plan( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_growth_since_start_of_month"),), - time_dimension_specs=(DataSet.metric_time_dimension_spec(TimeGranularity.DAY),), + time_dimension_specs=(MTD_SPEC_DAY,), ) ) @@ -853,7 +852,7 @@ def test_derived_offset_cumulative_metric( # noqa: D dataflow_plan = dataflow_plan_builder.build_plan( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="every_2_days_bookers_2_days_ago"),), - time_dimension_specs=(DataSet.metric_time_dimension_spec(TimeGranularity.DAY),), + time_dimension_specs=(MTD_SPEC_DAY,), ) ) @@ -879,7 +878,7 @@ def test_join_to_time_spine_with_metric_time( # noqa: D dataflow_plan = dataflow_plan_builder.build_plan( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_fill_nulls_with_0"),), - time_dimension_specs=(DataSet.metric_time_dimension_spec(TimeGranularity.DAY),), + time_dimension_specs=(MTD_SPEC_DAY,), ) ) @@ -905,7 +904,7 @@ def test_join_to_time_spine_derived_metric( # noqa: D dataflow_plan = dataflow_plan_builder.build_plan( MetricFlowQuerySpec( metric_specs=(MetricSpec(element_name="bookings_growth_2_weeks_fill_nulls_with_0"),), - time_dimension_specs=(DataSet.metric_time_dimension_spec(TimeGranularity.DAY),), + time_dimension_specs=(MTD_SPEC_DAY,), ) ) @@ -972,3 +971,50 @@ def test_dont_join_to_time_spine_if_no_time_dimension_requested( # noqa: D mf_test_session_state=mf_test_session_state, dag_graph=dataflow_plan, ) + + +def test_metric_time_only( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, +) -> None: + dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( + MetricFlowQuerySpec(time_dimension_specs=(MTD_SPEC_DAY,)) + ) + + assert_plan_snapshot_text_equal( + request=request, + mf_test_session_state=mf_test_session_state, + plan=dataflow_plan, + plan_snapshot_text=dataflow_plan_as_text(dataflow_plan), + ) + + display_graph_if_requested( + request=request, + mf_test_session_state=mf_test_session_state, + dag_graph=dataflow_plan, + ) + + +def test_metric_time_with_another_dimension( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, +) -> None: + dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( + MetricFlowQuerySpec(time_dimension_specs=(MTD_SPEC_DAY,)) + ) + assert 0, "add another dimension to this test" + + assert_plan_snapshot_text_equal( + request=request, + mf_test_session_state=mf_test_session_state, + plan=dataflow_plan, + plan_snapshot_text=dataflow_plan_as_text(dataflow_plan), + ) + + display_graph_if_requested( + request=request, + mf_test_session_state=mf_test_session_state, + dag_graph=dataflow_plan, + ) diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index 12e008a665..e29db96694 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -139,7 +139,7 @@ integration_test: u.home_state_latest AS user__home_state_latest , l.is_lux AS listing__is_lux_latest FROM {{ source_schema }}.dim_listings_latest l - LEFT OUTER JOIN {{ source_schema }}.dim_users_latest u + FULL OUTER JOIN {{ source_schema }}.dim_users_latest u ON u.user_id = l.user_id GROUP BY u.home_state_latest @@ -189,7 +189,10 @@ integration_test: model: SIMPLE_MODEL group_bys: ["metric_time"] 
   check_query: |
-    SELECT 1
+    SELECT
+      {{ render_date_trunc("ds", TimeGranularity.DAY) }} AS metric_time__day
+    FROM {{ source_schema }}.mf_time_spine
+    GROUP BY metric_time__day
 ---
 integration_test:
   name: metric_time_week
@@ -197,12 +200,15 @@
   model: SIMPLE_MODEL
   group_bys: ["metric_time__week"]
   check_query: |
-    SELECT 1
+    SELECT
+      {{ render_date_trunc("ds", TimeGranularity.WEEK) }} AS metric_time__week
+    FROM {{ source_schema }}.mf_time_spine
+    GROUP BY metric_time__week
 ---
 integration_test:
   name: multiple_dimensions_with_metric_time
   description: Query metric_time along with a different dimension
-  model: SIMPLE_MODEL
-  group_bys: ["metric_time", "booking__monthly_ds"]
+  model: EXTENDED_DATE_MODEL
+  group_bys: ["metric_time__year", "booking__monthly_ds"]
   check_query: |
     SELECT 1
diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py
index caebaf4b6d..303668deb4 100644
--- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py
+++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py
@@ -1003,3 +1003,80 @@
+
+
+@pytest.mark.sql_engine_snapshot
+def test_metric_time_only(
+    request: FixtureRequest,
+    mf_test_session_state: MetricFlowTestSessionState,
+    dataflow_plan_builder: DataflowPlanBuilder,
+    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
+    sql_client: SqlClient,
+) -> None:
+    """Tests querying only metric time."""
+    dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values(
+        query_spec=MetricFlowQuerySpec(
+            time_dimension_specs=(TimeDimensionSpec(element_name="metric_time", entity_links=()),),
+        ),
+    )
+
+    convert_and_check(
+        request=request,
+        mf_test_session_state=mf_test_session_state,
+        dataflow_to_sql_converter=dataflow_to_sql_converter,
+        sql_client=sql_client,
+        node=dataflow_plan.sink_output_nodes[0].parent_node,
+    )
+
+
+@pytest.mark.sql_engine_snapshot
+def test_metric_time_quarter(
+    request: FixtureRequest,
+    mf_test_session_state: MetricFlowTestSessionState,
+    dataflow_plan_builder: DataflowPlanBuilder,
+    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
+    sql_client: SqlClient,
+) -> None:
+    """Tests querying metric time at a coarser granularity than the time spine (year)."""
+    dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values(
+        query_spec=MetricFlowQuerySpec(
+            time_dimension_specs=(
+                TimeDimensionSpec(element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.YEAR),
+            ),
+        ),
+    )
+
+    convert_and_check(
+        request=request,
+        mf_test_session_state=mf_test_session_state,
+        dataflow_to_sql_converter=dataflow_to_sql_converter,
+        sql_client=sql_client,
+        node=dataflow_plan.sink_output_nodes[0].parent_node,
+    )
+
+
+@pytest.mark.sql_engine_snapshot
+def test_metric_time_with_another_dimension(
+    request: FixtureRequest,
+    mf_test_session_state: MetricFlowTestSessionState,
+    dataflow_plan_builder: DataflowPlanBuilder,
+    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
+    sql_client: SqlClient,
+) -> None:
+    """Tests querying metric time along with another dimension."""
+    dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values(
+        query_spec=MetricFlowQuerySpec(
+            time_dimension_specs=(
+                TimeDimensionSpec(element_name="metric_time", entity_links=(), time_granularity=TimeGranularity.YEAR),
+            ),
+            dimension_specs=(DimensionSpec(element_name="country_latest", 
entity_links=(EntityReference("listing"),)),), + ), + ) + + convert_and_check( + request=request, + mf_test_session_state=mf_test_session_state, + dataflow_to_sql_converter=dataflow_to_sql_converter, + sql_client=sql_client, + node=dataflow_plan.sink_output_nodes[0].parent_node, + ) diff --git a/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_only__dfp_1.xml b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_only__dfp_1.xml new file mode 100644 index 0000000000..15092721af --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_plan_builder.py/DataflowPlan/test_metric_time_only__dfp_1.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0.sql new file mode 100644 index 0000000000..2397c257d7 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0.sql @@ -0,0 +1,30 @@ +-- Pass Only Elements: +-- ['metric_time__day'] +SELECT + subq_1.metric_time__day +FROM ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + FROM ( + -- Date Spine + SELECT + DATE_TRUNC('day', time_spine_src_0.ds) AS ds__day + , DATE_TRUNC('week', time_spine_src_0.ds) AS ds__week + , DATE_TRUNC('month', time_spine_src_0.ds) AS ds__month + , DATE_TRUNC('quarter', time_spine_src_0.ds) AS ds__quarter + , DATE_TRUNC('year', time_spine_src_0.ds) AS ds__year + FROM ***************************.mf_time_spine time_spine_src_0 + ) subq_0 +) subq_1 +GROUP BY + subq_1.metric_time__day diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0_optimized.sql new file mode 100644 index 0000000000..183f605710 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_only__plan0_optimized.sql @@ -0,0 +1,9 @@ +-- Date Spine +-- Metric Time Dimension 'ds' +-- Pass Only Elements: +-- ['metric_time__day'] +SELECT + DATE_TRUNC('day', ds) AS metric_time__day +FROM ***************************.mf_time_spine time_spine_src_0 +GROUP BY + DATE_TRUNC('day', ds) diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0.sql new file mode 100644 index 0000000000..317cbc4af1 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0.sql @@ -0,0 +1,30 @@ +-- Pass Only Elements: +-- ['metric_time__year'] +SELECT + subq_1.metric_time__year +FROM ( + -- Metric Time Dimension 'ds' + SELECT + subq_0.ds__day + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds__day AS metric_time__day + , subq_0.ds__week AS metric_time__week + , subq_0.ds__month AS 
metric_time__month + , subq_0.ds__quarter AS metric_time__quarter + , subq_0.ds__year AS metric_time__year + FROM ( + -- Date Spine + SELECT + DATE_TRUNC('day', time_spine_src_0.ds) AS ds__day + , DATE_TRUNC('week', time_spine_src_0.ds) AS ds__week + , DATE_TRUNC('month', time_spine_src_0.ds) AS ds__month + , DATE_TRUNC('quarter', time_spine_src_0.ds) AS ds__quarter + , DATE_TRUNC('year', time_spine_src_0.ds) AS ds__year + FROM ***************************.mf_time_spine time_spine_src_0 + ) subq_0 +) subq_1 +GROUP BY + subq_1.metric_time__year diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0_optimized.sql new file mode 100644 index 0000000000..604fa276c5 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDB/test_metric_time_quarter__plan0_optimized.sql @@ -0,0 +1,9 @@ +-- Date Spine +-- Metric Time Dimension 'ds' +-- Pass Only Elements: +-- ['metric_time__year'] +SELECT + DATE_TRUNC('year', ds) AS metric_time__year +FROM ***************************.mf_time_spine time_spine_src_0 +GROUP BY + DATE_TRUNC('year', ds) diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_only__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_only__plan0.xml new file mode 100644 index 0000000000..7819df90c9 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_only__plan0.xml @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_quarter__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_quarter__plan0.xml new file mode 100644 index 0000000000..ed40e04214 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_time_quarter__plan0.xml @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/time/time_granularity_solver.py b/metricflow/time/time_granularity_solver.py index 55e33ca865..01b121349c 100644 --- a/metricflow/time/time_granularity_solver.py +++ b/metricflow/time/time_granularity_solver.py @@ -24,6 +24,7 @@ from metricflow.specs.specs import ( TimeDimensionSpec, ) +from metricflow.test.time.metric_time_dimension import MTD from metricflow.time.time_granularity import ( adjust_to_end_of_period, adjust_to_start_of_period, @@ -84,6 +85,11 @@ def __init__( # noqa: D self._time_dimension_names_to_supported_granularities[granularity_free_qualified_name].add( time_dimension_instance.spec.time_granularity ) + self._time_dimension_names_to_supported_granularities[MTD] = { + granularity + for granularity in TimeGranularity + if granularity.to_int() >= semantic_manifest_lookup.time_spine_source.time_column_granularity.to_int() + } def validate_time_granularity( self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec]
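[Review note] The `time_granularity_solver` change above registers `metric_time` as supporting every granularity at or coarser than the time spine's base grain, which is what lets metric-free queries pass granularity validation. A self-contained sketch of that rule; the `Grain` enum is an illustrative stand-in for `TimeGranularity`:

```python
from enum import IntEnum
from typing import Set


class Grain(IntEnum):
    """Illustrative stand-in for TimeGranularity; values mirror to_int() ordering (fine to coarse)."""

    DAY = 1
    WEEK = 2
    MONTH = 3
    QUARTER = 4
    YEAR = 5


def supported_metric_time_grains(spine_grain: Grain) -> Set[Grain]:
    """Grains queryable for metric_time: the spine's own grain and anything coarser."""
    return {grain for grain in Grain if grain >= spine_grain}


# With a daily spine, metric_time__day through metric_time__year are all valid targets.
assert supported_metric_time_grains(Grain.DAY) == set(Grain)
# A monthly spine cannot produce day or week values via DATE_TRUNC, so those are excluded.
assert supported_metric_time_grains(Grain.MONTH) == {Grain.MONTH, Grain.QUARTER, Grain.YEAR}
```

This mirrors the set comprehension added to `TimeGranularitySolver.__init__`, keyed by the `MTD` name.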