From 966456be4beffd8234e00797c38494ffcfa0b426 Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 11:48:29 -0500 Subject: [PATCH 1/8] replace SqlFunctionExpression to SqlAggregateFunctionExpression --- metricflow/plan_conversion/dataflow_to_sql.py | 4 ++-- .../plan_conversion/instance_converters.py | 4 ++-- metricflow/plan_conversion/spec_transforms.py | 4 ++-- metricflow/sql/render/expr_renderer.py | 4 ++-- metricflow/sql/sql_exprs.py | 22 ++++++++++++------- ...select_columns_with_measures_aggregated.py | 16 +++++++------- .../test_rewriting_sub_query_reducer.py | 10 ++++----- metricflow/test/sql/test_sql_expr_render.py | 12 +++++----- metricflow/test/sql/test_sql_plan_render.py | 6 +++-- 9 files changed, 45 insertions(+), 37 deletions(-) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index c931edacb0..fa06d90214 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -89,7 +89,7 @@ SqlDateTruncExpression, SqlStringLiteralExpression, SqlBetweenExpression, - SqlFunctionExpression, + SqlAggregateFunctionExpression, ) from metricflow.sql.sql_plan import ( SqlQueryPlan, @@ -1168,7 +1168,7 @@ def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode) -> SqlDataSe aggregation_state=AggregationState.COMPLETE, ).column_name time_dimension_select_column = SqlSelectColumn( - expr=SqlFunctionExpression.from_aggregation_type( + expr=SqlAggregateFunctionExpression.from_aggregation_type( node.agg_by_function, SqlColumnReferenceExpression( SqlColumnReference( diff --git a/metricflow/plan_conversion/instance_converters.py b/metricflow/plan_conversion/instance_converters.py index 3bfb359da5..4f8b3c9712 100644 --- a/metricflow/plan_conversion/instance_converters.py +++ b/metricflow/plan_conversion/instance_converters.py @@ -39,7 +39,7 @@ from metricflow.sql.sql_exprs import ( SqlColumnReferenceExpression, SqlColumnReference, - SqlFunctionExpression, + SqlAggregateFunctionExpression, ) from metricflow.sql.sql_plan import SqlSelectColumn from metricflow.time.time_granularity import TimeGranularity @@ -205,7 +205,7 @@ def _make_sql_column_expression_to_aggregate_measure( # noqa: D SqlColumnReference(self._table_alias, column_name_in_table) ) - expression_to_aggregate_measure = SqlFunctionExpression.from_aggregation_type( + expression_to_aggregate_measure = SqlAggregateFunctionExpression.from_aggregation_type( aggregation_type=aggregation_type, sql_column_expression=expression_to_get_measure ) diff --git a/metricflow/plan_conversion/spec_transforms.py b/metricflow/plan_conversion/spec_transforms.py index 0830be934a..d3175ab8c6 100644 --- a/metricflow/plan_conversion/spec_transforms.py +++ b/metricflow/plan_conversion/spec_transforms.py @@ -14,7 +14,7 @@ SqlComparison, SqlColumnReferenceExpression, SqlColumnReference, - SqlFunctionExpression, + SqlAggregateFunctionExpression, SqlFunction, ) from metricflow.sql.sql_plan import SqlSelectColumn @@ -50,7 +50,7 @@ def _make_coalesced_expr(table_aliases: Sequence[str], column_alias: str) -> Sql ) ) ) - return SqlFunctionExpression( + return SqlAggregateFunctionExpression( sql_function=SqlFunction.COALESCE, sql_function_args=columns_to_coalesce, ) diff --git a/metricflow/sql/render/expr_renderer.py b/metricflow/sql/render/expr_renderer.py index 0d1da4a819..7781ac8f9c 100644 --- a/metricflow/sql/render/expr_renderer.py +++ b/metricflow/sql/render/expr_renderer.py @@ -17,7 +17,7 @@ SqlComparisonExpression, SqlExpressionNode, 
SqlFunction, - SqlFunctionExpression, + SqlAggregateFunctionExpression, SqlNullExpression, SqlLogicalExpression, SqlStringLiteralExpression, @@ -107,7 +107,7 @@ def visit_comparison_expr(self, node: SqlComparisonExpression) -> SqlExpressionR execution_parameters=combined_params, ) - def visit_function_expr(self, node: SqlFunctionExpression) -> SqlExpressionRenderResult: # noqa: D + def visit_function_expr(self, node: SqlAggregateFunctionExpression) -> SqlExpressionRenderResult: # noqa: D """Render a function call like CONCAT(a, b)""" args_rendered = [self.render_sql_expr(x) for x in node.sql_function_args] combined_params = SqlBindParameters() diff --git a/metricflow/sql/sql_exprs.py b/metricflow/sql/sql_exprs.py index 58ec4bbefa..17db1c1767 100644 --- a/metricflow/sql/sql_exprs.py +++ b/metricflow/sql/sql_exprs.py @@ -113,7 +113,7 @@ class SqlExpressionTreeLineage: """Captures the lineage of an expression node - contains itself and all ancestor nodes.""" string_exprs: Tuple[SqlStringExpression, ...] = () - function_exprs: Tuple[SqlFunctionExpression, ...] = () + function_exprs: Tuple[SqlAggregateFunctionExpression, ...] = () column_reference_exprs: Tuple[SqlColumnReferenceExpression, ...] = () column_alias_reference_exprs: Tuple[SqlColumnAliasReferenceExpression, ...] = () other_exprs: Tuple[SqlExpressionNode, ...] = () @@ -177,7 +177,7 @@ def visit_comparison_expr(self, node: SqlComparisonExpression) -> VisitorOutputT pass @abstractmethod - def visit_function_expr(self, node: SqlFunctionExpression) -> VisitorOutputT: # noqa: D + def visit_function_expr(self, node: SqlAggregateFunctionExpression) -> VisitorOutputT: # noqa: D pass @abstractmethod @@ -688,14 +688,20 @@ def from_aggregation_type(aggregation_type: AggregationType) -> SqlFunction: class SqlFunctionExpression(SqlExpressionNode): - """A function expression like SUM(1).""" + """Denotes a function expression in SQL.""" + + pass + + +class SqlAggregateFunctionExpression(SqlFunctionExpression): + """An aggregate function expression like SUM(1).""" @staticmethod def from_aggregation_type( aggregation_type: AggregationType, sql_column_expression: SqlColumnReferenceExpression - ) -> SqlFunctionExpression: + ) -> SqlAggregateFunctionExpression: """Given the aggregation type, return an SQL function expression that does that aggregation on the given col.""" - return SqlFunctionExpression( + return SqlAggregateFunctionExpression( sql_function=SqlFunction.from_aggregation_type(aggregation_type=aggregation_type), sql_function_args=[sql_column_expression], ) @@ -751,7 +757,7 @@ def rewrite( # noqa: D column_replacements: Optional[SqlColumnReplacements] = None, should_render_table_alias: Optional[bool] = None, ) -> SqlExpressionNode: - return SqlFunctionExpression( + return SqlAggregateFunctionExpression( sql_function=self.sql_function, sql_function_args=[ x.rewrite(column_replacements, should_render_table_alias) for x in self.sql_function_args @@ -765,7 +771,7 @@ def lineage(self) -> SqlExpressionTreeLineage: # noqa: D ) def matches(self, other: SqlExpressionNode) -> bool: # noqa: D - if not isinstance(other, SqlFunctionExpression): + if not isinstance(other, SqlAggregateFunctionExpression): return False return self.sql_function == other.sql_function and self._parents_match(other) @@ -1094,7 +1100,7 @@ class SqlRatioComputationExpression(SqlExpressionNode): """Node for expressing Ratio metrics to allow for appropriate casting to float/double in each engine In future we might wish to break this up into a set of nodes, e.g., SqlCastExpression 
and SqlMathExpression - or even add CAST to SqlFunctionExpression. However, at this time the only mathematical operation we encode + or even add CAST to SqlAggregateFunctionExpression. However, at this time the only mathematical operation we encode is division, and we only use that for ratios. Similarly, the only times we do typecasting are when we are coercing timestamps (already handled) or computing ratio metrics. """ diff --git a/metricflow/test/plan_conversion/instance_converters/test_create_select_columns_with_measures_aggregated.py b/metricflow/test/plan_conversion/instance_converters/test_create_select_columns_with_measures_aggregated.py index 9bae6afd41..eb1b0baca8 100644 --- a/metricflow/test/plan_conversion/instance_converters/test_create_select_columns_with_measures_aggregated.py +++ b/metricflow/test/plan_conversion/instance_converters/test_create_select_columns_with_measures_aggregated.py @@ -9,7 +9,7 @@ from metricflow.specs import MeasureSpec, MetricInputMeasureSpec from metricflow.sql.sql_exprs import ( SqlFunction, - SqlFunctionExpression, + SqlAggregateFunctionExpression, ) from metricflow.test.fixtures.model_fixtures import ConsistentIdObjectRepository @@ -46,7 +46,7 @@ def test_sum_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) assert expr.sql_function == SqlFunction.SUM @@ -68,7 +68,7 @@ def test_sum_boolean_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) # The SUM_BOOLEAN aggregation type is transformed to SUM at model parsing time assert expr.sql_function == SqlFunction.SUM @@ -91,7 +91,7 @@ def test_avg_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) assert expr.sql_function == SqlFunction.AVERAGE @@ -113,7 +113,7 @@ def test_count_distinct_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) assert expr.sql_function == SqlFunction.COUNT_DISTINCT @@ -135,7 +135,7 @@ def test_max_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) assert expr.sql_function == SqlFunction.MAX @@ -157,7 +157,7 @@ def test_min_aggregation( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) assert expr.sql_function == SqlFunction.MIN @@ -179,6 +179,6 @@ def test_aliased_sum( assert len(select_column_set.measure_columns) == 1 measure_column = select_column_set.measure_columns[0] expr = measure_column.expr - assert isinstance(expr, SqlFunctionExpression) + assert isinstance(expr, SqlAggregateFunctionExpression) 
assert expr.sql_function == SqlFunction.SUM assert measure_column.column_alias == "bvalue" diff --git a/metricflow/test/sql/optimizer/test_rewriting_sub_query_reducer.py b/metricflow/test/sql/optimizer/test_rewriting_sub_query_reducer.py index 9c88d3702d..4d0eb87a08 100644 --- a/metricflow/test/sql/optimizer/test_rewriting_sub_query_reducer.py +++ b/metricflow/test/sql/optimizer/test_rewriting_sub_query_reducer.py @@ -9,7 +9,7 @@ SqlComparisonExpression, SqlComparison, SqlStringLiteralExpression, - SqlFunctionExpression, + SqlAggregateFunctionExpression, SqlFunction, SqlStringExpression, ) @@ -56,7 +56,7 @@ def base_select_statement() -> SqlSelectStatementNode: description="src3", select_columns=( SqlSelectColumn( - expr=SqlFunctionExpression( + expr=SqlAggregateFunctionExpression( sql_function=SqlFunction.SUM, sql_function_args=[ SqlColumnReferenceExpression( @@ -220,7 +220,7 @@ def join_select_statement() -> SqlSelectStatementNode: description="query", select_columns=( SqlSelectColumn( - expr=SqlFunctionExpression( + expr=SqlAggregateFunctionExpression( sql_function=SqlFunction.SUM, sql_function_args=[ SqlColumnReferenceExpression( @@ -391,7 +391,7 @@ def colliding_select_statement() -> SqlSelectStatementNode: description="query", select_columns=( SqlSelectColumn( - expr=SqlFunctionExpression( + expr=SqlAggregateFunctionExpression( sql_function=SqlFunction.SUM, sql_function_args=[ SqlColumnReferenceExpression( @@ -572,7 +572,7 @@ def reduce_all_join_select_statement() -> SqlSelectStatementNode: description="query", select_columns=( SqlSelectColumn( - expr=SqlFunctionExpression( + expr=SqlAggregateFunctionExpression( sql_function=SqlFunction.SUM, sql_function_args=[ SqlColumnReferenceExpression( diff --git a/metricflow/test/sql/test_sql_expr_render.py b/metricflow/test/sql/test_sql_expr_render.py index 5c67335c0d..33f4d6aecc 100644 --- a/metricflow/test/sql/test_sql_expr_render.py +++ b/metricflow/test/sql/test_sql_expr_render.py @@ -10,7 +10,7 @@ SqlColumnReference, SqlComparisonExpression, SqlComparison, - SqlFunctionExpression, + SqlAggregateFunctionExpression, SqlFunction, SqlNullExpression, SqlLogicalExpression, @@ -72,7 +72,7 @@ def test_require_parenthesis(default_expr_renderer: DefaultSqlExpressionRenderer def test_function_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> None: # noqa: D actual = default_expr_renderer.render_sql_expr( - SqlFunctionExpression( + SqlAggregateFunctionExpression( sql_function=SqlFunction.SUM, sql_function_args=[ SqlColumnReferenceExpression(SqlColumnReference("my_table", "a")), @@ -86,7 +86,7 @@ def test_function_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> N def test_distinct_agg_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> None: """Distinct aggregation functions require the insertion of the DISTINCT keyword in the rendered function expr""" actual = default_expr_renderer.render_sql_expr( - SqlFunctionExpression( + SqlAggregateFunctionExpression( sql_function=SqlFunction.COUNT_DISTINCT, sql_function_args=[ SqlColumnReferenceExpression(SqlColumnReference("my_table", "a")), @@ -100,11 +100,11 @@ def test_distinct_agg_expr(default_expr_renderer: DefaultSqlExpressionRenderer) def test_nested_function_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> None: # noqa: D actual = default_expr_renderer.render_sql_expr( - SqlFunctionExpression( + SqlAggregateFunctionExpression( sql_function=SqlFunction.CONCAT, sql_function_args=[ SqlColumnReferenceExpression(SqlColumnReference("my_table", "a")), - 
SqlFunctionExpression( + SqlAggregateFunctionExpression( sql_function=SqlFunction.CONCAT, sql_function_args=[ SqlColumnReferenceExpression(SqlColumnReference("my_table", "b")), @@ -191,7 +191,7 @@ def test_date_trunc_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> def test_ratio_computation_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> None: # noqa: D actual = default_expr_renderer.render_sql_expr( SqlRatioComputationExpression( - numerator=SqlFunctionExpression( + numerator=SqlAggregateFunctionExpression( SqlFunction.SUM, sql_function_args=[SqlStringExpression(sql_expr="1", requires_parenthesis=False)] ), denominator=SqlColumnReferenceExpression(SqlColumnReference(column_name="divide_by_me", table_alias="a")), diff --git a/metricflow/test/sql/test_sql_plan_render.py b/metricflow/test/sql/test_sql_plan_render.py index db3dce05ab..748e2e1576 100644 --- a/metricflow/test/sql/test_sql_plan_render.py +++ b/metricflow/test/sql/test_sql_plan_render.py @@ -12,7 +12,7 @@ SqlColumnReference, SqlComparisonExpression, SqlComparison, - SqlFunctionExpression, + SqlAggregateFunctionExpression, SqlFunction, ) from metricflow.sql.sql_plan import ( @@ -44,7 +44,9 @@ def test_component_rendering( # Test single SELECT column select_columns = [ SqlSelectColumn( - expr=SqlFunctionExpression(sql_function=SqlFunction.SUM, sql_function_args=[SqlStringExpression("1")]), + expr=SqlAggregateFunctionExpression( + sql_function=SqlFunction.SUM, sql_function_args=[SqlStringExpression("1")] + ), column_alias="bookings", ), ] From 91698efdd1cd4770498f97ea565e6b32880aac8c Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 12:08:01 -0500 Subject: [PATCH 2/8] added SqlWindowFunctionExpression --- metricflow/dag/id_generation.py | 1 + metricflow/sql/render/expr_renderer.py | 43 ++++++ metricflow/sql/sql_exprs.py | 149 +++++++++++++++++++- metricflow/test/sql/test_sql_expr_render.py | 32 +++++ 4 files changed, 224 insertions(+), 1 deletion(-) diff --git a/metricflow/dag/id_generation.py b/metricflow/dag/id_generation.py index 35d15ee302..349771b247 100644 --- a/metricflow/dag/id_generation.py +++ b/metricflow/dag/id_generation.py @@ -34,6 +34,7 @@ SQL_EXPR_DATE_TRUNC = "dt" SQL_EXPR_RATIO_COMPUTATION = "rc" SQL_EXPR_BETWEEN_PREFIX = "betw" +SQL_EXPR_WINDOW_FUNCTION_ID_PREFIX = "wfnc" SQL_PLAN_SELECT_STATEMENT_ID_PREFIX = "ss" SQL_PLAN_TABLE_FROM_CLAUSE_ID_PREFIX = "tfc" diff --git a/metricflow/sql/render/expr_renderer.py b/metricflow/sql/render/expr_renderer.py index 7781ac8f9c..55e550b15b 100644 --- a/metricflow/sql/render/expr_renderer.py +++ b/metricflow/sql/render/expr_renderer.py @@ -28,6 +28,7 @@ SqlRatioComputationExpression, SqlColumnAliasReferenceExpression, SqlBetweenExpression, + SqlWindowFunctionExpression, ) from metricflow.time.time_granularity import TimeGranularity @@ -271,3 +272,45 @@ def visit_between_expr(self, node: SqlBetweenExpression) -> SqlExpressionRenderR sql=f"{rendered_column_arg.sql} BETWEEN {rendered_start_expr.sql} AND {rendered_end_expr.sql}", execution_parameters=execution_parameters, ) + + def visit_window_function_expr(self, node: SqlWindowFunctionExpression) -> SqlExpressionRenderResult: # noqa: D + sql_function_args_rendered = [self.render_sql_expr(x) for x in node.sql_function_args] + partition_by_args_rendered = [self.render_sql_expr(x) for x in node.partition_by_args] + order_by_args_rendered = {self.render_sql_expr(x.expr): x for x in node.order_by_args} + + combined_params = SqlBindParameters() + args_rendered = [] + if 
sql_function_args_rendered: + args_rendered.extend(sql_function_args_rendered) + if partition_by_args_rendered: + args_rendered.extend(partition_by_args_rendered) + if order_by_args_rendered: + args_rendered.extend(list(order_by_args_rendered.keys())) + for arg_rendered in args_rendered: + combined_params.update(arg_rendered.execution_parameters) + + sql_function_args_string = ", ".join([x.sql for x in sql_function_args_rendered]) + partition_by_args_string = ( + ("PARTITION BY " + ", ".join([x.sql for x in partition_by_args_rendered])) + if partition_by_args_rendered + else "" + ) + order_by_args_string = ( + ( + "ORDER BY " + + ", ".join( + [ + rendered_result.sql + (f" {x.suffix}" if x.suffix else "") + for rendered_result, x in order_by_args_rendered.items() + ] + ) + ) + if order_by_args_rendered + else "" + ) + + window_string = " ".join(filter(bool, [partition_by_args_string, order_by_args_string])) + return SqlExpressionRenderResult( + sql=f"{node.sql_function.value}({sql_function_args_string}) OVER ({window_string})", + execution_parameters=combined_params, + ) diff --git a/metricflow/sql/sql_exprs.py b/metricflow/sql/sql_exprs.py index 17db1c1767..7e8e48c742 100644 --- a/metricflow/sql/sql_exprs.py +++ b/metricflow/sql/sql_exprs.py @@ -22,6 +22,7 @@ SQL_EXPR_DATE_TRUNC, SQL_EXPR_RATIO_COMPUTATION, SQL_EXPR_BETWEEN_PREFIX, + SQL_EXPR_WINDOW_FUNCTION_ID_PREFIX, ) from metricflow.sql.sql_bind_parameters import SqlBindParameters from metricflow.visitor import Visitable, VisitorOutputT @@ -113,7 +114,7 @@ class SqlExpressionTreeLineage: """Captures the lineage of an expression node - contains itself and all ancestor nodes.""" string_exprs: Tuple[SqlStringExpression, ...] = () - function_exprs: Tuple[SqlAggregateFunctionExpression, ...] = () + function_exprs: Tuple[SqlFunctionExpression, ...] = () column_reference_exprs: Tuple[SqlColumnReferenceExpression, ...] = () column_alias_reference_exprs: Tuple[SqlColumnAliasReferenceExpression, ...] = () other_exprs: Tuple[SqlExpressionNode, ...] = () @@ -216,6 +217,10 @@ def visit_ratio_computation_expr(self, node: SqlRatioComputationExpression) -> V def visit_between_expr(self, node: SqlBetweenExpression) -> VisitorOutputT: # noqa: D pass + @abstractmethod + def visit_window_function_expr(self, node: SqlWindowFunctionExpression) -> VisitorOutputT: # noqa: D + pass + class SqlStringExpression(SqlExpressionNode): """An SQL expression in a string format, so it lacks information about the structure. @@ -776,6 +781,148 @@ def matches(self, other: SqlExpressionNode) -> bool: # noqa: D return self.sql_function == other.sql_function and self._parents_match(other) +class SqlWindowFunction(Enum): + """Names of known SQL window functions like SUM(), RANK(), ROW_NUMBER() + + Values are the SQL string to be used in rendering. 
+ """ + + FIRST_VALUE = "first_value" + ROW_NUMBER = "row_number" + + +@dataclass(frozen=True) +class SqlWindowOrderByArg: + """In window functions, the ORDER BY clause can accept an expr, ordering, null ranking.""" + + expr: SqlExpressionNode + descending: Optional[bool] = None + nulls_last: Optional[bool] = None + + @property + def suffix(self) -> str: + """Helper to build suffix to append to {expr}{suffix}""" + result = [] + if self.descending is not None: + result.append("DESC" if self.descending else "ASC") + if self.nulls_last is not None: + result.append("NULLS LAST" if self.nulls_last else "NULLS FIRST") + return " ".join(result) + + +class SqlWindowFunctionExpression(SqlFunctionExpression): + """A window function expression like SUM(foo) OVER bar""" + + def __init__( + self, + sql_function: SqlWindowFunction, + sql_function_args: Optional[List[SqlExpressionNode]] = None, + partition_by_args: Optional[List[SqlExpressionNode]] = None, + order_by_args: Optional[List[SqlWindowOrderByArg]] = None, + ) -> None: + """Constructor. + + Args: + sql_function: The function that this represents. + sql_function_args: The arguments that should go into the function. e.g. for "CONCAT(a, b)", the arg + expressions should be "a" and "b". + partition_by_args: The arguments to partition the rows. e.g. PARTITION BY expr1, expr2, + the args are "expr1", "expr2". + order_by_args: The expr to order the partitions by. + """ + self._sql_function = sql_function + self._sql_function_args = sql_function_args + self._partition_by_args = partition_by_args + self._order_by_args = order_by_args + parent_nodes = [] + if sql_function_args: + parent_nodes.extend(sql_function_args) + if partition_by_args: + parent_nodes.extend(partition_by_args) + if order_by_args: + parent_nodes.extend([x.expr for x in order_by_args]) + super().__init__(node_id=self.create_unique_id(), parent_nodes=parent_nodes) + + @classmethod + def id_prefix(cls) -> str: # noqa: D + return SQL_EXPR_WINDOW_FUNCTION_ID_PREFIX + + @property + def requires_parenthesis(self) -> bool: # noqa: D + return False + + def accept(self, visitor: SqlExpressionNodeVisitor) -> VisitorOutputT: # noqa: D + return visitor.visit_window_function_expr(self) + + @property + def description(self) -> str: # noqa: D + return f"{self._sql_function.value} Window Function Expression" + + @property + def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D + return ( + super().displayed_properties + + [DisplayedProperty("function", self.sql_function)] + + [DisplayedProperty("argument", x) for x in self.sql_function_args] + + [DisplayedProperty("partition_by_argument", x) for x in self.partition_by_args] + + [DisplayedProperty("order_by_argument", x) for x in self.order_by_args] + ) + + @property + def sql_function(self) -> SqlWindowFunction: # noqa: D + return self._sql_function + + @property + def sql_function_args(self) -> List[SqlExpressionNode]: # noqa: D + return self._sql_function_args or [] + + @property + def partition_by_args(self) -> List[SqlExpressionNode]: # noqa: D + return self._partition_by_args or [] + + @property + def order_by_args(self) -> List[SqlWindowOrderByArg]: # noqa: D + return self._order_by_args or [] + + def __repr__(self) -> str: # noqa: D + return f"{self.__class__.__name__}(node_id={self.node_id}, sql_function={self.sql_function.name})" + + def rewrite( # noqa: D + self, + column_replacements: Optional[SqlColumnReplacements] = None, + should_render_table_alias: Optional[bool] = None, + ) -> SqlExpressionNode: + return 
SqlWindowFunctionExpression( + sql_function=self.sql_function, + sql_function_args=[ + x.rewrite(column_replacements, should_render_table_alias) for x in self.sql_function_args + ], + partition_by_args=[ + x.rewrite(column_replacements, should_render_table_alias) for x in self.partition_by_args + ], + order_by_args=[ + SqlWindowOrderByArg( + expr=x.expr.rewrite(column_replacements, should_render_table_alias), + descending=x.descending, + nulls_last=x.nulls_last, + ) + for x in self.order_by_args + ], + ) + + @property + def lineage(self) -> SqlExpressionTreeLineage: # noqa: D + return SqlExpressionTreeLineage.combine( + tuple(x.lineage for x in self.parent_nodes) + (SqlExpressionTreeLineage(function_exprs=(self,)),) + ) + + def matches(self, other: SqlExpressionNode) -> bool: # noqa: D + if not isinstance(other, SqlWindowFunctionExpression): + return False + order_by_matches = all(x == y for x, y in itertools.zip_longest(self.order_by_args, other.order_by_args)) + return self.sql_function == other.sql_function and order_by_matches and self._parents_match(other) + + class SqlNullExpression(SqlExpressionNode): """Represents NULL.""" diff --git a/metricflow/test/sql/test_sql_expr_render.py b/metricflow/test/sql/test_sql_expr_render.py index 33f4d6aecc..294627868f 100644 --- a/metricflow/test/sql/test_sql_expr_render.py +++ b/metricflow/test/sql/test_sql_expr_render.py @@ -22,6 +22,9 @@ SqlColumnReplacements, SqlCastToTimestampExpression, SqlBetweenExpression, + SqlWindowFunctionExpression, + SqlWindowFunction, + SqlWindowOrderByArg, ) from metricflow.time.time_granularity import TimeGranularity @@ -236,3 +239,32 @@ def test_between_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> No ) ).sql assert actual == "a.col0 BETWEEN CAST('2020-01-01' AS TIMESTAMP) AND CAST('2020-01-10' AS TIMESTAMP)" + + +def test_window_function_expr(default_expr_renderer: DefaultSqlExpressionRenderer) -> None: # noqa: D + actual = default_expr_renderer.render_sql_expr( + SqlWindowFunctionExpression( + sql_function=SqlWindowFunction.FIRST_VALUE, + sql_function_args=[SqlColumnReferenceExpression(SqlColumnReference("a", "col0"))], + partition_by_args=[ + SqlColumnReferenceExpression(SqlColumnReference("b", "col0")), + SqlColumnReferenceExpression(SqlColumnReference("b", "col1")), + ], + order_by_args=[ + SqlWindowOrderByArg( + expr=SqlColumnReferenceExpression(SqlColumnReference("a", "col0")), + descending=True, + nulls_last=False, + ), + SqlWindowOrderByArg( + expr=SqlColumnReferenceExpression(SqlColumnReference("b", "col0")), + descending=False, + nulls_last=True, + ), + ], + ) + ).sql + assert ( + actual + == "first_value(a.col0) OVER (PARTITION BY b.col0, b.col1 ORDER BY a.col0 DESC NULLS FIRST, b.col0 ASC NULLS LAST)" + ) From 7dc8a1968896abeb426850f1b1c30c89652bf15f Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 12:11:01 -0500 Subject: [PATCH 3/8] added ExtraSpec/Instance --- metricflow/instances.py | 14 +++++++++ metricflow/plan_conversion/column_resolver.py | 7 +++++ .../plan_conversion/instance_converters.py | 29 +++++++++++++++++++ .../plan_conversion/select_column_gen.py | 4 +++ metricflow/specs.py | 22 ++++++++++++++ 5 files changed, 76 insertions(+) diff --git a/metricflow/instances.py b/metricflow/instances.py index b5b467f4c2..0fd26faafb 100644 --- a/metricflow/instances.py +++ b/metricflow/instances.py @@ -11,6 +11,7 @@ from metricflow.dataclass_serialization import SerializableDataclass from metricflow.references import ElementReference from metricflow.specs import ( + 
ExtraSpec, MeasureSpec, DimensionSpec, IdentifierSpec, @@ -171,6 +172,12 @@ class MetricInstance(MdoInstance[MetricSpec], SerializableDataclass): # noqa: D defined_from: Tuple[MetricModelReference, ...] +@dataclass(frozen=True) +class ExtraInstance(MdoInstance[ExtraSpec], SerializableDataclass): # noqa: D + associated_columns: Tuple[ColumnAssociation, ...] + spec: ExtraSpec + + # Output type of transform function TransformOutputT = TypeVar("TransformOutputT") @@ -200,6 +207,7 @@ class InstanceSet(SerializableDataclass): time_dimension_instances: Tuple[TimeDimensionInstance, ...] = () identifier_instances: Tuple[IdentifierInstance, ...] = () metric_instances: Tuple[MetricInstance, ...] = () + extra_instances: Tuple[ExtraInstance, ...] = () def transform(self, transform_function: InstanceSetTransform[TransformOutputT]) -> TransformOutputT: # noqa: D return transform_function.transform(self) @@ -215,6 +223,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: time_dimension_instances: List[TimeDimensionInstance] = [] identifier_instances: List[IdentifierInstance] = [] metric_instances: List[MetricInstance] = [] + extra_instances: List[ExtraInstance] = [] for instance_set in instance_sets: for measure_instance in instance_set.measure_instances: @@ -232,6 +241,9 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: for metric_instance in instance_set.metric_instances: if metric_instance.spec not in {x.spec for x in metric_instances}: metric_instances.append(metric_instance) + for extra_instance in instance_set.extra_instances: + if extra_instance.spec not in {x.spec for x in extra_instances}: + extra_instances.append(extra_instance) return InstanceSet( measure_instances=tuple(measure_instances), @@ -239,6 +251,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: time_dimension_instances=tuple(time_dimension_instances), identifier_instances=tuple(identifier_instances), metric_instances=tuple(metric_instances), + extra_instances=tuple(extra_instances), ) @property @@ -249,4 +262,5 @@ def spec_set(self) -> InstanceSpecSet: # noqa: D time_dimension_specs=tuple(x.spec for x in self.time_dimension_instances), identifier_specs=tuple(x.spec for x in self.identifier_instances), metric_specs=tuple(x.spec for x in self.metric_instances), + extra_specs=tuple(x.spec for x in self.extra_instances), ) diff --git a/metricflow/plan_conversion/column_resolver.py b/metricflow/plan_conversion/column_resolver.py index 07f82a6ac8..03348adf22 100644 --- a/metricflow/plan_conversion/column_resolver.py +++ b/metricflow/plan_conversion/column_resolver.py @@ -9,6 +9,7 @@ ) from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.specs import ( + ExtraSpec, MetricSpec, MeasureSpec, DimensionSpec, @@ -109,3 +110,9 @@ def resolve_identifier_spec(self, identifier_spec: IdentifierSpec) -> Tuple[Colu single_column_correlation_key=SingleColumnCorrelationKey(), ), ) + + def resolve_extra_spec(self, extra_spec: ExtraSpec) -> ColumnAssociation: # noqa: D + return ColumnAssociation( + column_name=extra_spec.element_name, + single_column_correlation_key=SingleColumnCorrelationKey(), + ) diff --git a/metricflow/plan_conversion/instance_converters.py b/metricflow/plan_conversion/instance_converters.py index 4f8b3c9712..fc3cd61405 100644 --- a/metricflow/plan_conversion/instance_converters.py +++ b/metricflow/plan_conversion/instance_converters.py @@ -15,6 +15,7 @@ MdoInstance, DimensionInstance, IdentifierInstance, + ExtraInstance, MetricInstance, MeasureInstance, 
InstanceSet, @@ -87,12 +88,16 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D identifier_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.identifier_instances]) ) + extra_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.extra_instances]) + ) return SelectColumnSet( metric_columns=metric_cols, measure_columns=measure_cols, dimension_columns=dimension_cols, time_dimension_columns=time_dimension_cols, identifier_columns=identifier_cols, + extra_columns=extra_cols, ) def _make_sql_column_expression( @@ -234,12 +239,16 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D identifier_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.identifier_instances]) ) + extra_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.extra_instances]) + ) return SelectColumnSet( metric_columns=metric_cols, measure_columns=measure_cols, dimension_columns=dimension_cols, time_dimension_columns=time_dimension_cols, identifier_columns=identifier_cols, + extra_columns=extra_cols, ) @@ -436,6 +445,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=tuple(time_dimension_instances_with_additional_link), identifier_instances=tuple(identifier_instances_with_additional_link), metric_instances=(), + extra_instances=(), ) @@ -477,6 +487,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=filtered_time_dimension_instances, identifier_instances=filtered_identifier_instances, metric_instances=instance_set.metric_instances, + extra_instances=instance_set.extra_instances, ) return output @@ -535,6 +546,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ), identifier_instances=tuple(x for x in instance_set.identifier_instances if self._should_pass(x.spec)), metric_instances=tuple(x for x in instance_set.metric_instances if self._should_pass(x.spec)), + extra_instances=tuple(x for x in instance_set.extra_instances if self._should_pass(x.spec)), ) return output @@ -573,6 +585,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, + extra_instances=instance_set.extra_instances, ) @@ -626,6 +639,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, + extra_instances=instance_set.extra_instances, ) @@ -642,6 +656,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances + tuple(self._metric_instances), + extra_instances=instance_set.extra_instances, ) @@ -655,6 +670,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, + extra_instances=instance_set.extra_instances, ) @@ -668,6 +684,7 @@ def transform(self, instance_set: InstanceSet) -> 
InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=(), + extra_instances=instance_set.extra_instances, ) @@ -745,12 +762,24 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ) ) + output_extra_instances = [] + for input_extra_instance in instance_set.extra_instances: + output_extra_instances.append( + ExtraInstance( + associated_columns=( + self._column_association_resolver.resolve_extra_spec(extra_spec=input_extra_instance.spec), + ), + spec=input_extra_instance.spec, + ) + ) + return InstanceSet( measure_instances=tuple(output_measure_instances), dimension_instances=tuple(output_dimension_instances), time_dimension_instances=tuple(output_time_dimension_instances), identifier_instances=tuple(output_identifier_instances), metric_instances=tuple(output_metric_instances), + extra_instances=tuple(output_extra_instances), ) diff --git a/metricflow/plan_conversion/select_column_gen.py b/metricflow/plan_conversion/select_column_gen.py index 7486e7203a..7ffb31e95f 100644 --- a/metricflow/plan_conversion/select_column_gen.py +++ b/metricflow/plan_conversion/select_column_gen.py @@ -18,6 +18,7 @@ class SelectColumnSet: dimension_columns: List[SqlSelectColumn] = field(default_factory=list) time_dimension_columns: List[SqlSelectColumn] = field(default_factory=list) identifier_columns: List[SqlSelectColumn] = field(default_factory=list) + extra_columns: List[SqlSelectColumn] = field(default_factory=list) def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: """Combine the select columns by type.""" @@ -27,6 +28,7 @@ def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: dimension_columns=self.dimension_columns + other_set.dimension_columns, time_dimension_columns=self.time_dimension_columns + other_set.time_dimension_columns, identifier_columns=self.identifier_columns + other_set.identifier_columns, + extra_columns=self.extra_columns + other_set.extra_columns, ) def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: @@ -38,6 +40,7 @@ def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: + self.dimension_columns + self.metric_columns + self.measure_columns + + self.extra_columns ) def without_measure_columns(self) -> SelectColumnSet: @@ -47,4 +50,5 @@ def without_measure_columns(self) -> SelectColumnSet: dimension_columns=self.dimension_columns, time_dimension_columns=self.time_dimension_columns, identifier_columns=self.identifier_columns, + extra_columns=self.extra_columns, ) diff --git a/metricflow/specs.py b/metricflow/specs.py index cfeae9dcf5..8d07aebb21 100644 --- a/metricflow/specs.py +++ b/metricflow/specs.py @@ -71,6 +71,10 @@ def resolve_time_dimension_spec( # noqa: D def resolve_identifier_spec(self, identifier_spec: IdentifierSpec) -> Tuple[ColumnAssociation, ...]: # noqa: D pass + @abstractmethod + def resolve_extra_spec(self, extra_spec: ExtraSpec) -> ColumnAssociation: # noqa: D + pass + @dataclass(frozen=True) class InstanceSpec(SerializableDataclass): @@ -107,6 +111,20 @@ def qualified_name(self) -> str: raise NotImplementedError() +@dataclass(frozen=True) +class ExtraSpec(InstanceSpec): + """A specification for an element that is generated during dataflow plan construction rather than being defined in the configuration.""" + + element_name: str + + def column_associations(self, resolver: ColumnAssociationResolver) -> Tuple[ColumnAssociation, ...]: # noqa: D + return (resolver.resolve_extra_spec(self),) + + @property + def qualified_name(self) -> str: # noqa: D + return
self.element_name + + @dataclass(frozen=True) class LinkableInstanceSpec(InstanceSpec): """Generally a dimension or identifier that may be specified using identifier links. @@ -576,17 +594,20 @@ class InstanceSpecSet(SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () identifier_specs: Tuple[IdentifierSpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () + extra_specs: Tuple[ExtraSpec, ...] = () def merge(self, others: Sequence[InstanceSpecSet]) -> InstanceSpecSet: """Merge all sets into one set, without de-duplication.""" return InstanceSpecSet( metric_specs=self.metric_specs + tuple(itertools.chain.from_iterable([x.metric_specs for x in others])), + measure_specs=self.measure_specs + tuple(itertools.chain.from_iterable([x.measure_specs for x in others])), dimension_specs=self.dimension_specs + tuple(itertools.chain.from_iterable([x.dimension_specs for x in others])), identifier_specs=self.identifier_specs + tuple(itertools.chain.from_iterable([x.identifier_specs for x in others])), time_dimension_specs=self.time_dimension_specs + tuple(itertools.chain.from_iterable([x.time_dimension_specs for x in others])), + extra_specs=self.extra_specs + tuple(itertools.chain.from_iterable([x.extra_specs for x in others])), ) @property @@ -603,6 +624,7 @@ def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D self.time_dimension_specs, self.identifier_specs, self.metric_specs, + self.extra_specs, ) ) From f6643184051360897fee891d10c52dbe96be2fa0 Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 12:34:07 -0500 Subject: [PATCH 4/8] added AppendRowNumberColumnNode --- metricflow/dag/id_generation.py | 1 + metricflow/dataflow/builder/costing.py | 6 ++ metricflow/dataflow/dataflow_plan.py | 44 +++++++++++++ metricflow/plan_conversion/dataflow_to_sql.py | 62 +++++++++++++++++++ .../plan_conversion/instance_converters.py | 41 ++++++++++++ metricflow/specs.py | 4 ++ 6 files changed, 158 insertions(+) diff --git a/metricflow/dag/id_generation.py b/metricflow/dag/id_generation.py index 349771b247..8a91f895ef 100644 --- a/metricflow/dag/id_generation.py +++ b/metricflow/dag/id_generation.py @@ -21,6 +21,7 @@ DATAFLOW_NODE_CONSTRAIN_TIME_RANGE_ID_PREFIX = "ctr" DATAFLOW_NODE_SET_MEASURE_AGGREGATION_TIME = "sma" DATAFLOW_NODE_SEMI_ADDITIVE_JOIN_ID_PREFIX = "saj" +DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX = "rnum" SQL_EXPR_COLUMN_REFERENCE_ID_PREFIX = "cr" SQL_EXPR_COMPARISON_ID_PREFIX = "cmp" diff --git a/metricflow/dataflow/builder/costing.py b/metricflow/dataflow/builder/costing.py index bd8c2d560f..19138f8b1a 100644 --- a/metricflow/dataflow/builder/costing.py +++ b/metricflow/dataflow/builder/costing.py @@ -32,6 +32,7 @@ WriteToResultTableNode, SemiAdditiveJoinNode, MetricTimeDimensionTransformNode, + AppendRowNumberColumnNode, ) @@ -161,3 +162,8 @@ def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode[SourceDataSet # Add number of joins to the cost. 
node_cost = DefaultCost(num_joins=1) return DefaultCost.sum(parent_costs + [node_cost]) + + def visit_append_row_number_column_node( # noqa: D + self, node: AppendRowNumberColumnNode[SourceDataSetT] + ) -> DefaultCost: + return DefaultCost.sum([x.accept(self) for x in node.parent_nodes]) diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 117987c53f..260145d584 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -25,6 +25,7 @@ DATAFLOW_NODE_COMBINE_METRICS_ID_PREFIX, DATAFLOW_NODE_CONSTRAIN_TIME_RANGE_ID_PREFIX, DATAFLOW_NODE_SET_MEASURE_AGGREGATION_TIME, + DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX, ) from metricflow.dag.mf_dag import DagNode, DisplayedProperty, MetricFlowDag, NodeId from metricflow.dataflow.builder.partitions import ( @@ -160,6 +161,12 @@ def visit_metric_time_dimension_transform_node( # noqa: D ) -> VisitorOutputT: pass + @abstractmethod + def visit_append_row_number_column_node( # noqa: D + self, node: AppendRowNumberColumnNode[SourceDataSetT] + ) -> VisitorOutputT: + pass + class BaseOutput(Generic[SourceDataSetT], DataflowPlanNode[SourceDataSetT], ABC): """A node that outputs data in a "base" format. @@ -977,6 +984,43 @@ def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D ] + +class AppendRowNumberColumnNode(Generic[SourceDataSetT], BaseOutput[SourceDataSetT]): + """A node that transforms the input data set so that it also contains a row number column. + + The row number can later be used as a unique identifier for each row in the data set. + + Input: a data set + + Output: the same data set with the addition of a primary identifier column containing the row number. + """ + + def __init__( # noqa: D + self, + parent_node: BaseOutput[SourceDataSetT], + ) -> None: + self._parent_node = parent_node + super().__init__(node_id=self.create_unique_id(), parent_nodes=[parent_node]) + + @classmethod + def id_prefix(cls) -> str: # noqa: D + return DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX + + def accept(self, visitor: DataflowPlanNodeVisitor[SourceDataSetT, VisitorOutputT]) -> VisitorOutputT: # noqa: D + return visitor.visit_append_row_number_column_node(self) + + @property + def description(self) -> str: # noqa: D + return "Append row number column" + + @property + def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D + return super().displayed_properties + + @property + def parent_node(self) -> BaseOutput[SourceDataSetT]: # noqa: D + return self._parent_node + + class DataflowPlan(Generic[SourceDataSetT], MetricFlowDag[SinkOutput[SourceDataSetT]]): """Describes the flow of metric data as it goes from source nodes to sink nodes in the graph.""" diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index fa06d90214..a2b0c843ea 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -28,9 +28,11 @@ JoinOverTimeRangeNode, SemiAdditiveJoinNode, MetricTimeDimensionTransformNode, + AppendRowNumberColumnNode, ) from metricflow.dataset.dataset import DataSet from metricflow.instances import ( + ExtraInstance, InstanceSet, MetricInstance, MetricModelReference, @@ -46,6 +48,7 @@ AddMetrics, CreateSelectColumnsForInstances, CreateSelectColumnsWithMeasuresAggregated, + CreateSqlColumnReferencesForInstances, create_select_columns_for_instance_sets, AddLinkToLinkableElements, FilterElements, @@ -71,6 +74,7 @@ from metricflow.protocols.sql_client import SqlEngineAttributes, SqlEngine
from metricflow.specs import ( ColumnAssociationResolver, + ExtraSpec, MetricSpec, TimeDimensionSpec, MeasureSpec, @@ -90,6 +94,9 @@ SqlStringLiteralExpression, SqlBetweenExpression, SqlAggregateFunctionExpression, + SqlWindowFunction, + SqlWindowFunctionExpression, + SqlWindowOrderByArg, ) from metricflow.sql.sql_plan import ( SqlQueryPlan, @@ -1264,3 +1271,58 @@ def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode) -> SqlDataSe order_bys=(), ), ) + + def visit_append_row_number_column_node(self, node: AppendRowNumberColumnNode) -> SqlDataSet: + """Appends a row number identifier column to the data set.""" + input_data_set: SqlDataSet = node.parent_node.accept(self) + input_data_set_alias = self._next_unique_table_alias() + + row_number_spec = ExtraSpec.from_name("mf_row_number") + row_number_spec_column_name = self._column_association_resolver.resolve_extra_spec(row_number_spec) + + input_sql_column_references = input_data_set.instance_set.transform( + CreateSqlColumnReferencesForInstances(input_data_set_alias, self._column_association_resolver) + ) + # Build enumerated column using ROW_NUMBER function + row_number_select_column = SqlSelectColumn( + expr=SqlWindowFunctionExpression( + sql_function=SqlWindowFunction.ROW_NUMBER, + order_by_args=[SqlWindowOrderByArg(expr=x) for x in input_sql_column_references], + ), + column_alias=row_number_spec_column_name.column_name, + ) + + # Build output instance set + output_extra_instances = list(input_data_set.instance_set.extra_instances) + output_extra_instances.append( + ExtraInstance( + associated_columns=(row_number_spec_column_name,), + spec=row_number_spec, + ) + ) + output_instance_set = InstanceSet( + measure_instances=input_data_set.instance_set.measure_instances, + dimension_instances=input_data_set.instance_set.dimension_instances, + time_dimension_instances=input_data_set.instance_set.time_dimension_instances, + identifier_instances=input_data_set.instance_set.identifier_instances, + metric_instances=input_data_set.instance_set.metric_instances, + extra_instances=tuple(output_extra_instances), + ) + output_instance_set = ChangeAssociatedColumns(self._column_association_resolver).transform(output_instance_set) + + sql_select_columns = input_data_set.instance_set.transform( + CreateSelectColumnsForInstances(input_data_set_alias, self._column_association_resolver) + ).as_tuple() + (row_number_select_column,) + return SqlDataSet( + instance_set=output_instance_set, + sql_select_node=SqlSelectStatementNode( + description=node.description, + select_columns=sql_select_columns, + from_source=input_data_set.sql_select_node, + from_source_alias=input_data_set_alias, + joins_descs=(), + group_bys=(), + where=None, + order_bys=(), + ), + ) diff --git a/metricflow/plan_conversion/instance_converters.py b/metricflow/plan_conversion/instance_converters.py index fc3cd61405..9df80d2aa6 100644 --- a/metricflow/plan_conversion/instance_converters.py +++ b/metricflow/plan_conversion/instance_converters.py @@ -688,6 +688,47 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ) +class CreateSqlColumnReferencesForInstances(InstanceSetTransform[Tuple[SqlColumnReferenceExpression, ...]]): + """Create select column expressions that will express all instances in the set. + + It assumes that the column names of the instances are represented by the supplied column association resolver and + come from the given table alias. 
+ """ + + def __init__( + self, + table_alias: str, + column_resolver: ColumnAssociationResolver, + ) -> None: + """Initializer. + + Args: + table_alias: the table alias to select columns from + column_resolver: resolver to name columns. + """ + self._table_alias = table_alias + self._column_resolver = column_resolver + + def transform(self, instance_set: InstanceSet) -> Tuple[SqlColumnReferenceExpression, ...]: # noqa: D + column_names = [ + col.column_name + for col in ( + chain.from_iterable( + [x.column_associations(self._column_resolver) for x in instance_set.spec_set.all_specs] + ) + ) + ] + return tuple( + SqlColumnReferenceExpression( + SqlColumnReference( + table_alias=self._table_alias, + column_name=column_name, + ), + ) + for column_name in column_names + ) + + class ChangeAssociatedColumns(InstanceSetTransform[InstanceSet]): """Change the columns associated with instances to the one specified by the resolver.""" diff --git a/metricflow/specs.py b/metricflow/specs.py index 8d07aebb21..4a9832831a 100644 --- a/metricflow/specs.py +++ b/metricflow/specs.py @@ -124,6 +124,10 @@ def column_associations(self, resolver: ColumnAssociationResolver) -> Tuple[Colu def qualified_name(self) -> str: # noqa: D return self.element_name + @staticmethod + def from_name(name: str) -> ExtraSpec: # noqa: D + return ExtraSpec(element_name=name) + @dataclass(frozen=True) class LinkableInstanceSpec(InstanceSpec): From fa2b1ad391e54aef379d24fe553432b6c5aa3e2a Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 12:41:21 -0500 Subject: [PATCH 5/8] added test for AppendRowNumberColumnNode --- .../test_dataflow_to_sql_plan.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index b7c687d515..45e9c05602 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -7,6 +7,7 @@ from metricflow.constraints.time_constraint import TimeRangeConstraint from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder from metricflow.dataflow.dataflow_plan import ( + AppendRowNumberColumnNode, DataflowPlan, WriteToResultDataframeNode, FilterElementsNode, @@ -1496,3 +1497,22 @@ def test_metric_with_measures_from_multiple_sources_no_dimensions( # noqa: D sql_client=sql_client, node=dataflow_plan.sink_output_nodes[0].parent_node, ) + + +def test_append_row_number_column_node( + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + consistent_id_object_repository: ConsistentIdObjectRepository, + dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter[DataSourceDataSet], + sql_client: SqlClient, +) -> None: + """Tests converting a dataflow plan to a SQL query plan using a AppendRowNumberColumnNode.""" + measure_source_node = consistent_id_object_repository.simple_model_read_nodes["bookings_source"] + append_row_number_column_node = AppendRowNumberColumnNode[DataSourceDataSet](parent_node=measure_source_node) + convert_and_check( + request=request, + mf_test_session_state=mf_test_session_state, + dataflow_to_sql_converter=dataflow_to_sql_converter, + sql_client=sql_client, + node=append_row_number_column_node, + ) From b4889ffbe890c5659a31760d648171046c043d5e Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 22 Nov 2022 12:41:27 -0500 Subject: [PATCH 6/8] updated snapshots --- ...t_append_row_number_column_node__plan0.sql | 107 
+++++ ...ow_number_column_node__plan0_optimized.sql | 56 +++ ...t_append_row_number_column_node__plan0.xml | 406 ++++++++++++++++++ .../test_composite_identifier__plan0.xml | 8 +- ..._composite_identifier_with_join__plan0.xml | 8 +- ...posite_identifier_with_order_by__plan0.xml | 8 +- .../test_compute_metrics_node__plan0.xml | 8 +- ...atio_from_multiple_data_sources__plan0.xml | 16 +- ...e_ratio_from_single_data_source__plan0.xml | 16 +- ...ompute_metrics_node_simple_expr__plan0.xml | 8 +- .../test_cumulative_metric__plan0.xml | 8 +- ...cumulative_metric_grain_to_date__plan0.xml | 8 +- .../test_cumulative_metric_no_ds__plan0.xml | 8 +- ...est_cumulative_metric_no_window__plan0.xml | 8 +- ..._no_window_with_time_constraint__plan0.xml | 8 +- ...ive_metric_with_time_constraint__plan0.xml | 8 +- .../test_derived_metric__plan0.xml | 24 +- .../test_distinct_values__plan0.xml | 8 +- ...th_where_constraint_on_join_dim__plan0.xml | 8 +- .../test_join_to_scd_dimension__plan0.xml | 8 +- .../SqlQueryPlan/test_limit_rows__plan0.xml | 8 +- ...imension_using_local_identifier__plan0.xml | 8 +- .../test_measure_aggregation_node__plan0.xml | 32 +- .../test_measure_constraint__plan0.xml | 24 +- ..._constraint_with_reused_measure__plan0.xml | 16 +- ...aint_with_single_expr_and_alias__plan0.xml | 8 +- ..._multiple_sources_no_dimensions__plan0.xml | 16 +- ...multi_hop_through_scd_dimension__plan0.xml | 8 +- ...test_multi_hop_to_scd_dimension__plan0.xml | 8 +- .../test_multihop_node__plan0.xml | 8 +- ..._multiple_metrics_no_dimensions__plan0.xml | 16 +- .../test_nested_derived_metric__plan0.xml | 48 +-- .../test_order_by_node__plan0.xml | 8 +- .../test_partitioned_join__plan0.xml | 8 +- .../test_semi_additive_join_node__plan0.xml | 8 +- ...dditive_join_node_with_grouping__plan0.xml | 8 +- ...join_node_with_queried_group_by__plan0.xml | 8 +- ...uery_with_metric_time_dimension__plan0.xml | 24 +- 38 files changed, 785 insertions(+), 216 deletions(-) create mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql create mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql create mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql new file mode 100644 index 0000000000..d85fd18192 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql @@ -0,0 +1,107 @@ +-- Append row number column +SELECT + subq_0.ds + , subq_0.ds__week + , subq_0.ds__month + , subq_0.ds__quarter + , subq_0.ds__year + , subq_0.ds_partitioned + , subq_0.ds_partitioned__week + , subq_0.ds_partitioned__month + , subq_0.ds_partitioned__quarter + , subq_0.ds_partitioned__year + , subq_0.booking_paid_at + , subq_0.booking_paid_at__week + , subq_0.booking_paid_at__month + , subq_0.booking_paid_at__quarter + , subq_0.booking_paid_at__year + , subq_0.create_a_cycle_in_the_join_graph__ds + , subq_0.create_a_cycle_in_the_join_graph__ds__week + , subq_0.create_a_cycle_in_the_join_graph__ds__month + , 
subq_0.create_a_cycle_in_the_join_graph__ds__quarter + , subq_0.create_a_cycle_in_the_join_graph__ds__year + , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned + , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__week + , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__month + , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__quarter + , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__year + , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at + , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__week + , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__month + , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__quarter + , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__year + , subq_0.listing + , subq_0.guest + , subq_0.host + , subq_0.create_a_cycle_in_the_join_graph + , subq_0.create_a_cycle_in_the_join_graph__listing + , subq_0.create_a_cycle_in_the_join_graph__guest + , subq_0.create_a_cycle_in_the_join_graph__host + , subq_0.is_instant + , subq_0.create_a_cycle_in_the_join_graph__is_instant + , subq_0.bookings + , subq_0.instant_bookings + , subq_0.booking_value + , subq_0.max_booking_value + , subq_0.min_booking_value + , subq_0.bookers + , subq_0.average_booking_value + , subq_0.booking_payments + , subq_0.referred_bookings + , row_number() OVER (ORDER BY subq_0.bookings, subq_0.instant_bookings, subq_0.booking_value, subq_0.max_booking_value, subq_0.min_booking_value, subq_0.bookers, subq_0.average_booking_value, subq_0.booking_payments, subq_0.referred_bookings, subq_0.is_instant, subq_0.create_a_cycle_in_the_join_graph__is_instant, subq_0.ds, subq_0.ds__week, subq_0.ds__month, subq_0.ds__quarter, subq_0.ds__year, subq_0.ds_partitioned, subq_0.ds_partitioned__week, subq_0.ds_partitioned__month, subq_0.ds_partitioned__quarter, subq_0.ds_partitioned__year, subq_0.booking_paid_at, subq_0.booking_paid_at__week, subq_0.booking_paid_at__month, subq_0.booking_paid_at__quarter, subq_0.booking_paid_at__year, subq_0.create_a_cycle_in_the_join_graph__ds, subq_0.create_a_cycle_in_the_join_graph__ds__week, subq_0.create_a_cycle_in_the_join_graph__ds__month, subq_0.create_a_cycle_in_the_join_graph__ds__quarter, subq_0.create_a_cycle_in_the_join_graph__ds__year, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__week, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__month, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__quarter, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__year, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__week, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__month, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__quarter, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__year, subq_0.listing, subq_0.guest, subq_0.host, subq_0.create_a_cycle_in_the_join_graph, subq_0.create_a_cycle_in_the_join_graph__listing, subq_0.create_a_cycle_in_the_join_graph__guest, subq_0.create_a_cycle_in_the_join_graph__host) AS mf_row_number +FROM ( + -- Read Elements From Data Source 'bookings_source' + SELECT + 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , bookings_source_src_10001.booking_value + , bookings_source_src_10001.booking_value AS max_booking_value + , bookings_source_src_10001.booking_value AS min_booking_value + , bookings_source_src_10001.guest_id AS bookers + , 
bookings_source_src_10001.booking_value AS average_booking_value + , bookings_source_src_10001.booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings + , bookings_source_src_10001.is_instant + , bookings_source_src_10001.ds + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year + , bookings_source_src_10001.ds_partitioned + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year + , bookings_source_src_10001.booking_paid_at + , DATE_TRUNC('week', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__year + , bookings_source_src_10001.is_instant AS create_a_cycle_in_the_join_graph__is_instant + , bookings_source_src_10001.ds AS create_a_cycle_in_the_join_graph__ds + , DATE_TRUNC('week', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__week + , DATE_TRUNC('month', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__year + , bookings_source_src_10001.ds_partitioned AS create_a_cycle_in_the_join_graph__ds_partitioned + , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__week + , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__month + , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__quarter + , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__year + , bookings_source_src_10001.booking_paid_at AS create_a_cycle_in_the_join_graph__booking_paid_at + , DATE_TRUNC('week', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__week + , DATE_TRUNC('month', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__month + , DATE_TRUNC('quarter', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__quarter + , DATE_TRUNC('year', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__year + , bookings_source_src_10001.listing_id AS listing + , bookings_source_src_10001.guest_id AS guest + , bookings_source_src_10001.host_id AS host + , bookings_source_src_10001.guest_id AS create_a_cycle_in_the_join_graph + , bookings_source_src_10001.listing_id AS create_a_cycle_in_the_join_graph__listing + , bookings_source_src_10001.guest_id AS create_a_cycle_in_the_join_graph__guest + , 
bookings_source_src_10001.host_id AS create_a_cycle_in_the_join_graph__host + FROM ( + -- User Defined SQL Query + SELECT * FROM ***************************.fct_bookings + ) bookings_source_src_10001 +) subq_0 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql new file mode 100644 index 0000000000..56b803cbe5 --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql @@ -0,0 +1,56 @@ +-- Read Elements From Data Source 'bookings_source' +-- Append row number column +SELECT + ds + , DATE_TRUNC('week', ds) AS ds__week + , DATE_TRUNC('month', ds) AS ds__month + , DATE_TRUNC('quarter', ds) AS ds__quarter + , DATE_TRUNC('year', ds) AS ds__year + , ds_partitioned + , DATE_TRUNC('week', ds_partitioned) AS ds_partitioned__week + , DATE_TRUNC('month', ds_partitioned) AS ds_partitioned__month + , DATE_TRUNC('quarter', ds_partitioned) AS ds_partitioned__quarter + , DATE_TRUNC('year', ds_partitioned) AS ds_partitioned__year + , booking_paid_at + , DATE_TRUNC('week', booking_paid_at) AS booking_paid_at__week + , DATE_TRUNC('month', booking_paid_at) AS booking_paid_at__month + , DATE_TRUNC('quarter', booking_paid_at) AS booking_paid_at__quarter + , DATE_TRUNC('year', booking_paid_at) AS booking_paid_at__year + , ds AS create_a_cycle_in_the_join_graph__ds + , DATE_TRUNC('week', ds) AS create_a_cycle_in_the_join_graph__ds__week + , DATE_TRUNC('month', ds) AS create_a_cycle_in_the_join_graph__ds__month + , DATE_TRUNC('quarter', ds) AS create_a_cycle_in_the_join_graph__ds__quarter + , DATE_TRUNC('year', ds) AS create_a_cycle_in_the_join_graph__ds__year + , ds_partitioned AS create_a_cycle_in_the_join_graph__ds_partitioned + , DATE_TRUNC('week', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__week + , DATE_TRUNC('month', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__month + , DATE_TRUNC('quarter', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__quarter + , DATE_TRUNC('year', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__year + , booking_paid_at AS create_a_cycle_in_the_join_graph__booking_paid_at + , DATE_TRUNC('week', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__week + , DATE_TRUNC('month', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__month + , DATE_TRUNC('quarter', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__quarter + , DATE_TRUNC('year', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__year + , listing_id AS listing + , guest_id AS guest + , host_id AS host + , guest_id AS create_a_cycle_in_the_join_graph + , listing_id AS create_a_cycle_in_the_join_graph__listing + , guest_id AS create_a_cycle_in_the_join_graph__guest + , host_id AS create_a_cycle_in_the_join_graph__host + , is_instant + , is_instant AS create_a_cycle_in_the_join_graph__is_instant + , 1 AS bookings + , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings + , booking_value + , booking_value AS max_booking_value + , booking_value AS min_booking_value + , guest_id AS bookers + , booking_value AS average_booking_value + , booking_value AS booking_payments + , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS 
referred_bookings + , row_number() OVER (ORDER BY 1, CASE WHEN is_instant THEN 1 ELSE 0 END, booking_value, guest_id, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END, is_instant, ds, DATE_TRUNC('week', ds), DATE_TRUNC('month', ds), DATE_TRUNC('quarter', ds), DATE_TRUNC('year', ds), ds_partitioned, DATE_TRUNC('week', ds_partitioned), DATE_TRUNC('month', ds_partitioned), DATE_TRUNC('quarter', ds_partitioned), DATE_TRUNC('year', ds_partitioned), booking_paid_at, DATE_TRUNC('week', booking_paid_at), DATE_TRUNC('month', booking_paid_at), DATE_TRUNC('quarter', booking_paid_at), DATE_TRUNC('year', booking_paid_at), listing_id, host_id) AS mf_row_number +FROM ( + -- User Defined SQL Query + SELECT * FROM ***************************.fct_bookings +) bookings_source_src_10001 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml new file mode 100644 index 0000000000..2337deec0d --- /dev/null +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml @@ -0,0 +1,406 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier__plan0.xml index 402b988cf0..86cf3e8089 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier__plan0.xml @@ -27,10 +27,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_join__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_join__plan0.xml index ff20347ecc..9c15ccca15 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_join__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_join__plan0.xml @@ -35,10 +35,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_order_by__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_order_by__plan0.xml index 7956b51d64..13ec116169 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_order_by__plan0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_composite_identifier_with_order_by__plan0.xml @@ -52,10 +52,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node__plan0.xml index 7ed9c2dc81..d0dc520287 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node__plan0.xml @@ -27,10 +27,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_multiple_data_sources__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_multiple_data_sources__plan0.xml index 7cc0d5ab60..9d54117c57 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_multiple_data_sources__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_multiple_data_sources__plan0.xml @@ -77,10 +77,10 @@ - - - - + + + + @@ -896,10 +896,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_single_data_source__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_single_data_source__plan0.xml index bc560731e5..07db9cb500 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_single_data_source__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_ratio_from_single_data_source__plan0.xml @@ -27,14 +27,14 @@ - - - - - - - - + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_simple_expr__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_simple_expr__plan0.xml index 48b5ca8e0d..3d8e37ac19 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_simple_expr__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_compute_metrics_node_simple_expr__plan0.xml @@ -27,10 +27,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric__plan0.xml index f56abed0fc..61ca539dfc 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_grain_to_date__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_grain_to_date__plan0.xml index e38d77eacf..4f0a3a64ca 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_grain_to_date__plan0.xml +++ 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_grain_to_date__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_ds__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_ds__plan0.xml index 5c8337e0d3..524b3e9746 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_ds__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_ds__plan0.xml @@ -11,10 +11,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window__plan0.xml index 09d1a4b721..8f23dad803 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window_with_time_constraint__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window_with_time_constraint__plan0.xml index fb2fbeb53f..3a0eabc5d9 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window_with_time_constraint__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_no_window_with_time_constraint__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_with_time_constraint__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_with_time_constraint__plan0.xml index ada67eb503..c82fcc4aaa 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_with_time_constraint__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_cumulative_metric_with_time_constraint__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_derived_metric__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_derived_metric__plan0.xml index 188df2fb57..4bea59c302 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_derived_metric__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_derived_metric__plan0.xml @@ -15,10 +15,10 @@ - - - - + + + + @@ -55,10 +55,10 @@ - - - - + + + + @@ -519,10 +519,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml index 8386b2eefd..39fb42a6bb 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_distinct_values__plan0.xml @@ -43,10 +43,10 @@ - - - - + + + + diff --git 
a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_filter_with_where_constraint_on_join_dim__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_filter_with_where_constraint_on_join_dim__plan0.xml index c0601b8d84..ee211f46c6 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_filter_with_where_constraint_on_join_dim__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_filter_with_where_constraint_on_join_dim__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_join_to_scd_dimension__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_join_to_scd_dimension__plan0.xml index c1050efab2..c61673873d 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_join_to_scd_dimension__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_join_to_scd_dimension__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_limit_rows__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_limit_rows__plan0.xml index c9d0e501fa..ca80767100 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_limit_rows__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_limit_rows__plan0.xml @@ -32,10 +32,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_local_dimension_using_local_identifier__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_local_dimension_using_local_identifier__plan0.xml index f20ff9c816..feeb1adfcb 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_local_dimension_using_local_identifier__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_local_dimension_using_local_identifier__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_aggregation_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_aggregation_node__plan0.xml index 113a53fc1a..eed275d38c 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_aggregation_node__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_aggregation_node__plan0.xml @@ -2,22 +2,22 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint__plan0.xml index 125a88edb4..33b2a07792 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint__plan0.xml @@ -69,14 +69,14 @@ - - - - - - - - + + + + + + + + @@ -939,10 +939,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_reused_measure__plan0.xml 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_reused_measure__plan0.xml index 530b1b3863..55cc2c4874 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_reused_measure__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_reused_measure__plan0.xml @@ -61,10 +61,10 @@ - - - - + + + + @@ -549,10 +549,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_single_expr_and_alias__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_single_expr_and_alias__plan0.xml index 6b55ed0962..23783414ab 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_single_expr_and_alias__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_measure_constraint_with_single_expr_and_alias__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_with_measures_from_multiple_sources_no_dimensions__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_with_measures_from_multiple_sources_no_dimensions__plan0.xml index 70510c4e3c..7ab9db8261 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_with_measures_from_multiple_sources_no_dimensions__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_metric_with_measures_from_multiple_sources_no_dimensions__plan0.xml @@ -45,10 +45,10 @@ - - - - + + + + @@ -483,10 +483,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_through_scd_dimension__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_through_scd_dimension__plan0.xml index f80313e649..98573544b7 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_through_scd_dimension__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_through_scd_dimension__plan0.xml @@ -27,10 +27,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_to_scd_dimension__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_to_scd_dimension__plan0.xml index 509456d435..8a44947f5f 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_to_scd_dimension__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multi_hop_to_scd_dimension__plan0.xml @@ -27,10 +27,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multihop_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multihop_node__plan0.xml index 757bcac6fd..011532621e 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multihop_node__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multihop_node__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multiple_metrics_no_dimensions__plan0.xml 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multiple_metrics_no_dimensions__plan0.xml index 25b6863929..f4399656a0 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multiple_metrics_no_dimensions__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_multiple_metrics_no_dimensions__plan0.xml @@ -30,10 +30,10 @@ - - - - + + + + @@ -693,10 +693,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_nested_derived_metric__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_nested_derived_metric__plan0.xml index fd6fea4b7b..131cb46293 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_nested_derived_metric__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_nested_derived_metric__plan0.xml @@ -15,10 +15,10 @@ - - - - + + + + @@ -61,10 +61,10 @@ - - - - + + + + @@ -101,10 +101,10 @@ - - - - + + + + @@ -565,10 +565,10 @@ - - - - + + + + @@ -1031,10 +1031,10 @@ - - - - + + + + @@ -1495,10 +1495,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_order_by_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_order_by_node__plan0.xml index 20008d0b7a..69bcc0baa3 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_order_by_node__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_order_by_node__plan0.xml @@ -52,10 +52,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_partitioned_join__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_partitioned_join__plan0.xml index 0fce36ff56..0fdfc2d73a 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_partitioned_join__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_partitioned_join__plan0.xml @@ -19,10 +19,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node__plan0.xml index f92ddd8159..57765accb3 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node__plan0.xml @@ -103,10 +103,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_grouping__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_grouping__plan0.xml index 9b66d6a477..1ef9811e05 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_grouping__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_grouping__plan0.xml @@ -107,10 +107,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_queried_group_by__plan0.xml 
b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_queried_group_by__plan0.xml index 30fadde42a..95a0705568 100644 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_queried_group_by__plan0.xml +++ b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_semi_additive_join_node_with_queried_group_by__plan0.xml @@ -103,10 +103,10 @@ - - - - + + + + diff --git a/metricflow/test/snapshots/test_metric_time_dimension_to_sql.py/SqlQueryPlan/test_simple_query_with_metric_time_dimension__plan0.xml b/metricflow/test/snapshots/test_metric_time_dimension_to_sql.py/SqlQueryPlan/test_simple_query_with_metric_time_dimension__plan0.xml index 4124c567aa..a09a871f53 100644 --- a/metricflow/test/snapshots/test_metric_time_dimension_to_sql.py/SqlQueryPlan/test_simple_query_with_metric_time_dimension__plan0.xml +++ b/metricflow/test/snapshots/test_metric_time_dimension_to_sql.py/SqlQueryPlan/test_simple_query_with_metric_time_dimension__plan0.xml @@ -2,10 +2,10 @@ - - - - + + + + @@ -42,10 +42,10 @@ - - - - + + + + @@ -506,10 +506,10 @@ - - - - + + + + From 27db0e9c29a555730271cbf4a88350f6e0472c95 Mon Sep 17 00:00:00 2001 From: William Deng Date: Tue, 29 Nov 2022 13:23:05 -0500 Subject: [PATCH 7/8] address comments --- metricflow/instances.py | 20 ++++---- .../model/validations/unique_valid_name.py | 3 ++ metricflow/plan_conversion/column_resolver.py | 6 +-- metricflow/plan_conversion/dataflow_to_sql.py | 21 +++++---- .../plan_conversion/instance_converters.py | 46 ++++++++++--------- .../plan_conversion/select_column_gen.py | 8 ++-- metricflow/specs.py | 17 +++---- metricflow/sql/sql_exprs.py | 17 ++++--- metricflow/test/sql/test_sql_expr_render.py | 6 +-- 9 files changed, 77 insertions(+), 67 deletions(-) diff --git a/metricflow/instances.py b/metricflow/instances.py index 0fd26faafb..6fa3bf79ed 100644 --- a/metricflow/instances.py +++ b/metricflow/instances.py @@ -11,7 +11,7 @@ from metricflow.dataclass_serialization import SerializableDataclass from metricflow.references import ElementReference from metricflow.specs import ( - ExtraSpec, + MetadataSpec, MeasureSpec, DimensionSpec, IdentifierSpec, @@ -173,9 +173,9 @@ class MetricInstance(MdoInstance[MetricSpec], SerializableDataclass): # noqa: D @dataclass(frozen=True) -class ExtraInstance(MdoInstance[ExtraSpec], SerializableDataclass): # noqa: D +class MetadataInstance(MdoInstance[MetadataSpec], SerializableDataclass): # noqa: D associated_columns: Tuple[ColumnAssociation, ...] - spec: ExtraSpec + spec: MetadataSpec # Output type of transform function @@ -207,7 +207,7 @@ class InstanceSet(SerializableDataclass): time_dimension_instances: Tuple[TimeDimensionInstance, ...] = () identifier_instances: Tuple[IdentifierInstance, ...] = () metric_instances: Tuple[MetricInstance, ...] = () - extra_instances: Tuple[ExtraInstance, ...] = () + metadata_instances: Tuple[MetadataInstance, ...] 
= () def transform(self, transform_function: InstanceSetTransform[TransformOutputT]) -> TransformOutputT: # noqa: D return transform_function.transform(self) @@ -223,7 +223,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: time_dimension_instances: List[TimeDimensionInstance] = [] identifier_instances: List[IdentifierInstance] = [] metric_instances: List[MetricInstance] = [] - extra_instances: List[ExtraInstance] = [] + metadata_instances: List[MetadataInstance] = [] for instance_set in instance_sets: for measure_instance in instance_set.measure_instances: @@ -241,9 +241,9 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: for metric_instance in instance_set.metric_instances: if metric_instance.spec not in {x.spec for x in metric_instances}: metric_instances.append(metric_instance) - for extra_instance in instance_set.extra_instances: - if extra_instance.spec not in {x.spec for x in extra_instances}: - extra_instances.append(extra_instance) + for metadata_instance in instance_set.metadata_instances: + if metadata_instance.spec not in {x.spec for x in metadata_instances}: + metadata_instances.append(metadata_instance) return InstanceSet( measure_instances=tuple(measure_instances), @@ -251,7 +251,7 @@ def merge(instance_sets: List[InstanceSet]) -> InstanceSet: time_dimension_instances=tuple(time_dimension_instances), identifier_instances=tuple(identifier_instances), metric_instances=tuple(metric_instances), - extra_instances=tuple(extra_instances), + metadata_instances=tuple(metadata_instances), ) @property @@ -262,5 +262,5 @@ def spec_set(self) -> InstanceSpecSet: # noqa: D time_dimension_specs=tuple(x.spec for x in self.time_dimension_instances), identifier_specs=tuple(x.spec for x in self.identifier_instances), metric_specs=tuple(x.spec for x in self.metric_instances), - extra_specs=tuple(x.spec for x in self.extra_instances), + metadata_specs=tuple(x.spec for x in self.metadata_instances), ) diff --git a/metricflow/model/validations/unique_valid_name.py b/metricflow/model/validations/unique_valid_name.py index 518a67fb0c..1b93ce9011 100644 --- a/metricflow/model/validations/unique_valid_name.py +++ b/metricflow/model/validations/unique_valid_name.py @@ -35,6 +35,7 @@ class MetricFlowReservedKeywords(enum.Enum): """Enumeration of reserved keywords with helper for accessing the reason they are reserved""" METRIC_TIME = "metric_time" + ROW_NUMBER = "mf_row_number" @staticmethod def get_reserved_reason(keyword: MetricFlowReservedKeywords) -> str: @@ -44,6 +45,8 @@ def get_reserved_reason(keyword: MetricFlowReservedKeywords) -> str: "Used as the query input for creating time series metrics from measures with " "different time dimension names." ) + elif keyword is MetricFlowReservedKeywords.ROW_NUMBER: + return "Used by the framework to denote as a column name for row number column." 
else: assert_values_exhausted(keyword) diff --git a/metricflow/plan_conversion/column_resolver.py b/metricflow/plan_conversion/column_resolver.py index 03348adf22..8f56afcdff 100644 --- a/metricflow/plan_conversion/column_resolver.py +++ b/metricflow/plan_conversion/column_resolver.py @@ -9,7 +9,7 @@ ) from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.specs import ( - ExtraSpec, + MetadataSpec, MetricSpec, MeasureSpec, DimensionSpec, @@ -111,8 +111,8 @@ def resolve_identifier_spec(self, identifier_spec: IdentifierSpec) -> Tuple[Colu ), ) - def resolve_extra_spec(self, extra_spec: ExtraSpec) -> ColumnAssociation: # noqa: D + def resolve_metadata_spec(self, metadata_spec: MetadataSpec) -> ColumnAssociation: # noqa: D return ColumnAssociation( - column_name=extra_spec.element_name, + column_name=metadata_spec.element_name, single_column_correlation_key=SingleColumnCorrelationKey(), ) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index a2b0c843ea..4aabf99472 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -32,7 +32,7 @@ ) from metricflow.dataset.dataset import DataSet from metricflow.instances import ( - ExtraInstance, + MetadataInstance, InstanceSet, MetricInstance, MetricModelReference, @@ -40,6 +40,7 @@ ) from metricflow.model.objects.metric import MetricType from metricflow.model.semantic_model import SemanticModel +from metricflow.model.validations.unique_valid_name import MetricFlowReservedKeywords from metricflow.object_utils import assert_values_exhausted from metricflow.plan_conversion.instance_converters import ( AliasAggregatedMeasures, @@ -74,7 +75,7 @@ from metricflow.protocols.sql_client import SqlEngineAttributes, SqlEngine from metricflow.specs import ( ColumnAssociationResolver, - ExtraSpec, + MetadataSpec, MetricSpec, TimeDimensionSpec, MeasureSpec, @@ -96,7 +97,7 @@ SqlAggregateFunctionExpression, SqlWindowFunction, SqlWindowFunctionExpression, - SqlWindowOrderByArg, + SqlWindowOrderByArgument, ) from metricflow.sql.sql_plan import ( SqlQueryPlan, @@ -1277,8 +1278,8 @@ def visit_append_row_number_column_node(self, node: AppendRowNumberColumnNode) - input_data_set: SqlDataSet = node.parent_node.accept(self) input_data_set_alias = self._next_unique_table_alias() - row_number_spec = ExtraSpec.from_name("mf_row_number") - row_number_spec_column_name = self._column_association_resolver.resolve_extra_spec(row_number_spec) + row_number_spec = MetadataSpec.from_name(MetricFlowReservedKeywords.ROW_NUMBER.value) + row_number_spec_column_name = self._column_association_resolver.resolve_metadata_spec(row_number_spec) input_sql_column_references = input_data_set.instance_set.transform( CreateSqlColumnReferencesForInstances(input_data_set_alias, self._column_association_resolver) @@ -1287,15 +1288,15 @@ def visit_append_row_number_column_node(self, node: AppendRowNumberColumnNode) - row_number_select_column = SqlSelectColumn( expr=SqlWindowFunctionExpression( sql_function=SqlWindowFunction.ROW_NUMBER, - order_by_args=[SqlWindowOrderByArg(expr=x) for x in input_sql_column_references], + order_by_args=[SqlWindowOrderByArgument(expr=x) for x in input_sql_column_references], ), column_alias=row_number_spec_column_name.column_name, ) # Build output instance set - output_extra_instances = list(input_data_set.instance_set.extra_instances) - output_extra_instances.append( - ExtraInstance( + output_metadata_instances = 
list(input_data_set.instance_set.metadata_instances) + output_metadata_instances.append( + MetadataInstance( associated_columns=(row_number_spec_column_name,), spec=row_number_spec, ) @@ -1306,7 +1307,7 @@ def visit_append_row_number_column_node(self, node: AppendRowNumberColumnNode) - time_dimension_instances=input_data_set.instance_set.time_dimension_instances, identifier_instances=input_data_set.instance_set.identifier_instances, metric_instances=input_data_set.instance_set.metric_instances, - extra_instances=tuple(output_extra_instances), + metadata_instances=tuple(output_metadata_instances), ) output_instance_set = ChangeAssociatedColumns(self._column_association_resolver).transform(output_instance_set) diff --git a/metricflow/plan_conversion/instance_converters.py b/metricflow/plan_conversion/instance_converters.py index 9df80d2aa6..c023d79f65 100644 --- a/metricflow/plan_conversion/instance_converters.py +++ b/metricflow/plan_conversion/instance_converters.py @@ -15,7 +15,7 @@ MdoInstance, DimensionInstance, IdentifierInstance, - ExtraInstance, + MetadataInstance, MetricInstance, MeasureInstance, InstanceSet, @@ -88,8 +88,8 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D identifier_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.identifier_instances]) ) - extra_cols = list( - chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.extra_instances]) + metadata_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.metadata_instances]) ) return SelectColumnSet( metric_columns=metric_cols, @@ -97,7 +97,7 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D dimension_columns=dimension_cols, time_dimension_columns=time_dimension_cols, identifier_columns=identifier_cols, - extra_columns=extra_cols, + metadata_columns=metadata_cols, ) def _make_sql_column_expression( @@ -239,8 +239,8 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D identifier_cols = list( chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.identifier_instances]) ) - extra_cols = list( - chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.extra_instances]) + metadata_cols = list( + chain.from_iterable([self._make_sql_column_expression(x) for x in instance_set.metadata_instances]) ) return SelectColumnSet( metric_columns=metric_cols, @@ -248,7 +248,7 @@ def transform(self, instance_set: InstanceSet) -> SelectColumnSet: # noqa: D dimension_columns=dimension_cols, time_dimension_columns=time_dimension_cols, identifier_columns=identifier_cols, - extra_columns=extra_cols, + metadata_columns=metadata_cols, ) @@ -445,7 +445,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=tuple(time_dimension_instances_with_additional_link), identifier_instances=tuple(identifier_instances_with_additional_link), metric_instances=(), - extra_instances=(), + metadata_instances=(), ) @@ -487,7 +487,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=filtered_time_dimension_instances, identifier_instances=filtered_identifier_instances, metric_instances=instance_set.metric_instances, - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) return output @@ -546,7 +546,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ), 
identifier_instances=tuple(x for x in instance_set.identifier_instances if self._should_pass(x.spec)), metric_instances=tuple(x for x in instance_set.metric_instances if self._should_pass(x.spec)), - extra_instances=tuple(x for x in instance_set.extra_instances if self._should_pass(x.spec)), + metadata_instances=tuple(x for x in instance_set.metadata_instances if self._should_pass(x.spec)), ) return output @@ -585,7 +585,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) @@ -639,7 +639,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) @@ -656,7 +656,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances + tuple(self._metric_instances), - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) @@ -670,7 +670,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=instance_set.metric_instances, - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) @@ -684,7 +684,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=instance_set.time_dimension_instances, identifier_instances=instance_set.identifier_instances, metric_instances=(), - extra_instances=instance_set.extra_instances, + metadata_instances=instance_set.metadata_instances, ) @@ -803,14 +803,16 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D ) ) - output_extra_instances = [] - for input_extra_instance in instance_set.extra_instances: - output_extra_instances.append( - ExtraInstance( + output_metadata_instances = [] + for input_metadata_instance in instance_set.metadata_instances: + output_metadata_instances.append( + MetadataInstance( associated_columns=( - self._column_association_resolver.resolve_extra_spec(extra_spec=input_extra_instance.spec), + self._column_association_resolver.resolve_metadata_spec( + metadata_spec=input_metadata_instance.spec + ), ), - spec=input_extra_instance.spec, + spec=input_metadata_instance.spec, ) ) @@ -820,7 +822,7 @@ def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D time_dimension_instances=tuple(output_time_dimension_instances), identifier_instances=tuple(output_identifier_instances), metric_instances=tuple(output_metric_instances), - extra_instances=tuple(output_extra_instances), + metadata_instances=tuple(output_metadata_instances), ) diff --git a/metricflow/plan_conversion/select_column_gen.py b/metricflow/plan_conversion/select_column_gen.py index 7ffb31e95f..1e313ed2ef 100644 --- a/metricflow/plan_conversion/select_column_gen.py +++ 
b/metricflow/plan_conversion/select_column_gen.py @@ -18,7 +18,7 @@ class SelectColumnSet: dimension_columns: List[SqlSelectColumn] = field(default_factory=list) time_dimension_columns: List[SqlSelectColumn] = field(default_factory=list) identifier_columns: List[SqlSelectColumn] = field(default_factory=list) - extra_columns: List[SqlSelectColumn] = field(default_factory=list) + metadata_columns: List[SqlSelectColumn] = field(default_factory=list) def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: """Combine the select columns by type.""" @@ -28,7 +28,7 @@ def merge(self, other_set: SelectColumnSet) -> SelectColumnSet: dimension_columns=self.dimension_columns + other_set.dimension_columns, time_dimension_columns=self.time_dimension_columns + other_set.time_dimension_columns, identifier_columns=self.identifier_columns + other_set.identifier_columns, - extra_columns=self.extra_columns + other_set.extra_columns, + metadata_columns=self.metadata_columns + other_set.metadata_columns, ) def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: @@ -40,7 +40,7 @@ def as_tuple(self) -> Tuple[SqlSelectColumn, ...]: + self.dimension_columns + self.metric_columns + self.measure_columns - + self.extra_columns + + self.metadata_columns ) def without_measure_columns(self) -> SelectColumnSet: @@ -50,5 +50,5 @@ def without_measure_columns(self) -> SelectColumnSet: dimension_columns=self.dimension_columns, time_dimension_columns=self.time_dimension_columns, identifier_columns=self.identifier_columns, - extra_columns=self.extra_columns, + metadata_columns=self.metadata_columns, ) diff --git a/metricflow/specs.py b/metricflow/specs.py index 4a9832831a..8701cb244f 100644 --- a/metricflow/specs.py +++ b/metricflow/specs.py @@ -72,7 +72,7 @@ def resolve_identifier_spec(self, identifier_spec: IdentifierSpec) -> Tuple[Colu pass @abstractmethod - def resolve_extra_spec(self, extra_spec: ExtraSpec) -> ColumnAssociation: # noqa: D + def resolve_metadata_spec(self, metadata_spec: MetadataSpec) -> ColumnAssociation: # noqa: D pass @@ -112,21 +112,21 @@ def qualified_name(self) -> str: @dataclass(frozen=True) -class ExtraSpec(InstanceSpec): +class MetadataSpec(InstanceSpec): """A specification for a specification that is built during the dataflow plan and not defined in config.""" element_name: str def column_associations(self, resolver: ColumnAssociationResolver) -> Tuple[ColumnAssociation, ...]: # noqa: D - return (resolver.resolve_extra_spec(self),) + return (resolver.resolve_metadata_spec(self),) @property def qualified_name(self) -> str: # noqa: D return self.element_name @staticmethod - def from_name(name: str) -> ExtraSpec: # noqa: D - return ExtraSpec(element_name=name) + def from_name(name: str) -> MetadataSpec: # noqa: D + return MetadataSpec(element_name=name) @dataclass(frozen=True) @@ -598,7 +598,7 @@ class InstanceSpecSet(SerializableDataclass): dimension_specs: Tuple[DimensionSpec, ...] = () identifier_specs: Tuple[IdentifierSpec, ...] = () time_dimension_specs: Tuple[TimeDimensionSpec, ...] = () - extra_specs: Tuple[ExtraSpec, ...] = () + metadata_specs: Tuple[MetadataSpec, ...] 
= () def merge(self, others: Sequence[InstanceSpecSet]) -> InstanceSpecSet: """Merge all sets into one set, without de-duplication.""" @@ -611,7 +611,8 @@ def merge(self, others: Sequence[InstanceSpecSet]) -> InstanceSpecSet: + tuple(itertools.chain.from_iterable([x.identifier_specs for x in others])), time_dimension_specs=self.time_dimension_specs + tuple(itertools.chain.from_iterable([x.time_dimension_specs for x in others])), - extra_specs=self.extra_specs + tuple(itertools.chain.from_iterable([x.extra_specs for x in others])), + metadata_specs=self.metadata_specs + + tuple(itertools.chain.from_iterable([x.metadata_specs for x in others])), ) @property @@ -628,7 +629,7 @@ def all_specs(self) -> Sequence[InstanceSpec]: # noqa: D self.time_dimension_specs, self.identifier_specs, self.metric_specs, - self.extra_specs, + self.metadata_specs, ) ) diff --git a/metricflow/sql/sql_exprs.py b/metricflow/sql/sql_exprs.py index 7e8e48c742..df88d60f15 100644 --- a/metricflow/sql/sql_exprs.py +++ b/metricflow/sql/sql_exprs.py @@ -792,7 +792,7 @@ class SqlWindowFunction(Enum): @dataclass(frozen=True) -class SqlWindowOrderByArg: +class SqlWindowOrderByArgument: """In window functions, the ORDER BY clause can accept an expr, ordering, null ranking.""" expr: SqlExpressionNode @@ -818,7 +818,7 @@ def __init__( sql_function: SqlWindowFunction, sql_function_args: Optional[List[SqlExpressionNode]] = None, partition_by_args: Optional[List[SqlExpressionNode]] = None, - order_by_args: Optional[List[SqlWindowOrderByArg]] = None, + order_by_args: Optional[List[SqlWindowOrderByArgument]] = None, ) -> None: """Constructor. @@ -881,7 +881,7 @@ def partition_by_args(self) -> List[SqlExpressionNode]: # noqa: D return self._partition_by_args or [] @property - def order_by_args(self) -> List[SqlWindowOrderByArg]: # noqa: D + def order_by_args(self) -> List[SqlWindowOrderByArgument]: # noqa: D return self._order_by_args or [] def __repr__(self) -> str: # noqa: D @@ -901,7 +901,7 @@ def rewrite( # noqa: D x.rewrite(column_replacements, should_render_table_alias) for x in self.partition_by_args ], order_by_args=[ - SqlWindowOrderByArg( + SqlWindowOrderByArgument( expr=x.expr.rewrite(column_replacements, should_render_table_alias), descending=x.descending, nulls_last=x.nulls_last, @@ -919,8 +919,11 @@ def lineage(self) -> SqlExpressionTreeLineage: # noqa: D def matches(self, other: SqlExpressionNode) -> bool: # noqa: D if not isinstance(other, SqlWindowFunctionExpression): return False - order_by_matches = all(x == y for x, y in itertools.zip_longest(self.order_by_args, other.order_by_args)) - return self.sql_function == other.sql_function and order_by_matches and self._parents_match(other) + return ( + self.sql_function == other.sql_function + and self.order_by_args == other.order_by_args + and self._parents_match(other) + ) class SqlNullExpression(SqlExpressionNode): @@ -1247,7 +1250,7 @@ class SqlRatioComputationExpression(SqlExpressionNode): """Node for expressing Ratio metrics to allow for appropriate casting to float/double in each engine In future we might wish to break this up into a set of nodes, e.g., SqlCastExpression and SqlMathExpression - or even add CAST to SqlAggregateFunctionExpression. However, at this time the only mathematical operation we encode + or even add CAST to SqlFunctionExpression. However, at this time the only mathematical operation we encode is division, and we only use that for ratios. 
Similarly, the only times we do typecasting are when we are coercing timestamps (already handled) or computing ratio metrics. """ diff --git a/metricflow/test/sql/test_sql_expr_render.py b/metricflow/test/sql/test_sql_expr_render.py index 294627868f..a2f7a6cc52 100644 --- a/metricflow/test/sql/test_sql_expr_render.py +++ b/metricflow/test/sql/test_sql_expr_render.py @@ -24,7 +24,7 @@ SqlBetweenExpression, SqlWindowFunctionExpression, SqlWindowFunction, - SqlWindowOrderByArg, + SqlWindowOrderByArgument, ) from metricflow.time.time_granularity import TimeGranularity @@ -251,12 +251,12 @@ def test_window_function_expr(default_expr_renderer: DefaultSqlExpressionRendere SqlColumnReferenceExpression(SqlColumnReference("b", "col1")), ], order_by_args=[ - SqlWindowOrderByArg( + SqlWindowOrderByArgument( expr=SqlColumnReferenceExpression(SqlColumnReference("a", "col0")), descending=True, nulls_last=False, ), - SqlWindowOrderByArg( + SqlWindowOrderByArgument( expr=SqlColumnReferenceExpression(SqlColumnReference("b", "col0")), descending=False, nulls_last=True, From 1b98d2b4a04efbe6414f93e7c75d588f15d83c33 Mon Sep 17 00:00:00 2001 From: William Deng Date: Wed, 30 Nov 2022 15:23:29 -0500 Subject: [PATCH 8/8] revert AppendRowNumberColumnNode --- metricflow/dag/id_generation.py | 1 - metricflow/dataflow/builder/costing.py | 6 - metricflow/dataflow/dataflow_plan.py | 44 -- .../model/validations/unique_valid_name.py | 3 - metricflow/plan_conversion/dataflow_to_sql.py | 63 --- .../test_dataflow_to_sql_plan.py | 20 - ...t_append_row_number_column_node__plan0.sql | 107 ----- ...ow_number_column_node__plan0_optimized.sql | 56 --- ...t_append_row_number_column_node__plan0.xml | 406 ------------------ 9 files changed, 706 deletions(-) delete mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql delete mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql delete mode 100644 metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml diff --git a/metricflow/dag/id_generation.py b/metricflow/dag/id_generation.py index 8a91f895ef..349771b247 100644 --- a/metricflow/dag/id_generation.py +++ b/metricflow/dag/id_generation.py @@ -21,7 +21,6 @@ DATAFLOW_NODE_CONSTRAIN_TIME_RANGE_ID_PREFIX = "ctr" DATAFLOW_NODE_SET_MEASURE_AGGREGATION_TIME = "sma" DATAFLOW_NODE_SEMI_ADDITIVE_JOIN_ID_PREFIX = "saj" -DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX = "rnum" SQL_EXPR_COLUMN_REFERENCE_ID_PREFIX = "cr" SQL_EXPR_COMPARISON_ID_PREFIX = "cmp" diff --git a/metricflow/dataflow/builder/costing.py b/metricflow/dataflow/builder/costing.py index 19138f8b1a..bd8c2d560f 100644 --- a/metricflow/dataflow/builder/costing.py +++ b/metricflow/dataflow/builder/costing.py @@ -32,7 +32,6 @@ WriteToResultTableNode, SemiAdditiveJoinNode, MetricTimeDimensionTransformNode, - AppendRowNumberColumnNode, ) @@ -162,8 +161,3 @@ def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode[SourceDataSet # Add number of joins to the cost. 
node_cost = DefaultCost(num_joins=1) return DefaultCost.sum(parent_costs + [node_cost]) - - def visit_append_row_number_column_node( # noqa: D - self, node: AppendRowNumberColumnNode[SourceDataSetT] - ) -> DefaultCost: - return DefaultCost.sum([x.accept(self) for x in node.parent_nodes]) diff --git a/metricflow/dataflow/dataflow_plan.py b/metricflow/dataflow/dataflow_plan.py index 260145d584..117987c53f 100644 --- a/metricflow/dataflow/dataflow_plan.py +++ b/metricflow/dataflow/dataflow_plan.py @@ -25,7 +25,6 @@ DATAFLOW_NODE_COMBINE_METRICS_ID_PREFIX, DATAFLOW_NODE_CONSTRAIN_TIME_RANGE_ID_PREFIX, DATAFLOW_NODE_SET_MEASURE_AGGREGATION_TIME, - DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX, ) from metricflow.dag.mf_dag import DagNode, DisplayedProperty, MetricFlowDag, NodeId from metricflow.dataflow.builder.partitions import ( @@ -161,12 +160,6 @@ def visit_metric_time_dimension_transform_node( # noqa: D ) -> VisitorOutputT: pass - @abstractmethod - def visit_append_row_number_column_node( # noqa: D - self, node: AppendRowNumberColumnNode[SourceDataSetT] - ) -> VisitorOutputT: - pass - class BaseOutput(Generic[SourceDataSetT], DataflowPlanNode[SourceDataSetT], ABC): """A node that outputs data in a "base" format. @@ -984,43 +977,6 @@ def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D ] -class AppendRowNumberColumnNode(Generic[SourceDataSetT], BaseOutput[SourceDataSetT]): - """A node transforms the input data set so that it contains the row number. - - The metric time dimension is used later to aggregate all measures in the data set. - - Input: a data set - - Output: a data set with the addition of an primary identifier that delegates the row number. - """ - - def __init__( # noqa: D - self, - parent_node: BaseOutput[SourceDataSetT], - ) -> None: - self._parent_node = parent_node - super().__init__(node_id=self.create_unique_id(), parent_nodes=[parent_node]) - - @classmethod - def id_prefix(cls) -> str: # noqa: D - return DATAFLOW_NODE_APPEND_ROW_NUMBER_COLUMN_PREFIX - - def accept(self, visitor: DataflowPlanNodeVisitor[SourceDataSetT, VisitorOutputT]) -> VisitorOutputT: # noqa: D - return visitor.visit_append_row_number_column_node(self) - - @property - def description(self) -> str: # noqa: D - return "Append row number column" - - @property - def displayed_properties(self) -> List[DisplayedProperty]: # noqa: D - return super().displayed_properties - - @property - def parent_node(self) -> BaseOutput[SourceDataSetT]: # noqa: D - return self._parent_node - - class DataflowPlan(Generic[SourceDataSetT], MetricFlowDag[SinkOutput[SourceDataSetT]]): """Describes the flow of metric data as it goes from source nodes to sink nodes in the graph.""" diff --git a/metricflow/model/validations/unique_valid_name.py b/metricflow/model/validations/unique_valid_name.py index 1b93ce9011..518a67fb0c 100644 --- a/metricflow/model/validations/unique_valid_name.py +++ b/metricflow/model/validations/unique_valid_name.py @@ -35,7 +35,6 @@ class MetricFlowReservedKeywords(enum.Enum): """Enumeration of reserved keywords with helper for accessing the reason they are reserved""" METRIC_TIME = "metric_time" - ROW_NUMBER = "mf_row_number" @staticmethod def get_reserved_reason(keyword: MetricFlowReservedKeywords) -> str: @@ -45,8 +44,6 @@ def get_reserved_reason(keyword: MetricFlowReservedKeywords) -> str: "Used as the query input for creating time series metrics from measures with " "different time dimension names." 
) - elif keyword is MetricFlowReservedKeywords.ROW_NUMBER: - return "Used by the framework to denote as a column name for row number column." else: assert_values_exhausted(keyword) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 4aabf99472..fa06d90214 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -28,11 +28,9 @@ JoinOverTimeRangeNode, SemiAdditiveJoinNode, MetricTimeDimensionTransformNode, - AppendRowNumberColumnNode, ) from metricflow.dataset.dataset import DataSet from metricflow.instances import ( - MetadataInstance, InstanceSet, MetricInstance, MetricModelReference, @@ -40,7 +38,6 @@ ) from metricflow.model.objects.metric import MetricType from metricflow.model.semantic_model import SemanticModel -from metricflow.model.validations.unique_valid_name import MetricFlowReservedKeywords from metricflow.object_utils import assert_values_exhausted from metricflow.plan_conversion.instance_converters import ( AliasAggregatedMeasures, @@ -49,7 +46,6 @@ AddMetrics, CreateSelectColumnsForInstances, CreateSelectColumnsWithMeasuresAggregated, - CreateSqlColumnReferencesForInstances, create_select_columns_for_instance_sets, AddLinkToLinkableElements, FilterElements, @@ -75,7 +71,6 @@ from metricflow.protocols.sql_client import SqlEngineAttributes, SqlEngine from metricflow.specs import ( ColumnAssociationResolver, - MetadataSpec, MetricSpec, TimeDimensionSpec, MeasureSpec, @@ -95,9 +90,6 @@ SqlStringLiteralExpression, SqlBetweenExpression, SqlAggregateFunctionExpression, - SqlWindowFunction, - SqlWindowFunctionExpression, - SqlWindowOrderByArgument, ) from metricflow.sql.sql_plan import ( SqlQueryPlan, @@ -1272,58 +1264,3 @@ def visit_semi_additive_join_node(self, node: SemiAdditiveJoinNode) -> SqlDataSe order_bys=(), ), ) - - def visit_append_row_number_column_node(self, node: AppendRowNumberColumnNode) -> SqlDataSet: - """Appends a row number identifier column to the data set.""" - input_data_set: SqlDataSet = node.parent_node.accept(self) - input_data_set_alias = self._next_unique_table_alias() - - row_number_spec = MetadataSpec.from_name(MetricFlowReservedKeywords.ROW_NUMBER.value) - row_number_spec_column_name = self._column_association_resolver.resolve_metadata_spec(row_number_spec) - - input_sql_column_references = input_data_set.instance_set.transform( - CreateSqlColumnReferencesForInstances(input_data_set_alias, self._column_association_resolver) - ) - # Build enumerated column using ROW_NUMBER function - row_number_select_column = SqlSelectColumn( - expr=SqlWindowFunctionExpression( - sql_function=SqlWindowFunction.ROW_NUMBER, - order_by_args=[SqlWindowOrderByArgument(expr=x) for x in input_sql_column_references], - ), - column_alias=row_number_spec_column_name.column_name, - ) - - # Build output instance set - output_metadata_instances = list(input_data_set.instance_set.metadata_instances) - output_metadata_instances.append( - MetadataInstance( - associated_columns=(row_number_spec_column_name,), - spec=row_number_spec, - ) - ) - output_instance_set = InstanceSet( - measure_instances=input_data_set.instance_set.measure_instances, - dimension_instances=input_data_set.instance_set.dimension_instances, - time_dimension_instances=input_data_set.instance_set.time_dimension_instances, - identifier_instances=input_data_set.instance_set.identifier_instances, - metric_instances=input_data_set.instance_set.metric_instances, - 
metadata_instances=tuple(output_metadata_instances), - ) - output_instance_set = ChangeAssociatedColumns(self._column_association_resolver).transform(output_instance_set) - - sql_select_columns = input_data_set.instance_set.transform( - CreateSelectColumnsForInstances(input_data_set_alias, self._column_association_resolver) - ).as_tuple() + (row_number_select_column,) - return SqlDataSet( - instance_set=output_instance_set, - sql_select_node=SqlSelectStatementNode( - description=node.description, - select_columns=sql_select_columns, - from_source=input_data_set.sql_select_node, - from_source_alias=input_data_set_alias, - joins_descs=(), - group_bys=(), - where=None, - order_bys=(), - ), - ) diff --git a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py index 45e9c05602..b7c687d515 100644 --- a/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py +++ b/metricflow/test/plan_conversion/test_dataflow_to_sql_plan.py @@ -7,7 +7,6 @@ from metricflow.constraints.time_constraint import TimeRangeConstraint from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder from metricflow.dataflow.dataflow_plan import ( - AppendRowNumberColumnNode, DataflowPlan, WriteToResultDataframeNode, FilterElementsNode, @@ -1497,22 +1496,3 @@ def test_metric_with_measures_from_multiple_sources_no_dimensions( # noqa: D sql_client=sql_client, node=dataflow_plan.sink_output_nodes[0].parent_node, ) - - -def test_append_row_number_column_node( - request: FixtureRequest, - mf_test_session_state: MetricFlowTestSessionState, - consistent_id_object_repository: ConsistentIdObjectRepository, - dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter[DataSourceDataSet], - sql_client: SqlClient, -) -> None: - """Tests converting a dataflow plan to a SQL query plan using a AppendRowNumberColumnNode.""" - measure_source_node = consistent_id_object_repository.simple_model_read_nodes["bookings_source"] - append_row_number_column_node = AppendRowNumberColumnNode[DataSourceDataSet](parent_node=measure_source_node) - convert_and_check( - request=request, - mf_test_session_state=mf_test_session_state, - dataflow_to_sql_converter=dataflow_to_sql_converter, - sql_client=sql_client, - node=append_row_number_column_node, - ) diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql deleted file mode 100644 index d85fd18192..0000000000 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0.sql +++ /dev/null @@ -1,107 +0,0 @@ --- Append row number column -SELECT - subq_0.ds - , subq_0.ds__week - , subq_0.ds__month - , subq_0.ds__quarter - , subq_0.ds__year - , subq_0.ds_partitioned - , subq_0.ds_partitioned__week - , subq_0.ds_partitioned__month - , subq_0.ds_partitioned__quarter - , subq_0.ds_partitioned__year - , subq_0.booking_paid_at - , subq_0.booking_paid_at__week - , subq_0.booking_paid_at__month - , subq_0.booking_paid_at__quarter - , subq_0.booking_paid_at__year - , subq_0.create_a_cycle_in_the_join_graph__ds - , subq_0.create_a_cycle_in_the_join_graph__ds__week - , subq_0.create_a_cycle_in_the_join_graph__ds__month - , subq_0.create_a_cycle_in_the_join_graph__ds__quarter - , subq_0.create_a_cycle_in_the_join_graph__ds__year - , 
subq_0.create_a_cycle_in_the_join_graph__ds_partitioned - , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__week - , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__month - , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__quarter - , subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__year - , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at - , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__week - , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__month - , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__quarter - , subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__year - , subq_0.listing - , subq_0.guest - , subq_0.host - , subq_0.create_a_cycle_in_the_join_graph - , subq_0.create_a_cycle_in_the_join_graph__listing - , subq_0.create_a_cycle_in_the_join_graph__guest - , subq_0.create_a_cycle_in_the_join_graph__host - , subq_0.is_instant - , subq_0.create_a_cycle_in_the_join_graph__is_instant - , subq_0.bookings - , subq_0.instant_bookings - , subq_0.booking_value - , subq_0.max_booking_value - , subq_0.min_booking_value - , subq_0.bookers - , subq_0.average_booking_value - , subq_0.booking_payments - , subq_0.referred_bookings - , row_number() OVER (ORDER BY subq_0.bookings, subq_0.instant_bookings, subq_0.booking_value, subq_0.max_booking_value, subq_0.min_booking_value, subq_0.bookers, subq_0.average_booking_value, subq_0.booking_payments, subq_0.referred_bookings, subq_0.is_instant, subq_0.create_a_cycle_in_the_join_graph__is_instant, subq_0.ds, subq_0.ds__week, subq_0.ds__month, subq_0.ds__quarter, subq_0.ds__year, subq_0.ds_partitioned, subq_0.ds_partitioned__week, subq_0.ds_partitioned__month, subq_0.ds_partitioned__quarter, subq_0.ds_partitioned__year, subq_0.booking_paid_at, subq_0.booking_paid_at__week, subq_0.booking_paid_at__month, subq_0.booking_paid_at__quarter, subq_0.booking_paid_at__year, subq_0.create_a_cycle_in_the_join_graph__ds, subq_0.create_a_cycle_in_the_join_graph__ds__week, subq_0.create_a_cycle_in_the_join_graph__ds__month, subq_0.create_a_cycle_in_the_join_graph__ds__quarter, subq_0.create_a_cycle_in_the_join_graph__ds__year, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__week, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__month, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__quarter, subq_0.create_a_cycle_in_the_join_graph__ds_partitioned__year, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__week, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__month, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__quarter, subq_0.create_a_cycle_in_the_join_graph__booking_paid_at__year, subq_0.listing, subq_0.guest, subq_0.host, subq_0.create_a_cycle_in_the_join_graph, subq_0.create_a_cycle_in_the_join_graph__listing, subq_0.create_a_cycle_in_the_join_graph__guest, subq_0.create_a_cycle_in_the_join_graph__host) AS mf_row_number -FROM ( - -- Read Elements From Data Source 'bookings_source' - SELECT - 1 AS bookings - , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings - , bookings_source_src_10001.booking_value - , bookings_source_src_10001.booking_value AS max_booking_value - , bookings_source_src_10001.booking_value AS min_booking_value - , bookings_source_src_10001.guest_id AS bookers - , bookings_source_src_10001.booking_value AS average_booking_value - , bookings_source_src_10001.booking_value AS booking_payments - 
, CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings - , bookings_source_src_10001.is_instant - , bookings_source_src_10001.ds - , DATE_TRUNC('week', bookings_source_src_10001.ds) AS ds__week - , DATE_TRUNC('month', bookings_source_src_10001.ds) AS ds__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS ds__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds) AS ds__year - , bookings_source_src_10001.ds_partitioned - , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__week - , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS ds_partitioned__year - , bookings_source_src_10001.booking_paid_at - , DATE_TRUNC('week', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__week - , DATE_TRUNC('month', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__month - , DATE_TRUNC('quarter', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__quarter - , DATE_TRUNC('year', bookings_source_src_10001.booking_paid_at) AS booking_paid_at__year - , bookings_source_src_10001.is_instant AS create_a_cycle_in_the_join_graph__is_instant - , bookings_source_src_10001.ds AS create_a_cycle_in_the_join_graph__ds - , DATE_TRUNC('week', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__week - , DATE_TRUNC('month', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds) AS create_a_cycle_in_the_join_graph__ds__year - , bookings_source_src_10001.ds_partitioned AS create_a_cycle_in_the_join_graph__ds_partitioned - , DATE_TRUNC('week', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__week - , DATE_TRUNC('month', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__month - , DATE_TRUNC('quarter', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__quarter - , DATE_TRUNC('year', bookings_source_src_10001.ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__year - , bookings_source_src_10001.booking_paid_at AS create_a_cycle_in_the_join_graph__booking_paid_at - , DATE_TRUNC('week', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__week - , DATE_TRUNC('month', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__month - , DATE_TRUNC('quarter', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__quarter - , DATE_TRUNC('year', bookings_source_src_10001.booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__year - , bookings_source_src_10001.listing_id AS listing - , bookings_source_src_10001.guest_id AS guest - , bookings_source_src_10001.host_id AS host - , bookings_source_src_10001.guest_id AS create_a_cycle_in_the_join_graph - , bookings_source_src_10001.listing_id AS create_a_cycle_in_the_join_graph__listing - , bookings_source_src_10001.guest_id AS create_a_cycle_in_the_join_graph__guest - , bookings_source_src_10001.host_id AS create_a_cycle_in_the_join_graph__host - FROM ( - -- User Defined SQL Query - SELECT * FROM 
***************************.fct_bookings - ) bookings_source_src_10001 -) subq_0 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql deleted file mode 100644 index 56b803cbe5..0000000000 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/DuckDbSqlClient/test_append_row_number_column_node__plan0_optimized.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Read Elements From Data Source 'bookings_source' --- Append row number column -SELECT - ds - , DATE_TRUNC('week', ds) AS ds__week - , DATE_TRUNC('month', ds) AS ds__month - , DATE_TRUNC('quarter', ds) AS ds__quarter - , DATE_TRUNC('year', ds) AS ds__year - , ds_partitioned - , DATE_TRUNC('week', ds_partitioned) AS ds_partitioned__week - , DATE_TRUNC('month', ds_partitioned) AS ds_partitioned__month - , DATE_TRUNC('quarter', ds_partitioned) AS ds_partitioned__quarter - , DATE_TRUNC('year', ds_partitioned) AS ds_partitioned__year - , booking_paid_at - , DATE_TRUNC('week', booking_paid_at) AS booking_paid_at__week - , DATE_TRUNC('month', booking_paid_at) AS booking_paid_at__month - , DATE_TRUNC('quarter', booking_paid_at) AS booking_paid_at__quarter - , DATE_TRUNC('year', booking_paid_at) AS booking_paid_at__year - , ds AS create_a_cycle_in_the_join_graph__ds - , DATE_TRUNC('week', ds) AS create_a_cycle_in_the_join_graph__ds__week - , DATE_TRUNC('month', ds) AS create_a_cycle_in_the_join_graph__ds__month - , DATE_TRUNC('quarter', ds) AS create_a_cycle_in_the_join_graph__ds__quarter - , DATE_TRUNC('year', ds) AS create_a_cycle_in_the_join_graph__ds__year - , ds_partitioned AS create_a_cycle_in_the_join_graph__ds_partitioned - , DATE_TRUNC('week', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__week - , DATE_TRUNC('month', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__month - , DATE_TRUNC('quarter', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__quarter - , DATE_TRUNC('year', ds_partitioned) AS create_a_cycle_in_the_join_graph__ds_partitioned__year - , booking_paid_at AS create_a_cycle_in_the_join_graph__booking_paid_at - , DATE_TRUNC('week', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__week - , DATE_TRUNC('month', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__month - , DATE_TRUNC('quarter', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__quarter - , DATE_TRUNC('year', booking_paid_at) AS create_a_cycle_in_the_join_graph__booking_paid_at__year - , listing_id AS listing - , guest_id AS guest - , host_id AS host - , guest_id AS create_a_cycle_in_the_join_graph - , listing_id AS create_a_cycle_in_the_join_graph__listing - , guest_id AS create_a_cycle_in_the_join_graph__guest - , host_id AS create_a_cycle_in_the_join_graph__host - , is_instant - , is_instant AS create_a_cycle_in_the_join_graph__is_instant - , 1 AS bookings - , CASE WHEN is_instant THEN 1 ELSE 0 END AS instant_bookings - , booking_value - , booking_value AS max_booking_value - , booking_value AS min_booking_value - , guest_id AS bookers - , booking_value AS average_booking_value - , booking_value AS booking_payments - , CASE WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END AS referred_bookings - , row_number() OVER (ORDER BY 1, CASE WHEN is_instant THEN 1 ELSE 0 END, booking_value, guest_id, CASE 
WHEN referrer_id IS NOT NULL THEN 1 ELSE 0 END, is_instant, ds, DATE_TRUNC('week', ds), DATE_TRUNC('month', ds), DATE_TRUNC('quarter', ds), DATE_TRUNC('year', ds), ds_partitioned, DATE_TRUNC('week', ds_partitioned), DATE_TRUNC('month', ds_partitioned), DATE_TRUNC('quarter', ds_partitioned), DATE_TRUNC('year', ds_partitioned), booking_paid_at, DATE_TRUNC('week', booking_paid_at), DATE_TRUNC('month', booking_paid_at), DATE_TRUNC('quarter', booking_paid_at), DATE_TRUNC('year', booking_paid_at), listing_id, host_id) AS mf_row_number -FROM ( - -- User Defined SQL Query - SELECT * FROM ***************************.fct_bookings -) bookings_source_src_10001 diff --git a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml b/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml deleted file mode 100644 index 2337deec0d..0000000000 --- a/metricflow/test/snapshots/test_dataflow_to_sql_plan.py/SqlQueryPlan/test_append_row_number_column_node__plan0.xml +++ /dev/null @@ -1,406 +0,0 @@
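
For reference only (not part of either patch): the reverted AppendRowNumberColumnNode built its enumerated column as a window-function select column, as the deleted visit_append_row_number_column_node code above shows. Below is a minimal sketch of that pattern, assuming the class names and import paths that appear in this diff; the table alias and column names are hypothetical stand-ins, and the sketch is illustrative rather than a drop-in replacement for the removed node.

# Sketch: build "row_number() OVER (ORDER BY ...) AS mf_row_number" as a select column.
# Assumes the metricflow revision shown in this diff; aliases/columns below are made up.
from metricflow.sql.sql_exprs import (
    SqlColumnReference,
    SqlColumnReferenceExpression,
    SqlWindowFunction,
    SqlWindowFunctionExpression,
    SqlWindowOrderByArgument,
)
from metricflow.sql.sql_plan import SqlSelectColumn

# Columns that should determine the row ordering (stand-ins for the input instance set's columns).
order_by_exprs = [
    SqlColumnReferenceExpression(SqlColumnReference("subq_0", "bookings")),
    SqlColumnReferenceExpression(SqlColumnReference("subq_0", "ds")),
]

row_number_column = SqlSelectColumn(
    expr=SqlWindowFunctionExpression(
        sql_function=SqlWindowFunction.ROW_NUMBER,
        order_by_args=[SqlWindowOrderByArgument(expr=expr) for expr in order_by_exprs],
    ),
    column_alias="mf_row_number",
)
# Rendered by the expression renderer, this yields SQL along the lines of:
#   row_number() OVER (ORDER BY subq_0.bookings, subq_0.ds) AS mf_row_number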