From b53a655806e7eb65407403d0fa88e7bb8ffa4022 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Wed, 18 Dec 2024 22:26:28 -0800 Subject: [PATCH] Don't reduce verbose SQL expressions --- .../metricflow_semantics/sql/sql_exprs.py | 16 ++++++++++++++++ .../sql/optimizer/rewriting_sub_query_reducer.py | 10 +++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/metricflow-semantics/metricflow_semantics/sql/sql_exprs.py b/metricflow-semantics/metricflow_semantics/sql/sql_exprs.py index 2b8d8e667..15e243b5a 100644 --- a/metricflow-semantics/metricflow_semantics/sql/sql_exprs.py +++ b/metricflow-semantics/metricflow_semantics/sql/sql_exprs.py @@ -71,6 +71,14 @@ def as_window_function_expression(self) -> Optional[SqlWindowFunctionExpression] """If this is a window function expression, return self.""" return None + @property + def is_verbose(self) -> bool: + """Denotes if the statement is typically verbose, and therefore can be hard to read when optimized. + + This is helpful in determining if statements will be harder to read when collapsed. + """ + return False + @abstractmethod def rewrite( self, @@ -1197,6 +1205,10 @@ def matches(self, other: SqlExpressionNode) -> bool: # noqa: D102 and self.sql_function_args == other.sql_function_args ) + @property + def is_verbose(self) -> bool: # noqa: D102 + return True + @dataclass(frozen=True, eq=False) class SqlNullExpression(SqlExpressionNode): @@ -1884,6 +1896,10 @@ def matches(self, other: SqlExpressionNode) -> bool: # noqa: D102 return False return self.when_to_then_exprs == other.when_to_then_exprs and self.else_expr == other.else_expr + @property + def is_verbose(self) -> bool: # noqa: D102 + return True + class SqlArithmeticOperator(Enum): """Arithmetic operator used to do math in a SQL expression.""" diff --git a/metricflow/sql/optimizer/rewriting_sub_query_reducer.py b/metricflow/sql/optimizer/rewriting_sub_query_reducer.py index 82efa5dcd..537ed68fb 100644 --- a/metricflow/sql/optimizer/rewriting_sub_query_reducer.py +++ b/metricflow/sql/optimizer/rewriting_sub_query_reducer.py @@ -241,6 +241,10 @@ def _current_node_can_be_reduced(self, node: SqlSelectStatementNode) -> bool: if len(from_source_node_as_select_node.group_bys) > 0 and len(node.group_bys) > 0: return False + # Skip this case for readability. + if any(col.expr.is_verbose for col in from_source_node_as_select_node.select_columns): + return False + # If there is a column in the parent group by that is not used in the current select statement, don't reduce or it # would leave an unselected column in the group by and change the meaning of the query. For example, in the SQL # below, reducing would remove the `is_instant` from the select statement. @@ -497,7 +501,11 @@ def _rewrite_node_with_join(self, node: SqlSelectStatementNode) -> SqlSelectStat join_select_node = join_desc.right_source.as_select_node # Verifying that it's simple makes it easier to reason about the logic. - if not join_select_node or not SqlRewritingSubQueryReducerVisitor._is_simple_source(join_select_node): + if ( + not join_select_node + or not SqlRewritingSubQueryReducerVisitor._is_simple_source(join_select_node) + or any(col.expr.is_verbose for col in join_select_node.select_columns) + ): new_join_descs.append(join_desc) continue