dbt-labs · plypaul · Nov 13, 2024 · Nov 4, 2024 · Nov 6, 2024 · Nov 7, 2024
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
@@ -5,7 +5,7 @@
 
 from typing_extensions import override
 
-from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat_dict
 
 
 class LazyFormat:
@@ -52,7 +52,7 @@ def _str_value(self) -> str:
             message = self._message()
         else:
             message = self._message
-        return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True)
+        return mf_pformat_dict(message, self._kwargs, preserve_raw_strings=True)
 
     @override
     def __str__(self) -> str:

diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/pretty_print.py b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_print.py
@@ -5,7 +5,7 @@
 from collections.abc import Mapping
 from dataclasses import fields, is_dataclass
 from enum import Enum
-from typing import Any, Dict, List, Optional, Sized, Tuple, Union
+from typing import Any, List, Optional, Sized, Tuple, Union
 
 from dsi_pydantic_shim import BaseModel
 
@@ -428,9 +428,9 @@ def mf_pformat(  # type: ignore
         return str(obj)
 
 
-def mf_pformat_many(  # type: ignore
-    description: str,
-    obj_dict: Dict[str, Any],
+def mf_pformat_dict(  # type: ignore
+    description: Optional[str] = None,
+    obj_dict: Optional[Mapping[str, Any]] = None,
     max_line_length: int = 120,
     indent_prefix: str = "  ",
     include_object_field_names: bool = True,
@@ -444,7 +444,8 @@ def mf_pformat_many(  # type: ignore
     representation of the string. e.g. if value="foo", then "foo" instead of "'foo'". Useful for values that contain
     newlines.
     """
-    lines: List[str] = [description]
+    lines: List[str] = [description] if description is not None else []
+    obj_dict = obj_dict or {}
     for key, value in obj_dict.items():
         if preserve_raw_strings and isinstance(value, str):
             value_str = value
@@ -471,5 +472,9 @@ def mf_pformat_many(  # type: ignore
         else:
             item_block_lines = (f"{key}: {value_str}",)
         item_block = "\n".join(item_block_lines)
-        lines.append(indent(item_block))
+
+        if description is None:
+            lines.append(item_block)
+        else:
+            lines.append(indent(item_block))
     return "\n".join(lines)
diff --git a/...mantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/...mantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py
@@ -14,7 +14,7 @@
 
 from metricflow_semantics.mf_logging.formatting import indent
 from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
-from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
 from metricflow_semantics.query.group_by_item.candidate_push_down.group_by_item_candidate import GroupByItemCandidateSet
@@ -82,7 +82,7 @@ def __post_init__(self) -> None:  # noqa: D105
         # If there are errors, there shouldn't be any candidate sets.
         assert (not self.issue_set.has_errors and not self.candidate_set.is_empty) or (
             self.issue_set.has_errors and self.candidate_set.is_empty
-        ), mf_pformat_many(
+        ), mf_pformat_dict(
             "candidate_set / issue_set mismatch:", {"candidate_set": self.candidate_set, "issue_set": self.issue_set}
         )
 

diff --git a/metricflow-semantics/metricflow_semantics/query/query_resolver.py b/metricflow-semantics/metricflow_semantics/query/query_resolver.py
@@ -8,7 +8,7 @@
 from dbt_semantic_interfaces.references import MeasureReference, MetricReference, SemanticModelReference
 
 from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
-from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
 from metricflow_semantics.mf_logging.runtime import log_runtime
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
@@ -557,7 +557,7 @@ def _resolve_query(self, resolver_input_for_query: ResolverInputForQuery) -> Met
         if len(models_not_in_manifest) > 0:
             logger.error(
                 LazyFormat(
-                    lambda: mf_pformat_many(
+                    lambda: mf_pformat_dict(
                         "Semantic references that aren't in the manifest were found in the set used in "
                         "a query. This is a bug, and to avoid potential issues, they will be filtered out.",
                         {"models_not_in_manifest": models_not_in_manifest},

diff --git a/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py b/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py
@@ -56,6 +56,7 @@ def assert_snapshot_text_equal(
     expectation_description: Optional[str] = None,
 ) -> None:
     """Similar to assert_plan_snapshot_text_equal(), but with more controls on how the snapshot paths are generated."""
+    logger.debug(LazyFormat(lambda: "Generated snapshot text:\n" + indent(snapshot_text)))
     file_path = (
         str(
             snapshot_path_prefix(

diff --git a/metricflow-semantics/tests_metricflow_semantics/collection_helpers/test_pretty_print.py b/metricflow-semantics/tests_metricflow_semantics/collection_helpers/test_pretty_print.py
@@ -7,9 +7,10 @@
 
 from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension
 from dbt_semantic_interfaces.type_enums import DimensionType
+from metricflow_semantics.formatting.formatting_helpers import mf_dedent
 from metricflow_semantics.mf_logging.formatting import indent
 from metricflow_semantics.mf_logging.pretty_formattable import MetricFlowPrettyFormattable
-from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
 from metricflow_semantics.test_helpers.metric_time_dimension import MTD_SPEC_DAY
 from typing_extensions import override
 
@@ -145,7 +146,7 @@ def test_pydantic_model() -> None:  # noqa: D103
 
 
 def test_pformat_many() -> None:  # noqa: D103
-    result = mf_pformat_many("Example description:", obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})
+    result = mf_pformat_dict("Example description:", obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})
 
     assert (
         textwrap.dedent(
@@ -160,7 +161,7 @@ def test_pformat_many() -> None:  # noqa: D103
 
 
 def test_pformat_many_with_raw_strings() -> None:  # noqa: D103
-    result = mf_pformat_many("Example description:", obj_dict={"object_0": "foo\nbar"}, preserve_raw_strings=True)
+    result = mf_pformat_dict("Example description:", obj_dict={"object_0": "foo\nbar"}, preserve_raw_strings=True)
 
     assert (
         textwrap.dedent(
@@ -176,7 +177,7 @@ def test_pformat_many_with_raw_strings() -> None:  # noqa: D103
 
 
 def test_pformat_many_with_strings() -> None:  # noqa: D103
-    result = mf_pformat_many("Example description:", obj_dict={"object_0": "foo\nbar"})
+    result = mf_pformat_dict("Example description:", obj_dict={"object_0": "foo\nbar"})
     assert (
         textwrap.dedent(
             """\
@@ -202,3 +203,18 @@ def pretty_format(self) -> Optional[str]:
             return f"{self.__class__.__name__}({self.field_0:.2f})"
 
     assert mf_pformat(_ExampleDataclass(1.2345)) == f"{_ExampleDataclass.__name__}(1.23)"
+
+
+def test_pformat_dict_with_empty_message() -> None:
+    """Test `mf_pformat_dict` without a description."""
+    result = mf_pformat_dict(obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})
+
+    assert (
+        mf_dedent(
+            """
+            object_0: (1, 2, 3)
+            object_1: {4: 5}
+            """
+        )
+        == result
+    )
diff --git a/metricflow/data_table/mf_table.py b/metricflow/data_table/mf_table.py
@@ -9,7 +9,7 @@
 
 import tabulate
 from metricflow_semantics.mf_logging.formatting import indent
-from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
 from typing_extensions import Self
 
 from metricflow.data_table.column_types import CellValue, InputCellValue, row_cell_types
@@ -47,7 +47,7 @@ def __post_init__(self) -> None:  # noqa: D105
             # Check that the type of the object in the rows match.
             for column_index, cell_value in enumerate(row):
                 expected_cell_value_type = self.column_descriptions[column_index].column_type
-                assert cell_value is None or isinstance(cell_value, expected_cell_value_type), mf_pformat_many(
+                assert cell_value is None or isinstance(cell_value, expected_cell_value_type), mf_pformat_dict(
                     "Cell value type mismatch.",
                     {
                         "row_index": row_index,
@@ -59,7 +59,7 @@ def __post_init__(self) -> None:  # noqa: D105
                 )
                 # Check that datetimes don't have a timezone set.
                 if isinstance(cell_value, datetime.datetime):
-                    assert cell_value.tzinfo is None, mf_pformat_many(
+                    assert cell_value.tzinfo is None, mf_pformat_dict(
                         "Time zone provided for datetime.",
                         {
                             "row_index": row_index,

diff --git a/metricflow/dataflow/builder/node_data_set.py b/metricflow/dataflow/builder/node_data_set.py
@@ -12,13 +12,15 @@
     DataflowPlanNode,
 )
 from metricflow.dataset.sql_dataset import SqlDataSet
-from metricflow.plan_conversion.dataflow_to_sql import DataflowToSqlQueryPlanConverter
+from metricflow.plan_conversion.dataflow_to_sql import (
+    DataflowNodeToSqlSubqueryVisitor,
+)
 
 if TYPE_CHECKING:
     from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 
 
-class DataflowPlanNodeOutputDataSetResolver(DataflowToSqlQueryPlanConverter):
+class DataflowPlanNodeOutputDataSetResolver:
     """Given a node in a dataflow plan, figure out what is the data set output by that node.
 
     Recall that in the dataflow plan, the nodes represent computation, and the inputs and outputs of the nodes are
@@ -68,9 +70,11 @@ def __init__(  # noqa: D107
         _node_to_output_data_set: Optional[Dict[DataflowPlanNode, SqlDataSet]] = None,
     ) -> None:
         self._node_to_output_data_set: Dict[DataflowPlanNode, SqlDataSet] = _node_to_output_data_set or {}
-        super().__init__(
-            column_association_resolver=column_association_resolver,
-            semantic_manifest_lookup=semantic_manifest_lookup,
+        self._column_association_resolver = column_association_resolver
+        self._semantic_manifest_lookup = semantic_manifest_lookup
+        self._to_data_set_visitor = _NodeDataSetVisitor(
+            column_association_resolver=self._column_association_resolver,
+            semantic_manifest_lookup=self._semantic_manifest_lookup,
         )
 
     def get_output_data_set(self, node: DataflowPlanNode) -> SqlDataSet:
@@ -79,7 +83,7 @@ def get_output_data_set(self, node: DataflowPlanNode) -> SqlDataSet:
         # TODO: The cache needs to be pruned, but has not yet been an issue.
         """
         if node not in self._node_to_output_data_set:
-            self._node_to_output_data_set[node] = node.accept(self)
+            self._node_to_output_data_set[node] = node.accept(self._to_data_set_visitor)
 
         return self._node_to_output_data_set[node]
 
@@ -92,11 +96,13 @@ def cache_output_data_sets(self, nodes: Sequence[DataflowPlanNode]) -> None:
     def copy(self) -> DataflowPlanNodeOutputDataSetResolver:
         """Return a copy of this with the same nodes cached."""
         return DataflowPlanNodeOutputDataSetResolver(
-            column_association_resolver=self.column_association_resolver,
+            column_association_resolver=self._column_association_resolver,
             semantic_manifest_lookup=self._semantic_manifest_lookup,
             _node_to_output_data_set=dict(self._node_to_output_data_set),
         )
 
+
+class _NodeDataSetVisitor(DataflowNodeToSqlSubqueryVisitor):
     @override
     def _next_unique_table_alias(self) -> str:
         """Return the next unique table alias to use in generating queries.

diff --git a/metricflow/dataset/sql_dataset.py b/metricflow/dataset/sql_dataset.py
@@ -5,6 +5,7 @@
 from dbt_semantic_interfaces.references import SemanticModelReference
 from metricflow_semantics.assert_one_arg import assert_exactly_one_arg_set
 from metricflow_semantics.instances import EntityInstance, InstanceSet
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.specs.column_assoc import ColumnAssociation
 from metricflow_semantics.specs.dimension_spec import DimensionSpec
 from metricflow_semantics.specs.entity_spec import EntitySpec
@@ -42,7 +43,9 @@ def __init__(
     def sql_node(self) -> SqlQueryPlanNode:  # noqa: D102
         node_to_return = self._sql_select_node or self._sql_node
         if node_to_return is None:
-            raise RuntimeError("This node was not created with a SQL node.")
+            raise RuntimeError(
+                "This node was not created with a SQL node. This should have been prevented by the initializer."
+            )
         return node_to_return
 
     @property
@@ -52,7 +55,14 @@ def checked_sql_select_node(self) -> SqlSelectStatementNode:
         Otherwise, an exception is thrown.
         """
         if self._sql_select_node is None:
-            raise RuntimeError(f"{self} was created with a SQL node that is not a {SqlSelectStatementNode}")
+            raise RuntimeError(
+                str(
+                    LazyFormat(
+                        f"{self.__class__.__name__} was created with a SQL node that is not a {SqlSelectStatementNode.__name__}",
+                        sql_node=self.sql_node.structure_text(),
+                    )
+                )
+            )
         return self._sql_select_node
 
     def column_associations_for_entity(