Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assorted formatting / logging updates #1518

Merged
merged 6 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from typing_extensions import override

from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many
from metricflow_semantics.mf_logging.pretty_print import mf_pformat_dict


class LazyFormat:
Expand Down Expand Up @@ -52,7 +52,7 @@ def _str_value(self) -> str:
message = self._message()
else:
message = self._message
return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True)
return mf_pformat_dict(message, self._kwargs, preserve_raw_strings=True)

@override
def __str__(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Mapping
from dataclasses import fields, is_dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Sized, Tuple, Union
from typing import Any, List, Optional, Sized, Tuple, Union

from dsi_pydantic_shim import BaseModel

Expand Down Expand Up @@ -428,9 +428,9 @@ def mf_pformat( # type: ignore
return str(obj)


def mf_pformat_many( # type: ignore
description: str,
obj_dict: Dict[str, Any],
def mf_pformat_dict( # type: ignore
description: Optional[str] = None,
obj_dict: Optional[Mapping[str, Any]] = None,
max_line_length: int = 120,
indent_prefix: str = " ",
include_object_field_names: bool = True,
Expand All @@ -444,7 +444,8 @@ def mf_pformat_many( # type: ignore
representation of the string. e.g. if value="foo", then "foo" instead of "'foo'". Useful for values that contain
newlines.
"""
lines: List[str] = [description]
lines: List[str] = [description] if description is not None else []
obj_dict = obj_dict or {}
for key, value in obj_dict.items():
if preserve_raw_strings and isinstance(value, str):
value_str = value
Expand All @@ -471,5 +472,9 @@ def mf_pformat_many( # type: ignore
else:
item_block_lines = (f"{key}: {value_str}",)
item_block = "\n".join(item_block_lines)
lines.append(indent(item_block))

if description is None:
lines.append(item_block)
else:
lines.append(indent(item_block))
return "\n".join(lines)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from metricflow_semantics.mf_logging.formatting import indent
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.query.group_by_item.candidate_push_down.group_by_item_candidate import GroupByItemCandidateSet
Expand Down Expand Up @@ -82,7 +82,7 @@ def __post_init__(self) -> None: # noqa: D105
# If there are errors, there shouldn't be any candidate sets.
assert (not self.issue_set.has_errors and not self.candidate_set.is_empty) or (
self.issue_set.has_errors and self.candidate_set.is_empty
), mf_pformat_many(
), mf_pformat_dict(
"candidate_set / issue_set mismatch:", {"candidate_set": self.candidate_set, "issue_set": self.issue_set}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dbt_semantic_interfaces.references import MeasureReference, MetricReference, SemanticModelReference

from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
from metricflow_semantics.mf_logging.runtime import log_runtime
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
Expand Down Expand Up @@ -557,7 +557,7 @@ def _resolve_query(self, resolver_input_for_query: ResolverInputForQuery) -> Met
if len(models_not_in_manifest) > 0:
logger.error(
LazyFormat(
lambda: mf_pformat_many(
lambda: mf_pformat_dict(
"Semantic references that aren't in the manifest were found in the set used in "
"a query. This is a bug, and to avoid potential issues, they will be filtered out.",
{"models_not_in_manifest": models_not_in_manifest},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def assert_snapshot_text_equal(
expectation_description: Optional[str] = None,
) -> None:
"""Similar to assert_plan_snapshot_text_equal(), but with more controls on how the snapshot paths are generated."""
logger.debug(LazyFormat(lambda: "Generated snapshot text:\n" + indent(snapshot_text)))
file_path = (
str(
snapshot_path_prefix(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@

from dbt_semantic_interfaces.implementations.elements.dimension import PydanticDimension
from dbt_semantic_interfaces.type_enums import DimensionType
from metricflow_semantics.formatting.formatting_helpers import mf_dedent
from metricflow_semantics.mf_logging.formatting import indent
from metricflow_semantics.mf_logging.pretty_formattable import MetricFlowPrettyFormattable
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
from metricflow_semantics.test_helpers.metric_time_dimension import MTD_SPEC_DAY
from typing_extensions import override

Expand Down Expand Up @@ -145,7 +146,7 @@ def test_pydantic_model() -> None: # noqa: D103


def test_pformat_many() -> None: # noqa: D103
result = mf_pformat_many("Example description:", obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})
result = mf_pformat_dict("Example description:", obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})

assert (
textwrap.dedent(
Expand All @@ -160,7 +161,7 @@ def test_pformat_many() -> None: # noqa: D103


def test_pformat_many_with_raw_strings() -> None: # noqa: D103
result = mf_pformat_many("Example description:", obj_dict={"object_0": "foo\nbar"}, preserve_raw_strings=True)
result = mf_pformat_dict("Example description:", obj_dict={"object_0": "foo\nbar"}, preserve_raw_strings=True)

assert (
textwrap.dedent(
Expand All @@ -176,7 +177,7 @@ def test_pformat_many_with_raw_strings() -> None: # noqa: D103


def test_pformat_many_with_strings() -> None: # noqa: D103
result = mf_pformat_many("Example description:", obj_dict={"object_0": "foo\nbar"})
result = mf_pformat_dict("Example description:", obj_dict={"object_0": "foo\nbar"})
assert (
textwrap.dedent(
"""\
Expand All @@ -202,3 +203,18 @@ def pretty_format(self) -> Optional[str]:
return f"{self.__class__.__name__}({self.field_0:.2f})"

assert mf_pformat(_ExampleDataclass(1.2345)) == f"{_ExampleDataclass.__name__}(1.23)"


def test_pformat_dict_with_empty_message() -> None:
"""Test `mf_pformat_dict` without a description."""
result = mf_pformat_dict(obj_dict={"object_0": (1, 2, 3), "object_1": {4: 5}})

assert (
mf_dedent(
"""
object_0: (1, 2, 3)
object_1: {4: 5}
"""
)
== result
)
6 changes: 3 additions & 3 deletions metricflow/data_table/mf_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import tabulate
from metricflow_semantics.mf_logging.formatting import indent
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_dict
from typing_extensions import Self

from metricflow.data_table.column_types import CellValue, InputCellValue, row_cell_types
Expand Down Expand Up @@ -47,7 +47,7 @@ def __post_init__(self) -> None: # noqa: D105
# Check that the type of the object in the rows match.
for column_index, cell_value in enumerate(row):
expected_cell_value_type = self.column_descriptions[column_index].column_type
assert cell_value is None or isinstance(cell_value, expected_cell_value_type), mf_pformat_many(
assert cell_value is None or isinstance(cell_value, expected_cell_value_type), mf_pformat_dict(
"Cell value type mismatch.",
{
"row_index": row_index,
Expand All @@ -59,7 +59,7 @@ def __post_init__(self) -> None: # noqa: D105
)
# Check that datetimes don't have a timezone set.
if isinstance(cell_value, datetime.datetime):
assert cell_value.tzinfo is None, mf_pformat_many(
assert cell_value.tzinfo is None, mf_pformat_dict(
"Time zone provided for datetime.",
{
"row_index": row_index,
Expand Down
20 changes: 13 additions & 7 deletions metricflow/dataflow/builder/node_data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
DataflowPlanNode,
)
from metricflow.dataset.sql_dataset import SqlDataSet
from metricflow.plan_conversion.dataflow_to_sql import DataflowToSqlQueryPlanConverter
from metricflow.plan_conversion.dataflow_to_sql import (
DataflowNodeToSqlSubqueryVisitor,
)

if TYPE_CHECKING:
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup


class DataflowPlanNodeOutputDataSetResolver(DataflowToSqlQueryPlanConverter):
class DataflowPlanNodeOutputDataSetResolver:
"""Given a node in a dataflow plan, figure out what is the data set output by that node.

Recall that in the dataflow plan, the nodes represent computation, and the inputs and outputs of the nodes are
Expand Down Expand Up @@ -68,9 +70,11 @@ def __init__( # noqa: D107
_node_to_output_data_set: Optional[Dict[DataflowPlanNode, SqlDataSet]] = None,
) -> None:
self._node_to_output_data_set: Dict[DataflowPlanNode, SqlDataSet] = _node_to_output_data_set or {}
super().__init__(
column_association_resolver=column_association_resolver,
semantic_manifest_lookup=semantic_manifest_lookup,
self._column_association_resolver = column_association_resolver
self._semantic_manifest_lookup = semantic_manifest_lookup
self._to_data_set_visitor = _NodeDataSetVisitor(
column_association_resolver=self._column_association_resolver,
semantic_manifest_lookup=self._semantic_manifest_lookup,
)

def get_output_data_set(self, node: DataflowPlanNode) -> SqlDataSet:
Expand All @@ -79,7 +83,7 @@ def get_output_data_set(self, node: DataflowPlanNode) -> SqlDataSet:
# TODO: The cache needs to be pruned, but has not yet been an issue.
"""
if node not in self._node_to_output_data_set:
self._node_to_output_data_set[node] = node.accept(self)
self._node_to_output_data_set[node] = node.accept(self._to_data_set_visitor)

return self._node_to_output_data_set[node]

Expand All @@ -92,11 +96,13 @@ def cache_output_data_sets(self, nodes: Sequence[DataflowPlanNode]) -> None:
def copy(self) -> DataflowPlanNodeOutputDataSetResolver:
"""Return a copy of this with the same nodes cached."""
return DataflowPlanNodeOutputDataSetResolver(
column_association_resolver=self.column_association_resolver,
column_association_resolver=self._column_association_resolver,
semantic_manifest_lookup=self._semantic_manifest_lookup,
_node_to_output_data_set=dict(self._node_to_output_data_set),
)


class _NodeDataSetVisitor(DataflowNodeToSqlSubqueryVisitor):
@override
def _next_unique_table_alias(self) -> str:
"""Return the next unique table alias to use in generating queries.
Expand Down
14 changes: 12 additions & 2 deletions metricflow/dataset/sql_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dbt_semantic_interfaces.references import SemanticModelReference
from metricflow_semantics.assert_one_arg import assert_exactly_one_arg_set
from metricflow_semantics.instances import EntityInstance, InstanceSet
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.specs.column_assoc import ColumnAssociation
from metricflow_semantics.specs.dimension_spec import DimensionSpec
from metricflow_semantics.specs.entity_spec import EntitySpec
Expand Down Expand Up @@ -42,7 +43,9 @@ def __init__(
def sql_node(self) -> SqlQueryPlanNode: # noqa: D102
node_to_return = self._sql_select_node or self._sql_node
if node_to_return is None:
raise RuntimeError("This node was not created with a SQL node.")
raise RuntimeError(
"This node was not created with a SQL node. This should have been prevented by the initializer."
)
return node_to_return

@property
Expand All @@ -52,7 +55,14 @@ def checked_sql_select_node(self) -> SqlSelectStatementNode:
Otherwise, an exception is thrown.
"""
if self._sql_select_node is None:
raise RuntimeError(f"{self} was created with a SQL node that is not a {SqlSelectStatementNode}")
raise RuntimeError(
str(
LazyFormat(
f"{self.__class__.__name__} was created with a SQL node that is not a {SqlSelectStatementNode.__name__}",
sql_node=self.sql_node.structure_text(),
)
)
)
return self._sql_select_node

def column_associations_for_entity(
Expand Down
Loading
Loading