From 4a803fb9396a005bbae644b960719a928622f5dd Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Fri, 6 Sep 2024 11:57:17 -0700 Subject: [PATCH] /* PR_START p--short-term-perf 02 */ Add `LazyFormat` for logging. --- .../formatting/formatting_helpers.py | 27 ++++++++ .../mf_logging/lazy_formattable.py | 59 ++++++++++++++++ .../mf_logging/recorded_logging_context.py | 51 ++++++++++++++ .../mf_logging/test_lazy_format.py | 68 +++++++++++++++++++ 4 files changed, 205 insertions(+) create mode 100644 metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py create mode 100644 metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py create mode 100644 metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py create mode 100644 metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py diff --git a/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py new file mode 100644 index 0000000000..33a0fb36dd --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import textwrap + + +def mf_dedent(text: str) -> str: + """Remove leading newlines, dedents, and remove tailing newlines. + + This function simplifies the somewhat-frequently used: + + text = textwrap.dedent( + [triple quote][backslash] + Line 0 + Line 1 + [triple quote] + ).rstrip() + + to: + + text = mf_dedent( + [triple quote] + Line 0 + Line 1 + [triple quote] + ) + """ + return textwrap.dedent(text.lstrip("\n")).rstrip("\n") diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py new file mode 100644 index 0000000000..3de7e3710a --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from functools import cached_property +from typing import Callable, Union + +from typing_extensions import override + +from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many + + +class LazyFormat: + """Lazily formats the given objects into a string representation for logging. + + The formatting is done when this object is converted to a string to be compatible with the standard `logging` + library. This is done lazily as the formatting operation can be expensive and allows debug log statements to not + incur a performance overhead in production. + + Example: + logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1)) + + -> + + DEBUG - Found path + start_point: Point(x=0.0, y=0.0) + end_point: Point(x=1.0, y=1.0) + + To aid migration of existing log statements, the message can be a function so that f-strings can be lazily + evaluated as well. This style will be deprecated as log statements are updated. + + Example: + logger.debug(f"Result is: {expensive_function()}") + + -> + + logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}") + """ + + def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None: # type: ignore[no-untyped-def] + """Initializer. + + Args: + message: The message or a function that returns a message. + **kwargs: A dictionary of objects to format to text when `__str__` is called on this object. + """ + self._message = message + self._kwargs = kwargs + + @cached_property + def _str_value(self) -> str: + """Cache the result as `__str__` can be called multiple times for multiple log handlers.""" + if callable(self._message): + message = self._message() + else: + message = self._message + return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True) + + @override + def __str__(self) -> str: + return self._str_value diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py new file mode 100644 index 0000000000..8858bfbffc --- /dev/null +++ b/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import logging +from contextlib import contextmanager +from typing import Iterator, List, Optional, Sequence, Tuple + +from typing_extensions import override + + +class RecordingLogHandler(logging.Handler): + """A log-record handler that stores them so that they can be checked in tests.""" + + def __init__(self) -> None: # noqa: D107 + super().__init__() + self._log_records: List[logging.LogRecord] = [] + + @override + def emit(self, record: logging.LogRecord) -> None: + self._log_records.append(record) + + @property + def log_records(self) -> Sequence[logging.LogRecord]: + """Return the log records seen by the handler.""" + return self._log_records + + def get_last_message(self) -> Optional[str]: + """Return the message in the last log record, or None if this hasn't seen any.""" + if len(self._log_records) == 0: + return None + + return self._log_records[-1].message + + +@contextmanager +def recorded_logging_context( + logger: logging.Logger, logging_level_int: int +) -> Iterator[Tuple[logging.Logger, RecordingLogHandler]]: + """Context with a logger (with the given log level) and associated handler to check what was logged. + + The handler records all log records emitted that is appropriate for the given level during this context. + Log propagation could be disabled in this context to clean test log output, but some issues need to be resolved. + """ + previous_logging_level = logger.level + handler = RecordingLogHandler() + logger.addHandler(handler) + try: + logger.setLevel(logging_level_int) + yield logger, handler + finally: + logger.removeHandler(handler) + logger.setLevel(previous_logging_level) diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py new file mode 100644 index 0000000000..105d143ec7 --- /dev/null +++ b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import logging + +from metricflow_semantics.formatting.formatting_helpers import mf_dedent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat +from tests_metricflow_semantics.mf_logging.recorded_logging_context import RecordingLogHandler, recorded_logging_context +from typing_extensions import override + +logger = logging.getLogger(__name__) + + +def test_log_kwargs() -> None: + """Test that objects included via keyword args are formatted.""" + recorded_logger: logging.Logger + handler: RecordingLogHandler + with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler): + recorded_logger.debug( + LazyFormat("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"}) + ) + assert handler.get_last_message() == mf_dedent( + """ + Found candidates. + matches: [1, 2, 3] + parameters: {'field_0': 'value_0', 'field_1': 'value_1'} + """ + ) + + +def test_lambda() -> None: + """Test that a lambda that is passed in is called.""" + example_message = "Example message." + + recorded_logger: logging.Logger + handler: RecordingLogHandler + with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler): + recorded_logger.debug(LazyFormat(lambda: example_message)) + assert handler.get_last_message() == example_message + + +def test_lazy_object() -> None: + """Test that formatting of objects are done lazily and not when the logging level is not appropriate.""" + assertion_message = "This should not have been formatted as a string." + + class _NotFormattedObject: + @override + def __repr__(self) -> str: + raise AssertionError(assertion_message) + + # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted. + recorded_logger: logging.Logger + handler: RecordingLogHandler + with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler): + recorded_logger.debug(LazyFormat("Found candidates", should_not_be_formatted=_NotFormattedObject())) + + +def test_lazy_lambda() -> None: + """Test that a lambda input is not evaluated when the logging level is not appropriate.""" + assertion_message = "This should not have been formatted as a string." + + def _should_not_be_called() -> str: + raise AssertionError(assertion_message) + + # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted. + recorded_logger: logging.Logger + handler: RecordingLogHandler + with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler): + recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))