Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LazyFormat to reduce logging overhead on performance #1419

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations

import textwrap


def mf_dedent(text: str) -> str:
"""Remove leading newlines, dedents, and remove tailing newlines.

This function simplifies the somewhat-frequently used:

text = textwrap.dedent(
[triple quote][backslash]
Line 0
Line 1
[triple quote]
).rstrip()

to:

text = mf_dedent(
[triple quote]
Line 0
Line 1
[triple quote]
)
"""
return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from functools import cached_property
from typing import Callable, Union

from typing_extensions import override

from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many


class LazyFormat:
"""Lazily formats the given objects into a string representation for logging.

The formatting is done when this object is converted to a string to be compatible with the standard `logging`
library. This is done lazily as the formatting operation can be expensive and allows debug log statements to not
incur a performance overhead in production.

Example:
logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1))

->

DEBUG - Found path
start_point: Point(x=0.0, y=0.0)
end_point: Point(x=1.0, y=1.0)

To aid migration of existing log statements, the message can be a function so that f-strings can be lazily
evaluated as well. This style will be deprecated as log statements are updated.

Example:
logger.debug(f"Result is: {expensive_function()}")

->

logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
"""

def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None: # type: ignore[no-untyped-def]
"""Initializer.

Args:
message: The message or a function that returns a message.
**kwargs: A dictionary of objects to format to text when `__str__` is called on this object.
"""
self._message = message
self._kwargs = kwargs

@cached_property
def _str_value(self) -> str:
"""Cache the result as `__str__` can be called multiple times for multiple log handlers."""
if callable(self._message):
message = self._message()
else:
message = self._message
return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True)

@override
def __str__(self) -> str:
return self._str_value
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

import logging
from contextlib import contextmanager
from typing import Iterator, List, Optional, Sequence, Tuple

from typing_extensions import override


class RecordingLogHandler(logging.Handler):
"""A log-record handler that stores them so that they can be checked in tests."""

def __init__(self) -> None: # noqa: D107
super().__init__()
self._log_records: List[logging.LogRecord] = []

@override
def emit(self, record: logging.LogRecord) -> None:
self._log_records.append(record)

@property
def log_records(self) -> Sequence[logging.LogRecord]:
"""Return the log records seen by the handler."""
return self._log_records

def get_last_message(self) -> Optional[str]:
"""Return the message in the last log record, or None if this hasn't seen any."""
if len(self._log_records) == 0:
return None

return self._log_records[-1].message


@contextmanager
def recorded_logging_context(
logger: logging.Logger, logging_level_int: int
) -> Iterator[Tuple[logging.Logger, RecordingLogHandler]]:
"""Context with a logger (with the given log level) and associated handler to check what was logged.

The handler records all log records emitted that is appropriate for the given level during this context.
Log propagation could be disabled in this context to clean test log output, but some issues need to be resolved.
"""
previous_logging_level = logger.level
handler = RecordingLogHandler()
logger.addHandler(handler)
try:
logger.setLevel(logging_level_int)
yield logger, handler
finally:
logger.removeHandler(handler)
logger.setLevel(previous_logging_level)
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

import logging

from metricflow_semantics.formatting.formatting_helpers import mf_dedent
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from tests_metricflow_semantics.mf_logging.recorded_logging_context import RecordingLogHandler, recorded_logging_context
from typing_extensions import override

logger = logging.getLogger(__name__)


def test_log_kwargs() -> None:
"""Test that objects included via keyword args are formatted."""
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
recorded_logger.debug(
LazyFormat("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"})
)
assert handler.get_last_message() == mf_dedent(
"""
Found candidates.
matches: [1, 2, 3]
parameters: {'field_0': 'value_0', 'field_1': 'value_1'}
"""
)


def test_lambda() -> None:
"""Test that a lambda that is passed in is called."""
example_message = "Example message."

recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat(lambda: example_message))
assert handler.get_last_message() == example_message


def test_lazy_object() -> None:
"""Test that formatting of objects are done lazily and not when the logging level is not appropriate."""
assertion_message = "This should not have been formatted as a string."

class _NotFormattedObject:
@override
def __repr__(self) -> str:
raise AssertionError(assertion_message)

# Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat("Found candidates", should_not_be_formatted=_NotFormattedObject()))


def test_lazy_lambda() -> None:
"""Test that a lambda input is not evaluated when the logging level is not appropriate."""
assertion_message = "This should not have been formatted as a string."

def _should_not_be_called() -> str:
raise AssertionError(assertion_message)

# Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))
Loading