From 76692f68ecfab6ca86eb43712261c846bf57d3b8 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Mon, 23 Sep 2024 17:16:50 -0700 Subject: [PATCH] /* PR_START p--short-term-perf 03 */ Bulk update logging call sites. --- .../dbt_metricflow/cli/cli_context.py | 3 +- .../dbt_connectors/adapter_backed_client.py | 41 ++-- .../cli/dbt_connectors/dbt_config_accessor.py | 7 +- dbt-metricflow/dbt_metricflow/cli/main.py | 3 +- dbt-metricflow/dbt_metricflow/cli/utils.py | 7 +- .../filters/time_constraint.py | 14 +- .../mf_logging/lazy_formattable.py | 4 +- .../mf_logging/runtime.py | 14 +- .../model/semantics/linkable_element_set.py | 3 +- .../model/semantics/linkable_spec_resolver.py | 15 +- .../model/semantics/metric_lookup.py | 7 +- .../model/semantics/semantic_model_lookup.py | 7 +- .../candidate_push_down/push_down_visitor.py | 51 +++-- .../group_by_item/group_by_item_resolver.py | 13 +- .../query/query_parser.py | 41 ++-- .../query/query_resolver.py | 17 +- .../specs/non_additive_dimension_spec.py | 7 +- .../test_helpers/snapshot_helpers.py | 5 +- .../dataflow/builder/dataflow_plan_builder.py | 188 +++++++++++------- metricflow/dataflow/builder/node_evaluator.py | 21 +- metricflow/engine/metricflow_engine.py | 27 ++- metricflow/execution/dataflow_to_execution.py | 5 +- metricflow/execution/execution_plan.py | 5 +- metricflow/execution/executor.py | 15 +- metricflow/plan_conversion/dataflow_to_sql.py | 11 +- metricflow/plan_conversion/node_processor.py | 21 +- .../optimizer/rewriting_sub_query_reducer.py | 7 +- scripts/ci_tests/metricflow_package_test.py | 5 +- .../metricflow_semantics_package_test.py | 3 +- tests_metricflow/examples/test_node_sql.py | 11 +- .../fixtures/manifest_fixtures.py | 5 +- tests_metricflow/fixtures/setup_fixtures.py | 13 +- .../fixtures/sql_client_fixtures.py | 9 +- .../sql_clients/adapter_backed_ddl_client.py | 11 +- tests_metricflow/fixtures/table_fixtures.py | 9 +- tests_metricflow/generate_snapshots.py | 13 +- .../integration/test_configured_cases.py | 3 +- .../mf_logging/test_dag_to_text.py | 7 +- tests_metricflow/source_schema_tools.py | 7 +- 39 files changed, 429 insertions(+), 226 deletions(-) diff --git a/dbt-metricflow/dbt_metricflow/cli/cli_context.py b/dbt-metricflow/dbt_metricflow/cli/cli_context.py index bdc3fd2454..203e403aab 100644 --- a/dbt-metricflow/dbt_metricflow/cli/cli_context.py +++ b/dbt-metricflow/dbt_metricflow/cli/cli_context.py @@ -6,6 +6,7 @@ from typing import Dict, Optional from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup from dbt_metricflow.cli.dbt_connectors.adapter_backed_client import AdapterBackedSqlClient @@ -111,7 +112,7 @@ def run_health_checks(self) -> Dict[str, Dict[str, str]]: except Exception as e: status = "FAIL" err_string = str(e) - logger.error(f"Health Check Item {step}: failed with error {err_string}") + logger.error(LazyFormat(lambda: f"Health Check Item {step}: failed with error {err_string}")) results[f"{self.sql_client.sql_engine_type} - {step}"] = { "status": status, diff --git a/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/adapter_backed_client.py b/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/adapter_backed_client.py index 889c7daff9..3536ec5ff4 100644 --- a/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/adapter_backed_client.py +++ b/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/adapter_backed_client.py @@ -9,6 +9,7 
@@ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from metricflow_semantics.errors.error_classes import SqlBindParametersNotSupportedError from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.random_id import random_id from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters @@ -111,7 +112,9 @@ def __init__(self, adapter: BaseAdapter): self._sql_engine_type = adapter_type.sql_engine_type self._sql_query_plan_renderer = adapter_type.sql_query_plan_renderer - logger.info(f"Initialized AdapterBackedSqlClient with dbt adapter type `{adapter_type.value}`") + logger.debug( + LazyFormat(lambda: f"Initialized AdapterBackedSqlClient with dbt adapter type `{adapter_type.value}`") + ) @property def sql_engine_type(self) -> SqlEngine: @@ -142,11 +145,13 @@ def query( f"Invalid execute statement - we do not support queries with bind parameters through dbt adapters! " f"Bind params: {sql_bind_parameters.param_dict}" ) - logger.info(AdapterBackedSqlClient._format_run_query_log_message(stmt, sql_bind_parameters)) + logger.debug( + LazyFormat(lambda: AdapterBackedSqlClient._format_run_query_log_message(stmt, sql_bind_parameters)) + ) with self._adapter.connection_named(f"MetricFlow_request_{request_id}"): # returns a Tuple[AdapterResponse, agate.Table] but the decorator converts it to Any result = self._adapter.execute(sql=stmt, auto_begin=True, fetch=True) - logger.info(f"Query returned from dbt Adapter with response {result[0]}") + logger.debug(LazyFormat(lambda: f"Query returned from dbt Adapter with response {result[0]}")) agate_data = result[1] rows = [row.values() for row in agate_data.rows] @@ -155,7 +160,11 @@ def query( rows=rows, ) stop = time.time() - logger.info(f"Finished running the query in {stop - start:.2f}s with {data_table.row_count} row(s) returned") + logger.debug( + LazyFormat( + lambda: f"Finished running the query in {stop - start:.2f}s with {data_table.row_count} row(s) returned" + ) + ) return data_table def execute( @@ -177,14 +186,16 @@ def execute( ) start = time.time() request_id = SqlRequestId(f"mf_rid__{random_id()}") - logger.info(AdapterBackedSqlClient._format_run_query_log_message(stmt, sql_bind_parameters)) + logger.debug( + LazyFormat(lambda: AdapterBackedSqlClient._format_run_query_log_message(stmt, sql_bind_parameters)) + ) with self._adapter.connection_named(f"MetricFlow_request_{request_id}"): result = self._adapter.execute(stmt, auto_begin=True, fetch=False) # Calls to execute often involve some amount of DDL so we commit here self._adapter.commit_if_has_connection() - logger.info(f"Query executed via dbt Adapter with response {result[0]}") + logger.debug(LazyFormat(lambda: f"Query executed via dbt Adapter with response {result[0]}")) stop = time.time() - logger.info(f"Finished running the query in {stop - start:.2f}s") + logger.debug(LazyFormat(lambda: f"Finished running the query in {stop - start:.2f}s")) return None def dry_run( @@ -202,10 +213,16 @@ def dry_run( concrete values for SQL query parameters. 
""" start = time.time() - logger.info( - f"Running dry_run of:" - f"\n\n{indent(stmt)}\n" - + (f"\nwith parameters: {dict(sql_bind_parameters.param_dict)}" if sql_bind_parameters.param_dict else "") + logger.debug( + LazyFormat( + lambda: f"Running dry_run of:" + f"\n\n{indent(stmt)}\n" + + ( + f"\nwith parameters: {dict(sql_bind_parameters.param_dict)}" + if sql_bind_parameters.param_dict + else "" + ) + ) ) request_id = SqlRequestId(f"mf_rid__{random_id()}") connection_name = f"MetricFlow_dry_run_request_{request_id}" @@ -242,7 +259,7 @@ def dry_run( raise DbtDatabaseError(f"Encountered error in Databricks dry run. Full output: {plan_output_str}") stop = time.time() - logger.info(f"Finished running the dry_run in {stop - start:.2f}s") + logger.debug(LazyFormat(lambda: f"Finished running the dry_run in {stop - start:.2f}s")) return def close(self) -> None: # noqa: D102 diff --git a/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/dbt_config_accessor.py b/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/dbt_config_accessor.py index 5ac5f32f7e..5282d4c9dc 100644 --- a/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/dbt_config_accessor.py +++ b/dbt-metricflow/dbt_metricflow/cli/dbt_connectors/dbt_config_accessor.py @@ -13,6 +13,7 @@ from dbt.config.runtime import load_profile, load_project from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest from metricflow_semantics.errors.error_classes import ModelCreationException +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.dbt_manifest_parser import parse_manifest_from_dbt_generated_manifest from typing_extensions import Self @@ -40,12 +41,14 @@ class dbtProjectMetadata: @classmethod def load_from_project_path(cls: Type[Self], project_path: Path) -> Self: """Loads all dbt artifacts for the project associated with the given project path.""" - logger.info(f"Loading dbt project metadata for project located at {project_path}") + logger.debug(LazyFormat(lambda: f"Loading dbt project metadata for project located at {project_path}")) dbtRunner().invoke(["-q", "debug"], project_dir=str(project_path)) profile = load_profile(str(project_path), {}) project = load_project(str(project_path), version_check=False, profile=profile) project_path = project_path - logger.info(f"Loaded project {project.project_name} with profile details:\n{mf_pformat(profile)}") + logger.debug( + LazyFormat(lambda: f"Loaded project {project.project_name} with profile details:\n{mf_pformat(profile)}") + ) return cls(profile=profile, project=project, project_path=project_path) @property diff --git a/dbt-metricflow/dbt_metricflow/cli/main.py b/dbt-metricflow/dbt_metricflow/cli/main.py index c59ad93435..969eb572ab 100644 --- a/dbt-metricflow/dbt_metricflow/cli/main.py +++ b/dbt-metricflow/dbt_metricflow/cli/main.py @@ -20,6 +20,7 @@ from dbt_semantic_interfaces.validations.validator_helpers import SemanticManifestValidationResults from halo import Halo from metricflow_semantics.dag.dag_visualization import display_dag_as_svg +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from update_checker import UpdateChecker import dbt_metricflow.cli.custom_click_types as click_custom @@ -92,7 +93,7 @@ def exit_signal_handler(signal_type: int, frame) -> None: # type: ignore try: # Note: we may wish to add support for canceling all queries if zombie queries are a problem - logger.info("Closing client connections") + 
logger.debug(LazyFormat(lambda: "Closing client connections")) cfg.sql_client.close() finally: sys.exit(-1) diff --git a/dbt-metricflow/dbt_metricflow/cli/utils.py b/dbt-metricflow/dbt_metricflow/cli/utils.py index 41047ecca7..d3e329a6a7 100644 --- a/dbt-metricflow/dbt_metricflow/cli/utils.py +++ b/dbt-metricflow/dbt_metricflow/cli/utils.py @@ -9,6 +9,7 @@ import click from dateutil.parser import parse +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat import dbt_metricflow.cli.custom_click_types as click_custom from dbt_metricflow.cli.cli_context import CLIContext @@ -122,8 +123,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: # type: ignore[misc] else: if not isinstance(args[0], CLIContext): logger.error( - f"Missing {CLIContext.__name__} as the first argument to the function " - f"{getattr(func, '__name__', repr(func))}" + LazyFormat( + lambda: f"Missing {CLIContext.__name__} as the first argument to the function " + f"{getattr(func, '__name__', repr(func))}" + ) ) click.echo(f"\nERROR: {str(e)}") if args and hasattr(args[0], "verbose") and args[0].verbose is True: diff --git a/metricflow-semantics/metricflow_semantics/filters/time_constraint.py b/metricflow-semantics/metricflow_semantics/filters/time_constraint.py index ad840a5d3f..8037782b76 100644 --- a/metricflow-semantics/metricflow_semantics/filters/time_constraint.py +++ b/metricflow-semantics/metricflow_semantics/filters/time_constraint.py @@ -6,6 +6,8 @@ from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat + logger = logging.getLogger(__name__) @@ -18,10 +20,18 @@ class TimeRangeConstraint(SerializableDataclass): def __post_init__(self) -> None: # noqa: D105 if self.start_time > self.end_time: - logger.warning(f"start_time must not be > end_time. start_time={self.start_time} end_time={self.end_time}") + logger.warning( + LazyFormat( + lambda: f"start_time must not be > end_time. start_time={self.start_time} end_time={self.end_time}" + ) + ) if self.start_time < TimeRangeConstraint.ALL_TIME_BEGIN(): - logger.warning(f"start_time={self.start_time} exceeds the limits of {TimeRangeConstraint.ALL_TIME_BEGIN()}") + logger.warning( + LazyFormat( + lambda: f"start_time={self.start_time} exceeds the limits of {TimeRangeConstraint.ALL_TIME_BEGIN()}" + ) + ) if self.end_time > TimeRangeConstraint.ALL_TIME_END(): raise RuntimeError(f"end_time={self.end_time} exceeds the limits of {TimeRangeConstraint.ALL_TIME_END()}") diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py index 3de7e3710a..b669c26400 100644 --- a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py +++ b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py @@ -16,7 +16,7 @@ class LazyFormat: incur a performance overhead in production. Example: - logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1)) + logger.debug(LazyFormat(lambda: LazyFormat("Found path", start_point=point_0, end_point=point_1))) -> @@ -28,7 +28,7 @@ class LazyFormat: evaluated as well. This style will be deprecated as log statements are updated. 
Example: - logger.debug(f"Result is: {expensive_function()}") + logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")) -> diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/runtime.py b/metricflow-semantics/metricflow_semantics/mf_logging/runtime.py index 94e6924fab..64321ce224 100644 --- a/metricflow-semantics/metricflow_semantics/mf_logging/runtime.py +++ b/metricflow-semantics/metricflow_semantics/mf_logging/runtime.py @@ -8,6 +8,8 @@ from typing_extensions import ParamSpec +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat + logger = logging.getLogger(__name__) ReturnType = TypeVar("ReturnType") @@ -29,15 +31,15 @@ def _inner(*args: ParametersType.args, **kwargs: ParametersType.kwargs) -> Retur # __qualname__ includes the path like MyClass.my_function function_name = f"{wrapped_function.__qualname__}()" start_time = time.time() - logger.info(f"Starting {function_name}") + logger.debug(LazyFormat(lambda: f"Starting {function_name}")) try: result = wrapped_function(*args, **kwargs) finally: runtime = time.time() - start_time - logger.info(f"Finished {function_name} in {runtime:.1f}s") + logger.debug(LazyFormat(lambda: f"Finished {function_name} in {runtime:.1f}s")) if runtime > runtime_warning_threshold: - logger.warning(f"{function_name} is slow with a runtime of {runtime:.1f}s") + logger.warning(LazyFormat(lambda: f"{function_name} is slow with a runtime of {runtime:.1f}s")) return result @@ -51,11 +53,11 @@ def log_block_runtime(code_block_name: str, runtime_warning_threshold: float = 5 """Logs the runtime of the enclosed code block.""" start_time = time.time() description = f"code_block_name={repr(code_block_name)}" - logger.info(f"Starting {description}") + logger.debug(LazyFormat(lambda: f"Starting {description}")) yield runtime = time.time() - start_time - logger.info(f"Finished {description} in {runtime:.1f}s") + logger.debug(LazyFormat(lambda: f"Finished {description} in {runtime:.1f}s")) if runtime > runtime_warning_threshold: - logger.warning(f"{description} is slow with a runtime of {runtime:.1f}s") + logger.warning(LazyFormat(lambda: f"{description} is slow with a runtime of {runtime:.1f}s")) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py index ca95288d04..d64c96559a 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py @@ -10,6 +10,7 @@ from dbt_semantic_interfaces.references import SemanticModelReference from typing_extensions import override +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.model.linkable_element_property import LinkableElementProperty from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation from metricflow_semantics.model.semantics.linkable_element import ( @@ -413,5 +414,5 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka path_key_to_linkable_entities=path_key_to_linkable_entities, path_key_to_linkable_metrics=path_key_to_linkable_metrics, ) - logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s") + logger.debug(LazyFormat(lambda: f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s")) return filtered_elements diff --git 
a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_spec_resolver.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_spec_resolver.py index f0251edd54..40d1d9bdbb 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_spec_resolver.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_spec_resolver.py @@ -23,6 +23,7 @@ from dbt_semantic_interfaces.validations.unique_valid_name import MetricFlowReservedKeywords from metricflow_semantics.errors.error_classes import UnknownMetricLinkingError +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.linkable_element_property import LinkableElementProperty from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation @@ -203,7 +204,11 @@ def __init__( for linkable_entity in linkable_entities: # TODO: some users encounter a situation in which the entity reference is in the entity links. Debug why. if linkable_entity.reference in linkable_entity.entity_links: - logger.info(f"Found entity reference in entity links for linkable entity: {linkable_entity}") + logger.debug( + LazyFormat( + lambda: f"Found entity reference in entity links for linkable entity: {linkable_entity}" + ) + ) continue metric_subquery_join_path_element = MetricSubqueryJoinPathElement( metric_reference=metric_reference, @@ -227,7 +232,11 @@ def __init__( semantic_model, SemanticModelJoinPath(left_semantic_model_reference=semantic_model.reference) ) ) - logger.info(f"Building valid linkable metrics took: {time.time() - linkable_metrics_start_time:.2f}s") + logger.debug( + LazyFormat( + lambda: f"Building valid linkable metrics took: {time.time() - linkable_metrics_start_time:.2f}s" + ) + ) for semantic_model in semantic_manifest.semantic_models: linkable_element_sets_for_no_metrics_queries.append(self._get_elements_in_semantic_model(semantic_model)) @@ -237,7 +246,7 @@ def __init__( linkable_element_sets_for_no_metrics_queries + [metric_time_elements_for_no_metrics] ) - logger.info(f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s") + logger.debug(LazyFormat(lambda: f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s")) def _metric_requires_metric_time(self, metric: Metric) -> bool: """Checks if the metric can only be queried with metric_time. Also checks input metrics. 
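Note on the pattern applied throughout this patch: LazyFormat accepts either a plain message with keyword arguments or a zero-argument callable, and defers building the final string until the logging framework actually renders the record. Because the standard library's logging module only calls str() on the message object when a record is handled, a disabled DEBUG statement no longer pays for f-string interpolation or mf_pformat() calls. The following is a minimal sketch of the idea only; the class name LazyFormatSketch and its kwargs rendering are assumptions for illustration, and the shipped class in metricflow_semantics/mf_logging/lazy_formattable.py is the source of truth.

    import logging
    from typing import Callable, Union

    class LazyFormatSketch:
        """Illustrative stand-in for LazyFormat; not the shipped implementation."""

        def __init__(self, message: Union[str, Callable[[], str]], **kwargs: object) -> None:
            self._message = message
            self._kwargs = kwargs

        def __str__(self) -> str:
            # The deferred work happens here, only when a handler formats the record.
            message = self._message() if callable(self._message) else self._message
            if not self._kwargs:
                return message
            rendered = ", ".join(f"{key}={value!r}" for key, value in self._kwargs.items())
            return f"{message} ({rendered})"

    logger = logging.getLogger(__name__)
    # Old (eager): the f-string is built even when DEBUG is disabled.
    #     logger.debug(f"Recipe is:\n{mf_pformat(recipe)}")
    # New (lazy): the lambda runs only if the record is actually emitted.
    #     logger.debug(LazyFormatSketch(lambda: f"Recipe is:\n{mf_pformat(recipe)}"))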
diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index b46be1214a..246b788da7 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -12,6 +12,7 @@ from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity from metricflow_semantics.errors.error_classes import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.model.linkable_element_property import LinkableElementProperty from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet from metricflow_semantics.model.semantics.linkable_spec_resolver import ( @@ -63,8 +64,10 @@ def linkable_elements_for_measure( with_any_of=frozen_with_any_of, without_any_of=frozen_without_any_of, ) - logger.info( - f"Getting valid linkable elements for measure '{measure_reference.element_name}' took: {time.time() - start_time:.2f}s" + logger.debug( + LazyFormat( + lambda: f"Getting valid linkable elements for measure '{measure_reference.element_name}' took: {time.time() - start_time:.2f}s" + ) ) return linkable_element_set diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py index 59931b2090..0fe1ae5645 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py @@ -20,6 +20,7 @@ from dbt_semantic_interfaces.type_enums import AggregationType, DimensionType, EntityType, TimeGranularity from metricflow_semantics.errors.error_classes import InvalidSemanticModelError +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.semantics.element_group import ElementGrouper from metricflow_semantics.model.spec_converters import MeasureConverter @@ -242,8 +243,10 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: if not StructuredLinkableSpecName.from_name(dim.name).is_element_name: # TODO: [custom granularity] change this to an assertion once we're sure there aren't exceptions logger.warning( - f"Dimension name `{dim.name}` contains annotations, but this name should be the plain element name " - "from the original model. This should have been blocked by validation!" + LazyFormat( + lambda: f"Dimension name `{dim.name}` contains annotations, but this name should be the plain element name " + "from the original model. This should have been blocked by validation!" + ) ) # TODO: Construct these specs correctly. 
All of the time dimension specs have the default granularity diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py index 5f5b691146..1548f8343a 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -12,6 +12,7 @@ from typing_extensions import override from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many from metricflow_semantics.model.linkable_element_property import LinkableElementProperty from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup @@ -168,7 +169,7 @@ def __init__( def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResult: """Push the group-by-item specs that are available to the measure and match the source patterns to the child.""" with self._path_from_start_node_tracker.track_node_visit(node) as current_traversal_path: - logger.info(f"Handling {node.ui_description}") + logger.debug(LazyFormat(lambda: f"Handling {node.ui_description}")) items_available_for_measure = self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_measure( measure_reference=node.measure_reference, with_any_of=self._with_any_property, @@ -198,13 +199,15 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu if logger.isEnabledFor(logging.DEBUG): logger.debug( - f"For {node.ui_description}:\n" - + indent( - "After applying patterns:\n" - + indent(mf_pformat(patterns_to_apply + self._source_spec_patterns)) - + "\n" - + "to inputs, matches are:\n" - + indent(mf_pformat(matching_items.specs)) + LazyFormat( + lambda: f"For {node.ui_description}:\n" + + indent( + "After applying patterns:\n" + + indent(mf_pformat(patterns_to_apply + self._source_spec_patterns)) + + "\n" + + "to inputs, matches are:\n" + + indent(mf_pformat(matching_items.specs)) + ) ) ) @@ -320,10 +323,13 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> PushDownRe }, current_traversal_path=current_traversal_path, ) - logger.info(f"Handling {node.ui_description}") + logger.debug(LazyFormat(lambda: f"Handling {node.ui_description}")) if logger.isEnabledFor(logging.DEBUG): logger.debug( - "Candidates from parents:\n" + indent(mf_pformat(merged_result_from_parents.candidate_set.specs)) + LazyFormat( + lambda: "Candidates from parents:\n" + + indent(mf_pformat(merged_result_from_parents.candidate_set.specs)) + ) ) if merged_result_from_parents.candidate_set.is_empty: return merged_result_from_parents @@ -356,13 +362,15 @@ def visit_metric_node(self, node: MetricGroupByItemResolutionNode) -> PushDownRe if logger.isEnabledFor(logging.DEBUG): matched_specs = matched_items.specs logger.debug( - f"For {node.ui_description}:\n" - + indent( - "After applying patterns:\n" - + indent(mf_pformat(patterns_to_apply)) - + "\n" - + "to inputs, outputs are:\n" - + indent(mf_pformat(matched_specs)) + LazyFormat( + lambda: f"For {node.ui_description}:\n" + + indent( + "After applying patterns:\n" + + indent(mf_pformat(patterns_to_apply)) + + "\n" + + "to inputs, outputs are:\n" + + indent(mf_pformat(matched_specs)) + ) ) ) @@ -431,7 +439,10 @@ 
def visit_query_node(self, node: QueryGroupByItemResolutionNode) -> PushDownResu ) if logger.isEnabledFor(logging.DEBUG): logger.debug( - "Candidates from parents:\n" + indent(mf_pformat(merged_result_from_parents.candidate_set.specs)) + LazyFormat( + lambda: "Candidates from parents:\n" + + indent(mf_pformat(merged_result_from_parents.candidate_set.specs)) + ) ) return merged_result_from_parents @@ -440,11 +451,11 @@ def visit_query_node(self, node: QueryGroupByItemResolutionNode) -> PushDownResu def visit_no_metrics_query_node(self, node: NoMetricsGroupByItemSourceNode) -> PushDownResult: """Pass the group-by items that can be queried without metrics to the child node.""" with self._path_from_start_node_tracker.track_node_visit(node) as current_traversal_path: - logger.info(f"Handling {node.ui_description}") + logger.debug(LazyFormat(lambda: f"Handling {node.ui_description}")) # This is a case for distinct dimension values from semantic models. candidate_elements = self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_no_metrics_query() if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Candidate elements are:\n{mf_pformat(candidate_elements)}") + logger.debug(LazyFormat(lambda: f"Candidate elements are:\n{mf_pformat(candidate_elements)}")) candidates_after_filtering = candidate_elements.filter_by_spec_patterns(self._source_spec_patterns) if candidates_after_filtering.spec_count == 0: diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py index 09c2475387..9799e572ab 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/group_by_item_resolver.py @@ -11,6 +11,7 @@ from typing_extensions import override from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation @@ -110,11 +111,13 @@ def resolve_matching_item_for_querying( ): push_down_result = push_down_result.filter_candidates_by_pattern(candidate_filter) - logger.info( - f"Spec pattern:\n" - f"{indent(mf_pformat(spec_pattern))}\n" - f"was resolved to:\n" - f"{indent(mf_pformat(push_down_result.candidate_set.specs))}" + logger.debug( + LazyFormat( + lambda: f"Spec pattern:\n" + f"{indent(mf_pformat(spec_pattern))}\n" + f"was resolved to:\n" + f"{indent(mf_pformat(push_down_result.candidate_set.specs))}" + ) ) if push_down_result.candidate_set.num_candidates > 1: return GroupByItemResolution( diff --git a/metricflow-semantics/metricflow_semantics/query/query_parser.py b/metricflow-semantics/metricflow_semantics/query/query_parser.py index 308761cdc8..abad86f532 100644 --- a/metricflow-semantics/metricflow_semantics/query/query_parser.py +++ b/metricflow-semantics/metricflow_semantics/query/query_parser.py @@ -18,6 +18,7 @@ from metricflow_semantics.filters.merge_where import merge_to_single_where_filter from metricflow_semantics.filters.time_constraint import TimeRangeConstraint from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print 
import mf_pformat from metricflow_semantics.mf_logging.runtime import log_runtime from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup @@ -429,11 +430,13 @@ def _parse_and_validate_query( ) ) - logger.info( - "Converted group-by-item input:\n" - + indent(f"Input: {repr(group_by_name)}") - + "\n" - + indent(f"Resolver Input: {mf_pformat(resolver_input_for_group_by_item)}") + logger.debug( + LazyFormat( + lambda: "Converted group-by-item input:\n" + + indent(f"Input: {repr(group_by_name)}") + + "\n" + + indent(f"Resolver Input: {mf_pformat(resolver_input_for_group_by_item)}") + ) ) for group_by_parameter in group_by: @@ -441,11 +444,13 @@ def _parse_and_validate_query( semantic_manifest_lookup=self._manifest_lookup ) resolver_inputs_for_group_by_items.append(resolver_input_for_group_by_parameter) - logger.info( - "Converted group-by-item input:\n" - + indent(f"Input: {repr(group_by_parameter)}") - + "\n" - + indent(f"Resolver Input: {mf_pformat(resolver_input_for_group_by_parameter)}") + logger.debug( + LazyFormat( + lambda: "Converted group-by-item input:\n" + + indent(f"Input: {repr(group_by_parameter)}") + + "\n" + + indent(f"Resolver Input: {mf_pformat(resolver_input_for_group_by_parameter)}") + ) ) where_filters: List[PydanticWhereFilter] = [] @@ -483,11 +488,13 @@ def _parse_and_validate_query( min_max_only=resolver_input_for_min_max_only, ) - logger.info("Resolver input for query is:\n" + indent(mf_pformat(resolver_input_for_query))) + logger.debug( + LazyFormat(lambda: "Resolver input for query is:\n" + indent(mf_pformat(resolver_input_for_query))) + ) query_resolution = query_resolver.resolve_query(resolver_input_for_query) - logger.info("Query resolution is:\n" + indent(mf_pformat(query_resolution))) + logger.debug(LazyFormat(lambda: "Query resolution is:\n" + indent(mf_pformat(query_resolution)))) self._raise_exception_if_there_are_errors( input_to_issue_set=query_resolution.input_to_issue_set.merge( @@ -500,10 +507,14 @@ def _parse_and_validate_query( if time_constraint_start is not None or time_constraint_end is not None: if time_constraint_start is None: time_constraint_start = TimeRangeConstraint.ALL_TIME_BEGIN() - logger.info(f"time_constraint_start was None, so it was set to {time_constraint_start}") + logger.debug( + LazyFormat(lambda: f"time_constraint_start was None, so it was set to {time_constraint_start}") + ) if time_constraint_end is None: time_constraint_end = TimeRangeConstraint.ALL_TIME_END() - logger.info(f"time_constraint_end was None, so it was set to {time_constraint_end}") + logger.debug( + LazyFormat(lambda: f"time_constraint_end was None, so it was set to {time_constraint_end}") + ) time_constraint = TimeRangeConstraint( start_time=time_constraint_start, @@ -515,7 +526,7 @@ def _parse_and_validate_query( time_dimension_specs_in_query=query_spec.time_dimension_specs, time_constraint=time_constraint, ) - logger.info(f"Time constraint after adjustment is: {time_constraint}") + logger.debug(LazyFormat(lambda: f"Time constraint after adjustment is: {time_constraint}")) return ParseQueryResult( query_spec=query_spec.with_time_range_constraint(time_constraint), diff --git a/metricflow-semantics/metricflow_semantics/query/query_resolver.py b/metricflow-semantics/metricflow_semantics/query/query_resolver.py index 72ea1cb506..9f38fcf630 100644 --- a/metricflow-semantics/metricflow_semantics/query/query_resolver.py +++ b/metricflow-semantics/metricflow_semantics/query/query_resolver.py @@ -7,6 +7,7 @@ from 
dbt_semantic_interfaces.references import MeasureReference, MetricReference, SemanticModelReference +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many from metricflow_semantics.mf_logging.runtime import log_runtime from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup @@ -222,7 +223,7 @@ def _resolve_group_by_items_result( metric_references=metric_references, where_filter_intersection=filter_input.where_filter_intersection, ) - logger.info(f"Resolution DAG is:\n{resolution_dag.structure_text()}") + logger.debug(LazyFormat(lambda: f"Resolution DAG is:\n{resolution_dag.structure_text()}")) group_by_item_resolver = GroupByItemResolver( manifest_lookup=self._manifest_lookup, @@ -489,7 +490,9 @@ def _resolve_query(self, resolver_input_for_query: ResolverInputForQuery) -> Met # No errors. linkable_spec_set = group_specs_by_type(group_by_item_specs) - logger.info(f"Group-by-items were resolved to:\n{mf_pformat(linkable_spec_set.linkable_specs)}") + logger.debug( + LazyFormat(lambda: f"Group-by-items were resolved to:\n{mf_pformat(linkable_spec_set.linkable_specs)}") + ) # Run post-resolution validation rules to generate issues that are generated at the query-level. query_level_issue_set = self._post_resolution_query_validator.validate_query( @@ -552,10 +555,12 @@ def _resolve_query(self, resolver_input_for_query: ResolverInputForQuery) -> Met # a measure alias is used incorrectly. if len(models_not_in_manifest) > 0: logger.error( - mf_pformat_many( - "Semantic references that aren't in the manifest were found in the set used in " - "a query. This is a bug, and to avoid potential issues, they will be filtered out.", - {"models_not_in_manifest": models_not_in_manifest}, + LazyFormat( + lambda: mf_pformat_many( + "Semantic references that aren't in the manifest were found in the set used in " + "a query. This is a bug, and to avoid potential issues, they will be filtered out.", + {"models_not_in_manifest": models_not_in_manifest}, + ) ) ) queried_semantic_models -= models_not_in_manifest diff --git a/metricflow-semantics/metricflow_semantics/specs/non_additive_dimension_spec.py b/metricflow-semantics/metricflow_semantics/specs/non_additive_dimension_spec.py index a546cf2e82..ed583f2f00 100644 --- a/metricflow-semantics/metricflow_semantics/specs/non_additive_dimension_spec.py +++ b/metricflow-semantics/metricflow_semantics/specs/non_additive_dimension_spec.py @@ -8,6 +8,7 @@ from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass from dbt_semantic_interfaces.type_enums import AggregationType, TimeGranularity +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.naming.linkable_spec_name import DUNDER from metricflow_semantics.specs.entity_spec import LinklessEntitySpec from metricflow_semantics.specs.instance_spec import LinkableInstanceSpec @@ -43,8 +44,10 @@ def __post_init__(self) -> None: # TODO: [custom granularity] change this to an assertion once we're sure there aren't exceptions if not self.name.find(DUNDER) == -1: logger.warning( - f"Non-additive dimension spec references a dimension name `{self.name}`, with added annotations, but it " - "should be a simple element name reference. This should have been blocked by model validation!" 
+ LazyFormat( + lambda: f"Non-additive dimension spec references a dimension name `{self.name}`, with added annotations, but it " + "should be a simple element name reference. This should have been blocked by model validation!" + ) ) @property diff --git a/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py b/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py index 2091414002..b96bad301a 100644 --- a/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py +++ b/metricflow-semantics/metricflow_semantics/test_helpers/snapshot_helpers.py @@ -14,6 +14,7 @@ from _pytest.fixtures import FixtureRequest from metricflow_semantics.dag.mf_dag import MetricFlowDag +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet from metricflow_semantics.naming.object_builder_scheme import ObjectBuilderNamingScheme @@ -85,7 +86,9 @@ def assert_snapshot_text_equal( if mf_test_configuration.display_snapshots: if not mf_test_configuration.overwrite_snapshots: - logger.warning(f"Not overwriting snapshots, so displaying existing snapshot at {file_path}") + logger.warning( + LazyFormat(lambda: f"Not overwriting snapshots, so displaying existing snapshot at {file_path}") + ) if len(request.session.items) > 1: raise ValueError("Displaying snapshots is only supported when there's a single item in a testing session.") diff --git a/metricflow/dataflow/builder/dataflow_plan_builder.py b/metricflow/dataflow/builder/dataflow_plan_builder.py index edbe4e0fce..21cc27f905 100644 --- a/metricflow/dataflow/builder/dataflow_plan_builder.py +++ b/metricflow/dataflow/builder/dataflow_plan_builder.py @@ -23,6 +23,7 @@ from metricflow_semantics.errors.error_classes import UnableToSatisfyQueryError from metricflow_semantics.filters.time_constraint import TimeRangeConstraint from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.mf_logging.runtime import log_runtime from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup @@ -233,12 +234,14 @@ def _build_plan( def _optimize_plan(self, plan: DataflowPlan, optimizations: FrozenSet[DataflowPlanOptimization]) -> DataflowPlan: optimizer_factory = DataflowPlanOptimizerFactory(self._node_data_set_resolver) for optimizer in optimizer_factory.get_optimizers(optimizations): - logger.info(f"Applying {optimizer.__class__.__name__}") + logger.debug(LazyFormat(lambda: f"Applying {optimizer.__class__.__name__}")) try: plan = optimizer.optimize(plan) - logger.info( - f"After applying {optimizer.__class__.__name__}, the dataflow plan is:\n" - f"{indent(plan.structure_text())}" + logger.debug( + LazyFormat( + lambda: f"After applying {optimizer.__class__.__name__}, the dataflow plan is:\n" + f"{indent(plan.structure_text())}" + ) ) except Exception: logger.exception(f"Got an exception applying {optimizer.__class__.__name__}") @@ -277,13 +280,15 @@ def _build_aggregated_conversion_node( predicate_pushdown_state=time_range_only_pushdown_state, linkable_spec_set=base_required_linkable_specs, ) - logger.info(f"Recipe for base measure aggregation:\n{mf_pformat(base_measure_recipe)}") + logger.debug(LazyFormat(lambda: f"Recipe for base measure 
aggregation:\n{mf_pformat(base_measure_recipe)}")) conversion_measure_recipe = self._find_dataflow_recipe( measure_spec_properties=self._build_measure_spec_properties([conversion_measure_spec.measure_spec]), predicate_pushdown_state=disabled_pushdown_state, linkable_spec_set=LinkableSpecSet(), ) - logger.info(f"Recipe for conversion measure aggregation:\n{mf_pformat(conversion_measure_recipe)}") + logger.debug( + LazyFormat(lambda: f"Recipe for conversion measure aggregation:\n{mf_pformat(conversion_measure_recipe)}") + ) if base_measure_recipe is None: raise UnableToSatisfyQueryError( f"Unable to join all items in request. Measure: {base_measure_spec.measure_spec}; Specs to join: {base_required_linkable_specs}" @@ -405,15 +410,19 @@ def _build_conversion_metric_output_node( # TODO: [custom granularity] change this to an assertion once we're sure there aren't exceptions if not StructuredLinkableSpecName.from_name(conversion_type_params.entity).is_element_name: logger.warning( - f"Found additional annotations in type param entity name `{conversion_type_params.entity}`, which " - "should be a simple element name reference. This should have been blocked by model validation!" + LazyFormat( + lambda: f"Found additional annotations in type param entity name `{conversion_type_params.entity}`, which " + "should be a simple element name reference. This should have been blocked by model validation!" + ) ) entity_spec = EntitySpec(element_name=conversion_type_params.entity, entity_links=()) - logger.info( - f"For conversion metric {metric_spec},\n" - f"base_measure is:\n{mf_pformat(base_measure)}\n" - f"conversion_measure is:\n{mf_pformat(conversion_measure)}\n" - f"entity is:\n{mf_pformat(entity_spec)}" + logger.debug( + LazyFormat( + lambda: f"For conversion metric {metric_spec},\n" + f"base_measure is:\n{mf_pformat(base_measure)}\n" + f"conversion_measure is:\n{mf_pformat(conversion_measure)}\n" + f"entity is:\n{mf_pformat(entity_spec)}" + ) ) aggregated_measures_node = self._build_aggregated_conversion_node( @@ -534,10 +543,12 @@ def _build_base_metric_output_node( ), descendent_filter_specs=metric_spec.filter_specs, ) - logger.info( - f"For\n{indent(mf_pformat(metric_spec))}" - f"\nneeded measure is:" - f"\n{indent(mf_pformat(metric_input_measure_spec))}" + logger.debug( + LazyFormat( + lambda: f"For\n{indent(mf_pformat(metric_spec))}" + f"\nneeded measure is:" + f"\n{indent(mf_pformat(metric_input_measure_spec))}" + ) ) aggregated_measures_node = self.build_aggregated_measure( @@ -566,8 +577,11 @@ def _build_derived_metric_output_node( metric_reference=metric_spec.reference, filter_spec_factory=filter_spec_factory, ) - logger.info( - f"For {metric.type} metric: {metric_spec}, needed metrics are:\n" f"{mf_pformat(metric_input_specs)}" + logger.debug( + LazyFormat( + lambda: f"For {metric.type} metric: {metric_spec}, needed metrics are:\n" + f"{mf_pformat(metric_input_specs)}" + ) ) required_linkable_specs, extraneous_linkable_specs = self.__get_required_and_extraneous_linkable_specs( @@ -729,7 +743,7 @@ def _build_metrics_output_node( output_nodes: List[DataflowPlanNode] = [] for metric_spec in metric_specs: - logger.info(f"Generating compute metrics node for:\n{indent(mf_pformat(metric_spec))}") + logger.debug(LazyFormat(lambda: f"Generating compute metrics node for:\n{indent(mf_pformat(metric_spec))}")) self._metric_lookup.get_metric(metric_spec.reference) output_nodes.append( @@ -975,10 +989,12 @@ def _find_dataflow_recipe( ) default_join_type = SqlJoinType.FULL_OUTER - logger.info( - 
f"Starting search with {len(candidate_nodes_for_left_side_of_join)} potential source nodes on the left " - f"side of the join, and {len(candidate_nodes_for_right_side_of_join)} potential nodes on the right side " - f"of the join." + logger.debug( + LazyFormat( + lambda: f"Starting search with {len(candidate_nodes_for_left_side_of_join)} potential source nodes on the left " + f"side of the join, and {len(candidate_nodes_for_right_side_of_join)} potential nodes on the right side " + f"of the join." + ) ) start_time = time.time() @@ -1009,9 +1025,11 @@ def _find_dataflow_recipe( metric_time_dimension_reference=self._metric_time_dimension_reference, time_spine_nodes=self._source_node_set.time_spine_nodes_tuple, ) - logger.info( - f"After removing unnecessary nodes, there are {len(candidate_nodes_for_right_side_of_join)} candidate " - f"nodes for the right side of the join" + logger.debug( + LazyFormat( + lambda: f"After removing unnecessary nodes, there are {len(candidate_nodes_for_right_side_of_join)} candidate " + f"nodes for the right side of the join" + ) ) if DataflowPlanBuilder._contains_multihop_linkables(linkable_specs): candidate_nodes_for_right_side_of_join = list( @@ -1021,16 +1039,20 @@ def _find_dataflow_recipe( join_type=default_join_type, ) ) - logger.info( - f"After adding multi-hop nodes, there are {len(candidate_nodes_for_right_side_of_join)} candidate " - f"nodes for the right side of the join:\n" - f"{mf_pformat(candidate_nodes_for_right_side_of_join)}" + logger.debug( + LazyFormat( + lambda: f"After adding multi-hop nodes, there are {len(candidate_nodes_for_right_side_of_join)} candidate " + f"nodes for the right side of the join:\n" + f"{mf_pformat(candidate_nodes_for_right_side_of_join)}" + ) ) # If there are MetricGroupBys in the requested linkable specs, build source nodes to satisfy them. # We do this at query time instead of during usual source node generation because the number of potential # MetricGroupBy source nodes could be extremely large (and potentially slow). 
- logger.info(f"Building source nodes for group by metrics: {linkable_spec_set.group_by_metric_specs}") + logger.debug( + LazyFormat(lambda: f"Building source nodes for group by metrics: {linkable_spec_set.group_by_metric_specs}") + ) candidate_nodes_for_right_side_of_join += [ self._build_query_output_node( query_spec=self._source_node_builder.build_source_node_inputs_for_group_by_metric(group_by_metric_spec), @@ -1039,7 +1061,7 @@ def _find_dataflow_recipe( for group_by_metric_spec in linkable_spec_set.group_by_metric_specs ] - logger.info(f"Processing nodes took: {time.time()-start_time:.2f}s") + logger.debug(LazyFormat(lambda: f"Processing nodes took: {time.time()-start_time:.2f}s")) node_evaluator = NodeEvaluatorForLinkableInstances( semantic_model_lookup=self._semantic_model_lookup, @@ -1061,15 +1083,19 @@ def _find_dataflow_recipe( ] if missing_specs: logger.debug( - f"Skipping evaluation for:\n" - f"{indent(node.structure_text())}" - f"since it does not have all of the measure specs:\n" - f"{indent(mf_pformat(missing_specs))}" + LazyFormat( + lambda: f"Skipping evaluation for:\n" + f"{indent(node.structure_text())}" + f"since it does not have all of the measure specs:\n" + f"{indent(mf_pformat(missing_specs))}" + ) ) continue logger.debug( - f"Evaluating candidate node for the left side of the join:\n{indent(mf_pformat(node.structure_text()))}" + LazyFormat( + lambda: f"Evaluating candidate node for the left side of the join:\n{indent(mf_pformat(node.structure_text()))}" + ) ) start_time = time.time() @@ -1078,33 +1104,43 @@ def _find_dataflow_recipe( required_linkable_specs=list(linkable_specs), default_join_type=default_join_type, ) - logger.info(f"Evaluation of {node} took {time.time() - start_time:.2f}s") + logger.debug(LazyFormat(lambda: f"Evaluation of {node} took {time.time() - start_time:.2f}s")) - logger.info( - "Evaluation for source node:" - + indent(f"\nnode:\n{indent(node.structure_text())}") - + indent(f"\nevaluation:\n{indent(mf_pformat(evaluation))}") + logger.debug( + LazyFormat( + lambda: "Evaluation for source node:" + + indent(f"\nnode:\n{indent(node.structure_text())}") + + indent(f"\nevaluation:\n{indent(mf_pformat(evaluation))}") + ) ) if len(evaluation.unjoinable_linkable_specs) > 0: - logger.info( - f"Skipping {node.node_id} since it contains un-joinable specs: " - f"{evaluation.unjoinable_linkable_specs}" + logger.debug( + LazyFormat( + lambda: f"Skipping {node.node_id} since it contains un-joinable specs: " + f"{evaluation.unjoinable_linkable_specs}" + ) ) continue num_joins_required = len(evaluation.join_recipes) - logger.info(f"Found candidate with node ID '{node.node_id}' with {num_joins_required} joins required.") + logger.debug( + LazyFormat( + lambda: f"Found candidate with node ID '{node.node_id}' with {num_joins_required} joins required." + ) + ) node_to_evaluation[node] = evaluation # Since are evaluating nodes with the lowest cost first, if we find one without requiring any joins, then # this is going to be the lowest cost solution. if len(evaluation.join_recipes) == 0: - logger.info("Not evaluating other nodes since we found one that doesn't require joins") + logger.debug( + LazyFormat(lambda: "Not evaluating other nodes since we found one that doesn't require joins") + ) break - logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.") + logger.debug(LazyFormat(lambda: f"Found {len(node_to_evaluation)} candidate source nodes.")) if len(node_to_evaluation) > 0: # Find evaluation with lowest number of joins. 
@@ -1113,11 +1149,13 @@ def _find_dataflow_recipe( ) evaluation = node_to_evaluation[node_with_lowest_cost_plan] - logger.info( - "Lowest cost plan is:" - + indent(f"\nnode:\n{indent(node_with_lowest_cost_plan.structure_text())}") - + indent(f"\nevaluation:\n{indent(mf_pformat(evaluation))}") - + indent(f"\njoins: {len(node_to_evaluation[node_with_lowest_cost_plan].join_recipes)}") + logger.debug( + LazyFormat( + lambda: "Lowest cost plan is:" + + indent(f"\nnode:\n{indent(node_with_lowest_cost_plan.structure_text())}") + + indent(f"\nevaluation:\n{indent(mf_pformat(evaluation))}") + + indent(f"\njoins: {len(node_to_evaluation[node_with_lowest_cost_plan].join_recipes)}") + ) ) # Nodes containing the linkable instances will be joined to the source node, so these @@ -1143,7 +1181,7 @@ def _find_dataflow_recipe( join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_cost_plan].join_recipes, ) - logger.error("No recipe could be constructed.") + logger.error(LazyFormat(lambda: "No recipe could be constructed.")) return None def build_computed_metrics_node( @@ -1331,10 +1369,12 @@ def build_aggregated_measure( """ measure_spec = metric_input_measure_spec.measure_spec - logger.info( - f"Building aggregated measure: {measure_spec} with input measure filters:\n" - f"{mf_pformat(metric_input_measure_spec.filter_specs)}\n" - f"and filters:\n{mf_pformat(metric_input_measure_spec.filter_specs)}" + logger.debug( + LazyFormat( + lambda: f"Building aggregated measure: {measure_spec} with input measure filters:\n" + f"{mf_pformat(metric_input_measure_spec.filter_specs)}\n" + f"and filters:\n{mf_pformat(metric_input_measure_spec.filter_specs)}" + ) ) return self._build_aggregated_measure_from_measure_source_node( @@ -1397,7 +1437,11 @@ def _build_aggregated_measure_from_measure_source_node( cumulative_metric_adjusted_time_constraint: Optional[TimeRangeConstraint] = None if cumulative and predicate_pushdown_state.time_range_constraint is not None: - logger.info(f"Time range constraint before adjustment is {predicate_pushdown_state.time_range_constraint}") + logger.debug( + LazyFormat( + lambda: f"Time range constraint before adjustment is {predicate_pushdown_state.time_range_constraint}" + ) + ) granularity: Optional[TimeGranularity] = None count = 0 if cumulative_window is not None: @@ -1412,7 +1456,9 @@ def _build_aggregated_measure_from_measure_source_node( predicate_pushdown_state.time_range_constraint, granularity, count ) ) - logger.info(f"Adjusted time range constraint to: {cumulative_metric_adjusted_time_constraint}") + logger.debug( + LazyFormat(lambda: f"Adjusted time range constraint to: {cumulative_metric_adjusted_time_constraint}") + ) required_linkable_specs, extraneous_linkable_specs = self.__get_required_and_extraneous_linkable_specs( queried_linkable_specs=queried_linkable_specs, @@ -1425,10 +1471,12 @@ def _build_aggregated_measure_from_measure_source_node( ) if measure_recipe is None: - logger.info( - "Looking for a recipe to get:" - + indent(f"\nmeasure_specs:\n{mf_pformat([measure_spec])}") - + indent(f"\nevaluation:\n{mf_pformat(required_linkable_specs)}") + logger.debug( + LazyFormat( + lambda: "Looking for a recipe to get:" + + indent(f"\nmeasure_specs:\n{mf_pformat([measure_spec])}") + + indent(f"\nevaluation:\n{mf_pformat(required_linkable_specs)}") + ) ) measure_time_constraint = ( (cumulative_metric_adjusted_time_constraint or predicate_pushdown_state.time_range_constraint) @@ -1449,12 +1497,14 @@ def _build_aggregated_measure_from_measure_source_node( 
predicate_pushdown_state=measure_pushdown_state, linkable_spec_set=required_linkable_specs, ) - logger.info( - f"With {len(self._source_node_set.source_nodes_for_metric_queries)} source nodes, finding a recipe " - f"took {time.time() - find_recipe_start_time:.2f}s" + logger.debug( + LazyFormat( + lambda: f"With {len(self._source_node_set.source_nodes_for_metric_queries)} source nodes, finding a recipe " + f"took {time.time() - find_recipe_start_time:.2f}s" + ) ) - logger.info(f"Using recipe:\n{indent(mf_pformat(measure_recipe))}") + logger.debug(LazyFormat(lambda: f"Using recipe:\n{indent(mf_pformat(measure_recipe))}")) if measure_recipe is None: raise UnableToSatisfyQueryError( diff --git a/metricflow/dataflow/builder/node_evaluator.py b/metricflow/dataflow/builder/node_evaluator.py index b5a11fb2e8..4e8ae8a457 100644 --- a/metricflow/dataflow/builder/node_evaluator.py +++ b/metricflow/dataflow/builder/node_evaluator.py @@ -23,6 +23,7 @@ from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME from metricflow_semantics.instances import InstanceSet +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator from metricflow_semantics.model.semantics.semantic_model_lookup import SemanticModelLookup @@ -403,7 +404,7 @@ def evaluate_node( candidate_instance_set: InstanceSet = self._node_data_set_resolver.get_output_data_set(left_node).instance_set candidate_spec_set = candidate_instance_set.spec_set - logger.debug(f"Candidate spec set is:\n{mf_pformat(candidate_spec_set)}") + logger.debug(LazyFormat(lambda: f"Candidate spec set is:\n{mf_pformat(candidate_spec_set)}")) data_set_linkable_specs = candidate_spec_set.linkable_specs @@ -437,27 +438,31 @@ def evaluate_node( ) join_candidates: List[JoinLinkableInstancesRecipe] = [] - logger.info("Looping over nodes that can be joined to get the required linkable specs") + logger.debug(LazyFormat(lambda: "Looping over nodes that can be joined to get the required linkable specs")) # Using a greedy approach, try to get the "possibly_joinable_linkable_specs" by iteratively joining nodes with # the most matching linkable specs. We try to join nodes with the most matching specs to minimize the number of # joins that we have to do to. A knapsack solution is ideal, but punting on that for simplicity. while len(possibly_joinable_linkable_specs) > 0: - logger.info(f"Looking for linkable specs:\n{mf_pformat(possibly_joinable_linkable_specs)}") + logger.debug( + LazyFormat(lambda: f"Looking for linkable specs:\n{mf_pformat(possibly_joinable_linkable_specs)}") + ) # We've run out of candidate data sets, but there are more linkable specs that we need. That means the # rest of the linkable specs can't be joined in, and we're left with unjoinable specs remaining. if len(candidates_for_join) == 0: - logger.info( - "There are no more candidate nodes that can be joined, but not all linkable specs have " - "been acquired." + logger.debug( + LazyFormat( + lambda: "There are no more candidate nodes that can be joined, but not all linkable specs have " + "been acquired." 
+ ) ) unjoinable_linkable_specs.extend(possibly_joinable_linkable_specs) break # Join the best candidate to realize the linkable specs next_candidate = candidates_for_join.pop(0) - logger.info(f"The next candidate node to be joined is:\n{mf_pformat(next_candidate)}") + logger.debug(LazyFormat(lambda: f"The next candidate node to be joined is:\n{mf_pformat(next_candidate)}")) join_candidates.append(next_candidate) # Update the candidates. Since we'll be joined/ing the previously selected candidate, we no longer need @@ -474,7 +479,7 @@ def evaluate_node( x for x in possibly_joinable_linkable_specs if x not in next_candidate.satisfiable_linkable_specs ] - logger.info("Done evaluating possible joins") + logger.debug(LazyFormat(lambda: "Done evaluating possible joins")) return LinkableInstanceSatisfiabilityEvaluation( local_linkable_specs=tuple(local_linkable_specs), joinable_linkable_specs=tuple( diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index ed0c52213d..08d95111db 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -15,6 +15,7 @@ from metricflow_semantics.errors.error_classes import ExecutionException from metricflow_semantics.filters.time_constraint import TimeRangeConstraint from metricflow_semantics.mf_logging.formatting import indent +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.mf_logging.pretty_print import mf_pformat from metricflow_semantics.model.linkable_element_property import LinkableElementProperty from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup @@ -357,9 +358,11 @@ def __init__( if self._reset_id_enumeration: # Some of the objects that are created below use generated IDs. To avoid collision with IDs that are # generated for queries, set the ID generation numbering to start at a high enough number. 
-            logger.info(
-                f"For creating setup objects, setting numbering of generated IDs to start at: "
-                f"{MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_INITIALIZER}"
+            logger.debug(
+                LazyFormat(
+                    lambda: f"For creating setup objects, setting numbering of generated IDs to start at: "
+                    f"{MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_INITIALIZER}"
+                )
             )
             SequentialIdGenerator.reset(MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_INITIALIZER)
         self._semantic_manifest_lookup = semantic_manifest_lookup
@@ -378,7 +381,7 @@ def __init__(
         ):
             data_set = converter.create_sql_source_data_set(semantic_model)
             self._source_data_sets.append(data_set)
-            logger.info(f"Created source dataset from semantic model '{semantic_model.name}'")
+            logger.debug(LazyFormat(lambda: f"Created source dataset from semantic model '{semantic_model.name}'"))

         source_node_builder = SourceNodeBuilder(
             column_association_resolver=self._column_association_resolver,
@@ -416,7 +419,7 @@ def __init__(

     @log_call(module_name=__name__, telemetry_reporter=_telemetry_reporter)
     def query(self, mf_request: MetricFlowQueryRequest) -> MetricFlowQueryResult:  # noqa: D102
-        logger.info(f"Starting query request:\n{indent(mf_pformat(mf_request))}")
+        logger.debug(LazyFormat(lambda: f"Starting query request:\n{indent(mf_pformat(mf_request))}"))
         explain_result = self._create_execution_plan(mf_request)
         execution_plan = explain_result.convert_to_execution_plan_result.execution_plan
@@ -425,9 +428,9 @@
         task = execution_plan.tasks[0]

-        logger.info(f"Sequentially running tasks in:\n" f"{execution_plan.structure_text()}")
+        logger.debug(LazyFormat(lambda: f"Sequentially running tasks in:\n" f"{execution_plan.structure_text()}"))
         execution_results = self._executor.execute_plan(execution_plan)
-        logger.info("Finished running tasks in execution plan")
+        logger.debug(LazyFormat(lambda: "Finished running tasks in execution plan"))

         if execution_results.contains_task_errors:
             raise ExecutionException(f"Got errors while executing tasks:\n{execution_results.get_result(task.task_id)}")
@@ -436,7 +439,7 @@
         assert task_execution_result.sql, "Task execution should have returned SQL that was run"

-        logger.info(f"Finished query request: {mf_request.request_id}")
+        logger.debug(LazyFormat(lambda: f"Finished query request: {mf_request.request_id}"))
         return MetricFlowQueryResult(
             query_spec=explain_result.query_spec,
             dataflow_plan=explain_result.dataflow_plan,
@@ -452,8 +455,10 @@ def all_time_constraint(self) -> TimeRangeConstraint:

     def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> MetricFlowExplainResult:
         if self._reset_id_enumeration:
-            logger.info(
-                f"Setting ID generation to start at: {MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_QUERIES}"
+            logger.debug(
+                LazyFormat(
+                    lambda: f"Setting ID generation to start at: {MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_QUERIES}"
+                )
             )
             SequentialIdGenerator.reset(MetricFlowEngine._ID_ENUMERATION_START_VALUE_FOR_QUERIES)
@@ -489,7 +494,7 @@ def _create_execution_plan(self, mf_query_request: MetricFlowQueryRequest) -> Me
             order_by=mf_query_request.order_by,
             min_max_only=mf_query_request.min_max_only,
         ).query_spec
-        logger.info(f"Query spec is:\n{mf_pformat(query_spec)}")
+        logger.debug(LazyFormat(lambda: f"Query spec is:\n{mf_pformat(query_spec)}"))

         output_selection_specs: Optional[InstanceSpecSet] = None
         if mf_query_request.query_type == MetricFlowQueryType.DIMENSION_VALUES:
diff --git a/metricflow/execution/dataflow_to_execution.py b/metricflow/execution/dataflow_to_execution.py
index 8f2bb1de80..473a809d5d 100644
--- a/metricflow/execution/dataflow_to_execution.py
+++ b/metricflow/execution/dataflow_to_execution.py
@@ -2,6 +2,7 @@

 import logging

+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from typing_extensions import override

 from metricflow.dataflow.dataflow_plan import (
@@ -65,12 +66,12 @@ def __init__(
         self._sql_client = sql_client

     def _convert_to_sql_plan(self, node: DataflowPlanNode) -> ConvertToSqlPlanResult:
-        logger.info(f"Generating SQL query plan from {node.node_id}")
+        logger.debug(LazyFormat(lambda: f"Generating SQL query plan from {node.node_id}"))
         result = self._sql_plan_converter.convert_to_sql_query_plan(
             sql_engine_type=self._sql_client.sql_engine_type,
             dataflow_plan_node=node,
         )
-        logger.debug(f"Generated SQL query plan is:\n{result.sql_plan.structure_text()}")
+        logger.debug(LazyFormat(lambda: f"Generated SQL query plan is:\n{result.sql_plan.structure_text()}"))
         return result

     def _render_sql(self, convert_to_sql_plan_result: ConvertToSqlPlanResult) -> SqlPlanRenderResult:
diff --git a/metricflow/execution/execution_plan.py b/metricflow/execution/execution_plan.py
index 50b2668c3b..fa3dd2c106 100644
--- a/metricflow/execution/execution_plan.py
+++ b/metricflow/execution/execution_plan.py
@@ -8,6 +8,7 @@
 from metricflow_semantics.dag.id_prefix import IdPrefix, StaticIdPrefix
 from metricflow_semantics.dag.mf_dag import DagId, DagNode, DisplayedProperty, MetricFlowDag, NodeId
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.sql.sql_bind_parameters import SqlBindParameters
 from metricflow_semantics.sql.sql_table import SqlTable
 from metricflow_semantics.visitor import Visitable
@@ -186,9 +187,9 @@ def execute(self) -> TaskExecutionResult:  # noqa: D102
         sql_query = self.sql_query
         assert sql_query is not None, f"{self.sql_query=} should have been set during creation."
         start_time = time.time()
-        logger.info(f"Dropping table {self.output_table} in case it already exists")
+        logger.debug(LazyFormat(lambda: f"Dropping table {self.output_table} in case it already exists"))
         self.sql_client.execute(f"DROP TABLE IF EXISTS {self.output_table.sql}")
-        logger.info(f"Creating table {self.output_table} using a query")
+        logger.debug(LazyFormat(lambda: f"Creating table {self.output_table} using a query"))
         self.sql_client.execute(
             sql_query.sql_query,
             sql_bind_parameters=sql_query.bind_parameters,
diff --git a/metricflow/execution/executor.py b/metricflow/execution/executor.py
index ea8ba04595..63989caf43 100644
--- a/metricflow/execution/executor.py
+++ b/metricflow/execution/executor.py
@@ -6,6 +6,7 @@
 from typing import Dict

 from metricflow_semantics.dag.mf_dag import NodeId
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat

 from metricflow.execution.execution_plan import ExecutionPlan, ExecutionPlanTask, TaskExecutionResult
@@ -59,7 +60,7 @@ def _execute_dfs(self, current_task: ExecutionPlanTask, results: ExecutionResult
             return

         result = None
-        logger.info(f"Started task ID: {current_task.node_id}")
+        logger.debug(LazyFormat(lambda: f"Started task ID: {current_task.node_id}"))
         try:
             result = current_task.execute()
             results.add_result(current_task.task_id, result)
@@ -67,11 +68,17 @@ def _execute_dfs(self, current_task: ExecutionPlanTask, results: ExecutionResult
         if result:
             runtime = f"{result.end_time - result.start_time:.2f}s"
             if result.errors:
-                logger.info(f"Finished task ID: {current_task.node_id} with errors: {result.errors} in {runtime}")
+                logger.debug(
+                    LazyFormat(
+                        lambda: f"Finished task ID: {current_task.node_id} with errors: {result.errors} in {runtime}"
+                    )
+                )
             else:
-                logger.info(f"Finished task ID: {current_task.node_id} successfully in {runtime}")
+                logger.debug(
+                    LazyFormat(lambda: f"Finished task ID: {current_task.node_id} successfully in {runtime}")
+                )
         else:
-            logger.info(f"Task ID: {current_task.node_id} exited unexpectedly")
+            logger.debug(LazyFormat(lambda: f"Task ID: {current_task.node_id} exited unexpectedly"))

     def execute_plan(self, plan: ExecutionPlan) -> ExecutionResults:  # noqa: D102
         results = ExecutionResults()
diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py
index 92e3d33bbe..1516af33d7 100644
--- a/metricflow/plan_conversion/dataflow_to_sql.py
+++ b/metricflow/plan_conversion/dataflow_to_sql.py
@@ -27,6 +27,7 @@
     TimeDimensionInstance,
 )
 from metricflow_semantics.mf_logging.formatting import indent
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.specs.column_assoc import (
     ColumnAssociation,
@@ -221,11 +222,13 @@ def convert_to_sql_query_plan(
         for optimizer in SqlQueryOptimizerConfiguration.optimizers_for_level(
             optimization_level, use_column_alias_in_group_by=use_column_alias_in_group_by
         ):
-            logger.info(f"Applying optimizer: {optimizer.__class__.__name__}")
+            logger.debug(LazyFormat(lambda: f"Applying optimizer: {optimizer.__class__.__name__}"))
             sql_node = optimizer.optimize(sql_node)
-            logger.info(
-                f"After applying {optimizer.__class__.__name__}, the SQL query plan is:\n"
-                f"{indent(sql_node.structure_text())}"
+            logger.debug(
+                LazyFormat(
+                    lambda: f"After applying {optimizer.__class__.__name__}, the SQL query plan is:\n"
+                    f"{indent(sql_node.structure_text())}"
+                )
             )

         return ConvertToSqlPlanResult(
diff --git a/metricflow/plan_conversion/node_processor.py b/metricflow/plan_conversion/node_processor.py
index 8a139113db..59e712ccf1 100644
--- a/metricflow/plan_conversion/node_processor.py
+++ b/metricflow/plan_conversion/node_processor.py
@@ -8,6 +8,7 @@
 from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
 from dbt_semantic_interfaces.references import EntityReference, SemanticModelReference, TimeDimensionReference
 from metricflow_semantics.filters.time_constraint import TimeRangeConstraint
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.mf_logging.pretty_print import mf_pformat
 from metricflow_semantics.model.semantics.linkable_element import LinkableElementType
 from metricflow_semantics.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS
@@ -475,7 +476,7 @@ def _get_candidates_nodes_for_multi_hop(
             return ()

         multi_hop_join_candidates: List[MultiHopJoinCandidate] = []
-        logger.info(f"Creating nodes for {desired_linkable_spec}")
+        logger.debug(LazyFormat(lambda: f"Creating nodes for {desired_linkable_spec}"))

         for first_node_that_could_be_joined in nodes:
             data_set_of_first_node_that_could_be_joined = self._node_data_set_resolver.get_output_data_set(
@@ -587,8 +588,10 @@ def _get_candidates_nodes_for_multi_hop(
                 multi_hop_join_candidate.node_with_multi_hop_elements
             )
             logger.debug(
-                f"Node {multi_hop_join_candidate.node_with_multi_hop_elements} has spec set:\n"
-                f"{mf_pformat(output_data_set.instance_set.spec_set)}"
+                LazyFormat(
+                    lambda: f"Node {multi_hop_join_candidate.node_with_multi_hop_elements} has spec set:\n"
+                    f"{mf_pformat(output_data_set.instance_set.spec_set)}"
+                )
             )

         return multi_hop_join_candidates
@@ -646,23 +649,27 @@ def remove_unnecessary_nodes(
         ):
             relevant_element_names.remove(metric_time_dimension_reference.element_name)

-        logger.info(f"Relevant names are: {relevant_element_names}")
+        logger.debug(LazyFormat(lambda: f"Relevant names are: {relevant_element_names}"))

         relevant_nodes = []

         for node in nodes:
-            logger.debug(f"Examining {node} for pruning")
+            logger.debug(LazyFormat(lambda: f"Examining {node} for pruning"))
             data_set = self._node_data_set_resolver.get_output_data_set(node)
             element_names_in_data_set = ToElementNameSet().transform(data_set.instance_set.spec_set)
             element_names_intersection = element_names_in_data_set.intersection(relevant_element_names)

             if len(element_names_intersection) > 0:
-                logger.debug(f"Including {node} since `element_names_intersection` is {element_names_intersection}")
+                logger.debug(
+                    LazyFormat(
+                        lambda: f"Including {node} since `element_names_intersection` is {element_names_intersection}"
+                    )
+                )
                 relevant_nodes.append(node)
                 continue

             # Used for group-by-item-values queries.
             if node in time_spine_nodes:
-                logger.debug(f"Including {node} since it matches `time_spine_node`")
+                logger.debug(LazyFormat(lambda: f"Including {node} since it matches `time_spine_node`"))
                 relevant_nodes.append(node)
                 continue
diff --git a/metricflow/sql/optimizer/rewriting_sub_query_reducer.py b/metricflow/sql/optimizer/rewriting_sub_query_reducer.py
index b21f914c3f..66b66832d3 100644
--- a/metricflow/sql/optimizer/rewriting_sub_query_reducer.py
+++ b/metricflow/sql/optimizer/rewriting_sub_query_reducer.py
@@ -5,6 +5,7 @@
 from typing import List, Optional, Sequence, Tuple

 from metricflow_semantics.mf_logging.formatting import indent
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat

 from metricflow.sql.optimizer.sql_query_plan_optimizer import SqlQueryPlanOptimizer
 from metricflow.sql.sql_exprs import (
@@ -740,7 +741,11 @@ def visit_select_statement_node(self, node: SqlSelectStatementNode) -> SqlQueryP
                     )
                 )
             else:
-                logger.info(f"Did not find matching select for {group_by} in:\n{indent(node.structure_text())}")
+                logger.debug(
+                    LazyFormat(
+                        lambda: f"Did not find matching select for {group_by} in:\n{indent(node.structure_text())}"
+                    )
+                )
                 new_group_bys.append(group_by)

         return SqlSelectStatementNode.create(
diff --git a/scripts/ci_tests/metricflow_package_test.py b/scripts/ci_tests/metricflow_package_test.py
index 9b0d4fffd0..81e055c24d 100644
--- a/scripts/ci_tests/metricflow_package_test.py
+++ b/scripts/ci_tests/metricflow_package_test.py
@@ -11,6 +11,7 @@
 from dbt_semantic_interfaces.protocols import SemanticManifest, SemanticModel
 from dbt_semantic_interfaces.references import EntityReference
 from dbt_semantic_interfaces.validations.semantic_manifest_validator import SemanticManifestValidator
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.specs.column_assoc import ColumnAssociationResolver
 from metricflow_semantics.specs.dimension_spec import DimensionSpec
@@ -167,12 +168,12 @@ def log_dataflow_plan() -> None:  # noqa: D103
         )
     )

-    logger.info(f"Dataflow plan is:\n{dataflow_plan.structure_text()}")
+    logger.debug(LazyFormat(lambda: f"Dataflow plan is:\n{dataflow_plan.structure_text()}"))


 def check_engine_import(metricflow_engine: MetricFlowEngine) -> None:
     """Doesn't need to run, but having this here means that the import is tested."""
-    logger.info(f"Engine is {metricflow_engine}")
+    logger.debug(LazyFormat(lambda: f"Engine is {metricflow_engine}"))


 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
diff --git a/scripts/ci_tests/metricflow_semantics_package_test.py b/scripts/ci_tests/metricflow_semantics_package_test.py
index 8cb603952f..e028f61e5c 100644
--- a/scripts/ci_tests/metricflow_semantics_package_test.py
+++ b/scripts/ci_tests/metricflow_semantics_package_test.py
@@ -8,6 +8,7 @@
 from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
 from dbt_semantic_interfaces.protocols import SemanticManifest
 from dbt_semantic_interfaces.validations.semantic_manifest_validator import SemanticManifestValidator
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.mf_logging.pretty_print import mf_pformat
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.query.query_parser import MetricFlowQueryParser
@@ -82,7 +83,7 @@ def log_query_spec() -> None:  # noqa: D103
         metric_names=["bookings"], group_by_names=["booking__is_instant"]
     ).query_spec
-    logger.info(f"{query_spec.__class__.__name__}:\n{mf_pformat(query_spec)}")
+    logger.debug(LazyFormat(lambda: f"{query_spec.__class__.__name__}:\n{mf_pformat(query_spec)}"))


 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
diff --git a/tests_metricflow/examples/test_node_sql.py b/tests_metricflow/examples/test_node_sql.py
index fa5793f202..c7ac034512 100644
--- a/tests_metricflow/examples/test_node_sql.py
+++ b/tests_metricflow/examples/test_node_sql.py
@@ -5,6 +5,7 @@
 import pytest
 from dbt_semantic_interfaces.references import SemanticModelReference, TimeDimensionReference
 from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.mf_logging.pretty_print import mf_pformat
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.specs.dunder_column_association_resolver import DunderColumnAssociationResolver
@@ -61,8 +62,8 @@ def test_view_sql_generated_at_a_node(
     sql_plan_at_read_node = conversion_result.sql_plan
     sql_at_read_node = sql_renderer.render_sql_query_plan(sql_plan_at_read_node).sql
     spec_set_at_read_node = node_output_resolver.get_output_data_set(read_source_node).instance_set.spec_set
-    logger.info(f"SQL generated at {read_source_node} is:\n\n{sql_at_read_node}")
-    logger.info(f"Spec set at {read_source_node} is:\n\n{mf_pformat(spec_set_at_read_node)}")
+    logger.debug(LazyFormat(lambda: f"SQL generated at {read_source_node} is:\n\n{sql_at_read_node}"))
+    logger.debug(LazyFormat(lambda: f"Spec set at {read_source_node} is:\n\n{mf_pformat(spec_set_at_read_node)}"))

     metric_time_node = MetricTimeDimensionTransformNode.create(
         parent_node=read_source_node,
@@ -92,5 +93,7 @@ def test_view_sql_generated_at_a_node(
     spec_set_at_filter_elements_node = node_output_resolver.get_output_data_set(
         filter_elements_node
     ).instance_set.spec_set
-    logger.info(f"SQL generated at {filter_elements_node} is:\n\n{sql_at_filter_elements_node}")
-    logger.info(f"Spec set at {filter_elements_node} is:\n\n{mf_pformat(spec_set_at_filter_elements_node)}")
+    logger.debug(LazyFormat(lambda: f"SQL generated at {filter_elements_node} is:\n\n{sql_at_filter_elements_node}"))
+    logger.debug(
+        LazyFormat(lambda: f"Spec set at {filter_elements_node} is:\n\n{mf_pformat(spec_set_at_filter_elements_node)}")
+    )
diff --git a/tests_metricflow/fixtures/manifest_fixtures.py b/tests_metricflow/fixtures/manifest_fixtures.py
index f34be3fc83..2efb7bca75 100644
--- a/tests_metricflow/fixtures/manifest_fixtures.py
+++ b/tests_metricflow/fixtures/manifest_fixtures.py
@@ -11,6 +11,7 @@
 from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest
 from dbt_semantic_interfaces.protocols import SemanticModel
 from dbt_semantic_interfaces.test_utils import as_datetime
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
 from metricflow_semantics.query.query_parser import MetricFlowQueryParser
 from metricflow_semantics.specs.column_assoc import ColumnAssociationResolver
@@ -223,7 +224,9 @@ def _data_set_to_read_nodes(
         for semantic_model_name, data_set in data_sets.items():
             return_dict[semantic_model_name] = ReadSqlSourceNode.create(data_set)
             logger.debug(
-                f"For semantic model {semantic_model_name}, creating node_id {return_dict[semantic_model_name].node_id}"
+                LazyFormat(
+                    lambda: f"For semantic model {semantic_model_name}, creating node_id {return_dict[semantic_model_name].node_id}"
+                )
             )

         return return_dict
diff --git a/tests_metricflow/fixtures/setup_fixtures.py b/tests_metricflow/fixtures/setup_fixtures.py
index 57962f0961..e3835840f2 100644
--- a/tests_metricflow/fixtures/setup_fixtures.py
+++ b/tests_metricflow/fixtures/setup_fixtures.py
@@ -8,6 +8,7 @@
 import _pytest.config
 import pytest
 from _pytest.fixtures import FixtureRequest
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.random_id import random_id
 from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration
 from metricflow_semantics.test_helpers.snapshot_helpers import (
@@ -126,11 +127,13 @@ def mf_test_configuration(  # noqa: D103
                 f"schema: {mf_source_schema} default source schema: {default_source_schema}"
             )

-        logger.info(
-            f"Since the flag {USE_PERSISTENT_SOURCE_SCHEMA_CLI_FLAG} was specified, this session will use the "
-            f"persistent source schema {mf_source_schema}. If the required source tables do not exist in this "
-            f"schema, they will be created. However, the source schema (and the associated tables) will not "
-            f"be dropped at the end of the testing session."
+        logger.debug(
+            LazyFormat(
+                lambda: f"Since the flag {USE_PERSISTENT_SOURCE_SCHEMA_CLI_FLAG} was specified, this session will use the "
+                f"persistent source schema {mf_source_schema}. If the required source tables do not exist in this "
+                f"schema, they will be created. However, the source schema (and the associated tables) will not "
+                f"be dropped at the end of the testing session."
+            )
         )
     else:
         mf_source_schema = default_source_schema
diff --git a/tests_metricflow/fixtures/sql_client_fixtures.py b/tests_metricflow/fixtures/sql_client_fixtures.py
index d19e760c44..242b9d68a1 100644
--- a/tests_metricflow/fixtures/sql_client_fixtures.py
+++ b/tests_metricflow/fixtures/sql_client_fixtures.py
@@ -10,6 +10,7 @@
 from _pytest.fixtures import FixtureRequest
 from dbt.adapters.factory import get_adapter_by_type
 from dbt.cli.main import dbtRunner
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration

 from metricflow.protocols.sql_client import SqlClient
@@ -191,21 +192,21 @@ def ddl_sql_client(
         password=mf_test_configuration.sql_engine_password,
         schema=mf_test_configuration.mf_source_schema,
     )
-    logger.info(f"Creating schema '{mf_test_configuration.mf_system_schema}'")
+    logger.debug(LazyFormat(lambda: f"Creating schema '{mf_test_configuration.mf_system_schema}'"))
     sql_client.create_schema(mf_test_configuration.mf_system_schema)
     if mf_test_configuration.mf_system_schema != mf_test_configuration.mf_source_schema:
-        logger.info(f"Creating schema '{mf_test_configuration.mf_source_schema}'")
+        logger.debug(LazyFormat(lambda: f"Creating schema '{mf_test_configuration.mf_source_schema}'"))
         sql_client.create_schema(mf_test_configuration.mf_source_schema)

     yield sql_client
-    logger.info(f"Dropping schema '{mf_test_configuration.mf_system_schema}'")
+    logger.debug(LazyFormat(lambda: f"Dropping schema '{mf_test_configuration.mf_system_schema}'"))
     sql_client.drop_schema(mf_test_configuration.mf_system_schema, cascade=True)
     if (
         mf_test_configuration.mf_system_schema != mf_test_configuration.mf_source_schema
         and not mf_test_configuration.use_persistent_source_schema
     ):
logger.info(f"Dropping schema '{mf_test_configuration.mf_source_schema}'") + logger.debug(LazyFormat(lambda: f"Dropping schema '{mf_test_configuration.mf_source_schema}'")) sql_client.drop_schema(mf_test_configuration.mf_source_schema, cascade=True) sql_client.close() diff --git a/tests_metricflow/fixtures/sql_clients/adapter_backed_ddl_client.py b/tests_metricflow/fixtures/sql_clients/adapter_backed_ddl_client.py index dc4df06c61..122dddc013 100644 --- a/tests_metricflow/fixtures/sql_clients/adapter_backed_ddl_client.py +++ b/tests_metricflow/fixtures/sql_clients/adapter_backed_ddl_client.py @@ -5,6 +5,7 @@ import time from typing import Optional +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.sql.sql_table import SqlTable from dbt_metricflow.cli.dbt_connectors.adapter_backed_client import AdapterBackedSqlClient @@ -33,7 +34,9 @@ def create_table_from_data_table( df: The Pandas DataTable object containing the column schema and data to load chunk_size: The number of rows to insert per transaction """ - logger.info(f"Creating table '{sql_table.sql}' from a DataTable with {df.row_count} row(s)") + logger.debug( + LazyFormat(lambda: f"Creating table '{sql_table.sql}' from a DataTable with {df.row_count} row(s)") + ) start_time = time.time() with self._adapter.connection_named("MetricFlow_create_from_dataframe"): @@ -85,7 +88,11 @@ def create_table_from_data_table( # Commit all insert transaction at once self._adapter.commit_if_has_connection() - logger.info(f"Created SQL table '{sql_table.sql}' from an in-memory table in {time.time() - start_time:.2f}s") + logger.debug( + LazyFormat( + lambda: f"Created SQL table '{sql_table.sql}' from an in-memory table in {time.time() - start_time:.2f}s" + ) + ) def _get_sql_type(self, column_description: ColumnDescription) -> str: """Helper method to get the engine-specific type value. diff --git a/tests_metricflow/fixtures/table_fixtures.py b/tests_metricflow/fixtures/table_fixtures.py index 2844178f21..0579a5cabf 100644 --- a/tests_metricflow/fixtures/table_fixtures.py +++ b/tests_metricflow/fixtures/table_fixtures.py @@ -6,6 +6,7 @@ import pytest from dbt_semantic_interfaces.type_enums import TimeGranularity +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration from metricflow_semantics.time.time_spine_source import TimeSpineSource @@ -78,9 +79,11 @@ def create_source_tables( If a table with a given name already exists in the source schema, it's assumed to have the expected schema / data. """ if mf_test_configuration.use_persistent_source_schema: - logger.info( - "This session was configured to use a persistent source schema, so this fixture won't create new tables. " - "See populate_source_schema() for more details." + logger.debug( + LazyFormat( + lambda: "This session was configured to use a persistent source schema, so this fixture won't create new tables. " + "See populate_source_schema() for more details." 
+            )
         )
         return
diff --git a/tests_metricflow/generate_snapshots.py b/tests_metricflow/generate_snapshots.py
index 3da97d76f1..a67c685725 100644
--- a/tests_metricflow/generate_snapshots.py
+++ b/tests_metricflow/generate_snapshots.py
@@ -46,6 +46,7 @@
 from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
 from dbt_semantic_interfaces.implementations.base import FrozenBaseModel
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat

 from metricflow.protocols.sql_client import SqlEngine
 from tests_metricflow.fixtures.setup_fixtures import SQL_ENGINE_SNAPSHOT_MARKER_NAME
@@ -111,7 +112,7 @@ def as_configurations(self) -> Sequence[MetricFlowTestConfiguration]:  # noqa: D

 def run_command(command: str) -> None:  # noqa: D103
-    logger.info(f"Running command {command}")
+    logger.debug(LazyFormat(lambda: f"Running command {command}"))
     return_code = os.system(command)
     if return_code != 0:
         raise RuntimeError(f"Error running command: {command}")
@@ -180,11 +181,15 @@ def run_cli(function_to_run: Callable) -> None:  # noqa: D103

     credential_sets = MetricFlowTestCredentialSetForAllEngines.parse_raw(credential_sets_json_str)

-    logger.info(f"Running tests in '{TEST_DIRECTORY}' with the marker '{SQL_ENGINE_SNAPSHOT_MARKER_NAME}'")
+    logger.debug(
+        LazyFormat(lambda: f"Running tests in '{TEST_DIRECTORY}' with the marker '{SQL_ENGINE_SNAPSHOT_MARKER_NAME}'")
+    )

     for test_configuration in credential_sets.as_configurations:
-        logger.info(
-            f"Running tests for {test_configuration.engine} with URL: {test_configuration.credential_set.engine_url}"
+        logger.debug(
+            LazyFormat(
+                lambda: f"Running tests for {test_configuration.engine} with URL: {test_configuration.credential_set.engine_url}"
+            )
         )
         function_to_run(test_configuration)
diff --git a/tests_metricflow/integration/test_configured_cases.py b/tests_metricflow/integration/test_configured_cases.py
index b192c7acae..9c700d7d6f 100644
--- a/tests_metricflow/integration/test_configured_cases.py
+++ b/tests_metricflow/integration/test_configured_cases.py
@@ -12,6 +12,7 @@
 from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasureAggregationParameters
 from dbt_semantic_interfaces.type_enums.date_part import DatePart
 from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.protocols.query_parameter import DimensionOrEntityQueryParameter
 from metricflow_semantics.specs.query_param_implementations import DimensionOrEntityParameter, TimeDimensionParameter
 from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration
@@ -227,7 +228,7 @@ def test_case(
 ) -> None:
     """Runs all integration tests configured in the test case YAML directory."""
     case = CONFIGURED_INTEGRATION_TESTS_REPOSITORY.get_test_case(name)
-    logger.info(f"Running integration test case: '{case.name}' from file '{case.file_path}'")
+    logger.debug(LazyFormat(lambda: f"Running integration test case: '{case.name}' from file '{case.file_path}'"))

     time_spine_source = time_spine_sources[TimeGranularity.DAY]
     missing_required_features = filter_not_supported_features(sql_client, case.required_features)
diff --git a/tests_metricflow/mf_logging/test_dag_to_text.py b/tests_metricflow/mf_logging/test_dag_to_text.py
index 66b5100e26..06b705357e 100644
--- a/tests_metricflow/mf_logging/test_dag_to_text.py
+++ b/tests_metricflow/mf_logging/test_dag_to_text.py
@@ -9,6 +9,7 @@
 from metricflow_semantics.dag.dag_to_text import MetricFlowDagTextFormatter
 from metricflow_semantics.dag.mf_dag import DagId
 from metricflow_semantics.mf_logging.formatting import indent
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.sql.sql_table import SqlTable

 from metricflow.sql.sql_exprs import (
@@ -48,14 +49,14 @@ def test_multithread_dag_to_text() -> None:

     def _run_mf_pformat() -> None:
         current_thread = threading.current_thread()
-        logger.debug(f"In {current_thread} - Starting .dag_to_text()")
+        logger.debug(LazyFormat(lambda: f"In {current_thread} - Starting .dag_to_text()"))
         # Sleep a little bit so that all threads are likely to be running simultaneously.
         time.sleep(0.5)
         try:
             output = dag_to_text_formatter.dag_to_text(dag)
-            logger.debug(f"in {current_thread} - Output is:\n{indent(output)}")
+            logger.debug(LazyFormat(lambda: f"In {current_thread} - Output is:\n{indent(output)}"))
             thread_outputs.append(output)
-            logger.debug(f"In {current_thread} - Successfully finished .dag_to_text()")
+            logger.debug(LazyFormat(lambda: f"In {current_thread} - Successfully finished .dag_to_text()"))
         except Exception:
             logger.exception(f"In {current_thread} - Exiting due to an exception")
diff --git a/tests_metricflow/source_schema_tools.py b/tests_metricflow/source_schema_tools.py
index 3a3a96d410..0a8e655225 100644
--- a/tests_metricflow/source_schema_tools.py
+++ b/tests_metricflow/source_schema_tools.py
@@ -2,6 +2,7 @@

 import logging

+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
 from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration

 from metricflow.protocols.sql_client import SqlEngine
@@ -22,7 +23,7 @@ def create_tables_listed_in_table_snapshot_repository(
     """Creates all tables in the table snapshot repository in the given schema."""
     snapshot_loader = SqlTableSnapshotLoader(ddl_sql_client=ddl_sql_client, schema_name=schema_name)
     for table_snapshot in table_snapshot_repository.table_snapshots:
-        logger.info(f"Loading: {table_snapshot.table_name}")
+        logger.debug(LazyFormat(lambda: f"Loading: {table_snapshot.table_name}"))
         snapshot_loader.load(table_snapshot)
@@ -57,9 +58,9 @@ def populate_source_schema(

     schema_name = mf_test_configuration.mf_source_schema

-    logger.info(f"Dropping schema {schema_name}")
+    logger.debug(LazyFormat(lambda: f"Dropping schema {schema_name}"))
     ddl_sql_client.drop_schema(schema_name=schema_name, cascade=True)
-    logger.info(f"Creating schema {schema_name}")
+    logger.debug(LazyFormat(lambda: f"Creating schema {schema_name}"))
     ddl_sql_client.create_schema(schema_name=schema_name)
     create_tables_listed_in_table_snapshot_repository(
         ddl_sql_client=ddl_sql_client,
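
Note for reviewers: every call site in this patch follows the same pattern, wrapping the message in LazyFormat with a lambda so that the f-string (and any expensive mf_pformat() or structure_text() call inside it) is only evaluated when the record is actually emitted. The real implementation lives in metricflow_semantics/mf_logging/lazy_formattable.py (see the imports added above); the sketch below is only a minimal illustration of the idea, and the internals shown (the constructor and attribute names) are assumptions, not that module's code.

    import logging

    logger = logging.getLogger(__name__)


    class LazyFormat:
        """Sketch of a lazily formatted log message (assumed internals).

        The logging module calls str() on the message object only when a
        handler actually processes the record, so the lambda, and any
        expensive formatting inside it, never runs for a disabled level.
        """

        def __init__(self, format_lambda):
            self._format_lambda = format_lambda

        def __str__(self) -> str:
            # Evaluated at emit time, not at the logger.debug() call site.
            return self._format_lambda()


    # Usage mirroring the call sites above: logger.debug() checks
    # isEnabledFor(DEBUG) before building the record, so the lambda body
    # is skipped entirely when DEBUG logging is disabled.
    expensive = {"spec": list(range(5))}
    logger.debug(LazyFormat(lambda: f"Query spec is:\n{expensive}"))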