From 57ae9180c2dba3b088c944f490212ece65b518c2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 13 Oct 2021 16:05:54 -0400 Subject: [PATCH 01/31] init --- core/dbt/events/__init__.py | 0 core/dbt/events/events.py | 46 +++++++++++++++++++++++++++++++++++++ core/dbt/task/parse.py | 1 + 3 files changed, 47 insertions(+) create mode 100644 core/dbt/events/__init__.py create mode 100644 core/dbt/events/events.py diff --git a/core/dbt/events/__init__.py b/core/dbt/events/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/dbt/events/events.py b/core/dbt/events/events.py new file mode 100644 index 00000000000..0d4921e8ad6 --- /dev/null +++ b/core/dbt/events/events.py @@ -0,0 +1,46 @@ + +from typing import NamedTuple, NoReturn, Union + + +# common trick for getting mypy to do exhaustiveness checks +# will come up with something like `"assert_never" has incompatible type` +# if something is missing. +def assert_never(x: NoReturn) -> NoReturn: + raise AssertionError("Unhandled type: {}".format(type(x).__name__)) + +# The following classes represent the data necessary to describe a +# particular event to both human readable logs, and machine reliable +# event streams. The transformation to these forms will live in outside +# functions. +# +# Until we drop support for Python 3.6 we must use NamedTuples over +# frozen dataclasses. + +# TODO dummy class +class OK(NamedTuple): + result: int + + +# TODO dummy class +class Failure(NamedTuple): + msg: str + + +# using a union instead of inheritance means that this set cannot +# be extended outside this file, and thus mypy can do exhaustiveness +# checks for us. +Event = Union[OK, Failure] + + +# function that translates any instance of the above event types +# into its human-readable message. +# +# This could instead be implemented as a method on an ABC for all +# above classes, but this at least puts all that logic in one place. 
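+#
+# For illustration only (not part of this commit): if a new variant were
+# added to the union, e.g. a hypothetical
+#
+#     Event = Union[OK, Failure, Skipped]
+#
+# without a matching `isinstance` branch below, mypy would report an error
+# at the `assert_never(r)` call, because `r` would no longer narrow to
+# `NoReturn` in the final `else` branch.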
+def humanMsg(r: Event) -> str:
+    if isinstance(r, OK):
+        return str(r.result)
+    elif isinstance(r, Failure):
+        return "Failure: " + r.msg
+    else:
+        assert_never(r)
diff --git a/core/dbt/task/parse.py b/core/dbt/task/parse.py
index 985db062093..6f68df53b68 100644
--- a/core/dbt/task/parse.py
+++ b/core/dbt/task/parse.py
@@ -81,6 +81,7 @@ def compile_manifest(self):
         self.graph = compiler.compile(self.manifest)
 
     def run(self):
+        events.register(Progress(ParseStart))
         print_timestamped_line('Start parsing.')
         self.get_full_manifest()
         if self.args.compile:

From f9ef9da110e57921fdc0569226b90439fe2fa0e9 Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Fri, 29 Oct 2021 09:16:06 -0400
Subject: [PATCH 02/31] Initial structured logging work with `fire_event`
 (#4137)

add event type modeling and fire_event calls
---
 core/dbt/events/README.md    |   9 +++
 core/dbt/events/events.py    |  46 -------------
 core/dbt/events/functions.py |  28 ++++++++
 core/dbt/events/history.py   |   7 ++
 core/dbt/events/types.py     | 124 +++++++++++++++++++++++++++++++++++
 core/dbt/logger.py           |   6 +-
 core/dbt/task/parse.py       |  31 +++++----
 7 files changed, 191 insertions(+), 60 deletions(-)
 create mode 100644 core/dbt/events/README.md
 delete mode 100644 core/dbt/events/events.py
 create mode 100644 core/dbt/events/functions.py
 create mode 100644 core/dbt/events/history.py
 create mode 100644 core/dbt/events/types.py

diff --git a/core/dbt/events/README.md b/core/dbt/events/README.md
new file mode 100644
index 00000000000..f3f765f34d6
--- /dev/null
+++ b/core/dbt/events/README.md
@@ -0,0 +1,9 @@
+# Events Module
+
+The Events module is the implementation for structured logging. These events represent both a programmatic interface to dbt processes as well as human-readable messaging in one centralized place. The centralization allows for leveraging mypy to enforce interface invariants across all dbt events, and the distinct type layer allows for decoupling events and libraries such as loggers.
+
+# Using the Events Module
+The event module provides types that represent what is happening in dbt in `events.types`. These types are intended to represent an exhaustive list of all things happening within dbt that will need to be logged, streamed, or printed. To fire an event, `events.functions::fire_event` is the entry point to the module from everywhere in dbt.
+
+# Adding a New Event
+In `events.types` add a new class that represents the new event. This may be a simple class with no values, or it may be a dataclass with some values to construct downstream messaging. Only include the data necessary to construct this message within this class. You must extend all destinations (e.g. - if your log message belongs on the cli, extend `CliEventABC`) as well as the loglevel this event belongs to.
diff --git a/core/dbt/events/events.py b/core/dbt/events/events.py
deleted file mode 100644
index 0d4921e8ad6..00000000000
--- a/core/dbt/events/events.py
+++ /dev/null
@@ -1,46 +0,0 @@
-
-from typing import NamedTuple, NoReturn, Union
-
-
-# common trick for getting mypy to do exhaustiveness checks
-# will come up with something like `"assert_never" has incompatible type`
-# if something is missing.
-def assert_never(x: NoReturn) -> NoReturn:
-    raise AssertionError("Unhandled type: {}".format(type(x).__name__))
-
-# The following classes represent the data necessary to describe a
-# particular event to both human readable logs, and machine reliable
-# event streams. The transformation to these forms will live in outside
-# functions.
-# -# Until we drop support for Python 3.6 we must use NamedTuples over -# frozen dataclasses. - -# TODO dummy class -class OK(NamedTuple): - result: int - - -# TODO dummy class -class Failure(NamedTuple): - msg: str - - -# using a union instead of inheritance means that this set cannot -# be extended outside this file, and thus mypy can do exhaustiveness -# checks for us. -Event = Union[OK, Failure] - - -# function that translates any instance of the above event types -# into its human-readable message. -# -# This could instead be implemented as a method on an ABC for all -# above classes, but this at least puts all that logic in one place. -def humanMsg(r: Event) -> str: - if isinstance(r, OK): - return str(r.result) - elif isinstance(r, Failure): - return "Failure: " + r.msg - else: - assert_never(r) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py new file mode 100644 index 00000000000..fea072ff8b2 --- /dev/null +++ b/core/dbt/events/functions.py @@ -0,0 +1,28 @@ + +import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger +from dbt.events.history import EVENT_HISTORY +from dbt.events.types import CliEventABC, Event + + +# top-level method for accessing the new eventing system +# this is where all the side effects happen branched by event type +# (i.e. - mutating the event history, printing to stdout, logging +# to files, etc.) +def fire_event(e: Event) -> None: + EVENT_HISTORY.append(e) + if isinstance(e, CliEventABC): + if e.level_tag() == 'test': + # TODO after implmenting #3977 send to new test level + logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'debug': + logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'info': + logger.GLOBAL_LOGGER.info(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'warn': + logger.GLOBAL_LOGGER.warning()(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'error': + logger.GLOBAL_LOGGER.error(logger.timestamped_line(e.cli_msg())) + else: + raise AssertionError( + f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}" + ) diff --git a/core/dbt/events/history.py b/core/dbt/events/history.py new file mode 100644 index 00000000000..1bbc1995f7d --- /dev/null +++ b/core/dbt/events/history.py @@ -0,0 +1,7 @@ +from dbt.events.types import Event +from typing import List + + +# the global history of events for this session +# TODO this is naive and the memory footprint is likely far too large. +EVENT_HISTORY: List[Event] = [] diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py new file mode 100644 index 00000000000..15e6003abde --- /dev/null +++ b/core/dbt/events/types.py @@ -0,0 +1,124 @@ +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass + + +# types to represent log levels + +# in preparation for #3977 +class TestLevel(): + def level_tag(self) -> str: + return "test" + + +class DebugLevel(): + def level_tag(self) -> str: + return "debug" + + +class InfoLevel(): + def level_tag(self) -> str: + return "info" + + +class WarnLevel(): + def level_tag(self) -> str: + return "warn" + + +class ErrorLevel(): + def level_tag(self) -> str: + return "error" + + +# The following classes represent the data necessary to describe a +# particular event to both human readable logs, and machine reliable +# event streams. 
classes extend superclasses that indicate what +# destinations they are intended for, which mypy uses to enforce +# that the necessary methods are defined. + + +# top-level superclass for all events +class Event(metaclass=ABCMeta): + # do not define this yourself. inherit it from one of the above level types. + @abstractmethod + def level_tag(self) -> str: + raise Exception("level_tag not implemented for event") + + +class CliEventABC(Event, metaclass=ABCMeta): + # Solely the human readable message. Timestamps and formatting will be added by the logger. + @abstractmethod + def cli_msg(self) -> str: + raise Exception("cli_msg not implemented for cli event") + + +class ParsingStart(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Start parsing." + + +class ParsingCompiling(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Compiling." + + +class ParsingWritingManifest(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Writing manifest." + + +class ParsingDone(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Done." + + +class ManifestDependenciesLoaded(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Dependencies loaded" + + +class ManifestLoaderCreated(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "ManifestLoader created" + + +class ManifestLoaded(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Manifest loaded" + + +class ManifestChecked(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Manifest checked" + + +class ManifestFlatGraphBuilt(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Flat graph built" + + +@dataclass +class ReportPerformancePath(InfoLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Performance info: {self.path}" + + +# since mypy doesn't run on every file we need to suggest to mypy that every +# class gets instantiated. But we don't actually want to run this code. +# making the conditional `if False` causes mypy to skip it as dead code so +# we need to skirt around that by computing something it doesn't check statically. +# +# TODO remove these lines once we run mypy everywhere. 
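+#
+# For example (illustration only): under `if 1 == 0:` mypy still type-checks
+# the calls, so a hypothetical `ReportPerformancePath()` with the required
+# `path` argument missing would be flagged; under a literal `if False:` the
+# block would be pruned as unreachable and nothing would be checked.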
+if 1 == 0: + ParsingStart() + ParsingCompiling() + ParsingWritingManifest() + ParsingDone() + ManifestDependenciesLoaded() + ManifestLoaderCreated() + ManifestLoaded() + ManifestChecked() + ManifestFlatGraphBuilt() + ReportPerformancePath(path='') diff --git a/core/dbt/logger.py b/core/dbt/logger.py index 16eb7cd56a8..468fcb1d021 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -655,8 +655,12 @@ def get_timestamp(): return time.strftime("%H:%M:%S") +def timestamped_line(msg: str) -> str: + return "{} | {}".format(get_timestamp(), msg) + + def print_timestamped_line(msg: str, use_color: Optional[str] = None): if use_color is not None: msg = dbt.ui.color(msg, use_color) - GLOBAL_LOGGER.info("{} | {}".format(get_timestamp(), msg)) + GLOBAL_LOGGER.info(timestamped_line(msg)) diff --git a/core/dbt/task/parse.py b/core/dbt/task/parse.py index 6f68df53b68..e6a6d259951 100644 --- a/core/dbt/task/parse.py +++ b/core/dbt/task/parse.py @@ -11,8 +11,14 @@ from dbt.parser.manifest import ( Manifest, ManifestLoader, _check_manifest ) -from dbt.logger import DbtProcessState, print_timestamped_line +from dbt.logger import DbtProcessState from dbt.clients.system import write_file +from dbt.events.types import ( + ManifestDependenciesLoaded, ManifestLoaderCreated, ManifestLoaded, ManifestChecked, + ManifestFlatGraphBuilt, ParsingStart, ParsingCompiling, ParsingWritingManifest, ParsingDone, + ReportPerformancePath +) +from dbt.events.functions import fire_event from dbt.graph import Graph import time from typing import Optional @@ -40,7 +46,7 @@ def write_perf_info(self): path = os.path.join(self.config.target_path, PERF_INFO_FILE_NAME) write_file(path, json.dumps(self.loader._perf_info, cls=dbt.utils.JSONEncoder, indent=4)) - print_timestamped_line(f"Performance info: {path}") + fire_event(ReportPerformancePath(path=path)) # This method takes code that normally exists in other files # and pulls it in here, to simplify logging and make the @@ -58,22 +64,22 @@ def get_full_manifest(self): with PARSING_STATE: start_load_all = time.perf_counter() projects = root_config.load_dependencies() - print_timestamped_line("Dependencies loaded") + fire_event(ManifestDependenciesLoaded()) loader = ManifestLoader(root_config, projects, macro_hook) - print_timestamped_line("ManifestLoader created") + fire_event(ManifestLoaderCreated()) manifest = loader.load() - print_timestamped_line("Manifest loaded") + fire_event(ManifestLoaded()) _check_manifest(manifest, root_config) - print_timestamped_line("Manifest checked") + fire_event(ManifestChecked()) manifest.build_flat_graph() - print_timestamped_line("Flat graph built") + fire_event(ManifestFlatGraphBuilt()) loader._perf_info.load_all_elapsed = ( time.perf_counter() - start_load_all ) self.loader = loader self.manifest = manifest - print_timestamped_line("Manifest loaded") + fire_event(ManifestLoaded()) def compile_manifest(self): adapter = get_adapter(self.config) @@ -81,15 +87,14 @@ def compile_manifest(self): self.graph = compiler.compile(self.manifest) def run(self): - events.register(Progress(ParseStart)) - print_timestamped_line('Start parsing.') + fire_event(ParsingStart()) self.get_full_manifest() if self.args.compile: - print_timestamped_line('Compiling.') + fire_event(ParsingCompiling()) self.compile_manifest() if self.args.write_manifest: - print_timestamped_line('Writing manifest.') + fire_event(ParsingWritingManifest()) self.write_manifest() self.write_perf_info() - print_timestamped_line('Done.') + fire_event(ParsingDone()) From 
69aa6bf9645bc479feeb3ab83bbc032cab40fc62 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 8 Nov 2021 14:17:25 -0600 Subject: [PATCH 03/31] context call sites (#4164) * updated context dir to new structured logging --- core/dbt/context/base.py | 7 ++++--- core/dbt/context/providers.py | 1 - core/dbt/events/types.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/core/dbt/context/base.py b/core/dbt/context/base.py index 4bc47000716..f3b82324fbb 100644 --- a/core/dbt/context/base.py +++ b/core/dbt/context/base.py @@ -12,7 +12,8 @@ ) from dbt.contracts.graph.compiled import CompiledResource from dbt.exceptions import raise_compiler_error, MacroReturn, raise_parsing_error -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import MacroEventInfo, MacroEventDebug from dbt.version import __version__ as dbt_version # These modules are added to the context. Consider alternative @@ -481,9 +482,9 @@ def log(msg: str, info: bool = False) -> str: {% endmacro %}" """ if info: - logger.info(msg) + fire_event(MacroEventInfo(msg)) else: - logger.debug(msg) + fire_event(MacroEventDebug(msg)) return '' @contextproperty diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 28eec309251..ea88d69b528 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -52,7 +52,6 @@ raise_parsing_error, ) from dbt.config import IsFQNResource -from dbt.logger import GLOBAL_LOGGER as logger, SECRET_ENV_PREFIX # noqa from dbt.node_types import NodeType from dbt.utils import ( diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 15e6003abde..892061417ad 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -105,6 +105,22 @@ def cli_msg(self) -> str: return f"Performance info: {self.path}" +@dataclass +class MacroEventInfo(InfoLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return self.msg + + +@dataclass +class MacroEventDebug(DebugLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return self.msg + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. 
# making the conditional `if False` causes mypy to skip it as dead code so @@ -122,3 +138,5 @@ def cli_msg(self) -> str: ManifestChecked() ManifestFlatGraphBuilt() ReportPerformancePath(path='') + MacroEventInfo(msg='') + MacroEventDebug(msg='') From 44a9da621eb05718ffecb0f8dc5081f0492a2c06 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 29 Oct 2021 16:01:04 -0400 Subject: [PATCH 04/31] Handle exec info (#4168) handle exec info --- core/dbt/events/functions.py | 50 +++++++++++++++++++++++++++++++----- core/dbt/events/types.py | 8 ++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index fea072ff8b2..5c97ed7fb32 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -1,7 +1,7 @@ import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger from dbt.events.history import EVENT_HISTORY -from dbt.events.types import CliEventABC, Event +from dbt.events.types import CliEventABC, Event, ShowException # top-level method for accessing the new eventing system @@ -11,17 +11,53 @@ def fire_event(e: Event) -> None: EVENT_HISTORY.append(e) if isinstance(e, CliEventABC): - if e.level_tag() == 'test': + if e.level_tag() == 'test' and not isinstance(e, ShowException): # TODO after implmenting #3977 send to new test level logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg())) - elif e.level_tag() == 'debug': + elif e.level_tag() == 'test' and isinstance(e, ShowException): + # TODO after implmenting #3977 send to new test level + logger.GLOBAL_LOGGER.debug( + logger.timestamped_line(e.cli_msg()), + exc_info=e.exc_info, + stack_info=e.stack_info, + extra=e.extra + ) + elif e.level_tag() == 'debug' and not isinstance(e, ShowException): logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg())) - elif e.level_tag() == 'info': + elif e.level_tag() == 'debug' and isinstance(e, ShowException): + logger.GLOBAL_LOGGER.debug( + logger.timestamped_line(e.cli_msg()), + exc_info=e.exc_info, + stack_info=e.stack_info, + extra=e.extra + ) + elif e.level_tag() == 'info' and not isinstance(e, ShowException): logger.GLOBAL_LOGGER.info(logger.timestamped_line(e.cli_msg())) - elif e.level_tag() == 'warn': - logger.GLOBAL_LOGGER.warning()(logger.timestamped_line(e.cli_msg())) - elif e.level_tag() == 'error': + elif e.level_tag() == 'info' and isinstance(e, ShowException): + logger.GLOBAL_LOGGER.info( + logger.timestamped_line(e.cli_msg()), + exc_info=e.exc_info, + stack_info=e.stack_info, + extra=e.extra + ) + elif e.level_tag() == 'warn' and not isinstance(e, ShowException): + logger.GLOBAL_LOGGER.warning(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'warn' and isinstance(e, ShowException): + logger.GLOBAL_LOGGER.warning( + logger.timestamped_line(e.cli_msg()), + exc_info=e.exc_info, + stack_info=e.stack_info, + extra=e.extra + ) + elif e.level_tag() == 'error' and not isinstance(e, ShowException): logger.GLOBAL_LOGGER.error(logger.timestamped_line(e.cli_msg())) + elif e.level_tag() == 'error' and isinstance(e, ShowException): + logger.GLOBAL_LOGGER.error( + logger.timestamped_line(e.cli_msg()), + exc_info=e.exc_info, + stack_info=e.stack_info, + extra=e.extra + ) else: raise AssertionError( f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}" diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 892061417ad..c8a1c8dedc1 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,5 +1,6 @@ from abc import 
ABCMeta, abstractmethod from dataclasses import dataclass +from typing import Any # types to represent log levels @@ -30,6 +31,13 @@ def level_tag(self) -> str: return "error" +@dataclass +class ShowException(): + exc_info: Any = None + stack_info: Any = None + extra: Any = None + + # The following classes represent the data necessary to describe a # particular event to both human readable logs, and machine reliable # event streams. classes extend superclasses that indicate what From 5b2562a919b066f86db3c0c3a9643ae1be7dcdd1 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 29 Oct 2021 16:35:48 -0400 Subject: [PATCH 05/31] Client call sites (#4163) update log call sites with new event system --- core/dbt/clients/git.py | 24 +++--- core/dbt/clients/jinja.py | 1 - core/dbt/clients/registry.py | 11 ++- core/dbt/clients/system.py | 23 +++--- core/dbt/events/types.py | 143 ++++++++++++++++++++++++++++++++++- 5 files changed, 175 insertions(+), 27 deletions(-) diff --git a/core/dbt/clients/git.py b/core/dbt/clients/git.py index a294cc013cc..e16da8560e4 100644 --- a/core/dbt/clients/git.py +++ b/core/dbt/clients/git.py @@ -2,7 +2,12 @@ import os.path from dbt.clients.system import run_cmd, rmdir -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + GitSparseCheckoutSubdirectory, GitProgressCheckoutRevision, + GitProgressUpdatingExistingDependency, GitProgressPullingNewDependency, + GitNothingToDo, GitProgressUpdatedCheckoutRange, GitProgressCheckedOutAt +) import dbt.exceptions from packaging import version @@ -28,7 +33,7 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirec clone_cmd = ['git', 'clone', '--depth', '1'] if subdirectory: - logger.debug(' Subdirectory specified: {}, using sparse checkout.'.format(subdirectory)) + fire_event(GitSparseCheckoutSubdirectory(subdir=subdirectory)) out, _ = run_cmd(cwd, ['git', '--version'], env={'LC_ALL': 'C'}) git_version = version.parse(re.search(r"\d+\.\d+\.\d+", out.decode("utf-8")).group(0)) if not git_version >= version.parse("2.25.0"): @@ -72,7 +77,7 @@ def list_tags(cwd): def _checkout(cwd, repo, revision): - logger.debug(' Checking out revision {}.'.format(revision)) + fire_event(GitProgressCheckoutRevision(revision=revision)) fetch_cmd = ["git", "fetch", "origin", "--depth", "1"] @@ -139,7 +144,7 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False, start_sha = None if exists: directory = exists.group(1) - logger.debug('Updating existing dependency {}.', directory) + fire_event(GitProgressUpdatingExistingDependency(dir=directory)) else: matches = re.match("Cloning into '(.+)'", err.decode('utf-8')) if matches is None: @@ -147,17 +152,18 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False, f'Error cloning {repo} - never saw "Cloning into ..." 
from git' ) directory = matches.group(1) - logger.debug('Pulling new dependency {}.', directory) + fire_event(GitProgressPullingNewDependency(dir=directory)) full_path = os.path.join(cwd, directory) start_sha = get_current_sha(full_path) checkout(full_path, repo, revision) end_sha = get_current_sha(full_path) if exists: if start_sha == end_sha: - logger.debug(' Already at {}, nothing to do.', start_sha[:7]) + fire_event(GitNothingToDo(sha=start_sha[:7])) else: - logger.debug(' Updated checkout from {} to {}.', - start_sha[:7], end_sha[:7]) + fire_event(GitProgressUpdatedCheckoutRange( + start_sha=start_sha[:7], end_sha=end_sha[:7] + )) else: - logger.debug(' Checked out at {}.', end_sha[:7]) + fire_event(GitProgressCheckedOutAt(end_sha=end_sha[:7])) return os.path.join(directory, subdirectory or '') diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index aa3c57f7b91..0ba25237336 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -33,7 +33,6 @@ UndefinedMacroException ) from dbt import flags -from dbt.logger import GLOBAL_LOGGER as logger # noqa def _linecache_inject(source, write): diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index a727ff4d363..a23c43bba78 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -1,7 +1,11 @@ import functools import requests +from dbt.events.functions import fire_event +from dbt.events.types import ( + RegistryProgressMakingGETRequest, + RegistryProgressGETResponse +) from dbt.utils import memoized, _connection_exception_retry as connection_exception_retry -from dbt.logger import GLOBAL_LOGGER as logger from dbt import deprecations import os @@ -25,10 +29,9 @@ def _get_with_retries(path, registry_base_url=None): def _get(path, registry_base_url=None): url = _get_url(path, registry_base_url) - logger.debug('Making package registry request: GET {}'.format(url)) + fire_event(RegistryProgressMakingGETRequest(url=url)) resp = requests.get(url, timeout=30) - logger.debug('Response from registry: GET {} {}'.format(url, - resp.status_code)) + fire_event(RegistryProgressGETResponse(url=url, resp_code=resp.status_code)) resp.raise_for_status() if resp is None: raise requests.exceptions.ContentDecodingError( diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index 53f1d6f1be3..aa8b2cac2c9 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -15,8 +15,12 @@ Type, NoReturn, List, Optional, Dict, Any, Tuple, Callable, Union ) +from dbt.events.functions import fire_event +from dbt.events.types import ( + SystemErrorRetrievingModTime, SystemCouldNotWrite, SystemExecutingCmd, SystemStdOutMsg, + SystemStdErrMsg, SystemReportReturnCode +) import dbt.exceptions -from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import _connection_exception_retry as connection_exception_retry if sys.platform == 'win32': @@ -65,9 +69,7 @@ def find_matching( try: modification_time = os.path.getmtime(absolute_path) except OSError: - logger.exception( - f"Error retrieving modification time for file {absolute_path}" - ) + fire_event(SystemErrorRetrievingModTime(path=absolute_path)) if reobj.match(local_file): matching.append({ 'searched_path': relative_path_to_search, @@ -161,10 +163,7 @@ def write_file(path: str, contents: str = '') -> bool: reason = 'Path was possibly too long' # all our hard work and the path was still too long. Log and # continue. 
- logger.debug( - f'Could not write to path {path}({len(path)} characters): ' - f'{reason}\nexception: {exc}' - ) + fire_event(SystemCouldNotWrite(path=path, reason=reason, exc=exc)) else: raise return True @@ -412,7 +411,7 @@ def _interpret_oserror(exc: OSError, cwd: str, cmd: List[str]) -> NoReturn: def run_cmd( cwd: str, cmd: List[str], env: Optional[Dict[str, Any]] = None ) -> Tuple[bytes, bytes]: - logger.debug('Executing "{}"'.format(' '.join(cmd))) + fire_event(SystemExecutingCmd(cmd=cmd)) if len(cmd) == 0: raise dbt.exceptions.CommandError(cwd, cmd) @@ -438,11 +437,11 @@ def run_cmd( except OSError as exc: _interpret_oserror(exc, cwd, cmd) - logger.debug('STDOUT: "{!s}"'.format(out)) - logger.debug('STDERR: "{!s}"'.format(err)) + fire_event(SystemStdOutMsg(bmsg=out)) + fire_event(SystemStdErrMsg(bmsg=err)) if proc.returncode != 0: - logger.debug('command return code={}'.format(proc.returncode)) + fire_event(SystemReportReturnCode(code=proc.returncode)) raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode, out, err) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index c8a1c8dedc1..e6e717f19d7 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from dataclasses import dataclass -from typing import Any +from typing import Any, List # types to represent log levels @@ -113,6 +113,134 @@ def cli_msg(self) -> str: return f"Performance info: {self.path}" +@dataclass +class GitSparseCheckoutSubdirectory(DebugLevel, CliEventABC): + subdir: str + + def cli_msg(self) -> str: + return f" Subdirectory specified: {self.subdir}, using sparse checkout." + + +@dataclass +class GitProgressCheckoutRevision(DebugLevel, CliEventABC): + revision: str + + def cli_msg(self) -> str: + return f" Checking out revision {self.revision}." + + +@dataclass +class GitProgressUpdatingExistingDependency(DebugLevel, CliEventABC): + dir: str + + def cli_msg(self) -> str: + return f"Updating existing dependency {self.dir}." + + +@dataclass +class GitProgressPullingNewDependency(DebugLevel, CliEventABC): + dir: str + + def cli_msg(self) -> str: + return f"Pulling new dependency {self.dir}." + + +@dataclass +class GitNothingToDo(DebugLevel, CliEventABC): + sha: str + + def cli_msg(self) -> str: + return f"Already at {self.sha}, nothing to do." + + +@dataclass +class GitProgressUpdatedCheckoutRange(DebugLevel, CliEventABC): + start_sha: str + end_sha: str + + def cli_msg(self) -> str: + return f" Updated checkout from {self.start_sha} to {self.end_sha}." + + +@dataclass +class GitProgressCheckedOutAt(DebugLevel, CliEventABC): + end_sha: str + + def cli_msg(self) -> str: + return f" Checked out at {self.end_sha}." 
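+
+
+# For illustration only (this mirrors the call site in clients/git.py above):
+#
+#     fire_event(GitProgressCheckedOutAt(end_sha=end_sha[:7]))
+#
+# `fire_event` then logs `cli_msg()` at the level named by `level_tag()`,
+# which is "debug" for the classes in this section.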
+ + +@dataclass +class RegistryProgressMakingGETRequest(DebugLevel, CliEventABC): + url: str + + def cli_msg(self) -> str: + return f"Making package registry request: GET {self.url}" + + +@dataclass +class RegistryProgressGETResponse(DebugLevel, CliEventABC): + url: str + resp_code: int + + def cli_msg(self) -> str: + return f"Response from registry: GET {self.url} {self.resp_code}" + + +# TODO this was actually `logger.exception(...)` not `logger.error(...)` +@dataclass +class SystemErrorRetrievingModTime(ErrorLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Error retrieving modification time for file {self.path}" + + +@dataclass +class SystemCouldNotWrite(DebugLevel, CliEventABC): + path: str + reason: str + exc: Exception + + def cli_msg(self) -> str: + return ( + f"Could not write to path {self.path}({len(self.path)} characters): " + f"{self.reason}\nexception: {self.exc}" + ) + + +@dataclass +class SystemExecutingCmd(DebugLevel, CliEventABC): + cmd: List[str] + + def cli_msg(self) -> str: + return f'Executing "{" ".join(self.cmd)}"' + + +@dataclass +class SystemStdOutMsg(DebugLevel, CliEventABC): + bmsg: bytes + + def cli_msg(self) -> str: + return f'STDOUT: "{str(self.bmsg)}"' + + +@dataclass +class SystemStdErrMsg(DebugLevel, CliEventABC): + bmsg: bytes + + def cli_msg(self) -> str: + return f'STDERR: "{str(self.bmsg)}"' + + +@dataclass +class SystemReportReturnCode(DebugLevel, CliEventABC): + code: int + + def cli_msg(self) -> str: + return f"command return code={self.code}" + + @dataclass class MacroEventInfo(InfoLevel, CliEventABC): msg: str @@ -146,5 +274,18 @@ def cli_msg(self) -> str: ManifestChecked() ManifestFlatGraphBuilt() ReportPerformancePath(path='') + GitSparseCheckoutSubdirectory(subdir='') + GitProgressCheckoutRevision(revision='') + GitProgressUpdatingExistingDependency(dir='') + GitProgressPullingNewDependency(dir='') + GitNothingToDo(sha='') + GitProgressUpdatedCheckoutRange(start_sha='', end_sha='') + GitProgressCheckedOutAt(end_sha='') + SystemErrorRetrievingModTime(path='') + SystemCouldNotWrite(path='', reason='', exc=Exception('')) + SystemExecutingCmd(cmd=['']) + SystemStdOutMsg(bmsg=b'') + SystemStdErrMsg(bmsg=b'') + SystemReportReturnCode(code=0) MacroEventInfo(msg='') MacroEventDebug(msg='') From 51d8440dd43b6e69fa28f1221a20924c9bf411fb Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 8 Nov 2021 14:17:51 -0600 Subject: [PATCH 06/31] Change Graph logger call sites (#4165) graph call sites for structured logging --- core/dbt/events/types.py | 45 ++++++++++++++++++++++++++++++++++++++ core/dbt/graph/selector.py | 15 ++++++++----- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index e6e717f19d7..0a182d33b03 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -241,6 +241,48 @@ def cli_msg(self) -> str: return f"command return code={self.code}" +@dataclass +class SelectorAlertUpto3UnusedNodes(InfoLevel, CliEventABC): + node_names: List[str] + + def cli_msg(self) -> str: + summary_nodes_str = ("\n - ").join(self.node_names[:3]) + and_more_str = ( + f"\n - and {len(self.node_names) - 3} more" if len(self.node_names) > 4 else "" + ) + return ( + f"\nSome tests were excluded because at least one parent is not selected. " + f"Use the --greedy flag to include them." 
+ f"\n - {summary_nodes_str}{and_more_str}" + ) + + +@dataclass +class SelectorAlertAllUnusedNodes(DebugLevel, CliEventABC): + node_names: List[str] + + def cli_msg(self) -> str: + debug_nodes_str = ("\n - ").join(self.node_names) + return ( + f"Full list of tests that were excluded:" + f"\n - {debug_nodes_str}" + ) + + +@dataclass +class SelectorReportInvalidSelector(InfoLevel, CliEventABC): + selector_methods: dict + spec_method: str + raw_spec: str + + def cli_msg(self) -> str: + valid_selectors = ", ".join(self.selector_methods) + return ( + f"The '{self.spec_method}' selector specified in {self.raw_spec} is " + f"invalid. Must be one of [{valid_selectors}]" + ) + + @dataclass class MacroEventInfo(InfoLevel, CliEventABC): msg: str @@ -287,5 +329,8 @@ def cli_msg(self) -> str: SystemStdOutMsg(bmsg=b'') SystemStdErrMsg(bmsg=b'') SystemReportReturnCode(code=0) + SelectorAlertUpto3UnusedNodes(node_names=[]) + SelectorAlertAllUnusedNodes(node_names=[]) + SelectorReportInvalidSelector(selector_methods={'': ''}, spec_method='', raw_spec='') MacroEventInfo(msg='') MacroEventDebug(msg='') diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index abc8bcb74b9..c1945f1f6db 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -5,7 +5,10 @@ from .selector_methods import MethodManager from .selector_spec import SelectionCriteria, SelectionSpec -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + SelectorAlertUpto3UnusedNodes, SelectorAlertAllUnusedNodes, SelectorReportInvalidSelector +) from dbt.node_types import NodeType from dbt.exceptions import ( InternalException, @@ -85,11 +88,11 @@ def get_nodes_from_criteria( try: collected = self.select_included(nodes, spec) except InvalidSelectorException: - valid_selectors = ", ".join(self.SELECTOR_METHODS) - logger.info( - f"The '{spec.method}' selector specified in {spec.raw} is " - f"invalid. 
Must be one of [{valid_selectors}]" - ) + fire_event(SelectorReportInvalidSelector( + selector_methods=self.SELECTOR_METHODS, + spec_method=spec.method, + raw_spec=spec.raw + )) return set(), set() neighbors = self.collect_specified_neighbors(spec, collected) From b141620125044607436c7f2068dcb3b8510bb022 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 1 Nov 2021 14:49:04 -0500 Subject: [PATCH 07/31] contracts call sites (#4166) * first pass adding structured logging --- core/dbt/contracts/connection.py | 8 +++----- core/dbt/contracts/graph/manifest.py | 7 +++---- core/dbt/contracts/graph/parsed.py | 1 - core/dbt/contracts/results.py | 5 +++-- core/dbt/events/types.py | 25 +++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/core/dbt/contracts/connection.py b/core/dbt/contracts/connection.py index d302e373473..4485884a022 100644 --- a/core/dbt/contracts/connection.py +++ b/core/dbt/contracts/connection.py @@ -7,7 +7,8 @@ ) from dbt.exceptions import InternalException from dbt.utils import translate_aliases -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import NewConnectionOpening from typing_extensions import Protocol from dbt.dataclass_schema import ( dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin, @@ -101,10 +102,7 @@ def __init__(self, opener: Callable[[Connection], Connection]): self.opener = opener def resolve(self, connection: Connection) -> Connection: - logger.debug( - 'Opening a new connection, currently in state {}' - .format(connection.state) - ) + fire_event(NewConnectionOpening(connection_state=connection.state)) return self.opener(connection) diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index d382151eb7b..fe5288f1a94 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -29,7 +29,8 @@ raise_duplicate_resource_name, raise_compiler_error, ) from dbt.helper_types import PathSet -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import MergedFromState from dbt.node_types import NodeType from dbt.ui import line_wrap_message from dbt import flags @@ -958,9 +959,7 @@ def merge_from_artifact( # log up to 5 items sample = list(islice(merged, 5)) - logger.debug( - f'Merged {len(merged)} items from state (sample: {sample})' - ) + fire_event(MergedFromState(nbr_merged=len(merged), sample=sample)) # Methods that were formerly in ParseResult diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 66a8fbc4e8a..0f5294b77be 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -30,7 +30,6 @@ ) from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin from dbt.exceptions import warn_or_error -from dbt.logger import GLOBAL_LOGGER as logger # noqa from dbt import flags from dbt.node_types import NodeType diff --git a/core/dbt/contracts/results.py b/core/dbt/contracts/results.py index a6b5307ea8b..caf5c57aaac 100644 --- a/core/dbt/contracts/results.py +++ b/core/dbt/contracts/results.py @@ -11,10 +11,11 @@ schema_version, ) from dbt.exceptions import InternalException +from dbt.events.functions import fire_event +from dbt.events.types import TimingInfoCollected from dbt.logger import ( TimingProcessor, JsonOnly, - GLOBAL_LOGGER as logger, ) from dbt.utils import lowercase from dbt.dataclass_schema import dbtClassMixin, StrEnum 
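# The pattern this patch applies at each call site (illustration only):
# a formatted logger call such as
#
#     logger.debug('Opening a new connection, currently in state {}'.format(connection.state))
#
# becomes a typed event routed through the single entry point:
#
#     fire_event(NewConnectionOpening(connection_state=connection.state))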
@@ -54,7 +55,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.timing_info.end() with JsonOnly(), TimingProcessor(self.timing_info): - logger.debug('finished collecting timing info') + fire_event(TimingInfoCollected()) class NodeStatus(StrEnum): diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 0a182d33b03..4b60fb66f22 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -299,6 +299,28 @@ def cli_msg(self) -> str: return self.msg +@dataclass +class NewConnectionOpening(DebugLevel, CliEventABC): + connection_state: str + + def cli_msg(self) -> str: + return f"Opening a new connection, currently in state {self.connection_state}" + + +class TimingInfoCollected(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "finished collecting timing info" + + +@dataclass +class MergedFromState(DebugLevel, CliEventABC): + nbr_merged: int + sample: List + + def cli_msg(self) -> str: + return f"Merged {self.nbr_merged} items from state (sample: {self.sample})" + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. # making the conditional `if False` causes mypy to skip it as dead code so @@ -334,3 +356,6 @@ def cli_msg(self) -> str: SelectorReportInvalidSelector(selector_methods={'': ''}, spec_method='', raw_spec='') MacroEventInfo(msg='') MacroEventDebug(msg='') + NewConnectionOpening(connection_state='') + TimingInfoCollected() + MergedFromState(nbr_merged=0, sample=[]) From d8868c5197bb5d3e0e3bb80106a1cc070496be7d Mon Sep 17 00:00:00 2001 From: Ian Knox <81931810+iknox-fa@users.noreply.github.com> Date: Tue, 2 Nov 2021 10:28:21 -0500 Subject: [PATCH 08/31] Dataclass compatibility (#4180) * use __post_init__() instead of fake dataclass member vars --- core/dbt/events/types.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 4b60fb66f22..947c4b20031 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -33,9 +33,14 @@ def level_tag(self) -> str: @dataclass class ShowException(): - exc_info: Any = None - stack_info: Any = None - extra: Any = None + # N.B.: + # As long as we stick with the current convention of setting the member vars in the + # `cli_msg` method of subclasses, this is a safe operation. + # If that ever changes we'll want to reassess. 
+ def __post_init__(self): + self.exc_info: Any = None + self.stack_info: Any = None + self.extra: Any = None # The following classes represent the data necessary to describe a From 6b36b18029eba56967d9949bb61c4303224a65f0 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 2 Nov 2021 11:01:53 -0500 Subject: [PATCH 09/31] config call sites (#4169) * update config use structured logging * WIP * minor cleanup * fixed merge error * added in ShowException * added todo to remove defaults after dropping 3.6 * removed todo that is obsolete --- core/dbt/config/profile.py | 8 +++----- core/dbt/config/runtime.py | 12 ++++-------- core/dbt/config/utils.py | 7 +++---- core/dbt/events/types.py | 36 +++++++++++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index 1800111652e..d531abab60b 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -15,7 +15,8 @@ from dbt.exceptions import ValidationException from dbt.exceptions import RuntimeException from dbt.exceptions import validator_error_message -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.types import MissingProfileTarget +from dbt.events.functions import fire_event from dbt.utils import coerce_dict_str from .renderer import ProfileRenderer @@ -293,10 +294,7 @@ def render_profile( target_name = renderer.render_value(raw_profile['target']) else: target_name = 'default' - logger.debug( - "target not specified in profile '{}', using '{}'" - .format(profile_name, target_name) - ) + fire_event(MissingProfileTarget(profile_name=profile_name, target_name=target_name)) raw_profile_data = cls._get_profile_data( raw_profile, profile_name, target_name diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index a0857d94bf3..820b966eea8 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -19,7 +19,8 @@ from dbt.contracts.connection import AdapterRequiredConfig, Credentials from dbt.contracts.graph.manifest import ManifestMetadata from dbt.contracts.relation import ComponentName -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.types import ProfileLoadError, ProfileNotFound +from dbt.events.functions import fire_event from dbt.ui import warning_tag from dbt.contracts.project import Configuration, UserConfig @@ -544,13 +545,8 @@ def _get_rendered_profile( args, profile_renderer, profile_name ) except (DbtProjectError, DbtProfileError) as exc: - logger.debug( - 'Profile not loaded due to error: {}', exc, exc_info=True - ) - logger.info( - 'No profile "{}" found, continuing with no target', - profile_name - ) + fire_event(ProfileLoadError(exc=exc)) + fire_event(ProfileNotFound(profile_name=profile_name)) # return the poisoned form profile = UnsetProfile() # disable anonymous usage statistics diff --git a/core/dbt/config/utils.py b/core/dbt/config/utils.py index a0f5832a396..01eea117175 100644 --- a/core/dbt/config/utils.py +++ b/core/dbt/config/utils.py @@ -1,8 +1,9 @@ from typing import Dict, Any from dbt.clients import yaml_helper +from dbt.events.functions import fire_event from dbt.exceptions import raise_compiler_error, ValidationException -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.types import InvalidVarsYAML def parse_cli_vars(var_string: str) -> Dict[str, Any]: @@ -17,7 +18,5 @@ def parse_cli_vars(var_string: str) -> Dict[str, Any]: "The --vars argument must be a YAML dictionary, but was " "of type '{}'".format(type_name)) except 
ValidationException: - logger.error( - "The YAML provided in the --vars argument is not valid.\n" - ) + fire_event(InvalidVarsYAML()) raise diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 947c4b20031..c3037906c79 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from dataclasses import dataclass -from typing import Any, List +from typing import Any, List, Optional # types to represent log levels @@ -304,6 +304,36 @@ def cli_msg(self) -> str: return self.msg +@dataclass +class MissingProfileTarget(InfoLevel, CliEventABC): + profile_name: str + target_name: str + + def cli_msg(self) -> str: + return f"target not specified in profile '{self.profile_name}', using '{self.target_name}'" + + +@dataclass +class ProfileLoadError(ShowException, DebugLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f"Profile not loaded due to error: {self.exc}" + + +@dataclass +class ProfileNotFound(InfoLevel, CliEventABC): + profile_name: Optional[str] + + def cli_msg(self) -> str: + return f'No profile "{self.profile_name}" found, continuing with no target' + + +class InvalidVarsYAML(ErrorLevel, CliEventABC): + def cli_msg(self) -> str: + return "The YAML provided in the --vars argument is not valid.\n" + + @dataclass class NewConnectionOpening(DebugLevel, CliEventABC): connection_state: str @@ -364,3 +394,7 @@ def cli_msg(self) -> str: NewConnectionOpening(connection_state='') TimingInfoCollected() MergedFromState(nbr_merged=0, sample=[]) + MissingProfileTarget(profile_name='', target_name='') + ProfileLoadError(exc=Exception('')) + ProfileNotFound(profile_name='') + InvalidVarsYAML() From 9857e1dd8379c7b5b3898b16513955aff30cafaf Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 2 Nov 2021 11:36:57 -0500 Subject: [PATCH 10/31] parser call sites (#4177) * convert generic_test to structured logging * convert macros to structured logging * add struc logging to most of manifest.py * add struct logging to models.py * added struct logging to partial.py * finished conversion of manifest * fixing errors * fixed 1 todo and added another * fixed bugs from merge --- core/dbt/events/types.py | 329 +++++++++++++++++++++++++++++++- core/dbt/parser/generic_test.py | 5 +- core/dbt/parser/macros.py | 5 +- core/dbt/parser/manifest.py | 62 +++--- core/dbt/parser/models.py | 43 ++--- core/dbt/parser/partial.py | 33 ++-- 6 files changed, 403 insertions(+), 74 deletions(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index c3037906c79..222dccd5142 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,6 +1,7 @@ from abc import ABCMeta, abstractmethod from dataclasses import dataclass -from typing import Any, List, Optional +from typing import Any, List, Optional, Dict +from dbt.ui import warning_tag # types to represent log levels @@ -356,6 +357,296 @@ def cli_msg(self) -> str: return f"Merged {self.nbr_merged} items from state (sample: {self.sample})" +@dataclass +class GenericTestFileParse(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Parsing {self.path}" + + +@dataclass +class MacroFileParse(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Parsing {self.path}" + + +class PartialParsingFullReparseBecauseOfError(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Partial parsing enabled but an error occurred. Switching to a full re-parse." 
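+
+
+# For illustration only: no-payload events are fired as bare classes, while
+# payload-carrying events are dataclasses, e.g.
+#
+#     fire_event(PartialParsingSkipParsing())
+#     fire_event(PartialParsingExceptionFile(file=file_id))
+#
+# where `file_id` comes from the caller (see parser/manifest.py below).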
+ + +@dataclass +class PartialParsingExceptionFile(DebugLevel, CliEventABC): + file: str + + def cli_msg(self) -> str: + return f"Partial parsing exception processing file {self.file}" + + +@dataclass +class PartialParsingFile(DebugLevel, CliEventABC): + file_dict: Dict + + def cli_msg(self) -> str: + return f"PP file: {self.file_dict}" + + +@dataclass +class PartialParsingException(DebugLevel, CliEventABC): + exc_info: Dict + + def cli_msg(self) -> str: + return f"PP exception info: {self.exc_info}" + + +class PartialParsingSkipParsing(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "Partial parsing enabled, no changes found, skipping parsing" + + +class PartialParsingMacroChangeStartFullParse(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Change detected to override macro used during parsing. Starting full parse." + + +@dataclass +class ManifestWrongMetadataVersion(DebugLevel, CliEventABC): + version: str + + def cli_msg(self) -> str: + return ("Manifest metadata did not contain correct version. " + f"Contained '{self.version}' instead.") + + +@dataclass +class PartialParsingVersionMismatch(InfoLevel, CliEventABC): + saved_version: str + current_version: str + + def cli_msg(self) -> str: + return ("Unable to do partial parsing because of a dbt version mismatch. " + f"Saved manifest version: {self.saved_version}. " + f"Current version: {self.current_version}.") + + +class PartialParsingFailedBecauseConfigChange(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Unable to do partial parsing because config vars, " + "config profile, or config target have changed") + + +class PartialParsingFailedBecauseProfileChange(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Unable to do partial parsing because profile has changed") + + +class PartialParsingFailedBecauseNewProjectDependency(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Unable to do partial parsing because a project dependency has been added") + + +class PartialParsingFailedBecauseHashChanged(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Unable to do partial parsing because a project config has changed") + + +class PartialParsingNotEnabled(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Partial parsing not enabled") + + +@dataclass +class ParsedFileLoadFailed(ShowException, DebugLevel, CliEventABC): + path: str + exc: Exception + + def cli_msg(self) -> str: + return f"Failed to load parsed file from disk at {self.path}: {self.exc}" + + +class PartialParseSaveFileNotFound(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return ("Partial parse save file not found. Starting full parse.") + + +@dataclass +class StaticParserCausedJinjaRendering(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1605: jinja rendering because of STATIC_PARSER flag. file: {self.path}" + + +# TODO: Experimental/static parser uses these for testing and some may be a good use case for +# the `TestLevel` logger once we implement it. Some will probably stay `DebugLevel`. 
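+#
+# For illustration only: routed through `fire_event`, one of these renders as
+# a timestamped debug line via `logger.timestamped_line`, e.g. (hypothetical
+# file path):
+#
+#     12:34:56 | 1699: static parser successfully parsed models/my_model.sql
+#
+# The numeric prefixes are part of the message text itself.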
+@dataclass +class UsingExperimentalParser(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1610: conducting experimental parser sample on {self.path}" + + +@dataclass +class SampleFullJinjaRendering(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1611: conducting full jinja rendering sample on {self.path}" + + +@dataclass +class StaticParserFallbackJinjaRendering(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1602: parser fallback to jinja rendering on {self.path}" + + +@dataclass +class StaticParsingMacroOverrideDetected(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1601: detected macro override of ref/source/config in the scope of {self.path}" + + +@dataclass +class StaticParserSuccess(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1699: static parser successfully parsed {self.path}" + + +@dataclass +class StaticParserFailure(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1603: static parser failed on {self.path}" + + +@dataclass +class ExperimentalParserSuccess(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1698: experimental parser successfully parsed {self.path}" + + +@dataclass +class ExperimentalParserFailure(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"1604: experimental parser failed on {self.path}" + + +@dataclass +class PartialParsingEnabled(DebugLevel, CliEventABC): + deleted: int + added: int + changed: int + + def cli_msg(self) -> str: + return (f"Partial parsing enabled: " + f"{self.deleted} files deleted, " + f"{self.added} files added, " + f"{self.changed} files changed.") + + +@dataclass +class PartialParsingAddedFile(DebugLevel, CliEventABC): + file_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: added file: {self.file_id}" + + +@dataclass +class PartialParsingDeletedFile(DebugLevel, CliEventABC): + file_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: deleted file: {self.file_id}" + + +@dataclass +class PartialParsingUpdatedFile(DebugLevel, CliEventABC): + file_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: updated file: {self.file_id}" + + +@dataclass +class PartialParsingNodeMissingInSourceFile(DebugLevel, CliEventABC): + source_file: str + + def cli_msg(self) -> str: + return f"Partial parsing: node not found for source_file {self.source_file}" + + +@dataclass +class PartialParsingMissingNodes(DebugLevel, CliEventABC): + file_id: str + + def cli_msg(self) -> str: + return f"No nodes found for source file {self.file_id}" + + +@dataclass +class PartialParsingChildMapMissingUniqueID(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: {self.unique_id} not found in child_map" + + +@dataclass +class PartialParsingUpdateSchemaFile(DebugLevel, CliEventABC): + file_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: update schema file: {self.file_id}" + + +@dataclass +class PartialParsingDeletedSource(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: deleted source {self.unique_id}" + + +@dataclass +class PartialParsingDeletedExposure(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Partial parsing: deleted exposure {self.unique_id}" + + +@dataclass +class InvalidDisabledSourceInTestNode(WarnLevel, CliEventABC): + msg: str + 
+ def cli_msg(self) -> str: + return warning_tag(self.msg) + + +@dataclass +class InvalidRefInTestNode(WarnLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return warning_tag(self.msg) + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. # making the conditional `if False` causes mypy to skip it as dead code so @@ -398,3 +689,39 @@ def cli_msg(self) -> str: ProfileLoadError(exc=Exception('')) ProfileNotFound(profile_name='') InvalidVarsYAML() + GenericTestFileParse(path='') + MacroFileParse(path='') + PartialParsingFullReparseBecauseOfError() + PartialParsingFile(file_dict={}) + PartialParsingException(exc_info={}) + PartialParsingSkipParsing() + PartialParsingMacroChangeStartFullParse() + ManifestWrongMetadataVersion(version='') + PartialParsingVersionMismatch(saved_version='', current_version='') + PartialParsingFailedBecauseConfigChange() + PartialParsingFailedBecauseProfileChange() + PartialParsingFailedBecauseNewProjectDependency() + PartialParsingFailedBecauseHashChanged() + ParsedFileLoadFailed(path='', exc=Exception('')) + PartialParseSaveFileNotFound() + StaticParserCausedJinjaRendering(path='') + UsingExperimentalParser(path='') + SampleFullJinjaRendering(path='') + StaticParserFallbackJinjaRendering(path='') + StaticParsingMacroOverrideDetected(path='') + StaticParserSuccess(path='') + StaticParserFailure(path='') + ExperimentalParserSuccess(path='') + ExperimentalParserFailure(path='') + PartialParsingEnabled(deleted=0, added=0, changed=0) + PartialParsingAddedFile(file_id='') + PartialParsingDeletedFile(file_id='') + PartialParsingUpdatedFile(file_id='') + PartialParsingNodeMissingInSourceFile(source_file='') + PartialParsingMissingNodes(file_id='') + PartialParsingChildMapMissingUniqueID(unique_id='') + PartialParsingUpdateSchemaFile(file_id='') + PartialParsingDeletedSource(unique_id='') + PartialParsingDeletedExposure(unique_id='') + InvalidDisabledSourceInTestNode(msg='') + InvalidRefInTestNode(msg='') diff --git a/core/dbt/parser/generic_test.py b/core/dbt/parser/generic_test.py index c3621d7568d..dbc54896157 100644 --- a/core/dbt/parser/generic_test.py +++ b/core/dbt/parser/generic_test.py @@ -8,7 +8,8 @@ from dbt.contracts.graph.unparsed import UnparsedMacro from dbt.contracts.graph.parsed import ParsedMacro from dbt.contracts.files import SourceFile -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import GenericTestFileParse from dbt.node_types import NodeType from dbt.parser.base import BaseParser from dbt.parser.search import FileBlock @@ -90,7 +91,7 @@ def parse_file(self, block: FileBlock): source_file = block.file assert isinstance(source_file.contents, str) original_file_path = source_file.path.original_file_path - logger.debug("Parsing {}".format(original_file_path)) + fire_event(GenericTestFileParse(path=original_file_path)) # this is really only used for error messages base_node = UnparsedMacro( diff --git a/core/dbt/parser/macros.py b/core/dbt/parser/macros.py index 1b28c17b545..cd928fab0a3 100644 --- a/core/dbt/parser/macros.py +++ b/core/dbt/parser/macros.py @@ -7,7 +7,8 @@ from dbt.contracts.graph.parsed import ParsedMacro from dbt.contracts.files import FilePath, SourceFile from dbt.exceptions import CompilationException -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import MacroFileParse from dbt.node_types 
import NodeType from dbt.parser.base import BaseParser from dbt.parser.search import FileBlock, FilesystemSearcher @@ -96,7 +97,7 @@ def parse_file(self, block: FileBlock): source_file = block.file assert isinstance(source_file.contents, str) original_file_path = source_file.path.original_file_path - logger.debug("Parsing {}".format(original_file_path)) + fire_event(MacroFileParse(path=original_file_path)) # this is really only used for error messages base_node = UnparsedMacro( diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index d36d4a8e840..b16ac70d2cb 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -19,7 +19,17 @@ get_adapter_package_names, ) from dbt.helper_types import PathSet -from dbt.logger import GLOBAL_LOGGER as logger, DbtProcessState +from dbt.events.functions import fire_event +from dbt.events.types import ( + PartialParsingFullReparseBecauseOfError, PartialParsingExceptionFile, PartialParsingFile, + PartialParsingException, PartialParsingSkipParsing, PartialParsingMacroChangeStartFullParse, + ManifestWrongMetadataVersion, PartialParsingVersionMismatch, + PartialParsingFailedBecauseConfigChange, PartialParsingFailedBecauseProfileChange, + PartialParsingFailedBecauseNewProjectDependency, PartialParsingFailedBecauseHashChanged, + PartialParsingNotEnabled, ParsedFileLoadFailed, PartialParseSaveFileNotFound, + InvalidDisabledSourceInTestNode, InvalidRefInTestNode +) +from dbt.logger import DbtProcessState from dbt.node_types import NodeType from dbt.clients.jinja import get_rendered, MacroStack from dbt.clients.jinja_static import statically_extract_macro_calls @@ -238,8 +248,7 @@ def load(self): except Exception: # pp_files should still be the full set and manifest is new manifest, # since get_parsing_files failed - logger.info("Partial parsing enabled but an error occurred. " - "Switching to a full re-parse.") + fire_event(PartialParsingFullReparseBecauseOfError()) # Get traceback info tb_info = traceback.format_exc() @@ -263,11 +272,11 @@ def load(self): source_file = self.manifest.files[file_id] if source_file: parse_file_type = source_file.parse_file_type - logger.debug(f"Partial parsing exception processing file {file_id}") + fire_event(PartialParsingExceptionFile(file=file_id)) file_dict = source_file.to_dict() - logger.debug(f"PP file: {file_dict}") + fire_event(PartialParsingFile(file_dict=file_dict)) exc_info['parse_file_type'] = parse_file_type - logger.debug(f"PP exception info: {exc_info}") + fire_event(PartialParsingException()) # Send event if dbt.tracking.active_user is not None: @@ -278,7 +287,7 @@ def load(self): self.manifest._parsing_info = ParsingInfo() if skip_parsing: - logger.debug("Partial parsing enabled, no changes found, skipping parsing") + fire_event(PartialParsingSkipParsing()) else: # Load Macros and tests # We need to parse the macros first, so they're resolvable when @@ -289,9 +298,8 @@ def load(self): # If we're partially parsing check that certain macros have not been changed if self.partially_parsing and self.skip_partial_parsing_because_of_macros(): - logger.info( - "Change detected to override macro used during parsing. Starting full parse." 
- ) + fire_event(PartialParsingMacroChangeStartFullParse()) + # Get new Manifest with original file records and move over the macros self.manifest = self.new_manifest # contains newly read files project_parser_files = orig_project_parser_files @@ -520,8 +528,9 @@ def write_manifest_for_partial_parse(self): # This shouldn't be necessary, but we have gotten bug reports (#3757) of the # saved manifest not matching the code version. if self.manifest.metadata.dbt_version != __version__: - logger.debug("Manifest metadata did not contain correct version. " - f"Contained '{self.manifest.metadata.dbt_version}' instead.") + fire_event(ManifestWrongMetadataVersion( + version=self.manifest.metadata.dbt_version) + ) self.manifest.metadata.dbt_version = __version__ manifest_msgpack = self.manifest.to_msgpack() make_directory(os.path.dirname(path)) @@ -539,20 +548,18 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: if manifest.metadata.dbt_version != __version__: # #3757 log both versions because of reports of invalid cases of mismatch. - logger.info("Unable to do partial parsing because of a dbt version mismatch. " - f"Saved manifest version: {manifest.metadata.dbt_version}. " - f"Current version: {__version__}.") + fire_event(PartialParsingVersionMismatch(saved_version=manifest.metadata.dbt_version, + current_version=__version__)) # If the version is wrong, the other checks might not work return False, ReparseReason.version_mismatch if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash: - logger.info("Unable to do partial parsing because config vars, " - "config profile, or config target have changed") + fire_event(PartialParsingFailedBecauseConfigChange()) valid = False reparse_reason = ReparseReason.vars_changed if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash: # Note: This should be made more granular. We shouldn't need to invalidate # partial parsing if a non-used profile section has changed. 
- logger.info("Unable to do partial parsing because profile has changed") + fire_event(PartialParsingFailedBecauseProfileChange()) valid = False reparse_reason = ReparseReason.profile_changed if self.manifest.state_check.project_env_vars_hash != \ @@ -573,7 +580,7 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: if k not in manifest.state_check.project_hashes } if missing_keys: - logger.info("Unable to do partial parsing because a project dependency has been added") + fire_event(PartialParsingFailedBecauseNewProjectDependency()) valid = False reparse_reason = ReparseReason.deps_changed @@ -581,8 +588,7 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: if key in manifest.state_check.project_hashes: old_value = manifest.state_check.project_hashes[key] if new_value != old_value: - logger.info("Unable to do partial parsing because " - "a project config has changed") + fire_event(PartialParsingFailedBecauseHashChanged()) valid = False reparse_reason = ReparseReason.project_config_changed return valid, reparse_reason @@ -604,7 +610,7 @@ def skip_partial_parsing_because_of_macros(self): def read_manifest_for_partial_parse(self) -> Optional[Manifest]: if not flags.PARTIAL_PARSE: - logger.debug('Partial parsing not enabled') + fire_event(PartialParsingNotEnabled()) return None path = os.path.join(self.root_project.target_path, PARTIAL_PARSE_FILE_NAME) @@ -630,14 +636,10 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]: manifest.metadata.invocation_id = None return manifest except Exception as exc: - logger.debug( - 'Failed to load parsed file from disk at {}: {}' - .format(path, exc), - exc_info=True - ) + fire_event(ParsedFileLoadFailed(path=path, exc=exc)) reparse_reason = ReparseReason.load_file_failure else: - logger.info("Partial parse save file not found. Starting full parse.") + fire_event(PartialParseSaveFileNotFound()) reparse_reason = ReparseReason.file_not_found # this event is only fired if a full reparse is needed @@ -890,7 +892,7 @@ def invalid_ref_fail_unless_test(node, target_model_name, node, target_model_name, target_model_package, disabled ) if disabled: - logger.debug(warning_tag(msg)) + fire_event(InvalidRefInTestNode(msg=msg)) else: warn_or_error( msg, @@ -913,7 +915,7 @@ def invalid_source_fail_unless_test( node, target_name, target_table_name, disabled ) if disabled: - logger.debug(warning_tag(msg)) + fire_event(InvalidDisabledSourceInTestNode(msg=msg)) else: warn_or_error( msg, diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py index adbd1b6b1d9..7230a91d594 100644 --- a/core/dbt/parser/models.py +++ b/core/dbt/parser/models.py @@ -2,7 +2,13 @@ from dbt.context.context_config import ContextConfig from dbt.contracts.graph.parsed import ParsedModelNode import dbt.flags as flags -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + StaticParserCausedJinjaRendering, UsingExperimentalParser, + SampleFullJinjaRendering, StaticParserFallbackJinjaRendering, + StaticParsingMacroOverrideDetected, StaticParserSuccess, StaticParserFailure, + ExperimentalParserSuccess, ExperimentalParserFailure +) from dbt.node_types import NodeType from dbt.parser.base import SimpleSQLParser from dbt.parser.search import FileBlock @@ -37,7 +43,7 @@ def render_update( if not flags.STATIC_PARSER: # jinja rendering super().render_update(node, config) - logger.debug(f"1605: jinja rendering because of STATIC_PARSER flag. 
file: {node.path}") + fire_event(StaticParserCausedJinjaRendering(path=node.path)) return # only sample for experimental parser correctness on normal runs, @@ -71,7 +77,7 @@ def render_update( # sample the experimental parser only during a normal run if exp_sample and not flags.USE_EXPERIMENTAL_PARSER: - logger.debug(f"1610: conducting experimental parser sample on {node.path}") + fire_event(UsingExperimentalParser(path=node.path)) experimental_sample = self.run_experimental_parser(node) # if the experimental parser succeeded, make a full copy of model parser # and populate _everything_ into it so it can be compared apples-to-apples @@ -105,7 +111,7 @@ def render_update( # sampling rng here, but the effect would be the same since we would only roll # it 40% of the time. So I've opted to keep all the rng code colocated above. if stable_sample and not flags.USE_EXPERIMENTAL_PARSER: - logger.debug(f"1611: conducting full jinja rendering sample on {node.path}") + fire_event(SampleFullJinjaRendering(path=node.path)) # if this will _never_ mutate anything `self` we could avoid these deep copies, # but we can't really guarantee that going forward. model_parser_copy = self.partial_deepcopy() @@ -150,17 +156,12 @@ def render_update( result += ["08_has_banned_macro"] super().render_update(node, config) - logger.debug( - f"1602: parser fallback to jinja rendering on {node.path}" - ) + fire_event(StaticParserFallbackJinjaRendering(path=node.path)) # if the static parser didn't succeed, fall back to jinja else: # jinja rendering super().render_update(node, config) - logger.debug( - f"1602: parser fallback to jinja rendering on {node.path}" - ) - + fire_event(StaticParserFallbackJinjaRendering(path=node.path)) # only send the tracking event if there is at least one result code if result: # fire a tracking event. this fires one event for every sample @@ -183,9 +184,7 @@ def run_static_parser( # this log line is used for integration testing. If you change # the code at the beginning of the line change the tests in # test/integration/072_experimental_parser_tests/test_all_experimental_parser.py - logger.debug( - f"1601: detected macro override of ref/source/config in the scope of {node.path}" - ) + fire_event(StaticParsingMacroOverrideDetected(path=node.path)) return "has_banned_macro" # run the stable static parser and return the results @@ -193,15 +192,13 @@ def run_static_parser( statically_parsed = py_extract_from_source( node.raw_sql ) - logger.debug(f"1699: static parser successfully parsed {node.path}") + fire_event(StaticParserSuccess(path=node.path)) return _shift_sources(statically_parsed) # if we want information on what features are barring the static # parser from reading model files, this is where we would add that # since that information is stored in the `ExtractionError`. except ExtractionError: - logger.debug( - f"1603: static parser failed on {node.path}" - ) + fire_event(StaticParserFailure(path=node.path)) return "cannot_parse" def run_experimental_parser( @@ -212,9 +209,7 @@ def run_experimental_parser( # this log line is used for integration testing. 
If you change # the code at the beginning of the line change the tests in # test/integration/072_experimental_parser_tests/test_all_experimental_parser.py - logger.debug( - f"1601: detected macro override of ref/source/config in the scope of {node.path}" - ) + fire_event(StaticParsingMacroOverrideDetected(path=node.path)) return "has_banned_macro" # run the experimental parser and return the results @@ -225,15 +220,13 @@ def run_experimental_parser( experimentally_parsed = py_extract_from_source( node.raw_sql ) - logger.debug(f"1698: experimental parser successfully parsed {node.path}") + fire_event(ExperimentalParserSuccess(path=node.path)) return _shift_sources(experimentally_parsed) # if we want information on what features are barring the experimental # parser from reading model files, this is where we would add that # since that information is stored in the `ExtractionError`. except ExtractionError: - logger.debug( - f"1604: experimental parser failed on {node.path}" - ) + fire_event(ExperimentalParserFailure(path=node.path)) return "cannot_parse" # checks for banned macros diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index b29d3387800..e465a2a40bf 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -5,7 +5,13 @@ from dbt.contracts.files import ( AnySourceFile, ParseFileType, parse_file_type_to_parser, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + PartialParsingEnabled, PartialParsingAddedFile, PartialParsingDeletedFile, + PartialParsingUpdatedFile, PartialParsingNodeMissingInSourceFile, PartialParsingMissingNodes, + PartialParsingChildMapMissingUniqueID, PartialParsingUpdateSchemaFile, + PartialParsingDeletedSource, PartialParsingDeletedExposure +) from dbt.node_types import NodeType @@ -140,10 +146,9 @@ def build_file_diff(self): } if changed_or_deleted_macro_file: self.macro_child_map = self.saved_manifest.build_macro_child_map() - logger.debug(f"Partial parsing enabled: " - f"{len(deleted) + len(deleted_schema_files)} files deleted, " - f"{len(added)} files added, " - f"{len(changed) + len(changed_schema_files)} files changed.") + deleted = len(deleted) + len(deleted_schema_files) + changed = len(changed) + len(changed_schema_files) + fire_event(PartialParsingEnabled(deleted=deleted, added=len(added), changed=changed)) self.file_diff = file_diff # generate the list of files that need parsing @@ -209,7 +214,7 @@ def add_to_saved(self, file_id): self.saved_files[file_id] = source_file # update pp_files to parse self.add_to_pp_files(source_file) - logger.debug(f"Partial parsing: added file: {file_id}") + fire_event(PartialParsingAddedFile(file_id=file_id)) def handle_added_schema_file(self, source_file): source_file.pp_dict = source_file.dict_from_yaml.copy() @@ -241,7 +246,7 @@ def delete_from_saved(self, file_id): if saved_source_file.parse_file_type == ParseFileType.Documentation: self.delete_doc_node(saved_source_file) - logger.debug(f"Partial parsing: deleted file: {file_id}") + fire_event(PartialParsingDeletedFile(file_id=file_id)) # Updates for non-schema files def update_in_saved(self, file_id): @@ -256,7 +261,7 @@ def update_in_saved(self, file_id): self.update_doc_in_saved(new_source_file, old_source_file) else: raise Exception(f"Invalid parse_file_type in source_file {file_id}") - logger.debug(f"Partial parsing: updated file: {file_id}") + fire_event(PartialParsingUpdatedFile(file_id=file_id)) # Models, seeds, snapshots: patches and tests # 
analyses: patches, no tests @@ -274,7 +279,7 @@ def update_mssat_in_saved(self, new_source_file, old_source_file): else: # It's not clear when this would actually happen. # Logging in case there are other associated errors. - logger.debug(f"Partial parsing: node not found for source_file {old_source_file}") + fire_event(PartialParsingNodeMissingInSourceFile(source_file=old_source_file)) # replace source_file in saved and add to parsing list file_id = new_source_file.file_id @@ -351,7 +356,7 @@ def remove_mssat_file(self, source_file): # nodes [unique_ids] -- SQL files # There should always be a node for a SQL file if not source_file.nodes: - logger.debug(f"No nodes found for source file {source_file.file_id}") + fire_event(PartialParsingMissingNodes(file_id=source_file.file_id)) return # There is generally only 1 node for SQL files, except for macros for unique_id in source_file.nodes: @@ -364,7 +369,7 @@ def schedule_referencing_nodes_for_parsing(self, unique_id): if unique_id in self.saved_manifest.child_map: self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id]) else: - logger.debug(f"Partial parsing: {unique_id} not found in child_map") + fire_event(PartialParsingChildMapMissingUniqueID(unique_id=unique_id)) def schedule_nodes_for_parsing(self, unique_ids): for unique_id in unique_ids: @@ -568,7 +573,7 @@ def change_schema_file(self, file_id): # schedule parsing self.add_to_pp_files(saved_schema_file) # schema_file pp_dict should have been generated already - logger.debug(f"Partial parsing: update schema file: {file_id}") + fire_event(PartialParsingUpdateSchemaFile(file_id=file_id)) # Delete schema files -- a variation on change_schema_file def delete_schema_file(self, file_id): @@ -823,7 +828,7 @@ def delete_schema_source(self, schema_file, source_dict): self.deleted_manifest.sources[unique_id] = source schema_file.sources.remove(unique_id) self.schedule_referencing_nodes_for_parsing(unique_id) - logger.debug(f"Partial parsing: deleted source {unique_id}") + fire_event(PartialParsingDeletedSource(unique_id=unique_id)) def delete_schema_macro_patch(self, schema_file, macro): # This is just macro patches that need to be reapplied @@ -851,7 +856,7 @@ def delete_schema_exposure(self, schema_file, exposure_dict): self.deleted_manifest.exposures[unique_id] = \ self.saved_manifest.exposures.pop(unique_id) schema_file.exposures.remove(unique_id) - logger.debug(f"Partial parsing: deleted exposure {unique_id}") + fire_event(PartialParsingDeletedExposure(unique_id=unique_id)) # metric are created only from schema files, so just delete # the metric. 
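The call-site pattern repeated throughout the patch above always reduces to the same two steps: define an event type in core/dbt/events/types.py, then replace the logger call with a single fire_event. The short sketch below is illustrative only and not part of the series; the event class name, message, and file_id value are hypothetical, and the imports assume the dbt.events modules introduced earlier in these patches.

from dataclasses import dataclass

from dbt.events.functions import fire_event
from dbt.events.types import CliEventABC, DebugLevel


@dataclass
class ExampleFileEvent(DebugLevel, CliEventABC):  # hypothetical event type
    file_id: str

    # every event carries its own human-readable rendering
    def cli_msg(self) -> str:
        return f"Example: processed file {self.file_id}"


# a call site swaps a logger.debug(...) line for one fire_event(...) call
fire_event(ExampleFileEvent(file_id="model.my_project.my_model"))

Because the level (DebugLevel) and the message (cli_msg) both live on the event type, the call site no longer chooses a log level or formats a string; it only states what happened.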
From 281d2491a5d6f1edd013cdc09481cac630b7da2b Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Tue, 2 Nov 2021 15:53:33 -0500
Subject: [PATCH 11/31] task call sites part 1 (#4183)

* add struct logging to base.py
* struct logging in run_operation
* add struct logging to base
* add struct logging to clean
* add struct logging to debug
* add struct logging to deps
* fix errors
* add struct logging to run.py
* fixed flake error
* add struct logging to generate
* added debug level stack trace
* fixed flake error
* added struct logging to compile
* added struct logging to freshness
* cleaned up errors
* resolved bug that broke everything
* removed accidental import
* fixed bug with unused args
---
 core/dbt/events/types.py       | 408 ++++++++++++++++++++++++++++++++-
 core/dbt/task/base.py          |  71 ++----
 core/dbt/task/clean.py         |  15 +-
 core/dbt/task/compile.py       |   6 +-
 core/dbt/task/debug.py         |  11 +-
 core/dbt/task/deps.py          |  25 +-
 core/dbt/task/freshness.py     |   5 +-
 core/dbt/task/generate.py      |  22 +-
 core/dbt/task/run.py           |  22 +-
 core/dbt/task/run_operation.py |  19 +-
 10 files changed, 475 insertions(+), 129 deletions(-)

diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py
index 222dccd5142..2827fc3d95e 100644
--- a/core/dbt/events/types.py
+++ b/core/dbt/events/types.py
@@ -1,7 +1,7 @@
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
 from typing import Any, List, Optional, Dict
-from dbt.ui import warning_tag
+from dbt import ui
 
 
 # types to represent log levels
@@ -305,6 +305,28 @@ def cli_msg(self) -> str:
         return self.msg
 
 
+@dataclass
+class NewConnectionOpening(DebugLevel, CliEventABC):
+    connection_state: str
+
+    def cli_msg(self) -> str:
+        return f"Opening a new connection, currently in state {self.connection_state}"
+
+
+class TimingInfoCollected(DebugLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "finished collecting timing info"
+
+
+@dataclass
+class MergedFromState(DebugLevel, CliEventABC):
+    nbr_merged: int
+    sample: List
+
+    def cli_msg(self) -> str:
+        return f"Merged {self.nbr_merged} items from state (sample: {self.sample})"
+
+
 @dataclass
 class MissingProfileTarget(InfoLevel, CliEventABC):
     profile_name: str
@@ -336,25 +358,52 @@ def cli_msg(self) -> str:
 
 
 @dataclass
-class NewConnectionOpening(DebugLevel, CliEventABC):
-    connection_state: str
+class CatchRunException(ShowException, DebugLevel, CliEventABC):
+    build_path: Any
+    exc: Exception
 
     def cli_msg(self) -> str:
-        return f"Opening a new connection, currently in state {self.connection_state}"
+        INTERNAL_ERROR_STRING = """This is an error in dbt. Please try again.
If the \ + error persists, open an issue at https://github.com/dbt-labs/dbt-core + """.strip() + prefix = f'Internal error executing {self.build_path}' + error = "{prefix}\n{error}\n\n{note}".format( + prefix=ui.red(prefix), + error=str(self.exc).strip(), + note=INTERNAL_ERROR_STRING + ) + return error -class TimingInfoCollected(DebugLevel, CliEventABC): +@dataclass +class HandleInternalException(ShowException, DebugLevel, CliEventABC): + exc: Exception + def cli_msg(self) -> str: - return "finished collecting timing info" + return str(self.exc) @dataclass -class MergedFromState(DebugLevel, CliEventABC): - nbr_merged: int - sample: List +class MessageHandleGenericException(ErrorLevel, CliEventABC): + build_path: str + unique_id: str + exc: Exception def cli_msg(self) -> str: - return f"Merged {self.nbr_merged} items from state (sample: {self.sample})" + node_description = self.build_path + if node_description is None: + node_description = self.unique_id + prefix = "Unhandled error while executing {}".format(node_description) + return "{prefix}\n{error}".format( + prefix=ui.red(prefix), + error=str(self.exc).strip() + ) + + +@dataclass +class DetailsHandleGenericException(ShowException, DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return '' @dataclass @@ -636,7 +685,7 @@ class InvalidDisabledSourceInTestNode(WarnLevel, CliEventABC): msg: str def cli_msg(self) -> str: - return warning_tag(self.msg) + return ui.warning_tag(self.msg) @dataclass @@ -644,7 +693,305 @@ class InvalidRefInTestNode(WarnLevel, CliEventABC): msg: str def cli_msg(self) -> str: - return warning_tag(self.msg) + return ui.warning_tag(self.msg) + + +@dataclass +class RunningOperationCaughtError(ErrorLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f'Encountered an error while running operation: {self.exc}' + + +@dataclass +class RunningOperationUncaughtError(ErrorLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f'Encountered an error while running operation: {self.exc}' + + +class DbtProjectError(ErrorLevel, CliEventABC): + def cli_msg(self) -> str: + return "Encountered an error while reading the project:" + + +@dataclass +class DbtProjectErrorException(ErrorLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f" ERROR: {str(self.exc)}" + + +class DbtProfileError(ErrorLevel, CliEventABC): + def cli_msg(self) -> str: + return "Encountered an error while reading profiles:" + + +@dataclass +class DbtProfileErrorException(ErrorLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f" ERROR: {str(self.exc)}" + + +class ProfileListTitle(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Defined profiles:" + + +@dataclass +class ListSingleProfile(InfoLevel, CliEventABC): + profile: str + + def cli_msg(self) -> str: + return f" - {self.profile}" + + +class NoDefinedProfiles(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "There are no profiles defined in your profiles.yml file" + + +class ProfileHelpMessage(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + PROFILES_HELP_MESSAGE = """ +For more information on configuring profiles, please consult the dbt docs: + +https://docs.getdbt.com/docs/configure-your-profile +""" + return PROFILES_HELP_MESSAGE + + +@dataclass +class CatchableExceptionOnRun(ShowException, DebugLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return str(self.exc) + + +@dataclass +class InternalExceptionOnRun(DebugLevel, CliEventABC): + build_path: 
str + exc: Exception + + def cli_msg(self) -> str: + prefix = 'Internal error executing {}'.format(self.build_path) + + INTERNAL_ERROR_STRING = """This is an error in dbt. Please try again. If \ +the error persists, open an issue at https://github.com/dbt-labs/dbt-core +""".strip() + + return "{prefix}\n{error}\n\n{note}".format( + prefix=ui.red(prefix), + error=str(self.exc).strip(), + note=INTERNAL_ERROR_STRING + ) + + +# This prints the stack trace at the debug level while allowing just the nice exception message +# at the error level - or whatever other level chosen. Used in multiple places. +@dataclass +class PrintDebugStackTrace(ShowException, DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "" + + +@dataclass +class GenericExceptionOnRun(ErrorLevel, CliEventABC): + build_path: str + unique_id: str + exc: Exception + + def cli_msg(self) -> str: + node_description = self.build_path + if node_description is None: + node_description = self.unique_id + prefix = "Unhandled error while executing {}".format(node_description) + return "{prefix}\n{error}".format( + prefix=ui.red(prefix), + error=str(self.exc).strip() + ) + + +@dataclass +class NodeConnectionReleaseError(ShowException, DebugLevel, CliEventABC): + node_name: str + exc: Exception + + def cli_msg(self) -> str: + return ('Error releasing connection for node {}: {!s}' + .format(self.node_name, self.exc)) + + +@dataclass +class CheckCleanPath(InfoLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Checking {self.path}/*" + + +@dataclass +class ConfirmCleanPath(InfoLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Cleaned {self.path}/*" + + +@dataclass +class ProtectedCleanPath(InfoLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"ERROR: not cleaning {self.path}/* because it is protected" + + +class FinishedCleanPaths(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Finished cleaning all paths." + + +@dataclass +class OpenCommand(InfoLevel, CliEventABC): + open_cmd: str + profiles_dir: str + + def cli_msg(self) -> str: + PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run: + +{open_cmd} {profiles_dir}""" + message = PROFILE_DIR_MESSAGE.format( + open_cmd=self.open_cmd, + profiles_dir=self.profiles_dir + ) + + return message + + +class DepsNoPackagesFound(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return 'Warning: No packages were found in packages.yml' + + +@dataclass +class DepsStartPackageInstall(InfoLevel, CliEventABC): + package: str + + def cli_msg(self) -> str: + return f"Installing {self.package}" + + +@dataclass +class DepsInstallInfo(InfoLevel, CliEventABC): + version_name: str + + def cli_msg(self) -> str: + return f" Installed from {self.version_name}" + + +@dataclass +class DepsUpdateAvailable(InfoLevel, CliEventABC): + version_latest: str + + def cli_msg(self) -> str: + return f" Updated version available: {self.version_latest}" + + +class DepsUTD(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return " Up to date!" 
+ + +@dataclass +class DepsListSubdirectory(InfoLevel, CliEventABC): + subdirectory: str + + def cli_msg(self) -> str: + return f" and subdirectory {self.subdirectory}" + + +@dataclass +class DepsNotifyUpdatesAvailable(InfoLevel, CliEventABC): + packages: List[str] + + def cli_msg(self) -> str: + return ('\nUpdates available for packages: {} \ + \nUpdate your versions in packages.yml, then run dbt deps'.format(self.packages)) + + +@dataclass +class DatabaseErrorRunning(InfoLevel, CliEventABC): + hook_type: str + + def cli_msg(self) -> str: + return f"Database error while running {self.hook_type}" + + +class EmptyLine(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return '' + + +@dataclass +class HooksRunning(InfoLevel, CliEventABC): + num_hooks: int + hook_type: str + + def cli_msg(self) -> str: + plural = 'hook' if self.num_hooks == 1 else 'hooks' + return f"Running {self.num_hooks} {self.hook_type} {plural}" + + +@dataclass +class HookFinished(InfoLevel, CliEventABC): + stat_line: str + execution: str + + def cli_msg(self) -> str: + return f"Finished running {self.stat_line}{self.execution}." + + +@dataclass +class WriteCatalogFailure(ErrorLevel, CliEventABC): + num_exceptions: int + + def cli_msg(self) -> str: + return (f"dbt encountered {self.num_exceptions} failure{(self.num_exceptions != 1) * 's'} " + "while writing the catalog") + + +@dataclass +class CatalogWritten(InfoLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Catalog written to {self.path}" + + +class CannotGenerateDocs(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "compile failed, cannot generate docs" + + +class BuildingCatalog(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Building catalog" + + +class CompileComplete(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Done." + + +class FreshnessCheckComplete(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Done." 
# since mypy doesn't run on every file we need to suggest to mypy that every @@ -725,3 +1072,40 @@ def cli_msg(self) -> str: PartialParsingDeletedExposure(unique_id='') InvalidDisabledSourceInTestNode(msg='') InvalidRefInTestNode(msg='') + MessageHandleGenericException(build_path='', unique_id='', exc=Exception('')) + DetailsHandleGenericException() + RunningOperationCaughtError(exc=Exception('')) + RunningOperationUncaughtError(exc=Exception('')) + DbtProjectError() + DbtProjectErrorException(exc=Exception('')) + DbtProfileError() + DbtProfileErrorException(exc=Exception('')) + ProfileListTitle() + ListSingleProfile(profile='') + NoDefinedProfiles() + ProfileHelpMessage() + CatchableExceptionOnRun(exc=Exception('')) + InternalExceptionOnRun(build_path='', exc=Exception('')) + GenericExceptionOnRun(build_path='', unique_id='', exc=Exception('')) + NodeConnectionReleaseError(node_name='', exc=Exception('')) + CheckCleanPath(path='') + ConfirmCleanPath(path='') + ProtectedCleanPath(path='') + FinishedCleanPaths() + OpenCommand(open_cmd='', profiles_dir='') + DepsNoPackagesFound() + DepsStartPackageInstall(package='') + DepsInstallInfo(version_name='') + DepsUpdateAvailable(version_latest='') + DepsListSubdirectory(subdirectory='') + DepsNotifyUpdatesAvailable(packages=[]) + DatabaseErrorRunning(hook_type='') + EmptyLine() + HooksRunning(num_hooks=0, hook_type='') + HookFinished(stat_line='', execution='') + WriteCatalogFailure(num_exceptions=0) + CatalogWritten(path='') + CannotGenerateDocs() + BuildingCatalog() + CompileComplete() + FreshnessCheckComplete() diff --git a/core/dbt/task/base.py b/core/dbt/task/base.py index 5b4b6ab594d..fc21c8d1d2d 100644 --- a/core/dbt/task/base.py +++ b/core/dbt/task/base.py @@ -1,12 +1,10 @@ import os import threading import time -import traceback from abc import ABCMeta, abstractmethod from typing import Type, Union, Dict, Any, Optional from dbt import tracking -from dbt import ui from dbt import flags from dbt.contracts.graph.manifest import Manifest from dbt.contracts.results import ( @@ -16,7 +14,14 @@ NotImplementedException, CompilationException, RuntimeException, InternalException ) -from dbt.logger import GLOBAL_LOGGER as logger, log_manager +from dbt.logger import log_manager +from dbt.events.functions import fire_event +from dbt.events.types import ( + DbtProjectError, DbtProjectErrorException, DbtProfileError, DbtProfileErrorException, + ProfileListTitle, ListSingleProfile, NoDefinedProfiles, ProfileHelpMessage, + CatchableExceptionOnRun, InternalExceptionOnRun, GenericExceptionOnRun, + NodeConnectionReleaseError, PrintDebugStackTrace +) from .printer import print_skip_caused_by_error, print_skip_line @@ -47,13 +52,6 @@ def read_profiles(profiles_dir=None): return profiles -PROFILES_HELP_MESSAGE = """ -For more information on configuring profiles, please consult the dbt docs: - -https://docs.getdbt.com/docs/configure-your-profile -""" - - class BaseTask(metaclass=ABCMeta): ConfigType: Union[Type[NoneConfig], Type[Project]] = NoneConfig @@ -84,28 +82,27 @@ def from_args(cls, args): # for the clean or deps tasks config = cls.ConfigType.from_args(args) except dbt.exceptions.DbtProjectError as exc: - logger.error("Encountered an error while reading the project:") - logger.error(" ERROR: {}".format(str(exc))) + fire_event(DbtProjectError()) + fire_event(DbtProjectErrorException(exc=exc)) tracking.track_invalid_invocation( args=args, result_type=exc.result_type) raise dbt.exceptions.RuntimeException('Could not run dbt') from exc except 
dbt.exceptions.DbtProfileError as exc: - logger.error("Encountered an error while reading profiles:") - logger.error(" ERROR {}".format(str(exc))) + fire_event(DbtProfileError()) + fire_event(DbtProfileErrorException(exc=exc)) all_profiles = read_profiles(flags.PROFILES_DIR).keys() if len(all_profiles) > 0: - logger.info("Defined profiles:") + fire_event(ProfileListTitle()) for profile in all_profiles: - logger.info(" - {}".format(profile)) + fire_event(ListSingleProfile(profile=profile)) else: - logger.info("There are no profiles defined in your " - "profiles.yml file") + fire_event(NoDefinedProfiles()) - logger.info(PROFILES_HELP_MESSAGE) + fire_event(ProfileHelpMessage()) tracking.track_invalid_invocation( args=args, @@ -167,11 +164,6 @@ def from_args(cls, args): return super().from_args(args) -INTERNAL_ERROR_STRING = """This is an error in dbt. Please try again. If \ -the error persists, open an issue at https://github.com/dbt-labs/dbt-core -""".strip() - - class ExecutionContext: """During execution and error handling, dbt makes use of mutable state: timing information and the newest (compiled vs executed) form of the node. @@ -309,33 +301,19 @@ def _handle_catchable_exception(self, e, ctx): if e.node is None: e.add_node(ctx.node) - logger.debug(str(e), exc_info=True) + fire_event(CatchableExceptionOnRun(exc=e)) return str(e) def _handle_internal_exception(self, e, ctx): - build_path = self.node.build_path - prefix = 'Internal error executing {}'.format(build_path) - - error = "{prefix}\n{error}\n\n{note}".format( - prefix=ui.red(prefix), - error=str(e).strip(), - note=INTERNAL_ERROR_STRING - ) - logger.debug(error, exc_info=True) + fire_event(InternalExceptionOnRun(build_path=self.node.build_path, exc=e)) return str(e) def _handle_generic_exception(self, e, ctx): - node_description = self.node.build_path - if node_description is None: - node_description = self.node.unique_id - prefix = "Unhandled error while executing {}".format(node_description) - error = "{prefix}\n{error}".format( - prefix=ui.red(prefix), - error=str(e).strip() - ) + fire_event(GenericExceptionOnRun(build_path=self.node.build_path, + unique_id=self.node.unique_id, + exc=e)) + fire_event(PrintDebugStackTrace()) - logger.error(error) - logger.debug('', exc_info=True) return str(e) def handle_exception(self, e, ctx): @@ -385,10 +363,7 @@ def _safe_release_connection(self): try: self.adapter.release_connection() except Exception as exc: - logger.debug( - 'Error releasing connection for node {}: {!s}\n{}' - .format(self.node.name, exc, traceback.format_exc()) - ) + fire_event(NodeConnectionReleaseError(node_name=self.node.name, exc=exc)) return str(exc) return None diff --git a/core/dbt/task/clean.py b/core/dbt/task/clean.py index 218d8cb4eba..6dfc8b3328b 100644 --- a/core/dbt/task/clean.py +++ b/core/dbt/task/clean.py @@ -4,7 +4,10 @@ from dbt import deprecations from dbt.task.base import BaseTask, move_to_nearest_project_dir -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + CheckCleanPath, ConfirmCleanPath, ProtectedCleanPath, FinishedCleanPaths +) from dbt.config import UnsetProfileConfig @@ -38,11 +41,11 @@ def run(self): self.config.packages_install_path != 'dbt_modules'): deprecations.warn('install-packages-path') for path in self.config.clean_targets: - logger.info("Checking {}/*".format(path)) + fire_event(CheckCleanPath(path=path)) if not self.__is_protected_path(path): shutil.rmtree(path, True) - logger.info(" Cleaned 
{}/*".format(path)) + fire_event(ConfirmCleanPath(path=path)) else: - logger.info("ERROR: not cleaning {}/* because it is " - "protected".format(path)) - logger.info("Finished cleaning all paths.") + fire_event(ProtectedCleanPath(path=path)) + + fire_event(FinishedCleanPaths()) diff --git a/core/dbt/task/compile.py b/core/dbt/task/compile.py index 464a4aa8c44..dd36e52416d 100644 --- a/core/dbt/task/compile.py +++ b/core/dbt/task/compile.py @@ -1,11 +1,13 @@ import threading + from .runnable import GraphRunnableTask from .base import BaseRunner from dbt.contracts.results import RunStatus, RunResult from dbt.exceptions import InternalException from dbt.graph import ResourceTypeSelector -from dbt.logger import print_timestamped_line +from dbt.events.functions import fire_event +from dbt.events.types import CompileComplete from dbt.node_types import NodeType @@ -53,4 +55,4 @@ def get_runner_type(self, _): return CompileRunner def task_end_messages(self, results): - print_timestamped_line('Done.') + fire_event(CompileComplete()) diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 92c6f93e88f..58ae586352a 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -4,7 +4,8 @@ import sys from typing import Optional, Dict, Any, List -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import OpenCommand from dbt import flags import dbt.clients.system import dbt.exceptions @@ -100,13 +101,7 @@ def project_profile(self): def path_info(self): open_cmd = dbt.clients.system.open_dir_cmd() - - message = PROFILE_DIR_MESSAGE.format( - open_cmd=open_cmd, - profiles_dir=self.profiles_dir - ) - - logger.info(message) + fire_event(OpenCommand(open_cmd=open_cmd, profiles_dir=self.profiles_dir)) def run(self): if self.args.config_dir: diff --git a/core/dbt/task/deps.py b/core/dbt/task/deps.py index 04992bd550f..84d2a11dcad 100644 --- a/core/dbt/task/deps.py +++ b/core/dbt/task/deps.py @@ -7,7 +7,11 @@ from dbt.deps.base import downloads_directory from dbt.deps.resolver import resolve_packages -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + DepsNoPackagesFound, DepsStartPackageInstall, DepsUpdateAvailable, DepsUTD, + DepsInstallInfo, DepsListSubdirectory, DepsNotifyUpdatesAvailable +) from dbt.clients import system from dbt.task.base import BaseTask, move_to_nearest_project_dir @@ -45,7 +49,7 @@ def run(self): system.make_directory(self.config.packages_install_path) packages = self.config.packages.packages if not packages: - logger.info('Warning: No packages were found in packages.yml') + fire_event(DepsNoPackagesFound()) return with downloads_directory(): @@ -59,30 +63,25 @@ def run(self): source_type = package.source_type() version = package.get_version() - logger.info('Installing {}', package) + fire_event(DepsStartPackageInstall(package=package)) package.install(self.config, renderer) - logger.info(' Installed from {}', - package.nice_version_name()) + fire_event(DepsInstallInfo(version_name=package.nice_version_name())) if source_type == 'hub': version_latest = package.get_version_latest() if version_latest != version: packages_to_upgrade.append(package_name) - logger.info(' Updated version available: {}', - version_latest) + fire_event(DepsUpdateAvailable(version_latest=version_latest)) else: - logger.info(' Up to date!') + fire_event(DepsUTD()) if package.get_subdirectory(): - logger.info(' and subdirectory {}', - package.get_subdirectory()) 
+ fire_event(DepsListSubdirectory(subdirectory=package.get_subdirectory())) self.track_package_install( package_name=package_name, source_type=source_type, version=version) if packages_to_upgrade: - logger.info('\nUpdates available for packages: {} \ - \nUpdate your versions in packages.yml, then run dbt deps', - packages_to_upgrade) + fire_event(DepsNotifyUpdatesAvailable(packages=packages_to_upgrade)) @classmethod def from_args(cls, args): diff --git a/core/dbt/task/freshness.py b/core/dbt/task/freshness.py index a0000399057..e8f765da8c8 100644 --- a/core/dbt/task/freshness.py +++ b/core/dbt/task/freshness.py @@ -16,7 +16,8 @@ SourceFreshnessResult, FreshnessStatus ) from dbt.exceptions import RuntimeException, InternalException -from dbt.logger import print_timestamped_line +from dbt.events.functions import fire_event +from dbt.events.types import FreshnessCheckComplete from dbt.node_types import NodeType from dbt.graph import ResourceTypeSelector @@ -170,4 +171,4 @@ def task_end_messages(self, results): ): print_run_result_error(result) - print_timestamped_line('Done.') + fire_event(FreshnessCheckComplete()) diff --git a/core/dbt/task/generate.py b/core/dbt/task/generate.py index de9134ed7fa..e405f54984a 100644 --- a/core/dbt/task/generate.py +++ b/core/dbt/task/generate.py @@ -16,7 +16,10 @@ ) from dbt.exceptions import InternalException from dbt.include.global_project import DOCS_INDEX_FILE_PATH -from dbt.logger import GLOBAL_LOGGER as logger, print_timestamped_line +from dbt.events.functions import fire_event +from dbt.events.types import ( + WriteCatalogFailure, CatalogWritten, CannotGenerateDocs, BuildingCatalog +) from dbt.parser.manifest import ManifestLoader import dbt.utils import dbt.compilation @@ -203,9 +206,7 @@ def run(self) -> CatalogArtifact: if self.args.compile: compile_results = CompileTask.run(self) if any(r.status == NodeStatus.Error for r in compile_results): - print_timestamped_line( - 'compile failed, cannot generate docs' - ) + fire_event(CannotGenerateDocs()) return CatalogArtifact.from_results( nodes={}, sources={}, @@ -238,7 +239,7 @@ def run(self) -> CatalogArtifact: adapter = get_adapter(self.config) with adapter.connection_named('generate_catalog'): - print_timestamped_line("Building catalog") + fire_event(BuildingCatalog()) catalog_table, exceptions = adapter.get_catalog(self.manifest) catalog_data: List[PrimitiveDict] = [ @@ -267,15 +268,8 @@ def run(self) -> CatalogArtifact: self.write_manifest() if exceptions: - logger.error( - 'dbt encountered {} failure{} while writing the catalog' - .format(len(exceptions), (len(exceptions) != 1) * 's') - ) - - print_timestamped_line( - 'Catalog written to {}'.format(os.path.abspath(path)) - ) - + fire_event(WriteCatalogFailure(num_exceptions=len(exceptions))) + fire_event(CatalogWritten(path=os.path.abspath(path))) return results def get_catalog_results( diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index d04a108b242..e5a217349f8 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -32,14 +32,14 @@ RuntimeException, missing_materialization, ) +from dbt.events.functions import fire_event +from dbt.events.types import DatabaseErrorRunning, EmptyLine, HooksRunning, HookFinished from dbt.logger import ( - GLOBAL_LOGGER as logger, TextOnly, HookMetadata, UniqueID, TimestampNamed, DbtModelState, - print_timestamped_line, ) from dbt.graph import ResourceTypeSelector from dbt.hooks import get_hook_dict @@ -308,12 +308,10 @@ def run_hooks(self, adapter, hook_type: RunHookType, extra_context): return 
num_hooks = len(ordered_hooks) - plural = 'hook' if num_hooks == 1 else 'hooks' with TextOnly(): - print_timestamped_line("") - print_timestamped_line( - 'Running {} {} {}'.format(num_hooks, hook_type, plural) - ) + fire_event(EmptyLine()) + fire_event(HooksRunning(num_hooks=num_hooks, hook_type=hook_type)) + startctx = TimestampNamed('node_started_at') finishctx = TimestampNamed('node_finished_at') @@ -344,7 +342,7 @@ def run_hooks(self, adapter, hook_type: RunHookType, extra_context): self._total_executed += len(ordered_hooks) with TextOnly(): - print_timestamped_line("") + fire_event(EmptyLine()) def safe_run_hooks( self, adapter, hook_type: RunHookType, extra_context: Dict[str, Any] @@ -352,7 +350,7 @@ def safe_run_hooks( try: self.run_hooks(adapter, hook_type, extra_context) except RuntimeException: - logger.info("Database error while running {}".format(hook_type)) + fire_event(DatabaseErrorRunning(hook_type)) raise def print_results_line(self, results, execution_time): @@ -366,10 +364,8 @@ def print_results_line(self, results, execution_time): execution_time=execution_time) with TextOnly(): - print_timestamped_line("") - print_timestamped_line( - "Finished running {stat_line}{execution}." - .format(stat_line=stat_line, execution=execution)) + fire_event(EmptyLine()) + fire_event(HookFinished(stat_line=stat_line, execution=execution)) def _get_deferred_manifest(self) -> Optional[WritableManifest]: if not self.args.defer: diff --git a/core/dbt/task/run_operation.py b/core/dbt/task/run_operation.py index 23bff66c5b0..3c0dfe9c55f 100644 --- a/core/dbt/task/run_operation.py +++ b/core/dbt/task/run_operation.py @@ -10,7 +10,10 @@ from dbt.config.utils import parse_cli_vars from dbt.contracts.results import RunOperationResultsArtifact from dbt.exceptions import InternalException -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + RunningOperationCaughtError, RunningOperationUncaughtError, PrintDebugStackTrace +) class RunOperationTask(ManifestTask): @@ -53,18 +56,12 @@ def run(self) -> RunOperationResultsArtifact: try: self._run_unsafe() except dbt.exceptions.Exception as exc: - logger.error( - 'Encountered an error while running operation: {}' - .format(exc) - ) - logger.debug('', exc_info=True) + fire_event(RunningOperationCaughtError(exc=exc)) + fire_event(PrintDebugStackTrace()) success = False except Exception as exc: - logger.error( - 'Encountered an uncaught exception while running operation: {}' - .format(exc) - ) - logger.debug('', exc_info=True) + fire_event(RunningOperationUncaughtError(exc=exc)) + fire_event(PrintDebugStackTrace()) success = False else: success = True From d5134910465c26d003ec86a92c7c445d95728c2d Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Wed, 3 Nov 2021 14:38:54 -0400 Subject: [PATCH 12/31] Show Exception should trigger a stack trace (#4190) --- core/dbt/events/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 2827fc3d95e..c06fea22c7e 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -39,7 +39,7 @@ class ShowException(): # `cli_msg` method of subclasses, this is a safe operation. # If that ever changes we'll want to reassess. 
     def __post_init__(self):
-        self.exc_info: Any = None
+        self.exc_info: Any = True
         self.stack_info: Any = None
         self.extra: Any = None

From 13f31aed902d81154c6a84746d55332973a09b31 Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Thu, 4 Nov 2021 11:23:26 -0400
Subject: [PATCH 13/31] scrub the secrets (#4203)

scrub secrets in event module
---
 core/dbt/events/functions.py | 39 +++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py
index 5c97ed7fb32..cdd846ac4ec 100644
--- a/core/dbt/events/functions.py
+++ b/core/dbt/events/functions.py
@@ -2,6 +2,24 @@ import dbt.logger as logger  # type: ignore # TODO eventually remove dependency on this logger
 from dbt.events.history import EVENT_HISTORY
 from dbt.events.types import CliEventABC, Event, ShowException
+import os
+from typing import List
+
+
+def env_secrets() -> List[str]:
+    return [
+        v for k, v in os.environ.items()
+        if k.startswith(logger.SECRET_ENV_PREFIX)
+    ]
+
+
+def scrub_secrets(msg: str, secrets: List[str]) -> str:
+    scrubbed = msg
+
+    for secret in secrets:
+        scrubbed = scrubbed.replace(secret, "*****")
+
+    return scrubbed
 
 
 # top-level method for accessing the new eventing system
@@ -11,49 +29,50 @@ def fire_event(e: Event) -> None:
     EVENT_HISTORY.append(e)
     if isinstance(e, CliEventABC):
+        msg = scrub_secrets(e.cli_msg(), env_secrets())
         if e.level_tag() == 'test' and not isinstance(e, ShowException):
             # TODO after implementing #3977 send to new test level
-            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg()))
+            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(msg))
         elif e.level_tag() == 'test' and isinstance(e, ShowException):
             # TODO after implementing #3977 send to new test level
             logger.GLOBAL_LOGGER.debug(
-                logger.timestamped_line(e.cli_msg()),
+                logger.timestamped_line(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'debug' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(e.cli_msg()))
+            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(msg))
         elif e.level_tag() == 'debug' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.debug(
-                logger.timestamped_line(e.cli_msg()),
+                logger.timestamped_line(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'info' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.info(logger.timestamped_line(e.cli_msg()))
+            logger.GLOBAL_LOGGER.info(logger.timestamped_line(msg))
         elif e.level_tag() == 'info' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.info(
-                logger.timestamped_line(e.cli_msg()),
+                logger.timestamped_line(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'warn' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.warning(logger.timestamped_line(e.cli_msg()))
+            logger.GLOBAL_LOGGER.warning(logger.timestamped_line(msg))
         elif e.level_tag() == 'warn' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.warning(
-                logger.timestamped_line(e.cli_msg()),
+                logger.timestamped_line(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'error' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.error(logger.timestamped_line(e.cli_msg()))
+            logger.GLOBAL_LOGGER.error(logger.timestamped_line(msg))
         elif e.level_tag() == 'error' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.error(
-                logger.timestamped_line(e.cli_msg()),
+                logger.timestamped_line(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra

From 3cafc9e13f948bfd123d9b05022bc0f0fde85fb0 Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Thu, 4 Nov 2021 10:30:02 -0500
Subject: [PATCH 14/31] task callsites: part 2 (#4188)

* add struct logging to docs serve
* remove merge fluff
* struct logging to seed command
* converting print to use structured logging
* more structured logging print conversion
* pulling apart formatting more
* added struct logging by dissecting printer.py
* add struct logging to runnable
* add struct logging to task init
* fixed formatting
* more formatting and moving things around
---
 core/dbt/events/format.py  |  35 ++
 core/dbt/events/types.py   | 669 +++++++++++++++++++++++++++++++++++++
 core/dbt/task/base.py      |  44 +--
 core/dbt/task/freshness.py |  62 +++-
 core/dbt/task/init.py      |  52 +--
 core/dbt/task/printer.py   | 310 +++----
 core/dbt/task/run.py       |  61 +++-
 core/dbt/task/runnable.py  |  33 +-
 core/dbt/task/seed.py      |  54 ++-
 core/dbt/task/serve.py     |  15 +-
 core/dbt/task/snapshot.py  |  33 +-
 core/dbt/task/test.py      |  58 +++-
 12 files changed, 1049 insertions(+), 377 deletions(-)
 create mode 100644 core/dbt/events/format.py

diff --git a/core/dbt/events/format.py b/core/dbt/events/format.py
new file mode 100644
index 00000000000..5f0a61509a5
--- /dev/null
+++ b/core/dbt/events/format.py
@@ -0,0 +1,35 @@
+import dbt.logger as logger  # type: ignore # TODO eventually remove dependency on this logger
+from dbt import ui
+from typing import Optional
+
+
+def format_fancy_output_line(
+    msg: str, status: str, index: Optional[int],
+    total: Optional[int], execution_time: Optional[float] = None,
+    truncate: bool = False
+) -> str:
+    if index is None or total is None:
+        progress = ''
+    else:
+        progress = '{} of {} '.format(index, total)
+    # TODO: remove this formatting once we rip out all the old logger
+    prefix = "{timestamp} | {progress}{message}".format(
+        timestamp=logger.get_timestamp(),
+        progress=progress,
+        message=msg)
+
+    truncate_width = ui.printer_width() - 3
+    justified = prefix.ljust(ui.printer_width(), ".")
+    if truncate and len(justified) > truncate_width:
+        justified = justified[:truncate_width] + '...'
+
+    if execution_time is None:
+        status_time = ""
+    else:
+        status_time = " in {execution_time:0.2f}s".format(
+            execution_time=execution_time)
+
+    output = "{justified} [{status}{status_time}]".format(
+        justified=justified, status=status, status_time=status_time)
+
+    return output
diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py
index c06fea22c7e..813c8b65605 100644
--- a/core/dbt/events/types.py
+++ b/core/dbt/events/types.py
@@ -2,6 +2,9 @@
 from dataclasses import dataclass
 from typing import Any, List, Optional, Dict
 from dbt import ui
+from dbt import utils
+from dbt.node_types import NodeType
+from dbt.events.format import format_fancy_output_line
 
 
 # types to represent log levels
@@ -994,6 +997,618 @@ def cli_msg(self) -> str:
         return "Done."
+@dataclass
+class ServingDocsPort(InfoLevel, CliEventABC):
+    address: str
+    port: int
+
+    def cli_msg(self) -> str:
+        return f"Serving docs at {self.address}:{self.port}"
+
+
+@dataclass
+class ServingDocsAccessInfo(InfoLevel, CliEventABC):
+    port: str
+
+    def cli_msg(self) -> str:
+        return f"To access from your browser, navigate to: http://localhost:{self.port}"
+
+
+class ServingDocsExitInfo(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "Press Ctrl+C to exit.\n\n"
+
+
+@dataclass
+class SeedHeader(InfoLevel, CliEventABC):
+    header: str
+
+    def cli_msg(self) -> str:
+        return self.header
+
+
+@dataclass
+class SeedHeaderSeperator(InfoLevel, CliEventABC):
+    len_header: int
+
+    def cli_msg(self) -> str:
+        return "-" * self.len_header
+
+
+@dataclass
+class RunResultWarning(WarnLevel, CliEventABC):
+    resource_type: str
+    node_name: str
+    path: str
+
+    def cli_msg(self) -> str:
+        info = 'Warning'
+        return ui.yellow(f"{info} in {self.resource_type} {self.node_name} ({self.path})")
+
+
+@dataclass
+class RunResultFailure(ErrorLevel, CliEventABC):
+    resource_type: str
+    node_name: str
+    path: str
+
+    def cli_msg(self) -> str:
+        info = 'Failure'
+        return ui.red(f"{info} in {self.resource_type} {self.node_name} ({self.path})")
+
+
+@dataclass
+class StatsLine(InfoLevel, CliEventABC):
+    stats: Dict
+
+    def cli_msg(self) -> str:
+        stats_line = ("\nDone. PASS={pass} WARN={warn} ERROR={error} SKIP={skip} TOTAL={total}")
+        return stats_line.format(**self.stats)
+
+
+@dataclass
+class RunResultError(ErrorLevel, CliEventABC):
+    msg: str
+
+    def cli_msg(self) -> str:
+        return f"  {self.msg}"
+
+
+@dataclass
+class RunResultErrorNoMessage(ErrorLevel, CliEventABC):
+    status: str
+
+    def cli_msg(self) -> str:
+        return f"  Status: {self.status}"
+
+
+@dataclass
+class SQLCompiledPath(InfoLevel, CliEventABC):
+    path: str
+
+    def cli_msg(self) -> str:
+        return f"  compiled SQL at {self.path}"
+
+
+@dataclass
+class CheckNodeTestFailure(InfoLevel, CliEventABC):
+    relation_name: str
+
+    def cli_msg(self) -> str:
+        msg = f"select * from {self.relation_name}"
+        border = '-' * len(msg)
+        return f"  See test failures:\n  {border}\n  {msg}\n  {border}"
+
+
+@dataclass
+class FirstRunResultError(ErrorLevel, CliEventABC):
+    msg: str
+
+    def cli_msg(self) -> str:
+        return ui.yellow(self.msg)
+
+
+@dataclass
+class AfterFirstRunResultError(ErrorLevel, CliEventABC):
+    msg: str
+
+    def cli_msg(self) -> str:
+        return self.msg
+
+
+@dataclass
+class EndOfRunSummary(InfoLevel, CliEventABC):
+    num_errors: int
+    num_warnings: int
+    keyboard_interrupt: bool = False
+
+    def cli_msg(self) -> str:
+        error_plural = utils.pluralize(self.num_errors, 'error')
+        warn_plural = utils.pluralize(self.num_warnings, 'warning')
+        if self.keyboard_interrupt:
+            message = ui.yellow('Exited because of keyboard interrupt.')
+        elif self.num_errors > 0:
+            message = ui.red("Completed with {} and {}:".format(
+                error_plural, warn_plural))
+        elif self.num_warnings > 0:
+            message = ui.yellow('Completed with {}:'.format(warn_plural))
+        else:
+            message = ui.green('Completed successfully')
+        return message
+
+
+@dataclass
+class PrintStartLine(InfoLevel, CliEventABC):
+    description: str
+    index: int
+    total: int
+
+    def cli_msg(self) -> str:
+        msg = f"START {self.description}"
+        return format_fancy_output_line(msg=msg, status='RUN', index=self.index, total=self.total)
+
+
+@dataclass
+class PrintHookStartLine(InfoLevel, CliEventABC):
+    statement: str
+    index: int
+    total: int
+    truncate: bool
+
+    def cli_msg(self) -> str:
+        msg = f"START hook:
{self.statement}" + return format_fancy_output_line(msg=msg, + status='RUN', + index=self.index, + total=self.total, + truncate=self.truncate) + + +@dataclass +class PrintHookEndLine(InfoLevel, CliEventABC): + statement: str + status: str + index: int + total: int + execution_time: int + truncate: bool + + def cli_msg(self) -> str: + msg = 'OK hook: {}'.format(self.statement) + return format_fancy_output_line(msg=msg, + status=ui.green(self.status), + index=self.index, + total=self.total, + execution_time=self.execution_time, + truncate=self.truncate) + + +@dataclass +class SkippingDetails(InfoLevel, CliEventABC): + resource_type: str + schema: str + node_name: str + index: int + total: int + + def cli_msg(self) -> str: + if self.resource_type in NodeType.refable(): + msg = f'SKIP relation {self.schema}.{self.node_name}' + else: + msg = f'SKIP {self.resource_type} {self.node_name}' + return format_fancy_output_line(msg=msg, + status=ui.yellow('SKIP'), + index=self.index, + total=self.total) + + +@dataclass +class PrintErrorTestResult(ErrorLevel, CliEventABC): + name: str + index: int + num_models: int + execution_time: int + + def cli_msg(self) -> str: + info = "ERROR" + msg = f"{info} {self.name}" + return format_fancy_output_line(msg=msg, + status=ui.red(info), + index=self.index, + total=self.num_models, + execution_time=self.execution_time) + + +@dataclass +class PrintPassTestResult(InfoLevel, CliEventABC): + name: str + index: int + num_models: int + execution_time: int + + def cli_msg(self) -> str: + info = "PASS" + msg = f"{info} {self.name}" + return format_fancy_output_line(msg=msg, + status=ui.green(info), + index=self.index, + total=self.num_models, + execution_time=self.execution_time) + + +@dataclass +class PrintWarnTestResult(WarnLevel, CliEventABC): + name: str + index: int + num_models: int + execution_time: int + failures: List[str] + + def cli_msg(self) -> str: + info = f'WARN {self.failures}' + msg = f"{info} {self.name}" + return format_fancy_output_line(msg=msg, + status=ui.yellow(info), + index=self.index, + total=self.num_models, + execution_time=self.execution_time) + + +@dataclass +class PrintFailureTestResult(ErrorLevel, CliEventABC): + name: str + index: int + num_models: int + execution_time: int + failures: List[str] + + def cli_msg(self) -> str: + info = f'FAIL {self.failures}' + msg = f"{info} {self.name}" + return format_fancy_output_line(msg=msg, + status=ui.red(info), + index=self.index, + total=self.num_models, + execution_time=self.execution_time) + + +@dataclass +class PrintSkipBecauseError(ErrorLevel, CliEventABC): + schema: str + relation: str + index: int + total: int + + def cli_msg(self) -> str: + msg = f'SKIP relation {self.schema}.{self.relation} due to ephemeral model error' + return format_fancy_output_line(msg=msg, + status=ui.red('ERROR SKIP'), + index=self.index, + total=self.total) + + +@dataclass +class PrintModelErrorResultLine(ErrorLevel, CliEventABC): + description: str + status: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = "ERROR creating" + msg = f"{info} {self.description}" + return format_fancy_output_line(msg=msg, + status=ui.red(self.status.upper()), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintModelResultLine(InfoLevel, CliEventABC): + description: str + status: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = "OK created" + msg = f"{info} {self.description}" + return 
format_fancy_output_line(msg=msg, + status=ui.green(self.status), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintSnapshotErrorResultLine(ErrorLevel, CliEventABC): + status: str + description: str + cfg: Dict + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'ERROR snapshotting' + msg = "{info} {description}".format(info=info, description=self.description, **self.cfg) + return format_fancy_output_line(msg=msg, + status=ui.red(self.status.upper()), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintSnapshotResultLine(InfoLevel, CliEventABC): + status: str + description: str + cfg: Dict + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'OK snapshotted' + msg = "{info} {description}".format(info=info, description=self.description, **self.cfg) + return format_fancy_output_line(msg=msg, + status=ui.green(self.status), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintSeedErrorResultLine(ErrorLevel, CliEventABC): + status: str + index: int + total: int + execution_time: int + schema: str + relation: str + + def cli_msg(self) -> str: + info = 'ERROR loading' + msg = f"{info} seed file {self.schema}.{self.relation}" + return format_fancy_output_line(msg=msg, + status=ui.red(self.status.upper()), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintSeedResultLine(InfoLevel, CliEventABC): + status: str + index: int + total: int + execution_time: int + schema: str + relation: str + + def cli_msg(self) -> str: + info = 'OK loaded' + msg = f"{info} seed file {self.schema}.{self.relation}" + return format_fancy_output_line(msg=msg, + status=ui.green(self.status), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintHookEndErrorLine(ErrorLevel, CliEventABC): + source_name: str + table_name: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'ERROR' + msg = f"{info} freshness of {self.source_name}.{self.table_name}" + return format_fancy_output_line(msg=msg, + status=ui.red(info), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintHookEndErrorStaleLine(ErrorLevel, CliEventABC): + source_name: str + table_name: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'ERROR STALE' + msg = f"{info} freshness of {self.source_name}.{self.table_name}" + return format_fancy_output_line(msg=msg, + status=ui.red(info), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintHookEndWarnLine(WarnLevel, CliEventABC): + source_name: str + table_name: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'WARN' + msg = f"{info} freshness of {self.source_name}.{self.table_name}" + return format_fancy_output_line(msg=msg, + status=ui.yellow(info), + index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintHookEndPassLine(InfoLevel, CliEventABC): + source_name: str + table_name: str + index: int + total: int + execution_time: int + + def cli_msg(self) -> str: + info = 'PASS' + msg = f"{info} freshness of {self.source_name}.{self.table_name}" + return format_fancy_output_line(msg=msg, + status=ui.green(info), + 
index=self.index, + total=self.total, + execution_time=self.execution_time) + + +@dataclass +class PrintCancelLine(ErrorLevel, CliEventABC): + conn_name: str + + def cli_msg(self) -> str: + msg = 'CANCEL query {}'.format(self.conn_name) + return format_fancy_output_line(msg=msg, + status=ui.red('CANCEL'), + index=None, + total=None) + + +@dataclass +class DefaultSelector(InfoLevel, CliEventABC): + name: str + + def cli_msg(self) -> str: + return f"Using default selector {self.name}" + + +@dataclass +class NodeStart(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Began running node {self.unique_id}" + + +@dataclass +class NodeFinished(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Finished running node {self.unique_id}" + + +@dataclass +class QueryCancelationUnsupported(InfoLevel, CliEventABC): + type: str + + def cli_msg(self) -> str: + msg = (f"The {self.type} adapter does not support query " + "cancellation. Some queries may still be " + "running!") + return ui.yellow(msg) + + +@dataclass +class ConcurrencyLine(InfoLevel, CliEventABC): + concurrency_line: str + + def cli_msg(self) -> str: + return self.concurrency_line + + +@dataclass +class StarterProjectPath(DebugLevel, CliEventABC): + dir: str + + def cli_msg(self) -> str: + return f"Starter project path: {self.dir}" + + +@dataclass +class ConfigFolderDirectory(InfoLevel, CliEventABC): + dir: str + + def cli_msg(self) -> str: + return f"Creating dbt configuration folder at {self.dir}" + + +@dataclass +class NoSampleProfileFound(InfoLevel, CliEventABC): + adapter: str + + def cli_msg(self) -> str: + return f"No sample profile found for {self.adapter}." + + +@dataclass +class ProfileWrittenWithSample(InfoLevel, CliEventABC): + name: str + path: str + + def cli_msg(self) -> str: + return (f"Profile {self.name} written to {self.path} " + "using target's sample configuration. Once updated, you'll be able to " + "start developing with dbt.") + + +@dataclass +class ProfileWrittenWithTargetTemplateYAML(InfoLevel, CliEventABC): + name: str + path: str + + def cli_msg(self) -> str: + return (f"Profile {self.name} written to {self.path} using target's " + "profile_template.yml and your supplied values. Run 'dbt debug' to " + "validate the connection.") + + +@dataclass +class ProfileWrittenWithProjectTemplateYAML(InfoLevel, CliEventABC): + name: str + path: str + + def cli_msg(self) -> str: + return (f"Profile {self.name} written to {self.path} using project's " + "profile_template.yml and your supplied values. Run 'dbt debug' to " + "validate the connection.") + + +class SettingUpProfile(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Setting up your profile." + + +class InvalidProfileTemplateYAML(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Invalid profile_template.yml in project." + + +@dataclass +class ProjectNameAlreadyExists(InfoLevel, CliEventABC): + name: str + + def cli_msg(self) -> str: + return f"A project called {self.name} already exists here." + + +@dataclass +class GetAddendum(InfoLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return self.msg + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. 
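# An illustrative sketch of the trick (an editorial example, not part of this
# patch): guard the instantiations with a condition that never holds at
# runtime but that mypy still type-checks, e.g.
#
#     if 1 == 0:  # assumption: any condition mypy cannot reduce to False
#         SeedHeader(header='')  # constructor signatures are checked here
#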
# making the conditional `if False` causes mypy to skip it as dead code so @@ -1109,3 +1724,57 @@ def cli_msg(self) -> str: BuildingCatalog() CompileComplete() FreshnessCheckComplete() + ServingDocsPort(address='', port=0) + ServingDocsAccessInfo(port='') + ServingDocsExitInfo() + SeedHeader(header='') + SeedHeaderSeperator(len_header=0) + RunResultWarning(resource_type='', node_name='', path='') + RunResultFailure(resource_type='', node_name='', path='') + StatsLine(stats={}) + RunResultError(msg='') + RunResultErrorNoMessage(status='') + SQLCompiledPath(path='') + CheckNodeTestFailure(relation_name='') + FirstRunResultError(msg='') + AfterFirstRunResultError(msg='') + PrintStartLine(description='', index=0, total=0) + PrintHookStartLine(statement='', index=0, total=0, truncate=False) + PrintHookEndLine(statement='', status='', index=0, total=0, execution_time=0, truncate=False) + SkippingDetails(resource_type='', schema='', node_name='', index=0, total=0) + PrintErrorTestResult(name='', index=0, num_models=0, execution_time=0) + PrintPassTestResult(name='', index=0, num_models=0, execution_time=0) + PrintWarnTestResult(name='', index=0, num_models=0, execution_time=0, failures=[]) + PrintFailureTestResult(name='', index=0, num_models=0, execution_time=0, failures=[]) + PrintSkipBecauseError(schema='', relation='', index=0, total=0) + PrintModelErrorResultLine(description='', status='', index=0, total=0, execution_time=0) + PrintModelResultLine(description='', status='', index=0, total=0, execution_time=0) + PrintSnapshotErrorResultLine(status='', + description='', + cfg={}, + index=0, + total=0, + execution_time=0) + PrintSnapshotResultLine(status='', description='', cfg={}, index=0, total=0, execution_time=0) + PrintSeedErrorResultLine(status='', index=0, total=0, execution_time=0, schema='', relation='') + PrintSeedResultLine(status='', index=0, total=0, execution_time=0, schema='', relation='') + PrintHookEndErrorLine(source_name='', table_name='', index=0, total=0, execution_time=0) + PrintHookEndErrorStaleLine(source_name='', table_name='', index=0, total=0, execution_time=0) + PrintHookEndWarnLine(source_name='', table_name='', index=0, total=0, execution_time=0) + PrintHookEndPassLine(source_name='', table_name='', index=0, total=0, execution_time=0) + PrintCancelLine(conn_name='') + DefaultSelector(name='') + NodeStart(unique_id='') + NodeFinished(unique_id='') + QueryCancelationUnsupported(type='') + ConcurrencyLine(concurrency_line='') + StarterProjectPath(dir='') + ConfigFolderDirectory(dir='') + NoSampleProfileFound(adapter='') + ProfileWrittenWithSample(name='', path='') + ProfileWrittenWithTargetTemplateYAML(name='', path='') + ProfileWrittenWithProjectTemplateYAML(name='', path='') + SettingUpProfile() + InvalidProfileTemplateYAML() + ProjectNameAlreadyExists(name='') + GetAddendum(msg='') diff --git a/core/dbt/task/base.py b/core/dbt/task/base.py index fc21c8d1d2d..9bc6171dcda 100644 --- a/core/dbt/task/base.py +++ b/core/dbt/task/base.py @@ -20,10 +20,9 @@ DbtProjectError, DbtProjectErrorException, DbtProfileError, DbtProfileErrorException, ProfileListTitle, ListSingleProfile, NoDefinedProfiles, ProfileHelpMessage, CatchableExceptionOnRun, InternalExceptionOnRun, GenericExceptionOnRun, - NodeConnectionReleaseError, PrintDebugStackTrace + NodeConnectionReleaseError, PrintDebugStackTrace, SkippingDetails, PrintSkipBecauseError ) -from .printer import print_skip_caused_by_error, print_skip_line - +from .printer import print_run_result_error from dbt.adapters.factory import 
register_adapter from dbt.config import RuntimeConfig, Project @@ -309,9 +308,13 @@ def _handle_internal_exception(self, e, ctx): return str(e) def _handle_generic_exception(self, e, ctx): - fire_event(GenericExceptionOnRun(build_path=self.node.build_path, - unique_id=self.node.unique_id, - exc=e)) + fire_event( + GenericExceptionOnRun( + build_path=self.node.build_path, + unique_id=self.node.unique_id, + exc=e + ) + ) fire_event(PrintDebugStackTrace()) return str(e) @@ -394,14 +397,15 @@ def on_skip(self): # if this model was skipped due to an upstream ephemeral model # failure, print a special 'error skip' message. if self._skip_caused_by_ephemeral_failure(): - print_skip_caused_by_error( - self.node, - schema_name, - node_name, - self.node_index, - self.num_nodes, - self.skip_cause + fire_event( + PrintSkipBecauseError( + schema=schema_name, + relation=node_name, + index=self.node_index, + total=self.num_nodes + ) ) + print_run_result_error(result=self.skip_cause, newline=False) if self.skip_cause is None: # mypy appeasement raise InternalException( 'Skip cause not set but skip was somehow caused by ' @@ -415,12 +419,14 @@ def on_skip(self): self.skip_cause.node.unique_id) ) else: - print_skip_line( - self.node, - schema_name, - node_name, - self.node_index, - self.num_nodes + fire_event( + SkippingDetails( + resource_type=self.node.resource_type, + schema=schema_name, + node_name=node_name, + index=self.node_index, + total=self.num_nodes + ) ) node_result = self.skip_result(self.node, error_message) diff --git a/core/dbt/task/freshness.py b/core/dbt/task/freshness.py index e8f765da8c8..88ddbcae44f 100644 --- a/core/dbt/task/freshness.py +++ b/core/dbt/task/freshness.py @@ -4,8 +4,6 @@ from .base import BaseRunner from .printer import ( - print_start_line, - print_freshness_result_line, print_run_result_error, ) from .runnable import GraphRunnableTask @@ -17,7 +15,10 @@ ) from dbt.exceptions import RuntimeException, InternalException from dbt.events.functions import fire_event -from dbt.events.types import FreshnessCheckComplete +from dbt.events.types import ( + FreshnessCheckComplete, PrintStartLine, PrintHookEndErrorLine, + PrintHookEndErrorStaleLine, PrintHookEndWarnLine, PrintHookEndPassLine +) from dbt.node_types import NodeType from dbt.graph import ResourceTypeSelector @@ -35,10 +36,61 @@ def on_skip(self): def before_execute(self): description = 'freshness of {0.source_name}.{0.name}'.format(self.node) - print_start_line(description, self.node_index, self.num_nodes) + fire_event( + PrintStartLine( + description=description, + index=self.node_index, + total=self.num_nodes + ) + ) def after_execute(self, result): - print_freshness_result_line(result, self.node_index, self.num_nodes) + if hasattr(result, 'node'): + source_name = result.node.source_name + table_name = result.node.name + else: + source_name = result.source_name + table_name = result.table_name + if result.status == FreshnessStatus.RuntimeErr: + fire_event( + PrintHookEndErrorLine( + source_name=source_name, + table_name=table_name, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) + elif result.status == FreshnessStatus.Error: + fire_event( + PrintHookEndErrorStaleLine( + source_name=source_name, + table_name=table_name, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) + elif result.status == FreshnessStatus.Warn: + fire_event( + PrintHookEndWarnLine( + source_name=source_name, + table_name=table_name, + 
index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) + else: + fire_event( + PrintHookEndPassLine( + source_name=source_name, + table_name=table_name, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) def error_result(self, node, message, start_time, timing_info): return self._build_run_result( diff --git a/core/dbt/task/init.py b/core/dbt/task/init.py index ba54800fb9f..7446ba800aa 100644 --- a/core/dbt/task/init.py +++ b/core/dbt/task/init.py @@ -14,7 +14,12 @@ from dbt.version import _get_adapter_plugin_names from dbt.adapters.factory import load_plugin, get_include_paths -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + StarterProjectPath, ConfigFolderDirectory, NoSampleProfileFound, ProfileWrittenWithSample, + ProfileWrittenWithTargetTemplateYAML, ProfileWrittenWithProjectTemplateYAML, SettingUpProfile, + InvalidProfileTemplateYAML, ProjectNameAlreadyExists, GetAddendum +) from dbt.include.starter_project import PACKAGE_PATH as starter_project_directory @@ -55,7 +60,7 @@ class InitTask(BaseTask): def copy_starter_repo(self, project_name): - logger.debug("Starter project path: " + starter_project_directory) + fire_event(StarterProjectPath(dir=starter_project_directory)) shutil.copytree(starter_project_directory, project_name, ignore=shutil.ignore_patterns(*IGNORE_FILES)) @@ -63,8 +68,7 @@ def create_profiles_dir(self, profiles_dir: str) -> bool: """Create the user's profiles directory if it doesn't already exist.""" profiles_path = Path(profiles_dir) if not profiles_path.exists(): - msg = "Creating dbt configuration folder at {}" - logger.info(msg.format(profiles_dir)) + fire_event(ConfigFolderDirectory(dir=profiles_dir)) dbt.clients.system.make_directory(profiles_dir) return True return False @@ -79,7 +83,7 @@ def create_profile_from_sample(self, adapter: str, profile_name: str): sample_profiles_path = adapter_path / "sample_profiles.yml" if not sample_profiles_path.exists(): - logger.debug(f"No sample profile found for {adapter}.") + fire_event(NoSampleProfileFound(adapter=adapter)) else: with open(sample_profiles_path, "r") as f: sample_profile = f.read() @@ -98,10 +102,11 @@ def create_profile_from_sample(self, adapter: str, profile_name: str): else: with open(profiles_filepath, "w") as f: f.write(sample_profile) - logger.info( - f"Profile {profile_name} written to {profiles_filepath} " - "using target's sample configuration. Once updated, you'll be able to " - "start developing with dbt." + fire_event( + ProfileWrittenWithSample( + name=profile_name, + path=str(profiles_filepath) + ) ) def get_addendum(self, project_name: str, profiles_path: str) -> str: @@ -207,10 +212,11 @@ def create_profile_from_target(self, adapter: str, profile_name: str): profile_template = yaml.safe_load(f) self.create_profile_from_profile_template(profile_template, profile_name) profiles_filepath = Path(flags.PROFILES_DIR) / Path("profiles.yml") - logger.info( - f"Profile {profile_name} written to {profiles_filepath} using target's " - "profile_template.yml and your supplied values. Run 'dbt debug' to " - "validate the connection." 
+ fire_event( + ProfileWrittenWithTargetTemplateYAML( + name=profile_name, + path=str(profiles_filepath) + ) ) else: # For adapters without a profile_template.yml defined, fallback on @@ -244,10 +250,11 @@ def create_profile_using_project_profile_template(self, profile_name): profile_template = yaml.safe_load(f) self.create_profile_from_profile_template(profile_template, profile_name) profiles_filepath = Path(flags.PROFILES_DIR) / Path("profiles.yml") - logger.info( - f"Profile {profile_name} written to {profiles_filepath} using project's " - "profile_template.yml and your supplied values. Run 'dbt debug' to " - "validate the connection." + fire_event( + ProfileWrittenWithProjectTemplateYAML( + name=profile_name, + path=str(profiles_filepath) + ) ) def ask_for_adapter_choice(self) -> str: @@ -276,7 +283,7 @@ def run(self): if in_project: # When dbt init is run inside an existing project, # just setup the user's profile. - logger.info("Setting up your profile.") + fire_event(SettingUpProfile()) profile_name = self.get_profile_name_from_current_project() # If a profile_template.yml exists in the project root, that effectively # overrides the profile_template.yml for the given target. @@ -288,7 +295,7 @@ def run(self): self.create_profile_using_project_profile_template(profile_name) return except Exception: - logger.info("Invalid profile_template.yml in project.") + fire_event(InvalidProfileTemplateYAML()) if not self.check_if_can_write_profile(profile_name=profile_name): return adapter = self.ask_for_adapter_choice() @@ -301,9 +308,7 @@ def run(self): project_name = click.prompt("What is the desired project name?") project_path = Path(project_name) if project_path.exists(): - logger.info( - f"A project called {project_name} already exists here." - ) + fire_event(ProjectNameAlreadyExists(name=project_name)) return self.copy_starter_repo(project_name) @@ -323,4 +328,5 @@ def run(self): self.create_profile_from_target( adapter, profile_name=project_name ) - logger.info(self.get_addendum(project_name, profiles_dir)) + msg = self.get_addendum(project_name, profiles_dir) + fire_event(GetAddendum(msg=msg)) diff --git a/core/dbt/task/printer.py b/core/dbt/task/printer.py index 4062c1466d2..945d4edcd6f 100644 --- a/core/dbt/task/printer.py +++ b/core/dbt/task/printer.py @@ -1,50 +1,22 @@ -from typing import Dict, Optional, Tuple, Callable +from typing import Dict from dbt.logger import ( - GLOBAL_LOGGER as logger, DbtStatusMessage, TextOnly, - get_timestamp, ) -from dbt.node_types import NodeType +from dbt.events.functions import fire_event +from dbt.events.types import ( + EmptyLine, RunResultWarning, RunResultFailure, StatsLine, RunResultError, + RunResultErrorNoMessage, SQLCompiledPath, CheckNodeTestFailure, FirstRunResultError, + AfterFirstRunResultError, EndOfRunSummary +) from dbt.tracking import InvocationProcessor -from dbt import ui from dbt import utils from dbt.contracts.results import ( - FreshnessStatus, NodeStatus, TestStatus + NodeStatus ) - - -def print_fancy_output_line( - msg: str, status: str, logger_fn: Callable, index: Optional[int], - total: Optional[int], execution_time: Optional[float] = None, - truncate: bool = False -) -> None: - if index is None or total is None: - progress = '' - else: - progress = '{} of {} '.format(index, total) - prefix = "{timestamp} | {progress}{message}".format( - timestamp=get_timestamp(), - progress=progress, - message=msg) - - truncate_width = ui.printer_width() - 3 - justified = prefix.ljust(ui.printer_width(), ".") - if truncate and 
len(justified) > truncate_width: - justified = justified[:truncate_width] + '...' - - if execution_time is None: - status_time = "" - else: - status_time = " in {execution_time:0.2f}s".format( - execution_time=execution_time) - - output = "{justified} [{status}{status_time}]".format( - justified=justified, status=status, status_time=status_time) - - logger_fn(output) +from dbt.node_types import NodeType def get_counts(flat_nodes) -> str: @@ -66,179 +38,6 @@ def get_counts(flat_nodes) -> str: return stat_line -def print_start_line(description: str, index: int, total: int) -> None: - msg = "START {}".format(description) - print_fancy_output_line(msg, 'RUN', logger.info, index, total) - - -def print_hook_start_line(statement: str, index: int, total: int) -> None: - msg = 'START hook: {}'.format(statement) - print_fancy_output_line( - msg, 'RUN', logger.info, index, total, truncate=True) - - -def print_hook_end_line( - statement: str, status: str, index: int, total: int, execution_time: float -) -> None: - msg = 'OK hook: {}'.format(statement) - # hooks don't fail into this path, so always green - print_fancy_output_line(msg, ui.green(status), logger.info, index, total, - execution_time=execution_time, truncate=True) - - -def print_skip_line( - node, schema: str, relation: str, index: int, num_models: int -) -> None: - if node.resource_type in NodeType.refable(): - msg = f'SKIP relation {schema}.{relation}' - else: - msg = f'SKIP {node.resource_type} {node.name}' - print_fancy_output_line( - msg, ui.yellow('SKIP'), logger.info, index, num_models) - - -def print_cancel_line(model) -> None: - msg = 'CANCEL query {}'.format(model) - print_fancy_output_line( - msg, ui.red('CANCEL'), logger.error, index=None, total=None) - - -def get_printable_result( - result, success: str, error: str) -> Tuple[str, str, Callable]: - if result.status == NodeStatus.Error: - info = 'ERROR {}'.format(error) - status = ui.red(result.status.upper()) - logger_fn = logger.error - else: - info = 'OK {}'.format(success) - status = ui.green(result.message) - logger_fn = logger.info - - return info, status, logger_fn - - -def print_test_result_line( - result, index: int, total: int -) -> None: - model = result.node - - if result.status == TestStatus.Error: - info = "ERROR" - color = ui.red - logger_fn = logger.error - elif result.status == TestStatus.Pass: - info = 'PASS' - color = ui.green - logger_fn = logger.info - elif result.status == TestStatus.Warn: - info = f'WARN {result.failures}' - color = ui.yellow - logger_fn = logger.warning - elif result.status == TestStatus.Fail: - info = f'FAIL {result.failures}' - color = ui.red - logger_fn = logger.error - else: - raise RuntimeError("unexpected status: {}".format(result.status)) - - print_fancy_output_line( - "{info} {name}".format(info=info, name=model.name), - color(info), - logger_fn, - index, - total, - result.execution_time) - - -def print_model_result_line( - result, description: str, index: int, total: int -) -> None: - info, status, logger_fn = get_printable_result( - result, 'created', 'creating') - - print_fancy_output_line( - "{info} {description}".format(info=info, description=description), - status, - logger_fn, - index, - total, - result.execution_time) - - -def print_snapshot_result_line( - result, description: str, index: int, total: int -) -> None: - model = result.node - - info, status, logger_fn = get_printable_result( - result, 'snapshotted', 'snapshotting') - cfg = model.config.to_dict(omit_none=True) - - msg = "{info} {description}".format( - 
info=info, description=description, **cfg) - print_fancy_output_line( - msg, - status, - logger_fn, - index, - total, - result.execution_time) - - -def print_seed_result_line(result, schema_name: str, index: int, total: int): - model = result.node - - info, status, logger_fn = get_printable_result(result, 'loaded', 'loading') - - print_fancy_output_line( - "{info} seed file {schema}.{relation}".format( - info=info, - schema=schema_name, - relation=model.alias), - status, - logger_fn, - index, - total, - result.execution_time) - - -def print_freshness_result_line(result, index: int, total: int) -> None: - if result.status == FreshnessStatus.RuntimeErr: - info = 'ERROR' - color = ui.red - logger_fn = logger.error - elif result.status == FreshnessStatus.Error: - info = 'ERROR STALE' - color = ui.red - logger_fn = logger.error - elif result.status == FreshnessStatus.Warn: - info = 'WARN' - color = ui.yellow - logger_fn = logger.warning - else: - info = 'PASS' - color = ui.green - logger_fn = logger.info - - if hasattr(result, 'node'): - source_name = result.node.source_name - table_name = result.node.name - else: - source_name = result.source_name - table_name = result.table_name - - msg = f"{info} freshness of {source_name}.{table_name}" - - print_fancy_output_line( - msg, - color(info), - logger_fn, - index, - total, - execution_time=result.execution_time - ) - - def interpret_run_result(result) -> str: if result.status in (NodeStatus.Error, NodeStatus.Fail): return 'error' @@ -266,8 +65,7 @@ def print_run_status_line(results) -> None: stats[result_type] += 1 stats['total'] += 1 - stats_line = "\nDone. PASS={pass} WARN={warn} ERROR={error} SKIP={skip} TOTAL={total}" # noqa - logger.info(stats_line.format(**stats)) + fire_event(StatsLine(stats=stats)) def print_run_result_error( @@ -275,81 +73,51 @@ def print_run_result_error( ) -> None: if newline: with TextOnly(): - logger.info("") + fire_event(EmptyLine()) if result.status == NodeStatus.Fail or ( is_warning and result.status == NodeStatus.Warn ): if is_warning: - color = ui.yellow - info = 'Warning' - logger_fn = logger.warning + fire_event( + RunResultWarning( + resource_type=result.node.resource_type, + node_name=result.node.name, + path=result.node.original_file_path + ) + ) else: - color = ui.red - info = 'Failure' - logger_fn = logger.error - logger_fn(color("{} in {} {} ({})").format( - info, - result.node.resource_type, - result.node.name, - result.node.original_file_path)) + fire_event( + RunResultFailure( + resource_type=result.node.resource_type, + node_name=result.node.name, + path=result.node.original_file_path + ) + ) if result.message: - logger.error(f" {result.message}") + fire_event(RunResultError(msg=result.message)) else: - logger.error(f" Status: {result.status}") + fire_event(RunResultErrorNoMessage(status=result.status)) if result.node.build_path is not None: with TextOnly(): - logger.info("") - logger.info(" compiled SQL at {}".format( - result.node.compiled_path)) + fire_event(EmptyLine()) + fire_event(SQLCompiledPath(path=result.node.compiled_path)) if result.node.should_store_failures: with TextOnly(): - logger.info("") - msg = f"select * from {result.node.relation_name}" - border = '-' * len(msg) - logger.info(f" See test failures:\n {border}\n {msg}\n {border}") + fire_event(EmptyLine()) + fire_event(CheckNodeTestFailure(relation_name=result.node.relation_name)) elif result.message is not None: first = True for line in result.message.split("\n"): if first: - logger.error(ui.yellow(line)) + 
fire_event(FirstRunResultError(msg=line)) first = False else: - logger.error(line) - - -def print_skip_caused_by_error( - model, schema: str, relation: str, index: int, num_models: int, result -) -> None: - msg = ('SKIP relation {}.{} due to ephemeral model error' - .format(schema, relation)) - print_fancy_output_line( - msg, ui.red('ERROR SKIP'), logger.error, index, num_models) - print_run_result_error(result, newline=False) - - -def print_end_of_run_summary( - num_errors: int, num_warnings: int, keyboard_interrupt: bool = False -) -> None: - error_plural = utils.pluralize(num_errors, 'error') - warn_plural = utils.pluralize(num_warnings, 'warning') - if keyboard_interrupt: - message = ui.yellow('Exited because of keyboard interrupt.') - elif num_errors > 0: - message = ui.red("Completed with {} and {}:".format( - error_plural, warn_plural)) - elif num_warnings > 0: - message = ui.yellow('Completed with {}:'.format(warn_plural)) - else: - message = ui.green('Completed successfully') - - with TextOnly(): - logger.info('') - logger.info('{}'.format(message)) + fire_event(AfterFirstRunResultError(msg=line)) def print_run_end_messages(results, keyboard_interrupt: bool = False) -> None: @@ -369,9 +137,15 @@ def print_run_end_messages(results, keyboard_interrupt: bool = False) -> None: warnings.append(r) with DbtStatusMessage(), InvocationProcessor(): - print_end_of_run_summary(len(errors), - len(warnings), - keyboard_interrupt) + with TextOnly(): + fire_event(EmptyLine()) + fire_event( + EndOfRunSummary( + num_errors=len(errors), + num_warnings=len(warnings), + keyboard_interrupt=keyboard_interrupt + ) + ) for error in errors: print_run_result_error(error, is_warning=False) diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index e5a217349f8..82ed8009903 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -8,10 +8,6 @@ from .compile import CompileRunner, CompileTask from .printer import ( - print_start_line, - print_model_result_line, - print_hook_start_line, - print_hook_end_line, print_run_end_messages, get_counts, ) @@ -33,7 +29,11 @@ missing_materialization, ) from dbt.events.functions import fire_event -from dbt.events.types import DatabaseErrorRunning, EmptyLine, HooksRunning, HookFinished +from dbt.events.types import ( + DatabaseErrorRunning, EmptyLine, HooksRunning, HookFinished, + PrintModelErrorResultLine, PrintModelResultLine, PrintStartLine, + PrintHookEndLine, PrintHookStartLine +) from dbt.logger import ( TextOnly, HookMetadata, @@ -173,13 +173,36 @@ def describe_node(self): self.get_node_representation()) def print_start_line(self): - description = self.describe_node() - print_start_line(description, self.node_index, self.num_nodes) + fire_event( + PrintStartLine( + description=self.describe_node(), + index=self.node_index, + total=self.num_nodes + ) + ) def print_result_line(self, result): description = self.describe_node() - print_model_result_line(result, description, self.node_index, - self.num_nodes) + if result.status == NodeStatus.Error: + fire_event( + PrintModelErrorResultLine( + description=description, + status=result.status, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) + else: + fire_event( + PrintModelResultLine( + description=description, + status=result.message, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) def before_execute(self): self.print_start_line() @@ -324,7 +347,14 @@ def run_hooks(self, adapter, hook_type: RunHookType, extra_context): 
hook_meta_ctx = HookMetadata(hook, self.index_offset(idx)) with UniqueID(hook.unique_id): with hook_meta_ctx, startctx: - print_hook_start_line(hook_text, idx, num_hooks) + fire_event( + PrintHookStartLine( + statement=hook_text, + index=idx, + total=num_hooks, + truncate=True + ) + ) status = 'OK' @@ -335,8 +365,15 @@ def run_hooks(self, adapter, hook_type: RunHookType, extra_context): self.ran_hooks.append(hook) with finishctx, DbtModelState({'node_status': 'passed'}): - print_hook_end_line( - hook_text, str(status), idx, num_hooks, timer.elapsed + fire_event( + PrintHookEndLine( + statement=hook_text, + status=str(status), + index=idx, + total=num_hooks, + execution_time=timer.elapsed, + truncate=True + ) ) self._total_executed += len(ordered_hooks) diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py index 779b7428cf9..6a3af18a188 100644 --- a/core/dbt/task/runnable.py +++ b/core/dbt/task/runnable.py @@ -11,16 +11,13 @@ from .printer import ( print_run_result_error, print_run_end_messages, - print_cancel_line, ) -from dbt import ui from dbt.clients.system import write_file from dbt.task.base import ConfiguredTask from dbt.adapters.base import BaseRelation from dbt.adapters.factory import get_adapter from dbt.logger import ( - GLOBAL_LOGGER as logger, DbtProcessState, TextOnly, UniqueID, @@ -28,9 +25,12 @@ DbtModelState, ModelMetadata, NodeCount, - print_timestamped_line, ) - +from dbt.events.functions import fire_event +from dbt.events.types import ( + EmptyLine, PrintCancelLine, DefaultSelector, NodeStart, NodeFinished, + QueryCancelationUnsupported, ConcurrencyLine +) from dbt.contracts.graph.compiled import CompileResultNode from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.parsed import ParsedSourceDefinition @@ -133,7 +133,7 @@ def get_selection_spec(self) -> SelectionSpec: spec = self.config.get_selector(self.args.selector_name) elif not (self.selection_arg or self.exclusion_arg) and default_selector_name: # use pre-defined selector (--selector) with default: true - logger.info(f"Using default selector {default_selector_name}") + fire_event(DefaultSelector(name=default_selector_name)) spec = self.config.get_selector(default_selector_name) else: # use --select and --exclude args @@ -208,8 +208,7 @@ def call_runner(self, runner): index = self.index_offset(runner.node_index) extended_metadata = ModelMetadata(runner.node, index) with startctx, extended_metadata: - logger.debug('Began running node {}'.format( - runner.node.unique_id)) + fire_event(NodeStart(unique_id=runner.node.unique_id)) status: Dict[str, str] try: result = runner.run_with_hooks(self.manifest) @@ -217,8 +216,7 @@ def call_runner(self, runner): finally: finishctx = TimestampNamed('node_finished_at') with finishctx, DbtModelState(status): - logger.debug('Finished running node {}'.format( - runner.node.unique_id)) + fire_event(NodeFinished(unique_id=runner.node.unique_id)) fail_fast = flags.FAIL_FAST @@ -337,12 +335,7 @@ def _cancel_connections(self, pool): adapter = get_adapter(self.config) if not adapter.is_cancelable(): - msg = ("The {} adapter does not support query " - "cancellation. 
Some queries may still be "
-                   "running!".format(adapter.type()))
-
-            yellow = ui.COLOR_FG_YELLOW
-            print_timestamped_line(msg, yellow)
+            fire_event(QueryCancelationUnsupported(type=adapter.type()))
         else:
             with adapter.connection_named('master'):
                 for conn_name in adapter.cancel_open_connections():
@@ -352,7 +345,7 @@
                         continue
                     # if we don't have a manifest/don't have a node, print
                     # anyway.
-                    print_cancel_line(conn_name)
+                    fire_event(PrintCancelLine(conn_name=conn_name))
 
         pool.join()
@@ -363,9 +356,9 @@ def execute_nodes(self):
         text = "Concurrency: {} threads (target='{}')"
         concurrency_line = text.format(num_threads, target_name)
         with NodeCount(self.num_nodes):
-            print_timestamped_line(concurrency_line)
+            fire_event(ConcurrencyLine(concurrency_line=concurrency_line))
         with TextOnly():
-            print_timestamped_line("")
+            fire_event(EmptyLine())
 
         pool = ThreadPool(num_threads)
         try:
@@ -453,7 +446,7 @@ def run(self):
             )
         else:
             with TextOnly():
-                logger.info("")
+                fire_event(EmptyLine())
         selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
         result = self.execute_with_hooks(selected_uids)
diff --git a/core/dbt/task/seed.py b/core/dbt/task/seed.py
index edd60f52c85..6609673a284 100644
--- a/core/dbt/task/seed.py
+++ b/core/dbt/task/seed.py
@@ -2,16 +2,20 @@
 from .run import ModelRunner, RunTask
 from .printer import (
-    print_start_line,
-    print_seed_result_line,
     print_run_end_messages,
 )
 from dbt.contracts.results import RunStatus
 from dbt.exceptions import InternalException
 from dbt.graph import ResourceTypeSelector
-from dbt.logger import GLOBAL_LOGGER as logger, TextOnly
+from dbt.logger import TextOnly
+from dbt.events.functions import fire_event
+from dbt.events.types import (
+    SeedHeader, SeedHeaderSeperator, EmptyLine, PrintSeedErrorResultLine,
+    PrintSeedResultLine, PrintStartLine
+)
 from dbt.node_types import NodeType
+from dbt.contracts.results import NodeStatus
 
 
 class SeedRunner(ModelRunner):
@@ -19,8 +23,13 @@ def describe_node(self):
         return "seed file {}".format(self.get_node_representation())
 
     def before_execute(self):
-        description = self.describe_node()
-        print_start_line(description, self.node_index, self.num_nodes)
+        fire_event(
+            PrintStartLine(
+                description=self.describe_node(),
+                index=self.node_index,
+                total=self.num_nodes
+            )
+        )
 
     def _build_run_model_result(self, model, context):
         result = super()._build_run_model_result(model, context)
@@ -32,9 +41,29 @@ def compile(self, manifest):
         return self.node
 
     def print_result_line(self, result):
-        schema_name = self.node.schema
-        print_seed_result_line(result, schema_name, self.node_index,
-                               self.num_nodes)
+        model = result.node
+        if result.status == NodeStatus.Error:
+            fire_event(
+                PrintSeedErrorResultLine(
+                    status=result.status,
+                    index=self.node_index,
+                    total=self.num_nodes,
+                    execution_time=result.execution_time,
+                    schema=self.node.schema,
+                    relation=model.alias
+                )
+            )
+        else:
+            fire_event(
+                PrintSeedResultLine(
+                    status=result.message,
+                    index=self.node_index,
+                    total=self.num_nodes,
+                    execution_time=result.execution_time,
+                    schema=self.node.schema,
+                    relation=model.alias
+                )
+            )
 
 
 class SeedTask(RunTask):
@@ -75,12 +104,13 @@ def show_table(self, result):
         header = "Random sample of table: {}.{}".format(schema, alias)
 
         with TextOnly():
-            logger.info("")
-            logger.info(header)
-            logger.info("-" * len(header))
+            fire_event(EmptyLine())
+            fire_event(SeedHeader(header=header))
+            fire_event(SeedHeaderSeperator(len_header=len(header)))
+
         rand_table.print_table(max_rows=10, max_columns=None)
+
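# A minimal sketch of the call-site pattern repeated throughout this series
# (hypothetical event class; the real definitions live in
# core/dbt/events/types.py):
#
#     from dataclasses import dataclass
#     from dbt.events.functions import fire_event
#     from dbt.events.types import CliEventABC, InfoLevel
#
#     @dataclass
#     class ShowingSampleRows(InfoLevel, CliEventABC):  # hypothetical
#         rows: int
#
#         def cli_msg(self) -> str:
#             return f"Showing a sample of {self.rows} rows"
#
#     fire_event(ShowingSampleRows(rows=10))
#
# The call site only constructs typed data; fire_event owns formatting,
# log levels, and routing to each destination.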
with TextOnly(): - logger.info("") + fire_event(EmptyLine()) def show_tables(self, results): for result in results: diff --git a/core/dbt/task/serve.py b/core/dbt/task/serve.py index 4d0e2473498..890241a9931 100644 --- a/core/dbt/task/serve.py +++ b/core/dbt/task/serve.py @@ -5,7 +5,8 @@ from dbt.include.global_project import DOCS_INDEX_FILE_PATH from http.server import SimpleHTTPRequestHandler from socketserver import TCPServer -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ServingDocsPort, ServingDocsAccessInfo, ServingDocsExitInfo from dbt.task.base import ConfiguredTask @@ -15,19 +16,17 @@ def run(self): os.chdir(self.config.target_path) port = self.args.port + address = '0.0.0.0' shutil.copyfile(DOCS_INDEX_FILE_PATH, 'index.html') - logger.info("Serving docs at 0.0.0.0:{}".format(port)) - logger.info( - "To access from your browser, navigate to: http://localhost:{}" - .format(port) - ) - logger.info("Press Ctrl+C to exit.\n\n") + fire_event(ServingDocsPort(address=address, port=port)) + fire_event(ServingDocsAccessInfo(port=port)) + fire_event(ServingDocsExitInfo()) # mypy doesn't think SimpleHTTPRequestHandler is ok here, but it is httpd = TCPServer( # type: ignore - ('0.0.0.0', port), + (address, port), SimpleHTTPRequestHandler # type: ignore ) # type: ignore diff --git a/core/dbt/task/snapshot.py b/core/dbt/task/snapshot.py index 4515f11c583..63b1faa7279 100644 --- a/core/dbt/task/snapshot.py +++ b/core/dbt/task/snapshot.py @@ -1,9 +1,11 @@ from .run import ModelRunner, RunTask -from .printer import print_snapshot_result_line from dbt.exceptions import InternalException +from dbt.events.functions import fire_event +from dbt.events.types import PrintSnapshotErrorResultLine, PrintSnapshotResultLine from dbt.graph import ResourceTypeSelector from dbt.node_types import NodeType +from dbt.contracts.results import NodeStatus class SnapshotRunner(ModelRunner): @@ -11,11 +13,30 @@ def describe_node(self): return "snapshot {}".format(self.get_node_representation()) def print_result_line(self, result): - print_snapshot_result_line( - result, - self.get_node_representation(), - self.node_index, - self.num_nodes) + model = result.node + cfg = model.config.to_dict(omit_none=True) + if result.status == NodeStatus.Error: + fire_event( + PrintSnapshotErrorResultLine( + status=result.status, + description=self.get_node_representation(), + cfg=cfg, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) + else: + fire_event( + PrintSnapshotResultLine( + status=result.message, + description=self.get_node_representation(), + cfg=cfg, + index=self.node_index, + total=self.num_nodes, + execution_time=result.execution_time + ) + ) class SnapshotTask(RunTask): diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index c172492dc34..81f62c36bf3 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -8,7 +8,6 @@ from .compile import CompileRunner from .run import RunTask -from .printer import print_start_line, print_test_result_line from dbt.contracts.graph.compiled import ( CompiledSingularTestNode, @@ -19,6 +18,11 @@ from dbt.contracts.results import TestStatus, PrimitiveDict, RunResult from dbt.context.providers import generate_runtime_model_context from dbt.clients.jinja import MacroGenerator +from dbt.events.functions import fire_event +from dbt.events.types import ( + PrintErrorTestResult, PrintPassTestResult, PrintWarnTestResult, + PrintFailureTestResult, PrintStartLine 
+) from dbt.exceptions import ( InternalException, invalid_bool_error, @@ -61,11 +65,57 @@ def describe_node(self): return "test {}".format(node_name) def print_result_line(self, result): - print_test_result_line(result, self.node_index, self.num_nodes) + model = result.node + + if result.status == TestStatus.Error: + fire_event( + PrintErrorTestResult( + name=model.name, + index=self.node_index, + num_models=self.num_nodes, + execution_time=result.execution_time + ) + ) + elif result.status == TestStatus.Pass: + fire_event( + PrintPassTestResult( + name=model.name, + index=self.node_index, + num_models=self.num_nodes, + execution_time=result.execution_time + ) + ) + elif result.status == TestStatus.Warn: + fire_event( + PrintWarnTestResult( + name=model.name, + index=self.node_index, + num_models=self.num_nodes, + execution_time=result.execution_time, + failures=result.failures + ) + ) + elif result.status == TestStatus.Fail: + fire_event( + PrintFailureTestResult( + name=model.name, + index=self.node_index, + num_models=self.num_nodes, + execution_time=result.execution_time, + failures=result.failures + ) + ) + else: + raise RuntimeError("unexpected status: {}".format(result.status)) def print_start_line(self): - description = self.describe_node() - print_start_line(description, self.node_index, self.num_nodes) + fire_event( + PrintStartLine( + description=self.describe_node(), + index=self.node_index, + total=self.num_nodes + ) + ) def before_execute(self): self.print_start_line() From e0b0edaeede419cdbe658bcefc84c8556a508745 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 4 Nov 2021 10:45:23 -0500 Subject: [PATCH 15/31] deps call sites (#4199) * add struct logging to base * add struct logging to git * add struct logging to deps * remove blank line * fixed stray merge error --- core/dbt/deps/base.py | 5 +++-- core/dbt/deps/git.py | 9 +++------ core/dbt/deps/local.py | 8 ++++---- core/dbt/events/types.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/core/dbt/deps/base.py b/core/dbt/deps/base.py index 64d0f0922e0..d7c49b7a4b2 100644 --- a/core/dbt/deps/base.py +++ b/core/dbt/deps/base.py @@ -6,7 +6,8 @@ from dbt.clients import system from dbt.contracts.project import ProjectPackageMetadata -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import DepsSetDownloadDirectory DOWNLOADS_PATH = None @@ -31,7 +32,7 @@ def downloads_directory(): remove_downloads = True system.make_directory(DOWNLOADS_PATH) - logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH)) + fire_event(DepsSetDownloadDirectory(path=DOWNLOADS_PATH)) yield DOWNLOADS_PATH diff --git a/core/dbt/deps/git.py b/core/dbt/deps/git.py index c1470794c5b..cec6206a4a0 100644 --- a/core/dbt/deps/git.py +++ b/core/dbt/deps/git.py @@ -12,7 +12,8 @@ from dbt.exceptions import ( ExecutableError, warn_or_error, raise_dependency_error ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import EnsureGitInstalled from dbt import ui PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa @@ -81,11 +82,7 @@ def _checkout(self): ) except ExecutableError as exc: if exc.cmd and exc.cmd[0] == 'git': - logger.error( - 'Make sure git is installed on your machine. 
More ' - 'information: ' - 'https://docs.getdbt.com/docs/package-management' - ) + fire_event(EnsureGitInstalled()) raise return os.path.join(get_downloads_path(), dir_) diff --git a/core/dbt/deps/local.py b/core/dbt/deps/local.py index 9b7b21c3792..54a727bf655 100644 --- a/core/dbt/deps/local.py +++ b/core/dbt/deps/local.py @@ -6,7 +6,8 @@ ProjectPackageMetadata, LocalPackage, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import DepsCreatingLocalSymlink, DepsSymlinkNotAvailable class LocalPackageMixin: @@ -57,12 +58,11 @@ def install(self, project, renderer): system.remove_file(dest_path) if can_create_symlink: - logger.debug(' Creating symlink to local dependency.') + fire_event(DepsCreatingLocalSymlink()) system.make_symlink(src_path, dest_path) else: - logger.debug(' Symlinks are not available on this ' - 'OS, copying dependency.') + fire_event(DepsSymlinkNotAvailable()) shutil.copytree(src_path, dest_path) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 813c8b65605..91d49b7863c 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1609,6 +1609,31 @@ def cli_msg(self) -> str: return self.msg +@dataclass +class DepsSetDownloadDirectory(DebugLevel, CliEventABC): + path: str + + def cli_msg(self) -> str: + return f"Set downloads directory='{self.path}'" + + +class EnsureGitInstalled(ErrorLevel, CliEventABC): + def cli_msg(self) -> str: + return ('Make sure git is installed on your machine. More ' + 'information: ' + 'https://docs.getdbt.com/docs/package-management') + + +class DepsCreatingLocalSymlink(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return ' Creating symlink to local dependency.' + + +class DepsSymlinkNotAvailable(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return ' Symlinks are not available on this OS, copying dependency.' + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. 
# making the conditional `if False` causes mypy to skip it as dead code so @@ -1778,3 +1803,7 @@ def cli_msg(self) -> str: InvalidProfileTemplateYAML() ProjectNameAlreadyExists(name='') GetAddendum(msg='') + DepsSetDownloadDirectory(path='') + EnsureGitInstalled() + DepsCreatingLocalSymlink() + DepsSymlinkNotAvailable() From d8b97c107796b42180cdb0ec9d4b5bfcafd5fdba Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 8 Nov 2021 14:18:26 -0600 Subject: [PATCH 16/31] call sites in core/dbt (excluding main.py) (#4202) * add struct logging to compilation * add struct logging to tracking * add struct logging to utils * add struct logging to exceptions * fixed some misc errors * updated to send raw ex, removed resulting circ dep --- core/dbt/compilation.py | 11 ++-- core/dbt/events/format.py | 19 ++++++- core/dbt/events/types.py | 108 ++++++++++++++++++++++++++++++++++++-- core/dbt/exceptions.py | 12 ++--- core/dbt/task/debug.py | 2 +- core/dbt/task/printer.py | 4 +- core/dbt/task/test.py | 7 +-- core/dbt/tracking.py | 23 ++++---- core/dbt/utils.py | 24 ++------- 9 files changed, 153 insertions(+), 57 deletions(-) diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index b5c11005131..3ceb854b072 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -26,9 +26,10 @@ RuntimeException, ) from dbt.graph import Graph -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import FoundStats, CompilingNode, WritingInjectedSQLForNode from dbt.node_types import NodeType -from dbt.utils import pluralize +from dbt.events.format import pluralize import dbt.tracking graph_file_name = 'graph.gpickle' @@ -69,7 +70,7 @@ def print_compile_stats(stats): if t in names ]) - logger.info("Found {}".format(stat_line)) + fire_event(FoundStats(stat_line=stat_line)) def _node_enabled(node: ManifestNode): @@ -367,7 +368,7 @@ def _compile_node( if extra_context is None: extra_context = {} - logger.debug("Compiling {}".format(node.unique_id)) + fire_event(CompilingNode(unique_id=node.unique_id)) data = node.to_dict(omit_none=True) data.update({ @@ -521,7 +522,7 @@ def _write_node(self, node: NonSourceCompiledNode) -> ManifestNode: if (not node.extra_ctes_injected or node.resource_type == NodeType.Snapshot): return node - logger.debug(f'Writing injected SQL for node "{node.unique_id}"') + fire_event(WritingInjectedSQLForNode(unique_id=node.unique_id)) if node.compiled_sql: node.compiled_path = node.write_node( diff --git a/core/dbt/events/format.py b/core/dbt/events/format.py index 5f0a61509a5..f476620cf94 100644 --- a/core/dbt/events/format.py +++ b/core/dbt/events/format.py @@ -1,6 +1,7 @@ import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger from dbt import ui -from typing import Optional +from typing import Optional, Union +from dbt.node_types import NodeType def format_fancy_output_line( @@ -33,3 +34,19 @@ def format_fancy_output_line( justified=justified, status=status, status_time=status_time) return output + + +def _pluralize(string: Union[str, NodeType]) -> str: + try: + convert = NodeType(string) + except ValueError: + return f'{string}s' + else: + return convert.pluralize() + + +def pluralize(count, string: Union[str, NodeType]): + pluralized: str = str(string) + if count != 1: + pluralized = _pluralize(string) + return f'{count} {pluralized}' diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 91d49b7863c..1a347914f53 100644 --- a/core/dbt/events/types.py +++ 
b/core/dbt/events/types.py @@ -2,9 +2,8 @@ from dataclasses import dataclass from typing import Any, List, Optional, Dict from dbt import ui -from dbt import utils from dbt.node_types import NodeType -from dbt.events.format import format_fancy_output_line +from dbt.events.format import format_fancy_output_line, pluralize # types to represent log levels @@ -1123,8 +1122,8 @@ class EndOfRunSummary(InfoLevel, CliEventABC): keyboard_interrupt: bool = False def cli_msg(self) -> str: - error_plural = utils.pluralize(self.num_errors, 'error') - warn_plural = utils.pluralize(self.num_warnings, 'warning') + error_plural = pluralize(self.num_errors, 'error') + warn_plural = pluralize(self.num_warnings, 'warning') if self.keyboard_interrupt: message = ui.yellow('Exited because of keyboard interrupt.') elif self.num_errors > 0: @@ -1634,6 +1633,94 @@ def cli_msg(self) -> str: return ' Symlinks are not available on this OS, copying dependency.' +@dataclass +class FoundStats(InfoLevel, CliEventABC): + stat_line: str + + def cli_msg(self) -> str: + return f"Found {self.stat_line}" + + +@dataclass +class CompilingNode(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f"Compiling {self.unique_id}" + + +@dataclass +class WritingInjectedSQLForNode(DebugLevel, CliEventABC): + unique_id: str + + def cli_msg(self) -> str: + return f'Writing injected SQL for node "{self.unique_id}"' + + +class DisableTracking(WarnLevel, CliEventABC): + def cli_msg(self) -> str: + return "Error sending message, disabling tracking" + + +@dataclass +class SendingEvent(DebugLevel, CliEventABC): + kwargs: str + + def cli_msg(self) -> str: + return f"Sending event: {self.kwargs}" + + +class SendEventFailure(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "An error was encountered while trying to send an event" + + +class FlushEvents(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "Flushing usage events" + + +class FlushEventsFailure(DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "An error was encountered while trying to flush usage events" + + +class TrackingInitializeFailure(ShowException, DebugLevel, CliEventABC): + def cli_msg(self) -> str: + return "Got an exception trying to initialize tracking" + + +@dataclass +class RetryExternalCall(DebugLevel, CliEventABC): + attempt: int + max: int + + def cli_msg(self) -> str: + return f"Retrying external call. Attempt: {self.attempt} Max attempts: {self.max}" + + +@dataclass +class GeneralWarningMsg(WarnLevel, CliEventABC): + msg: str + log_fmt: str + + def cli_msg(self) -> str: + if self.log_fmt is not None: + return self.log_fmt.format(self.msg) + return self.msg + + +@dataclass +class GeneralWarningException(WarnLevel, CliEventABC): + exc: Exception + log_fmt: str + + def cli_msg(self) -> str: + if self.log_fmt is not None: + return self.log_fmt.format(str(self.exc)) + return str(self.exc) + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. 
# making the conditional `if False` causes mypy to skip it as dead code so @@ -1763,6 +1850,7 @@ def cli_msg(self) -> str: CheckNodeTestFailure(relation_name='') FirstRunResultError(msg='') AfterFirstRunResultError(msg='') + EndOfRunSummary(num_errors=0, num_warnings=0, keyboard_interrupt=False) PrintStartLine(description='', index=0, total=0) PrintHookStartLine(statement='', index=0, total=0, truncate=False) PrintHookEndLine(statement='', status='', index=0, total=0, execution_time=0, truncate=False) @@ -1807,3 +1895,15 @@ def cli_msg(self) -> str: EnsureGitInstalled() DepsCreatingLocalSymlink() DepsSymlinkNotAvailable() + FoundStats(stat_line='') + CompilingNode(unique_id='') + WritingInjectedSQLForNode(unique_id='') + DisableTracking() + SendingEvent(kwargs='') + SendEventFailure() + FlushEvents() + FlushEventsFailure() + TrackingInitializeFailure() + RetryExternalCall(attempt=0, max=0) + GeneralWarningMsg(msg='', log_fmt='') + GeneralWarningException(exc=Exception(''), log_fmt='') diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 2f3a3d5398e..3efc59686c4 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -2,8 +2,9 @@ import functools from typing import NoReturn, Optional, Mapping, Any -from dbt.logger import GLOBAL_LOGGER as logger from dbt.logger import get_secret_env +from dbt.events.functions import fire_event +from dbt.events.types import GeneralWarningMsg, GeneralWarningException from dbt.node_types import NodeType from dbt import flags from dbt.ui import line_wrap_message, warning_tag @@ -992,19 +993,14 @@ def warn_or_error(msg, node=None, log_fmt=None): if flags.WARN_ERROR: raise_compiler_error(msg, node) else: - if log_fmt is not None: - msg = log_fmt.format(msg) - logger.warning(msg) + fire_event(GeneralWarningMsg(msg=msg, log_fmt=log_fmt)) def warn_or_raise(exc, log_fmt=None): if flags.WARN_ERROR: raise exc else: - msg = str(exc) - if log_fmt is not None: - msg = log_fmt.format(msg) - logger.warning(msg) + fire_event(GeneralWarningException(exc=exc, log_fmt=log_fmt)) def warn(msg, node=None): diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 58ae586352a..472779d53c7 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -16,7 +16,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.links import ProfileConfigDocs from dbt.ui import green, red -from dbt.utils import pluralize +from dbt.events.format import pluralize from dbt.version import get_installed_version from dbt.task.base import BaseTask, get_nearest_project_dir diff --git a/core/dbt/task/printer.py b/core/dbt/task/printer.py index 945d4edcd6f..c2b51196219 100644 --- a/core/dbt/task/printer.py +++ b/core/dbt/task/printer.py @@ -11,7 +11,7 @@ ) from dbt.tracking import InvocationProcessor -from dbt import utils +from dbt.events.format import pluralize from dbt.contracts.results import ( NodeStatus @@ -33,7 +33,7 @@ def get_counts(flat_nodes) -> str: counts[t] = counts.get(t, 0) + 1 stat_line = ", ".join( - [utils.pluralize(v, k) for k, v in counts.items()]) + [pluralize(v, k) for k, v in counts.items()]) return stat_line diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index 81f62c36bf3..e143a17af0b 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -1,7 +1,8 @@ from distutils.util import strtobool from dataclasses import dataclass -from dbt import utils +from dbt.utils import _coerce_decimal +from dbt.events.format import pluralize from dbt.dataclass_schema import dbtClassMixin import threading from typing import Union 
@@ -170,7 +171,7 @@ def execute_test( test_result_dct: PrimitiveDict = dict( zip( [column_name.lower() for column_name in table.column_names], - map(utils._coerce_decimal, table.rows[0]) + map(_coerce_decimal, table.rows[0]) ) ) TestResultData.validate(test_result_dct) @@ -181,7 +182,7 @@ def execute(self, test: CompiledTestNode, manifest: Manifest): severity = test.config.severity.upper() thread_id = threading.current_thread().name - num_errors = utils.pluralize(result.failures, 'result') + num_errors = pluralize(result.failures, 'result') status = None message = None failures = 0 diff --git a/core/dbt/tracking.py b/core/dbt/tracking.py index 8c6f29c32bb..76b3eea3807 100644 --- a/core/dbt/tracking.py +++ b/core/dbt/tracking.py @@ -3,7 +3,11 @@ from dbt.clients.yaml_helper import ( # noqa:F401 yaml, safe_load, Loader, Dumper, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + DisableTracking, SendingEvent, SendEventFailure, FlushEvents, + FlushEventsFailure, TrackingInitializeFailure +) from dbt import version as dbt_version from dbt import flags from snowplow_tracker import Subject, Tracker, Emitter, logger as sp_logger @@ -50,7 +54,7 @@ def __init__(self): def handle_failure(num_ok, unsent): # num_ok will always be 0, unsent will always be 1 entry long, because # the buffer is length 1, so not much to talk about - logger.warning('Error sending message, disabling tracking') + fire_event(DisableTracking()) disable_tracking() def _log_request(self, request, payload): @@ -258,13 +262,11 @@ def track(user, *args, **kwargs): if user.do_not_track: return else: - logger.debug("Sending event: {}".format(kwargs)) + fire_event(SendingEvent(kwargs=kwargs)) try: tracker.track_struct_event(*args, **kwargs) except Exception: - logger.debug( - "An error was encountered while trying to send an event" - ) + fire_event(SendEventFailure()) def track_invocation_start(config=None, args=None): @@ -459,13 +461,11 @@ def track_partial_parser(options): def flush(): - logger.debug("Flushing usage events") + fire_event(FlushEvents()) try: tracker.flush() except Exception: - logger.debug( - "An error was encountered while trying to flush usage events" - ) + fire_event(FlushEventsFailure()) def disable_tracking(): @@ -487,8 +487,7 @@ def initialize_tracking(cookie_dir): try: active_user.initialize() except Exception: - logger.debug('Got an exception trying to initialize tracking', - exc_info=True) + fire_event(TrackingInitializeFailure()) active_user = User(None) diff --git a/core/dbt/utils.py b/core/dbt/utils.py index 9dae4ed3463..658ce227e83 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -14,7 +14,8 @@ from contextlib import contextmanager from dbt.exceptions import ConnectionException -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import RetryExternalCall from enum import Enum from typing_extensions import Protocol from typing import ( @@ -24,8 +25,6 @@ import dbt.exceptions -from dbt.node_types import NodeType - DECIMALS: Tuple[Type[Any], ...] 
 try:
     import cdecimal  # typing: ignore
@@ -400,22 +399,6 @@ def translate_aliases(
     return translator.translate(kwargs)
 
 
-def _pluralize(string: Union[str, NodeType]) -> str:
-    try:
-        convert = NodeType(string)
-    except ValueError:
-        return f'{string}s'
-    else:
-        return convert.pluralize()
-
-
-def pluralize(count, string: Union[str, NodeType]):
-    pluralized: str = str(string)
-    if count != 1:
-        pluralized = _pluralize(string)
-    return f'{count} {pluralized}'
-
-
 # Note that this only affects hologram json validation.
 # It has no effect on mashumaro serialization.
 def restrict_to(*restrictions):
@@ -625,8 +608,7 @@ def _connection_exception_retry(fn, max_attempts: int, attempt: int = 0):
         requests.exceptions.ContentDecodingError,
     ) as exc:
         if attempt <= max_attempts - 1:
-            logger.debug('Retrying external call. Attempt: ' +
-                         f'{attempt} Max attempts: {max_attempts}')
+            fire_event(RetryExternalCall(attempt=attempt, max=max_attempts))
             time.sleep(1)
             _connection_exception_retry(fn, max_attempts, attempt + 1)
         else:

From 63343653a99c6c94545b1adcd0ee6506e49559de Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Fri, 5 Nov 2021 11:09:59 -0500
Subject: [PATCH 17/31] trivial logger removal (#4216)

---
 test/unit/test_graph.py            | 2 --
 test/unit/test_postgres_adapter.py | 1 -
 2 files changed, 3 deletions(-)

diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py
index 58f8b24fc4e..c68a7e68a25 100644
--- a/test/unit/test_graph.py
+++ b/test/unit/test_graph.py
@@ -22,8 +22,6 @@
 except ImportError:
     from Queue import Empty
 
-from dbt.logger import GLOBAL_LOGGER as logger # noqa
-
 from .utils import config_from_parts_or_dicts, generate_name_macros, inject_plugin
 
 
diff --git a/test/unit/test_postgres_adapter.py b/test/unit/test_postgres_adapter.py
index 1a31b2b99e9..06a2ed7c497 100644
--- a/test/unit/test_postgres_adapter.py
+++ b/test/unit/test_postgres_adapter.py
@@ -13,7 +13,6 @@
 from dbt.contracts.graph.manifest import ManifestStateCheck
 from dbt.clients import agate_helper
 from dbt.exceptions import ValidationException, DbtConfigError
-from dbt.logger import GLOBAL_LOGGER as logger # noqa
 from psycopg2 import extensions as psycopg2_extensions
 from psycopg2 import DatabaseError
 

From bd3e623240c90fe2803d64c1affb557749c9591e Mon Sep 17 00:00:00 2001
From: Emily Rockman
Date: Fri, 5 Nov 2021 14:21:46 -0500
Subject: [PATCH 18/31] test/integration call sites (#4209)

* added struct logging to base
* fixed merge weirdness
* convert to use single type for integration tests
* converted to 3 reusable test types in sep module
* tweak message
* clean up and making test_types complete for future
* fix missed import

---
 core/dbt/events/test_types.py | 66 +++++++++++++++++++++++++++++++++++
 test/integration/base.py      | 21 +++++++----
 2 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 core/dbt/events/test_types.py

diff --git a/core/dbt/events/test_types.py b/core/dbt/events/test_types.py
new file mode 100644
index 00000000000..a0f36801670
--- /dev/null
+++ b/core/dbt/events/test_types.py
@@ -0,0 +1,66 @@
+from dataclasses import dataclass
+from .types import (
+    InfoLevel,
+    DebugLevel,
+    WarnLevel,
+    ErrorLevel,
+    ShowException,
+    CliEventABC
+)
+
+
+# Keeping log messages for testing separate since they are used for debugging.
+# Reuse the existing messages when adding logs to tests.
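For orientation, here is a minimal usage sketch (illustrative only, not part of the patch): a test fires the event types defined below through the `fire_event` entry point used throughout this series. The test function and messages are invented.

```python
# Hypothetical call site in an integration test. IntegrationTestInfo and
# IntegrationTestDebug are defined just below and render as
# "Integration Test: <msg>" at their respective log levels.
from dbt.events.functions import fire_event
from dbt.events.test_types import IntegrationTestDebug, IntegrationTestInfo


def check_fixtures():
    fire_event(IntegrationTestInfo(msg="starting fixture check"))
    fire_event(IntegrationTestDebug(msg="loaded 3 seed files"))
```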
+ +@dataclass +class IntegrationTestInfo(InfoLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return f"Integration Test: {self.msg}" + + +@dataclass +class IntegrationTestDebug(DebugLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return f"Integration Test: {self.msg}" + + +@dataclass +class IntegrationTestWarn(WarnLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return f"Integration Test: {self.msg}" + + +@dataclass +class IntegrationTestError(ErrorLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return f"Integration Test: {self.msg}" + + +@dataclass +class IntegrationTestException(ShowException, ErrorLevel, CliEventABC): + msg: str + + def cli_msg(self) -> str: + return f"Integration Test: {self.msg}" + + +# since mypy doesn't run on every file we need to suggest to mypy that every +# class gets instantiated. But we don't actually want to run this code. +# making the conditional `if False` causes mypy to skip it as dead code so +# we need to skirt around that by computing something it doesn't check statically. +# +# TODO remove these lines once we run mypy everywhere. +if 1 == 0: + IntegrationTestInfo(msg='') + IntegrationTestDebug(msg='') + IntegrationTestWarn(msg='') + IntegrationTestError(msg='') + IntegrationTestException(msg='') diff --git a/test/integration/base.py b/test/integration/base.py index 38c290e396c..01eb75130a4 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -23,7 +23,13 @@ from dbt.clients.jinja import template_cache from dbt.config import RuntimeConfig from dbt.context import providers -from dbt.logger import GLOBAL_LOGGER as logger, log_manager +from dbt.logger import log_manager +from dbt.events.functions import fire_event +from dbt.events.test_types import ( + IntegrationTestInfo, + IntegrationTestDebug, + IntegrationTestException +) from dbt.contracts.graph.manifest import Manifest @@ -322,7 +328,7 @@ def setUp(self): 'test_original_source_path={0.test_original_source_path}', 'test_root_dir={0.test_root_dir}' )).format(self) - logger.exception(msg) + fire_event(IntegrationTestException(msg=msg)) # if logging isn't set up, I still really want this message. 
print(msg) @@ -432,8 +438,8 @@ def tearDown(self): try: shutil.rmtree(self.test_root_dir) except EnvironmentError: - logger.exception('Could not clean up after test - {} not removable' - .format(self.test_root_dir)) + msg = f"Could not clean up after test - {self.test_root_dir} not removable" + fire_event(IntegrationTestException(msg=msg)) def _get_schema_fqn(self, database, schema): schema_fqn = self.quote_as_configured(schema, 'schema') @@ -546,8 +552,8 @@ def run_dbt_and_check(self, args=None, profiles_dir=True): if profiles_dir: final_args.extend(['--profiles-dir', self.test_root_dir]) final_args.append('--log-cache-events') - - logger.info("Invoking dbt with {}".format(final_args)) + msg = f"Invoking dbt with {final_args}" + fire_event(IntegrationTestInfo(msg=msg)) return dbt.handle_and_check(final_args) def run_sql_file(self, path, kwargs=None): @@ -626,7 +632,8 @@ def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): sql = self.transform_sql(query, kwargs=kwargs) with self.get_connection(connection_name) as conn: - logger.debug('test connection "{}" executing: {}'.format(conn.name, sql)) + msg = f'test connection "{conn.name}" executing: {sql}' + fire_event(IntegrationTestDebug(msg=msg)) if self.adapter_type == 'presto': return self.run_sql_presto(sql, fetch, conn) else: From b5c6f09a9e3f208d7081d20d5234b97d4e7dbc40 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Fri, 5 Nov 2021 14:43:26 -0500 Subject: [PATCH 19/31] remove unused import (#4217) --- core/dbt/contracts/project.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 9ebc5a3a429..d895314e767 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -1,7 +1,6 @@ from dbt.contracts.util import Replaceable, Mergeable, list_str from dbt.contracts.connection import QueryComment, UserConfigContract from dbt.helper_types import NoValue -from dbt.logger import GLOBAL_LOGGER as logger # noqa from dbt.dataclass_schema import ( dbtClassMixin, ValidationError, HyphenatedDbtClassMixin, From 25c974af8c37d480d4dd5c94105028fd1f282f15 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Fri, 5 Nov 2021 19:33:55 -0400 Subject: [PATCH 20/31] lazy logging in event module (#4210) * switches on debug level to guard against expensive messages * adds memoization to msg construction --- core/dbt/events/functions.py | 41 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index cdd846ac4ec..a25bbd4d7ee 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -2,8 +2,9 @@ import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger from dbt.events.history import EVENT_HISTORY from dbt.events.types import CliEventABC, Event, ShowException +import dbt.flags as flags import os -from typing import List +from typing import Generator, List def env_secrets() -> List[str]: @@ -22,6 +23,18 @@ def scrub_secrets(msg: str, secrets: List[str]) -> str: return scrubbed +# this exists because some log messages are actually expensive to build. +# for example, many debug messages call `dump_graph()` and we don't want to +# do that in the event that those messages are never going to be sent to +# the user because we are only logging info-level events. 
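To make the comment above concrete, here is a self-contained sketch of the lazy pattern the next hunk implements (illustrative only; `build_msg` stands in for the `scrub_secrets(e.cli_msg(), env_secrets())` call). A generator body does not execute until it is first advanced, so the expensive construction is deferred, and every later `next()` replays the cached string:

```python
from typing import Callable, Generator


def lazy_msg(build_msg: Callable[[], str]) -> Generator[str, None, None]:
    msg = build_msg()  # runs only when the generator is first advanced
    while True:
        yield msg      # every subsequent next() reuses the same string


gen = lazy_msg(lambda: "expensive message")  # nothing is built here
assert next(gen) is next(gen)  # built exactly once on first next(), then cached
```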
+def gen_msg(e: CliEventABC) -> Generator[str, None, None]:
+    msg = None
+    if not msg:
+        msg = scrub_secrets(e.cli_msg(), env_secrets())
+    while True:
+        yield msg
+
+
 # top-level method for accessing the new eventing system
 # this is where all the side effects happen branched by event type
 # (i.e. - mutating the event history, printing to stdout, logging
@@ -29,50 +42,54 @@ def scrub_secrets(msg: str, secrets: List[str]) -> str:
 def fire_event(e: Event) -> None:
     EVENT_HISTORY.append(e)
     if isinstance(e, CliEventABC):
-        msg = scrub_secrets(e.cli_msg(), env_secrets())
+        msg = gen_msg(e)
         if e.level_tag() == 'test' and not isinstance(e, ShowException):
             # TODO after implementing #3977 send to new test level
-            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(msg))
+            logger.GLOBAL_LOGGER.debug(next(msg))
         elif e.level_tag() == 'test' and isinstance(e, ShowException):
             # TODO after implementing #3977 send to new test level
             logger.GLOBAL_LOGGER.debug(
-                logger.timestamped_line(msg),
+                next(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
+        # explicitly checking the debug flag here so that potentially expensive-to-construct
+        # log messages are not constructed if debug messages are never shown.
+        elif e.level_tag() == 'debug' and not flags.DEBUG:
+            return  # eat the message in case it was one of the expensive ones
         elif e.level_tag() == 'debug' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.debug(logger.timestamped_line(msg))
+            logger.GLOBAL_LOGGER.debug(next(msg))
         elif e.level_tag() == 'debug' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.debug(
-                logger.timestamped_line(msg),
+                next(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'info' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.info(logger.timestamped_line(msg))
+            logger.GLOBAL_LOGGER.info(next(msg))
         elif e.level_tag() == 'info' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.info(
-                logger.timestamped_line(msg),
+                next(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'warn' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.warning(logger.timestamped_line(msg))
+            logger.GLOBAL_LOGGER.warning(next(msg))
         elif e.level_tag() == 'warn' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.warning(
-                logger.timestamped_line(msg),
+                next(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra
             )
         elif e.level_tag() == 'error' and not isinstance(e, ShowException):
-            logger.GLOBAL_LOGGER.error(logger.timestamped_line(msg))
+            logger.GLOBAL_LOGGER.error(next(msg))
         elif e.level_tag() == 'error' and isinstance(e, ShowException):
             logger.GLOBAL_LOGGER.error(
-                logger.timestamped_line(msg),
+                next(msg),
                 exc_info=e.exc_info,
                 stack_info=e.stack_info,
                 extra=e.extra

From f95e9efbc02205442490315513d675afa7a9b2a0 Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Fri, 5 Nov 2021 19:35:33 -0400
Subject: [PATCH 21/31] use event types in main even before the logger is set up.
(#4219)

---
 core/dbt/events/types.py | 52 ++++++++++++++++++++++++++++++++++++
 core/dbt/main.py         | 32 +++++++++++--------------
 2 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py
index 1a347914f53..ebb250e6ea9 100644
--- a/core/dbt/events/types.py
+++ b/core/dbt/events/types.py
@@ -1,4 +1,5 @@
 from abc import ABCMeta, abstractmethod
+import argparse
 from dataclasses import dataclass
 from typing import Any, List, Optional, Dict
 from dbt import ui
@@ -68,6 +69,51 @@ def cli_msg(self) -> str:
         raise Exception("cli_msg not implemented for cli event")
 
 
+class MainKeyboardInterrupt(InfoLevel, CliEventABC):
+    def cli_msg(self) -> str:
+        return "ctrl-c"
+
+
+@dataclass
+class MainEncounteredError(ErrorLevel, CliEventABC):
+    e: BaseException
+
+    def cli_msg(self) -> str:
+        return f"Encountered an error:\n{str(self.e)}"
+
+
+@dataclass
+class MainStackTrace(DebugLevel, CliEventABC):
+    stack_trace: str
+
+    def cli_msg(self) -> str:
+        return self.stack_trace
+
+
+@dataclass
+class MainReportVersion(InfoLevel, CliEventABC):
+    v: str  # could be VersionSpecifier instead if we resolved some circular imports
+
+    def cli_msg(self):
+        return f"Running with dbt{self.v}"
+
+
+@dataclass
+class MainReportArgs(DebugLevel, CliEventABC):
+    args: argparse.Namespace
+
+    def cli_msg(self):
+        return f"running dbt with arguments {str(self.args)}"
+
+
+@dataclass
+class MainTrackingUserState(DebugLevel, CliEventABC):
+    user_state: str
+
+    def cli_msg(self):
+        return f"Tracking: {self.user_state}"
+
+
 class ParsingStart(InfoLevel, CliEventABC):
     def cli_msg(self) -> str:
         return "Start parsing."
@@ -1728,6 +1774,12 @@ def cli_msg(self) -> str:
+    MainReportVersion('')
+    MainKeyboardInterrupt()
+    MainEncounteredError(BaseException(''))
+    MainStackTrace('')
+    MainReportArgs(argparse.Namespace())
+    MainTrackingUserState('')
     ParsingStart()
     ParsingCompiling()
     ParsingWritingManifest()
diff --git a/core/dbt/main.py b/core/dbt/main.py
index 7e6ae6ac03d..1af838f9f9d 100644
--- a/core/dbt/main.py
+++ b/core/dbt/main.py
@@ -1,5 +1,5 @@
 from typing import List
-from dbt.logger import GLOBAL_LOGGER as logger, log_cache_events, log_manager
+from dbt.logger import log_cache_events, log_manager
 
 import argparse
 import os.path
@@ -9,6 +9,11 @@
 from pathlib import Path
 
 import dbt.version
+from dbt.events.functions import fire_event
+from dbt.events.types import (
+    MainEncounteredError, MainKeyboardInterrupt, MainReportVersion, MainReportArgs,
+    MainTrackingUserState, MainStackTrace
+)
 import dbt.flags as flags
 import dbt.task.build as build_task
 import dbt.task.clean as clean_task
@@ -34,7 +39,6 @@
 from dbt.utils import ExitCodes
 from dbt.config.profile import DEFAULT_PROFILES_DIR, read_user_config
 from dbt.exceptions import (
-    RuntimeException,
     InternalException,
     NotImplementedException,
     FailedToConnectException
@@ -127,7 +131,8 @@ def main(args=None):
             exit_code = ExitCodes.ModelError.value
 
     except KeyboardInterrupt:
-        logger.info("ctrl-c")
+        # if the logger isn't configured yet, it will use the default logger
+        fire_event(MainKeyboardInterrupt())
         exit_code = ExitCodes.UnhandledError.value
 
     # This can be thrown by eg.
argparse @@ -135,16 +140,8 @@ def main(args=None): exit_code = e.code except BaseException as e: - logger.warning("Encountered an error:") - logger.warning(str(e)) - - if log_manager.initialized: - logger.debug(traceback.format_exc()) - elif not isinstance(e, RuntimeException): - # if it did not come from dbt proper and the logger is not - # initialized (so there's no safe path to log to), log the - # stack trace at error level. - logger.error(traceback.format_exc()) + fire_event(MainEncounteredError(e=e)) + fire_event(MainStackTrace(stack_trace=traceback.format_exc())) exit_code = ExitCodes.UnhandledError.value sys.exit(exit_code) @@ -208,7 +205,7 @@ def track_run(task): ) except (NotImplementedException, FailedToConnectException) as e: - logger.error('ERROR: {}'.format(e)) + fire_event(MainEncounteredError(e=e)) dbt.tracking.track_invocation_end( config=task.config, args=task.args, result_type="error" ) @@ -228,13 +225,12 @@ def run_from_args(parsed): # set log_format in the logger parsed.cls.pre_init_hook(parsed) - logger.info("Running with dbt{}".format(dbt.version.installed)) + fire_event(MainReportVersion(v=dbt.version.installed)) # this will convert DbtConfigErrors into RuntimeExceptions # task could be any one of the task objects task = parsed.cls.from_args(args=parsed) - - logger.debug("running dbt with arguments {parsed}", parsed=str(parsed)) + fire_event(MainReportArgs(args=parsed)) log_path = None if task.config is not None: @@ -242,7 +238,7 @@ def run_from_args(parsed): # we can finally set the file logger up log_manager.set_path(log_path) if dbt.tracking.active_user is not None: # mypy appeasement, always true - logger.debug("Tracking: {}".format(dbt.tracking.active_user.state())) + fire_event(MainTrackingUserState(dbt.tracking.active_user.state())) results = None From 5cc8626e9643895cfa86d591c81d14a90f44667a Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 8 Nov 2021 14:31:34 -0600 Subject: [PATCH 22/31] updates associated with merging main - removed 3 new log call sites and replaced with structured logs - removed 2 unused struc logs --- core/dbt/events/types.py | 51 +++++++++++++++---------------------- core/dbt/graph/selector.py | 4 +-- core/dbt/parser/manifest.py | 11 ++++---- core/dbt/task/sql.py | 5 ++-- 4 files changed, 30 insertions(+), 41 deletions(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index ebb250e6ea9..fd608cc27fb 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -295,34 +295,6 @@ def cli_msg(self) -> str: return f"command return code={self.code}" -@dataclass -class SelectorAlertUpto3UnusedNodes(InfoLevel, CliEventABC): - node_names: List[str] - - def cli_msg(self) -> str: - summary_nodes_str = ("\n - ").join(self.node_names[:3]) - and_more_str = ( - f"\n - and {len(self.node_names) - 3} more" if len(self.node_names) > 4 else "" - ) - return ( - f"\nSome tests were excluded because at least one parent is not selected. " - f"Use the --greedy flag to include them." 
- f"\n - {summary_nodes_str}{and_more_str}" - ) - - -@dataclass -class SelectorAlertAllUnusedNodes(DebugLevel, CliEventABC): - node_names: List[str] - - def cli_msg(self) -> str: - debug_nodes_str = ("\n - ").join(self.node_names) - return ( - f"Full list of tests that were excluded:" - f"\n - {debug_nodes_str}" - ) - - @dataclass class SelectorReportInvalidSelector(InfoLevel, CliEventABC): selector_methods: dict @@ -728,6 +700,16 @@ def cli_msg(self) -> str: return f"Partial parsing: deleted exposure {self.unique_id}" +class PartialParsingProjectEnvVarsChanged(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Unable to do partial parsing because env vars used in dbt_project.yml have changed" + + +class PartialParsingProfileEnvVarsChanged(InfoLevel, CliEventABC): + def cli_msg(self) -> str: + return "Unable to do partial parsing because env vars used in profiles.yml have changed" + + @dataclass class InvalidDisabledSourceInTestNode(WarnLevel, CliEventABC): msg: str @@ -1767,6 +1749,14 @@ def cli_msg(self) -> str: return str(self.exc) +@dataclass +class SQlRunnerException(ShowException, DebugLevel, CliEventABC): + exc: Exception + + def cli_msg(self) -> str: + return f"Got an exception: {self.exc}" + + # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. # making the conditional `if False` causes mypy to skip it as dead code so @@ -1803,8 +1793,6 @@ def cli_msg(self) -> str: SystemStdOutMsg(bmsg=b'') SystemStdErrMsg(bmsg=b'') SystemReportReturnCode(code=0) - SelectorAlertUpto3UnusedNodes(node_names=[]) - SelectorAlertAllUnusedNodes(node_names=[]) SelectorReportInvalidSelector(selector_methods={'': ''}, spec_method='', raw_spec='') MacroEventInfo(msg='') MacroEventDebug(msg='') @@ -1849,6 +1837,8 @@ def cli_msg(self) -> str: PartialParsingUpdateSchemaFile(file_id='') PartialParsingDeletedSource(unique_id='') PartialParsingDeletedExposure(unique_id='') + PartialParsingProjectEnvVarsChanged() + PartialParsingProfileEnvVarsChanged() InvalidDisabledSourceInTestNode(msg='') InvalidRefInTestNode(msg='') MessageHandleGenericException(build_path='', unique_id='', exc=Exception('')) @@ -1959,3 +1949,4 @@ def cli_msg(self) -> str: RetryExternalCall(attempt=0, max=0) GeneralWarningMsg(msg='', log_fmt='') GeneralWarningException(exc=Exception(''), log_fmt='') + SQlRunnerException(exc=Exception('')) diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index c1945f1f6db..7e7cdc357fe 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -6,9 +6,7 @@ from .selector_spec import SelectionCriteria, SelectionSpec from dbt.events.functions import fire_event -from dbt.events.types import ( - SelectorAlertUpto3UnusedNodes, SelectorAlertAllUnusedNodes, SelectorReportInvalidSelector -) +from dbt.events.types import SelectorReportInvalidSelector from dbt.node_types import NodeType from dbt.exceptions import ( InternalException, diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index b16ac70d2cb..e1a63964611 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -27,7 +27,8 @@ PartialParsingFailedBecauseConfigChange, PartialParsingFailedBecauseProfileChange, PartialParsingFailedBecauseNewProjectDependency, PartialParsingFailedBecauseHashChanged, PartialParsingNotEnabled, ParsedFileLoadFailed, PartialParseSaveFileNotFound, - InvalidDisabledSourceInTestNode, InvalidRefInTestNode + InvalidDisabledSourceInTestNode, 
InvalidRefInTestNode, PartialParsingProjectEnvVarsChanged, + PartialParsingProfileEnvVarsChanged ) from dbt.logger import DbtProcessState from dbt.node_types import NodeType @@ -276,7 +277,7 @@ def load(self): file_dict = source_file.to_dict() fire_event(PartialParsingFile(file_dict=file_dict)) exc_info['parse_file_type'] = parse_file_type - fire_event(PartialParsingException()) + fire_event(PartialParsingException(exc_info=exc_info)) # Send event if dbt.tracking.active_user is not None: @@ -564,14 +565,12 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: reparse_reason = ReparseReason.profile_changed if self.manifest.state_check.project_env_vars_hash != \ manifest.state_check.project_env_vars_hash: - logger.info("Unable to do partial parsing because env vars " - "used in dbt_project.yml have changed") + fire_event(PartialParsingProjectEnvVarsChanged()) valid = False reparse_reason = ReparseReason.proj_env_vars_changed if self.manifest.state_check.profile_env_vars_hash != \ manifest.state_check.profile_env_vars_hash: - logger.info("Unable to do partial parsing because env vars " - "used in profiles.yml have changed") + fire_event(PartialParsingProfileEnvVarsChanged()) valid = False reparse_reason = ReparseReason.prof_env_vars_changed diff --git a/core/dbt/task/sql.py b/core/dbt/task/sql.py index 12d6ff7a066..446d53055b8 100644 --- a/core/dbt/task/sql.py +++ b/core/dbt/task/sql.py @@ -9,7 +9,8 @@ RemoteRunResult, ResultTable, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import SQlRunnerException from dbt.task.compile import CompileRunner @@ -23,7 +24,7 @@ def __init__(self, config, adapter, node, node_index, num_nodes): ) def handle_exception(self, e, ctx): - logger.debug('Got an exception: {}'.format(e), exc_info=True) + fire_event(SQlRunnerException(exc=e)) if isinstance(e, dbt.exceptions.Exception): if isinstance(e, dbt.exceptions.RuntimeException): e.add_node(ctx.node) From 43b39fd1aa3e30870a405acbcedf35d20d821222 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Mon, 8 Nov 2021 15:46:59 -0600 Subject: [PATCH 23/31] removed redundant timestamp (#4239) --- core/dbt/events/format.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/dbt/events/format.py b/core/dbt/events/format.py index f476620cf94..0e40a6f2a75 100644 --- a/core/dbt/events/format.py +++ b/core/dbt/events/format.py @@ -1,4 +1,3 @@ -import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger from dbt import ui from typing import Optional, Union from dbt.node_types import NodeType @@ -13,9 +12,7 @@ def format_fancy_output_line( progress = '' else: progress = '{} of {} '.format(index, total) - # TODO: remove this formatting once we rip out all the old logger - prefix = "{timestamp} | {progress}{message}".format( - timestamp=logger.get_timestamp(), + prefix = "{progress}{message}".format( progress=progress, message=msg) From b2aea11cdb0304c466d9968281dfa6a53ac97476 Mon Sep 17 00:00:00 2001 From: Ian Knox <81931810+iknox-fa@users.noreply.github.com> Date: Mon, 8 Nov 2021 17:31:24 -0600 Subject: [PATCH 24/31] Struct log for adapter call sites (#4189) graph call sites for structured logging Co-authored-by: Nathaniel May Co-authored-by: Emily Rockman --- core/dbt/adapters/base/connections.py | 39 +- core/dbt/adapters/base/impl.py | 15 +- core/dbt/adapters/cache.py | 75 ++-- core/dbt/adapters/factory.py | 10 +- core/dbt/adapters/sql/connections.py | 24 +- 
core/dbt/adapters/sql/impl.py | 17 +- core/dbt/events/stubs.py | 44 ++ core/dbt/events/types.py | 375 +++++++++++++++++- .../test_query_comments.py | 9 +- 9 files changed, 512 insertions(+), 96 deletions(-) create mode 100644 core/dbt/events/stubs.py diff --git a/core/dbt/adapters/base/connections.py b/core/dbt/adapters/base/connections.py index 67789c9834f..218aa287bf9 100644 --- a/core/dbt/adapters/base/connections.py +++ b/core/dbt/adapters/base/connections.py @@ -18,7 +18,17 @@ from dbt.adapters.base.query_headers import ( MacroQueryStringSetter, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ( + NewConnection, + ConnectionReused, + ConnectionLeftOpen, + ConnectionLeftOpen2, + ConnectionClosed, + ConnectionClosed2, + Rollback, + RollbackFailed +) from dbt import flags @@ -136,14 +146,10 @@ def set_connection_name(self, name: Optional[str] = None) -> Connection: if conn.name == conn_name and conn.state == 'open': return conn - logger.debug( - 'Acquiring new {} connection "{}".'.format(self.TYPE, conn_name)) + fire_event(NewConnection(conn_name=conn_name, conn_type=self.TYPE)) if conn.state == 'open': - logger.debug( - 'Re-using an available connection from the pool (formerly {}).' - .format(conn.name) - ) + fire_event(ConnectionReused(conn_name=conn_name)) else: conn.handle = LazyHandle(self.open) @@ -190,11 +196,9 @@ def cleanup_all(self) -> None: with self.lock: for connection in self.thread_connections.values(): if connection.state not in {'closed', 'init'}: - logger.debug("Connection '{}' was left open." - .format(connection.name)) + fire_event(ConnectionLeftOpen(conn_name=connection.name)) else: - logger.debug("Connection '{}' was properly closed." - .format(connection.name)) + fire_event(ConnectionClosed(conn_name=connection.name)) self.close(connection) # garbage collect these connections @@ -220,20 +224,17 @@ def _rollback_handle(cls, connection: Connection) -> None: try: connection.handle.rollback() except Exception: - logger.debug( - 'Failed to rollback {}'.format(connection.name), - exc_info=True - ) + fire_event(RollbackFailed(conn_name=connection.name)) @classmethod def _close_handle(cls, connection: Connection) -> None: """Perform the actual close operation.""" # On windows, sometimes connection handles don't have a close() attr. if hasattr(connection.handle, 'close'): - logger.debug(f'On {connection.name}: Close') + fire_event(ConnectionClosed2(conn_name=connection.name)) connection.handle.close() else: - logger.debug(f'On {connection.name}: No close available on handle') + fire_event(ConnectionLeftOpen2(conn_name=connection.name)) @classmethod def _rollback(cls, connection: Connection) -> None: @@ -244,7 +245,7 @@ def _rollback(cls, connection: Connection) -> None: f'"{connection.name}", but it does not have one open!' 
) - logger.debug(f'On {connection.name}: ROLLBACK') + fire_event(Rollback(conn_name=connection.name)) cls._rollback_handle(connection) connection.transaction_open = False @@ -256,7 +257,7 @@ def close(cls, connection: Connection) -> Connection: return connection if connection.transaction_open and connection.handle: - logger.debug('On {}: ROLLBACK'.format(connection.name)) + fire_event(Rollback(conn_name=connection.name)) cls._rollback_handle(connection) connection.transaction_open = False diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 732b2e59914..f8ac6d5d4fa 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -29,7 +29,8 @@ from dbt.contracts.graph.manifest import Manifest, MacroManifest from dbt.contracts.graph.parsed import ParsedSeedNode from dbt.exceptions import warn_or_error -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import CacheMiss, ListRelations from dbt.utils import filter_null_values, executor from dbt.adapters.base.connections import Connection, AdapterResponse @@ -288,9 +289,12 @@ def _schema_is_cached(self, database: Optional[str], schema: str) -> bool: """Check if the schema is cached, and by default logs if it is not.""" if (database, schema) not in self.cache: - logger.debug( - 'On "{}": cache miss for schema "{}.{}", this is inefficient' - .format(self.nice_connection_name(), database, schema) + fire_event( + CacheMiss( + conn_name=self.nice_connection_name, + database=database, + schema=schema + ) ) return False else: @@ -672,9 +676,8 @@ def list_relations( relations = self.list_relations_without_caching( schema_relation ) + fire_event(ListRelations(database=database, schema=schema, relations=relations)) - logger.debug('with database={}, schema={}, relations={}' - .format(database, schema, relations)) return relations def _make_match_kwargs( diff --git a/core/dbt/adapters/cache.py b/core/dbt/adapters/cache.py index 8bbac5cc95f..07a4d875715 100644 --- a/core/dbt/adapters/cache.py +++ b/core/dbt/adapters/cache.py @@ -1,11 +1,26 @@ +import threading from collections import namedtuple from copy import deepcopy -from typing import List, Iterable, Optional, Dict, Set, Tuple, Any -import threading +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple -from dbt.logger import CACHE_LOGGER as logger -from dbt.utils import lowercase import dbt.exceptions +from dbt.events.functions import fire_event +from dbt.events.types import ( + AddLink, + AddRelation, + DropCascade, + DropMissingRelation, + DropRelation, + DumpAfterAddGraph, + DumpAfterRenameSchema, + DumpBeforeAddGraph, + DumpBeforeRenameSchema, + RenameSchema, + TemporaryRelation, + UncachedRelation, + UpdateReference +) +from dbt.utils import lowercase _ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier') @@ -157,12 +172,6 @@ def dump_graph_entry(self): return [dot_separated(r) for r in self.referenced_by] -def lazy_log(msg, func): - if logger.disabled: - return - logger.debug(msg.format(func())) - - class RelationsCache: """A cache of the relations known to dbt. Keeps track of relationships declared between tables and handles renames/drops as a real database would. @@ -278,6 +287,7 @@ def _add_link(self, referenced_key, dependent_key): referenced.add_reference(dependent) + # TODO: Is this dead code? I can't seem to find it grepping the codebase. def add_link(self, referenced, dependent): """Add a link between two relations to the database. 
If either relation does not exist, it will be added as an "external" relation. @@ -297,11 +307,7 @@ def add_link(self, referenced, dependent): # if we have not cached the referenced schema at all, we must be # referring to a table outside our control. There's no need to make # a link - we will never drop the referenced relation during a run. - logger.debug( - '{dep!s} references {ref!s} but {ref.database}.{ref.schema} ' - 'is not in the cache, skipping assumed external relation' - .format(dep=dependent, ref=ref_key) - ) + fire_event(UncachedRelation(dep_key=dependent, ref_key=ref_key)) return if ref_key not in self.relations: # Insert a dummy "external" relation. @@ -317,9 +323,7 @@ def add_link(self, referenced, dependent): type=referenced.External ) self.add(dependent) - logger.debug( - 'adding link, {!s} references {!s}'.format(dep_key, ref_key) - ) + fire_event(AddLink(dep_key=dep_key, ref_key=ref_key)) with self.lock: self._add_link(ref_key, dep_key) @@ -330,14 +334,12 @@ def add(self, relation): :param BaseRelation relation: The underlying relation. """ cached = _CachedRelation(relation) - logger.debug('Adding relation: {!s}'.format(cached)) - - lazy_log('before adding: {!s}', self.dump_graph) + fire_event(AddRelation(relation=cached)) + fire_event(DumpBeforeAddGraph(graph_func=self.dump_graph)) with self.lock: self._setdefault(cached) - - lazy_log('after adding: {!s}', self.dump_graph) + fire_event(DumpAfterAddGraph(graph_func=self.dump_graph)) def _remove_refs(self, keys): """Removes all references to all entries in keys. This does not @@ -359,13 +361,10 @@ def _drop_cascade_relation(self, dropped): :param _CachedRelation dropped: An existing _CachedRelation to drop. """ if dropped not in self.relations: - logger.debug('dropped a nonexistent relationship: {!s}' - .format(dropped)) + fire_event(DropMissingRelation(relation=dropped)) return consequences = self.relations[dropped].collect_consequences() - logger.debug( - 'drop {} is cascading to {}'.format(dropped, consequences) - ) + fire_event(DropCascade(dropped=dropped, consequences=consequences)) self._remove_refs(consequences) def drop(self, relation): @@ -380,7 +379,7 @@ def drop(self, relation): :param str identifier: The identifier of the relation to drop. 
""" dropped = _make_key(relation) - logger.debug('Dropping relation: {!s}'.format(dropped)) + fire_event(DropRelation(dropped=dropped)) with self.lock: self._drop_cascade_relation(dropped) @@ -403,9 +402,8 @@ def _rename_relation(self, old_key, new_relation): # update all the relations that refer to it for cached in self.relations.values(): if cached.is_referenced_by(old_key): - logger.debug( - 'updated reference from {0} -> {2} to {1} -> {2}' - .format(old_key, new_key, cached.key()) + fire_event( + UpdateReference(old_key=old_key, new_key=new_key, cached_key=cached.key()) ) cached.rename_key(old_key, new_key) @@ -435,10 +433,7 @@ def _check_rename_constraints(self, old_key, new_key): ) if old_key not in self.relations: - logger.debug( - 'old key {} not found in self.relations, assuming temporary' - .format(old_key) - ) + fire_event(TemporaryRelation(key=old_key)) return False return True @@ -456,11 +451,9 @@ def rename(self, old, new): """ old_key = _make_key(old) new_key = _make_key(new) - logger.debug('Renaming relation {!s} to {!s}'.format( - old_key, new_key - )) + fire_event(RenameSchema(old_key=old_key, new_key=new_key)) - lazy_log('before rename: {!s}', self.dump_graph) + fire_event(DumpBeforeRenameSchema(graph_func=self.dump_graph)) with self.lock: if self._check_rename_constraints(old_key, new_key): @@ -468,7 +461,7 @@ def rename(self, old, new): else: self._setdefault(_CachedRelation(new)) - lazy_log('after rename: {!s}', self.dump_graph) + fire_event(DumpAfterRenameSchema(graph_func=self.dump_graph)) def get_relations( self, database: Optional[str], schema: Optional[str] diff --git a/core/dbt/adapters/factory.py b/core/dbt/adapters/factory.py index 0de3f2561dc..64021f10536 100644 --- a/core/dbt/adapters/factory.py +++ b/core/dbt/adapters/factory.py @@ -8,10 +8,9 @@ PACKAGE_PATH as GLOBAL_PROJECT_PATH, PROJECT_NAME as GLOBAL_PROJECT_NAME, ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import AdapterImportError, PluginLoadError from dbt.contracts.connection import Credentials, AdapterRequiredConfig - - from dbt.adapters.protocol import ( AdapterProtocol, AdapterConfig, @@ -67,11 +66,12 @@ def load_plugin(self, name: str) -> Type[Credentials]: # if we failed to import the target module in particular, inform # the user about it via a runtime error if exc.name == 'dbt.adapters.' + name: + fire_event(AdapterImportError(exc=exc)) raise RuntimeException(f'Could not find adapter type {name}!') - logger.info(f'Error importing adapter: {exc}') # otherwise, the error had to have come from some underlying # library. Log the stack trace. - logger.debug('', exc_info=True) + + fire_event(PluginLoadError()) raise plugin: AdapterPlugin = mod.Plugin plugin_type = plugin.adapter.type() diff --git a/core/dbt/adapters/sql/connections.py b/core/dbt/adapters/sql/connections.py index d44764b7b69..cb477f2abc2 100644 --- a/core/dbt/adapters/sql/connections.py +++ b/core/dbt/adapters/sql/connections.py @@ -10,7 +10,8 @@ from dbt.contracts.connection import ( Connection, ConnectionState, AdapterResponse ) -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ConnectionUsed, SQLQuery, SQLCommit, SQLQueryStatus class SQLConnectionManager(BaseConnectionManager): @@ -58,9 +59,7 @@ def add_query( connection = self.get_thread_connection() if auto_begin and connection.transaction_open is False: self.begin() - - logger.debug('Using {} connection "{}".' 
- .format(self.TYPE, connection.name)) + fire_event(ConnectionUsed(conn_type=self.TYPE, conn_name=connection.name)) with self.exception_handler(sql): if abridge_sql_log: @@ -68,19 +67,16 @@ def add_query( else: log_sql = sql - logger.debug( - 'On {connection_name}: {sql}', - connection_name=connection.name, - sql=log_sql, - ) + fire_event(SQLQuery(conn_name=connection.name, sql=log_sql)) pre = time.time() cursor = connection.handle.cursor() cursor.execute(sql, bindings) - logger.debug( - "SQL status: {status} in {elapsed:0.2f} seconds", - status=self.get_response(cursor), - elapsed=(time.time() - pre) + + fire_event( + SQLQueryStatus( + status=self.get_response(cursor), elapsed=round((time.time() - pre), 2) + ) ) return connection, cursor @@ -160,7 +156,7 @@ def commit(self): 'Tried to commit transaction on connection "{}", but ' 'it does not have one open!'.format(connection.name)) - logger.debug('On {}: COMMIT'.format(connection.name)) + fire_event(SQLCommit(conn_name=connection.name)) self.add_commit_query() connection.transaction_open = False diff --git a/core/dbt/adapters/sql/impl.py b/core/dbt/adapters/sql/impl.py index 3377453d451..5dc54f3a6b6 100644 --- a/core/dbt/adapters/sql/impl.py +++ b/core/dbt/adapters/sql/impl.py @@ -6,7 +6,9 @@ import dbt.exceptions from dbt.adapters.base import BaseAdapter, available from dbt.adapters.sql import SQLConnectionManager -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events.functions import fire_event +from dbt.events.types import ColTypeChange, SchemaCreation, SchemaDrop + from dbt.adapters.base.relation import BaseRelation @@ -116,8 +118,13 @@ def expand_column_types(self, goal, current): target_column.can_expand_to(reference_column): col_string_size = reference_column.string_size() new_type = self.Column.string_type(col_string_size) - logger.debug("Changing col type from {} to {} in table {}", - target_column.data_type, new_type, current) + fire_event( + ColTypeChange( + orig_type=target_column.data_type, + new_type=new_type, + table=current, + ) + ) self.alter_column_type(current, column_name, new_type) @@ -175,7 +182,7 @@ def get_columns_in_relation(self, relation): def create_schema(self, relation: BaseRelation) -> None: relation = relation.without_identifier() - logger.debug('Creating schema "{}"', relation) + fire_event(SchemaCreation(relation=relation)) kwargs = { 'relation': relation, } @@ -186,7 +193,7 @@ def create_schema(self, relation: BaseRelation) -> None: def drop_schema(self, relation: BaseRelation) -> None: relation = relation.without_identifier() - logger.debug('Dropping schema "{}".', relation) + fire_event(SchemaDrop(relation=relation)) kwargs = { 'relation': relation, } diff --git a/core/dbt/events/stubs.py b/core/dbt/events/stubs.py new file mode 100644 index 00000000000..b2329ce2787 --- /dev/null +++ b/core/dbt/events/stubs.py @@ -0,0 +1,44 @@ +from typing import ( + Any, + Optional, + NamedTuple, +) + +# N.B.: +# These stubs were autogenerated by stubgen and then hacked +# to pieces to ensure we had something other than "Any" types +# where using external classes to instantiate event subclasses +# in events/types.py. +# +# This goes away when we turn mypy on for everything. +# +# Don't trust them too much at all! 
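As a sketch of what these stubs buy (illustrative, not part of the patch): an event class can annotate an adapter-shaped payload without importing `dbt.adapters`, avoiding an import cycle. `DropRelationSketch` is a hypothetical event modeled on the `DropRelation` event fired from `cache.py` above; the `_ReferenceKey` shape mirrors the stub defined below.

```python
from dataclasses import dataclass
from typing import NamedTuple


class _ReferenceKey(NamedTuple):  # structural stand-in for the adapter type
    database: str
    schema: str
    identifier: str


@dataclass
class DropRelationSketch:  # hypothetical event, for illustration only
    dropped: _ReferenceKey

    def cli_msg(self) -> str:
        return f"Dropping relation: {self.dropped}"


print(DropRelationSketch(_ReferenceKey("db", "main", "users")).cli_msg())
# Dropping relation: _ReferenceKey(database='db', schema='main', identifier='users')
```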
+ + +class _ReferenceKey(NamedTuple): + database: Any + schema: Any + identifier: Any + + +class _CachedRelation: + referenced_by: Any + inner: Any + + +class AdapterResponse: + code: Optional[str] + rows_affected: Optional[int] + + +class BaseRelation: + path: Any + type: Optional[Any] + quote_character: str + include_policy: Any + quote_policy: Any + dbt_created: bool + + +class InformationSchema(BaseRelation): + information_schema_view: Optional[str] diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index fd608cc27fb..733e74f5535 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,7 +1,8 @@ from abc import ABCMeta, abstractmethod import argparse from dataclasses import dataclass -from typing import Any, List, Optional, Dict +from typing import Any, Callable, cast, Dict, List, Optional, Set, Union +from dbt.events.stubs import _CachedRelation, AdapterResponse, BaseRelation, _ReferenceKey from dbt import ui from dbt.node_types import NodeType from dbt.events.format import format_fancy_output_line, pluralize @@ -310,7 +311,7 @@ def cli_msg(self) -> str: @dataclass -class MacroEventInfo(InfoLevel, CliEventABC): +class MacroEventDebug(DebugLevel, CliEventABC): msg: str def cli_msg(self) -> str: @@ -318,13 +319,315 @@ def cli_msg(self) -> str: @dataclass -class MacroEventDebug(DebugLevel, CliEventABC): +class MacroEventInfo(InfoLevel, CliEventABC): msg: str def cli_msg(self) -> str: return self.msg +@dataclass +class NewConnection(DebugLevel, CliEventABC): + conn_type: str + conn_name: str + + def cli_msg(self) -> str: + return f'Acquiring new {self.conn_type} connection "{self.conn_name}"' + + +@dataclass +class ConnectionReused(DebugLevel, CliEventABC): + conn_name: str + + def cli_msg(self) -> str: + return f"Re-using an available connection from the pool (formerly {self.conn_name})" + + +@dataclass +class ConnectionLeftOpen(DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"Connection '{self.conn_name}' was left open." + + +@dataclass +class ConnectionClosed(DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"Connection '{self.conn_name}' was properly closed." + + +@dataclass +class RollbackFailed(ShowException, DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"Failed to rollback '{self.conn_name}'" + + +# TODO: can we combine this with ConnectionClosed? +@dataclass +class ConnectionClosed2(DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"On {self.conn_name}: Close" + + +# TODO: can we combine this with ConnectionLeftOpen? +@dataclass +class ConnectionLeftOpen2(DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"On {self.conn_name}: No close available on handle" + + +@dataclass +class Rollback(DebugLevel, CliEventABC): + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f"On {self.conn_name}: ROLLBACK" + + +@dataclass +class CacheMiss(DebugLevel, CliEventABC): + conn_name: Any # TODO mypy says this is `Callable[[], str]`?? 
¯\_(ツ)_/¯ + database: Optional[str] + schema: str + + def cli_msg(self) -> str: + return ( + f'On "{self.conn_name}": cache miss for schema ' + '"{self.database}.{self.schema}", this is inefficient' + ) + + +@dataclass +class ListRelations(DebugLevel, CliEventABC): + database: Optional[str] + schema: str + relations: List[BaseRelation] + + def cli_msg(self) -> str: + return f"with database={self.database}, schema={self.schema}, relations={self.relations}" + + +@dataclass +class ConnectionUsed(DebugLevel, CliEventABC): + conn_type: str + conn_name: Optional[str] + + def cli_msg(self) -> str: + return f'Using {self.conn_type} connection "{self.conn_name}"' + + +@dataclass +class SQLQuery(DebugLevel, CliEventABC): + conn_name: Optional[str] + sql: str + + def cli_msg(self) -> str: + return f"On {self.conn_name}: {self.sql}" + + +@dataclass +class SQLQueryStatus(DebugLevel, CliEventABC): + status: Union[AdapterResponse, str] + elapsed: float + + def cli_msg(self) -> str: + return f"SQL status: {self.status} in {self.elapsed} seconds" + + +@dataclass +class SQLCommit(DebugLevel, CliEventABC): + conn_name: str + + def cli_msg(self) -> str: + return f"On {self.conn_name}: COMMIT" + + +@dataclass +class ColTypeChange(DebugLevel, CliEventABC): + orig_type: str + new_type: str + table: str + + def cli_msg(self) -> str: + return f"Changing col type from {self.orig_type} to {self.new_type} in table {self.table}" + + +@dataclass +class SchemaCreation(DebugLevel, CliEventABC): + relation: BaseRelation + + def cli_msg(self) -> str: + return f'Creating schema "{self.relation}"' + + +@dataclass +class SchemaDrop(DebugLevel, CliEventABC): + relation: BaseRelation + + def cli_msg(self) -> str: + return f'Dropping schema "{self.relation}".' + + +# TODO pretty sure this is only ever called in dead code +# see: core/dbt/adapters/cache.py _add_link vs add_link +@dataclass +class UncachedRelation(DebugLevel, CliEventABC): + dep_key: _ReferenceKey + ref_key: _ReferenceKey + + def cli_msg(self) -> str: + return ( + f"{self.dep_key} references {str(self.ref_key)} " + "but {self.ref_key.database}.{self.ref_key.schema}" + "is not in the cache, skipping assumed external relation" + ) + + +@dataclass +class AddLink(DebugLevel, CliEventABC): + dep_key: _ReferenceKey + ref_key: _ReferenceKey + + def cli_msg(self) -> str: + return f"adding link, {self.dep_key} references {self.ref_key}" + + +@dataclass +class AddRelation(DebugLevel, CliEventABC): + relation: _CachedRelation + + def cli_msg(self) -> str: + return f"Adding relation: {str(self.relation)}" + + +@dataclass +class DropMissingRelation(DebugLevel, CliEventABC): + relation: _ReferenceKey + + def cli_msg(self) -> str: + return f"dropped a nonexistent relationship: {str(self.relation)}" + + +@dataclass +class DropCascade(DebugLevel, CliEventABC): + dropped: _ReferenceKey + consequences: Set[_ReferenceKey] + + def cli_msg(self) -> str: + return f"drop {self.dropped} is cascading to {self.consequences}" + + +@dataclass +class DropRelation(DebugLevel, CliEventABC): + dropped: _ReferenceKey + + def cli_msg(self) -> str: + return f"Dropping relation: {self.dropped}" + + +@dataclass +class UpdateReference(DebugLevel, CliEventABC): + old_key: _ReferenceKey + new_key: _ReferenceKey + cached_key: _ReferenceKey + + def cli_msg(self) -> str: + return f"updated reference from {self.old_key} -> {self.cached_key} to "\ + "{self.new_key} -> {self.cached_key}" + + +@dataclass +class TemporaryRelation(DebugLevel, CliEventABC): + key: _ReferenceKey + + def cli_msg(self) -> str: + 
return f"old key {self.key} not found in self.relations, assuming temporary" + + +@dataclass +class RenameSchema(DebugLevel, CliEventABC): + old_key: _ReferenceKey + new_key: _ReferenceKey + + def cli_msg(self) -> str: + return f"Renaming relation {self.old_key} to {self.new_key}" + + +@dataclass +class DumpBeforeAddGraph(DebugLevel, CliEventABC): + graph_func: Callable[[], Dict[str, List[str]]] + + def cli_msg(self) -> str: + # workaround for https://github.com/python/mypy/issues/6910 + # TODO remove when we've upgraded to a mypy version without that bug + func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) + return f"before adding : {func_returns}" + + +@dataclass +class DumpAfterAddGraph(DebugLevel, CliEventABC): + graph_func: Callable[[], Dict[str, List[str]]] + + def cli_msg(self) -> str: + # workaround for https://github.com/python/mypy/issues/6910 + func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) + return f"after adding: {func_returns}" + + +@dataclass +class DumpBeforeRenameSchema(DebugLevel, CliEventABC): + graph_func: Callable[[], Dict[str, List[str]]] + + def cli_msg(self) -> str: + # workaround for https://github.com/python/mypy/issues/6910 + func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) + return f"before rename: {func_returns}" + + +@dataclass +class DumpAfterRenameSchema(DebugLevel, CliEventABC): + graph_func: Callable[[], Dict[str, List[str]]] + + def cli_msg(self) -> str: + # workaround for https://github.com/python/mypy/issues/6910 + func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) + return f"after rename: {func_returns}" + + +@dataclass +class AdapterImportError(InfoLevel, CliEventABC): + exc: ModuleNotFoundError + + def cli_msg(self) -> str: + return f"Error importing adapter: {self.exc}" + + +@dataclass +class PluginLoadError(ShowException, DebugLevel, CliEventABC): + def cli_msg(self): + pass + + +# since mypy doesn't run on every file we need to suggest to mypy that every +# class gets instantiated. But we don't actually want to run this code. +# making the conditional `if False` causes mypy to skip it as dead code so +# we need to skirt around that by computing something it doesn't check statically. +# +# TODO remove these lines once we run mypy everywhere. + +def dump_callable(): + return {"": [""]} # for instantiating `Dump...` methods which take callables. 
+ + @dataclass class NewConnectionOpening(DebugLevel, CliEventABC): connection_state: str @@ -1779,6 +2082,72 @@ def cli_msg(self) -> str: ManifestLoaded() ManifestChecked() ManifestFlatGraphBuilt() + ReportPerformancePath(path="") + GitSparseCheckoutSubdirectory(subdir="") + GitProgressCheckoutRevision(revision="") + GitProgressUpdatingExistingDependency(dir="") + GitProgressPullingNewDependency(dir="") + GitNothingToDo(sha="") + GitProgressUpdatedCheckoutRange(start_sha="", end_sha="") + GitProgressCheckedOutAt(end_sha="") + SystemErrorRetrievingModTime(path="") + SystemCouldNotWrite(path="", reason="", exc=Exception("")) + SystemExecutingCmd(cmd=[""]) + SystemStdOutMsg(bmsg=b"") + SystemStdErrMsg(bmsg=b"") + SystemReportReturnCode(code=0) + SelectorReportInvalidSelector( + selector_methods={"": ""}, spec_method="", raw_spec="" + ) + MacroEventInfo(msg="") + MacroEventDebug(msg="") + NewConnection(conn_type="", conn_name="") + ConnectionReused(conn_name="") + ConnectionLeftOpen(conn_name="") + ConnectionClosed(conn_name="") + RollbackFailed(conn_name="") + ConnectionClosed2(conn_name="") + ConnectionLeftOpen2(conn_name="") + Rollback(conn_name="") + CacheMiss(conn_name="", database="", schema="") + ListRelations(database="", schema="", relations=[]) + ConnectionUsed(conn_type="", conn_name="") + SQLQuery(conn_name="", sql="") + SQLQueryStatus(status="", elapsed=0.1) + SQLCommit(conn_name="") + ColTypeChange(orig_type="", new_type="", table="") + SchemaCreation(relation=BaseRelation()) + SchemaDrop(relation=BaseRelation()) + UncachedRelation( + dep_key=_ReferenceKey(database="", schema="", identifier=""), + ref_key=_ReferenceKey(database="", schema="", identifier=""), + ) + AddLink( + dep_key=_ReferenceKey(database="", schema="", identifier=""), + ref_key=_ReferenceKey(database="", schema="", identifier=""), + ) + AddRelation(relation=_CachedRelation()) + DropMissingRelation(relation=_ReferenceKey(database="", schema="", identifier="")) + DropCascade( + dropped=_ReferenceKey(database="", schema="", identifier=""), + consequences={_ReferenceKey(database="", schema="", identifier="")}, + ) + UpdateReference( + old_key=_ReferenceKey(database="", schema="", identifier=""), + new_key=_ReferenceKey(database="", schema="", identifier=""), + cached_key=_ReferenceKey(database="", schema="", identifier=""), + ) + TemporaryRelation(key=_ReferenceKey(database="", schema="", identifier="")) + RenameSchema( + old_key=_ReferenceKey(database="", schema="", identifier=""), + new_key=_ReferenceKey(database="", schema="", identifier="") + ) + DumpBeforeAddGraph(dump_callable) + DumpAfterAddGraph(dump_callable) + DumpBeforeRenameSchema(dump_callable) + DumpAfterRenameSchema(dump_callable) + AdapterImportError(ModuleNotFoundError()) + PluginLoadError() ReportPerformancePath(path='') GitSparseCheckoutSubdirectory(subdir='') GitProgressCheckoutRevision(revision='') diff --git a/test/integration/051_query_comments_test/test_query_comments.py b/test/integration/051_query_comments_test/test_query_comments.py index 6eed09f416a..4ada01aa991 100644 --- a/test/integration/051_query_comments_test/test_query_comments.py +++ b/test/integration/051_query_comments_test/test_query_comments.py @@ -2,6 +2,7 @@ import io import json import os +import re import dbt.exceptions from dbt.version import __version__ as dbt_version @@ -70,10 +71,12 @@ def run_get_json(self, expect_pass=True): return logs def query_comment(self, model_name, log): + # N.B: a temporary string replacement regex to strip the HH:MM:SS from the log line if 
present. + # TODO: make this go away when structured logging is stable + log_msg = re.sub("(?:[01]\d|2[0123]):(?:[012345]\d):(?:[012345]\d \| )", "", log['message']) prefix = 'On {}: '.format(model_name) - - if log['message'].startswith(prefix): - msg = log['message'][len(prefix):] + if log_msg.startswith(prefix): + msg = log_msg[len(prefix):] if msg in {'COMMIT', 'BEGIN', 'ROLLBACK'}: return None return msg From a40550b89d6bec5b94074f01f8fec33cf97ff3d1 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 8 Nov 2021 18:55:50 -0500 Subject: [PATCH 25/31] std logger for structured logging (#4231) structured logging powered by the stdlib logger Co-authored-by: Emily Rockman Co-authored-by: Ian Knox <81931810+iknox-fa@users.noreply.github.com> --- core/dbt/events/README.md | 13 + core/dbt/events/__init__.py | 1 + core/dbt/events/adapter_endpoint.py | 86 +++ core/dbt/events/functions.py | 249 +++++-- core/dbt/events/test_types.py | 22 +- core/dbt/events/types.py | 1013 ++++++++++++++------------- core/dbt/logger.py | 9 +- core/dbt/main.py | 3 +- 8 files changed, 835 insertions(+), 561 deletions(-) create mode 100644 core/dbt/events/adapter_endpoint.py diff --git a/core/dbt/events/README.md b/core/dbt/events/README.md index f3f765f34d6..a49fa070c66 100644 --- a/core/dbt/events/README.md +++ b/core/dbt/events/README.md @@ -7,3 +7,16 @@ The event module provides types that represent what is happening in dbt in `even # Adding a New Event In `events.types` add a new class that represents the new event. This may be a simple class with no values, or it may be a dataclass with some values to construct downstream messaging. Only include the data necessary to construct this message within this class. You must extend all destinations (e.g. - if your log message belongs on the cli, extend `CliEventABC`) as well as the loglevel this event belongs to. + +# Adapter Maintainers +To integrate existing log messages from adapters, you likely have a line of code like this in your adapter already: +```python +from dbt.logger import GLOBAL_LOGGER as logger +``` + +Simply change it to these two lines with your adapter's database name, and all your existing call sites will now use the new system for v1.0: +```python +from dbt.events import AdapterLogger +logger = AdapterLogger("") +# e.g. 
AdapterLogger("Snowflake") +``` diff --git a/core/dbt/events/__init__.py b/core/dbt/events/__init__.py index e69de29bb2d..d0fc24d7bb5 100644 --- a/core/dbt/events/__init__.py +++ b/core/dbt/events/__init__.py @@ -0,0 +1 @@ +from .adapter_endpoint import AdapterLogger # noqa: F401 diff --git a/core/dbt/events/adapter_endpoint.py b/core/dbt/events/adapter_endpoint.py new file mode 100644 index 00000000000..aad71080acc --- /dev/null +++ b/core/dbt/events/adapter_endpoint.py @@ -0,0 +1,86 @@ +from dataclasses import dataclass +from dbt.events.functions import fire_event +from dbt.events.types import ( + AdapterEventDebug, AdapterEventInfo, AdapterEventWarning, AdapterEventError +) +from typing import Any + + +@dataclass +class AdapterLogger(): + name: str + + def debug( + self, + msg: str, + exc_info: Any = None, + stack_info: Any = None, + extra: Any = None + ): + event = AdapterEventDebug(name=self.name, raw_msg=msg) + + event.exc_info = exc_info + event.stack_info = stack_info + event.extra = extra + + fire_event(event) + + def info( + self, + msg: str, + exc_info: Any = None, + stack_info: Any = None, + extra: Any = None + ): + event = AdapterEventInfo(name=self.name, raw_msg=msg) + + event.exc_info = exc_info + event.stack_info = stack_info + event.extra = extra + + fire_event(event) + + def warning( + self, + msg: str, + exc_info: Any = None, + stack_info: Any = None, + extra: Any = None + ): + event = AdapterEventWarning(name=self.name, raw_msg=msg) + + event.exc_info = exc_info + event.stack_info = stack_info + event.extra = extra + + fire_event(event) + + def error( + self, + msg: str, + exc_info: Any = None, + stack_info: Any = None, + extra: Any = None + ): + event = AdapterEventError(name=self.name, raw_msg=msg) + + event.exc_info = exc_info + event.stack_info = stack_info + event.extra = extra + + fire_event(event) + + def exception( + self, + msg: str, + exc_info: Any = True, # this default is what makes this method different + stack_info: Any = None, + extra: Any = None + ): + event = AdapterEventError(name=self.name, raw_msg=msg) + + event.exc_info = exc_info + event.stack_info = stack_info + event.extra = extra + + fire_event(event) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index a25bbd4d7ee..8a95bf301ed 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -1,16 +1,75 @@ -import dbt.logger as logger # type: ignore # TODO eventually remove dependency on this logger -from dbt.events.history import EVENT_HISTORY -from dbt.events.types import CliEventABC, Event, ShowException +from colorama import Style +import dbt.events.functions as this # don't worry I hate it too. 
+from dbt.events.types import Cli, Event, File, ShowException
 import dbt.flags as flags
+# TODO this will need to move eventually
+from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing
+import json
+import logging
+from logging import Logger
+from logging.handlers import WatchedFileHandler
 import os
-from typing import Generator, List
+from typing import List
+
+
+# create the global file logger with no configuration
+global FILE_LOG
+FILE_LOG = logging.getLogger('default_file')
+
+# set up logger to go to stdout with defaults
+# setup_event_logger will be called once args have been parsed
+global STDOUT_LOG
+STDOUT_LOG = logging.getLogger('default_stdout')
+STDOUT_LOG.setLevel(logging.INFO)
+stdout_handler = logging.StreamHandler()
+stdout_handler.setLevel(logging.INFO)
+STDOUT_LOG.addHandler(stdout_handler)
+global format_color
+format_color = True
+global format_json
+format_json = False
+
+
+def setup_event_logger(log_path):
+    make_log_dir_if_missing(log_path)
+    this.format_json = flags.LOG_FORMAT == 'json'
+    # USE_COLORS can be None if the app just started and the cli flags
+    # haven't been applied yet
+    this.format_color = True if flags.USE_COLORS else False
+    # TODO this default should live somewhere better
+    log_dest = os.path.join('logs', 'dbt.log')
+    level = logging.DEBUG if flags.DEBUG else logging.INFO
+
+    # overwrite the STDOUT_LOG logger with the configured one
+    this.STDOUT_LOG = logging.getLogger('configured_std_out')
+    this.STDOUT_LOG.setLevel(level)
+
+    FORMAT = "%(message)s"
+    stdout_passthrough_formatter = logging.Formatter(fmt=FORMAT)
+
+    stdout_handler = logging.StreamHandler()
+    stdout_handler.setFormatter(stdout_passthrough_formatter)
+    stdout_handler.setLevel(level)
+    this.STDOUT_LOG.addHandler(stdout_handler)
+
+    # overwrite the FILE_LOG logger with the configured one
+    this.FILE_LOG = logging.getLogger('configured_file')
+    this.FILE_LOG.setLevel(logging.DEBUG)  # always debug regardless of user input
+
+    file_passthrough_formatter = logging.Formatter(fmt=FORMAT)
+
+    # TODO log rotation is not handled by WatchedFileHandler
+    file_handler = WatchedFileHandler(filename=log_dest, encoding='utf8')
+    file_handler.setFormatter(file_passthrough_formatter)
+    file_handler.setLevel(logging.DEBUG)  # always debug regardless of user input
+    this.FILE_LOG.addHandler(file_handler)
 
 
 def env_secrets() -> List[str]:
     return [
         v for k, v in os.environ.items()
-        if k.startswith(logger.SECRET_ENV_PREFIX)
+        if k.startswith(SECRET_ENV_PREFIX)
     ]
 
 
@@ -23,16 +82,102 @@ def scrub_secrets(msg: str, secrets: List[str]) -> str:
     return scrubbed
 
 
-# this exists because some log messages are actually expensive to build.
-# for example, many debug messages call `dump_graph()` and we don't want to
-# do that in the event that those messages are never going to be sent to
-# the user because we are only logging info-level events.
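To make the secret scrubbing above concrete, here is a minimal, self-contained sketch of how `env_secrets` and `scrub_secrets` interact. The `DBT_ENV_SECRET_` prefix value and the `*****` mask are assumptions for illustration (the real constant lives in `dbt.logger` and the scrub body is elided by the hunk above):

```python
import os
from typing import List

SECRET_ENV_PREFIX = "DBT_ENV_SECRET_"  # assumed value; the real constant is imported from dbt.logger


def env_secrets() -> List[str]:
    # collect the *values* of every env var flagged as a secret
    return [v for k, v in os.environ.items() if k.startswith(SECRET_ENV_PREFIX)]


def scrub_secrets(msg: str, secrets: List[str]) -> str:
    # mask each secret value before the line reaches any destination
    scrubbed = msg
    for secret in secrets:
        scrubbed = scrubbed.replace(secret, "*****")  # assumed mask
    return scrubbed


os.environ["DBT_ENV_SECRET_PASSWORD"] = "hunter2"
print(scrub_secrets("login failed for hunter2", env_secrets()))
# -> login failed for *****
```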
-def gen_msg(e: CliEventABC) -> Generator[str, None, None]:
-    msg = None
-    if not msg:
-        msg = scrub_secrets(e.cli_msg(), env_secrets())
-    while True:
-        yield msg
+# translates an Event to a completely formatted output log_line
+# json_fmt=True -> json formatting
+# json_fmt=False -> text formatting
+# cli_dest=True -> cli formatting
+# cli_dest=False -> file formatting
+def create_log_line(e: Event, json_fmt: bool, cli_dest: bool) -> str:
+    level = e.level_tag()
+    values: dict = {
+        'pid': e.pid,
+        'msg': '',
+        'level': level if len(level) == 5 else f"{level} "
+    }
+    if cli_dest and isinstance(e, Cli):
+        values['msg'] = scrub_secrets(e.cli_msg(), env_secrets())
+    elif not cli_dest and isinstance(e, File):
+        values['msg'] = scrub_secrets(e.file_msg(), env_secrets())
+
+    if json_fmt:
+        values['ts'] = e.ts.isoformat()
+        log_line = json.dumps(values, sort_keys=True)
+    else:
+        values['ts'] = e.ts.strftime("%H:%M:%S")
+        color_tag = '' if this.format_color else Style.RESET_ALL
+        log_line = f"{color_tag}{values['ts']} | [ {values['level']} ] | {values['msg']}"
+
+    return log_line
+
+
+# allows for reuse of this obnoxious if/else tree.
+# do not use for exceptions, it doesn't pass along exc_info, stack_info, or extra
+def send_to_logger(l: Logger, level_tag: str, log_line: str):
+    if level_tag == 'test':
+        # TODO after implementing #3977 send to new test level
+        l.debug(log_line)
+    elif level_tag == 'debug':
+        l.debug(log_line)
+    elif level_tag == 'info':
+        l.info(log_line)
+    elif level_tag == 'warn':
+        l.warning(log_line)
+    elif level_tag == 'error':
+        l.error(log_line)
+    else:
+        raise AssertionError(
+            f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
+        )
+
+
+def send_exc_to_logger(
+    l: Logger,
+    level_tag: str,
+    log_line: str,
+    exc_info=True,
+    stack_info=False,
+    extra=False
+):
+    if level_tag == 'test':
+        # TODO after implementing #3977 send to new test level
+        l.debug(
+            log_line,
+            exc_info=exc_info,
+            stack_info=stack_info,
+            extra=extra
+        )
+    elif level_tag == 'debug':
+        l.debug(
+            log_line,
+            exc_info=exc_info,
+            stack_info=stack_info,
+            extra=extra
+        )
+    elif level_tag == 'info':
+        l.info(
+            log_line,
+            exc_info=exc_info,
+            stack_info=stack_info,
+            extra=extra
+        )
+    elif level_tag == 'warn':
+        l.warning(
+            log_line,
+            exc_info=exc_info,
+            stack_info=stack_info,
+            extra=extra
+        )
+    elif level_tag == 'error':
+        l.error(
+            log_line,
+            exc_info=exc_info,
+            stack_info=stack_info,
+            extra=extra
+        )
+    else:
+        raise AssertionError(
+            f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
+        )
 
 
 # top-level method for accessing the new eventing system
@@ -40,61 +185,29 @@ def gen_msg(e: CliEventABC) -> Generator[str, None, None]:
 # (i.e. - mutating the event history, printing to stdout, logging
 # to files, etc.)
 def fire_event(e: Event) -> None:
-    EVENT_HISTORY.append(e)
-    if isinstance(e, CliEventABC):
-        msg = gen_msg(e)
-        if e.level_tag() == 'test' and not isinstance(e, ShowException):
-            # TODO after implmenting #3977 send to new test level
-            logger.GLOBAL_LOGGER.debug(next(msg))
-        elif e.level_tag() == 'test' and isinstance(e, ShowException):
-            # TODO after implmenting #3977 send to new test level
-            logger.GLOBAL_LOGGER.debug(
-                next(msg),
-                exc_info=e.exc_info,
-                stack_info=e.stack_info,
-                extra=e.extra
-            )
-        # explicitly checking the debug flag here so that potentially expensive-to-construct
-        # log messages are not constructed if debug messages are never shown.
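As a rough illustration of what `create_log_line` produces, the same event renders as a key-sorted JSON object when `json_fmt` is true and as a `HH:MM:SS | [ level ] | msg` text line otherwise. This is a sketch with a stand-in event, not the patch's exact output:

```python
from dataclasses import dataclass, field
from datetime import datetime
import json
import os


# stand-in for an InfoLevel Cli event from dbt.events.types
@dataclass
class FakeEvent:
    ts: datetime = field(default_factory=datetime.now)
    pid: int = field(default_factory=os.getpid)

    def level_tag(self) -> str:
        return "info"

    def cli_msg(self) -> str:
        return "Start parsing."


def render(e: FakeEvent, json_fmt: bool) -> str:
    level = e.level_tag()
    # pad 4-char level tags so text lines stay column-aligned
    values = {"pid": e.pid, "msg": e.cli_msg(),
              "level": level if len(level) == 5 else f"{level} "}
    if json_fmt:
        values["ts"] = e.ts.isoformat()
        return json.dumps(values, sort_keys=True)
    values["ts"] = e.ts.strftime("%H:%M:%S")
    return f"{values['ts']} | [ {values['level']} ] | {values['msg']}"


e = FakeEvent()
print(render(e, json_fmt=False))  # e.g. 18:55:50 | [ info  ] | Start parsing.
print(render(e, json_fmt=True))   # {"level": "info ", "msg": "Start parsing.", ...}
```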
- elif e.level_tag() == 'debug' and not flags.DEBUG: + # TODO manage history in phase 2: EVENT_HISTORY.append(e) + # explicitly checking the debug flag here so that potentially expensive-to-construct + # log messages are not constructed if debug messages are never shown. + + # always logs debug level regardless of user input + if isinstance(e, File): + log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=False) + # doesn't send exceptions to exception logger + send_to_logger(FILE_LOG, level_tag=e.level_tag(), log_line=log_line) + + if isinstance(e, Cli): + if e.level_tag() == 'debug' and not flags.DEBUG: return # eat the message in case it was one of the expensive ones - elif e.level_tag() == 'debug' and not isinstance(e, ShowException): - logger.GLOBAL_LOGGER.debug(next(msg)) - elif e.level_tag() == 'debug' and isinstance(e, ShowException): - logger.GLOBAL_LOGGER.debug( - next(msg), - exc_info=e.exc_info, - stack_info=e.stack_info, - extra=e.extra - ) - elif e.level_tag() == 'info' and not isinstance(e, ShowException): - logger.GLOBAL_LOGGER.info(next(msg)) - elif e.level_tag() == 'info' and isinstance(e, ShowException): - logger.GLOBAL_LOGGER.info( - next(msg), - exc_info=e.exc_info, - stack_info=e.stack_info, - extra=e.extra - ) - elif e.level_tag() == 'warn' and not isinstance(e, ShowException): - logger.GLOBAL_LOGGER.warning(next(msg)) - elif e.level_tag() == 'warn' and isinstance(e, ShowException): - logger.GLOBAL_LOGGER.warning( - next(msg), - exc_info=e.exc_info, - stack_info=e.stack_info, - extra=e.extra - ) - elif e.level_tag() == 'error' and not isinstance(e, ShowException): - logger.GLOBAL_LOGGER.error(next(msg)) - elif e.level_tag() == 'error' and isinstance(e, ShowException): - logger.GLOBAL_LOGGER.error( - next(msg), + log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=True) + if not isinstance(e, ShowException): + send_to_logger(STDOUT_LOG, level_tag=e.level_tag(), log_line=log_line) + # CliEventABC and ShowException + else: + send_exc_to_logger( + STDOUT_LOG, + level_tag=e.level_tag(), + log_line=log_line, exc_info=e.exc_info, stack_info=e.stack_info, extra=e.extra ) - else: - raise AssertionError( - f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}" - ) diff --git a/core/dbt/events/test_types.py b/core/dbt/events/test_types.py index a0f36801670..1507faa8275 100644 --- a/core/dbt/events/test_types.py +++ b/core/dbt/events/test_types.py @@ -5,7 +5,7 @@ WarnLevel, ErrorLevel, ShowException, - CliEventABC + Cli ) @@ -13,42 +13,42 @@ # Reuse the existing messages when adding logs to tests. 
@dataclass -class IntegrationTestInfo(InfoLevel, CliEventABC): +class IntegrationTestInfo(InfoLevel, Cli): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Integration Test: {self.msg}" @dataclass -class IntegrationTestDebug(DebugLevel, CliEventABC): +class IntegrationTestDebug(DebugLevel, Cli): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Integration Test: {self.msg}" @dataclass -class IntegrationTestWarn(WarnLevel, CliEventABC): +class IntegrationTestWarn(WarnLevel, Cli): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Integration Test: {self.msg}" @dataclass -class IntegrationTestError(ErrorLevel, CliEventABC): +class IntegrationTestError(ErrorLevel, Cli): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Integration Test: {self.msg}" @dataclass -class IntegrationTestException(ShowException, ErrorLevel, CliEventABC): +class IntegrationTestException(ShowException, ErrorLevel, Cli): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Integration Test: {self.msg}" diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 733e74f5535..92a74c375f6 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -1,11 +1,13 @@ from abc import ABCMeta, abstractmethod import argparse from dataclasses import dataclass +from datetime import datetime from typing import Any, Callable, cast, Dict, List, Optional, Set, Union from dbt.events.stubs import _CachedRelation, AdapterResponse, BaseRelation, _ReferenceKey from dbt import ui from dbt.node_types import NodeType from dbt.events.format import format_fancy_output_line, pluralize +import os # types to represent log levels @@ -40,7 +42,7 @@ def level_tag(self) -> str: class ShowException(): # N.B.: # As long as we stick with the current convention of setting the member vars in the - # `cli_msg` method of subclasses, this is a safe operation. + # `message` method of subclasses, this is a safe operation. # If that ever changes we'll want to reassess. def __post_init__(self): self.exc_info: Any = True @@ -57,207 +59,251 @@ def __post_init__(self): # top-level superclass for all events class Event(metaclass=ABCMeta): + # fields that should be on all events with their default implementations + ts: datetime = datetime.now() + pid: int = os.getpid() + # code: int + # do not define this yourself. inherit it from one of the above level types. @abstractmethod def level_tag(self) -> str: raise Exception("level_tag not implemented for event") - -class CliEventABC(Event, metaclass=ABCMeta): # Solely the human readable message. Timestamps and formatting will be added by the logger. + # Must override yourself @abstractmethod - def cli_msg(self) -> str: - raise Exception("cli_msg not implemented for cli event") + def message(self) -> str: + raise Exception("msg not implemented for cli event") -class MainKeyboardInterrupt(InfoLevel, CliEventABC): +class File(Event, metaclass=ABCMeta): + # Solely the human readable message. Timestamps and formatting will be added by the logger. + def file_msg(self) -> str: + # returns the event msg unless overriden in the concrete class + return self.message() + + +class Cli(Event, metaclass=ABCMeta): + # Solely the human readable message. Timestamps and formatting will be added by the logger. 
def cli_msg(self) -> str: + # returns the event msg unless overriden in the concrete class + return self.message() + + +@dataclass +class AdapterEventBase(): + name: str + raw_msg: str + + def message(self) -> str: + return f"{self.name} adapter: {self.raw_msg}" + + +class AdapterEventDebug(DebugLevel, AdapterEventBase, Cli, File, ShowException): + pass + + +class AdapterEventInfo(InfoLevel, AdapterEventBase, Cli, File, ShowException): + pass + + +class AdapterEventWarning(WarnLevel, AdapterEventBase, Cli, File, ShowException): + pass + + +class AdapterEventError(ErrorLevel, AdapterEventBase, Cli, File, ShowException): + pass + + +class MainKeyboardInterrupt(InfoLevel, Cli): + def message(self) -> str: return "ctrl-c" +# will log to a file if the file logger is configured @dataclass -class MainEncounteredError(ErrorLevel, CliEventABC): +class MainEncounteredError(ErrorLevel, Cli): e: BaseException - def cli_msg(self) -> str: + def message(self) -> str: return f"Encountered an error:\n{str(self.e)}" @dataclass -class MainStackTrace(DebugLevel, CliEventABC): +class MainStackTrace(DebugLevel, Cli): stack_trace: str - def cli_msg(self) -> str: + def message(self) -> str: return self.stack_trace @dataclass -class MainReportVersion(InfoLevel, CliEventABC): +class MainReportVersion(InfoLevel, Cli, File): v: str # could be VersionSpecifier instead if we resolved some circular imports - def cli_msg(self): + def message(self): return f"Running with dbt{self.v}" @dataclass -class MainReportArgs(DebugLevel, CliEventABC): +class MainReportArgs(DebugLevel, Cli, File): args: argparse.Namespace - def cli_msg(self): + def message(self): return f"running dbt with arguments {str(self.args)}" @dataclass -class MainTrackingUserState(DebugLevel, CliEventABC): +class MainTrackingUserState(DebugLevel, Cli): user_state: str - def cli_msg(self): + def message(self): return f"Tracking: {self.user_state}" -class ParsingStart(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ParsingStart(InfoLevel, Cli, File): + def message(self) -> str: return "Start parsing." -class ParsingCompiling(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ParsingCompiling(InfoLevel, Cli, File): + def message(self) -> str: return "Compiling." -class ParsingWritingManifest(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ParsingWritingManifest(InfoLevel, Cli, File): + def message(self) -> str: return "Writing manifest." -class ParsingDone(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ParsingDone(InfoLevel, Cli, File): + def message(self) -> str: return "Done." 
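Putting the pieces above together: a new event now picks a level mixin plus the destinations it should reach (`Cli` for stdout, `File` for the log file) and implements `message()` instead of `cli_msg()`. A minimal sketch with a hypothetical event that does not exist in this patch:

```python
from dataclasses import dataclass

from dbt.events.functions import fire_event
from dbt.events.types import Cli, File, InfoLevel


@dataclass
class ConnectionRetry(InfoLevel, Cli, File):  # hypothetical event, for illustration only
    conn_name: str
    attempt: int

    def message(self) -> str:
        return f"Retrying connection '{self.conn_name}' (attempt {self.attempt})"


# the call site only constructs the event; formatting and routing happen in fire_event
fire_event(ConnectionRetry(conn_name="redshift_prod", attempt=2))
```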
-class ManifestDependenciesLoaded(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ManifestDependenciesLoaded(InfoLevel, Cli, File): + def message(self) -> str: return "Dependencies loaded" -class ManifestLoaderCreated(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ManifestLoaderCreated(InfoLevel, Cli, File): + def message(self) -> str: return "ManifestLoader created" -class ManifestLoaded(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ManifestLoaded(InfoLevel, Cli, File): + def message(self) -> str: return "Manifest loaded" -class ManifestChecked(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ManifestChecked(InfoLevel, Cli, File): + def message(self) -> str: return "Manifest checked" -class ManifestFlatGraphBuilt(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ManifestFlatGraphBuilt(InfoLevel, Cli, File): + def message(self) -> str: return "Flat graph built" @dataclass -class ReportPerformancePath(InfoLevel, CliEventABC): +class ReportPerformancePath(InfoLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Performance info: {self.path}" @dataclass -class GitSparseCheckoutSubdirectory(DebugLevel, CliEventABC): +class GitSparseCheckoutSubdirectory(DebugLevel, Cli, File): subdir: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Subdirectory specified: {self.subdir}, using sparse checkout." @dataclass -class GitProgressCheckoutRevision(DebugLevel, CliEventABC): +class GitProgressCheckoutRevision(DebugLevel, Cli, File): revision: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Checking out revision {self.revision}." @dataclass -class GitProgressUpdatingExistingDependency(DebugLevel, CliEventABC): +class GitProgressUpdatingExistingDependency(DebugLevel, Cli, File): dir: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Updating existing dependency {self.dir}." @dataclass -class GitProgressPullingNewDependency(DebugLevel, CliEventABC): +class GitProgressPullingNewDependency(DebugLevel, Cli, File): dir: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Pulling new dependency {self.dir}." @dataclass -class GitNothingToDo(DebugLevel, CliEventABC): +class GitNothingToDo(DebugLevel, Cli, File): sha: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Already at {self.sha}, nothing to do." @dataclass -class GitProgressUpdatedCheckoutRange(DebugLevel, CliEventABC): +class GitProgressUpdatedCheckoutRange(DebugLevel, Cli, File): start_sha: str end_sha: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Updated checkout from {self.start_sha} to {self.end_sha}." @dataclass -class GitProgressCheckedOutAt(DebugLevel, CliEventABC): +class GitProgressCheckedOutAt(DebugLevel, Cli, File): end_sha: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Checked out at {self.end_sha}." 
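For context on how typed events like the Git progress ones above replace free-form logger calls at their call sites, a before/after sketch (the "before" line is illustrative, not quoted from dbt):

```python
from dbt.events.functions import fire_event
from dbt.events.types import GitProgressCheckedOutAt

# before: an untyped, format-at-the-call-site log line
# logger.debug('  Checked out at {}.'.format(end_sha))

# after: the call site only supplies data; the message format lives on the event type
end_sha = "f9ef9da"  # example value
fire_event(GitProgressCheckedOutAt(end_sha=end_sha))
```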
@dataclass -class RegistryProgressMakingGETRequest(DebugLevel, CliEventABC): +class RegistryProgressMakingGETRequest(DebugLevel, Cli, File): url: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Making package registry request: GET {self.url}" @dataclass -class RegistryProgressGETResponse(DebugLevel, CliEventABC): +class RegistryProgressGETResponse(DebugLevel, Cli, File): url: str resp_code: int - def cli_msg(self) -> str: + def message(self) -> str: return f"Response from registry: GET {self.url} {self.resp_code}" # TODO this was actually `logger.exception(...)` not `logger.error(...)` @dataclass -class SystemErrorRetrievingModTime(ErrorLevel, CliEventABC): +class SystemErrorRetrievingModTime(ErrorLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Error retrieving modification time for file {self.path}" @dataclass -class SystemCouldNotWrite(DebugLevel, CliEventABC): +class SystemCouldNotWrite(DebugLevel, Cli, File): path: str reason: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return ( f"Could not write to path {self.path}({len(self.path)} characters): " f"{self.reason}\nexception: {self.exc}" @@ -265,44 +311,72 @@ def cli_msg(self) -> str: @dataclass -class SystemExecutingCmd(DebugLevel, CliEventABC): +class SystemExecutingCmd(DebugLevel, Cli, File): cmd: List[str] - def cli_msg(self) -> str: + def message(self) -> str: return f'Executing "{" ".join(self.cmd)}"' @dataclass -class SystemStdOutMsg(DebugLevel, CliEventABC): +class SystemStdOutMsg(DebugLevel, Cli, File): bmsg: bytes - def cli_msg(self) -> str: + def message(self) -> str: return f'STDOUT: "{str(self.bmsg)}"' @dataclass -class SystemStdErrMsg(DebugLevel, CliEventABC): +class SystemStdErrMsg(DebugLevel, Cli, File): bmsg: bytes - def cli_msg(self) -> str: + def message(self) -> str: return f'STDERR: "{str(self.bmsg)}"' @dataclass -class SystemReportReturnCode(DebugLevel, CliEventABC): +class SystemReportReturnCode(DebugLevel, Cli, File): code: int - def cli_msg(self) -> str: + def message(self) -> str: return f"command return code={self.code}" @dataclass -class SelectorReportInvalidSelector(InfoLevel, CliEventABC): +class SelectorAlertUpto3UnusedNodes(InfoLevel, Cli, File): + node_names: List[str] + + def message(self) -> str: + summary_nodes_str = ("\n - ").join(self.node_names[:3]) + and_more_str = ( + f"\n - and {len(self.node_names) - 3} more" if len(self.node_names) > 4 else "" + ) + return ( + f"\nSome tests were excluded because at least one parent is not selected. " + f"Use the --greedy flag to include them." 
+ f"\n - {summary_nodes_str}{and_more_str}" + ) + + +@dataclass +class SelectorAlertAllUnusedNodes(DebugLevel, Cli, File): + node_names: List[str] + + def message(self) -> str: + debug_nodes_str = ("\n - ").join(self.node_names) + return ( + f"Full list of tests that were excluded:" + f"\n - {debug_nodes_str}" + ) + + +@dataclass +class SelectorReportInvalidSelector(InfoLevel, Cli, File): selector_methods: dict spec_method: str raw_spec: str - def cli_msg(self) -> str: + def message(self) -> str: valid_selectors = ", ".join(self.selector_methods) return ( f"The '{self.spec_method}' selector specified in {self.raw_spec} is " @@ -311,95 +385,95 @@ def cli_msg(self) -> str: @dataclass -class MacroEventDebug(DebugLevel, CliEventABC): +class MacroEventInfo(InfoLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return self.msg @dataclass -class MacroEventInfo(InfoLevel, CliEventABC): +class MacroEventDebug(DebugLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return self.msg @dataclass -class NewConnection(DebugLevel, CliEventABC): +class NewConnection(DebugLevel, Cli, File): conn_type: str conn_name: str - def cli_msg(self) -> str: + def message(self) -> str: return f'Acquiring new {self.conn_type} connection "{self.conn_name}"' @dataclass -class ConnectionReused(DebugLevel, CliEventABC): +class ConnectionReused(DebugLevel, Cli, File): conn_name: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Re-using an available connection from the pool (formerly {self.conn_name})" @dataclass -class ConnectionLeftOpen(DebugLevel, CliEventABC): +class ConnectionLeftOpen(DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"Connection '{self.conn_name}' was left open." @dataclass -class ConnectionClosed(DebugLevel, CliEventABC): +class ConnectionClosed(DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"Connection '{self.conn_name}' was properly closed." @dataclass -class RollbackFailed(ShowException, DebugLevel, CliEventABC): +class RollbackFailed(ShowException, DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"Failed to rollback '{self.conn_name}'" # TODO: can we combine this with ConnectionClosed? @dataclass -class ConnectionClosed2(DebugLevel, CliEventABC): +class ConnectionClosed2(DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"On {self.conn_name}: Close" # TODO: can we combine this with ConnectionLeftOpen? @dataclass -class ConnectionLeftOpen2(DebugLevel, CliEventABC): +class ConnectionLeftOpen2(DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"On {self.conn_name}: No close available on handle" @dataclass -class Rollback(DebugLevel, CliEventABC): +class Rollback(DebugLevel, Cli, File): conn_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f"On {self.conn_name}: ROLLBACK" @dataclass -class CacheMiss(DebugLevel, CliEventABC): +class CacheMiss(DebugLevel, Cli, File): conn_name: Any # TODO mypy says this is `Callable[[], str]`?? 
¯\_(ツ)_/¯
     database: Optional[str]
     schema: str
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return (
             f'On "{self.conn_name}": cache miss for schema '
             f'"{self.database}.{self.schema}", this is inefficient'
         )
@@ -407,84 +481,84 @@ def cli_msg(self) -> str:
 
 
 @dataclass
-class ListRelations(DebugLevel, CliEventABC):
+class ListRelations(DebugLevel, Cli, File):
     database: Optional[str]
     schema: str
     relations: List[BaseRelation]
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"with database={self.database}, schema={self.schema}, relations={self.relations}"
 
 
 @dataclass
-class ConnectionUsed(DebugLevel, CliEventABC):
+class ConnectionUsed(DebugLevel, Cli, File):
     conn_type: str
     conn_name: Optional[str]
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f'Using {self.conn_type} connection "{self.conn_name}"'
 
 
 @dataclass
-class SQLQuery(DebugLevel, CliEventABC):
+class SQLQuery(DebugLevel, Cli, File):
     conn_name: Optional[str]
     sql: str
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"On {self.conn_name}: {self.sql}"
 
 
 @dataclass
-class SQLQueryStatus(DebugLevel, CliEventABC):
+class SQLQueryStatus(DebugLevel, Cli, File):
     status: Union[AdapterResponse, str]
     elapsed: float
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"SQL status: {self.status} in {self.elapsed} seconds"
 
 
 @dataclass
-class SQLCommit(DebugLevel, CliEventABC):
+class SQLCommit(DebugLevel, Cli, File):
     conn_name: str
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"On {self.conn_name}: COMMIT"
 
 
 @dataclass
-class ColTypeChange(DebugLevel, CliEventABC):
+class ColTypeChange(DebugLevel, Cli, File):
     orig_type: str
     new_type: str
     table: str
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"Changing col type from {self.orig_type} to {self.new_type} in table {self.table}"
 
 
 @dataclass
-class SchemaCreation(DebugLevel, CliEventABC):
+class SchemaCreation(DebugLevel, Cli, File):
     relation: BaseRelation
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f'Creating schema "{self.relation}"'
 
 
 @dataclass
-class SchemaDrop(DebugLevel, CliEventABC):
+class SchemaDrop(DebugLevel, Cli, File):
     relation: BaseRelation
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f'Dropping schema "{self.relation}".'
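The connection and SQL events above are designed to be fired in pairs around the actual database work. A hypothetical call-site sketch, not dbt's adapter code; `cursor` stands in for any DB-API cursor:

```python
import time

from dbt.events.functions import fire_event
from dbt.events.types import ConnectionUsed, SQLQuery, SQLQueryStatus


def execute(conn_type: str, conn_name: str, cursor, sql: str):
    # hypothetical adapter call site: announce the connection and query,
    # run it, then report status and elapsed time
    fire_event(ConnectionUsed(conn_type=conn_type, conn_name=conn_name))
    fire_event(SQLQuery(conn_name=conn_name, sql=sql))
    start = time.time()
    cursor.execute(sql)
    fire_event(SQLQueryStatus(status="OK", elapsed=round(time.time() - start, 2)))
```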
# TODO pretty sure this is only ever called in dead code
 # see: core/dbt/adapters/cache.py _add_link vs add_link
 @dataclass
-class UncachedRelation(DebugLevel, CliEventABC):
+class UncachedRelation(DebugLevel, Cli, File):
     dep_key: _ReferenceKey
     ref_key: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return (
             f"{self.dep_key} references {str(self.ref_key)} "
             f"but {self.ref_key.database}.{self.ref_key.schema}"
@@ -493,80 +567,80 @@ def cli_msg(self) -> str:
 
 
 @dataclass
-class AddLink(DebugLevel, CliEventABC):
+class AddLink(DebugLevel, Cli, File):
     dep_key: _ReferenceKey
     ref_key: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"adding link, {self.dep_key} references {self.ref_key}"
 
 
 @dataclass
-class AddRelation(DebugLevel, CliEventABC):
+class AddRelation(DebugLevel, Cli, File):
     relation: _CachedRelation
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"Adding relation: {str(self.relation)}"
 
 
 @dataclass
-class DropMissingRelation(DebugLevel, CliEventABC):
+class DropMissingRelation(DebugLevel, Cli, File):
     relation: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"dropped a nonexistent relationship: {str(self.relation)}"
 
 
 @dataclass
-class DropCascade(DebugLevel, CliEventABC):
+class DropCascade(DebugLevel, Cli, File):
     dropped: _ReferenceKey
     consequences: Set[_ReferenceKey]
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"drop {self.dropped} is cascading to {self.consequences}"
 
 
 @dataclass
-class DropRelation(DebugLevel, CliEventABC):
+class DropRelation(DebugLevel, Cli, File):
     dropped: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"Dropping relation: {self.dropped}"
 
 
 @dataclass
-class UpdateReference(DebugLevel, CliEventABC):
+class UpdateReference(DebugLevel, Cli, File):
     old_key: _ReferenceKey
     new_key: _ReferenceKey
     cached_key: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"updated reference from {self.old_key} -> {self.cached_key} to "\
             f"{self.new_key} -> {self.cached_key}"
 
 
 @dataclass
-class TemporaryRelation(DebugLevel, CliEventABC):
+class TemporaryRelation(DebugLevel, Cli, File):
     key: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"old key {self.key} not found in self.relations, assuming temporary"
 
 
 @dataclass
-class RenameSchema(DebugLevel, CliEventABC):
+class RenameSchema(DebugLevel, Cli, File):
     old_key: _ReferenceKey
     new_key: _ReferenceKey
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         return f"Renaming relation {self.old_key} to {self.new_key}"
 
 
 @dataclass
-class DumpBeforeAddGraph(DebugLevel, CliEventABC):
+class DumpBeforeAddGraph(DebugLevel, Cli, File):
     graph_func: Callable[[], Dict[str, List[str]]]
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         # workaround for https://github.com/python/mypy/issues/6910
         # TODO remove when we've upgraded to a mypy version without that bug
         func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func"))
@@ -574,118 +648,107 @@ def cli_msg(self) -> str:
 
 
 @dataclass
-class DumpAfterAddGraph(DebugLevel, CliEventABC):
+class DumpAfterAddGraph(DebugLevel, Cli, File):
     graph_func: Callable[[], Dict[str, List[str]]]
 
-    def cli_msg(self) -> str:
+    def message(self) -> str:
         # workaround for https://github.com/python/mypy/issues/6910
         func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func"))
         return f"after adding: {func_returns}"
 
 
 @dataclass
-class DumpBeforeRenameSchema(DebugLevel, CliEventABC):
+class DumpBeforeRenameSchema(DebugLevel, Cli, File): graph_func: Callable[[], Dict[str, List[str]]] - def cli_msg(self) -> str: + def message(self) -> str: # workaround for https://github.com/python/mypy/issues/6910 func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) return f"before rename: {func_returns}" @dataclass -class DumpAfterRenameSchema(DebugLevel, CliEventABC): +class DumpAfterRenameSchema(DebugLevel, Cli, File): graph_func: Callable[[], Dict[str, List[str]]] - def cli_msg(self) -> str: + def message(self) -> str: # workaround for https://github.com/python/mypy/issues/6910 func_returns = cast(Callable[[], Dict[str, List[str]]], getattr(self, "graph_func")) return f"after rename: {func_returns}" @dataclass -class AdapterImportError(InfoLevel, CliEventABC): +class AdapterImportError(InfoLevel, Cli, File): exc: ModuleNotFoundError - def cli_msg(self) -> str: + def message(self) -> str: return f"Error importing adapter: {self.exc}" @dataclass -class PluginLoadError(ShowException, DebugLevel, CliEventABC): - def cli_msg(self): +class PluginLoadError(ShowException, DebugLevel, Cli, File): + def message(self): pass -# since mypy doesn't run on every file we need to suggest to mypy that every -# class gets instantiated. But we don't actually want to run this code. -# making the conditional `if False` causes mypy to skip it as dead code so -# we need to skirt around that by computing something it doesn't check statically. -# -# TODO remove these lines once we run mypy everywhere. - -def dump_callable(): - return {"": [""]} # for instantiating `Dump...` methods which take callables. - - @dataclass -class NewConnectionOpening(DebugLevel, CliEventABC): +class NewConnectionOpening(DebugLevel, Cli, File): connection_state: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Opening a new connection, currently in state {self.connection_state}" -class TimingInfoCollected(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class TimingInfoCollected(DebugLevel, Cli, File): + def message(self) -> str: return "finished collecting timing info" @dataclass -class MergedFromState(DebugLevel, CliEventABC): +class MergedFromState(DebugLevel, Cli, File): nbr_merged: int sample: List - def cli_msg(self) -> str: + def message(self) -> str: return f"Merged {self.nbr_merged} items from state (sample: {self.sample})" @dataclass -class MissingProfileTarget(InfoLevel, CliEventABC): +class MissingProfileTarget(InfoLevel, Cli, File): profile_name: str target_name: str - def cli_msg(self) -> str: + def message(self) -> str: return f"target not specified in profile '{self.profile_name}', using '{self.target_name}'" @dataclass -class ProfileLoadError(ShowException, DebugLevel, CliEventABC): +class ProfileLoadError(ShowException, DebugLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f"Profile not loaded due to error: {self.exc}" @dataclass -class ProfileNotFound(InfoLevel, CliEventABC): +class ProfileNotFound(InfoLevel, Cli, File): profile_name: Optional[str] - def cli_msg(self) -> str: + def message(self) -> str: return f'No profile "{self.profile_name}" found, continuing with no target' -class InvalidVarsYAML(ErrorLevel, CliEventABC): - def cli_msg(self) -> str: +class InvalidVarsYAML(ErrorLevel, Cli, File): + def message(self) -> str: return "The YAML provided in the --vars argument is not valid.\n" @dataclass -class CatchRunException(ShowException, DebugLevel, CliEventABC): +class CatchRunException(ShowException, 
DebugLevel, Cli, File): build_path: Any exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: INTERNAL_ERROR_STRING = """This is an error in dbt. Please try again. If the \ error persists, open an issue at https://github.com/dbt-labs/dbt-core """.strip() @@ -699,20 +762,20 @@ def cli_msg(self) -> str: @dataclass -class HandleInternalException(ShowException, DebugLevel, CliEventABC): +class HandleInternalException(ShowException, DebugLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return str(self.exc) @dataclass -class MessageHandleGenericException(ErrorLevel, CliEventABC): +class MessageHandleGenericException(ErrorLevel, Cli, File): build_path: str unique_id: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: node_description = self.build_path if node_description is None: node_description = self.unique_id @@ -724,207 +787,217 @@ def cli_msg(self) -> str: @dataclass -class DetailsHandleGenericException(ShowException, DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class DetailsHandleGenericException(ShowException, DebugLevel, Cli, File): + def message(self) -> str: return '' @dataclass -class GenericTestFileParse(DebugLevel, CliEventABC): +class GenericTestFileParse(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Parsing {self.path}" @dataclass -class MacroFileParse(DebugLevel, CliEventABC): +class MacroFileParse(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Parsing {self.path}" -class PartialParsingFullReparseBecauseOfError(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingFullReparseBecauseOfError(InfoLevel, Cli, File): + def message(self) -> str: return "Partial parsing enabled but an error occurred. Switching to a full re-parse." @dataclass -class PartialParsingExceptionFile(DebugLevel, CliEventABC): +class PartialParsingExceptionFile(DebugLevel, Cli, File): file: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing exception processing file {self.file}" @dataclass -class PartialParsingFile(DebugLevel, CliEventABC): +class PartialParsingFile(DebugLevel, Cli, File): file_dict: Dict - def cli_msg(self) -> str: + def message(self) -> str: return f"PP file: {self.file_dict}" @dataclass -class PartialParsingException(DebugLevel, CliEventABC): +class PartialParsingException(DebugLevel, Cli, File): exc_info: Dict - def cli_msg(self) -> str: + def message(self) -> str: return f"PP exception info: {self.exc_info}" -class PartialParsingSkipParsing(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingSkipParsing(DebugLevel, Cli, File): + def message(self) -> str: return "Partial parsing enabled, no changes found, skipping parsing" -class PartialParsingMacroChangeStartFullParse(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingMacroChangeStartFullParse(InfoLevel, Cli, File): + def message(self) -> str: return "Change detected to override macro used during parsing. Starting full parse." 
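The partial-parsing events above all funnel through the same decision point in the manifest loader. A simplified sketch of that gate; the surrounding loader logic here is a stand-in, not dbt's actual control flow:

```python
from dbt.events.functions import fire_event
from dbt.events.types import (
    PartialParsingFullReparseBecauseOfError,
    PartialParsingSkipParsing,
)


def load(saved_manifest, changed_files, partial_parse_failed: bool):
    # stand-in for the manifest loader's partial-parse gate
    if partial_parse_failed:
        fire_event(PartialParsingFullReparseBecauseOfError())
        return None  # signal the caller to run a full parse
    if not changed_files:
        fire_event(PartialParsingSkipParsing())
        return saved_manifest  # reuse the saved manifest untouched
    return None  # changed files: proceed to an incremental parse
```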
+class PartialParsingProjectEnvVarsChanged(InfoLevel, Cli, File): + def message(self) -> str: + return "Unable to do partial parsing because env vars used in dbt_project.yml have changed" + + +class PartialParsingProfileEnvVarsChanged(InfoLevel, Cli, File): + def message(self) -> str: + return "Unable to do partial parsing because env vars used in profiles.yml have changed" + + @dataclass -class ManifestWrongMetadataVersion(DebugLevel, CliEventABC): +class ManifestWrongMetadataVersion(DebugLevel, Cli, File): version: str - def cli_msg(self) -> str: + def message(self) -> str: return ("Manifest metadata did not contain correct version. " f"Contained '{self.version}' instead.") @dataclass -class PartialParsingVersionMismatch(InfoLevel, CliEventABC): +class PartialParsingVersionMismatch(InfoLevel, Cli, File): saved_version: str current_version: str - def cli_msg(self) -> str: + def message(self) -> str: return ("Unable to do partial parsing because of a dbt version mismatch. " f"Saved manifest version: {self.saved_version}. " f"Current version: {self.current_version}.") -class PartialParsingFailedBecauseConfigChange(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingFailedBecauseConfigChange(InfoLevel, Cli, File): + def message(self) -> str: return ("Unable to do partial parsing because config vars, " "config profile, or config target have changed") -class PartialParsingFailedBecauseProfileChange(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingFailedBecauseProfileChange(InfoLevel, Cli, File): + def message(self) -> str: return ("Unable to do partial parsing because profile has changed") -class PartialParsingFailedBecauseNewProjectDependency(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingFailedBecauseNewProjectDependency(InfoLevel, Cli, File): + def message(self) -> str: return ("Unable to do partial parsing because a project dependency has been added") -class PartialParsingFailedBecauseHashChanged(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingFailedBecauseHashChanged(InfoLevel, Cli, File): + def message(self) -> str: return ("Unable to do partial parsing because a project config has changed") -class PartialParsingNotEnabled(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParsingNotEnabled(DebugLevel, Cli, File): + def message(self) -> str: return ("Partial parsing not enabled") @dataclass -class ParsedFileLoadFailed(ShowException, DebugLevel, CliEventABC): +class ParsedFileLoadFailed(ShowException, DebugLevel, Cli, File): path: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f"Failed to load parsed file from disk at {self.path}: {self.exc}" -class PartialParseSaveFileNotFound(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class PartialParseSaveFileNotFound(InfoLevel, Cli, File): + def message(self) -> str: return ("Partial parse save file not found. Starting full parse.") @dataclass -class StaticParserCausedJinjaRendering(DebugLevel, CliEventABC): +class StaticParserCausedJinjaRendering(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1605: jinja rendering because of STATIC_PARSER flag. file: {self.path}" # TODO: Experimental/static parser uses these for testing and some may be a good use case for # the `TestLevel` logger once we implement it. Some will probably stay `DebugLevel`. 
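The numbered prefixes (1601-1699) on the static-parser events give each outcome a stable, grep-able code in debug logs. A sketch of a sampling flow that fires them; the parser helpers are stubs, not dbt's implementations:

```python
from dbt.events.functions import fire_event
from dbt.events.types import StaticParserFallbackJinjaRendering, StaticParserSuccess


def try_static_parse(source: str):
    # stub: pretend any jinja braces defeat the static parser
    return None if "{{" in source else {"refs": [], "sources": []}


def parse_model(path: str, source: str):
    result = try_static_parse(source)
    if result is not None:
        fire_event(StaticParserSuccess(path=path))  # 1699
        return result
    fire_event(StaticParserFallbackJinjaRendering(path=path))  # 1602
    return {}  # stand-in for the full jinja render
```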
@dataclass -class UsingExperimentalParser(DebugLevel, CliEventABC): +class UsingExperimentalParser(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1610: conducting experimental parser sample on {self.path}" @dataclass -class SampleFullJinjaRendering(DebugLevel, CliEventABC): +class SampleFullJinjaRendering(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1611: conducting full jinja rendering sample on {self.path}" @dataclass -class StaticParserFallbackJinjaRendering(DebugLevel, CliEventABC): +class StaticParserFallbackJinjaRendering(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1602: parser fallback to jinja rendering on {self.path}" @dataclass -class StaticParsingMacroOverrideDetected(DebugLevel, CliEventABC): +class StaticParsingMacroOverrideDetected(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1601: detected macro override of ref/source/config in the scope of {self.path}" @dataclass -class StaticParserSuccess(DebugLevel, CliEventABC): +class StaticParserSuccess(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1699: static parser successfully parsed {self.path}" @dataclass -class StaticParserFailure(DebugLevel, CliEventABC): +class StaticParserFailure(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1603: static parser failed on {self.path}" @dataclass -class ExperimentalParserSuccess(DebugLevel, CliEventABC): +class ExperimentalParserSuccess(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1698: experimental parser successfully parsed {self.path}" @dataclass -class ExperimentalParserFailure(DebugLevel, CliEventABC): +class ExperimentalParserFailure(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"1604: experimental parser failed on {self.path}" @dataclass -class PartialParsingEnabled(DebugLevel, CliEventABC): +class PartialParsingEnabled(DebugLevel, Cli, File): deleted: int added: int changed: int - def cli_msg(self) -> str: + def message(self) -> str: return (f"Partial parsing enabled: " f"{self.deleted} files deleted, " f"{self.added} files added, " @@ -932,165 +1005,155 @@ def cli_msg(self) -> str: @dataclass -class PartialParsingAddedFile(DebugLevel, CliEventABC): +class PartialParsingAddedFile(DebugLevel, Cli, File): file_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: added file: {self.file_id}" @dataclass -class PartialParsingDeletedFile(DebugLevel, CliEventABC): +class PartialParsingDeletedFile(DebugLevel, Cli, File): file_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: deleted file: {self.file_id}" @dataclass -class PartialParsingUpdatedFile(DebugLevel, CliEventABC): +class PartialParsingUpdatedFile(DebugLevel, Cli, File): file_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: updated file: {self.file_id}" @dataclass -class PartialParsingNodeMissingInSourceFile(DebugLevel, CliEventABC): +class PartialParsingNodeMissingInSourceFile(DebugLevel, Cli, File): source_file: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: node not found for source_file {self.source_file}" @dataclass -class 
PartialParsingMissingNodes(DebugLevel, CliEventABC): +class PartialParsingMissingNodes(DebugLevel, Cli, File): file_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"No nodes found for source file {self.file_id}" @dataclass -class PartialParsingChildMapMissingUniqueID(DebugLevel, CliEventABC): +class PartialParsingChildMapMissingUniqueID(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: {self.unique_id} not found in child_map" @dataclass -class PartialParsingUpdateSchemaFile(DebugLevel, CliEventABC): +class PartialParsingUpdateSchemaFile(DebugLevel, Cli, File): file_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: update schema file: {self.file_id}" @dataclass -class PartialParsingDeletedSource(DebugLevel, CliEventABC): +class PartialParsingDeletedSource(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: deleted source {self.unique_id}" @dataclass -class PartialParsingDeletedExposure(DebugLevel, CliEventABC): +class PartialParsingDeletedExposure(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Partial parsing: deleted exposure {self.unique_id}" -class PartialParsingProjectEnvVarsChanged(InfoLevel, CliEventABC): - def cli_msg(self) -> str: - return "Unable to do partial parsing because env vars used in dbt_project.yml have changed" - - -class PartialParsingProfileEnvVarsChanged(InfoLevel, CliEventABC): - def cli_msg(self) -> str: - return "Unable to do partial parsing because env vars used in profiles.yml have changed" - - @dataclass -class InvalidDisabledSourceInTestNode(WarnLevel, CliEventABC): +class InvalidDisabledSourceInTestNode(WarnLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return ui.warning_tag(self.msg) @dataclass -class InvalidRefInTestNode(WarnLevel, CliEventABC): +class InvalidRefInTestNode(WarnLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return ui.warning_tag(self.msg) @dataclass -class RunningOperationCaughtError(ErrorLevel, CliEventABC): +class RunningOperationCaughtError(ErrorLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f'Encountered an error while running operation: {self.exc}' @dataclass -class RunningOperationUncaughtError(ErrorLevel, CliEventABC): +class RunningOperationUncaughtError(ErrorLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f'Encountered an error while running operation: {self.exc}' -class DbtProjectError(ErrorLevel, CliEventABC): - def cli_msg(self) -> str: +class DbtProjectError(ErrorLevel, Cli, File): + def message(self) -> str: return "Encountered an error while reading the project:" @dataclass -class DbtProjectErrorException(ErrorLevel, CliEventABC): +class DbtProjectErrorException(ErrorLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f" ERROR: {str(self.exc)}" -class DbtProfileError(ErrorLevel, CliEventABC): - def cli_msg(self) -> str: +class DbtProfileError(ErrorLevel, Cli, File): + def message(self) -> str: return "Encountered an error while reading profiles:" @dataclass -class DbtProfileErrorException(ErrorLevel, CliEventABC): +class DbtProfileErrorException(ErrorLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return f" ERROR: 
{str(self.exc)}" -class ProfileListTitle(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ProfileListTitle(InfoLevel, Cli, File): + def message(self) -> str: return "Defined profiles:" @dataclass -class ListSingleProfile(InfoLevel, CliEventABC): +class ListSingleProfile(InfoLevel, Cli, File): profile: str - def cli_msg(self) -> str: + def message(self) -> str: return f" - {self.profile}" -class NoDefinedProfiles(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class NoDefinedProfiles(InfoLevel, Cli, File): + def message(self) -> str: return "There are no profiles defined in your profiles.yml file" -class ProfileHelpMessage(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ProfileHelpMessage(InfoLevel, Cli, File): + def message(self) -> str: PROFILES_HELP_MESSAGE = """ For more information on configuring profiles, please consult the dbt docs: @@ -1100,19 +1163,19 @@ def cli_msg(self) -> str: @dataclass -class CatchableExceptionOnRun(ShowException, DebugLevel, CliEventABC): +class CatchableExceptionOnRun(ShowException, DebugLevel, Cli, File): exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return str(self.exc) @dataclass -class InternalExceptionOnRun(DebugLevel, CliEventABC): +class InternalExceptionOnRun(DebugLevel, Cli, File): build_path: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: prefix = 'Internal error executing {}'.format(self.build_path) INTERNAL_ERROR_STRING = """This is an error in dbt. Please try again. If \ @@ -1129,18 +1192,18 @@ def cli_msg(self) -> str: # This prints the stack trace at the debug level while allowing just the nice exception message # at the error level - or whatever other level chosen. Used in multiple places. @dataclass -class PrintDebugStackTrace(ShowException, DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class PrintDebugStackTrace(ShowException, DebugLevel, Cli, File): + def message(self) -> str: return "" @dataclass -class GenericExceptionOnRun(ErrorLevel, CliEventABC): +class GenericExceptionOnRun(ErrorLevel, Cli, File): build_path: str unique_id: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: node_description = self.build_path if node_description is None: node_description = self.unique_id @@ -1152,50 +1215,50 @@ def cli_msg(self) -> str: @dataclass -class NodeConnectionReleaseError(ShowException, DebugLevel, CliEventABC): +class NodeConnectionReleaseError(ShowException, DebugLevel, Cli, File): node_name: str exc: Exception - def cli_msg(self) -> str: + def message(self) -> str: return ('Error releasing connection for node {}: {!s}' .format(self.node_name, self.exc)) @dataclass -class CheckCleanPath(InfoLevel, CliEventABC): +class CheckCleanPath(InfoLevel, Cli): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Checking {self.path}/*" @dataclass -class ConfirmCleanPath(InfoLevel, CliEventABC): +class ConfirmCleanPath(InfoLevel, Cli): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Cleaned {self.path}/*" @dataclass -class ProtectedCleanPath(InfoLevel, CliEventABC): +class ProtectedCleanPath(InfoLevel, Cli): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"ERROR: not cleaning {self.path}/* because it is protected" -class FinishedCleanPaths(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class FinishedCleanPaths(InfoLevel, Cli): + def message(self) -> str: return "Finished cleaning all paths." 
@dataclass -class OpenCommand(InfoLevel, CliEventABC): +class OpenCommand(InfoLevel, Cli, File): open_cmd: str profiles_dir: str - def cli_msg(self) -> str: + def message(self) -> str: PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run: {open_cmd} {profiles_dir}""" @@ -1207,252 +1270,260 @@ def cli_msg(self) -> str: return message -class DepsNoPackagesFound(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class DepsNoPackagesFound(InfoLevel, Cli, File): + def message(self) -> str: return 'Warning: No packages were found in packages.yml' @dataclass -class DepsStartPackageInstall(InfoLevel, CliEventABC): +class DepsStartPackageInstall(InfoLevel, Cli, File): package: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Installing {self.package}" @dataclass -class DepsInstallInfo(InfoLevel, CliEventABC): +class DepsInstallInfo(InfoLevel, Cli, File): version_name: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Installed from {self.version_name}" @dataclass -class DepsUpdateAvailable(InfoLevel, CliEventABC): +class DepsUpdateAvailable(InfoLevel, Cli, File): version_latest: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Updated version available: {self.version_latest}" -class DepsUTD(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class DepsUTD(InfoLevel, Cli, File): + def message(self) -> str: return " Up to date!" @dataclass -class DepsListSubdirectory(InfoLevel, CliEventABC): +class DepsListSubdirectory(InfoLevel, Cli, File): subdirectory: str - def cli_msg(self) -> str: + def message(self) -> str: return f" and subdirectory {self.subdirectory}" @dataclass -class DepsNotifyUpdatesAvailable(InfoLevel, CliEventABC): +class DepsNotifyUpdatesAvailable(InfoLevel, Cli, File): packages: List[str] - def cli_msg(self) -> str: + def message(self) -> str: return ('\nUpdates available for packages: {} \ \nUpdate your versions in packages.yml, then run dbt deps'.format(self.packages)) @dataclass -class DatabaseErrorRunning(InfoLevel, CliEventABC): +class DatabaseErrorRunning(InfoLevel, Cli, File): hook_type: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Database error while running {self.hook_type}" -class EmptyLine(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class EmptyLine(InfoLevel, Cli, File): + def message(self) -> str: return '' @dataclass -class HooksRunning(InfoLevel, CliEventABC): +class HooksRunning(InfoLevel, Cli, File): num_hooks: int hook_type: str - def cli_msg(self) -> str: + def message(self) -> str: plural = 'hook' if self.num_hooks == 1 else 'hooks' return f"Running {self.num_hooks} {self.hook_type} {plural}" @dataclass -class HookFinished(InfoLevel, CliEventABC): +class HookFinished(InfoLevel, Cli, File): stat_line: str execution: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Finished running {self.stat_line}{self.execution}." 
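The deps events above compose into the per-package report printed during `dbt deps`. A sketch of that sequencing; the function and the version strings are examples, not dbt's task code:

```python
from dbt.events.functions import fire_event
from dbt.events.types import (
    DepsInstallInfo, DepsStartPackageInstall, DepsUpdateAvailable, DepsUTD
)


def report_package(package: str, installed: str, latest: str):
    # hypothetical sequencing of the events fired for one package
    fire_event(DepsStartPackageInstall(package=package))
    fire_event(DepsInstallInfo(version_name=installed))
    if installed == latest:
        fire_event(DepsUTD())
    else:
        fire_event(DepsUpdateAvailable(version_latest=latest))


report_package("dbt-labs/dbt_utils", "0.7.4", "0.8.0")
```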
@dataclass -class WriteCatalogFailure(ErrorLevel, CliEventABC): +class WriteCatalogFailure(ErrorLevel, Cli, File): num_exceptions: int - def cli_msg(self) -> str: + def message(self) -> str: return (f"dbt encountered {self.num_exceptions} failure{(self.num_exceptions != 1) * 's'} " "while writing the catalog") @dataclass -class CatalogWritten(InfoLevel, CliEventABC): +class CatalogWritten(InfoLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Catalog written to {self.path}" -class CannotGenerateDocs(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class CannotGenerateDocs(InfoLevel, Cli, File): + def message(self) -> str: return "compile failed, cannot generate docs" -class BuildingCatalog(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class BuildingCatalog(InfoLevel, Cli, File): + def message(self) -> str: return "Building catalog" -class CompileComplete(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class CompileComplete(InfoLevel, Cli, File): + def message(self) -> str: return "Done." -class FreshnessCheckComplete(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class FreshnessCheckComplete(InfoLevel, Cli, File): + def message(self) -> str: return "Done." @dataclass -class ServingDocsPort(InfoLevel, CliEventABC): +class ServingDocsPort(InfoLevel, Cli, File): address: str port: int - def cli_msg(self) -> str: + def message(self) -> str: return f"Serving docs at {self.address}:{self.port}" @dataclass -class ServingDocsAccessInfo(InfoLevel, CliEventABC): +class ServingDocsAccessInfo(InfoLevel, Cli, File): port: str - def cli_msg(self) -> str: + def message(self) -> str: return f"To access from your browser, navigate to: http://localhost:{self.port}" -class ServingDocsExitInfo(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class ServingDocsExitInfo(InfoLevel, Cli, File): + def message(self) -> str: return "Press Ctrl+C to exit.\n\n" @dataclass -class SeedHeader(InfoLevel, CliEventABC): +class SeedHeader(InfoLevel, Cli, File): header: str - def cli_msg(self) -> str: + def message(self) -> str: return self.header @dataclass -class SeedHeaderSeperator(InfoLevel, CliEventABC): +class SeedHeaderSeperator(InfoLevel, Cli, File): len_header: int - def cli_msg(self) -> str: + def message(self) -> str: return "-" * self.len_header @dataclass -class RunResultWarning(WarnLevel, CliEventABC): +class RunResultWarning(WarnLevel, Cli, File): resource_type: str node_name: str path: str - def cli_msg(self) -> str: + def message(self) -> str: info = 'Warning' return ui.yellow(f"{info} in {self.resource_type} {self.node_name} ({self.path})") @dataclass -class RunResultFailure(ErrorLevel, CliEventABC): +class RunResultFailure(ErrorLevel, Cli, File): resource_type: str node_name: str path: str - def cli_msg(self) -> str: + def message(self) -> str: info = 'Failure' return ui.red(f"{info} in {self.resource_type} {self.node_name} ({self.path})") @dataclass -class StatsLine(InfoLevel, CliEventABC): +class StatsLine(InfoLevel, Cli, File): stats: Dict - def cli_msg(self) -> str: + def message(self) -> str: stats_line = ("\nDone. 
PASS={pass} WARN={warn} ERROR={error} SKIP={skip} TOTAL={total}") return stats_line.format(**self.stats) @dataclass -class RunResultError(ErrorLevel, CliEventABC): +class RunResultError(ErrorLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return f" {self.msg}" @dataclass -class RunResultErrorNoMessage(ErrorLevel, CliEventABC): +class RunResultErrorNoMessage(ErrorLevel, Cli, File): status: str - def cli_msg(self) -> str: + def message(self) -> str: return f" Status: {self.status}" @dataclass -class SQLCompiledPath(InfoLevel, CliEventABC): +class SQLCompiledPath(InfoLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f" compiled SQL at {self.path}" @dataclass -class CheckNodeTestFailure(InfoLevel, CliEventABC): +class SQlRunnerException(ShowException, DebugLevel, Cli, File): + exc: Exception + + def message(self) -> str: + return f"Got an exception: {self.exc}" + + +@dataclass +class CheckNodeTestFailure(InfoLevel, Cli, File): relation_name: str - def cli_msg(self) -> str: + def message(self) -> str: msg = f"select * from {self.relation_name}" border = '-' * len(msg) return f" See test failures:\n {border}\n {msg}\n {border}" @dataclass -class FirstRunResultError(ErrorLevel, CliEventABC): +class FirstRunResultError(ErrorLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return ui.yellow(self.msg) @dataclass -class AfterFirstRunResultError(ErrorLevel, CliEventABC): +class AfterFirstRunResultError(ErrorLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return self.msg @dataclass -class EndOfRunSummary(InfoLevel, CliEventABC): +class EndOfRunSummary(InfoLevel, Cli, File): num_errors: int num_warnings: int keyboard_interrupt: bool = False - def cli_msg(self) -> str: + def message(self) -> str: error_plural = pluralize(self.num_errors, 'error') warn_plural = pluralize(self.num_warnings, 'warning') if self.keyboard_interrupt: @@ -1468,25 +1539,25 @@ def cli_msg(self) -> str: @dataclass -class PrintStartLine(InfoLevel, CliEventABC): +class PrintStartLine(InfoLevel, Cli, File): description: str index: int total: int - def cli_msg(self) -> str: - msg = "START {self.description}" + def message(self) -> str: + msg = f"START {self.description}" return format_fancy_output_line(msg=msg, status='RUN', index=self.index, total=self.total) @dataclass -class PrintHookStartLine(InfoLevel, CliEventABC): +class PrintHookStartLine(InfoLevel, Cli, File): statement: str index: int total: int truncate: bool - def cli_msg(self) -> str: - msg = "START hook: {self.statement}" + def message(self) -> str: + msg = f"START hook: {self.statement}" return format_fancy_output_line(msg=msg, status='RUN', index=self.index, @@ -1495,7 +1566,7 @@ def cli_msg(self) -> str: @dataclass -class PrintHookEndLine(InfoLevel, CliEventABC): +class PrintHookEndLine(InfoLevel, Cli, File): statement: str status: str index: int @@ -1503,7 +1574,7 @@ class PrintHookEndLine(InfoLevel, CliEventABC): execution_time: int truncate: bool - def cli_msg(self) -> str: + def message(self) -> str: msg = 'OK hook: {}'.format(self.statement) return format_fancy_output_line(msg=msg, status=ui.green(self.status), @@ -1514,14 +1585,14 @@ def cli_msg(self) -> str: @dataclass -class SkippingDetails(InfoLevel, CliEventABC): +class SkippingDetails(InfoLevel, Cli, File): resource_type: str schema: str node_name: str index: int total: int - def cli_msg(self) -> str: + def message(self) -> str: if self.resource_type in 
NodeType.refable(): msg = f'SKIP relation {self.schema}.{self.node_name}' else: @@ -1533,13 +1604,13 @@ def cli_msg(self) -> str: @dataclass -class PrintErrorTestResult(ErrorLevel, CliEventABC): +class PrintErrorTestResult(ErrorLevel, Cli, File): name: str index: int num_models: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = "ERROR" msg = f"{info} {self.name}" return format_fancy_output_line(msg=msg, @@ -1550,13 +1621,13 @@ def cli_msg(self) -> str: @dataclass -class PrintPassTestResult(InfoLevel, CliEventABC): +class PrintPassTestResult(InfoLevel, Cli, File): name: str index: int num_models: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = "PASS" msg = f"{info} {self.name}" return format_fancy_output_line(msg=msg, @@ -1567,14 +1638,14 @@ def cli_msg(self) -> str: @dataclass -class PrintWarnTestResult(WarnLevel, CliEventABC): +class PrintWarnTestResult(WarnLevel, Cli, File): name: str index: int num_models: int execution_time: int failures: List[str] - def cli_msg(self) -> str: + def message(self) -> str: info = f'WARN {self.failures}' msg = f"{info} {self.name}" return format_fancy_output_line(msg=msg, @@ -1585,14 +1656,14 @@ def cli_msg(self) -> str: @dataclass -class PrintFailureTestResult(ErrorLevel, CliEventABC): +class PrintFailureTestResult(ErrorLevel, Cli, File): name: str index: int num_models: int execution_time: int failures: List[str] - def cli_msg(self) -> str: + def message(self) -> str: info = f'FAIL {self.failures}' msg = f"{info} {self.name}" return format_fancy_output_line(msg=msg, @@ -1603,13 +1674,13 @@ def cli_msg(self) -> str: @dataclass -class PrintSkipBecauseError(ErrorLevel, CliEventABC): +class PrintSkipBecauseError(ErrorLevel, Cli, File): schema: str relation: str index: int total: int - def cli_msg(self) -> str: + def message(self) -> str: msg = f'SKIP relation {self.schema}.{self.relation} due to ephemeral model error' return format_fancy_output_line(msg=msg, status=ui.red('ERROR SKIP'), @@ -1618,14 +1689,14 @@ def cli_msg(self) -> str: @dataclass -class PrintModelErrorResultLine(ErrorLevel, CliEventABC): +class PrintModelErrorResultLine(ErrorLevel, Cli, File): description: str status: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = "ERROR creating" msg = f"{info} {self.description}" return format_fancy_output_line(msg=msg, @@ -1636,14 +1707,14 @@ def cli_msg(self) -> str: @dataclass -class PrintModelResultLine(InfoLevel, CliEventABC): +class PrintModelResultLine(InfoLevel, Cli, File): description: str status: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = "OK created" msg = f"{info} {self.description}" return format_fancy_output_line(msg=msg, @@ -1654,7 +1725,7 @@ def cli_msg(self) -> str: @dataclass -class PrintSnapshotErrorResultLine(ErrorLevel, CliEventABC): +class PrintSnapshotErrorResultLine(ErrorLevel, Cli, File): status: str description: str cfg: Dict @@ -1662,7 +1733,7 @@ class PrintSnapshotErrorResultLine(ErrorLevel, CliEventABC): total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'ERROR snapshotting' msg = "{info} {description}".format(info=info, description=self.description, **self.cfg) return format_fancy_output_line(msg=msg, @@ -1673,7 +1744,7 @@ def cli_msg(self) -> str: @dataclass -class PrintSnapshotResultLine(InfoLevel, CliEventABC): +class PrintSnapshotResultLine(InfoLevel, Cli, File): status: str 
description: str cfg: Dict @@ -1681,7 +1752,7 @@ class PrintSnapshotResultLine(InfoLevel, CliEventABC): total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'OK snapshotted' msg = "{info} {description}".format(info=info, description=self.description, **self.cfg) return format_fancy_output_line(msg=msg, @@ -1692,7 +1763,7 @@ def cli_msg(self) -> str: @dataclass -class PrintSeedErrorResultLine(ErrorLevel, CliEventABC): +class PrintSeedErrorResultLine(ErrorLevel, Cli, File): status: str index: int total: int @@ -1700,7 +1771,7 @@ class PrintSeedErrorResultLine(ErrorLevel, CliEventABC): schema: str relation: str - def cli_msg(self) -> str: + def message(self) -> str: info = 'ERROR loading' msg = f"{info} seed file {self.schema}.{self.relation}" return format_fancy_output_line(msg=msg, @@ -1711,7 +1782,7 @@ def cli_msg(self) -> str: @dataclass -class PrintSeedResultLine(InfoLevel, CliEventABC): +class PrintSeedResultLine(InfoLevel, Cli, File): status: str index: int total: int @@ -1719,7 +1790,7 @@ class PrintSeedResultLine(InfoLevel, CliEventABC): schema: str relation: str - def cli_msg(self) -> str: + def message(self) -> str: info = 'OK loaded' msg = f"{info} seed file {self.schema}.{self.relation}" return format_fancy_output_line(msg=msg, @@ -1730,14 +1801,14 @@ def cli_msg(self) -> str: @dataclass -class PrintHookEndErrorLine(ErrorLevel, CliEventABC): +class PrintHookEndErrorLine(ErrorLevel, Cli, File): source_name: str table_name: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'ERROR' msg = f"{info} freshness of {self.source_name}.{self.table_name}" return format_fancy_output_line(msg=msg, @@ -1748,14 +1819,14 @@ def cli_msg(self) -> str: @dataclass -class PrintHookEndErrorStaleLine(ErrorLevel, CliEventABC): +class PrintHookEndErrorStaleLine(ErrorLevel, Cli, File): source_name: str table_name: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'ERROR STALE' msg = f"{info} freshness of {self.source_name}.{self.table_name}" return format_fancy_output_line(msg=msg, @@ -1766,14 +1837,14 @@ def cli_msg(self) -> str: @dataclass -class PrintHookEndWarnLine(WarnLevel, CliEventABC): +class PrintHookEndWarnLine(WarnLevel, Cli, File): source_name: str table_name: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'WARN' msg = f"{info} freshness of {self.source_name}.{self.table_name}" return format_fancy_output_line(msg=msg, @@ -1784,14 +1855,14 @@ def cli_msg(self) -> str: @dataclass -class PrintHookEndPassLine(InfoLevel, CliEventABC): +class PrintHookEndPassLine(InfoLevel, Cli, File): source_name: str table_name: str index: int total: int execution_time: int - def cli_msg(self) -> str: + def message(self) -> str: info = 'PASS' msg = f"{info} freshness of {self.source_name}.{self.table_name}" return format_fancy_output_line(msg=msg, @@ -1802,10 +1873,10 @@ def cli_msg(self) -> str: @dataclass -class PrintCancelLine(ErrorLevel, CliEventABC): +class PrintCancelLine(ErrorLevel, Cli, File): conn_name: str - def cli_msg(self) -> str: + def message(self) -> str: msg = 'CANCEL query {}'.format(self.conn_name) return format_fancy_output_line(msg=msg, status=ui.red('CANCEL'), @@ -1814,34 +1885,34 @@ def cli_msg(self) -> str: @dataclass -class DefaultSelector(InfoLevel, CliEventABC): +class DefaultSelector(InfoLevel, Cli, File): name: str - def cli_msg(self) -> str: + def message(self) -> str: 
return f"Using default selector {self.name}" @dataclass -class NodeStart(DebugLevel, CliEventABC): +class NodeStart(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Began running node {self.unique_id}" @dataclass -class NodeFinished(DebugLevel, CliEventABC): +class NodeFinished(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Finished running node {self.unique_id}" @dataclass -class QueryCancelationUnsupported(InfoLevel, CliEventABC): +class QueryCancelationUnsupported(InfoLevel, Cli, File): type: str - def cli_msg(self) -> str: + def message(self) -> str: msg = (f"The {self.type} adapter does not support query " "cancellation. Some queries may still be " "running!") @@ -1849,217 +1920,209 @@ def cli_msg(self) -> str: @dataclass -class ConcurrencyLine(InfoLevel, CliEventABC): +class ConcurrencyLine(InfoLevel, Cli, File): concurrency_line: str - def cli_msg(self) -> str: + def message(self) -> str: return self.concurrency_line @dataclass -class StarterProjectPath(DebugLevel, CliEventABC): +class StarterProjectPath(DebugLevel, Cli, File): dir: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Starter project path: {self.dir}" @dataclass -class ConfigFolderDirectory(InfoLevel, CliEventABC): +class ConfigFolderDirectory(InfoLevel, Cli, File): dir: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Creating dbt configuration folder at {self.dir}" @dataclass -class NoSampleProfileFound(InfoLevel, CliEventABC): +class NoSampleProfileFound(InfoLevel, Cli, File): adapter: str - def cli_msg(self) -> str: + def message(self) -> str: return f"No sample profile found for {self.adapter}." @dataclass -class ProfileWrittenWithSample(InfoLevel, CliEventABC): +class ProfileWrittenWithSample(InfoLevel, Cli, File): name: str path: str - def cli_msg(self) -> str: + def message(self) -> str: return (f"Profile {self.name} written to {self.path} " "using target's sample configuration. Once updated, you'll be able to " "start developing with dbt.") @dataclass -class ProfileWrittenWithTargetTemplateYAML(InfoLevel, CliEventABC): +class ProfileWrittenWithTargetTemplateYAML(InfoLevel, Cli, File): name: str path: str - def cli_msg(self) -> str: + def message(self) -> str: return (f"Profile {self.name} written to {self.path} using target's " "profile_template.yml and your supplied values. Run 'dbt debug' to " "validate the connection.") @dataclass -class ProfileWrittenWithProjectTemplateYAML(InfoLevel, CliEventABC): +class ProfileWrittenWithProjectTemplateYAML(InfoLevel, Cli, File): name: str path: str - def cli_msg(self) -> str: + def message(self) -> str: return (f"Profile {self.name} written to {self.path} using project's " "profile_template.yml and your supplied values. Run 'dbt debug' to " "validate the connection.") -class SettingUpProfile(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class SettingUpProfile(InfoLevel, Cli, File): + def message(self) -> str: return "Setting up your profile." -class InvalidProfileTemplateYAML(InfoLevel, CliEventABC): - def cli_msg(self) -> str: +class InvalidProfileTemplateYAML(InfoLevel, Cli, File): + def message(self) -> str: return "Invalid profile_template.yml in project." @dataclass -class ProjectNameAlreadyExists(InfoLevel, CliEventABC): +class ProjectNameAlreadyExists(InfoLevel, Cli, File): name: str - def cli_msg(self) -> str: + def message(self) -> str: return f"A project called {self.name} already exists here." 
@dataclass -class GetAddendum(InfoLevel, CliEventABC): +class GetAddendum(InfoLevel, Cli, File): msg: str - def cli_msg(self) -> str: + def message(self) -> str: return self.msg @dataclass -class DepsSetDownloadDirectory(DebugLevel, CliEventABC): +class DepsSetDownloadDirectory(DebugLevel, Cli, File): path: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Set downloads directory='{self.path}'" -class EnsureGitInstalled(ErrorLevel, CliEventABC): - def cli_msg(self) -> str: +class EnsureGitInstalled(ErrorLevel, Cli, File): + def message(self) -> str: return ('Make sure git is installed on your machine. More ' 'information: ' 'https://docs.getdbt.com/docs/package-management') -class DepsCreatingLocalSymlink(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class DepsCreatingLocalSymlink(DebugLevel, Cli, File): + def message(self) -> str: return ' Creating symlink to local dependency.' -class DepsSymlinkNotAvailable(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class DepsSymlinkNotAvailable(DebugLevel, Cli, File): + def message(self) -> str: return ' Symlinks are not available on this OS, copying dependency.' @dataclass -class FoundStats(InfoLevel, CliEventABC): +class FoundStats(InfoLevel, Cli, File): stat_line: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Found {self.stat_line}" @dataclass -class CompilingNode(DebugLevel, CliEventABC): +class CompilingNode(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Compiling {self.unique_id}" @dataclass -class WritingInjectedSQLForNode(DebugLevel, CliEventABC): +class WritingInjectedSQLForNode(DebugLevel, Cli, File): unique_id: str - def cli_msg(self) -> str: + def message(self) -> str: return f'Writing injected SQL for node "{self.unique_id}"' -class DisableTracking(WarnLevel, CliEventABC): - def cli_msg(self) -> str: +class DisableTracking(WarnLevel, Cli, File): + def message(self) -> str: return "Error sending message, disabling tracking" @dataclass -class SendingEvent(DebugLevel, CliEventABC): +class SendingEvent(DebugLevel, Cli): kwargs: str - def cli_msg(self) -> str: + def message(self) -> str: return f"Sending event: {self.kwargs}" -class SendEventFailure(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class SendEventFailure(DebugLevel, Cli, File): + def message(self) -> str: return "An error was encountered while trying to send an event" -class FlushEvents(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class FlushEvents(DebugLevel, Cli): + def message(self) -> str: return "Flushing usage events" -class FlushEventsFailure(DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class FlushEventsFailure(DebugLevel, Cli): + def message(self) -> str: return "An error was encountered while trying to flush usage events" -class TrackingInitializeFailure(ShowException, DebugLevel, CliEventABC): - def cli_msg(self) -> str: +class TrackingInitializeFailure(ShowException, DebugLevel, Cli, File): + def message(self) -> str: return "Got an exception trying to initialize tracking" @dataclass -class RetryExternalCall(DebugLevel, CliEventABC): +class RetryExternalCall(DebugLevel, Cli, File): attempt: int max: int - def cli_msg(self) -> str: + def message(self) -> str: return f"Retrying external call. 
Attempt: {self.attempt} Max attempts: {self.max}" @dataclass -class GeneralWarningMsg(WarnLevel, CliEventABC): +class GeneralWarningMsg(WarnLevel, Cli, File): msg: str log_fmt: str - def cli_msg(self) -> str: + def message(self) -> str: if self.log_fmt is not None: return self.log_fmt.format(self.msg) return self.msg @dataclass -class GeneralWarningException(WarnLevel, CliEventABC): +class GeneralWarningException(WarnLevel, Cli, File): exc: Exception log_fmt: str - def cli_msg(self) -> str: + def message(self) -> str: if self.log_fmt is not None: return self.log_fmt.format(str(self.exc)) return str(self.exc) -@dataclass -class SQlRunnerException(ShowException, DebugLevel, CliEventABC): - exc: Exception - - def cli_msg(self) -> str: - return f"Got an exception: {self.exc}" - - # since mypy doesn't run on every file we need to suggest to mypy that every # class gets instantiated. But we don't actually want to run this code. # making the conditional `if False` causes mypy to skip it as dead code so @@ -2067,6 +2130,9 @@ def cli_msg(self) -> str: # # TODO remove these lines once we run mypy everywhere. if 1 == 0: + def dump_callable(): + return {"": [""]} # for instantiating `Dump...` methods which take callables. + MainReportVersion('') MainKeyboardInterrupt() MainEncounteredError(BaseException('')) @@ -2162,6 +2228,8 @@ def cli_msg(self) -> str: SystemStdOutMsg(bmsg=b'') SystemStdErrMsg(bmsg=b'') SystemReportReturnCode(code=0) + SelectorAlertUpto3UnusedNodes(node_names=[]) + SelectorAlertAllUnusedNodes(node_names=[]) SelectorReportInvalidSelector(selector_methods={'': ''}, spec_method='', raw_spec='') MacroEventInfo(msg='') MacroEventDebug(msg='') @@ -2206,8 +2274,6 @@ def cli_msg(self) -> str: PartialParsingUpdateSchemaFile(file_id='') PartialParsingDeletedSource(unique_id='') PartialParsingDeletedExposure(unique_id='') - PartialParsingProjectEnvVarsChanged() - PartialParsingProfileEnvVarsChanged() InvalidDisabledSourceInTestNode(msg='') InvalidRefInTestNode(msg='') MessageHandleGenericException(build_path='', unique_id='', exc=Exception('')) @@ -2318,4 +2384,3 @@ def cli_msg(self) -> str: RetryExternalCall(attempt=0, max=0) GeneralWarningMsg(msg='', log_fmt='') GeneralWarningException(exc=Exception(''), log_fmt='') - SQlRunnerException(exc=Exception('')) diff --git a/core/dbt/logger.py b/core/dbt/logger.py index 468fcb1d021..c11b6c8e6b3 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -181,10 +181,6 @@ def should_handle(self, record): return True -def _redirect_std_logging(): - logbook.compat.redirect_logging() - - def _root_channel(record: logbook.LogRecord) -> str: return record.channel.split('.')[0] @@ -378,8 +374,6 @@ def make_record(self, message, exception, filename, lineno): # push Python warnings to debug level logs. This will suppress all import-time # warnings. 
DebugWarnings().__enter__() -# redirect stdlib logging to logbook -_redirect_std_logging() class DelayedFileHandler(logbook.RotatingFileHandler, FormatterMixin): @@ -430,7 +424,7 @@ def set_path(self, log_dir): return make_log_dir_if_missing(log_dir) - log_path = os.path.join(log_dir, 'dbt.log') + log_path = os.path.join(log_dir, 'dbt.log.old') # TODO hack for now self._super_init(log_path) self._replay_buffered() self._log_path = log_path @@ -582,6 +576,7 @@ def log_cache_events(flag): CACHE_LOGGER.disabled = not flag +logger.disable() GLOBAL_LOGGER = logger diff --git a/core/dbt/main.py b/core/dbt/main.py index 1af838f9f9d..e98bc74fcd5 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -9,7 +9,7 @@ from pathlib import Path import dbt.version -from dbt.events.functions import fire_event +from dbt.events.functions import fire_event, setup_event_logger from dbt.events.types import ( MainEncounteredError, MainKeyboardInterrupt, MainReportVersion, MainReportArgs, MainTrackingUserState, MainStackTrace @@ -237,6 +237,7 @@ def run_from_args(parsed): log_path = getattr(task.config, 'log_path', None) # we can finally set the file logger up log_manager.set_path(log_path) + setup_event_logger(log_path or 'logs') if dbt.tracking.active_user is not None: # mypy appeasement, always true fire_event(MainTrackingUserState(dbt.tracking.active_user.state())) From 2ca6ce688b53c0b406204a74657922a835c4baf2 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 8 Nov 2021 19:01:02 -0500 Subject: [PATCH 26/31] whitespace change --- core/dbt/events/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index 8a95bf301ed..ff3552bfb35 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -185,7 +185,7 @@ def send_exc_to_logger( # (i.e. - mutating the event history, printing to stdout, logging # to files, etc.) def fire_event(e: Event) -> None: - # TODO manage history in phase 2: EVENT_HISTORY.append(e) + # TODO manage history in phase 2: EVENT_HISTORY.append(e) # explicitly checking the debug flag here so that potentially expensive-to-construct # log messages are not constructed if debug messages are never shown. 
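The debug-flag check mentioned in that comment can be sketched as follows. This is a simplified stand-in for fire_event, assuming only the stdout path; the real function also always writes debug-level lines to the file log:

import logging

DEBUG = False  # stand-in for dbt.flags.DEBUG
STDOUT_LOG = logging.getLogger("sketch.stdout")
STDOUT_LOG.setLevel(logging.INFO)
STDOUT_LOG.addHandler(logging.StreamHandler())

def fire_event(e) -> None:
    # Debug-only events are dropped before message() is ever called, so an
    # expensive-to-render message costs nothing unless --debug was passed.
    if e.level_tag() == "debug" and not DEBUG:
        return
    STDOUT_LOG.info(e.message())

class ExampleNodeStart:  # hypothetical event, mirroring NodeStart above
    def level_tag(self) -> str:
        return "debug"

    def message(self) -> str:
        # imagine an expensive serialization here
        return "Began running node model.example.my_model"

fire_event(ExampleNodeStart())  # prints nothing: message() never runs while DEBUG is False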
From ebb84c404f8c9ef2401296449edc3ec57867bbe6 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Mon, 8 Nov 2021 19:07:54 -0500 Subject: [PATCH 27/31] postgres adapter to use new logger --- plugins/postgres/dbt/adapters/postgres/connections.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/postgres/dbt/adapters/postgres/connections.py b/plugins/postgres/dbt/adapters/postgres/connections.py index 7eaf4c1e26c..ac9bfd8e9a5 100644 --- a/plugins/postgres/dbt/adapters/postgres/connections.py +++ b/plugins/postgres/dbt/adapters/postgres/connections.py @@ -6,13 +6,16 @@ from dbt.adapters.base import Credentials from dbt.adapters.sql import SQLConnectionManager from dbt.contracts.connection import AdapterResponse -from dbt.logger import GLOBAL_LOGGER as logger +from dbt.events import AdapterLogger from dbt.helper_types import Port from dataclasses import dataclass from typing import Optional +logger = AdapterLogger("Postgres") + + @dataclass class PostgresCredentials(Credentials): host: str From 683190b711d90a21fb2d5f07bf69e23e51dec568 Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 9 Nov 2021 11:21:33 -0500 Subject: [PATCH 28/31] fixes --- core/dbt/events/functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index ff3552bfb35..5cb9a832de5 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -25,9 +25,8 @@ stdout_handler = logging.StreamHandler() stdout_handler.setLevel(logging.INFO) STDOUT_LOG.addHandler(stdout_handler) -global color + format_color = True -global json format_json = False @@ -38,7 +37,7 @@ def setup_event_logger(log_path): # havent been applied yet this.format_color = True if flags.USE_COLORS else False # TODO this default should live somewhere better - log_dest = os.path.join('logs', 'dbt.log') + log_dest = os.path.join(log_path, 'dbt.log') level = logging.DEBUG if flags.DEBUG else logging.INFO # overwrite the STDOUT_LOG logger with the configured one From 31acb95d7a0ceb809d219ca67a3b8af1e4d325fb Mon Sep 17 00:00:00 2001 From: Nathaniel May Date: Tue, 9 Nov 2021 11:40:18 -0500 Subject: [PATCH 29/31] rebased on main and added new partial parsing event --- core/dbt/events/types.py | 9 +++++++++ core/dbt/parser/partial.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 92a74c375f6..ddb6caa7bf5 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -857,6 +857,14 @@ def message(self) -> str: return "Unable to do partial parsing because env vars used in profiles.yml have changed" +@dataclass +class PartialParsingDeletedMetric(DebugLevel, Cli, File): + id: str + + def message(self) -> str: + return f"Partial parsing: deleted metric {self.id}" + + @dataclass class ManifestWrongMetadataVersion(DebugLevel, Cli, File): version: str @@ -2253,6 +2261,7 @@ def dump_callable(): PartialParsingFailedBecauseProfileChange() PartialParsingFailedBecauseNewProjectDependency() PartialParsingFailedBecauseHashChanged() + PartialParsingDeletedMetric('') ParsedFileLoadFailed(path='', exc=Exception('')) PartialParseSaveFileNotFound() StaticParserCausedJinjaRendering(path='') diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index e465a2a40bf..4bac6a8334b 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -10,7 +10,7 @@ PartialParsingEnabled, PartialParsingAddedFile, PartialParsingDeletedFile, 
PartialParsingUpdatedFile, PartialParsingNodeMissingInSourceFile, PartialParsingMissingNodes,
     PartialParsingChildMapMissingUniqueID, PartialParsingUpdateSchemaFile,
-    PartialParsingDeletedSource, PartialParsingDeletedExposure
+    PartialParsingDeletedSource, PartialParsingDeletedExposure, PartialParsingDeletedMetric
 )
 from dbt.node_types import NodeType
@@ -870,7 +870,7 @@ def delete_schema_metric(self, schema_file, metric_dict):
             self.deleted_manifest.metrics[unique_id] = \
                 self.saved_manifest.metrics.pop(unique_id)
             schema_file.metrics.remove(unique_id)
-            logger.debug(f"Partial parsing: deleted metric {unique_id}")
+            fire_event(PartialParsingDeletedMetric(id=unique_id))

     def get_schema_element(self, elem_list, elem_name):
         for element in elem_list:

From 5e6be1660eb5672c9bfc3d79aa02cec275de7533 Mon Sep 17 00:00:00 2001
From: Nathaniel May
Date: Tue, 9 Nov 2021 16:13:13 -0500
Subject: [PATCH 30/31] configure event logger for integration tests (#4257)

* apply test fixes

* remove presto test
---
 core/dbt/events/functions.py                  | 24 ++++++++++++++++---
 .../001_simple_copy_test/test_simple_copy.py  | 24 +++++--------------
 .../test_query_comments.py                    | 21 ++++++++--------
 test/integration/base.py                      | 13 +++++-----
 4 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py
index 5cb9a832de5..5d83c5cf735 100644
--- a/core/dbt/events/functions.py
+++ b/core/dbt/events/functions.py
@@ -5,10 +5,12 @@
 import dbt.flags as flags
 # TODO this will need to move eventually
 from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing
+import io
+from io import StringIO
 import json
 import logging
 from logging import Logger
-from logging.handlers import WatchedFileHandler
+from logging.handlers import RotatingFileHandler
 import os
 from typing import List

@@ -58,13 +60,29 @@ def setup_event_logger(log_path):

     file_passthrough_formatter = logging.Formatter(fmt=FORMAT)

-    # TODO log rotation is not handled by WatchedFileHandler
-    file_handler = WatchedFileHandler(filename=log_dest, encoding='utf8')
+    file_handler = RotatingFileHandler(filename=log_dest, encoding='utf8')
     file_handler.setFormatter(file_passthrough_formatter)
     file_handler.setLevel(logging.DEBUG)  # always debug regardless of user input
     this.FILE_LOG.addHandler(file_handler)


+# used for integration tests
+def capture_stdout_logs() -> StringIO:
+    capture_buf = io.StringIO()
+    stdout_capture_handler = logging.StreamHandler(capture_buf)
+    stdout_capture_handler.setLevel(logging.DEBUG)
+    this.STDOUT_LOG.addHandler(stdout_capture_handler)
+    return capture_buf
+
+
+# used for integration tests
+def stop_capture_stdout_logs() -> None:
+    this.STDOUT_LOG.handlers = [
+        h for h in this.STDOUT_LOG.handlers
+        if not (hasattr(h, 'stream') and isinstance(h.stream, StringIO))  # type: ignore
+    ]
+
+
 def env_secrets() -> List[str]:
     return [
         v for k, v in os.environ.items()
diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py
index 4fd1d60daf1..bb02f794463 100644
--- a/test/integration/001_simple_copy_test/test_simple_copy.py
+++ b/test/integration/001_simple_copy_test/test_simple_copy.py
@@ -91,20 +91,6 @@ def test__postgres__dbt_doesnt_run_empty_models(self):
         self.assertFalse("empty" in models.keys())
         self.assertFalse("disabled" in models.keys())

-    @use_profile("presto")
-    def test__presto__simple_copy(self):
-        self.use_default_project({"seed-paths": [self.dir("seed-initial")]})
-
-        results = self.run_dbt(["seed"])
-        
self.assertEqual(len(results), 1) - results = self.run_dbt(expect_pass=False) - self.assertEqual(len(results), 7) - for result in results: - if 'incremental' in result.node.name: - self.assertIn('not implemented for presto', result.message) - - self.assertManyTablesEqual(["seed", "view_model", "materialized"]) - class TestShouting(BaseTestSimpleCopy): @property @@ -196,18 +182,20 @@ def seed_get_json(self, expect_pass=True): except ValueError: continue - if log['extra'].get('run_state') != 'internal': - continue + # TODO structured logging does not put out run_state yet + # if log['extra'].get('run_state') != 'internal': + # continue logs.append(log) - self.assertGreater(len(logs), 0) + # empty lists evaluate as False + self.assertTrue(logs) return logs @use_profile('postgres') def test_postgres_no_create_schemas(self): logs = self.seed_get_json() for log in logs: - msg = log['message'] + msg = log['msg'] self.assertFalse( 'create schema if not exists' in msg, f'did not expect schema creation: {msg}' diff --git a/test/integration/051_query_comments_test/test_query_comments.py b/test/integration/051_query_comments_test/test_query_comments.py index 4ada01aa991..aa67b80b9e6 100644 --- a/test/integration/051_query_comments_test/test_query_comments.py +++ b/test/integration/051_query_comments_test/test_query_comments.py @@ -53,27 +53,28 @@ def tearDown(self): super().tearDown() def run_get_json(self, expect_pass=True): - self.run_dbt( + res, raw_logs = self.run_dbt_and_capture( ['--debug', '--log-format=json', 'run'], expect_pass=expect_pass ) - logs = [] - for line in self.stringbuf.getvalue().split('\n'): + + parsed_logs = [] + for line in raw_logs.split('\n'): try: log = json.loads(line) except ValueError: continue - if log['extra'].get('run_state') != 'running': - continue - logs.append(log) - self.assertGreater(len(logs), 0) - return logs + parsed_logs.append(log) + + # empty lists evaluate as False + self.assertTrue(parsed_logs) + return parsed_logs def query_comment(self, model_name, log): # N.B: a temporary string replacement regex to strip the HH:MM:SS from the log line if present. # TODO: make this go away when structured logging is stable - log_msg = re.sub("(?:[01]\d|2[0123]):(?:[012345]\d):(?:[012345]\d \| )", "", log['message']) + log_msg = re.sub("(?:[01]\d|2[0123]):(?:[012345]\d):(?:[012345]\d \| )", "", log['msg']) prefix = 'On {}: '.format(model_name) if log_msg.startswith(prefix): msg = log_msg[len(prefix):] @@ -91,7 +92,7 @@ def run_assert_comments(self): if msg is not None and self.matches_comment(msg): seen = True - self.assertTrue(seen, 'Never saw a matching log message! Logs:\n{}'.format('\n'.join(l['message'] for l in logs))) + self.assertTrue(seen, 'Never saw a matching log message! 
Logs:\n{}'.format('\n'.join(l['msg'] for l in logs))) @use_profile('postgres') def test_postgres_comments(self): diff --git a/test/integration/base.py b/test/integration/base.py index 01eb75130a4..29302897d54 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -24,7 +24,9 @@ from dbt.config import RuntimeConfig from dbt.context import providers from dbt.logger import log_manager -from dbt.events.functions import fire_event +from dbt.events.functions import ( + capture_stdout_logs, fire_event, setup_event_logger, stop_capture_stdout_logs +) from dbt.events.test_types import ( IntegrationTestInfo, IntegrationTestDebug, @@ -314,6 +316,7 @@ def setUp(self): os.chdir(self.initial_dir) # before we go anywhere, collect the initial path info self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix) + setup_event_logger(self._logs_dir) _really_makedirs(self._logs_dir) self.test_original_source_path = _pytest_get_test_root() self.test_root_dir = self._generate_test_root_dir() @@ -524,16 +527,12 @@ def run_dbt(self, args=None, expect_pass=True, profiles_dir=True): def run_dbt_and_capture(self, *args, **kwargs): try: - initial_stdout = log_manager.stdout - initial_stderr = log_manager.stderr - stringbuf = io.StringIO() - log_manager.set_output_stream(stringbuf) - + stringbuf = capture_stdout_logs() res = self.run_dbt(*args, **kwargs) stdout = stringbuf.getvalue() finally: - log_manager.set_output_stream(initial_stdout, initial_stderr) + stop_capture_stdout_logs() return res, stdout From 6dd9c2c5bab6150bda54b58614d88557e4b63f76 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 9 Nov 2021 23:04:47 +0100 Subject: [PATCH 31/31] Env var shim to enable legacy logger (#4255) * Env var shim to reenable logbook * Rename to ENABLE_LEGACY_LOGGER --- core/dbt/events/functions.py | 20 +++++++++++++++++++- core/dbt/flags.py | 1 + core/dbt/logger.py | 3 ++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/core/dbt/events/functions.py b/core/dbt/events/functions.py index 5d83c5cf735..a1ef7bf6244 100644 --- a/core/dbt/events/functions.py +++ b/core/dbt/events/functions.py @@ -4,7 +4,7 @@ from dbt.events.types import Cli, Event, File, ShowException import dbt.flags as flags # TODO this will need to move eventually -from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing +from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing, GLOBAL_LOGGER import io from io import StringIO import json @@ -206,6 +206,24 @@ def fire_event(e: Event) -> None: # explicitly checking the debug flag here so that potentially expensive-to-construct # log messages are not constructed if debug messages are never shown. 
+ # backwards compatibility for plugins that require old logger (dbt-rpc) + if flags.ENABLE_LEGACY_LOGGER: + log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=False) + level_tag = e.level_tag() + if level_tag == 'debug': + GLOBAL_LOGGER.debug(log_line) + elif level_tag == 'info': + GLOBAL_LOGGER.info(log_line) + elif level_tag == 'warn': + GLOBAL_LOGGER.warn(log_line) + elif level_tag == 'error': + GLOBAL_LOGGER.error(log_line) + else: + raise AssertionError( + f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}" + ) + return + # always logs debug level regardless of user input if isinstance(e, File): log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=False) diff --git a/core/dbt/flags.py b/core/dbt/flags.py index a05c7d0ecd8..ce46d0a15f4 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -81,6 +81,7 @@ def env_set_path(key: str) -> Optional[Path]: MACRO_DEBUGGING = env_set_truthy('DBT_MACRO_DEBUGGING') DEFER_MODE = env_set_truthy('DBT_DEFER_TO_STATE') ARTIFACT_STATE_PATH = env_set_path('DBT_ARTIFACT_STATE_PATH') +ENABLE_LEGACY_LOGGER = env_set_truthy('DBT_ENABLE_LEGACY_LOGGER') def _get_context(): diff --git a/core/dbt/logger.py b/core/dbt/logger.py index c11b6c8e6b3..9d8adbe5551 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -576,7 +576,8 @@ def log_cache_events(flag): CACHE_LOGGER.disabled = not flag -logger.disable() +if not dbt.flags.ENABLE_LEGACY_LOGGER: + logger.disable() GLOBAL_LOGGER = logger
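
To opt back into the legacy logbook logger, a user sets the environment variable before invoking dbt. A sketch of the truthy-env-var helper, written to be consistent with how the other env_set_truthy flags in flags.py behave (the exact set of falsy strings is an assumption):

import os
from typing import Optional

def env_set_truthy(key: str) -> Optional[str]:
    # Return the env var's value if it is set to a truthy string, else None.
    value = os.getenv(key)
    if not value or value.lower() in ('0', 'false', 'f'):
        return None
    return value

ENABLE_LEGACY_LOGGER = env_set_truthy('DBT_ENABLE_LEGACY_LOGGER')

# Shell usage (restores the logbook-based logger for plugins that need it, e.g. dbt-rpc):
#   DBT_ENABLE_LEGACY_LOGGER=1 dbt run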