From d522ad6ff72191ac3bee7df8b22cdfdf278ea8ec Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 09:25:58 -0600 Subject: [PATCH 01/10] add some event tests --- tests/unit/test_events.py | 136 ++++++++++++++++++++++++++++++++ tests/unit/test_proto_events.py | 78 ++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 tests/unit/test_events.py create mode 100644 tests/unit/test_proto_events.py diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py new file mode 100644 index 00000000..31333eda --- /dev/null +++ b/tests/unit/test_events.py @@ -0,0 +1,136 @@ +import re +from argparse import Namespace +from typing import TypeVar + +import pytest + +# from dbt.adapters.events import types as adapter_types +from dbt_common.events.event_manager_client import ctx_set_event_manager +# from dbt.artifacts.schemas.results import TimingInfo, RunStatus +# from dbt.artifacts.schemas.run import RunResult +from dbt_common.events import types +# from dbt.adapters.events.logging import AdapterLogger +from dbt_common.events.base_types import msg_from_base_event +# from dbt.events import types as core_types +from dbt_common.events.base_types import ( + BaseEvent, + DebugLevel, + DynamicLevel, + ErrorLevel, + InfoLevel, + TestLevel, + WarnLevel, +) +from dbt_common.events.event_manager import TestEventManager, EventManager +from dbt_common.events.functions import msg_to_dict, msg_to_json + + +# takes in a class and finds any subclasses for it +def get_all_subclasses(cls): + all_subclasses = [] + for subclass in cls.__subclasses__(): + if subclass not in [TestLevel, DebugLevel, WarnLevel, InfoLevel, ErrorLevel, DynamicLevel]: + all_subclasses.append(subclass) + all_subclasses.extend(get_all_subclasses(subclass)) + return set(all_subclasses) + + +class TestEventCodes: + + # checks to see if event codes are duplicated to keep codes singluar and clear. + # also checks that event codes follow correct namming convention ex. E001 + def test_event_codes(self): + all_concrete = get_all_subclasses(BaseEvent) + all_codes = set() + + for event_cls in all_concrete: + code = event_cls.code(event_cls) + # must be in the form 1 capital letter, 3 digits + assert re.match("^[A-Z][0-9]{3}", code) + # cannot have been used already + assert ( + code not in all_codes + ), f"{code} is assigned more than once. Check types.py for duplicates." + all_codes.add(code) + + +sample_values = [ + # N.B. Events instantiated here include the module prefix in order to + # avoid having the entire list twice in the code. + # M - Deps generation ====================== + types.RetryExternalCall(attempt=0, max=0), + types.RecordRetryException(exc=""), + # Z - misc ====================== + types.SystemCouldNotWrite(path="", reason="", exc=""), + types.SystemExecutingCmd(cmd=[""]), + types.SystemStdOut(bmsg=str(b"")), + types.SystemStdErr(bmsg=str(b"")), + types.SystemReportReturnCode(returncode=0), + types.Formatting(), + types.Note(msg="This is a note."), +] + + +class TestEventJSONSerialization: + + # attempts to test that every event is serializable to json. + # event types that take `Any` are not possible to test in this way since some will serialize + # just fine and others won't. + def test_all_serializable(self): + all_non_abstract_events = set( + get_all_subclasses(BaseEvent), + ) + all_event_values_list = list(map(lambda x: x.__class__, sample_values)) + diff = all_non_abstract_events.difference(set(all_event_values_list)) + assert ( + not diff + ), f"{diff}test is missing concrete values in `sample_values`. Please add the values for the aforementioned event classes" + + # make sure everything in the list is a value not a type + for event in sample_values: + assert type(event) != type + + # if we have everything we need to test, try to serialize everything + count = 0 + for event in sample_values: + msg = msg_from_base_event(event) + print(f"--- msg: {msg.info.name}") + # Serialize to dictionary + try: + msg_to_dict(msg) + except Exception as e: + raise Exception( + f"{event} can not be converted to a dict. Originating exception: {e}" + ) + # Serialize to json + try: + msg_to_json(msg) + except Exception as e: + raise Exception(f"{event} is not serializable to json. Originating exception: {e}") + # Serialize to binary + try: + msg.SerializeToString() + except Exception as e: + raise Exception( + f"{event} is not serializable to binary protobuf. Originating exception: {e}" + ) + count += 1 + print(f"--- Found {count} events") + + +def test_bad_serialization(): + """Tests that bad serialization enters the proper exception handling + + When pytest is in use the exception handling of `BaseEvent` raises an + exception. When pytest isn't present, it fires a Note event. Thus to test + that bad serializations are properly handled, the best we can do is test + that the exception handling path is used. + """ + + with pytest.raises(Exception) as excinfo: + types.Note(param_event_doesnt_have="This should break") + + assert ( + str(excinfo.value) + == "[Note]: Unable to parse dict {'param_event_doesnt_have': 'This should break'}" + ) diff --git a/tests/unit/test_proto_events.py b/tests/unit/test_proto_events.py new file mode 100644 index 00000000..e29675d7 --- /dev/null +++ b/tests/unit/test_proto_events.py @@ -0,0 +1,78 @@ +from dbt_common.events.functions import msg_to_dict, msg_to_json, reset_metadata_vars +from dbt_common.events import types_pb2 +from dbt_common.events.base_types import msg_from_base_event +from dbt_common.events.types import ( + RetryExternalCall +) +from dbt_common.events import types_pb2 +from google.protobuf.json_format import MessageToDict + +info_keys = { + "name", + "code", + "msg", + "level", + "invocation_id", + "pid", + "thread", + "ts", + "extra", + "category", +} + + +def test_events(): + + # M020 event + event_code = "M020" + event = RetryExternalCall(attempt=3, max=5) + msg = msg_from_base_event(event) + msg_dict = msg_to_dict(msg) + msg_json = msg_to_json(msg) + serialized = msg.SerializeToString() + assert "Retrying external call. Attempt: 3" in str(serialized) + assert set(msg_dict.keys()) == {"info", "data"} + assert set(msg_dict["data"].keys()) == {"attempt", "max"} + assert set(msg_dict["info"].keys()) == info_keys + assert msg_json + assert msg.info.code == event_code + + # Extract EventInfo from serialized message + generic_msg = types_pb2.GenericMessage() + generic_msg.ParseFromString(serialized) + assert generic_msg.info.code == event_code + # get the message class for the real message from the generic message + message_class = getattr(types_pb2, f"{generic_msg.info.name}Msg") + new_msg = message_class() + new_msg.ParseFromString(serialized) + assert new_msg.info.code == msg.info.code + assert new_msg.data.attempt == msg.data.attempt + +def test_extra_dict_on_event(monkeypatch): + + monkeypatch.setenv("DBT_ENV_CUSTOM_ENV_env_key", "env_value") + + reset_metadata_vars() + + event_code = "M020" + event = RetryExternalCall(attempt=3, max=5) + msg = msg_from_base_event(event) + msg_dict = msg_to_dict(msg) + assert set(msg_dict["info"].keys()) == info_keys + extra_dict = {"env_key": "env_value"} + assert msg.info.extra == extra_dict + serialized = msg.SerializeToString() + + # Extract EventInfo from serialized message + generic_msg = types_pb2.GenericMessage() + generic_msg.ParseFromString(serialized) + assert generic_msg.info.code == event_code + # get the message class for the real message from the generic message + message_class = getattr(types_pb2, f"{generic_msg.info.name}Msg") + new_msg = message_class() + new_msg.ParseFromString(serialized) + new_msg_dict = MessageToDict(new_msg) + assert new_msg_dict["info"]["extra"] == msg.info.extra + + # clean up + reset_metadata_vars() From a343ce568ac2e88787cd1c1b28caa2b09c6b4a28 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 09:30:37 -0600 Subject: [PATCH 02/10] code cleanup --- tests/unit/test_events.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index 31333eda..4130bb15 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -1,17 +1,9 @@ import re -from argparse import Namespace -from typing import TypeVar import pytest -# from dbt.adapters.events import types as adapter_types -from dbt_common.events.event_manager_client import ctx_set_event_manager -# from dbt.artifacts.schemas.results import TimingInfo, RunStatus -# from dbt.artifacts.schemas.run import RunResult from dbt_common.events import types -# from dbt.adapters.events.logging import AdapterLogger from dbt_common.events.base_types import msg_from_base_event -# from dbt.events import types as core_types from dbt_common.events.base_types import ( BaseEvent, DebugLevel, @@ -21,7 +13,6 @@ TestLevel, WarnLevel, ) -from dbt_common.events.event_manager import TestEventManager, EventManager from dbt_common.events.functions import msg_to_dict, msg_to_json @@ -117,7 +108,6 @@ def test_all_serializable(self): count += 1 print(f"--- Found {count} events") - def test_bad_serialization(): """Tests that bad serialization enters the proper exception handling From 3284faa72041a9bbe23f392ec4c5cb58dc75b0ec Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 09:34:53 -0600 Subject: [PATCH 03/10] put back error --- dbt_common/events/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_common/events/functions.py b/dbt_common/events/functions.py index 6402c791..fe29a543 100644 --- a/dbt_common/events/functions.py +++ b/dbt_common/events/functions.py @@ -94,7 +94,7 @@ def msg_to_dict(msg: EventMsg) -> dict: msg_dict = dict() try: msg_dict = MessageToDict( - msg, + msg.data, preserving_proto_field_name=True, including_default_value_fields=True, # type: ignore ) From f374f1ea9fd38a1e3bf48bc9a76b72c53ddfaa61 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 09:42:02 -0600 Subject: [PATCH 04/10] actually run precommit --- tests/unit/test_events.py | 5 ++--- tests/unit/test_proto_events.py | 8 ++------ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index 4130bb15..aaa993e4 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -27,7 +27,6 @@ def get_all_subclasses(cls): class TestEventCodes: - # checks to see if event codes are duplicated to keep codes singluar and clear. # also checks that event codes follow correct namming convention ex. E001 def test_event_codes(self): @@ -63,7 +62,6 @@ def test_event_codes(self): class TestEventJSONSerialization: - # attempts to test that every event is serializable to json. # event types that take `Any` are not possible to test in this way since some will serialize # just fine and others won't. @@ -79,7 +77,7 @@ def test_all_serializable(self): # make sure everything in the list is a value not a type for event in sample_values: - assert type(event) != type + assert not isinstance(event, type) # if we have everything we need to test, try to serialize everything count = 0 @@ -108,6 +106,7 @@ def test_all_serializable(self): count += 1 print(f"--- Found {count} events") + def test_bad_serialization(): """Tests that bad serialization enters the proper exception handling diff --git a/tests/unit/test_proto_events.py b/tests/unit/test_proto_events.py index e29675d7..32eb08ae 100644 --- a/tests/unit/test_proto_events.py +++ b/tests/unit/test_proto_events.py @@ -1,10 +1,7 @@ from dbt_common.events.functions import msg_to_dict, msg_to_json, reset_metadata_vars from dbt_common.events import types_pb2 from dbt_common.events.base_types import msg_from_base_event -from dbt_common.events.types import ( - RetryExternalCall -) -from dbt_common.events import types_pb2 +from dbt_common.events.types import RetryExternalCall from google.protobuf.json_format import MessageToDict info_keys = { @@ -22,7 +19,6 @@ def test_events(): - # M020 event event_code = "M020" event = RetryExternalCall(attempt=3, max=5) @@ -48,8 +44,8 @@ def test_events(): assert new_msg.info.code == msg.info.code assert new_msg.data.attempt == msg.data.attempt -def test_extra_dict_on_event(monkeypatch): +def test_extra_dict_on_event(monkeypatch): monkeypatch.setenv("DBT_ENV_CUSTOM_ENV_env_key", "env_value") reset_metadata_vars() From e172840cced47f8ff1fc9e0310c89bdb8584511d Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 10:08:54 -0600 Subject: [PATCH 05/10] remove - From 96f27acb5134349065f85e0fb6849d0ed85eca99 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 10:26:47 -0600 Subject: [PATCH 06/10] add back codecov flags --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index acacc238..03e046d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,6 @@ features = ["test"] [tool.hatch.envs.test.scripts] unit = "python -m pytest --cov=dbt_common --cov-report=xml {args:tests/unit}" - ### Linting settings, envs & scripts [tool.hatch.envs.lint] From de285236f5f0e1042e99e6d5a99cf44ac3754c94 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 10:53:13 -0600 Subject: [PATCH 07/10] add back missing coverage From 6daca2c052214cf6defda8273bdf23a037b0dc00 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 30 Jan 2024 10:55:10 -0600 Subject: [PATCH 08/10] remove dash again From e908c41f55bb95acd0dfea99e682fee7ac6d4339 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 31 Jan 2024 14:08:16 -0600 Subject: [PATCH 09/10] re-undo msg to dict functionality --- dbt_common/events/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_common/events/functions.py b/dbt_common/events/functions.py index fe29a543..6402c791 100644 --- a/dbt_common/events/functions.py +++ b/dbt_common/events/functions.py @@ -94,7 +94,7 @@ def msg_to_dict(msg: EventMsg) -> dict: msg_dict = dict() try: msg_dict = MessageToDict( - msg.data, + msg, preserving_proto_field_name=True, including_default_value_fields=True, # type: ignore ) From ec10b291e7212280880d82a6e8451525a9e28768 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 6 Feb 2024 17:24:54 -0600 Subject: [PATCH 10/10] pr feedback --- tests/unit/test_events.py | 48 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index aaa993e4..e91b5787 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -44,44 +44,46 @@ def test_event_codes(self): all_codes.add(code) -sample_values = [ - # N.B. Events instantiated here include the module prefix in order to - # avoid having the entire list twice in the code. - # M - Deps generation ====================== - types.RetryExternalCall(attempt=0, max=0), - types.RecordRetryException(exc=""), - # Z - misc ====================== - types.SystemCouldNotWrite(path="", reason="", exc=""), - types.SystemExecutingCmd(cmd=[""]), - types.SystemStdOut(bmsg=str(b"")), - types.SystemStdErr(bmsg=str(b"")), - types.SystemReportReturnCode(returncode=0), - types.Formatting(), - types.Note(msg="This is a note."), -] +class TestEventJSONSerialization: + """Attempts to test that every event is serializable to json. + + event types that take `Any` are not possible to test in this way since some will serialize + just fine and others won't. + """ + SAMPLE_VALUES = [ + # N.B. Events instantiated here include the module prefix in order to + # avoid having the entire list twice in the code. + # M - Deps generation ====================== + types.RetryExternalCall(attempt=0, max=0), + types.RecordRetryException(exc=""), + # Z - misc ====================== + types.SystemCouldNotWrite(path="", reason="", exc=""), + types.SystemExecutingCmd(cmd=[""]), + types.SystemStdOut(bmsg=str(b"")), + types.SystemStdErr(bmsg=str(b"")), + types.SystemReportReturnCode(returncode=0), + types.Formatting(), + types.Note(msg="This is a note."), + ] -class TestEventJSONSerialization: - # attempts to test that every event is serializable to json. - # event types that take `Any` are not possible to test in this way since some will serialize - # just fine and others won't. def test_all_serializable(self): all_non_abstract_events = set( get_all_subclasses(BaseEvent), ) - all_event_values_list = list(map(lambda x: x.__class__, sample_values)) + all_event_values_list = list(map(lambda x: x.__class__, self.SAMPLE_VALUES)) diff = all_non_abstract_events.difference(set(all_event_values_list)) assert ( not diff - ), f"{diff}test is missing concrete values in `sample_values`. Please add the values for the aforementioned event classes" + ), f"{diff}test is missing concrete values in `SAMPLE_VALUES`. Please add the values for the aforementioned event classes" # make sure everything in the list is a value not a type - for event in sample_values: + for event in self.SAMPLE_VALUES: assert not isinstance(event, type) # if we have everything we need to test, try to serialize everything count = 0 - for event in sample_values: + for event in self.SAMPLE_VALUES: msg = msg_from_base_event(event) print(f"--- msg: {msg.info.name}") # Serialize to dictionary