From 5071b00baac3ff2a9bcce565d09661605de52654 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 25 Mar 2022 17:09:35 +0100 Subject: [PATCH] Custom names for generic tests (#4898) * Support user-supplied name for generic tests * Support dict-style generic test spec * Add changelog entry * Add TODO comment * Rework raise_duplicate_resource_name * Add functional tests * Update comments, rm TODO * PR feedback --- .../unreleased/Features-20220318-085756.yaml | 7 ++ core/dbt/exceptions.py | 56 ++++++++---- core/dbt/parser/generic_test_builders.py | 61 +++++++++---- .../test_duplicate_exposure.py | 2 +- .../test_duplicate_model.py | 4 +- .../test_duplicate_source.py | 2 +- tests/functional/schema_tests/fixtures.py | 91 +++++++++++++++++++ .../schema_tests/test_schema_v2_tests.py | 62 ++++++++++++- 8 files changed, 243 insertions(+), 42 deletions(-) create mode 100644 .changes/unreleased/Features-20220318-085756.yaml diff --git a/.changes/unreleased/Features-20220318-085756.yaml b/.changes/unreleased/Features-20220318-085756.yaml new file mode 100644 index 00000000000..8b734db7797 --- /dev/null +++ b/.changes/unreleased/Features-20220318-085756.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Support custom names for generic tests +time: 2022-03-18T08:57:56.05584+01:00 +custom: + Author: jtcohen6 + Issue: "3348" + PR: "4898" diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 79f9a6c0c62..b760c3a9d3a 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -838,31 +838,47 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn: def raise_duplicate_resource_name(node_1, node_2): duped_name = node_1.name + node_type = NodeType(node_1.resource_type) + pluralized = ( + node_type.pluralize() + if node_1.resource_type == node_2.resource_type + else "resources" # still raise if ref() collision, e.g. model + seed + ) - if node_1.resource_type in NodeType.refable(): - get_func = 'ref("{}")'.format(duped_name) - elif node_1.resource_type == NodeType.Source: + action = "looking for" + # duplicate 'ref' targets + if node_type in NodeType.refable(): + formatted_name = f'ref("{duped_name}")' + # duplicate sources + elif node_type == NodeType.Source: duped_name = node_1.get_full_source_name() - get_func = node_1.get_source_representation() - elif node_1.resource_type == NodeType.Documentation: - get_func = 'doc("{}")'.format(duped_name) - elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags: - return + formatted_name = node_1.get_source_representation() + # duplicate docs blocks + elif node_type == NodeType.Documentation: + formatted_name = f'doc("{duped_name}")' + # duplicate generic tests + elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"): + column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else "" + model_name = node_1.file_key_name + duped_name = f'{node_1.name}" defined on {column_name}"{model_name}' + action = "running" + formatted_name = "tests" + # all other resource types else: - get_func = '"{}"'.format(duped_name) + formatted_name = duped_name + # should this be raise_parsing_error instead? raise_compiler_error( - 'dbt found two resources with the name "{}". Since these resources ' - "have the same name,\ndbt will be unable to find the correct resource " - "when {} is used. To fix this,\nchange the name of one of " - "these resources:\n- {} ({})\n- {} ({})".format( - duped_name, - get_func, - node_1.unique_id, - node_1.original_file_path, - node_2.unique_id, - node_2.original_file_path, - ) + f""" +dbt found two {pluralized} with the name "{duped_name}". + +Since these resources have the same name, dbt will be unable to find the correct resource +when {action} {formatted_name}. + +To fix this, change the name of one of these resources: +- {node_1.unique_id} ({node_1.original_file_path}) +- {node_2.unique_id} ({node_2.original_file_path}) + """.strip() ) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index 3d522dec7ce..fce7905e575 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -25,9 +25,13 @@ from dbt.parser.search import FileBlock -def get_nice_generic_test_name( +def synthesize_generic_test_names( test_type: str, test_name: str, args: Dict[str, Any] ) -> Tuple[str, str]: + # Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name + # Will not be unique if multiple tests have same name + arguments, and only configs differ + # Returns a shorter version (hashed/truncated, for the compiled file) + # as well as the full name (for the unique_id + FQN) flat_args = [] for arg_name in sorted(args): # the model is already embedded in the name, so skip it @@ -263,13 +267,25 @@ def __init__( if self.namespace is not None: self.package_name = self.namespace - compiled_name, fqn_name = self.get_test_name() - self.compiled_name: str = compiled_name - self.fqn_name: str = fqn_name - - # use hashed name as alias if too long - if compiled_name != fqn_name and "alias" not in self.config: - self.config["alias"] = compiled_name + # If the user has provided a custom name for this generic test, use it + # Then delete the "name" argument to avoid passing it into the test macro + # Otherwise, use an auto-generated name synthesized from test inputs + self.compiled_name: str = "" + self.fqn_name: str = "" + + if "name" in self.args: + # Assign the user-defined name here, which will be checked for uniqueness later + # we will raise an error if two tests have same name for same model + column combo + self.compiled_name = self.args["name"] + self.fqn_name = self.args["name"] + del self.args["name"] + else: + short_name, full_name = self.get_synthetic_test_names() + self.compiled_name = short_name + self.fqn_name = full_name + # use hashed name as alias if full name is too long + if short_name != full_name and "alias" not in self.config: + self.config["alias"] = short_name def _bad_type(self) -> TypeError: return TypeError('invalid target type "{}"'.format(type(self.target))) @@ -281,13 +297,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]: "test must be dict or str, got {} (value {})".format(type(test), test) ) - test = list(test.items()) - if len(test) != 1: - raise_parsing_error( - "test definition dictionary must have exactly one key, got" - " {} instead ({} keys)".format(test, len(test)) - ) - test_name, test_args = test[0] + # If the test is a dictionary with top-level keys, the test name is "test_name" + # and the rest are arguments + # {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}} + if "test_name" in test.keys(): + test_name = test.pop("test_name") + test_args = test + # If the test is a nested dictionary with one top-level key, the test name + # is the dict name, and nested keys are arguments + # {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}} + else: + test = list(test.items()) + if len(test) != 1: + raise_parsing_error( + "test definition dictionary must have exactly one key, got" + " {} instead ({} keys)".format(test, len(test)) + ) + test_name, test_args = test[0] if not isinstance(test_args, dict): raise_parsing_error( @@ -401,7 +427,8 @@ def macro_name(self) -> str: macro_name = "{}.{}".format(self.namespace, macro_name) return macro_name - def get_test_name(self) -> Tuple[str, str]: + def get_synthetic_test_names(self) -> Tuple[str, str]: + # Returns two names: shorter (for the compiled file), full (for the unique_id + FQN) if isinstance(self.target, UnparsedNodeUpdate): name = self.name elif isinstance(self.target, UnpatchedSourceDefinition): @@ -410,7 +437,7 @@ def get_test_name(self) -> Tuple[str, str]: raise self._bad_type() if self.namespace is not None: name = "{}_{}".format(self.namespace, name) - return get_nice_generic_test_name(name, self.target.name, self.args) + return synthesize_generic_test_names(name, self.target.name, self.args) def construct_config(self) -> str: configs = ",".join( diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py b/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py index 33bcb3b7892..d9890cf906c 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_exposure(self): - message = "dbt found two resources with the name" + message = "dbt found two exposures with the name" try: self.run_dbt(["compile"]) self.assertTrue(False, "dbt did not throw for duplicate exposures") diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_model.py b/test/integration/025_duplicate_model_tests/test_duplicate_model.py index ca34186adf8..8088ab91929 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_model.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_model.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_model_enabled(self): - message = "dbt found two resources with the name" + message = "dbt found two models with the name" try: self.run_dbt(["run"]) self.assertTrue(False, "dbt did not throw for duplicate models") @@ -80,7 +80,7 @@ def packages_config(self): @use_profile("postgres") def test_postgres_duplicate_model_enabled_across_packages(self): self.run_dbt(["deps"]) - message = "dbt found two resources with the name" + message = "dbt found two models with the name" try: self.run_dbt(["run"]) self.assertTrue(False, "dbt did not throw for duplicate models") diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_source.py b/test/integration/025_duplicate_model_tests/test_duplicate_source.py index 614f600f0c7..7d4facd8af2 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_source.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_source.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_source_enabled(self): - message = "dbt found two resources with the name" + message = "dbt found two sources with the name" try: self.run_dbt(["compile"]) self.assertTrue(False, "dbt did not throw for duplicate sources") diff --git a/tests/functional/schema_tests/fixtures.py b/tests/functional/schema_tests/fixtures.py index e97cc2fe01e..d024dec5468 100644 --- a/tests/functional/schema_tests/fixtures.py +++ b/tests/functional/schema_tests/fixtures.py @@ -420,6 +420,73 @@ SELECT 'NOT_NULL' AS id """ + +dupe_generic_tests_collide__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - not_null: + config: + where: "1=1" + - not_null: + config: + where: "1=2" + +""" + +dupe_generic_tests_collide__model_a = """ +SELECT 'NOT_NULL' AS id +""" + + +custom_generic_test_names__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - not_null: + name: not_null_where_1_equals_1 + config: + where: "1=1" + - not_null: + name: not_null_where_1_equals_2 + config: + where: "1=2" + +""" + +custom_generic_test_names__model_a = """ +SELECT 'NOT_NULL' AS id +""" + +custom_generic_test_names_alt_format__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - name: not_null_where_1_equals_1 + test_name: not_null + config: + where: "1=1" + - name: not_null_where_1_equals_2 + test_name: not_null + config: + where: "1=2" + +""" + +custom_generic_test_names_alt_format__model_a = """ +SELECT 'NOT_NULL' AS id +""" + + test_context_where_subq_macros__custom_generic_test_sql = """ /*{# This test will fail if get_where_subquery() is missing from TestContext + TestMacroNamespace #}*/ @@ -1266,6 +1333,30 @@ def name_collision(): } +@pytest.fixture(scope="class") +def dupe_tests_collide(): + return { + "schema.yml": dupe_generic_tests_collide__schema_yml, + "model_a.sql": dupe_generic_tests_collide__model_a, + } + + +@pytest.fixture(scope="class") +def custom_generic_test_names(): + return { + "schema.yml": custom_generic_test_names__schema_yml, + "model_a.sql": custom_generic_test_names__model_a, + } + + +@pytest.fixture(scope="class") +def custom_generic_test_names_alt_format(): + return { + "schema.yml": custom_generic_test_names_alt_format__schema_yml, + "model_a.sql": custom_generic_test_names_alt_format__model_a, + } + + @pytest.fixture(scope="class") def test_context_where_subq_macros(): return {"custom_generic_test.sql": test_context_where_subq_macros__custom_generic_test_sql} diff --git a/tests/functional/schema_tests/test_schema_v2_tests.py b/tests/functional/schema_tests/test_schema_v2_tests.py index 5b49d44242e..3fa5a361da1 100644 --- a/tests/functional/schema_tests/test_schema_v2_tests.py +++ b/tests/functional/schema_tests/test_schema_v2_tests.py @@ -16,6 +16,9 @@ seeds, test_context_models, name_collision, + dupe_tests_collide, + custom_generic_test_names, + custom_generic_test_names_alt_format, test_context_where_subq_macros, invalid_schema_models, all_models, @@ -25,7 +28,7 @@ project_files, case_sensitive_models__uppercase_SQL, ) -from dbt.exceptions import ParsingException +from dbt.exceptions import ParsingException, CompilationException from dbt.contracts.results import TestStatus @@ -658,6 +661,63 @@ def test_collision_test_names_get_hash( assert test_results[1].node.unique_id in expected_unique_ids +class TestGenericTestsCollide: + @pytest.fixture(scope="class") + def models(self, dupe_tests_collide): # noqa: F811 + return dupe_tests_collide + + def test_generic_test_collision( + self, + project, + ): + """These tests collide, since only the configs differ""" + with pytest.raises(CompilationException) as exc: + run_dbt() + assert "dbt found two tests with the name" in str(exc) + + +class TestGenericTestsCustomNames: + @pytest.fixture(scope="class") + def models(self, custom_generic_test_names): # noqa: F811 + return custom_generic_test_names + + # users can define custom names for specific instances of generic tests + def test_generic_tests_with_custom_names( + self, + project, + ): + """These tests don't collide, since they have user-provided custom names""" + results = run_dbt() + test_results = run_dbt(["test"]) + + # model + both tests run + assert len(results) == 1 + assert len(test_results) == 2 + + # custom names propagate to the unique_id + expected_unique_ids = [ + "test.test.not_null_where_1_equals_1.7b96089006", + "test.test.not_null_where_1_equals_2.8ae586e17f", + ] + assert test_results[0].node.unique_id in expected_unique_ids + assert test_results[1].node.unique_id in expected_unique_ids + + +class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames): + @pytest.fixture(scope="class") + def models(self, custom_generic_test_names_alt_format): # noqa: F811 + return custom_generic_test_names_alt_format + + # exactly as above, just alternative format for yaml definition + def test_collision_test_names_get_hash( + self, + project, + ): + """These tests don't collide, since they have user-provided custom names, + defined using an alternative format""" + super().test_generic_tests_with_custom_names(project) + + class TestInvalidSchema: @pytest.fixture(scope="class") def models(self, invalid_schema_models): # noqa: F811