From 95da400880293002538ee3187e6a1c178f604d07 Mon Sep 17 00:00:00 2001
From: Chenyu Li
Date: Thu, 25 Apr 2024 15:59:45 -0700
Subject: [PATCH] move partial parsing tests and add more examples

---
Review notes (free text in this section is ignored by `git am`):
 * make_model() gained a `language` argument but still constructed the
   ModelNode with a hard-coded language="sql", so the "python" fixtures in
   test_partial.py were built as SQL models. The hunk at -96/+103 now
   forwards `language`; the diffstat below is updated to match.
 * Comment wording in test_schedule_macro_nodes_for_parsing_basic was tidied;
   the new file is still 211 lines.
 * The post-image blob ids on the `index` lines of test_partial.py and
   manifest.py predate these two amendments.

 tests/unit/parser/test_partial.py  | 211 +++++++++++++++++++++++++++++
 tests/unit/test_partial_parsing.py | 198 ---------------------------
 tests/unit/utils/manifest.py       |  25 +++-
 3 files changed, 231 insertions(+), 203 deletions(-)
 create mode 100644 tests/unit/parser/test_partial.py
 delete mode 100644 tests/unit/test_partial_parsing.py

diff --git a/tests/unit/parser/test_partial.py b/tests/unit/parser/test_partial.py
new file mode 100644
index 00000000000..84886f4ea48
--- /dev/null
+++ b/tests/unit/parser/test_partial.py
@@ -0,0 +1,211 @@
+from copy import deepcopy
+
+import pytest
+import time
+
+from dbt.parser.partial import PartialParsing
+from dbt.contracts.files import (
+    ParseFileType,
+    SourceFile,
+    SchemaSourceFile,
+    FilePath,
+    FileHash,
+    BaseSourceFile,
+)
+from dbt.node_types import NodeType
+from dbt.tests.util import safe_set_invocation_context
+from tests.unit.utils import normalize
+from tests.unit.utils.manifest import make_model, make_generic_test
+
+
+PROJECT_NAME = "my_test"
+
+
+@pytest.fixture
+def files() -> dict[str, BaseSourceFile]:
+    project_root = "/users/root"
+    sql_model_file = SourceFile(
+        path=FilePath(
+            project_root=project_root,
+            searched_path="models",
+            relative_path="my_model.sql",
+            modification_time=time.time(),
+        ),
+        checksum=FileHash.from_contents("abcdef"),
+        project_name=PROJECT_NAME,
+        parse_file_type=ParseFileType.Model,
+        nodes=["model.my_test.my_model"],
+        env_vars=[],
+    )
+    sql_model_file_untouched = SourceFile(
+        path=FilePath(
+            project_root=project_root,
+            searched_path="models",
+            relative_path="my_model_untouched.sql",
+            modification_time=time.time(),
+        ),
+        checksum=FileHash.from_contents("abcdef"),
+        project_name=PROJECT_NAME,
+        parse_file_type=ParseFileType.Model,
+        nodes=["model.my_test.my_model_untouched"],
+        env_vars=[],
+    )
+
+    python_model_file = SourceFile(
+        path=FilePath(
+            project_root=project_root,
+            searched_path="models",
+            relative_path="python_model.py",
+            modification_time=time.time(),
+        ),
+        checksum=FileHash.from_contents("lalala"),
+        project_name=PROJECT_NAME,
+        parse_file_type=ParseFileType.Model,
+        nodes=["model.my_test.python_model"],
+        env_vars=[],
+    )
+    python_model_file_untouched = SourceFile(
+        path=FilePath(
+            project_root=project_root,
+            searched_path="models",
+            relative_path="python_model_untouched.py",
+            modification_time=time.time(),
+        ),
+        checksum=FileHash.from_contents("lalala"),
+        project_name=PROJECT_NAME,
+        parse_file_type=ParseFileType.Model,
+        nodes=["model.my_test.python_model_untouched"],
+        env_vars=[],
+    )
+    schema_file = SchemaSourceFile(
+        path=FilePath(
+            project_root=project_root,
+            searched_path="models",
+            relative_path="schema.yml",
+            modification_time=time.time(),
+        ),
+        checksum=FileHash.from_contents("ghijkl"),
+        project_name=PROJECT_NAME,
+        parse_file_type=ParseFileType.Schema,
+        dfy={
+            "version": 2,
+            "models": [
+                {"name": "my_model", "description": "Test model"},
+                {"name": "python_model", "description": "python"},
+                {"name": "not_null", "model": "test.my_test.test_my_model"},
+            ],
+        },
+        ndp=["model.my_test.my_model"],
+        env_vars={},
+        data_tests={"models": {"not_null": {"test.my_test.test_my_model": []}}},
+    )
+    return {
+        schema_file.file_id: schema_file,
+        sql_model_file.file_id: sql_model_file,
+        sql_model_file_untouched.file_id: sql_model_file_untouched,
+        python_model_file.file_id: python_model_file,
+        python_model_file_untouched.file_id: python_model_file_untouched,
+    }
+
+
+@pytest.fixture
+def nodes() -> list[NodeType]:
+    patch_path = "my_test://" + normalize("models/schema.yml")
+    my_model = make_model(PROJECT_NAME, "my_model", "", patch_path=patch_path)
+    return [
+        my_model,
+        make_model(PROJECT_NAME, "my_model_untouched", "", patch_path=patch_path),
+        make_model(PROJECT_NAME, "python_model", "", language="python", patch_path=patch_path),
+        make_model(
+            PROJECT_NAME, "python_model_untouched", "", language="python", patch_path=patch_path
+        ),
+        make_generic_test(PROJECT_NAME, "test", my_model, {}),
+    ]
+
+
+@pytest.fixture
+def partial_parsing(manifest, files):
+    safe_set_invocation_context()
+    return PartialParsing(manifest, deepcopy(files))
+
+
+def test_simple(partial_parsing, files, nodes):
+    # Nothing has changed
+    assert partial_parsing is not None
+    assert partial_parsing.skip_parsing() is True
+
+    # Change a model file
+    sql_model_file_id = "my_test://" + normalize("models/my_model.sql")
+    partial_parsing.new_files[sql_model_file_id].checksum = FileHash.from_contents("xyzabc")
+
+    python_model_file_id = "my_test://" + normalize("models/python_model.py")
+    partial_parsing.new_files[python_model_file_id].checksum = FileHash.from_contents("ohohoh")
+
+    partial_parsing.build_file_diff()
+    assert partial_parsing.skip_parsing() is False
+    pp_files = partial_parsing.get_parsing_files()
+    pp_files["my_test"]["ModelParser"] = set(pp_files["my_test"]["ModelParser"])
+    # models has 'patch_path' so we expect to see a SchemaParser file listed
+    schema_file_id = "my_test://" + normalize("models/schema.yml")
+    expected_pp_files = {
+        "my_test": {
+            "ModelParser": set([sql_model_file_id, python_model_file_id]),
+            "SchemaParser": [schema_file_id],
+        }
+    }
+    assert pp_files == expected_pp_files
+    schema_file = files[schema_file_id]
+    schema_file_model_names = set([model["name"] for model in schema_file.pp_dict["models"]])
+    expected_model_names = set(["python_model", "my_model"])
+    assert schema_file_model_names == expected_model_names
+    schema_file_model_descriptions = set(
+        [model["description"] for model in schema_file.pp_dict["models"]]
+    )
+    expected_model_descriptions = set(["Test model", "python"])
+    assert schema_file_model_descriptions == expected_model_descriptions
+
+
+def test_schedule_nodes_for_parsing_basic(partial_parsing, nodes):
+    assert partial_parsing.file_diff["deleted"] == []
+    assert partial_parsing.project_parser_files == {}
+    partial_parsing.schedule_nodes_for_parsing([nodes[0].unique_id])
+    assert partial_parsing.project_parser_files == {
+        "my_test": {
+            "ModelParser": ["my_test://models/my_model.sql"],
+            "SchemaParser": ["my_test://models/schema.yml"],
+        }
+    }
+
+
+def test_schedule_macro_nodes_for_parsing_basic(partial_parsing):
+    # XXX: It is not obvious what exactly this function does.
+    # Whoever changes this function, please add more comments.
+
+    # This relies on the dfy and data_tests fields of the schema file to schedule it for reparsing
+    partial_parsing.schedule_macro_nodes_for_parsing(["test.my_test.test_my_model"])
+    assert partial_parsing.project_parser_files == {
+        "my_test": {"SchemaParser": ["my_test://models/schema.yml"]}
+    }
+
+
+class TestFileDiff:
+    @pytest.fixture
+    def partial_parsing(self, manifest, files):
+        safe_set_invocation_context()
+        saved_files = deepcopy(files)
+        saved_files[
+            "my_test://models/python_model_untouched.py"
+        ].checksum = FileHash.from_contents("something new")
+        return PartialParsing(manifest, saved_files)
+
+    def test_build_file_diff_basic(self, partial_parsing):
+        partial_parsing.build_file_diff()
+        assert set(partial_parsing.file_diff["unchanged"]) == {
+            "my_test://models/my_model_untouched.sql",
+            "my_test://models/my_model.sql",
+            "my_test://models/schema.yml",
+            "my_test://models/python_model.py",
+        }
+        assert partial_parsing.file_diff["changed"] == [
+            "my_test://models/python_model_untouched.py"
+        ]
diff --git a/tests/unit/test_partial_parsing.py b/tests/unit/test_partial_parsing.py
deleted file mode 100644
index beac86abe38..00000000000
--- a/tests/unit/test_partial_parsing.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import unittest
-import time
-
-from dbt.parser.partial import PartialParsing
-from dbt.contracts.graph.manifest import Manifest
-from dbt.contracts.graph.nodes import ModelNode
-from dbt.contracts.files import ParseFileType, SourceFile, SchemaSourceFile, FilePath, FileHash
-from dbt.node_types import NodeType
-from dbt.tests.util import safe_set_invocation_context
-from .utils import normalize
-
-
-class TestPartialParsing(unittest.TestCase):
-    def setUp(self):
-
-        safe_set_invocation_context()
-
-        project_name = "my_test"
-        project_root = "/users/root"
-        sql_model_file = SourceFile(
-            path=FilePath(
-                project_root=project_root,
-                searched_path="models",
-                relative_path="my_model.sql",
-                modification_time=time.time(),
-            ),
-            checksum=FileHash.from_contents("abcdef"),
-            project_name=project_name,
-            parse_file_type=ParseFileType.Model,
-            nodes=["model.my_test.my_model"],
-            env_vars=[],
-        )
-        sql_model_file_untouched = SourceFile(
-            path=FilePath(
-                project_root=project_root,
-                searched_path="models",
-                relative_path="my_model_untouched.sql",
-                modification_time=time.time(),
-            ),
-            checksum=FileHash.from_contents("abcdef"),
-            project_name=project_name,
-            parse_file_type=ParseFileType.Model,
-            nodes=["model.my_test.my_model_untouched"],
-            env_vars=[],
-        )
-
-        python_model_file = SourceFile(
-            path=FilePath(
-                project_root=project_root,
-                searched_path="models",
-                relative_path="python_model.py",
-                modification_time=time.time(),
-            ),
-            checksum=FileHash.from_contents("lalala"),
-            project_name=project_name,
-            parse_file_type=ParseFileType.Model,
-            nodes=["model.my_test.python_model"],
-            env_vars=[],
-        )
-        python_model_file_untouched = SourceFile(
-            path=FilePath(
-                project_root=project_root,
-                searched_path="models",
-                relative_path="python_model_untouched.py",
-                modification_time=time.time(),
-            ),
-            checksum=FileHash.from_contents("lalala"),
-            project_name=project_name,
-            parse_file_type=ParseFileType.Model,
-            nodes=["model.my_test.python_model_untouched"],
-            env_vars=[],
-        )
-        schema_file = SchemaSourceFile(
-            path=FilePath(
-                project_root=project_root,
-                searched_path="models",
-                relative_path="schema.yml",
-                modification_time=time.time(),
-            ),
-            checksum=FileHash.from_contents("ghijkl"),
-            project_name=project_name,
-            parse_file_type=ParseFileType.Schema,
-            dfy={
-                "version": 2,
-                "models": [
-                    {"name": "my_model", "description": "Test model"},
-                    {"name": "python_model", "description": "python"},
-                ],
-            },
-            ndp=["model.my_test.my_model"],
-            env_vars={},
-        )
-        self.saved_files = {
-            schema_file.file_id: schema_file,
-            sql_model_file.file_id: sql_model_file,
-            python_model_file.file_id: python_model_file,
-            sql_model_file_untouched.file_id: sql_model_file_untouched,
-            python_model_file_untouched.file_id: python_model_file_untouched,
-        }
-        sql_model_node = self.get_model("my_model")
-        sql_model_node_untouched = self.get_model("my_model_untouched")
-        python_model_node = self.get_python_model("python_model")
-        python_model_node_untouched = self.get_python_model("python_model_untouched")
-        nodes = {
-            sql_model_node.unique_id: sql_model_node,
-            python_model_node.unique_id: python_model_node,
-            sql_model_node_untouched.unique_id: sql_model_node_untouched,
-            python_model_node_untouched.unique_id: python_model_node_untouched,
-        }
-        self.saved_manifest = Manifest(files=self.saved_files, nodes=nodes)
-        self.new_files = {
-            sql_model_file.file_id: SourceFile.from_dict(sql_model_file.to_dict()),
-            python_model_file.file_id: SourceFile.from_dict(python_model_file.to_dict()),
-            sql_model_file_untouched.file_id: SourceFile.from_dict(
-                sql_model_file_untouched.to_dict()
-            ),
-            python_model_file_untouched.file_id: SourceFile.from_dict(
-                python_model_file_untouched.to_dict()
-            ),
-            schema_file.file_id: SchemaSourceFile.from_dict(schema_file.to_dict()),
-        }
-
-        self.partial_parsing = PartialParsing(self.saved_manifest, self.new_files)
-
-    def get_model(self, name):
-        return ModelNode(
-            package_name="my_test",
-            path=f"{name}.sql",
-            original_file_path=f"models/{name}.sql",
-            language="sql",
-            raw_code="select * from wherever",
-            name=name,
-            resource_type=NodeType.Model,
-            unique_id=f"model.my_test.{name}",
-            fqn=["my_test", "models", name],
-            database="test_db",
-            schema="test_schema",
-            alias="bar",
-            checksum=FileHash.from_contents(""),
-            patch_path="my_test://" + normalize("models/schema.yml"),
-        )
-
-    def get_python_model(self, name):
-        return ModelNode(
-            package_name="my_test",
-            path=f"{name}.py",
-            original_file_path=f"models/{name}.py",
-            raw_code="import something",
-            language="python",
-            name=name,
-            resource_type=NodeType.Model,
-            unique_id=f"model.my_test.{name}",
-            fqn=["my_test", "models", name],
-            database="test_db",
-            schema="test_schema",
-            alias="bar",
-            checksum=FileHash.from_contents(""),
-            patch_path="my_test://" + normalize("models/schema.yml"),
-        )
-
-    def test_simple(self):
-        # Nothing has changed
-        self.assertIsNotNone(self.partial_parsing)
-        self.assertTrue(self.partial_parsing.skip_parsing())
-
-        # Change a model file
-        sql_model_file_id = "my_test://" + normalize("models/my_model.sql")
-        self.partial_parsing.new_files[sql_model_file_id].checksum = FileHash.from_contents(
-            "xyzabc"
-        )
-
-        python_model_file_id = "my_test://" + normalize("models/python_model.py")
-        self.partial_parsing.new_files[python_model_file_id].checksum = FileHash.from_contents(
-            "ohohoh"
-        )
-
-        self.partial_parsing.build_file_diff()
-        self.assertFalse(self.partial_parsing.skip_parsing())
-        pp_files = self.partial_parsing.get_parsing_files()
-        pp_files["my_test"]["ModelParser"] = set(pp_files["my_test"]["ModelParser"])
-        # models has 'patch_path' so we expect to see a SchemaParser file listed
-        schema_file_id = "my_test://" + normalize("models/schema.yml")
-        expected_pp_files = {
-            "my_test": {
-                "ModelParser": set([sql_model_file_id, python_model_file_id]),
-                "SchemaParser": [schema_file_id],
-            }
-        }
-        self.assertEqual(pp_files, expected_pp_files)
-        schema_file = self.saved_files[schema_file_id]
-        schema_file_model_names = set([model["name"] for model in schema_file.pp_dict["models"]])
-        expected_model_names = set(["python_model", "my_model"])
-        self.assertEqual(schema_file_model_names, expected_model_names)
-        schema_file_model_descriptions = set(
-            [model["description"] for model in schema_file.pp_dict["models"]]
-        )
-        expected_model_descriptions = set(["Test model", "python"])
-        self.assertEqual(schema_file_model_descriptions, expected_model_descriptions)
diff --git a/tests/unit/utils/manifest.py b/tests/unit/utils/manifest.py
index 2f56570df41..1c69c130f6f 100644
--- a/tests/unit/utils/manifest.py
+++ b/tests/unit/utils/manifest.py
@@ -48,7 +48,8 @@
 def make_model(
     pkg,
     name,
-    sql,
+    code,
+    language="sql",
     refs=None,
     sources=None,
     tags=None,
@@ -60,6 +61,7 @@ def make_model(
     version=None,
     latest_version=None,
     access=None,
+    patch_path=None,
 ):
     if refs is None:
         refs = []
@@ -68,7 +70,12 @@ def make_model(
     if tags is None:
         tags = []
     if path is None:
-        path = f"{name}.sql"
+        if language == "sql":
+            path = f"{name}.sql"
+        elif language == "python":
+            path = f"{name}.py"
+        else:
+            raise ValueError(f"Unknown language: {language}")
     if alias is None:
         alias = name
     if config_kwargs is None:
@@ -96,7 +103,7 @@ def make_model(
 
     return ModelNode(
-        language="sql",
-        raw_code=sql,
+        language=language,
+        raw_code=code,
         database="dbt",
         schema="dbt_schema",
         alias=alias,
@@ -119,6 +126,7 @@ def make_model(
         version=version,
         latest_version=latest_version,
         access=access or AccessType.Protected,
+        patch_path=patch_path,
     )
 
 
@@ -977,6 +985,11 @@ def semantic_models() -> list:
     return []
 
 
+@pytest.fixture
+def files() -> dict:
+    return {}
+
+
 @pytest.fixture
 def manifest(
     metric,
@@ -987,6 +1000,7 @@ def manifest(
     unit_tests,
     metrics,
     semantic_models,
+    files,
 ) -> Manifest:
     manifest = Manifest(
         nodes={n.unique_id: n for n in nodes},
@@ -995,7 +1009,7 @@ def manifest(
         unit_tests={t.unique_id: t for t in unit_tests},
         semantic_models={s.unique_id: s for s in semantic_models},
         docs={},
-        files={},
+        files=files,
         exposures={},
         metrics={m.unique_id: m for m in metrics},
         disabled={},
@@ -1003,4 +1017,5 @@ def manifest(
         groups={},
         metadata=ManifestMetadata(adapter_type="postgres"),
     )
+    manifest.build_parent_and_child_maps()
     return manifest