From 2f78af02534759a0a4bc3486b2a878feceee83af Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 13:51:38 -0700 Subject: [PATCH 1/8] hash connection_keys as profile --- core/dbt/parser/manifest.py | 13 +++++++++---- tests/unit/test_parse_manifest.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 0dd3ec08423..0239001d0c0 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -988,10 +988,15 @@ def build_manifest_state_check(self): env_var_str += f"{key}:{config.profile_env_vars[key]}|" profile_env_vars_hash = FileHash.from_contents(env_var_str) - # Create a FileHash of the profile file - profile_path = os.path.join(get_flags().PROFILES_DIR, "profiles.yml") - with open(profile_path) as fp: - profile_hash = FileHash.from_contents(fp.read()) + # Create a hash of the connection keys, which user has access to in + # jinja context. Thus keys here may affect the parsing result. + + # Renaming this variable mean that we will have to do a whole lot more + # change to make sure the previous manifest can be loaded correctly. + # This is an example of naming should be chosen based on the functionality + # rather than the implementation details. + connection_keys = config.credentials._connection_keys() + profile_hash = FileHash.from_contents(pprint.pformat(connection_keys)) # Create a FileHashes for dbt_project for all dependencies project_hashes = {} diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index a3113b78de0..e2db95eb035 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -104,8 +104,29 @@ def _new_file(self, searched, name, match): ) return SourceFile(path=path, checksum=checksum) + def test_profile_change(self): + self.loader.manifest = self._new_manifest() + + self.loader.profile = "new_profile" + self.loader._update_manifest_state_check() + self.assertEqual( + self.loader.manifest.state_check.profile_hash, FileHash.from_contents("new_profile") + ) + class TestPartialParse(unittest.TestCase): + def setUp(self) -> None: + mock_project = MagicMock(RuntimeConfig) + mock_project.cli_vars = "" + mock_project.args = MagicMock() + mock_project.args.profile = "test" + mock_project.args.target = "test" + mock_project.project_env_vars = {} + mock_project.profile_env_vars = {} + mock_project.project_target_path = "mock_target_path" + mock_project.credentials = MagicMock() + self.mock_project = mock_project + @patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check") @patch("dbt.parser.manifest.os.path.exists") @patch("dbt.parser.manifest.open") @@ -122,6 +143,17 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s # if specified in flags, we use the specified path patched_open.assert_called_with("specified_partial_parse_path", "rb") + def test_partial_parse_profile_change(self): + # This test validate that the profile_hash is updated when the connection keys change + profile_hash = "6e94a0aef218fd7aef18b257f0ba9fc33c92a2bc9788fc751868e43ab398137f" + self.mock_project.credentials._connection_keys.return_value = "test" + set_from_args(Namespace(), {}) + manifest = ManifestLoader(self.mock_project, {}) + assert manifest.manifest.state_check.profile_hash.checksum == profile_hash + self.mock_project.credentials._connection_keys.return_value = "test1" + manifest = ManifestLoader(self.mock_project, {}) + assert manifest.manifest.state_check.profile_hash.checksum != profile_hash + class TestFailedPartialParse(unittest.TestCase): @patch("dbt.tracking.track_partial_parser") From 11fc733b4cda271d487a17f51a140dd4ea65079a Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 13:56:08 -0700 Subject: [PATCH 2/8] changlog --- .changes/unreleased/Fixes-20240402-135556.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20240402-135556.yaml diff --git a/.changes/unreleased/Fixes-20240402-135556.yaml b/.changes/unreleased/Fixes-20240402-135556.yaml new file mode 100644 index 00000000000..b6ba62fc0f7 --- /dev/null +++ b/.changes/unreleased/Fixes-20240402-135556.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Exclude password-like fields for considering reparse +time: 2024-04-02T13:55:56.169953-07:00 +custom: + Author: ChenyuLInx + Issue: "9795" From 42bbe7aa5737b6be1e626a34a8dcf9fb603ed3d2 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 13:59:21 -0700 Subject: [PATCH 3/8] nits --- core/dbt/task/debug.py | 3 ++- tests/unit/test_parse_manifest.py | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index ea0f636bd6c..9a902f10b4a 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -273,10 +273,11 @@ def _choose_profile_names(self) -> Tuple[List[str], str]: # try to guess profiles = [] + summary_message = "Could not load dbt_project.yml\n" if self.raw_profile_data: profiles = [k for k in self.raw_profile_data if k != "config"] if project_profile is None: - summary_message = "Could not load dbt_project.yml\n" + summary_message = "The dbt_project.yml has no profile\n" elif len(profiles) == 0: summary_message = "The profiles.yml has no profiles\n" elif len(profiles) == 1: diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index e2db95eb035..9927926fd57 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -104,15 +104,6 @@ def _new_file(self, searched, name, match): ) return SourceFile(path=path, checksum=checksum) - def test_profile_change(self): - self.loader.manifest = self._new_manifest() - - self.loader.profile = "new_profile" - self.loader._update_manifest_state_check() - self.assertEqual( - self.loader.manifest.state_check.profile_hash, FileHash.from_contents("new_profile") - ) - class TestPartialParse(unittest.TestCase): def setUp(self) -> None: From e44b328a19adbf8efc8ce46f1438a59adbc8bb4f Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 14:00:23 -0700 Subject: [PATCH 4/8] nits --- core/dbt/task/debug.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 9a902f10b4a..ea0f636bd6c 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -273,11 +273,10 @@ def _choose_profile_names(self) -> Tuple[List[str], str]: # try to guess profiles = [] - summary_message = "Could not load dbt_project.yml\n" if self.raw_profile_data: profiles = [k for k in self.raw_profile_data if k != "config"] if project_profile is None: - summary_message = "The dbt_project.yml has no profile\n" + summary_message = "Could not load dbt_project.yml\n" elif len(profiles) == 0: summary_message = "The profiles.yml has no profiles\n" elif len(profiles) == 1: From 3ce11efd8d82d14c11e189e72fd044c571a5b53d Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 17:08:12 -0700 Subject: [PATCH 5/8] adjust --- core/dbt/parser/manifest.py | 7 +-- .../partial_parsing/test_pp_profile.py | 50 +++++++++++++++++++ tests/unit/test_parse_manifest.py | 8 +-- 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 tests/functional/partial_parsing/test_pp_profile.py diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 0239001d0c0..166988c758b 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -988,14 +988,15 @@ def build_manifest_state_check(self): env_var_str += f"{key}:{config.profile_env_vars[key]}|" profile_env_vars_hash = FileHash.from_contents(env_var_str) - # Create a hash of the connection keys, which user has access to in - # jinja context. Thus keys here may affect the parsing result. + # Create a hash of the connection_info, which user has access to in + # jinja context. Thus attributes here may affect the parsing result. + # Ideally we should not expose all of the connection info to the jinja. # Renaming this variable mean that we will have to do a whole lot more # change to make sure the previous manifest can be loaded correctly. # This is an example of naming should be chosen based on the functionality # rather than the implementation details. - connection_keys = config.credentials._connection_keys() + connection_keys = list(config.credentials.connection_info()) profile_hash = FileHash.from_contents(pprint.pformat(connection_keys)) # Create a FileHashes for dbt_project for all dependencies diff --git a/tests/functional/partial_parsing/test_pp_profile.py b/tests/functional/partial_parsing/test_pp_profile.py new file mode 100644 index 00000000000..9126d51c10a --- /dev/null +++ b/tests/functional/partial_parsing/test_pp_profile.py @@ -0,0 +1,50 @@ +import os +import yaml + +import pytest + +from dbt.tests.util import run_dbt_and_capture, write_file + +from tests.functional.partial_parsing.fixtures import ( + model_color_sql, +) + + +class TestProfileChanges: + @pytest.fixture(scope="class") + def models(self): + return { + "model_color.sql": model_color_sql, + } + + @pytest.fixture(scope="class") + def dbt_profile_target(self): + return { + "type": "postgres", + "threads": 4, + "host": "localhost", + "port": int(os.getenv("POSTGRES_TEST_PORT", 5432)), + "user": os.getenv("POSTGRES_TEST_USER", "root"), + "pass": os.getenv("POSTGRES_TEST_PASS", "password"), + "dbname": os.getenv("POSTGRES_TEST_DATABASE", "dbt"), + } + + def test_change_password(self, project, dbt_profile_data): + # Fist run not partial parsing + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because saved manifest not found" in stdout + + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout + + # write a different profile + dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because profile has changed" in stdout + + # Change the password + dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index 9927926fd57..e9b86cc51df 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -134,14 +134,14 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s # if specified in flags, we use the specified path patched_open.assert_called_with("specified_partial_parse_path", "rb") - def test_partial_parse_profile_change(self): + def test_profile_hash_change(self): # This test validate that the profile_hash is updated when the connection keys change - profile_hash = "6e94a0aef218fd7aef18b257f0ba9fc33c92a2bc9788fc751868e43ab398137f" - self.mock_project.credentials._connection_keys.return_value = "test" + profile_hash = "7644083797ea2d42b4e7bb1ed4751afc1d8c9566980083c96af826c3486d3531" + self.mock_project.credentials.connection_info.return_value = "test" set_from_args(Namespace(), {}) manifest = ManifestLoader(self.mock_project, {}) assert manifest.manifest.state_check.profile_hash.checksum == profile_hash - self.mock_project.credentials._connection_keys.return_value = "test1" + self.mock_project.credentials.connection_info.return_value = "test1" manifest = ManifestLoader(self.mock_project, {}) assert manifest.manifest.state_check.profile_hash.checksum != profile_hash From c3f438934b17224c8738833043296d7d339bd635 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Tue, 2 Apr 2024 17:15:39 -0700 Subject: [PATCH 6/8] adjust --- .../partial_parsing/test_partial_parsing.py | 29 +++++++++++ .../partial_parsing/test_pp_profile.py | 50 ------------------- 2 files changed, 29 insertions(+), 50 deletions(-) delete mode 100644 tests/functional/partial_parsing/test_pp_profile.py diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index aa38944e678..6b5ba8895cd 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -11,6 +11,7 @@ run_dbt_and_capture, rename_dir, ) +import yaml from tests.functional.utils import up_one from dbt.tests.fixtures.project import write_project_files from tests.functional.partial_parsing.fixtures import ( @@ -824,3 +825,31 @@ def test_pp_renamed_project_dir_changed_project_contents(self, project): run_dbt(["deps"]) len(run_dbt(["--partial-parse", "seed"])) == 1 len(run_dbt(["--partial-parse", "run"])) == 3 + + +class TestProfileChanges: + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": "select 1 as id", + } + + def test_profile_change(self, project, dbt_profile_data): + # Fist run not partial parsing + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because saved manifest not found" in stdout + + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout + + # change dbname which is included in the connection_info + dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because profile has changed" in stdout + + # Change the password which is not included in the connection_info + dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout diff --git a/tests/functional/partial_parsing/test_pp_profile.py b/tests/functional/partial_parsing/test_pp_profile.py deleted file mode 100644 index 9126d51c10a..00000000000 --- a/tests/functional/partial_parsing/test_pp_profile.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import yaml - -import pytest - -from dbt.tests.util import run_dbt_and_capture, write_file - -from tests.functional.partial_parsing.fixtures import ( - model_color_sql, -) - - -class TestProfileChanges: - @pytest.fixture(scope="class") - def models(self): - return { - "model_color.sql": model_color_sql, - } - - @pytest.fixture(scope="class") - def dbt_profile_target(self): - return { - "type": "postgres", - "threads": 4, - "host": "localhost", - "port": int(os.getenv("POSTGRES_TEST_PORT", 5432)), - "user": os.getenv("POSTGRES_TEST_USER", "root"), - "pass": os.getenv("POSTGRES_TEST_PASS", "password"), - "dbname": os.getenv("POSTGRES_TEST_DATABASE", "dbt"), - } - - def test_change_password(self, project, dbt_profile_data): - # Fist run not partial parsing - _, stdout = run_dbt_and_capture(["parse"]) - assert "Unable to do partial parsing because saved manifest not found" in stdout - - _, stdout = run_dbt_and_capture(["parse"]) - assert "Unable to do partial parsing" not in stdout - - # write a different profile - dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2" - write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") - _, stdout = run_dbt_and_capture(["parse"]) - assert "Unable to do partial parsing because profile has changed" in stdout - - # Change the password - dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password" - write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") - _, stdout = run_dbt_and_capture(["parse"]) - assert "Unable to do partial parsing" not in stdout From 1055202b1f1d13c902df5f08d151dfca005a3823 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Wed, 3 Apr 2024 15:59:11 -0700 Subject: [PATCH 7/8] adjust_vars --- core/dbt/parser/manifest.py | 18 ++-------------- .../partial_parsing/test_pp_vars.py | 21 +++++++------------ 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 166988c758b..1681c6f4851 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -835,13 +835,6 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: ) valid = False reparse_reason = ReparseReason.proj_env_vars_changed - if ( - self.manifest.state_check.profile_env_vars_hash - != manifest.state_check.profile_env_vars_hash - ): - fire_event(UnableToPartialParse(reason="env vars used in profiles.yml have changed")) - valid = False - reparse_reason = ReparseReason.prof_env_vars_changed missing_keys = { k @@ -980,14 +973,6 @@ def build_manifest_state_check(self): env_var_str += f"{key}:{config.project_env_vars[key]}|" project_env_vars_hash = FileHash.from_contents(env_var_str) - # Create a FileHash of the env_vars in the project - key_list = list(config.profile_env_vars.keys()) - key_list.sort() - env_var_str = "" - for key in key_list: - env_var_str += f"{key}:{config.profile_env_vars[key]}|" - profile_env_vars_hash = FileHash.from_contents(env_var_str) - # Create a hash of the connection_info, which user has access to in # jinja context. Thus attributes here may affect the parsing result. # Ideally we should not expose all of the connection info to the jinja. @@ -997,6 +982,8 @@ def build_manifest_state_check(self): # This is an example of naming should be chosen based on the functionality # rather than the implementation details. connection_keys = list(config.credentials.connection_info()) + # avoid reparsing because of ordering issues + connection_keys.sort() profile_hash = FileHash.from_contents(pprint.pformat(connection_keys)) # Create a FileHashes for dbt_project for all dependencies @@ -1009,7 +996,6 @@ def build_manifest_state_check(self): # Create the ManifestStateCheck object state_check = ManifestStateCheck( project_env_vars_hash=project_env_vars_hash, - profile_env_vars_hash=profile_env_vars_hash, vars_hash=vars_hash, profile_hash=profile_hash, project_hashes=project_hashes, diff --git a/tests/functional/partial_parsing/test_pp_vars.py b/tests/functional/partial_parsing/test_pp_vars.py index a01e78c6458..aa93f1131dd 100644 --- a/tests/functional/partial_parsing/test_pp_vars.py +++ b/tests/functional/partial_parsing/test_pp_vars.py @@ -314,40 +314,33 @@ def dbt_profile_target(self): # calls 'load_config' before the tests are run. # Note: only the specified profile is rendered, so there's no # point it setting env_vars in non-used profiles. - os.environ["ENV_VAR_USER"] = "root" - os.environ["ENV_VAR_PASS"] = "password" + os.environ["ENV_VAR_HOST"] = "localhost" return { "type": "postgres", "threads": 4, - "host": "localhost", + "host": "{{ env_var('ENV_VAR_HOST') }}", "port": 5432, - "user": "{{ env_var('ENV_VAR_USER') }}", - "pass": "{{ env_var('ENV_VAR_PASS') }}", + "user": "root", + "pass": "password", "dbname": "dbt", } def test_profile_env_vars(self, project, logs_dir): # Initial run - os.environ["ENV_VAR_USER"] = "root" - os.environ["ENV_VAR_PASS"] = "password" + os.environ["ENV_VAR_HOST"] = "localhost" run_dbt(["run"]) - manifest = get_manifest(project.project_root) - env_vars_checksum = manifest.state_check.profile_env_vars_hash.checksum # Change env_vars, the user doesn't exist, this should fail - os.environ["ENV_VAR_USER"] = "fake_user" + os.environ["ENV_VAR_HOST"] = "wrong_host" # N.B. run_dbt_and_capture won't work here because FailedToConnectError ends the test entirely with pytest.raises(FailedToConnectError): run_dbt(["run"], expect_pass=False) log_output = Path(logs_dir, "dbt.log").read_text() - assert "env vars used in profiles.yml have changed" in log_output - - manifest = get_manifest(project.project_root) - assert env_vars_checksum != manifest.state_check.profile_env_vars_hash.checksum + assert "Unable to do partial parsing because profile has changed" in log_output class TestProfileSecretEnvVars: From f07a86b519c0328878453d1fec89a979d819f2eb Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Thu, 4 Apr 2024 09:52:41 -0700 Subject: [PATCH 8/8] nits --- tests/unit/test_parse_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index e9b86cc51df..23d6d51446d 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -136,7 +136,7 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s def test_profile_hash_change(self): # This test validate that the profile_hash is updated when the connection keys change - profile_hash = "7644083797ea2d42b4e7bb1ed4751afc1d8c9566980083c96af826c3486d3531" + profile_hash = "750bc99c1d64ca518536ead26b28465a224be5ffc918bf2a490102faa5a1bcf5" self.mock_project.credentials.connection_info.return_value = "test" set_from_args(Namespace(), {}) manifest = ManifestLoader(self.mock_project, {})