From edac4db3073d7ec9a821917aa0aa34f35bf1a884 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Thu, 4 Apr 2024 14:34:31 -0700 Subject: [PATCH] [BUG] Exclude password-like fields for considering reparse (#9844) (cherry picked from commit ebc22fa26c9cb26b6deef86cc0a7ceb1ee3fb642) --- .../unreleased/Fixes-20240402-135556.yaml | 6 ++++ core/dbt/parser/manifest.py | 30 +++++++------------ .../partial_parsing/test_partial_parsing.py | 29 ++++++++++++++++++ .../partial_parsing/test_pp_vars.py | 21 +++++-------- tests/unit/test_parse_manifest.py | 23 ++++++++++++++ 5 files changed, 76 insertions(+), 33 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240402-135556.yaml diff --git a/.changes/unreleased/Fixes-20240402-135556.yaml b/.changes/unreleased/Fixes-20240402-135556.yaml new file mode 100644 index 00000000000..b6ba62fc0f7 --- /dev/null +++ b/.changes/unreleased/Fixes-20240402-135556.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Exclude password-like fields for considering reparse +time: 2024-04-02T13:55:56.169953-07:00 +custom: + Author: ChenyuLInx + Issue: "9795" diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 0dd3ec08423..1681c6f4851 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -835,13 +835,6 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: ) valid = False reparse_reason = ReparseReason.proj_env_vars_changed - if ( - self.manifest.state_check.profile_env_vars_hash - != manifest.state_check.profile_env_vars_hash - ): - fire_event(UnableToPartialParse(reason="env vars used in profiles.yml have changed")) - valid = False - reparse_reason = ReparseReason.prof_env_vars_changed missing_keys = { k @@ -980,18 +973,18 @@ def build_manifest_state_check(self): env_var_str += f"{key}:{config.project_env_vars[key]}|" project_env_vars_hash = FileHash.from_contents(env_var_str) - # Create a FileHash of the env_vars in the project - key_list = list(config.profile_env_vars.keys()) - key_list.sort() - env_var_str = "" - for key in key_list: - env_var_str += f"{key}:{config.profile_env_vars[key]}|" - profile_env_vars_hash = FileHash.from_contents(env_var_str) + # Create a hash of the connection_info, which user has access to in + # jinja context. Thus attributes here may affect the parsing result. + # Ideally we should not expose all of the connection info to the jinja. - # Create a FileHash of the profile file - profile_path = os.path.join(get_flags().PROFILES_DIR, "profiles.yml") - with open(profile_path) as fp: - profile_hash = FileHash.from_contents(fp.read()) + # Renaming this variable mean that we will have to do a whole lot more + # change to make sure the previous manifest can be loaded correctly. + # This is an example of naming should be chosen based on the functionality + # rather than the implementation details. + connection_keys = list(config.credentials.connection_info()) + # avoid reparsing because of ordering issues + connection_keys.sort() + profile_hash = FileHash.from_contents(pprint.pformat(connection_keys)) # Create a FileHashes for dbt_project for all dependencies project_hashes = {} @@ -1003,7 +996,6 @@ def build_manifest_state_check(self): # Create the ManifestStateCheck object state_check = ManifestStateCheck( project_env_vars_hash=project_env_vars_hash, - profile_env_vars_hash=profile_env_vars_hash, vars_hash=vars_hash, profile_hash=profile_hash, project_hashes=project_hashes, diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index aa38944e678..6b5ba8895cd 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -11,6 +11,7 @@ run_dbt_and_capture, rename_dir, ) +import yaml from tests.functional.utils import up_one from dbt.tests.fixtures.project import write_project_files from tests.functional.partial_parsing.fixtures import ( @@ -824,3 +825,31 @@ def test_pp_renamed_project_dir_changed_project_contents(self, project): run_dbt(["deps"]) len(run_dbt(["--partial-parse", "seed"])) == 1 len(run_dbt(["--partial-parse", "run"])) == 3 + + +class TestProfileChanges: + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": "select 1 as id", + } + + def test_profile_change(self, project, dbt_profile_data): + # Fist run not partial parsing + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because saved manifest not found" in stdout + + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout + + # change dbname which is included in the connection_info + dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing because profile has changed" in stdout + + # Change the password which is not included in the connection_info + dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password" + write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml") + _, stdout = run_dbt_and_capture(["parse"]) + assert "Unable to do partial parsing" not in stdout diff --git a/tests/functional/partial_parsing/test_pp_vars.py b/tests/functional/partial_parsing/test_pp_vars.py index a01e78c6458..aa93f1131dd 100644 --- a/tests/functional/partial_parsing/test_pp_vars.py +++ b/tests/functional/partial_parsing/test_pp_vars.py @@ -314,40 +314,33 @@ def dbt_profile_target(self): # calls 'load_config' before the tests are run. # Note: only the specified profile is rendered, so there's no # point it setting env_vars in non-used profiles. - os.environ["ENV_VAR_USER"] = "root" - os.environ["ENV_VAR_PASS"] = "password" + os.environ["ENV_VAR_HOST"] = "localhost" return { "type": "postgres", "threads": 4, - "host": "localhost", + "host": "{{ env_var('ENV_VAR_HOST') }}", "port": 5432, - "user": "{{ env_var('ENV_VAR_USER') }}", - "pass": "{{ env_var('ENV_VAR_PASS') }}", + "user": "root", + "pass": "password", "dbname": "dbt", } def test_profile_env_vars(self, project, logs_dir): # Initial run - os.environ["ENV_VAR_USER"] = "root" - os.environ["ENV_VAR_PASS"] = "password" + os.environ["ENV_VAR_HOST"] = "localhost" run_dbt(["run"]) - manifest = get_manifest(project.project_root) - env_vars_checksum = manifest.state_check.profile_env_vars_hash.checksum # Change env_vars, the user doesn't exist, this should fail - os.environ["ENV_VAR_USER"] = "fake_user" + os.environ["ENV_VAR_HOST"] = "wrong_host" # N.B. run_dbt_and_capture won't work here because FailedToConnectError ends the test entirely with pytest.raises(FailedToConnectError): run_dbt(["run"], expect_pass=False) log_output = Path(logs_dir, "dbt.log").read_text() - assert "env vars used in profiles.yml have changed" in log_output - - manifest = get_manifest(project.project_root) - assert env_vars_checksum != manifest.state_check.profile_env_vars_hash.checksum + assert "Unable to do partial parsing because profile has changed" in log_output class TestProfileSecretEnvVars: diff --git a/tests/unit/test_parse_manifest.py b/tests/unit/test_parse_manifest.py index a3113b78de0..23d6d51446d 100644 --- a/tests/unit/test_parse_manifest.py +++ b/tests/unit/test_parse_manifest.py @@ -106,6 +106,18 @@ def _new_file(self, searched, name, match): class TestPartialParse(unittest.TestCase): + def setUp(self) -> None: + mock_project = MagicMock(RuntimeConfig) + mock_project.cli_vars = "" + mock_project.args = MagicMock() + mock_project.args.profile = "test" + mock_project.args.target = "test" + mock_project.project_env_vars = {} + mock_project.profile_env_vars = {} + mock_project.project_target_path = "mock_target_path" + mock_project.credentials = MagicMock() + self.mock_project = mock_project + @patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check") @patch("dbt.parser.manifest.os.path.exists") @patch("dbt.parser.manifest.open") @@ -122,6 +134,17 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s # if specified in flags, we use the specified path patched_open.assert_called_with("specified_partial_parse_path", "rb") + def test_profile_hash_change(self): + # This test validate that the profile_hash is updated when the connection keys change + profile_hash = "750bc99c1d64ca518536ead26b28465a224be5ffc918bf2a490102faa5a1bcf5" + self.mock_project.credentials.connection_info.return_value = "test" + set_from_args(Namespace(), {}) + manifest = ManifestLoader(self.mock_project, {}) + assert manifest.manifest.state_check.profile_hash.checksum == profile_hash + self.mock_project.credentials.connection_info.return_value = "test1" + manifest = ManifestLoader(self.mock_project, {}) + assert manifest.manifest.state_check.profile_hash.checksum != profile_hash + class TestFailedPartialParse(unittest.TestCase): @patch("dbt.tracking.track_partial_parser")