Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Exclude password-like fields for considering reparse #9844

Merged
merged 8 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240402-135556.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Exclude password-like fields for considering reparse
time: 2024-04-02T13:55:56.169953-07:00
custom:
Author: ChenyuLInx
Issue: "9795"
30 changes: 11 additions & 19 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,13 +835,6 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
)
valid = False
reparse_reason = ReparseReason.proj_env_vars_changed
if (
self.manifest.state_check.profile_env_vars_hash
!= manifest.state_check.profile_env_vars_hash
):
fire_event(UnableToPartialParse(reason="env vars used in profiles.yml have changed"))
valid = False
reparse_reason = ReparseReason.prof_env_vars_changed

missing_keys = {
k
Expand Down Expand Up @@ -980,18 +973,18 @@ def build_manifest_state_check(self):
env_var_str += f"{key}:{config.project_env_vars[key]}|"
project_env_vars_hash = FileHash.from_contents(env_var_str)

# Create a FileHash of the env_vars in the project
key_list = list(config.profile_env_vars.keys())
key_list.sort()
env_var_str = ""
for key in key_list:
env_var_str += f"{key}:{config.profile_env_vars[key]}|"
profile_env_vars_hash = FileHash.from_contents(env_var_str)
# Create a hash of the connection_info, which user has access to in
# jinja context. Thus attributes here may affect the parsing result.
# Ideally we should not expose all of the connection info to the jinja.

# Create a FileHash of the profile file
profile_path = os.path.join(get_flags().PROFILES_DIR, "profiles.yml")
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
# Renaming this variable mean that we will have to do a whole lot more
# change to make sure the previous manifest can be loaded correctly.
# This is an example of naming should be chosen based on the functionality
# rather than the implementation details.
connection_keys = list(config.credentials.connection_info())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want to convert connection_info to a list, that removes the values from the connection_info dictionary. We need to preserve them, because it's changes in the values that force a re-parse. In other places in the code we hash a dictionary for profiles_env_var_hash and and project_env_vars_hash, but any method that turns a dictionary into something hashable would work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, we don't really need profile_env_vars_hash anymore after doing this since the env_vars would be resolved. So we might want to remove that part, since it will cause extra churn in parts of the profile that may not be being used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So connection_info returns a iterator, after converting to a list it will look like [(key1, value1), (key2, value2)].
I will get the profile env_var_hash part adjusted

# avoid reparsing because of ordering issues
connection_keys.sort()
profile_hash = FileHash.from_contents(pprint.pformat(connection_keys))

# Create a FileHashes for dbt_project for all dependencies
project_hashes = {}
Expand All @@ -1003,7 +996,6 @@ def build_manifest_state_check(self):
# Create the ManifestStateCheck object
state_check = ManifestStateCheck(
project_env_vars_hash=project_env_vars_hash,
profile_env_vars_hash=profile_env_vars_hash,
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
Expand Down
29 changes: 29 additions & 0 deletions tests/functional/partial_parsing/test_partial_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
run_dbt_and_capture,
rename_dir,
)
import yaml
from tests.functional.utils import up_one
from dbt.tests.fixtures.project import write_project_files
from tests.functional.partial_parsing.fixtures import (
Expand Down Expand Up @@ -824,3 +825,31 @@ def test_pp_renamed_project_dir_changed_project_contents(self, project):
run_dbt(["deps"])
len(run_dbt(["--partial-parse", "seed"])) == 1
len(run_dbt(["--partial-parse", "run"])) == 3


class TestProfileChanges:
@pytest.fixture(scope="class")
def models(self):
return {
"model.sql": "select 1 as id",
}

def test_profile_change(self, project, dbt_profile_data):
# Fist run not partial parsing
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because saved manifest not found" in stdout

_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout

# change dbname which is included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because profile has changed" in stdout

# Change the password which is not included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout
21 changes: 7 additions & 14 deletions tests/functional/partial_parsing/test_pp_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,40 +314,33 @@ def dbt_profile_target(self):
# calls 'load_config' before the tests are run.
# Note: only the specified profile is rendered, so there's no
# point it setting env_vars in non-used profiles.
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"
return {
"type": "postgres",
"threads": 4,
"host": "localhost",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gshank I think the adjust to the test here makes sense since user and pass are not in connection_info

"host": "{{ env_var('ENV_VAR_HOST') }}",
"port": 5432,
"user": "{{ env_var('ENV_VAR_USER') }}",
"pass": "{{ env_var('ENV_VAR_PASS') }}",
"user": "root",
"pass": "password",
"dbname": "dbt",
}

def test_profile_env_vars(self, project, logs_dir):

# Initial run
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"

run_dbt(["run"])
manifest = get_manifest(project.project_root)
env_vars_checksum = manifest.state_check.profile_env_vars_hash.checksum

# Change env_vars, the user doesn't exist, this should fail
os.environ["ENV_VAR_USER"] = "fake_user"
os.environ["ENV_VAR_HOST"] = "wrong_host"

# N.B. run_dbt_and_capture won't work here because FailedToConnectError ends the test entirely
with pytest.raises(FailedToConnectError):
run_dbt(["run"], expect_pass=False)

log_output = Path(logs_dir, "dbt.log").read_text()
assert "env vars used in profiles.yml have changed" in log_output

manifest = get_manifest(project.project_root)
assert env_vars_checksum != manifest.state_check.profile_env_vars_hash.checksum
assert "Unable to do partial parsing because profile has changed" in log_output


class TestProfileSecretEnvVars:
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_parse_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ def _new_file(self, searched, name, match):


class TestPartialParse(unittest.TestCase):
def setUp(self) -> None:
mock_project = MagicMock(RuntimeConfig)
mock_project.cli_vars = ""
mock_project.args = MagicMock()
mock_project.args.profile = "test"
mock_project.args.target = "test"
mock_project.project_env_vars = {}
mock_project.profile_env_vars = {}
mock_project.project_target_path = "mock_target_path"
mock_project.credentials = MagicMock()
self.mock_project = mock_project

@patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check")
@patch("dbt.parser.manifest.os.path.exists")
@patch("dbt.parser.manifest.open")
Expand All @@ -122,6 +134,17 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s
# if specified in flags, we use the specified path
patched_open.assert_called_with("specified_partial_parse_path", "rb")

def test_profile_hash_change(self):
# This test validate that the profile_hash is updated when the connection keys change
profile_hash = "750bc99c1d64ca518536ead26b28465a224be5ffc918bf2a490102faa5a1bcf5"
self.mock_project.credentials.connection_info.return_value = "test"
set_from_args(Namespace(), {})
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum == profile_hash
self.mock_project.credentials.connection_info.return_value = "test1"
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum != profile_hash


class TestFailedPartialParse(unittest.TestCase):
@patch("dbt.tracking.track_partial_parser")
Expand Down
Loading