diff --git a/.changes/unreleased/Fixes-20230912-153002.yaml b/.changes/unreleased/Fixes-20230912-153002.yaml new file mode 100644 index 00000000000..86382cdbfb4 --- /dev/null +++ b/.changes/unreleased/Fixes-20230912-153002.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Disallow cleaning paths outside current working directory +time: 2023-09-12T15:30:02.089967+01:00 +custom: + Author: aranke + Issue: "8318" diff --git a/core/dbt/cli/main.py b/core/dbt/cli/main.py index a8613693285..071e4492cb4 100644 --- a/core/dbt/cli/main.py +++ b/core/dbt/cli/main.py @@ -224,6 +224,7 @@ def build(ctx, **kwargs): @cli.command("clean") @click.pass_context @global_flags +@p.clean_project_files_only @p.profile @p.profiles_dir @p.project_dir diff --git a/core/dbt/cli/params.py b/core/dbt/cli/params.py index da2facec302..1e696347d04 100644 --- a/core/dbt/cli/params.py +++ b/core/dbt/cli/params.py @@ -423,6 +423,13 @@ default=True, ) +clean_project_files_only = click.option( + "--clean-project-files-only / --no-clean-project-files-only", + envvar="DBT_CLEAN_PROJECT_FILES_ONLY", + help="If disabled, dbt clean will delete all paths specified in clean-paths, even if they're outside the dbt project.", + default=True, +) + show = click.option( "--show", envvar=None, diff --git a/core/dbt/constants.py b/core/dbt/constants.py index f52ac23fefe..f213154bb51 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -9,6 +9,7 @@ "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" ) +DBT_PROJECT_FILE_NAME = "dbt_project.yml" PACKAGES_FILE_NAME = "packages.yml" DEPENDENCIES_FILE_NAME = "dependencies.yml" MANIFEST_FILE_NAME = "manifest.json" diff --git a/core/dbt/task/base.py b/core/dbt/task/base.py index dcffce7b454..a4c9b526008 100644 --- a/core/dbt/task/base.py +++ b/core/dbt/task/base.py @@ -5,6 +5,7 @@ from abc import ABCMeta, abstractmethod from contextlib import nullcontext from datetime import datetime +from pathlib import Path from typing import Any, Dict, List, Optional, Type, Union import dbt.exceptions @@ -12,6 +13,7 @@ from dbt.adapters.factory import get_adapter from dbt.config import RuntimeConfig, Project from dbt.config.profile import read_profile +from dbt.constants import DBT_PROJECT_FILE_NAME from dbt.contracts.graph.manifest import Manifest from dbt.contracts.results import ( NodeStatus, @@ -45,7 +47,7 @@ from dbt.flags import get_flags from dbt.graph import Graph from dbt.logger import log_manager -from .printer import print_run_result_error +from dbt.task.printer import print_run_result_error class NoneConfig: @@ -117,35 +119,32 @@ def interpret_results(self, results): return True -def get_nearest_project_dir(project_dir: Optional[str]) -> str: +def get_nearest_project_dir(project_dir: Optional[str]) -> Path: # If the user provides an explicit project directory, use that # but don't look at parent directories. if project_dir: - project_file = os.path.join(project_dir, "dbt_project.yml") - if os.path.exists(project_file): - return project_dir + cur_dir = Path(project_dir) + project_file = Path(project_dir) / DBT_PROJECT_FILE_NAME + if project_file.is_file(): + return cur_dir else: raise dbt.exceptions.DbtRuntimeError( "fatal: Invalid --project-dir flag. Not a dbt project. " "Missing dbt_project.yml file" ) - root_path = os.path.abspath(os.sep) - cwd = os.getcwd() - - while cwd != root_path: - project_file = os.path.join(cwd, "dbt_project.yml") - if os.path.exists(project_file): - return cwd - cwd = os.path.dirname(cwd) - - raise dbt.exceptions.DbtRuntimeError( - "fatal: Not a dbt project (or any of the parent directories). " - "Missing dbt_project.yml file" - ) + cur_dir = Path.cwd() + project_file = cur_dir / DBT_PROJECT_FILE_NAME + if project_file.is_file(): + return cur_dir + else: + raise dbt.exceptions.DbtRuntimeError( + "fatal: Not a dbt project (or any of the parent directories). " + "Missing dbt_project.yml file" + ) -def move_to_nearest_project_dir(project_dir: Optional[str]) -> str: +def move_to_nearest_project_dir(project_dir: Optional[str]) -> Path: nearest_project_dir = get_nearest_project_dir(project_dir) os.chdir(nearest_project_dir) return nearest_project_dir diff --git a/core/dbt/task/clean.py b/core/dbt/task/clean.py index 6f31fc81ecd..18b64e0b091 100644 --- a/core/dbt/task/clean.py +++ b/core/dbt/task/clean.py @@ -1,16 +1,14 @@ -import os.path -import os -import shutil -from typing import List +from pathlib import Path +from shutil import rmtree from dbt import deprecations from dbt.events.functions import fire_event from dbt.events.types import ( CheckCleanPath, ConfirmCleanPath, - ProtectedCleanPath, FinishedCleanPaths, ) +from dbt.exceptions import DbtRuntimeError from dbt.task.base import ( BaseTask, move_to_nearest_project_dir, @@ -23,32 +21,36 @@ def run(self): This function takes all the paths in the target file and cleans the project paths that are not protected. """ - move_to_nearest_project_dir(self.args.project_dir) + project_dir = move_to_nearest_project_dir(self.args.project_dir) + + potential_clean_paths = set(Path(p).resolve() for p in self.project.clean_targets) + source_paths = set( + Path(p).resolve() for p in (*self.project.all_source_paths, *self.project.test_paths) + ) + clean_paths = potential_clean_paths.difference(source_paths) + + if potential_clean_paths != clean_paths: + raise DbtRuntimeError( + f"dbt will not clean the following source paths: {[str(s) for s in source_paths.intersection(potential_clean_paths)]}" + ) + + paths_outside_project = set( + path for path in clean_paths if project_dir not in path.absolute().parents + ) + if paths_outside_project and self.args.clean_project_files_only: + raise DbtRuntimeError( + f"dbt will not clean the following directories outside the project: {[str(p) for p in paths_outside_project]}" + ) + if ( "dbt_modules" in self.project.clean_targets and self.config.packages_install_path not in self.config.clean_targets ): deprecations.warn("install-packages-path") - for path in self.project.clean_targets: - fire_event(CheckCleanPath(path=path)) - if not is_protected_path(path, self.project.model_paths, self.project.test_paths): - shutil.rmtree(path, True) - fire_event(ConfirmCleanPath(path=path)) - else: - fire_event(ProtectedCleanPath(path=path)) - - fire_event(FinishedCleanPaths()) + for path in clean_paths: + fire_event(CheckCleanPath(path=str(path))) + rmtree(path, True) + fire_event(ConfirmCleanPath(path=str(path))) -def is_protected_path(path: str, model_paths: List[str], test_paths: List[str]) -> bool: - """This function identifies protected paths.""" - abs_path = os.path.abspath(path) - protected_paths = model_paths + test_paths + ["."] - protected_abs_paths = [os.path.abspath(p) for p in protected_paths] - return abs_path in set(protected_abs_paths) or is_project_path(abs_path) - - -def is_project_path(path: str) -> bool: - """This function identifies project paths.""" - proj_path = os.path.abspath(".") - return not os.path.commonprefix([proj_path, os.path.abspath(path)]) == proj_path + fire_event(FinishedCleanPaths()) diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 415f6949e68..e19bf99d547 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -6,6 +6,7 @@ from collections import namedtuple from enum import Flag +from pathlib import Path from typing import Optional, Dict, Any, List, Tuple from dbt.events.functions import fire_event @@ -86,7 +87,7 @@ def __init__(self, args, config) -> None: if args.project_dir: self.project_dir = args.project_dir else: - self.project_dir = os.getcwd() + self.project_dir = Path.cwd() self.project_path = os.path.join(self.project_dir, "dbt_project.yml") self.cli_vars: Dict[str, Any] = args.vars @@ -340,7 +341,7 @@ def _load_project(self) -> SubtaskStatus: try: self.project = Project.from_project_root( - self.project_dir, + str(self.project_dir), renderer, verify_version=self.args.VERSION_CHECK, ) diff --git a/tests/functional/clean/test_clean.py b/tests/functional/clean/test_clean.py new file mode 100644 index 00000000000..5da8144f48f --- /dev/null +++ b/tests/functional/clean/test_clean.py @@ -0,0 +1,56 @@ +import pytest + +from dbt.exceptions import DbtRuntimeError +from dbt.tests.util import run_dbt + + +class TestCleanSourcePath: + @pytest.fixture(scope="class") + def project_config_update(self): + return "clean-targets: ['models']" + + def test_clean_source_path(self, project): + with pytest.raises(DbtRuntimeError, match="dbt will not clean the following source paths"): + run_dbt(["clean"]) + + +class TestCleanPathOutsideProjectRelative: + @pytest.fixture(scope="class") + def project_config_update(self): + return "clean-targets: ['..']" + + def test_clean_path_outside_project(self, project): + with pytest.raises( + DbtRuntimeError, + match="dbt will not clean the following directories outside the project", + ): + run_dbt(["clean"]) + + +class TestCleanPathOutsideProjectAbsolute: + @pytest.fixture(scope="class") + def project_config_update(self): + return "clean-targets: ['/']" + + def test_clean_path_outside_project(self, project): + with pytest.raises( + DbtRuntimeError, + match="dbt will not clean the following directories outside the project", + ): + run_dbt(["clean"]) + + +class TestCleanPathOutsideProjectWithFlag: + @pytest.fixture(scope="class") + def project_config_update(self): + return "clean-targets: ['/tmp/foo']" + + def test_clean_path_outside_project(self, project): + # Doesn't fail because flag is set + run_dbt(["clean", "--no-clean-project-files-only"]) + + with pytest.raises( + DbtRuntimeError, + match="dbt will not clean the following directories outside the project", + ): + run_dbt(["clean", "--clean-project-files-only"]) diff --git a/tests/functional/dependencies/test_local_dependency.py b/tests/functional/dependencies/test_local_dependency.py index c7a9f01cc0a..5305659b95b 100644 --- a/tests/functional/dependencies/test_local_dependency.py +++ b/tests/functional/dependencies/test_local_dependency.py @@ -8,13 +8,13 @@ from pathlib import Path from unittest import mock -from contextlib import contextmanager import dbt.semver import dbt.config import dbt.exceptions from dbt.tests.util import check_relations_equal, run_dbt, run_dbt_and_capture +from tests.functional.utils import up_one models__dep_source = """ {# If our dependency source didn't exist, this would be an errror #} @@ -83,16 +83,6 @@ """ -@contextmanager -def up_one(): - current_path = Path.cwd() - os.chdir("../") - try: - yield - finally: - os.chdir(current_path) - - class BaseDependencyTest(object): @pytest.fixture(scope="class") def macros(self): diff --git a/tests/functional/minimal_cli/test_minimal_cli.py b/tests/functional/minimal_cli/test_minimal_cli.py index ae8f40dcfcc..5580b1d460f 100644 --- a/tests/functional/minimal_cli/test_minimal_cli.py +++ b/tests/functional/minimal_cli/test_minimal_cli.py @@ -3,6 +3,7 @@ from dbt.cli.main import cli from tests.functional.minimal_cli.fixtures import BaseConfigProject +from tests.functional.utils import up_one class TestMinimalCli(BaseConfigProject): @@ -18,6 +19,13 @@ def test_clean(self, runner, project): assert "dbt_packages" in result.output assert "logs" in result.output + def test_clean_one_level_up(self, runner, project): + with up_one(): + result = runner.invoke(cli, ["clean"]) + assert result.exit_code == 2 + assert "Runtime Error" in result.output + assert "No dbt_project.yml" in result.output + def test_deps(self, runner, project): result = runner.invoke(cli, ["deps"]) assert "dbt-labs/dbt_utils" in result.output diff --git a/tests/functional/utils.py b/tests/functional/utils.py new file mode 100644 index 00000000000..26d31c752ac --- /dev/null +++ b/tests/functional/utils.py @@ -0,0 +1,13 @@ +import os +from contextlib import contextmanager +from pathlib import Path + + +@contextmanager +def up_one(): + current_path = Path.cwd() + os.chdir("../") + try: + yield + finally: + os.chdir(current_path)