diff --git a/cosmos/config.py b/cosmos/config.py index cc11ce52c..fa68b44ab 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -5,6 +5,7 @@ import contextlib import tempfile from dataclasses import dataclass, field +from functools import cached_property from pathlib import Path from typing import Any, Iterator, Callable @@ -47,39 +48,67 @@ class ProjectConfig: """ Class for setting project config. - :param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project + :param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project. Defaults to None :param models_relative_path: The relative path to the dbt models directory within the project. Defaults to models :param seeds_relative_path: The relative path to the dbt seeds directory within the project. Defaults to seeds :param snapshots_relative_path: The relative path to the dbt snapshots directory within the project. Defaults to snapshots :param manifest_path: The absolute path to the dbt manifest file. Defaults to None + :param project_name: Allows the user to define the project name. + Required if dbt_project_path is not defined. Defaults to the folder name of dbt_project_path. """ - dbt_project_path: str | Path + dbt_project_path: str | Path | None = None models_relative_path: str | Path = "models" seeds_relative_path: str | Path = "seeds" snapshots_relative_path: str | Path = "snapshots" manifest_path: str | Path | None = None + project_name: str | None = None - parsed_manifest_path: Path | None = None + @cached_property + def parsed_dbt_project_path(self) -> Path | None: + return Path(self.dbt_project_path) if self.dbt_project_path else None + + @cached_property + def parsed_manifest_path(self) -> Path | None: + return Path(self.manifest_path) if self.manifest_path else None def __post_init__(self) -> None: "Converts paths to `Path` objects." 
- self.dbt_project_path = Path(self.dbt_project_path) - self.models_relative_path = self.dbt_project_path / Path(self.models_relative_path) - self.seeds_relative_path = self.dbt_project_path / Path(self.seeds_relative_path) - self.snapshots_relative_path = self.dbt_project_path / Path(self.snapshots_relative_path) - - if self.manifest_path: - self.parsed_manifest_path = Path(self.manifest_path) + if self.parsed_dbt_project_path: + self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path) + self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path) + self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path) + if not self.project_name: + self.project_name = self.parsed_dbt_project_path.stem def validate_project(self) -> None: - "Validates that the project, models, and seeds directories exist." - project_yml_path = Path(self.dbt_project_path) / "dbt_project.yml" - mandatory_paths = { - "dbt_project.yml": project_yml_path, - "models directory ": self.models_relative_path, - } + """ + Validates necessary context is present for a project. + There are 2 cases we need to account for + 1 - the entire dbt project + 2 - the dbt manifest + Here, we can assume if the project path is provided, we have scenario 1. + If the project path is not provided, we have a scenario 2 + """ + + mandatory_paths = {} + + if self.parsed_dbt_project_path: + project_yml_path = self.parsed_dbt_project_path / "dbt_project.yml" + mandatory_paths = { + "dbt_project.yml": project_yml_path, + "models directory ": self.models_relative_path, + } + elif self.parsed_manifest_path: + if not self.project_name: + raise CosmosValueError( + "project_name required when manifest_path is present and dbt_project_path is not." 
+ ) + mandatory_paths = {"manifest file": self.parsed_manifest_path} + else: + raise CosmosValueError("dbt_project_path or manifest_path are required parameters.") + for name, path in mandatory_paths.items(): if path is None or not Path(path).exists(): raise CosmosValueError(f"Could not find {name} at {path}") @@ -93,11 +122,6 @@ def is_manifest_available(self) -> bool: return self.parsed_manifest_path.exists() - @property - def project_name(self) -> str: - "The name of the dbt project." - return Path(self.dbt_project_path).stem - @dataclass class ProfileConfig: diff --git a/docs/configuration/project-config.rst b/docs/configuration/project-config.rst index 9f00930ba..c1d952f6e 100644 --- a/docs/configuration/project-config.rst +++ b/docs/configuration/project-config.rst @@ -4,7 +4,7 @@ Project Config The ``cosmos.config.ProjectConfig`` allows you to specify information about where your dbt project is located. It takes the following arguments: -- ``dbt_project_path`` (required): The full path to your dbt project. This directory should have a ``dbt_project.yml`` file +- ``dbt_project_path``: The full path to your dbt project. This directory should have a ``dbt_project.yml`` file - ``models_relative_path``: The path to your models directory, relative to the ``dbt_project_path``. This defaults to ``models/`` - ``seeds_relative_path``: The path to your seeds directory, relative to the ``dbt_project_path``. This defaults to @@ -13,6 +13,9 @@ takes the following arguments: to ``snapshots/`` - ``manifest_path``: The absolute path to your manifests directory. This is only required if you're using Cosmos' manifest parsing mode +- ``project_name`` : The name of the project. If ``dbt_project_path`` is provided, the ``project_name`` defaults to the + folder name containing ``dbt_project.yml``. 
If ``dbt_project_path`` is not provided, and ``manifest_path`` is provided, + ``project_name`` is required because the name cannot be inferred from ``dbt_project_path``. Project Config Example diff --git a/tests/test_config.py b/tests/test_config.py index ee96b03c3..9dd936fba 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -10,29 +10,82 @@ PIPELINE_FOLDER = "jaffle_shop" -# Tests that a ProjectConfig object can be created with valid parameters def test_valid_parameters(): project_config = ProjectConfig(dbt_project_path="path/to/dbt/project") - assert project_config.dbt_project_path == Path("path/to/dbt/project") + assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project") assert project_config.models_relative_path == Path("path/to/dbt/project/models") assert project_config.seeds_relative_path == Path("path/to/dbt/project/seeds") assert project_config.snapshots_relative_path == Path("path/to/dbt/project/snapshots") assert project_config.manifest_path is None -def test_init_with_manifest(): +def test_init_with_manifest_path_and_project_path_succeeds(): + """ + Passing a manifest path AND project path together should succeed, as previous + """ project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") -def test_validate_project_succeeds(): +def test_init_with_manifest_path_and_not_project_path_succeeds(): + """ + Since dbt_project_path is optional, we should be able to operate with only a manifest + """ + project_config = ProjectConfig(manifest_path="target/manifest.json") + assert project_config.parsed_manifest_path == Path("target/manifest.json") + + +def test_validate_with_project_path_and_manifest_path_succeeds(): + """ + Supplying both project and manifest paths as previous should be permitted + """ project_config = ProjectConfig( dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=DBT_PROJECTS_ROOT_DIR / 
"manifest.json" ) assert project_config.validate_project() is None -def test_validate_project_fails(): +def test_validate_with_project_path_and_not_manifest_path_succeeds(): + """ + Passing a project with no manifest should be permitted + """ + project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR) + assert project_config.validate_project() is None + + +def test_validate_with_manifest_path_and_not_project_path_and_not_project_name_fails(): + """ + Passing a manifest alone should fail since we also require a project_name + """ + project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json") + with pytest.raises(CosmosValueError) as err_info: + assert project_config.validate_project() is None + print(err_info.value.args[0]) + assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not." + + +def test_validate_with_manifest_path_and_project_name_and_not_project_path_succeeds(): + """ + Passing a manifest and project name together should succeed. + """ + project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", project_name="test-project") + assert project_config.validate_project() is None + + +def test_validate_no_paths_fails(): + """ + Passing no manifest and no project directory should fail. + """ + project_config = ProjectConfig() + with pytest.raises(CosmosValueError) as err_info: + assert project_config.validate_project() is None + assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters." + + +def test_validate_project_missing_fails(): + """ + Passing a project dir that does not exist where specified should fail + """ project_config = ProjectConfig(dbt_project_path=Path("/tmp")) with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None