Skip to content

Commit

Permalink
Make the arg dbt_project_path in the ProjectConfig optional (#581)
Browse files Browse the repository at this point in the history
This change allows the `dbt_project_path` argument of `ProjectConfig` to
be optional, and adds the ability to provide `manifest_path` alone. It
also adds the ability for the user to (optionally) define `project_name`
when `dbt_project_path` is defined, and requires `project_name` to be
defined when `dbt_project_path` is not defined.

Closes: #569
  • Loading branch information
MrBones757 authored Oct 13, 2023
1 parent a85f2f1 commit 5ae38f6
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 27 deletions.
66 changes: 45 additions & 21 deletions cosmos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import contextlib
import tempfile
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
from typing import Any, Iterator, Callable

Expand Down Expand Up @@ -47,39 +48,67 @@ class ProjectConfig:
"""
Class for setting project config.
:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project
:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project. Defaults to None
:param models_relative_path: The relative path to the dbt models directory within the project. Defaults to models
:param seeds_relative_path: The relative path to the dbt seeds directory within the project. Defaults to seeds
:param snapshots_relative_path: The relative path to the dbt snapshots directory within the project. Defaults to
snapshots
:param manifest_path: The absolute path to the dbt manifest file. Defaults to None
:param project_name: Allows the user to define the project name.
Required if dbt_project_path is not defined. Defaults to the folder name of dbt_project_path.
"""

dbt_project_path: str | Path
dbt_project_path: str | Path | None = None
models_relative_path: str | Path = "models"
seeds_relative_path: str | Path = "seeds"
snapshots_relative_path: str | Path = "snapshots"
manifest_path: str | Path | None = None
project_name: str | None = None

parsed_manifest_path: Path | None = None
@cached_property
def parsed_dbt_project_path(self) -> Path | None:
return Path(self.dbt_project_path) if self.dbt_project_path else None

@cached_property
def parsed_manifest_path(self) -> Path | None:
return Path(self.manifest_path) if self.manifest_path else None

def __post_init__(self) -> None:
"Converts paths to `Path` objects."
self.dbt_project_path = Path(self.dbt_project_path)
self.models_relative_path = self.dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.dbt_project_path / Path(self.snapshots_relative_path)

if self.manifest_path:
self.parsed_manifest_path = Path(self.manifest_path)
if self.parsed_dbt_project_path:
self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path)
if not self.project_name:
self.project_name = self.parsed_dbt_project_path.stem

def validate_project(self) -> None:
"Validates that the project, models, and seeds directories exist."
project_yml_path = Path(self.dbt_project_path) / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
"""
Validates necessary context is present for a project.
There are 2 cases we need to account for
1 - the entire dbt project
2 - the dbt manifest
Here, we can assume if the project path is provided, we have scenario 1.
If the project path is not provided, we have a scenario 2
"""

mandatory_paths = {}

if self.parsed_dbt_project_path:
project_yml_path = self.parsed_dbt_project_path / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
elif self.parsed_manifest_path:
if not self.project_name:
raise CosmosValueError(
"project_name required when manifest_path is present and dbt_project_path is not."
)
mandatory_paths = {"manifest file": self.parsed_manifest_path}
else:
raise CosmosValueError("dbt_project_path or manifest_path are required parameters.")

for name, path in mandatory_paths.items():
if path is None or not Path(path).exists():
raise CosmosValueError(f"Could not find {name} at {path}")
Expand All @@ -93,11 +122,6 @@ def is_manifest_available(self) -> bool:

return self.parsed_manifest_path.exists()

@property
def project_name(self) -> str:
"The name of the dbt project."
return Path(self.dbt_project_path).stem


@dataclass
class ProfileConfig:
Expand Down
5 changes: 4 additions & 1 deletion docs/configuration/project-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Project Config
The ``cosmos.config.ProjectConfig`` allows you to specify information about where your dbt project is located. It
takes the following arguments:

- ``dbt_project_path`` (required): The full path to your dbt project. This directory should have a ``dbt_project.yml`` file
- ``dbt_project_path``: The full path to your dbt project. This directory should have a ``dbt_project.yml`` file
- ``models_relative_path``: The path to your models directory, relative to the ``dbt_project_path``. This defaults to
``models/``
- ``seeds_relative_path``: The path to your seeds directory, relative to the ``dbt_project_path``. This defaults to
Expand All @@ -13,6 +13,9 @@ takes the following arguments:
to ``snapshots/``
- ``manifest_path``: The absolute path to your dbt manifest file. This is only required if you're using Cosmos' manifest
parsing mode
- ``project_name``: The name of the project. If ``dbt_project_path`` is provided, ``project_name`` defaults to the
name of the folder containing ``dbt_project.yml``. If ``dbt_project_path`` is not provided and ``manifest_path`` is provided,
``project_name`` is required, as the name cannot be inferred from ``dbt_project_path``


Project Config Example
Expand Down
63 changes: 58 additions & 5 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,82 @@
PIPELINE_FOLDER = "jaffle_shop"


# Tests that a ProjectConfig object can be created with valid parameters
def test_valid_parameters():
project_config = ProjectConfig(dbt_project_path="path/to/dbt/project")
assert project_config.dbt_project_path == Path("path/to/dbt/project")
assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project")
assert project_config.models_relative_path == Path("path/to/dbt/project/models")
assert project_config.seeds_relative_path == Path("path/to/dbt/project/seeds")
assert project_config.snapshots_relative_path == Path("path/to/dbt/project/snapshots")
assert project_config.manifest_path is None


def test_init_with_manifest():
def test_init_with_manifest_path_and_project_path_succeeds():
"""
Passing a manifest path AND project path together should succeed, as previous
"""
project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json")
assert project_config.parsed_manifest_path == Path("target/manifest.json")


def test_validate_project_succeeds():
def test_init_with_manifest_path_and_not_project_path_succeeds():
"""
Since dbt_project_path is optional, we should be able to operate with only a manifest
"""
project_config = ProjectConfig(manifest_path="target/manifest.json")
assert project_config.parsed_manifest_path == Path("target/manifest.json")


def test_validate_with_project_path_and_manifest_path_succeeds():
"""
Supplying both project and manifest paths as previous should be permitted
"""
project_config = ProjectConfig(
dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json"
)
assert project_config.validate_project() is None


def test_validate_project_fails():
def test_validate_with_project_path_and_not_manifest_path_succeeds():
"""
Passing a project with no manifest should be permitted
"""
project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR)
assert project_config.validate_project() is None


def test_validate_with_manifest_path_and_not_project_path_and_not_project_name_fails():
"""
Passing a manifest alone should fail since we also require a project_name
"""
project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json")
with pytest.raises(CosmosValueError) as err_info:
assert project_config.validate_project() is None
print(err_info.value.args[0])
assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not."


def test_validate_with_manifest_path_and_project_name_and_not_project_path_succeeds():
"""
Passing a manifest and project name together should succeed.
"""
project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", project_name="test-project")
assert project_config.validate_project() is None


def test_validate_no_paths_fails():
"""
Passing no manifest and no project directory should fail.
"""
project_config = ProjectConfig()
with pytest.raises(CosmosValueError) as err_info:
assert project_config.validate_project() is None
assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters."


def test_validate_project_missing_fails():
"""
Passing a project dir that does not exist where specified should fail
"""
project_config = ProjectConfig(dbt_project_path=Path("/tmp"))
with pytest.raises(CosmosValueError) as err_info:
assert project_config.validate_project() is None
Expand Down

0 comments on commit 5ae38f6

Please sign in to comment.