From 794e79b6bde4c9da9a51eb163bacbaab0f8d7871 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 8 Mar 2024 15:33:02 +0100 Subject: [PATCH 01/27] WIP Add basic dbt objects and rule definitions --- src/dbt_score/manifest.py | 125 +++++++++++++++++++++++++++ src/dbt_score/rule.py | 47 ++++++++++ src/dbt_score/rules/__init__.py | 0 src/dbt_score/rules/example_rules.py | 93 ++++++++++++++++++++ src/dbt_score/utils.py | 20 +++++ 5 files changed, 285 insertions(+) create mode 100644 src/dbt_score/manifest.py create mode 100644 src/dbt_score/rule.py create mode 100644 src/dbt_score/rules/__init__.py create mode 100644 src/dbt_score/rules/example_rules.py create mode 100644 src/dbt_score/utils.py diff --git a/src/dbt_score/manifest.py b/src/dbt_score/manifest.py new file mode 100644 index 0000000..4494eb2 --- /dev/null +++ b/src/dbt_score/manifest.py @@ -0,0 +1,125 @@ +from dataclasses import dataclass, field +from typing import Any, List + + +@dataclass +class Constraint: + """Constraint for a column in a model.""" + + type: str + expression: str + name: str + + +@dataclass +class Test: + """Test for a column or model.""" + + name: str + type: str + tags: list[str] = field(default_factory=list) + + +@dataclass +class Column: + """Represents a column in a model.""" + + name: str + description: str + constraints: List[Constraint] + tests: List[Test] = field(default_factory=list) + + +@dataclass +class Model: + """Represents a dbt model.""" + + id: str + name: str + description: str + file_path: str + config: dict[str, Any] + meta: dict[str, Any] + columns: dict[str, Column] + tests: list[Test] = field(default_factory=list) + + @classmethod + def from_node(cls, node_values: dict[str, Any]) -> "Model": + """Create a model object from a node in the manifest.""" + columns = { + name: Column( + name=values.get("name"), + description=values.get("description"), + constraints=[ + Constraint( + name=constraint.get("name"), + type=constraint.get("type"), + expression=constraint.get("expression"), + ) + for constraint in values.get("constraints", []) + ], + ) + for name, values in node_values.get("columns", {}).items() + } + + model = cls( + id=node_values["unique_id"], + file_path=node_values["patch_path"], + config=node_values.get("config", {}), + name=node_values["name"], + description=node_values.get("description", ""), + meta=node_values.get("meta", {}), + columns=columns, + ) + + return model + + +class ManifestLoader: + """Load the models and tests from the manifest.""" + + def __init__(self, raw_manifest: dict[str, Any]): + self.raw_manifest = raw_manifest + self.raw_nodes = raw_manifest.get("nodes", {}) + self.models: dict[str, Model] = {} + self.tests: dict[str, Test] = {} + + # Load models first so the tests can be attached to them later. + self.load_models() + self.load_tests() + + def load_models(self) -> None: + """Load the models from the manifest.""" + for node_values in self.raw_nodes.values(): + if node_values.get("resource_type") == "model": + model = Model.from_node(node_values) + self.models[model.id] = model + + def load_tests(self) -> None: + """Load the tests from the manifest and attach them to the right object.""" + for node_values in self.raw_nodes.values(): + # Only include tests that are attached to a model. + if node_values.get("resource_type") == "test" and node_values.get( + "attached_node" + ): + model = self.models.get(node_values.get("attached_node")) + + if not model: + raise ValueError( + f"Model {node_values.get('attached_node')}" + f"not found, while tests are attached to it." + ) + + test = Test( + name=node_values.get("name"), + type=node_values.get("test_metadata").get("name"), + tags=node_values.get("tags"), + ) + column_name = ( + node_values.get("test_metadata").get("kwargs").get("column_name") + ) + + if column_name: # Test is a column-level test. + model.columns[column_name].tests.append(test) + else: + model.tests.append(test) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py new file mode 100644 index 0000000..5609c8d --- /dev/null +++ b/src/dbt_score/rule.py @@ -0,0 +1,47 @@ +import functools +import logging +from dataclasses import dataclass +from enum import Enum +from typing import Any, Callable + +from dbt_score.manifest import Model + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class Severity(Enum): + """The severity/weight of a rule.""" + + LOW = 1 + MEDIUM = 2 + HIGH = 3 + CRITICAL = 4 + + +@dataclass +class RuleViolation: + """The violation of a rule.""" + + message: str | None = None + + +def rule( + description: str, + hint: str, + severity: Severity = Severity.MEDIUM, +) -> Callable[[Callable[[Model], RuleViolation | None]], Callable[..., None]]: + """Rule decorator.""" + + def decorator_rule( + func: Callable[[Model], RuleViolation | None], + ) -> Callable[..., None]: + @functools.wraps(func) + def wrapper_rule(*args: Any, **kwargs: Any) -> Any: + logger.debug("Executing `%s` with severity: %s.", func.__name__, severity) + return func(*args, **kwargs) + + return wrapper_rule + + return decorator_rule diff --git a/src/dbt_score/rules/__init__.py b/src/dbt_score/rules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py new file mode 100644 index 0000000..6404d6b --- /dev/null +++ b/src/dbt_score/rules/example_rules.py @@ -0,0 +1,93 @@ +"""All general rules.""" + +from ..manifest import Model +from ..rule import RuleViolation, Severity, rule + + +@rule( + description="A model should have an owner defined.", + hint="Define the owner of the model in the meta section.", + severity=Severity.HIGH, +) +def has_owner(model: Model) -> RuleViolation | None: + """A model should have an owner defined.""" + if "owner" not in model.meta: + return RuleViolation() + + return None + + +@rule(description="A model should have a primary key defined.", hint="Some hint.") +def has_primary_key(model: Model) -> RuleViolation | None: + """A model should have a primary key defined, unless it's a view.""" + if not model.config.get("materialized") == "picnic_view": + has_pk = False + for column in model.columns.values(): + if "primary_key" in [constraint.type for constraint in column.constraints]: + has_pk = True + break + + if not has_pk: + return RuleViolation() + + return None + + +@rule( + description="Primary key columns should have a uniqueness test defined.", + hint="Some hint.", +) +def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: + """Primary key columns should have a uniqueness test defined.""" + columns_with_pk = [] + if not model.config.get("materialized") == "picnic_view": + for column_name, column in model.columns.items(): + if "primary_key" in [constraint.type for constraint in column.constraints]: + columns_with_pk.append(column_name) + + tests = ( + model.columns[columns_with_pk[0]].tests + if len(columns_with_pk) == 1 + else model.tests + ) + + if columns_with_pk and "unique" not in [test.type for test in tests]: + return RuleViolation() + + return None + + +@rule( + description="All columns of a model should have a description.", hint="Some hint." +) +def columns_have_description(model: Model) -> RuleViolation | None: + """All columns of a model should have a description.""" + invalid_columns = [ + column_name + for column_name, column in model.columns.items() + if not column.description + ] + if invalid_columns: + return RuleViolation( + message=f"The following columns lack a description: " + f"{', '.join(invalid_columns)}." + ) + + return None + + +@rule(description="A model should have at least one test defined.", hint="Some hint.") +def has_test(model: Model) -> RuleViolation | None: + """A model should have at least one model-level and one column-level test. + + This does not include singular tests, which are tests defined in a separate .sql + file and not linked to the model in the metadata. + """ + column_tests = [] + for column in model.columns.values(): + column_tests.extend(column.tests) + + if len(model.tests) == 0 or len(column_tests) == 0: + return RuleViolation() + + return None diff --git a/src/dbt_score/utils.py b/src/dbt_score/utils.py new file mode 100644 index 0000000..b5ee0c3 --- /dev/null +++ b/src/dbt_score/utils.py @@ -0,0 +1,20 @@ +"""Utility functions.""" + +import json +from pathlib import Path +from typing import Any + + +class JsonOpenError(RuntimeError): + """Raised when there is an error opening a JSON file.""" + + pass + + +def get_json(json_filename: str) -> Any: + """Get JSON from a file.""" + try: + file_content = Path(json_filename).read_text(encoding="utf-8") + return json.loads(file_content) + except Exception as e: + raise JsonOpenError(f"Error opening {json_filename}.") from e From 25d10595d2e2d280d9651e5e66ed3e5f74635cb6 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 13 Mar 2024 14:33:27 +0100 Subject: [PATCH 02/27] Fix decorator and change manifest loading --- src/dbt_score/manifest.py | 125 ------------------- src/dbt_score/models.py | 176 +++++++++++++++++++++++++++ src/dbt_score/rule.py | 60 +++++++-- src/dbt_score/rules/__init__.py | 0 src/dbt_score/rules/example_rules.py | 79 ++++++------ src/dbt_score/utils.py | 20 --- 6 files changed, 271 insertions(+), 189 deletions(-) delete mode 100644 src/dbt_score/manifest.py create mode 100644 src/dbt_score/models.py delete mode 100644 src/dbt_score/rules/__init__.py delete mode 100644 src/dbt_score/utils.py diff --git a/src/dbt_score/manifest.py b/src/dbt_score/manifest.py deleted file mode 100644 index 4494eb2..0000000 --- a/src/dbt_score/manifest.py +++ /dev/null @@ -1,125 +0,0 @@ -from dataclasses import dataclass, field -from typing import Any, List - - -@dataclass -class Constraint: - """Constraint for a column in a model.""" - - type: str - expression: str - name: str - - -@dataclass -class Test: - """Test for a column or model.""" - - name: str - type: str - tags: list[str] = field(default_factory=list) - - -@dataclass -class Column: - """Represents a column in a model.""" - - name: str - description: str - constraints: List[Constraint] - tests: List[Test] = field(default_factory=list) - - -@dataclass -class Model: - """Represents a dbt model.""" - - id: str - name: str - description: str - file_path: str - config: dict[str, Any] - meta: dict[str, Any] - columns: dict[str, Column] - tests: list[Test] = field(default_factory=list) - - @classmethod - def from_node(cls, node_values: dict[str, Any]) -> "Model": - """Create a model object from a node in the manifest.""" - columns = { - name: Column( - name=values.get("name"), - description=values.get("description"), - constraints=[ - Constraint( - name=constraint.get("name"), - type=constraint.get("type"), - expression=constraint.get("expression"), - ) - for constraint in values.get("constraints", []) - ], - ) - for name, values in node_values.get("columns", {}).items() - } - - model = cls( - id=node_values["unique_id"], - file_path=node_values["patch_path"], - config=node_values.get("config", {}), - name=node_values["name"], - description=node_values.get("description", ""), - meta=node_values.get("meta", {}), - columns=columns, - ) - - return model - - -class ManifestLoader: - """Load the models and tests from the manifest.""" - - def __init__(self, raw_manifest: dict[str, Any]): - self.raw_manifest = raw_manifest - self.raw_nodes = raw_manifest.get("nodes", {}) - self.models: dict[str, Model] = {} - self.tests: dict[str, Test] = {} - - # Load models first so the tests can be attached to them later. - self.load_models() - self.load_tests() - - def load_models(self) -> None: - """Load the models from the manifest.""" - for node_values in self.raw_nodes.values(): - if node_values.get("resource_type") == "model": - model = Model.from_node(node_values) - self.models[model.id] = model - - def load_tests(self) -> None: - """Load the tests from the manifest and attach them to the right object.""" - for node_values in self.raw_nodes.values(): - # Only include tests that are attached to a model. - if node_values.get("resource_type") == "test" and node_values.get( - "attached_node" - ): - model = self.models.get(node_values.get("attached_node")) - - if not model: - raise ValueError( - f"Model {node_values.get('attached_node')}" - f"not found, while tests are attached to it." - ) - - test = Test( - name=node_values.get("name"), - type=node_values.get("test_metadata").get("name"), - tags=node_values.get("tags"), - ) - column_name = ( - node_values.get("test_metadata").get("kwargs").get("column_name") - ) - - if column_name: # Test is a column-level test. - model.columns[column_name].tests.append(test) - else: - model.tests.append(test) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py new file mode 100644 index 0000000..293cc9c --- /dev/null +++ b/src/dbt_score/models.py @@ -0,0 +1,176 @@ +"""Objects related to loading the dbt manifest.""" +from collections import defaultdict +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class Constraint: + """Constraint for a column. + + Args: + name: The name of the constraint. + type: The type of the constraint, e.g. `foreign_key`. + expression: The expression of the constraint, e.g. `schema.other_table`. + """ + + name: str + type: str + expression: str + + +@dataclass +class Test: + """Test for a column or model. + + Args: + name: The name of the test. + type: The type of the test, e.g. `unique`. + tags: The list of tags attached to the test. + """ + + name: str + type: str + tags: list[str] = field(default_factory=list) + + +@dataclass +class Column: + """Represents a column in a model. + + Args: + name: The name of the column. + description: The description of the column. + constraints: The list of constraints attached to the column. + tests: The list of tests attached to the column. + """ + + name: str + description: str + constraints: list[Constraint] = field(default_factory=list) + tests: list[Test] = field(default_factory=list) + + +@dataclass +class Model: + """Represents a dbt model. + + Args: + id: The id of the model, e.g. `model.package.model_name`. + name: The name of the model. + description: The full description of the model. + file_path: The `.yml` file path of the model. + config: The config of the model. + meta: The meta of the model. + columns: The list of columns of the model. + tests: The list of tests attached to the model. + """ + + id: str + name: str + description: str + file_path: str + config: dict[str, Any] + meta: dict[str, Any] + columns: list[Column] + tests: list[Test] = field(default_factory=list) + + def get_column(self, column_name: str) -> Column | None: + """Get a column by name.""" + for column in self.columns: + if column.name == column_name: + return column + + return None + + @staticmethod + def _get_columns( + node_values: dict[str, Any], tests_values: list[dict[str, Any]] + ) -> list[Column]: + """Get columns from a node and it's tests in the manifest.""" + columns = [ + Column( + name=values.get("name"), + description=values.get("description"), + constraints=[ + Constraint( + name=constraint.get("name"), + type=constraint.get("type"), + expression=constraint.get("expression"), + ) + for constraint in values.get("constraints", []) + ], + tests=[ + Test( + name=test["name"], + type=test["test_metadata"]["name"], + tags=test.get("tags", []), + ) + for test in tests_values + if test["test_metadata"].get("kwargs", {}).get("column_name") + == values.get("name") + ], + ) + for name, values in node_values.get("columns", {}).items() + ] + return columns + + @classmethod + def from_node( + cls, node_values: dict[str, Any], tests_values: list[dict[str, Any]] + ) -> "Model": + """Create a model object from a node and it's tests in the manifest.""" + model = cls( + id=node_values["unique_id"], + file_path=node_values["patch_path"], + config=node_values.get("config", {}), + name=node_values["name"], + description=node_values.get("description", ""), + meta=node_values.get("meta", {}), + columns=cls._get_columns(node_values, tests_values), + tests=[ + Test( + name=test["name"], + type=test["test_metadata"]["name"], + tags=test.get("tags", []), + ) + for test in tests_values + if not test["test_metadata"].get("kwargs", {}).get("column_name") + ], + ) + + return model + + +class ManifestLoader: + """Load the models and tests from the manifest.""" + + def __init__(self, raw_manifest: dict[str, Any]): + """Initialize the ManifestLoader. + + Args: + raw_manifest: The dictionary representation of the JSON manifest. + """ + self.raw_manifest = raw_manifest + self.raw_nodes = raw_manifest.get("nodes", {}) + self.models: list[Model] = [] + self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list) + + self._reindex_tests() + self._load_models() + + def _load_models(self) -> None: + """Load the models from the manifest.""" + for node_id, node_values in self.raw_nodes.items(): + if node_values.get("resource_type") == "model": + model = Model.from_node(node_values, self.tests.get(node_id, [])) + self.models.append(model) + + def _reindex_tests(self) -> None: + """Index tests based on their model id.""" + for node_values in self.raw_nodes.values(): + # Only include tests that are attached to a model. + if node_values.get("resource_type") == "test" and node_values.get( + "attached_node" + ): + self.tests[node_values["attached_node"]].append(node_values) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 5609c8d..50d506b 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -1,10 +1,13 @@ +"""Rule definitions.""" + + import functools import logging from dataclasses import dataclass from enum import Enum -from typing import Any, Callable +from typing import Any, Callable, Type -from dbt_score.manifest import Model +from dbt_score.models import Model logging.basicConfig() logger = logging.getLogger(__name__) @@ -27,21 +30,62 @@ class RuleViolation: message: str | None = None +class Rule: + """The rule base class.""" + + description: str + severity: Severity = Severity.MEDIUM + + def __init_subclass__(cls, **kwargs) -> None: # type: ignore + """Initializes the subclass.""" + super().__init_subclass__(**kwargs) + if not hasattr(cls, "description"): + raise TypeError("Subclass must define class attribute `description`.") + + @classmethod + def evaluate(cls, model: Model) -> RuleViolation | None: + """Evaluates the rule.""" + raise NotImplementedError("Subclass must implement class method `evaluate`.") + + def rule( - description: str, - hint: str, + description: str | None = None, severity: Severity = Severity.MEDIUM, -) -> Callable[[Callable[[Model], RuleViolation | None]], Callable[..., None]]: - """Rule decorator.""" +) -> Callable[[Callable[[Model], RuleViolation | None]], Type[Rule]]: + """Rule decorator. + + The rule decorator creates a rule class (subclass of Rule) and returns it. + + Args: + description: The description of the rule. + severity: The severity of the rule. + """ def decorator_rule( func: Callable[[Model], RuleViolation | None], - ) -> Callable[..., None]: + ) -> Type[Rule]: @functools.wraps(func) def wrapper_rule(*args: Any, **kwargs: Any) -> Any: logger.debug("Executing `%s` with severity: %s.", func.__name__, severity) return func(*args, **kwargs) - return wrapper_rule + # Create the rule class + if func.__doc__ is None and description is None: + raise TypeError("Rule must define `description` or `func.__doc__`.") + + rule_description = description or ( + func.__doc__.split("\n")[0] if func.__doc__ else None + ) + rule_class = type( + func.__name__, + (Rule,), + { + "description": rule_description, + "severity": severity, + "evaluate": wrapper_rule, + }, + ) + + return rule_class return decorator_rule diff --git a/src/dbt_score/rules/__init__.py b/src/dbt_score/rules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 6404d6b..8bc59c6 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -1,28 +1,46 @@ """All general rules.""" -from ..manifest import Model -from ..rule import RuleViolation, Severity, rule +from dbt_score.models import Model +from dbt_score.rule import Rule, RuleViolation, Severity, rule -@rule( - description="A model should have an owner defined.", - hint="Define the owner of the model in the meta section.", - severity=Severity.HIGH, -) +class ComplexRule(Rule): + """Complex rule.""" + + description = "Example of a complex rule." + severity = Severity.CRITICAL + + @classmethod + def preprocess(cls) -> int: + """Preprocessing.""" + return 1 + + @classmethod + def evaluate(cls, model: Model) -> RuleViolation | None: + """Evaluate model.""" + x = cls.preprocess() + + if x: + return RuleViolation(str(x)) + else: + return None + + +@rule() def has_owner(model: Model) -> RuleViolation | None: """A model should have an owner defined.""" if "owner" not in model.meta: - return RuleViolation() + return RuleViolation("Define the owner of the model in the meta section.") return None -@rule(description="A model should have a primary key defined.", hint="Some hint.") +@rule() def has_primary_key(model: Model) -> RuleViolation | None: """A model should have a primary key defined, unless it's a view.""" if not model.config.get("materialized") == "picnic_view": has_pk = False - for column in model.columns.values(): + for column in model.columns: if "primary_key" in [constraint.type for constraint in column.constraints]: has_pk = True break @@ -33,23 +51,16 @@ def has_primary_key(model: Model) -> RuleViolation | None: return None -@rule( - description="Primary key columns should have a uniqueness test defined.", - hint="Some hint.", -) +@rule() def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: """Primary key columns should have a uniqueness test defined.""" columns_with_pk = [] - if not model.config.get("materialized") == "picnic_view": - for column_name, column in model.columns.items(): + if model.config.get("materialized") == "view": + for column in model.columns: if "primary_key" in [constraint.type for constraint in column.constraints]: - columns_with_pk.append(column_name) + columns_with_pk.append(column) - tests = ( - model.columns[columns_with_pk[0]].tests - if len(columns_with_pk) == 1 - else model.tests - ) + tests = columns_with_pk[0].tests if len(columns_with_pk) == 1 else model.tests if columns_with_pk and "unique" not in [test.type for test in tests]: return RuleViolation() @@ -57,37 +68,33 @@ def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: return None -@rule( - description="All columns of a model should have a description.", hint="Some hint." -) +@rule() def columns_have_description(model: Model) -> RuleViolation | None: """All columns of a model should have a description.""" - invalid_columns = [ - column_name - for column_name, column in model.columns.items() - if not column.description + invalid_column_names = [ + column.name for column in model.columns if not column.description ] - if invalid_columns: + if invalid_column_names: return RuleViolation( message=f"The following columns lack a description: " - f"{', '.join(invalid_columns)}." + f"{', '.join(invalid_column_names)}." ) return None -@rule(description="A model should have at least one test defined.", hint="Some hint.") +@rule(description="A model should have at least one test defined.") def has_test(model: Model) -> RuleViolation | None: - """A model should have at least one model-level and one column-level test. + """A model should have at least one model-level or column-level test defined. This does not include singular tests, which are tests defined in a separate .sql file and not linked to the model in the metadata. """ column_tests = [] - for column in model.columns.values(): + for column in model.columns: column_tests.extend(column.tests) - if len(model.tests) == 0 or len(column_tests) == 0: - return RuleViolation() + if len(model.tests) == 0 and len(column_tests) == 0: + return RuleViolation("Define a test for the model on model- or column-level.") return None diff --git a/src/dbt_score/utils.py b/src/dbt_score/utils.py deleted file mode 100644 index b5ee0c3..0000000 --- a/src/dbt_score/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Utility functions.""" - -import json -from pathlib import Path -from typing import Any - - -class JsonOpenError(RuntimeError): - """Raised when there is an error opening a JSON file.""" - - pass - - -def get_json(json_filename: str) -> Any: - """Get JSON from a file.""" - try: - file_content = Path(json_filename).read_text(encoding="utf-8") - return json.loads(file_content) - except Exception as e: - raise JsonOpenError(f"Error opening {json_filename}.") from e From 37c0a018ff5abac84c2fb63ff800034315a08e9f Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 13 Mar 2024 14:34:50 +0100 Subject: [PATCH 03/27] Add newline after docstring --- src/dbt_score/models.py | 1 + src/dbt_score/rule.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 293cc9c..bdcfc40 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -1,4 +1,5 @@ """Objects related to loading the dbt manifest.""" + from collections import defaultdict from dataclasses import dataclass, field from typing import Any diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 50d506b..6148989 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -1,6 +1,5 @@ """Rule definitions.""" - import functools import logging from dataclasses import dataclass From 8811c28986ae1085c70f8df84b36069ca0174ae6 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 13 Mar 2024 15:25:12 +0100 Subject: [PATCH 04/27] Improve documentation and remove logging --- src/dbt_score/rule.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 6148989..43cd06a 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -1,17 +1,12 @@ """Rule definitions.""" import functools -import logging from dataclasses import dataclass from enum import Enum from typing import Any, Callable, Type from dbt_score.models import Model -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - class Severity(Enum): """The severity/weight of a rule.""" @@ -65,16 +60,17 @@ def decorator_rule( ) -> Type[Rule]: @functools.wraps(func) def wrapper_rule(*args: Any, **kwargs: Any) -> Any: - logger.debug("Executing `%s` with severity: %s.", func.__name__, severity) return func(*args, **kwargs) - # Create the rule class if func.__doc__ is None and description is None: raise TypeError("Rule must define `description` or `func.__doc__`.") + # Get description parameter, otherwise use the docstring. rule_description = description or ( func.__doc__.split("\n")[0] if func.__doc__ else None ) + + # Create the rule class inheriting from Rule. rule_class = type( func.__name__, (Rule,), From 4a620e82d5f6cd6365f4c19766f5aa56aff77ca2 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 14 Mar 2024 13:55:50 +0100 Subject: [PATCH 05/27] Improve model definitions and process feedback --- src/dbt_score/models.py | 78 +++++++++++++++++++--------- src/dbt_score/rules/example_rules.py | 3 +- 2 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index bdcfc40..3f6cf03 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -9,7 +9,7 @@ class Constraint: """Constraint for a column. - Args: + Attributes: name: The name of the constraint. type: The type of the constraint, e.g. `foreign_key`. expression: The expression of the constraint, e.g. `schema.other_table`. @@ -24,31 +24,46 @@ class Constraint: class Test: """Test for a column or model. - Args: + Attributes: name: The name of the test. type: The type of the test, e.g. `unique`. + kwargs: The kwargs of the test. tags: The list of tags attached to the test. """ name: str type: str + kwargs: dict[str, Any] = field(default_factory=dict) tags: list[str] = field(default_factory=list) + @classmethod + def from_node(cls, test_node: dict[str, Any]) -> "Test": + """Create a test object from a test node in the manifest.""" + test = cls( + name=test_node["name"], + type=test_node["test_metadata"]["name"], + kwargs=test_node["test_metadata"].get("kwargs", {}), + tags=test_node.get("tags", []), + ) + return test + @dataclass class Column: """Represents a column in a model. - Args: + Attributes: name: The name of the column. description: The description of the column. constraints: The list of constraints attached to the column. + tags: The list of tags attached to the column. tests: The list of tests attached to the column. """ name: str description: str constraints: list[Constraint] = field(default_factory=list) + tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) @@ -56,25 +71,37 @@ class Column: class Model: """Represents a dbt model. - Args: - id: The id of the model, e.g. `model.package.model_name`. + Attributes: + unique_id: The id of the model, e.g. `model.package.model_name`. name: The name of the model. description: The full description of the model. - file_path: The `.yml` file path of the model. + patch_path: The yml path of the model, e.g. `package://model_dir/dir/file.yml`. + original_file_path: The sql path of the model, `e.g. model_dir/dir/file.sql`. config: The config of the model. meta: The meta of the model. columns: The list of columns of the model. + package_name: The package name of the model. + database: The database name of the model. + schema: The schema name of the model. + tags: The list of tags attached to the model. tests: The list of tests attached to the model. + depends_on: Dictionary of models/sources/macros that the model depends on. """ - id: str + unique_id: str name: str description: str - file_path: str + patch_path: str + original_file_path: str config: dict[str, Any] meta: dict[str, Any] columns: list[Column] + package_name: str + database: str + schema: str + tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) + depends_on: dict[str, list[str]] = field(default_factory=dict) def get_column(self, column_name: str) -> Column | None: """Get a column by name.""" @@ -101,12 +128,9 @@ def _get_columns( ) for constraint in values.get("constraints", []) ], + tags=values.get("tags", []), tests=[ - Test( - name=test["name"], - type=test["test_metadata"]["name"], - tags=test.get("tags", []), - ) + Test.from_node(test) for test in tests_values if test["test_metadata"].get("kwargs", {}).get("column_name") == values.get("name") @@ -122,22 +146,24 @@ def from_node( ) -> "Model": """Create a model object from a node and it's tests in the manifest.""" model = cls( - id=node_values["unique_id"], - file_path=node_values["patch_path"], - config=node_values.get("config", {}), + unique_id=node_values["unique_id"], name=node_values["name"], description=node_values.get("description", ""), + patch_path=node_values["patch_path"], + original_file_path=node_values["original_file_path"], + config=node_values.get("config", {}), meta=node_values.get("meta", {}), columns=cls._get_columns(node_values, tests_values), + package_name=node_values["package_name"], + database=node_values["database"], + schema=node_values["schema"], + tags=node_values.get("tags", []), tests=[ - Test( - name=test["name"], - type=test["test_metadata"]["name"], - tags=test.get("tags", []), - ) + Test.from_node(test) for test in tests_values if not test["test_metadata"].get("kwargs", {}).get("column_name") ], + depends_on=node_values.get("depends_on", {}), ) return model @@ -165,13 +191,17 @@ def _load_models(self) -> None: for node_id, node_values in self.raw_nodes.items(): if node_values.get("resource_type") == "model": model = Model.from_node(node_values, self.tests.get(node_id, [])) + + if model.unique_id == "model.dwh.ft_purchase_order_line_article": + print(model) + self.models.append(model) def _reindex_tests(self) -> None: """Index tests based on their model id.""" for node_values in self.raw_nodes.values(): # Only include tests that are attached to a model. - if node_values.get("resource_type") == "test" and node_values.get( - "attached_node" + if node_values.get("resource_type") == "test" and ( + attached_node := node_values.get("attached_node") ): - self.tests[node_values["attached_node"]].append(node_values) + self.tests[attached_node].append(node_values) diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 8bc59c6..1aec4a2 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -1,14 +1,13 @@ """All general rules.""" from dbt_score.models import Model -from dbt_score.rule import Rule, RuleViolation, Severity, rule +from dbt_score.rule import Rule, RuleViolation, rule class ComplexRule(Rule): """Complex rule.""" description = "Example of a complex rule." - severity = Severity.CRITICAL @classmethod def preprocess(cls) -> int: From d1ddd4be09c6ec3e08f7508a13c9d8ed7aa6d9ff Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 14 Mar 2024 14:03:23 +0100 Subject: [PATCH 06/27] Remove debugging lines --- src/dbt_score/models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 3f6cf03..17f1dfa 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -191,10 +191,6 @@ def _load_models(self) -> None: for node_id, node_values in self.raw_nodes.items(): if node_values.get("resource_type") == "model": model = Model.from_node(node_values, self.tests.get(node_id, [])) - - if model.unique_id == "model.dwh.ft_purchase_order_line_article": - print(model) - self.models.append(model) def _reindex_tests(self) -> None: From 5e4a4bcaca6ae38ee9ebfe63086d8b59aa6c07bb Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 13:43:37 +0100 Subject: [PATCH 07/27] Add more parameters and cleanup --- src/dbt_score/models.py | 107 ++++++++++++++++++--------- src/dbt_score/rule.py | 15 ++-- src/dbt_score/rules/__init__.py | 1 + src/dbt_score/rules/example_rules.py | 20 +++-- 4 files changed, 89 insertions(+), 54 deletions(-) create mode 100644 src/dbt_score/rules/__init__.py diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 17f1dfa..237acfa 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -29,12 +29,14 @@ class Test: type: The type of the test, e.g. `unique`. kwargs: The kwargs of the test. tags: The list of tags attached to the test. + _raw_values: The raw values of the test in the manifest. """ name: str type: str kwargs: dict[str, Any] = field(default_factory=dict) tags: list[str] = field(default_factory=list) + _raw_values: dict[str, Any] = field(default_factory=dict) @classmethod def from_node(cls, test_node: dict[str, Any]) -> "Test": @@ -45,6 +47,7 @@ def from_node(cls, test_node: dict[str, Any]) -> "Test": kwargs=test_node["test_metadata"].get("kwargs", {}), tags=test_node.get("tags", []), ) + test._raw_values = test_node return test @@ -55,16 +58,51 @@ class Column: Attributes: name: The name of the column. description: The description of the column. + data_type: The data type of the column. + meta: The metadata attached to the column. constraints: The list of constraints attached to the column. tags: The list of tags attached to the column. tests: The list of tests attached to the column. + _raw_values: The raw values of the column as defined in the node. + _raw_test_values: The raw test values of the column as defined in the node. """ name: str description: str + data_type: str | None = None + meta: dict[str, Any] = field(default_factory=dict) constraints: list[Constraint] = field(default_factory=list) tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) + _raw_values: dict[str, Any] = field(default_factory=dict) + _raw_test_values: list[dict[str, Any]] = field(default_factory=list) + + @classmethod + def from_node_values( + cls, values: dict[str, Any], test_values: list[dict[str, Any]] + ) -> "Column": + """Create a column object from raw values.""" + column = cls( + name=values["name"], + description=values["description"], + data_type=values["data_type"], + meta=values["meta"], + constraints=[ + Constraint( + name=constraint["name"], + type=constraint["type"], + expression=constraint["expression"], + ) + for constraint in values["constraints"] + ], + tags=values["tags"], + tests=[Test.from_node(test) for test in test_values], + ) + + column._raw_values = values + column._raw_test_values = test_values + + return column @dataclass @@ -74,8 +112,8 @@ class Model: Attributes: unique_id: The id of the model, e.g. `model.package.model_name`. name: The name of the model. + relation_name: The relation name of the model, e.g. `db.schema.model_name`. description: The full description of the model. - patch_path: The yml path of the model, e.g. `package://model_dir/dir/file.yml`. original_file_path: The sql path of the model, `e.g. model_dir/dir/file.sql`. config: The config of the model. meta: The meta of the model. @@ -83,15 +121,20 @@ class Model: package_name: The package name of the model. database: The database name of the model. schema: The schema name of the model. + raw_code: The raw code of the model. + alias: The alias of the model. + patch_path: The yml path of the model, e.g. `package://model_dir/dir/file.yml`. tags: The list of tags attached to the model. tests: The list of tests attached to the model. depends_on: Dictionary of models/sources/macros that the model depends on. + _node_values: The raw values of the model in the manifest. + _test_values: The raw test values of the model in the manifest. """ unique_id: str name: str + relation_name: str description: str - patch_path: str original_file_path: str config: dict[str, Any] meta: dict[str, Any] @@ -99,9 +142,14 @@ class Model: package_name: str database: str schema: str + raw_code: str + alias: str | None = None + patch_path: str | None = None tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) depends_on: dict[str, list[str]] = field(default_factory=dict) + _node_values: dict[str, Any] = field(default_factory=dict) + _test_values: list[dict[str, Any]] = field(default_factory=list) def get_column(self, column_name: str) -> Column | None: """Get a column by name.""" @@ -113,59 +161,52 @@ def get_column(self, column_name: str) -> Column | None: @staticmethod def _get_columns( - node_values: dict[str, Any], tests_values: list[dict[str, Any]] + node_values: dict[str, Any], test_values: list[dict[str, Any]] ) -> list[Column]: - """Get columns from a node and it's tests in the manifest.""" - columns = [ - Column( - name=values.get("name"), - description=values.get("description"), - constraints=[ - Constraint( - name=constraint.get("name"), - type=constraint.get("type"), - expression=constraint.get("expression"), - ) - for constraint in values.get("constraints", []) - ], - tags=values.get("tags", []), - tests=[ - Test.from_node(test) - for test in tests_values - if test["test_metadata"].get("kwargs", {}).get("column_name") - == values.get("name") + """Get columns from a node and its tests in the manifest.""" + return [ + Column.from_node_values( + values, + [ + test + for test in test_values + if test["test_metadata"]["kwargs"].get("column_name") == name ], ) for name, values in node_values.get("columns", {}).items() ] - return columns @classmethod def from_node( - cls, node_values: dict[str, Any], tests_values: list[dict[str, Any]] + cls, node_values: dict[str, Any], test_values: list[dict[str, Any]] ) -> "Model": """Create a model object from a node and it's tests in the manifest.""" model = cls( unique_id=node_values["unique_id"], name=node_values["name"], - description=node_values.get("description", ""), - patch_path=node_values["patch_path"], + relation_name=node_values["relation_name"], + description=node_values["description"], original_file_path=node_values["original_file_path"], - config=node_values.get("config", {}), - meta=node_values.get("meta", {}), - columns=cls._get_columns(node_values, tests_values), + config=node_values["config"], + meta=node_values["meta"], + columns=cls._get_columns(node_values, test_values), package_name=node_values["package_name"], database=node_values["database"], schema=node_values["schema"], - tags=node_values.get("tags", []), + alias=node_values["alias"], + patch_path=node_values["patch_path"], + tags=node_values["tags"], tests=[ Test.from_node(test) - for test in tests_values - if not test["test_metadata"].get("kwargs", {}).get("column_name") + for test in test_values + if not test["test_metadata"]["kwargs"].get("column_name") ], - depends_on=node_values.get("depends_on", {}), + depends_on=node_values["depends_on"], ) + model._node_values = node_values + model._test_values = test_values + return model diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 43cd06a..686bb49 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -1,9 +1,8 @@ """Rule definitions.""" -import functools from dataclasses import dataclass from enum import Enum -from typing import Any, Callable, Type +from typing import Callable, Type from dbt_score.models import Model @@ -36,10 +35,9 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore if not hasattr(cls, "description"): raise TypeError("Subclass must define class attribute `description`.") - @classmethod - def evaluate(cls, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: """Evaluates the rule.""" - raise NotImplementedError("Subclass must implement class method `evaluate`.") + raise NotImplementedError("Subclass must implement method `evaluate`.") def rule( @@ -58,10 +56,7 @@ def rule( def decorator_rule( func: Callable[[Model], RuleViolation | None], ) -> Type[Rule]: - @functools.wraps(func) - def wrapper_rule(*args: Any, **kwargs: Any) -> Any: - return func(*args, **kwargs) - + """Decorator function.""" if func.__doc__ is None and description is None: raise TypeError("Rule must define `description` or `func.__doc__`.") @@ -77,7 +72,7 @@ def wrapper_rule(*args: Any, **kwargs: Any) -> Any: { "description": rule_description, "severity": severity, - "evaluate": wrapper_rule, + "evaluate": func, }, ) diff --git a/src/dbt_score/rules/__init__.py b/src/dbt_score/rules/__init__.py new file mode 100644 index 0000000..9a7e8a7 --- /dev/null +++ b/src/dbt_score/rules/__init__.py @@ -0,0 +1 @@ +"""Rules.""" diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 1aec4a2..333fabd 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -9,15 +9,13 @@ class ComplexRule(Rule): description = "Example of a complex rule." - @classmethod - def preprocess(cls) -> int: + def preprocess(self) -> int: """Preprocessing.""" - return 1 + return len(self.description) - @classmethod - def evaluate(cls, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: """Evaluate model.""" - x = cls.preprocess() + x = self.preprocess() if x: return RuleViolation(str(x)) @@ -26,7 +24,7 @@ def evaluate(cls, model: Model) -> RuleViolation | None: @rule() -def has_owner(model: Model) -> RuleViolation | None: +def has_owner(self, model: Model) -> RuleViolation | None: """A model should have an owner defined.""" if "owner" not in model.meta: return RuleViolation("Define the owner of the model in the meta section.") @@ -35,7 +33,7 @@ def has_owner(model: Model) -> RuleViolation | None: @rule() -def has_primary_key(model: Model) -> RuleViolation | None: +def has_primary_key(self, model: Model) -> RuleViolation | None: """A model should have a primary key defined, unless it's a view.""" if not model.config.get("materialized") == "picnic_view": has_pk = False @@ -51,7 +49,7 @@ def has_primary_key(model: Model) -> RuleViolation | None: @rule() -def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: +def primary_key_has_uniqueness_test(self, model: Model) -> RuleViolation | None: """Primary key columns should have a uniqueness test defined.""" columns_with_pk = [] if model.config.get("materialized") == "view": @@ -68,7 +66,7 @@ def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: @rule() -def columns_have_description(model: Model) -> RuleViolation | None: +def columns_have_description(self, model: Model) -> RuleViolation | None: """All columns of a model should have a description.""" invalid_column_names = [ column.name for column in model.columns if not column.description @@ -83,7 +81,7 @@ def columns_have_description(model: Model) -> RuleViolation | None: @rule(description="A model should have at least one test defined.") -def has_test(model: Model) -> RuleViolation | None: +def has_test(self, model: Model) -> RuleViolation | None: """A model should have at least one model-level or column-level test defined. This does not include singular tests, which are tests defined in a separate .sql From 7e24cddcaac721fe4dc81ebc1bf1e468386929fd Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 13:45:56 +0100 Subject: [PATCH 08/27] Fix Model.from_node --- src/dbt_score/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 237acfa..dd6d4bf 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -193,6 +193,7 @@ def from_node( package_name=node_values["package_name"], database=node_values["database"], schema=node_values["schema"], + raw_code=node_values["raw_code"], alias=node_values["alias"], patch_path=node_values["patch_path"], tags=node_values["tags"], From e1e0fb31e88b77e644c5a99d53532ca313115e29 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 13:55:54 +0100 Subject: [PATCH 09/27] Add _values to Constraint --- src/dbt_score/models.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index dd6d4bf..f64fd3e 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -10,14 +10,27 @@ class Constraint: """Constraint for a column. Attributes: - name: The name of the constraint. type: The type of the constraint, e.g. `foreign_key`. + name: The name of the constraint. expression: The expression of the constraint, e.g. `schema.other_table`. + _raw_values: The raw values of the constraint in the manifest. """ - name: str type: str - expression: str + name: str | None = None + expression: str | None = None + _raw_values: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_raw_values(cls, raw_values: dict[str, Any]) -> "Constraint": + """Create a constraint object from a constraint node in the manifest.""" + constraint = cls( + type=raw_values["type"], + name=raw_values["name"], + expression=raw_values["expression"], + ) + constraint._raw_values = raw_values + return constraint @dataclass @@ -88,11 +101,7 @@ def from_node_values( data_type=values["data_type"], meta=values["meta"], constraints=[ - Constraint( - name=constraint["name"], - type=constraint["type"], - expression=constraint["expression"], - ) + Constraint.from_raw_values(constraint) for constraint in values["constraints"] ], tags=values["tags"], From 787bebfd85eb477211e2c90a5134e3045eb5054a Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 14:02:11 +0100 Subject: [PATCH 10/27] Fix typehints --- src/dbt_score/rule.py | 6 +++--- src/dbt_score/rules/example_rules.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 686bb49..ea145ce 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from enum import Enum -from typing import Callable, Type +from typing import Any, Callable, Type from dbt_score.models import Model @@ -43,7 +43,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: def rule( description: str | None = None, severity: Severity = Severity.MEDIUM, -) -> Callable[[Callable[[Model], RuleViolation | None]], Type[Rule]]: +) -> Callable[[Callable[[Any, Model], RuleViolation | None]], Type[Rule]]: """Rule decorator. The rule decorator creates a rule class (subclass of Rule) and returns it. @@ -54,7 +54,7 @@ def rule( """ def decorator_rule( - func: Callable[[Model], RuleViolation | None], + func: Callable[[Any, Model], RuleViolation | None], ) -> Type[Rule]: """Decorator function.""" if func.__doc__ is None and description is None: diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 333fabd..0189cff 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -24,7 +24,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: @rule() -def has_owner(self, model: Model) -> RuleViolation | None: +def has_owner(self: Rule, model: Model) -> RuleViolation | None: """A model should have an owner defined.""" if "owner" not in model.meta: return RuleViolation("Define the owner of the model in the meta section.") @@ -33,7 +33,7 @@ def has_owner(self, model: Model) -> RuleViolation | None: @rule() -def has_primary_key(self, model: Model) -> RuleViolation | None: +def has_primary_key(self: Rule, model: Model) -> RuleViolation | None: """A model should have a primary key defined, unless it's a view.""" if not model.config.get("materialized") == "picnic_view": has_pk = False @@ -49,7 +49,7 @@ def has_primary_key(self, model: Model) -> RuleViolation | None: @rule() -def primary_key_has_uniqueness_test(self, model: Model) -> RuleViolation | None: +def primary_key_has_uniqueness_test(self: Rule, model: Model) -> RuleViolation | None: """Primary key columns should have a uniqueness test defined.""" columns_with_pk = [] if model.config.get("materialized") == "view": @@ -66,7 +66,7 @@ def primary_key_has_uniqueness_test(self, model: Model) -> RuleViolation | None: @rule() -def columns_have_description(self, model: Model) -> RuleViolation | None: +def columns_have_description(self: Rule, model: Model) -> RuleViolation | None: """All columns of a model should have a description.""" invalid_column_names = [ column.name for column in model.columns if not column.description @@ -81,7 +81,7 @@ def columns_have_description(self, model: Model) -> RuleViolation | None: @rule(description="A model should have at least one test defined.") -def has_test(self, model: Model) -> RuleViolation | None: +def has_test(self: Rule, model: Model) -> RuleViolation | None: """A model should have at least one model-level or column-level test defined. This does not include singular tests, which are tests defined in a separate .sql From dce673bb3e388a0face7cce8149a2dde90be52cc Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 14:05:20 +0100 Subject: [PATCH 11/27] Rename _raw_node to _raw_values in order to align naming --- src/dbt_score/models.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index f64fd3e..c6385dd 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -136,8 +136,8 @@ class Model: tags: The list of tags attached to the model. tests: The list of tests attached to the model. depends_on: Dictionary of models/sources/macros that the model depends on. - _node_values: The raw values of the model in the manifest. - _test_values: The raw test values of the model in the manifest. + _raw_values: The raw values of the model (node) in the manifest. + _raw_test_values: The raw test values of the model (node) in the manifest. """ unique_id: str @@ -157,8 +157,8 @@ class Model: tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) depends_on: dict[str, list[str]] = field(default_factory=dict) - _node_values: dict[str, Any] = field(default_factory=dict) - _test_values: list[dict[str, Any]] = field(default_factory=list) + _raw_values: dict[str, Any] = field(default_factory=dict) + _raw_test_values: list[dict[str, Any]] = field(default_factory=list) def get_column(self, column_name: str) -> Column | None: """Get a column by name.""" @@ -214,8 +214,8 @@ def from_node( depends_on=node_values["depends_on"], ) - model._node_values = node_values - model._test_values = test_values + model._raw_values = node_values + model._raw_test_values = test_values return model From c14d7a7c0536053b4d4d38962b4182d781d0dba2 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 17:02:19 +0100 Subject: [PATCH 12/27] Get rid of self in regular method --- src/dbt_score/rule.py | 10 +++++++--- src/dbt_score/rules/example_rules.py | 10 +++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index ea145ce..6471537 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -43,7 +43,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: def rule( description: str | None = None, severity: Severity = Severity.MEDIUM, -) -> Callable[[Callable[[Any, Model], RuleViolation | None]], Type[Rule]]: +) -> Callable[[Callable[[Model], RuleViolation | None]], Type[Rule]]: """Rule decorator. The rule decorator creates a rule class (subclass of Rule) and returns it. @@ -54,7 +54,7 @@ def rule( """ def decorator_rule( - func: Callable[[Any, Model], RuleViolation | None], + func: Callable[[Model], RuleViolation | None], ) -> Type[Rule]: """Decorator function.""" if func.__doc__ is None and description is None: @@ -65,6 +65,10 @@ def decorator_rule( func.__doc__.split("\n")[0] if func.__doc__ else None ) + def wrapped_func(self: Rule, *args: Any, **kwargs: Any) -> RuleViolation | None: + """Wrap func to add `self`.""" + return func(*args, **kwargs) + # Create the rule class inheriting from Rule. rule_class = type( func.__name__, @@ -72,7 +76,7 @@ def decorator_rule( { "description": rule_description, "severity": severity, - "evaluate": func, + "evaluate": wrapped_func, }, ) diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 0189cff..9401c3c 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -24,7 +24,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: @rule() -def has_owner(self: Rule, model: Model) -> RuleViolation | None: +def has_owner(model: Model) -> RuleViolation | None: """A model should have an owner defined.""" if "owner" not in model.meta: return RuleViolation("Define the owner of the model in the meta section.") @@ -33,7 +33,7 @@ def has_owner(self: Rule, model: Model) -> RuleViolation | None: @rule() -def has_primary_key(self: Rule, model: Model) -> RuleViolation | None: +def has_primary_key(model: Model) -> RuleViolation | None: """A model should have a primary key defined, unless it's a view.""" if not model.config.get("materialized") == "picnic_view": has_pk = False @@ -49,7 +49,7 @@ def has_primary_key(self: Rule, model: Model) -> RuleViolation | None: @rule() -def primary_key_has_uniqueness_test(self: Rule, model: Model) -> RuleViolation | None: +def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: """Primary key columns should have a uniqueness test defined.""" columns_with_pk = [] if model.config.get("materialized") == "view": @@ -66,7 +66,7 @@ def primary_key_has_uniqueness_test(self: Rule, model: Model) -> RuleViolation | @rule() -def columns_have_description(self: Rule, model: Model) -> RuleViolation | None: +def columns_have_description(model: Model) -> RuleViolation | None: """All columns of a model should have a description.""" invalid_column_names = [ column.name for column in model.columns if not column.description @@ -81,7 +81,7 @@ def columns_have_description(self: Rule, model: Model) -> RuleViolation | None: @rule(description="A model should have at least one test defined.") -def has_test(self: Rule, model: Model) -> RuleViolation | None: +def has_test(model: Model) -> RuleViolation | None: """A model should have at least one model-level or column-level test defined. This does not include singular tests, which are tests defined in a separate .sql From b8636f254e09410a13db7777371ae17b329a40f8 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 17:06:38 +0100 Subject: [PATCH 13/27] Add private variables to classmethods --- src/dbt_score/models.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index c6385dd..7ab9ec4 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -24,13 +24,12 @@ class Constraint: @classmethod def from_raw_values(cls, raw_values: dict[str, Any]) -> "Constraint": """Create a constraint object from a constraint node in the manifest.""" - constraint = cls( + return cls( type=raw_values["type"], name=raw_values["name"], expression=raw_values["expression"], + _raw_values=raw_values ) - constraint._raw_values = raw_values - return constraint @dataclass @@ -54,14 +53,13 @@ class Test: @classmethod def from_node(cls, test_node: dict[str, Any]) -> "Test": """Create a test object from a test node in the manifest.""" - test = cls( + return cls( name=test_node["name"], type=test_node["test_metadata"]["name"], kwargs=test_node["test_metadata"].get("kwargs", {}), tags=test_node.get("tags", []), + _raw_values=test_node ) - test._raw_values = test_node - return test @dataclass @@ -95,7 +93,7 @@ def from_node_values( cls, values: dict[str, Any], test_values: list[dict[str, Any]] ) -> "Column": """Create a column object from raw values.""" - column = cls( + return cls( name=values["name"], description=values["description"], data_type=values["data_type"], @@ -106,13 +104,10 @@ def from_node_values( ], tags=values["tags"], tests=[Test.from_node(test) for test in test_values], + _raw_values=values, + _raw_test_values=test_values ) - column._raw_values = values - column._raw_test_values = test_values - - return column - @dataclass class Model: @@ -190,7 +185,7 @@ def from_node( cls, node_values: dict[str, Any], test_values: list[dict[str, Any]] ) -> "Model": """Create a model object from a node and it's tests in the manifest.""" - model = cls( + return cls( unique_id=node_values["unique_id"], name=node_values["name"], relation_name=node_values["relation_name"], @@ -212,13 +207,10 @@ def from_node( if not test["test_metadata"]["kwargs"].get("column_name") ], depends_on=node_values["depends_on"], + _raw_values=node_values, + _raw_test_values=test_values, ) - model._raw_values = node_values - model._raw_test_values = test_values - - return model - class ManifestLoader: """Load the models and tests from the manifest.""" From db2643b0d74df4ecfdae35f81748d2145bddd54b Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 17:22:05 +0100 Subject: [PATCH 14/27] Load manifest from path in ManifestLoader --- src/dbt_score/models.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 7ab9ec4..2fb2045 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -1,7 +1,8 @@ """Objects related to loading the dbt manifest.""" - +import json from collections import defaultdict from dataclasses import dataclass, field +from pathlib import Path from typing import Any @@ -28,7 +29,7 @@ def from_raw_values(cls, raw_values: dict[str, Any]) -> "Constraint": type=raw_values["type"], name=raw_values["name"], expression=raw_values["expression"], - _raw_values=raw_values + _raw_values=raw_values, ) @@ -58,7 +59,7 @@ def from_node(cls, test_node: dict[str, Any]) -> "Test": type=test_node["test_metadata"]["name"], kwargs=test_node["test_metadata"].get("kwargs", {}), tags=test_node.get("tags", []), - _raw_values=test_node + _raw_values=test_node, ) @@ -105,7 +106,7 @@ def from_node_values( tags=values["tags"], tests=[Test.from_node(test) for test in test_values], _raw_values=values, - _raw_test_values=test_values + _raw_test_values=test_values, ) @@ -215,14 +216,15 @@ def from_node( class ManifestLoader: """Load the models and tests from the manifest.""" - def __init__(self, raw_manifest: dict[str, Any]): + def __init__(self, file_path: Path): """Initialize the ManifestLoader. Args: - raw_manifest: The dictionary representation of the JSON manifest. + file_path: The file path of the JSON manifest. """ - self.raw_manifest = raw_manifest - self.raw_nodes = raw_manifest.get("nodes", {}) + self.file_path = file_path + self.raw_manifest = json.loads(self.file_path.read_text(encoding="utf-8")) + self.raw_nodes = self.raw_manifest.get("nodes", {}) self.models: list[Model] = [] self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list) From 66e2c7322688a8820359533c82dd8adf5061a1f8 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Mon, 18 Mar 2024 17:23:12 +0100 Subject: [PATCH 15/27] Improve loading --- src/dbt_score/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 2fb2045..04c298b 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -222,8 +222,7 @@ def __init__(self, file_path: Path): Args: file_path: The file path of the JSON manifest. """ - self.file_path = file_path - self.raw_manifest = json.loads(self.file_path.read_text(encoding="utf-8")) + self.raw_manifest = json.loads(file_path.read_text(encoding="utf-8")) self.raw_nodes = self.raw_manifest.get("nodes", {}) self.models: list[Model] = [] self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list) From cb82aa2f1adf9cd7455a76a768afcada350dcb09 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 20 Mar 2024 14:23:30 +0100 Subject: [PATCH 16/27] Add unit tests. --- tests/conftest.py | 148 ++++++++++++++++++++++++++++++++++++++++++- tests/test_models.py | 18 ++++++ tests/test_rule.py | 46 ++++++++++++++ 3 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 tests/test_models.py create mode 100644 tests/test_rule.py diff --git a/tests/conftest.py b/tests/conftest.py index 8bd86b1..77c61bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,155 @@ """Test configuration.""" +from typing import Any, Type -from pytest import ExitCode, Session +from dbt_score.models import Model +from dbt_score.rule import Rule, RuleViolation, rule +from pytest import ExitCode, Session, fixture def pytest_sessionfinish(session: Session, exitstatus: int): """Avoid ci failure if no tests are found.""" if exitstatus == ExitCode.NO_TESTS_COLLECTED: session.exitstatus = ExitCode.OK + + +@fixture +def raw_manifest() -> dict[str, Any]: + """Mock the raw manifest.""" + return { + "nodes": { + "analysis.package.analysis1": {"resource_type": "analysis"}, + "model.package.model1": { + "resource_type": "model", + "unique_id": "model.package.model1", + "name": "model1", + "relation_name": "database.schema.model1", + "description": "Description1.", + "original_file_path": "/path/to/model1.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [], + } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model1_alias", + "patch_path": "/path/to/model1.yml", + "tags": [], + "depends_on": {}, + }, + "model.package.model2": { + "resource_type": "model", + "unique_id": "model.package.model2", + "name": "model2", + "relation_name": "database.schema.model2", + "description": "Description2.", + "original_file_path": "/path/to/model2.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [], + } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model2_alias", + "patch_path": "/path/to/model2.yml", + "tags": [], + "depends_on": {}, + }, + "test.package.test1": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test1", + "test_metadata": {"name": "type", "kwargs": {"column_name": "a"}}, + "tags": [], + }, + "test.package.test2": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test2", + "test_metadata": {"name": "type", "kwargs": {}}, + "tags": [], + }, + "test.package.test3": {"resource_type": "test"}, + } + } + + +@fixture +def model1(raw_manifest) -> Model: + """Model 1.""" + return Model.from_node(raw_manifest["nodes"]["model.package.model1"], []) + + +@fixture +def model2(raw_manifest) -> Model: + """Model 2.""" + return Model.from_node(raw_manifest["nodes"]["model.package.model2"], []) + + +@fixture +def decorator_rule() -> Type[Rule]: + """An example rule created with the rule decorator.""" + + @rule() + def example_rule(model: Model) -> RuleViolation | None: + """Description of the rule.""" + if model.name == "model1": + return RuleViolation(message="Model1 is a violation.") + return None + + return example_rule + + +@fixture +def class_rule() -> Type[Rule]: + """An example rule created with a class.""" + + class ExampleRule(Rule): + """Example rule.""" + + description = "Description of the rule." + + def evaluate(self, model: Model) -> RuleViolation | None: + """Evaluate model.""" + if model.name == "model1": + return RuleViolation(message="Model1 is a violation.") + return None + + return ExampleRule + + +@fixture +def invalid_class_rule() -> Type[Rule]: + """An example rule created with a class.""" + + class ExampleRule(Rule): + """Example rule.""" + + description = "Description of the rule." + + def evaluate(self, model: Model) -> RuleViolation | None: + """Evaluate model.""" + if model.name == "model1": + return RuleViolation(message="Model1 is a violation.") + return None + + return ExampleRule diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..645c9b4 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,18 @@ +"""Test models.""" + +from pathlib import Path +from unittest.mock import patch + +from dbt_score.models import ManifestLoader + + +@patch("dbt_score.models.Path.read_text") +def test_manifest(read_text, raw_manifest): + """Test loading a manifest.""" + with patch("dbt_score.models.json.loads", return_value=raw_manifest): + loader = ManifestLoader(Path("manifest.json")) + assert len(loader.models) == len([node for node in + raw_manifest["nodes"].values() + if node["resource_type"] == "model"]) + assert loader.models[0].tests[0].name == "test2" + assert loader.models[0].columns[0].tests[0].name == "test1" diff --git a/tests/test_rule.py b/tests/test_rule.py new file mode 100644 index 0000000..5494e3a --- /dev/null +++ b/tests/test_rule.py @@ -0,0 +1,46 @@ +"""Test rule.""" + +import pytest +from dbt_score.models import Model +from dbt_score.rule import Rule, RuleViolation, Severity + + +def test_rule_decorator(decorator_rule, class_rule, model1, model2): + """Test rule creation with the rule decorator and class.""" + decorator_rule_instance = decorator_rule() + class_rule_instance = class_rule() + + def assertions(rule_instance): + assert isinstance(rule_instance, Rule) + assert rule_instance.severity == Severity.MEDIUM + assert rule_instance.description == "Description of the rule." + assert rule_instance.evaluate(model1) == RuleViolation( + message="Model1 is a violation.") + assert rule_instance.evaluate(model2) is None + + assertions(decorator_rule_instance) + assertions(class_rule_instance) + + +def test_missing_description_rule_class(class_rule): + """Test missing description in rule class.""" + with pytest.raises(TypeError): + class BadRule(Rule): + """Bad example rule.""" + + def evaluate(self, model: Model) -> RuleViolation | None: + """Evaluate model.""" + return None + + +def test_missing_evaluate_rule_class(class_rule, model1): + """Test missing evaluate implementation in rule class.""" + class BadRule(Rule): + """Bad example rule.""" + description = "Description of the rule." + + rule = BadRule() + + with pytest.raises(NotImplementedError): + rule.evaluate(model1) + From 2bf66c5803d6f9ef83198fff6648aff651686f06 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 20 Mar 2024 14:26:16 +0100 Subject: [PATCH 17/27] Remove unused variables --- tests/test_rule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_rule.py b/tests/test_rule.py index 5494e3a..63901e8 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -22,7 +22,7 @@ def assertions(rule_instance): assertions(class_rule_instance) -def test_missing_description_rule_class(class_rule): +def test_missing_description_rule_class(): """Test missing description in rule class.""" with pytest.raises(TypeError): class BadRule(Rule): @@ -33,7 +33,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: return None -def test_missing_evaluate_rule_class(class_rule, model1): +def test_missing_evaluate_rule_class(model1): """Test missing evaluate implementation in rule class.""" class BadRule(Rule): """Bad example rule.""" From 8256171d9250d6902f7ab355cb6c1cf77d1e2f55 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Wed, 20 Mar 2024 14:43:00 +0100 Subject: [PATCH 18/27] Remove non-default rules --- src/dbt_score/rules/example_rules.py | 97 ---------------------------- src/dbt_score/rules/rules.py | 28 ++++++++ tests/test_models.py | 10 ++- tests/test_rule.py | 7 +- 4 files changed, 40 insertions(+), 102 deletions(-) delete mode 100644 src/dbt_score/rules/example_rules.py create mode 100644 src/dbt_score/rules/rules.py diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py deleted file mode 100644 index 9401c3c..0000000 --- a/src/dbt_score/rules/example_rules.py +++ /dev/null @@ -1,97 +0,0 @@ -"""All general rules.""" - -from dbt_score.models import Model -from dbt_score.rule import Rule, RuleViolation, rule - - -class ComplexRule(Rule): - """Complex rule.""" - - description = "Example of a complex rule." - - def preprocess(self) -> int: - """Preprocessing.""" - return len(self.description) - - def evaluate(self, model: Model) -> RuleViolation | None: - """Evaluate model.""" - x = self.preprocess() - - if x: - return RuleViolation(str(x)) - else: - return None - - -@rule() -def has_owner(model: Model) -> RuleViolation | None: - """A model should have an owner defined.""" - if "owner" not in model.meta: - return RuleViolation("Define the owner of the model in the meta section.") - - return None - - -@rule() -def has_primary_key(model: Model) -> RuleViolation | None: - """A model should have a primary key defined, unless it's a view.""" - if not model.config.get("materialized") == "picnic_view": - has_pk = False - for column in model.columns: - if "primary_key" in [constraint.type for constraint in column.constraints]: - has_pk = True - break - - if not has_pk: - return RuleViolation() - - return None - - -@rule() -def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None: - """Primary key columns should have a uniqueness test defined.""" - columns_with_pk = [] - if model.config.get("materialized") == "view": - for column in model.columns: - if "primary_key" in [constraint.type for constraint in column.constraints]: - columns_with_pk.append(column) - - tests = columns_with_pk[0].tests if len(columns_with_pk) == 1 else model.tests - - if columns_with_pk and "unique" not in [test.type for test in tests]: - return RuleViolation() - - return None - - -@rule() -def columns_have_description(model: Model) -> RuleViolation | None: - """All columns of a model should have a description.""" - invalid_column_names = [ - column.name for column in model.columns if not column.description - ] - if invalid_column_names: - return RuleViolation( - message=f"The following columns lack a description: " - f"{', '.join(invalid_column_names)}." - ) - - return None - - -@rule(description="A model should have at least one test defined.") -def has_test(model: Model) -> RuleViolation | None: - """A model should have at least one model-level or column-level test defined. - - This does not include singular tests, which are tests defined in a separate .sql - file and not linked to the model in the metadata. - """ - column_tests = [] - for column in model.columns: - column_tests.extend(column.tests) - - if len(model.tests) == 0 and len(column_tests) == 0: - return RuleViolation("Define a test for the model on model- or column-level.") - - return None diff --git a/src/dbt_score/rules/rules.py b/src/dbt_score/rules/rules.py new file mode 100644 index 0000000..48ba5fc --- /dev/null +++ b/src/dbt_score/rules/rules.py @@ -0,0 +1,28 @@ +"""All general rules.""" + +from dbt_score.models import Model +from dbt_score.rule import RuleViolation, rule + + +@rule() +def has_description(model: Model) -> RuleViolation | None: + """A model should have a description.""" + if not model.description: + return RuleViolation(message="Model lacks a description.") + + return None + + +@rule() +def columns_have_description(model: Model) -> RuleViolation | None: + """All columns of a model should have a description.""" + invalid_column_names = [ + column.name for column in model.columns if not column.description + ] + if invalid_column_names: + return RuleViolation( + message=f"The following columns lack a description: " + f"{', '.join(invalid_column_names)}." + ) + + return None diff --git a/tests/test_models.py b/tests/test_models.py index 645c9b4..e39046a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -11,8 +11,12 @@ def test_manifest(read_text, raw_manifest): """Test loading a manifest.""" with patch("dbt_score.models.json.loads", return_value=raw_manifest): loader = ManifestLoader(Path("manifest.json")) - assert len(loader.models) == len([node for node in - raw_manifest["nodes"].values() - if node["resource_type"] == "model"]) + assert len(loader.models) == len( + [ + node + for node in raw_manifest["nodes"].values() + if node["resource_type"] == "model" + ] + ) assert loader.models[0].tests[0].name == "test2" assert loader.models[0].columns[0].tests[0].name == "test1" diff --git a/tests/test_rule.py b/tests/test_rule.py index 63901e8..d4ae41c 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -15,7 +15,8 @@ def assertions(rule_instance): assert rule_instance.severity == Severity.MEDIUM assert rule_instance.description == "Description of the rule." assert rule_instance.evaluate(model1) == RuleViolation( - message="Model1 is a violation.") + message="Model1 is a violation." + ) assert rule_instance.evaluate(model2) is None assertions(decorator_rule_instance) @@ -25,6 +26,7 @@ def assertions(rule_instance): def test_missing_description_rule_class(): """Test missing description in rule class.""" with pytest.raises(TypeError): + class BadRule(Rule): """Bad example rule.""" @@ -35,12 +37,13 @@ def evaluate(self, model: Model) -> RuleViolation | None: def test_missing_evaluate_rule_class(model1): """Test missing evaluate implementation in rule class.""" + class BadRule(Rule): """Bad example rule.""" + description = "Description of the rule." rule = BadRule() with pytest.raises(NotImplementedError): rule.evaluate(model1) - From 05c46ac974a5cf984747285b18adee970716ad34 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 21 Mar 2024 11:42:40 +0100 Subject: [PATCH 19/27] Clean up conftest and add json resource --- tests/conftest.py | 104 +++------------------------------- tests/resources/manifest.json | 88 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 95 deletions(-) create mode 100644 tests/resources/manifest.json diff --git a/tests/conftest.py b/tests/conftest.py index 77c61bd..5534cba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,7 @@ """Test configuration.""" + +import json +from pathlib import Path from typing import Any, Type from dbt_score.models import Model @@ -13,84 +16,13 @@ def pytest_sessionfinish(session: Session, exitstatus: int): @fixture -def raw_manifest() -> dict[str, Any]: +def raw_manifest() -> Any: """Mock the raw manifest.""" - return { - "nodes": { - "analysis.package.analysis1": {"resource_type": "analysis"}, - "model.package.model1": { - "resource_type": "model", - "unique_id": "model.package.model1", - "name": "model1", - "relation_name": "database.schema.model1", - "description": "Description1.", - "original_file_path": "/path/to/model1.sql", - "config": {}, - "meta": {}, - "columns": { - "a": { - "name": "column_a", - "description": "Column A.", - "data_type": "string", - "meta": {}, - "constraints": [], - "tags": [], - } - }, - "package_name": "package", - "database": "db", - "schema": "schema", - "raw_code": "SELECT x FROM y", - "alias": "model1_alias", - "patch_path": "/path/to/model1.yml", - "tags": [], - "depends_on": {}, - }, - "model.package.model2": { - "resource_type": "model", - "unique_id": "model.package.model2", - "name": "model2", - "relation_name": "database.schema.model2", - "description": "Description2.", - "original_file_path": "/path/to/model2.sql", - "config": {}, - "meta": {}, - "columns": { - "a": { - "name": "column_a", - "description": "Column A.", - "data_type": "string", - "meta": {}, - "constraints": [], - "tags": [], - } - }, - "package_name": "package", - "database": "db", - "schema": "schema", - "raw_code": "SELECT x FROM y", - "alias": "model2_alias", - "patch_path": "/path/to/model2.yml", - "tags": [], - "depends_on": {}, - }, - "test.package.test1": { - "resource_type": "test", - "attached_node": "model.package.model1", - "name": "test1", - "test_metadata": {"name": "type", "kwargs": {"column_name": "a"}}, - "tags": [], - }, - "test.package.test2": { - "resource_type": "test", - "attached_node": "model.package.model1", - "name": "test2", - "test_metadata": {"name": "type", "kwargs": {}}, - "tags": [], - }, - "test.package.test3": {"resource_type": "test"}, - } - } + return json.loads( + Path(__file__) + .parent.joinpath("resources/manifest.json") + .read_text(encoding="utf-8") + ) @fixture @@ -135,21 +67,3 @@ def evaluate(self, model: Model) -> RuleViolation | None: return None return ExampleRule - - -@fixture -def invalid_class_rule() -> Type[Rule]: - """An example rule created with a class.""" - - class ExampleRule(Rule): - """Example rule.""" - - description = "Description of the rule." - - def evaluate(self, model: Model) -> RuleViolation | None: - """Evaluate model.""" - if model.name == "model1": - return RuleViolation(message="Model1 is a violation.") - return None - - return ExampleRule diff --git a/tests/resources/manifest.json b/tests/resources/manifest.json new file mode 100644 index 0000000..0385938 --- /dev/null +++ b/tests/resources/manifest.json @@ -0,0 +1,88 @@ +{ + "nodes": { + "analysis.package.analysis1": { + "resource_type": "analysis" + }, + "model.package.model1": { + "resource_type": "model", + "unique_id": "model.package.model1", + "name": "model1", + "relation_name": "database.schema.model1", + "description": "Description1.", + "original_file_path": "/path/to/model1.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [] + } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model1_alias", + "patch_path": "/path/to/model1.yml", + "tags": [], + "depends_on": {} + }, + "model.package.model2": { + "resource_type": "model", + "unique_id": "model.package.model2", + "name": "model2", + "relation_name": "database.schema.model2", + "description": "Description2.", + "original_file_path": "/path/to/model2.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [] + } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model2_alias", + "patch_path": "/path/to/model2.yml", + "tags": [], + "depends_on": {} + }, + "test.package.test1": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test1", + "test_metadata": { + "name": "type", + "kwargs": { + "column_name": "a" + } + }, + "tags": [] + }, + "test.package.test2": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test2", + "test_metadata": { + "name": "type", + "kwargs": {} + }, + "tags": [] + }, + "test.package.test3": { + "resource_type": "test" + } + } +} \ No newline at end of file From c77279a00485257fb1af295cee6a2f277a775cfa Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 21 Mar 2024 11:52:56 +0100 Subject: [PATCH 20/27] Prettify manifest.json --- tests/resources/manifest.json | 170 +++++++++++++++++----------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/tests/resources/manifest.json b/tests/resources/manifest.json index 0385938..cc3a420 100644 --- a/tests/resources/manifest.json +++ b/tests/resources/manifest.json @@ -1,88 +1,88 @@ { - "nodes": { - "analysis.package.analysis1": { - "resource_type": "analysis" - }, - "model.package.model1": { - "resource_type": "model", - "unique_id": "model.package.model1", - "name": "model1", - "relation_name": "database.schema.model1", - "description": "Description1.", - "original_file_path": "/path/to/model1.sql", - "config": {}, - "meta": {}, - "columns": { - "a": { - "name": "column_a", - "description": "Column A.", - "data_type": "string", - "meta": {}, - "constraints": [], - "tags": [] - } - }, - "package_name": "package", - "database": "db", - "schema": "schema", - "raw_code": "SELECT x FROM y", - "alias": "model1_alias", - "patch_path": "/path/to/model1.yml", - "tags": [], - "depends_on": {} - }, - "model.package.model2": { - "resource_type": "model", - "unique_id": "model.package.model2", - "name": "model2", - "relation_name": "database.schema.model2", - "description": "Description2.", - "original_file_path": "/path/to/model2.sql", - "config": {}, - "meta": {}, - "columns": { - "a": { - "name": "column_a", - "description": "Column A.", - "data_type": "string", - "meta": {}, - "constraints": [], - "tags": [] - } - }, - "package_name": "package", - "database": "db", - "schema": "schema", - "raw_code": "SELECT x FROM y", - "alias": "model2_alias", - "patch_path": "/path/to/model2.yml", - "tags": [], - "depends_on": {} - }, - "test.package.test1": { - "resource_type": "test", - "attached_node": "model.package.model1", - "name": "test1", - "test_metadata": { - "name": "type", - "kwargs": { - "column_name": "a" - } - }, - "tags": [] - }, - "test.package.test2": { - "resource_type": "test", - "attached_node": "model.package.model1", - "name": "test2", - "test_metadata": { - "name": "type", - "kwargs": {} - }, - "tags": [] - }, - "test.package.test3": { - "resource_type": "test" + "nodes": { + "analysis.package.analysis1": { + "resource_type": "analysis" + }, + "model.package.model1": { + "resource_type": "model", + "unique_id": "model.package.model1", + "name": "model1", + "relation_name": "database.schema.model1", + "description": "Description1.", + "original_file_path": "/path/to/model1.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [] } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model1_alias", + "patch_path": "/path/to/model1.yml", + "tags": [], + "depends_on": {} + }, + "model.package.model2": { + "resource_type": "model", + "unique_id": "model.package.model2", + "name": "model2", + "relation_name": "database.schema.model2", + "description": "Description2.", + "original_file_path": "/path/to/model2.sql", + "config": {}, + "meta": {}, + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [] + } + }, + "package_name": "package", + "database": "db", + "schema": "schema", + "raw_code": "SELECT x FROM y", + "alias": "model2_alias", + "patch_path": "/path/to/model2.yml", + "tags": [], + "depends_on": {} + }, + "test.package.test1": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test1", + "test_metadata": { + "name": "type", + "kwargs": { + "column_name": "a" + } + }, + "tags": [] + }, + "test.package.test2": { + "resource_type": "test", + "attached_node": "model.package.model1", + "name": "test2", + "test_metadata": { + "name": "type", + "kwargs": {} + }, + "tags": [] + }, + "test.package.test3": { + "resource_type": "test" } -} \ No newline at end of file + } +} From cbca0b240eacb4fbdaaae6bc0a56016a5ea379e3 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 11:29:01 +0100 Subject: [PATCH 21/27] Update src/dbt_score/models.py Co-authored-by: Matthieu Caneill --- src/dbt_score/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 04c298b..51b556c 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -1,4 +1,5 @@ """Objects related to loading the dbt manifest.""" + import json from collections import defaultdict from dataclasses import dataclass, field From 23282dbf2679dffe5d89620ee28335375984680d Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 11:30:07 +0100 Subject: [PATCH 22/27] Update tests/test_models.py Co-authored-by: Matthieu Caneill --- tests/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index e39046a..7efce1a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -10,7 +10,7 @@ def test_manifest(read_text, raw_manifest): """Test loading a manifest.""" with patch("dbt_score.models.json.loads", return_value=raw_manifest): - loader = ManifestLoader(Path("manifest.json")) + loader = ManifestLoader(Path("some.json")) assert len(loader.models) == len( [ node From 09b088328462f8cdcccb0285ced2ab55e6ca6a9f Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 11:33:48 +0100 Subject: [PATCH 23/27] Update tests/conftest.py Co-authored-by: Matthieu Caneill --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5534cba..8e8a06e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ def raw_manifest() -> Any: """Mock the raw manifest.""" return json.loads( Path(__file__) - .parent.joinpath("resources/manifest.json") + .parent / "resources" / "manifest.json" .read_text(encoding="utf-8") ) From 2a4966c0faf4ad9efd20757472a4f34aa581cbbc Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 11:37:52 +0100 Subject: [PATCH 24/27] Naming and fix path --- src/dbt_score/rules/{rules.py => generic.py} | 2 +- tests/conftest.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/dbt_score/rules/{rules.py => generic.py} (96%) diff --git a/src/dbt_score/rules/rules.py b/src/dbt_score/rules/generic.py similarity index 96% rename from src/dbt_score/rules/rules.py rename to src/dbt_score/rules/generic.py index 48ba5fc..664f97f 100644 --- a/src/dbt_score/rules/rules.py +++ b/src/dbt_score/rules/generic.py @@ -1,4 +1,4 @@ -"""All general rules.""" +"""All generic rules.""" from dbt_score.models import Model from dbt_score.rule import RuleViolation, rule diff --git a/tests/conftest.py b/tests/conftest.py index 8e8a06e..f2dfc8a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,8 +19,8 @@ def pytest_sessionfinish(session: Session, exitstatus: int): def raw_manifest() -> Any: """Mock the raw manifest.""" return json.loads( - Path(__file__) - .parent / "resources" / "manifest.json" + (Path(__file__) + .parent / "resources" / "manifest.json") .read_text(encoding="utf-8") ) From a21c5c4a041da6c2edb79c5c4be0cea3c85d9b13 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 11:39:04 +0100 Subject: [PATCH 25/27] Improve indendation --- tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f2dfc8a..ff6ee77 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,8 +19,7 @@ def pytest_sessionfinish(session: Session, exitstatus: int): def raw_manifest() -> Any: """Mock the raw manifest.""" return json.loads( - (Path(__file__) - .parent / "resources" / "manifest.json") + (Path(__file__).parent / "resources" / "manifest.json") .read_text(encoding="utf-8") ) From 8d94bf952574eaf9d4dd9bf18b1082a2cbb2c266 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 15:26:10 +0100 Subject: [PATCH 26/27] Ignore missing return value for rules --- src/dbt_score/rules/generic.py | 7 ++----- tests/conftest.py | 5 +++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/dbt_score/rules/generic.py b/src/dbt_score/rules/generic.py index 664f97f..0a0c6d8 100644 --- a/src/dbt_score/rules/generic.py +++ b/src/dbt_score/rules/generic.py @@ -1,8 +1,9 @@ """All generic rules.""" - from dbt_score.models import Model from dbt_score.rule import RuleViolation, rule +# mypy: disable-error-code="return" + @rule() def has_description(model: Model) -> RuleViolation | None: @@ -10,8 +11,6 @@ def has_description(model: Model) -> RuleViolation | None: if not model.description: return RuleViolation(message="Model lacks a description.") - return None - @rule() def columns_have_description(model: Model) -> RuleViolation | None: @@ -24,5 +23,3 @@ def columns_have_description(model: Model) -> RuleViolation | None: message=f"The following columns lack a description: " f"{', '.join(invalid_column_names)}." ) - - return None diff --git a/tests/conftest.py b/tests/conftest.py index ff6ee77..782c622 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,8 +19,9 @@ def pytest_sessionfinish(session: Session, exitstatus: int): def raw_manifest() -> Any: """Mock the raw manifest.""" return json.loads( - (Path(__file__).parent / "resources" / "manifest.json") - .read_text(encoding="utf-8") + (Path(__file__).parent / "resources" / "manifest.json").read_text( + encoding="utf-8" + ) ) From f39613704640b86f407839fd608a1aa0a5788f4d Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Fri, 22 Mar 2024 16:03:15 +0100 Subject: [PATCH 27/27] Add test and improve exceptions --- src/dbt_score/rule.py | 4 ++-- tests/test_rule.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 6471537..2170b20 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -33,7 +33,7 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore """Initializes the subclass.""" super().__init_subclass__(**kwargs) if not hasattr(cls, "description"): - raise TypeError("Subclass must define class attribute `description`.") + raise AttributeError("Subclass must define class attribute `description`.") def evaluate(self, model: Model) -> RuleViolation | None: """Evaluates the rule.""" @@ -58,7 +58,7 @@ def decorator_rule( ) -> Type[Rule]: """Decorator function.""" if func.__doc__ is None and description is None: - raise TypeError("Rule must define `description` or `func.__doc__`.") + raise AttributeError("Rule must define `description` or `func.__doc__`.") # Get description parameter, otherwise use the docstring. rule_description = description or ( diff --git a/tests/test_rule.py b/tests/test_rule.py index d4ae41c..febdd7b 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -2,10 +2,10 @@ import pytest from dbt_score.models import Model -from dbt_score.rule import Rule, RuleViolation, Severity +from dbt_score.rule import Rule, RuleViolation, Severity, rule -def test_rule_decorator(decorator_rule, class_rule, model1, model2): +def test_rule_decorator_and_class(decorator_rule, class_rule, model1, model2): """Test rule creation with the rule decorator and class.""" decorator_rule_instance = decorator_rule() class_rule_instance = class_rule() @@ -23,9 +23,18 @@ def assertions(rule_instance): assertions(class_rule_instance) +def test_missing_description_rule_decorator(): + """Test missing description in rule decorator.""" + with pytest.raises(AttributeError): + + @rule() + def example_rule(model: Model) -> RuleViolation | None: + return None + + def test_missing_description_rule_class(): """Test missing description in rule class.""" - with pytest.raises(TypeError): + with pytest.raises(AttributeError): class BadRule(Rule): """Bad example rule."""