Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement rule evaluation and scoring #11

Merged
merged 10 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/reference/evaluation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Evaluation

::: dbt_score.evaluation
3 changes: 3 additions & 0 deletions docs/reference/formatters/human_readable_formatter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Human-readable formatter

::: dbt_score.formatters.human_readable_formatter
3 changes: 3 additions & 0 deletions docs/reference/formatters/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Formatters

::: dbt_score.formatters
4 changes: 4 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ nav:
- Reference:
- reference/cli.md
- reference/exceptions.md
- reference/evaluation.md
- reference/models.md
- reference/rule.md
- Formatters:
- reference/formatters/index.md
- reference/formatters/human_readable_formatter.md
- Rules:
- reference/rules/generic.md
- reference/rule_registry.md
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ module = "tests.*"
disallow_untyped_defs = false
disallow_incomplete_defs = false
disallow_untyped_calls = false
warn_no_return = false

### Ruff ###

Expand Down
103 changes: 103 additions & 0 deletions src/dbt_score/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""This module is responsible for evaluating rules."""

from __future__ import annotations

from typing import Type

from dbt_score.formatters import Formatter
from dbt_score.models import ManifestLoader, Model
from dbt_score.rule import Rule, RuleViolation, Severity
from dbt_score.rule_registry import RuleRegistry

# The results of a given model are stored in a dictionary, mapping rules to either:
# - None if there was no issue
# - A RuleViolation if a linting error was found
# - An Exception if the rule failed to run
ModelResultsType = dict[Type[Rule], None | RuleViolation | Exception]


class Evaluation:
    """Evaluate a set of rules on a set of nodes.

    Holds the per-model rule results and scores, plus the aggregated project
    score, and notifies a formatter as the evaluation progresses.
    """

    def __init__(
        self,
        rule_registry: RuleRegistry,
        manifest_loader: ManifestLoader,
        formatter: Formatter,
    ) -> None:
        """Create an Evaluation object.

        Args:
            rule_registry: A rule registry.
            manifest_loader: A manifest loader.
            formatter: A formatter.
        """
        self._rule_registry = rule_registry
        self._manifest_loader = manifest_loader
        self._formatter = formatter

        # For each model, the outcome of every rule: None (pass),
        # a RuleViolation, or the Exception the rule raised.
        self.results: dict[Model, ModelResultsType] = {}

        # For each model, its computed score.
        self.scores: dict[Model, float] = {}

        # The aggregated project score, set by evaluate().
        self.project_score: float

    def evaluate(self) -> None:
        """Run every rule against every model, then score models and project."""
        # Instantiate all rules. In case they keep state across calls, this must be
        # done only once.
        rules = [rule_class() for rule_class in self._rule_registry.rules.values()]

        for model in self._manifest_loader.models:
            self.results[model] = {}
            for rule in rules:
                try:
                    # A rule returns None when the model passes, or a
                    # RuleViolation describing the problem.
                    result: None | RuleViolation | Exception = rule.evaluate(model)
                except Exception as e:
                    # A misbehaving rule must not abort the whole evaluation;
                    # record the exception as that rule's result instead.
                    result = e
                self.results[model][rule.__class__] = result

            self.scores[model] = self.score_model(model)
            self._formatter.model_evaluated(
                model, self.results[model], self.scores[model]
            )

        # Compute score for project
        self.project_score = self.score_project()
        self._formatter.project_evaluated(self.project_score)

    def score_model(self, model: Model) -> float:
        """Compute the score of a given model.

        Returns:
            A score in [0.0, 1.0]: 1.0 when no rule applies or none is
            violated, 0.0 when a CRITICAL rule is violated, otherwise a
            severity-weighted average of the rule outcomes.
        """
        model_results = self.results[model]

        if not model_results:
            # No rule? No problem
            return 1.0
        if any(
            rule.severity == Severity.CRITICAL and isinstance(result, RuleViolation)
            for rule, result in model_results.items()
        ):
            # If there's a CRITICAL violation, the score is 0
            return 0.0
        # Otherwise, the score is the weighted average (by severity) of the results
        return sum(
            # The more severe the violation, the more points are lost
            3 - rule.severity.value if isinstance(result, RuleViolation) else 3
            for rule, result in model_results.items()
        ) / (3 * len(model_results))

    def score_project(self) -> float:
        """Compute the aggregated score for the project (mean of model scores)."""
        if not self.scores:
            # An empty project is not penalized.
            return 1.0
        return sum(self.scores.values()) / len(self.scores)
26 changes: 26 additions & 0 deletions src/dbt_score/formatters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Formatters are used to output CLI results."""

from __future__ import annotations

import typing
from abc import ABC, abstractmethod

if typing.TYPE_CHECKING:
from dbt_score.evaluation import ModelResultsType
from dbt_score.models import Model


class Formatter(ABC):
    """Base class every CLI output formatter must implement.

    Concrete formatters receive callbacks as the evaluation progresses:
    once per evaluated model, and once for the whole project.
    """

    @abstractmethod
    def model_evaluated(
        self, model: Model, results: ModelResultsType, score: float
    ) -> None:
        """Handle the event of a single model having been evaluated."""
        raise NotImplementedError

    @abstractmethod
    def project_evaluated(self, score: float) -> None:
        """Handle the event of the whole project having been evaluated."""
        raise NotImplementedError
43 changes: 43 additions & 0 deletions src/dbt_score/formatters/human_readable_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Human readable formatter."""


from dbt_score.evaluation import ModelResultsType
from dbt_score.formatters import Formatter
from dbt_score.models import Model
from dbt_score.rule import RuleViolation


class HumanReadableFormatter(Formatter):
    """Formatter for human-readable messages in the terminal."""

    indent = " "
    label_ok = "\033[1;32mOK \033[0m"
    label_warning = "\033[1;33mWARN\033[0m"
    label_error = "\033[1;31mERR \033[0m"

    @staticmethod
    def bold(text: str) -> str:
        """Wrap *text* in ANSI bold escape codes."""
        return f"\033[1m{text}\033[0m"

    def model_evaluated(
        self, model: Model, results: ModelResultsType, score: float
    ) -> None:
        """Print one line per rule outcome for the model, then its score."""
        print(f"Model {self.bold(model.name)}")
        for rule, result in results.items():
            if result is None:
                # Rule passed.
                line = f"{self.indent}{self.label_ok} {rule.source()}"
            elif isinstance(result, RuleViolation):
                # Rule found a linting issue.
                line = (
                    f"{self.indent}{self.label_warning} "
                    f"{rule.source()}: {result.message}"
                )
            else:
                # The rule itself raised an exception.
                line = f"{self.indent}{self.label_error} {rule.source()}: {result!s}"
            print(line)
        print(f"Score: {self.bold(str(round(score * 10, 1)))}")
        print()

    def project_evaluated(self, score: float) -> None:
        """Print the aggregated project score."""
        print(f"Project score: {self.bold(str(round(score * 10, 1)))}")
19 changes: 19 additions & 0 deletions src/dbt_score/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,27 @@

from pathlib import Path

from dbt_score.evaluation import Evaluation
from dbt_score.formatters.human_readable_formatter import HumanReadableFormatter
from dbt_score.models import ManifestLoader
from dbt_score.rule_registry import RuleRegistry


def lint_dbt_project(manifest_path: Path) -> None:
    """Lint dbt manifest.

    Loads all rules, evaluates them against the models in the manifest,
    and prints the results with the human-readable formatter.

    Args:
        manifest_path: Path to the dbt manifest file.

    Raises:
        FileNotFoundError: If no manifest exists at the given path.
    """
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found at {manifest_path}.")

    rule_registry = RuleRegistry()
    rule_registry.load_all()

    evaluation = Evaluation(
        rule_registry=rule_registry,
        manifest_loader=ManifestLoader(manifest_path),
        formatter=HumanReadableFormatter(),
    )
    evaluation.evaluate()
4 changes: 4 additions & 0 deletions src/dbt_score/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ def from_node(
_raw_test_values=test_values,
)

def __hash__(self) -> int:
    """Return a hash for the model, derived from its unique dbt id."""
    # Models are used as dictionary keys (e.g. in evaluation results),
    # so identity follows the manifest's unique_id.
    return hash(self.unique_id)


class ManifestLoader:
"""Load the models and tests from the manifest."""
Expand Down
12 changes: 12 additions & 0 deletions src/dbt_score/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ def evaluate(self, model: Model) -> RuleViolation | None:
"""Evaluates the rule."""
raise NotImplementedError("Subclass must implement method `evaluate`.")

@classmethod
def source(cls) -> str:
    """Return the fully qualified name identifying this rule."""
    return ".".join((cls.__module__, cls.__name__))

def __hash__(self) -> int:
    """Return a hash derived from the rule's fully qualified source name."""
    # Rules are keyed by identity of their fully qualified name, so two
    # instances of the same rule class hash identically.
    return hash(self.source())


# Use @overload to have proper typing for both @rule and @rule(...).
# https://mypy.readthedocs.io/en/stable/generics.html#decorator-factories
Expand Down Expand Up @@ -105,6 +114,9 @@ def wrapped_func(self: Rule, *args: Any, **kwargs: Any) -> RuleViolation | None:
"description": rule_description,
"severity": severity,
"evaluate": wrapped_func,
# Forward origin of the decorated function
"__qualname__": func.__qualname__, # https://peps.python.org/pep-3155/
"__module__": func.__module__,
},
)

Expand Down
9 changes: 6 additions & 3 deletions src/dbt_score/rule_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,12 @@ def _load(self, namespace_name: str) -> None:
for obj_name in dir(module):
obj = module.__dict__[obj_name]
if type(obj) is type and issubclass(obj, Rule):
if obj_name in self.rules:
raise DuplicatedRuleException(obj_name)
self._rules[obj_name] = obj
self._add_rule(obj_name, obj)

def _add_rule(self, name: str, rule: Type[Rule]) -> None:
    """Add a rule to the registry, rejecting duplicate rule names.

    Args:
        name: The rule's name, used as its registry key.
        rule: The rule class to register.

    Raises:
        DuplicatedRuleException: If a rule with the same name is already
            registered.
    """
    if name in self.rules:
        raise DuplicatedRuleException(name)
    self._rules[name] = rule

def load_all(self) -> None:
"""Load all rules, core and third-party."""
Expand Down
Loading