Add medals to scoring (#32)
Models can now receive a badge based on their score:

- Badge thresholds can be configured
- Badges are added to the output

---------

Co-authored-by: Matthieu Caneill <matthieucan@users.noreply.github.com>
jochemvandooren and matthieucan authored Jun 3, 2024
1 parent bd40888 commit 1631b4f
Showing 15 changed files with 322 additions and 91 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -17,3 +17,4 @@ and this project adheres to
- Linting and scoring functionality for dbt models.
- Configuration through `pyproject.toml`.
- Default rules in `dbt_score.rules.generic`.
- Badges for project and model evaluation.
30 changes: 30 additions & 0 deletions docs/configuration.md
@@ -17,6 +17,15 @@ below:
rule_namespaces = ["dbt_score.rules", "dbt_score_rules", "custom_rules"]
disabled_rules = ["dbt_score.rules.generic.columns_have_description"]

[tool.dbt-score.badges]
first.threshold = 10.0
first.icon = "🥇"
second.threshold = 8.0
second.icon = "🥈"
third.threshold = 6.0
third.icon = "🥉"
wip.icon = "🏗️"

[tool.dbt-score.rules."dbt_score.rules.generic.sql_has_reasonable_number_of_lines"]
severity = 1
max_lines = 300
@@ -38,6 +47,27 @@ The following options can be set in the `pyproject.toml` file:
if not included here.
- `disabled_rules`: A list of rules to disable.

#### Badges configuration

```toml
[tool.dbt-score.badges]
```

Four badges can be configured: `first`, `second`, `third` and `wip`. Each badge
can be configured with the following option:

- `icon`: The icon to use for the badge. A string that will be displayed in the
output, e.g. `🥇`.

All badges except `wip` can be configured with the following option:

- `threshold`: The threshold for the badge. A decimal number between `0.0` and
`10.0` that will be used to compare to the score. The threshold is the minimum
score required for a model to be rewarded with a certain badge.

The default values can be found in the
[BadgeConfig](/reference/config/#dbt_score.config.BadgeConfig).
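
As a rough sketch (not part of dbt-score; `pick_badge` is a hypothetical helper), this is how the thresholds above select a badge: the highest badge whose threshold the score reaches wins, and `wip` acts as the fallback with no threshold of its own.

```python
def pick_badge(score: float) -> str:
    """Hypothetical helper mirroring the threshold logic described above."""
    thresholds = [("🥇", 10.0), ("🥈", 8.0), ("🥉", 6.0)]  # first, second, third
    for icon, threshold in thresholds:
        if score >= threshold:
            return icon
    return "🏗️"  # the wip icon from the example configuration above


print(pick_badge(8.3))  # 🥈
```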

#### Rule configuration

```toml
11 changes: 6 additions & 5 deletions docs/index.md
@@ -12,15 +12,16 @@ encourage) good practices.

```shell
$ dbt-score lint
Model customers
🥇 customers (score: 10.0)
OK dbt_score.rules.generic.has_description
WARN (medium) dbt_score.rules.generic.has_owner: Model lacks an owner.
OK dbt_score.rules.generic.has_owner
OK dbt_score.rules.generic.sql_has_reasonable_number_of_lines
Score: 6.7
Score: 10.0 🥇
```

In this example, `dbt-score` reports a warning: the model `customers` does not
declare an owner. Hence, it doesn't score the maximum value of `10`.
In this example, the model `customers` scores the maximum value of `10.0` as it
passes all the rules. It is also awarded a gold medal for the perfect score.

## Philosophy

61 changes: 58 additions & 3 deletions src/dbt_score/config.py
@@ -2,6 +2,7 @@

import logging
import tomllib
from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import Any, Final

@@ -12,19 +13,53 @@
DEFAULT_CONFIG_FILE = "pyproject.toml"


@dataclass
class Badge:
"""Badge object."""

icon: str
threshold: float


@dataclass
class BadgeConfig:
"""Configuration for badges."""

third: Badge = field(default_factory=lambda: Badge("🥉", 6.0))
second: Badge = field(default_factory=lambda: Badge("🥈", 8.0))
first: Badge = field(default_factory=lambda: Badge("🥇", 10.0))
wip: Badge = field(default_factory=lambda: Badge("🚧", 0.0))

def validate(self) -> None:
"""Validate the badge configuration."""
if not (self.first.threshold > self.second.threshold > self.third.threshold):
raise ValueError("Invalid badge thresholds.")
if self.first.threshold > 10.0: # noqa: PLR2004 [magic-value-comparison]
raise ValueError("first threshold must 10.0 or lower.")
if self.third.threshold < 0.0:
raise ValueError("third threshold must be 0.0 or higher.")
if self.wip.threshold != 0.0:
raise AttributeError("wip badge cannot have a threshold configuration.")


class Config:
"""Configuration for dbt-score."""

_main_section: Final[str] = "tool.dbt-score"
_options: Final[list[str]] = ["rule_namespaces", "disabled_rules"]
_rules_section: Final[str] = f"{_main_section}.rules"
_options: Final[list[str]] = [
"rule_namespaces",
"disabled_rules",
]
_rules_section: Final[str] = "rules"
_badges_section: Final[str] = "badges"

def __init__(self) -> None:
"""Initialize the Config object."""
self.rule_namespaces: list[str] = ["dbt_score.rules", "dbt_score_rules"]
self.disabled_rules: list[str] = []
self.rules_config: dict[str, RuleConfig] = {}
self.config_file: Path | None = None
self.badge_config: BadgeConfig = BadgeConfig()

def set_option(self, option: str, value: Any) -> None:
"""Set an option in the config."""
@@ -37,7 +72,8 @@ def _load_toml_file(self, file: str) -> None:

tools = toml_data.get("tool", {})
dbt_score_config = tools.get("dbt-score", {})
rules_config = dbt_score_config.pop("rules", {})
rules_config = dbt_score_config.pop(self._rules_section, {})
badge_config = dbt_score_config.pop(self._badges_section, {})

# Main configuration
for option, value in dbt_score_config.items():
@@ -50,6 +86,25 @@ def _load_toml_file(self, file: str) -> None:
f"Option {option} in {self._main_section} not supported."
)

# Badge configuration
for name, config in badge_config.items():
try:
default_config = getattr(self.badge_config, name)
updated_config = replace(default_config, **config)
setattr(self.badge_config, name, updated_config)
except AttributeError as e:
options = list(BadgeConfig.__annotations__.keys())
raise AttributeError(f"Config only accepts badges: {options}.") from e
except TypeError as e:
options = list(Badge.__annotations__.keys())
if name == "wip":
options.remove("threshold")
raise AttributeError(
f"Badge {name}: config only accepts {options}."
) from e

self.badge_config.validate()

# Rule configuration
self.rules_config = {
name: RuleConfig.from_dict(config) for name, config in rules_config.items()
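
Below is a minimal sketch, assuming `dbt_score` with these changes is installed, of how a partial `[tool.dbt-score.badges]` entry is merged onto the defaults via `dataclasses.replace`, mirroring the loop in `_load_toml_file` above; the override value is hypothetical.

```python
from dataclasses import replace

from dbt_score.config import BadgeConfig

badge_config = BadgeConfig()

# `first.icon = "🏆"` in pyproject.toml arrives here as {"first": {"icon": "🏆"}}
user_badges = {"first": {"icon": "🏆"}}

for name, overrides in user_badges.items():
    default = getattr(badge_config, name)  # e.g. Badge(icon="🥇", threshold=10.0)
    setattr(badge_config, name, replace(default, **overrides))

badge_config.validate()  # thresholds untouched, so 10.0 > 8.0 > 6.0 still holds
print(badge_config.first)  # Badge(icon='🏆', threshold=10.0)
```
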
6 changes: 3 additions & 3 deletions src/dbt_score/evaluation.py
@@ -8,7 +8,7 @@
from dbt_score.models import ManifestLoader, Model
from dbt_score.rule import Rule, RuleViolation
from dbt_score.rule_registry import RuleRegistry
from dbt_score.scoring import Scorer
from dbt_score.scoring import Score, Scorer

# The results of a given model are stored in a dictionary, mapping rules to either:
# - None if there was no issue
@@ -44,10 +44,10 @@ def __init__(
self.results: dict[Model, ModelResultsType] = {}

# For each model, its computed score
self.scores: dict[Model, float] = {}
self.scores: dict[Model, Score] = {}

# The aggregated project score
self.project_score: float
self.project_score: Score

def evaluate(self) -> None:
"""Evaluate all rules."""
6 changes: 4 additions & 2 deletions src/dbt_score/formatters/__init__.py
@@ -5,6 +5,8 @@
import typing
from abc import ABC, abstractmethod

from dbt_score.scoring import Score

if typing.TYPE_CHECKING:
from dbt_score.evaluation import ModelResultsType
from dbt_score.models import ManifestLoader, Model
@@ -19,12 +21,12 @@ def __init__(self, manifest_loader: ManifestLoader):

@abstractmethod
def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
raise NotImplementedError

@abstractmethod
def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
raise NotImplementedError
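
For illustration, a hypothetical formatter subclass (not part of this commit) showing how the `Score` object, carrying both the numeric value and the badge, now flows through the two callbacks:

```python
from dbt_score.formatters import Formatter
from dbt_score.scoring import Score


class OneLineFormatter(Formatter):
    """Hypothetical formatter that prints a single line per model."""

    def model_evaluated(self, model, results, score: Score) -> None:
        # One line per model: badge, name, rounded score.
        print(f"{score.badge} {model.name}: {score.value:.1f}")

    def project_evaluated(self, score: Score) -> None:
        print(f"Project: {score.value:.1f} {score.badge}")
```
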
12 changes: 7 additions & 5 deletions src/dbt_score/formatters/human_readable_formatter.py
@@ -5,6 +5,7 @@
from dbt_score.formatters import Formatter
from dbt_score.models import Model
from dbt_score.rule import RuleViolation
from dbt_score.scoring import Score


class HumanReadableFormatter(Formatter):
@@ -21,10 +22,12 @@ def bold(text: str) -> str:
return f"\033[1m{text}\033[0m"

def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
print(f"Model {self.bold(model.name)}")
print(
f"{score.badge} {self.bold(model.name)} (score: {round(score.value, 1)!s})"
)
for rule, result in results.items():
if result is None:
print(f"{self.indent}{self.label_ok} {rule.source()}")
@@ -35,9 +38,8 @@ def model_evaluated(
)
else:
print(f"{self.indent}{self.label_error} {rule.source()}: {result!s}")
print(f"Score: {self.bold(str(round(score, 1)))}")
print()

def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
print(f"Project score: {self.bold(str(round(score, 1)))}")
print(f"Project score: {self.bold(str(round(score.value, 1)))} {score.badge}")
12 changes: 7 additions & 5 deletions src/dbt_score/formatters/manifest_formatter.py
@@ -7,25 +7,27 @@
from dbt_score.evaluation import ModelResultsType
from dbt_score.formatters import Formatter
from dbt_score.models import Model
from dbt_score.scoring import Score


class ManifestFormatter(Formatter):
"""Formatter to generate manifest.json with score metadata."""

def __init__(self, *args: Any, **kwargs: Any) -> None:
"""Instantiate a manifest formatter."""
self._model_scores: dict[str, float] = {}
self._model_scores: dict[str, Score] = {}
super().__init__(*args, **kwargs)

def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
self._model_scores[model.unique_id] = score

def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
manifest = copy.copy(self._manifest_loader.raw_manifest)
for model_id, score in self._model_scores.items():
manifest["nodes"][model_id]["meta"]["score"] = round(score, 1)
for model_id, model_score in self._model_scores.items():
manifest["nodes"][model_id]["meta"]["score"] = model_score.value
manifest["nodes"][model_id]["meta"]["badge"] = model_score.badge
print(json.dumps(manifest, indent=2))
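
To illustrate the resulting shape (node id and values are hypothetical), each evaluated node's `meta` now carries both the score and its badge:

```python
import json

# Hypothetical fragment of the manifest emitted by ManifestFormatter.
manifest_json = """
{"nodes": {"model.my_project.customers": {"meta": {"score": 10.0, "badge": "🥇"}}}}
"""
meta = json.loads(manifest_json)["nodes"]["model.my_project.customers"]["meta"]
print(meta["score"], meta["badge"])  # 10.0 🥇
```
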
2 changes: 1 addition & 1 deletion src/dbt_score/lint.py
@@ -30,7 +30,7 @@ def lint_dbt_project(
formatters = {"plain": HumanReadableFormatter, "manifest": ManifestFormatter}
formatter = formatters[format](manifest_loader=manifest_loader)

scorer = Scorer()
scorer = Scorer(config)

evaluation = Evaluation(
rule_registry=rule_registry,
54 changes: 43 additions & 11 deletions src/dbt_score/scoring.py
@@ -3,12 +3,23 @@
from __future__ import annotations

import typing
from dataclasses import dataclass

from dbt_score.config import Config

if typing.TYPE_CHECKING:
from dbt_score.evaluation import ModelResultsType
from dbt_score.rule import RuleViolation, Severity


@dataclass
class Score:
"""Class representing a score."""

value: float
badge: str


class Scorer:
"""Logic for computing scores."""

@@ -22,20 +33,24 @@ class Scorer:
min_score = 0.0
max_score = 10.0

def score_model(self, model_results: ModelResultsType) -> float:
def __init__(self, config: Config) -> None:
"""Create a Scorer object."""
self._config = config

def score_model(self, model_results: ModelResultsType) -> Score:
"""Compute the score of a given model."""
if len(model_results) == 0:
# No rule? No problem
return self.max_score
if any(
score = self.max_score
elif any(
rule.severity == Severity.CRITICAL and isinstance(result, RuleViolation)
for rule, result in model_results.items()
):
# If there's a CRITICAL violation, the score is 0
return self.min_score
score = self.min_score
else:
# Otherwise, the score is the weighted average (by severity) of the results
return (
score = (
sum(
[
# The more severe the violation, the more points are lost
@@ -49,11 +64,28 @@ def score_model(self, model_results: ModelResultsType) -> float:
* self.max_score
)

def score_aggregate_models(self, scores: list[float]) -> float:
return Score(score, self._badge(score))

def score_aggregate_models(self, scores: list[Score]) -> Score:
"""Compute the score of a list of models."""
if 0.0 in scores:
actual_scores = [s.value for s in scores]
if 0.0 in actual_scores:
# Any model with a CRITICAL violation makes the project score 0
return self.min_score
if len(scores) == 0:
return self.max_score
return sum(scores) / len(scores)
score = Score(self.min_score, self._badge(self.min_score))
elif len(actual_scores) == 0:
score = Score(self.max_score, self._badge(self.max_score))
else:
average_score = sum(actual_scores) / len(actual_scores)
score = Score(average_score, self._badge(average_score))
return score

def _badge(self, score: float) -> str:
"""Compute the badge of a given score."""
if score >= self._config.badge_config.first.threshold:
return self._config.badge_config.first.icon
elif score >= self._config.badge_config.second.threshold:
return self._config.badge_config.second.icon
elif score >= self._config.badge_config.third.threshold:
return self._config.badge_config.third.icon
else:
return self._config.badge_config.wip.icon
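
A small usage sketch, assuming `dbt_score` with these changes is installed, that exercises the default thresholds through `score_aggregate_models` (the per-model scores are made up):

```python
from dbt_score.config import Config
from dbt_score.scoring import Score, Scorer

scorer = Scorer(Config())  # default badges: 🥇 >= 10.0, 🥈 >= 8.0, 🥉 >= 6.0, else 🚧

project = scorer.score_aggregate_models(
    [Score(10.0, "🥇"), Score(8.5, "🥈"), Score(5.0, "🚧")]
)
print(round(project.value, 1), project.badge)  # 7.8 🥉 (simple average of the model scores)
```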