Add medals to scoring #32

Merged
merged 21 commits on Jun 3, 2024
Changes from 19 commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -17,3 +17,4 @@ and this project adheres to
- Linting and scoring functionality for dbt models.
- Configuration through `pyproject.toml`.
- Default rules in `dbt_score.rules.generic`.
- Badges for project and model evaluation.
30 changes: 30 additions & 0 deletions docs/configuration.md
@@ -17,6 +17,15 @@ below:
rule_namespaces = ["dbt_score.rules", "dbt_score_rules", "custom_rules"]
disabled_rules = ["dbt_score.rules.generic.columns_have_description"]

[tool.dbt-score.badges]
first.threshold = 10.0
first.icon = "🥇"
second.threshold = 8.0
second.icon = "🥈"
third.threshold = 6.0
third.icon = "🥉"
wip.icon = "🏗️"

[tool.dbt-score.rules."dbt_score.rules.generic.sql_has_reasonable_number_of_lines"]
severity = 1
max_lines = 300
@@ -38,6 +47,27 @@ The following options can be set in the `pyproject.toml` file:
if not included here.
- `disabled_rules`: A list of rules to disable.

#### Badges configuration

```toml
[tool.dbt-score.badges]
```

Four badges can be configured: `first`, `second`, `third` and `wip`. Each badge
can be configured with the following option:

- `icon`: The icon to use for the badge. A string that will be displayed in the
output, e.g. `🥇`.

All badges except `wip` can be configured with the following option:

- `threshold`: The threshold for the badge. A float between `0.0` and `10.0`
that is compared to the score. The threshold is the minimum score required
for a model to be awarded a certain badge.

The default values can be found in the
[BadgeConfig](/reference/config/#dbt_score.config.BadgeConfig).
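
To make the `threshold` semantics concrete, here is a minimal, illustrative
sketch of how a score maps to a badge (checked from `first` down to `third`,
with `wip` as the fallback). It uses the example values from the configuration
above and is not the package's actual implementation.

```python
def pick_badge(score: float) -> str:
    """Return the badge icon for a score; the highest matching badge wins."""
    if score >= 10.0:  # first.threshold
        return "🥇"
    if score >= 8.0:  # second.threshold
        return "🥈"
    if score >= 6.0:  # third.threshold
        return "🥉"
    return "🏗️"  # wip has no threshold and catches everything below third


print(pick_badge(9.2))  # 🥈
print(pick_badge(5.0))  # 🏗️
```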

#### Rule configuration

```toml
11 changes: 6 additions & 5 deletions docs/index.md
@@ -12,15 +12,16 @@ encourage) good practices.

```shell
$ dbt-score lint
Model customers
🥇 customers (score: 10.0)
OK dbt_score.rules.generic.has_description
WARN (medium) dbt_score.rules.generic.has_owner: Model lacks an owner.
OK dbt_score.rules.generic.has_owner
OK dbt_score.rules.generic.sql_has_reasonable_number_of_lines
Score: 6.7
Score: 10.0 🥇
```

In this example, `dbt-score` reports a warning: the model `customers` does not
declare an owner. Hence, it doesn't score the maximum value of `10`.
In this example, the model `customers` scores the maximum value of `10.0` as it
passes all the rules. It is also awarded a gold medal for its perfect score.

## Philosophy

61 changes: 58 additions & 3 deletions src/dbt_score/config.py
@@ -2,6 +2,7 @@

import logging
import tomllib
from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import Any, Final

@@ -12,19 +13,53 @@
DEFAULT_CONFIG_FILE = "pyproject.toml"


@dataclass
class Badge:
"""Badge object."""

icon: str
threshold: float


@dataclass
class BadgeConfig:
"""Configuration for badges."""

third: Badge = field(default_factory=lambda: Badge("🥉", 6.0))
second: Badge = field(default_factory=lambda: Badge("🥈", 8.0))
first: Badge = field(default_factory=lambda: Badge("🥇", 10.0))
wip: Badge = field(default_factory=lambda: Badge("🚧", 0.0))

def validate(self) -> None:
"""Validate the badge configuration."""
if not (self.first.threshold > self.second.threshold > self.third.threshold):
raise ValueError("Invalid badge thresholds.")
if self.first.threshold > 10.0: # noqa: PLR2004 [magic-value-comparison]
raise ValueError("first threshold must 10.0 or lower.")
if self.third.threshold < 0.0:
raise ValueError("third threshold must be 0.0 or higher.")
if self.wip.threshold != 0.0:
raise AttributeError("wip badge cannot have a threshold configuration.")


class Config:
"""Configuration for dbt-score."""

_main_section: Final[str] = "tool.dbt-score"
_options: Final[list[str]] = ["rule_namespaces", "disabled_rules"]
_rules_section: Final[str] = f"{_main_section}.rules"
_options: Final[list[str]] = [
"rule_namespaces",
"disabled_rules",
]
_rules_section: Final[str] = "rules"
_badges_section: Final[str] = "badges"

def __init__(self) -> None:
"""Initialize the Config object."""
self.rule_namespaces: list[str] = ["dbt_score.rules", "dbt_score_rules"]
self.disabled_rules: list[str] = []
self.rules_config: dict[str, RuleConfig] = {}
self.config_file: Path | None = None
self.badge_config: BadgeConfig = BadgeConfig()

def set_option(self, option: str, value: Any) -> None:
"""Set an option in the config."""
@@ -37,7 +72,8 @@ def _load_toml_file(self, file: str) -> None:

tools = toml_data.get("tool", {})
dbt_score_config = tools.get("dbt-score", {})
rules_config = dbt_score_config.pop("rules", {})
rules_config = dbt_score_config.pop(self._rules_section, {})
badge_config = dbt_score_config.pop(self._badges_section, {})

# Main configuration
for option, value in dbt_score_config.items():
@@ -50,6 +86,25 @@ def _load_toml_file(self, file: str) -> None:
f"Option {option} in {self._main_section} not supported."
)

# Badge configuration
for name, config in badge_config.items():
try:
default_config = getattr(self.badge_config, name)
updated_config = replace(default_config, **config)
setattr(self.badge_config, name, updated_config)
except AttributeError as e:
options = list(BadgeConfig.__annotations__.keys())
raise AttributeError(f"Config only accepts badges: {options}.") from e
except TypeError as e:
options = list(Badge.__annotations__.keys())
if name == "wip":
options.remove("threshold")
raise AttributeError(
f"Badge {name}: config only accepts {options}."
) from e

self.badge_config.validate()

# Rule configuration
self.rules_config = {
name: RuleConfig.from_dict(config) for name, config in rules_config.items()
6 changes: 3 additions & 3 deletions src/dbt_score/evaluation.py
@@ -8,7 +8,7 @@
from dbt_score.models import ManifestLoader, Model
from dbt_score.rule import Rule, RuleViolation
from dbt_score.rule_registry import RuleRegistry
from dbt_score.scoring import Scorer
from dbt_score.scoring import Score, Scorer

# The results of a given model are stored in a dictionary, mapping rules to either:
# - None if there was no issue
@@ -44,10 +44,10 @@ def __init__(
self.results: dict[Model, ModelResultsType] = {}

# For each model, its computed score
self.scores: dict[Model, float] = {}
self.scores: dict[Model, Score] = {}

# The aggregated project score
self.project_score: float
self.project_score: Score

def evaluate(self) -> None:
"""Evaluate all rules."""
6 changes: 4 additions & 2 deletions src/dbt_score/formatters/__init__.py
@@ -5,6 +5,8 @@
import typing
from abc import ABC, abstractmethod

from dbt_score.scoring import Score

if typing.TYPE_CHECKING:
from dbt_score.evaluation import ModelResultsType
from dbt_score.models import ManifestLoader, Model
@@ -19,12 +21,12 @@ def __init__(self, manifest_loader: ManifestLoader):

@abstractmethod
def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
raise NotImplementedError

@abstractmethod
def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
raise NotImplementedError
12 changes: 7 additions & 5 deletions src/dbt_score/formatters/human_readable_formatter.py
@@ -5,6 +5,7 @@
from dbt_score.formatters import Formatter
from dbt_score.models import Model
from dbt_score.rule import RuleViolation
from dbt_score.scoring import Score


class HumanReadableFormatter(Formatter):
@@ -21,10 +22,12 @@ def bold(text: str) -> str:
return f"\033[1m{text}\033[0m"

def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
print(f"Model {self.bold(model.name)}")
print(
f"{score.badge} {self.bold(model.name)} (score: {round(score.value, 1)!s})"
)
for rule, result in results.items():
if result is None:
print(f"{self.indent}{self.label_ok} {rule.source()}")
@@ -35,9 +38,8 @@ def model_evaluated(
)
else:
print(f"{self.indent}{self.label_error} {rule.source()}: {result!s}")
print(f"Score: {self.bold(str(round(score, 1)))}")
print()

def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
print(f"Project score: {self.bold(str(round(score, 1)))}")
print(f"Project score: {self.bold(str(round(score.value, 1)))} {score.badge}")
12 changes: 7 additions & 5 deletions src/dbt_score/formatters/manifest_formatter.py
@@ -7,25 +7,27 @@
from dbt_score.evaluation import ModelResultsType
from dbt_score.formatters import Formatter
from dbt_score.models import Model
from dbt_score.scoring import Score


class ManifestFormatter(Formatter):
"""Formatter to generate manifest.json with score metadata."""

def __init__(self, *args: Any, **kwargs: Any) -> None:
"""Instantiate a manifest formatter."""
self._model_scores: dict[str, float] = {}
self._model_scores: dict[str, Score] = {}
super().__init__(*args, **kwargs)

def model_evaluated(
self, model: Model, results: ModelResultsType, score: float
self, model: Model, results: ModelResultsType, score: Score
) -> None:
"""Callback when a model has been evaluated."""
self._model_scores[model.unique_id] = score

def project_evaluated(self, score: float) -> None:
def project_evaluated(self, score: Score) -> None:
"""Callback when a project has been evaluated."""
manifest = copy.copy(self._manifest_loader.raw_manifest)
for model_id, score in self._model_scores.items():
manifest["nodes"][model_id]["meta"]["score"] = round(score, 1)
for model_id, model_score in self._model_scores.items():
manifest["nodes"][model_id]["meta"]["score"] = model_score.value
manifest["nodes"][model_id]["meta"]["badge"] = model_score.badge
print(json.dumps(manifest, indent=2))
2 changes: 1 addition & 1 deletion src/dbt_score/lint.py
@@ -30,7 +30,7 @@ def lint_dbt_project(
formatters = {"plain": HumanReadableFormatter, "manifest": ManifestFormatter}
formatter = formatters[format](manifest_loader=manifest_loader)

scorer = Scorer()
scorer = Scorer(config)

evaluation = Evaluation(
rule_registry=rule_registry,
54 changes: 43 additions & 11 deletions src/dbt_score/scoring.py
@@ -3,12 +3,23 @@
from __future__ import annotations

import typing
from dataclasses import dataclass

from dbt_score.config import Config

if typing.TYPE_CHECKING:
from dbt_score.evaluation import ModelResultsType
from dbt_score.rule import RuleViolation, Severity


@dataclass
class Score:
"""Class representing a score."""

value: float
badge: str


class Scorer:
"""Logic for computing scores."""

@@ -22,20 +33,24 @@ class Scorer:
min_score = 0.0
max_score = 10.0

def score_model(self, model_results: ModelResultsType) -> float:
def __init__(self, config: Config) -> None:
"""Create a Scorer object."""
self._config = config

def score_model(self, model_results: ModelResultsType) -> Score:
"""Compute the score of a given model."""
if len(model_results) == 0:
# No rule? No problem
return self.max_score
if any(
score = self.max_score
elif any(
rule.severity == Severity.CRITICAL and isinstance(result, RuleViolation)
for rule, result in model_results.items()
):
# If there's a CRITICAL violation, the score is 0
return self.min_score
score = self.min_score
else:
# Otherwise, the score is the weighted average (by severity) of the results
return (
score = (
sum(
[
# The more severe the violation, the more points are lost
@@ -49,11 +64,28 @@ def score_model(self, model_results: ModelResultsType) -> float:
* self.max_score
)

def score_aggregate_models(self, scores: list[float]) -> float:
return Score(score, self._badge(score))

def score_aggregate_models(self, scores: list[Score]) -> Score:
"""Compute the score of a list of models."""
if 0.0 in scores:
actual_scores = [s.value for s in scores]
if 0.0 in actual_scores:
# Any model with a CRITICAL violation makes the project score 0
return self.min_score
if len(scores) == 0:
return self.max_score
return sum(scores) / len(scores)
score = Score(self.min_score, self._badge(self.min_score))
elif len(actual_scores) == 0:
score = Score(self.max_score, self._badge(self.max_score))
else:
average_score = sum(actual_scores) / len(actual_scores)
score = Score(average_score, self._badge(average_score))
return score

def _badge(self, score: float) -> str:
"""Compute the badge of a given score."""
if score >= self._config.badge_config.first.threshold:
return self._config.badge_config.first.icon
elif score >= self._config.badge_config.second.threshold:
return self._config.badge_config.second.icon
elif score >= self._config.badge_config.third.threshold:
return self._config.badge_config.third.icon
else:
return self._config.badge_config.wip.icon
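
As a rough usage sketch of the scoring API introduced here (assuming the
default configuration; entry points and defaults may differ in later versions),
the `Scorer` can be exercised directly:

```python
from dbt_score.config import Config
from dbt_score.scoring import Scorer

# Default badge thresholds: 🥇 at 10.0, 🥈 at 8.0, 🥉 at 6.0, 🚧 otherwise.
config = Config()
scorer = Scorer(config)

# A model with no evaluated rules gets the maximum score and the first badge.
model_score = scorer.score_model({})
print(model_score.value, model_score.badge)  # 10.0 🥇

# Aggregating an empty list of model scores also yields the maximum score.
project_score = scorer.score_aggregate_models([])
print(project_score.value, project_score.badge)  # 10.0 🥇
```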