Skip to content

Commit

Permalink
Improve model definitions and process feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
jochemvandooren committed Mar 14, 2024
1 parent 8811c28 commit 4a620e8
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 26 deletions.
78 changes: 54 additions & 24 deletions src/dbt_score/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class Constraint:
"""Constraint for a column.
Args:
Attributes:
name: The name of the constraint.
type: The type of the constraint, e.g. `foreign_key`.
expression: The expression of the constraint, e.g. `schema.other_table`.
Expand All @@ -24,57 +24,84 @@ class Constraint:
class Test:
"""Test for a column or model.
Args:
Attributes:
name: The name of the test.
type: The type of the test, e.g. `unique`.
kwargs: The kwargs of the test.
tags: The list of tags attached to the test.
"""

name: str
type: str
kwargs: dict[str, Any] = field(default_factory=dict)
tags: list[str] = field(default_factory=list)

@classmethod
def from_node(cls, test_node: dict[str, Any]) -> "Test":
"""Create a test object from a test node in the manifest."""
test = cls(
name=test_node["name"],
type=test_node["test_metadata"]["name"],
kwargs=test_node["test_metadata"].get("kwargs", {}),
tags=test_node.get("tags", []),
)
return test


@dataclass
class Column:
"""Represents a column in a model.
Args:
Attributes:
name: The name of the column.
description: The description of the column.
constraints: The list of constraints attached to the column.
tags: The list of tags attached to the column.
tests: The list of tests attached to the column.
"""

name: str
description: str
constraints: list[Constraint] = field(default_factory=list)
tags: list[str] = field(default_factory=list)
tests: list[Test] = field(default_factory=list)


@dataclass
class Model:
"""Represents a dbt model.
Args:
id: The id of the model, e.g. `model.package.model_name`.
Attributes:
unique_id: The id of the model, e.g. `model.package.model_name`.
name: The name of the model.
description: The full description of the model.
file_path: The `.yml` file path of the model.
patch_path: The yml path of the model, e.g. `package://model_dir/dir/file.yml`.
original_file_path: The sql path of the model, e.g. `model_dir/dir/file.sql`.
config: The config of the model.
meta: The meta of the model.
columns: The list of columns of the model.
package_name: The package name of the model.
database: The database name of the model.
schema: The schema name of the model.
tags: The list of tags attached to the model.
tests: The list of tests attached to the model.
depends_on: Dictionary of models/sources/macros that the model depends on.
"""

id: str
unique_id: str
name: str
description: str
file_path: str
patch_path: str
original_file_path: str
config: dict[str, Any]
meta: dict[str, Any]
columns: list[Column]
package_name: str
database: str
schema: str
tags: list[str] = field(default_factory=list)
tests: list[Test] = field(default_factory=list)
depends_on: dict[str, list[str]] = field(default_factory=dict)

def get_column(self, column_name: str) -> Column | None:
"""Get a column by name."""
Expand All @@ -101,12 +128,9 @@ def _get_columns(
)
for constraint in values.get("constraints", [])
],
tags=values.get("tags", []),
tests=[
Test(
name=test["name"],
type=test["test_metadata"]["name"],
tags=test.get("tags", []),
)
Test.from_node(test)
for test in tests_values
if test["test_metadata"].get("kwargs", {}).get("column_name")
== values.get("name")
Expand All @@ -122,22 +146,24 @@ def from_node(
) -> "Model":
"""Create a model object from a node and its tests in the manifest."""
model = cls(
id=node_values["unique_id"],
file_path=node_values["patch_path"],
config=node_values.get("config", {}),
unique_id=node_values["unique_id"],
name=node_values["name"],
description=node_values.get("description", ""),
patch_path=node_values["patch_path"],
original_file_path=node_values["original_file_path"],
config=node_values.get("config", {}),
meta=node_values.get("meta", {}),
columns=cls._get_columns(node_values, tests_values),
package_name=node_values["package_name"],
database=node_values["database"],
schema=node_values["schema"],
tags=node_values.get("tags", []),
tests=[
Test(
name=test["name"],
type=test["test_metadata"]["name"],
tags=test.get("tags", []),
)
Test.from_node(test)
for test in tests_values
if not test["test_metadata"].get("kwargs", {}).get("column_name")
],
depends_on=node_values.get("depends_on", {}),
)

return model
Expand Down Expand Up @@ -165,13 +191,17 @@ def _load_models(self) -> None:
for node_id, node_values in self.raw_nodes.items():
if node_values.get("resource_type") == "model":
model = Model.from_node(node_values, self.tests.get(node_id, []))

if model.unique_id == "model.dwh.ft_purchase_order_line_article":
print(model)

self.models.append(model)

def _reindex_tests(self) -> None:
"""Index tests based on their model id."""
for node_values in self.raw_nodes.values():
# Only include tests that are attached to a model.
if node_values.get("resource_type") == "test" and node_values.get(
"attached_node"
if node_values.get("resource_type") == "test" and (
attached_node := node_values.get("attached_node")
):
self.tests[node_values["attached_node"]].append(node_values)
self.tests[attached_node].append(node_values)
3 changes: 1 addition & 2 deletions src/dbt_score/rules/example_rules.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
"""All general rules."""

from dbt_score.models import Model
from dbt_score.rule import Rule, RuleViolation, Severity, rule
from dbt_score.rule import Rule, RuleViolation, rule


class ComplexRule(Rule):
"""Complex rule."""

description = "Example of a complex rule."
severity = Severity.CRITICAL

@classmethod
def preprocess(cls) -> int:
Expand Down

0 comments on commit 4a620e8

Please sign in to comment.