From 4a620e82d5f6cd6365f4c19766f5aa56aff77ca2 Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 14 Mar 2024 13:55:50 +0100 Subject: [PATCH] Improve model definitions and process feedback --- src/dbt_score/models.py | 78 +++++++++++++++++++--------- src/dbt_score/rules/example_rules.py | 3 +- 2 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index bdcfc40..3f6cf03 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -9,7 +9,7 @@ class Constraint: """Constraint for a column. - Args: + Attributes: name: The name of the constraint. type: The type of the constraint, e.g. `foreign_key`. expression: The expression of the constraint, e.g. `schema.other_table`. @@ -24,31 +24,46 @@ class Constraint: class Test: """Test for a column or model. - Args: + Attributes: name: The name of the test. type: The type of the test, e.g. `unique`. + kwargs: The kwargs of the test. tags: The list of tags attached to the test. """ name: str type: str + kwargs: dict[str, Any] = field(default_factory=dict) tags: list[str] = field(default_factory=list) + @classmethod + def from_node(cls, test_node: dict[str, Any]) -> "Test": + """Create a test object from a test node in the manifest.""" + test = cls( + name=test_node["name"], + type=test_node["test_metadata"]["name"], + kwargs=test_node["test_metadata"].get("kwargs", {}), + tags=test_node.get("tags", []), + ) + return test + @dataclass class Column: """Represents a column in a model. - Args: + Attributes: name: The name of the column. description: The description of the column. constraints: The list of constraints attached to the column. + tags: The list of tags attached to the column. tests: The list of tests attached to the column. 
""" name: str description: str constraints: list[Constraint] = field(default_factory=list) + tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) @@ -56,25 +71,37 @@ class Column: class Model: """Represents a dbt model. - Args: - id: The id of the model, e.g. `model.package.model_name`. + Attributes: + unique_id: The id of the model, e.g. `model.package.model_name`. name: The name of the model. description: The full description of the model. - file_path: The `.yml` file path of the model. + patch_path: The yml path of the model, e.g. `package://model_dir/dir/file.yml`. + original_file_path: The sql path of the model, e.g. `model_dir/dir/file.sql`. config: The config of the model. meta: The meta of the model. columns: The list of columns of the model. + package_name: The package name of the model. + database: The database name of the model. + schema: The schema name of the model. + tags: The list of tags attached to the model. tests: The list of tests attached to the model. + depends_on: Dictionary of models/sources/macros that the model depends on. 
""" - id: str + unique_id: str name: str description: str - file_path: str + patch_path: str + original_file_path: str config: dict[str, Any] meta: dict[str, Any] columns: list[Column] + package_name: str + database: str + schema: str + tags: list[str] = field(default_factory=list) tests: list[Test] = field(default_factory=list) + depends_on: dict[str, list[str]] = field(default_factory=dict) def get_column(self, column_name: str) -> Column | None: """Get a column by name.""" @@ -101,12 +128,9 @@ def _get_columns( ) for constraint in values.get("constraints", []) ], + tags=values.get("tags", []), tests=[ - Test( - name=test["name"], - type=test["test_metadata"]["name"], - tags=test.get("tags", []), - ) + Test.from_node(test) for test in tests_values if test["test_metadata"].get("kwargs", {}).get("column_name") == values.get("name") @@ -122,22 +146,24 @@ def from_node( ) -> "Model": """Create a model object from a node and it's tests in the manifest.""" model = cls( - id=node_values["unique_id"], - file_path=node_values["patch_path"], - config=node_values.get("config", {}), + unique_id=node_values["unique_id"], name=node_values["name"], description=node_values.get("description", ""), + patch_path=node_values["patch_path"], + original_file_path=node_values["original_file_path"], + config=node_values.get("config", {}), meta=node_values.get("meta", {}), columns=cls._get_columns(node_values, tests_values), + package_name=node_values["package_name"], + database=node_values["database"], + schema=node_values["schema"], + tags=node_values.get("tags", []), tests=[ - Test( - name=test["name"], - type=test["test_metadata"]["name"], - tags=test.get("tags", []), - ) + Test.from_node(test) for test in tests_values if not test["test_metadata"].get("kwargs", {}).get("column_name") ], + depends_on=node_values.get("depends_on", {}), ) return model @@ -165,13 +191,17 @@ def _load_models(self) -> None: for node_id, node_values in self.raw_nodes.items(): if 
node_values.get("resource_type") == "model": model = Model.from_node(node_values, self.tests.get(node_id, [])) + + if model.unique_id == "model.dwh.ft_purchase_order_line_article": + print(model) + self.models.append(model) def _reindex_tests(self) -> None: """Index tests based on their model id.""" for node_values in self.raw_nodes.values(): # Only include tests that are attached to a model. - if node_values.get("resource_type") == "test" and node_values.get( - "attached_node" + if node_values.get("resource_type") == "test" and ( + attached_node := node_values.get("attached_node") ): - self.tests[node_values["attached_node"]].append(node_values) + self.tests[attached_node].append(node_values) diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py index 8bc59c6..1aec4a2 100644 --- a/src/dbt_score/rules/example_rules.py +++ b/src/dbt_score/rules/example_rules.py @@ -1,14 +1,13 @@ """All general rules.""" from dbt_score.models import Model -from dbt_score.rule import Rule, RuleViolation, Severity, rule +from dbt_score.rule import Rule, RuleViolation, rule class ComplexRule(Rule): """Complex rule.""" description = "Example of a complex rule." - severity = Severity.CRITICAL @classmethod def preprocess(cls) -> int: