dbt-labs · gshank · Oct 5, 2023 · Sep 28, 2023 · Sep 28, 2023 · Oct 2, 2023
diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py
@@ -35,8 +35,8 @@
     UnparsedSourceTableDefinition,
     UnparsedColumn,
     UnitTestOverrides,
-    InputFixture,
-    OutputFixture,
+    UnitTestInputFixture,
+    UnitTestOutputFixture,
 )
 from dbt.contracts.graph.node_args import ModelNodeArgs
 from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
@@ -1071,8 +1071,8 @@ class UnitTestNode(CompiledNode):
 @dataclass
 class UnitTestDefinition(GraphNode):
     model: str
-    given: Sequence[InputFixture]
-    expect: OutputFixture
+    given: Sequence[UnitTestInputFixture]
+    expect: UnitTestOutputFixture
     description: str = ""
     overrides: Optional[UnitTestOverrides] = None
     depends_on: DependsOn = field(default_factory=DependsOn)

diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py
@@ -1,5 +1,7 @@
 import datetime
 import re
+import csv
+from io import StringIO
 
 from dbt import deprecations
 from dbt.node_types import NodeType
@@ -741,15 +743,38 @@ class UnitTestFormat(StrEnum):
     Dict = "dict"
 
 
+class UnitTestFixture:
+    """Shared row access for unit test fixtures; subclasses supply format and rows."""
+
+    @property
+    def format(self) -> UnitTestFormat:
+        return UnitTestFormat.Dict
+
+    @property
+    def rows(self) -> Union[str, List[Dict[str, Any]]]:
+        return []
+
+    def get_rows(self) -> List[Dict[str, Any]]:
+        """Return the rows as a list of dicts, parsing the CSV text when format is CSV."""
+        if self.format == UnitTestFormat.CSV and isinstance(self.rows, str):
+            return list(csv.DictReader(StringIO(self.rows)))
+        if self.format == UnitTestFormat.Dict and isinstance(self.rows, list):
+            return self.rows
+        # Raise explicitly: asserts vanish under `python -O`, and an unknown
+        # format previously fell through and returned None.
+        fmt, kind = self.format, type(self.rows).__name__
+        raise ValueError(f"Wrong format for rows: got {kind} for '{fmt}' format")
+
+
 @dataclass
-class InputFixture(dbtClassMixin):
+class UnitTestInputFixture(dbtClassMixin, UnitTestFixture):  # NOTE(review): rows defaults to "" but get_rows() needs a list for the default Dict format
     input: str
     rows: Union[str, List[Dict[str, Any]]] = ""
     format: UnitTestFormat = UnitTestFormat.Dict
 
 
 @dataclass
-class OutputFixture(dbtClassMixin):
+class UnitTestOutputFixture(dbtClassMixin, UnitTestFixture):  # NOTE(review): rows defaults to "" but get_rows() needs a list for the default Dict format
     rows: Union[str, List[Dict[str, Any]]] = ""
     format: UnitTestFormat = UnitTestFormat.Dict
 
@@ -764,8 +789,8 @@ class UnitTestOverrides(dbtClassMixin):
 @dataclass
 class UnparsedUnitTestDefinition(dbtClassMixin):
     name: str
-    given: Sequence[InputFixture]
-    expect: OutputFixture
+    given: Sequence[UnitTestInputFixture]
+    expect: UnitTestOutputFixture
     description: str = ""
     overrides: Optional[UnitTestOverrides] = None
     config: Dict[str, Any] = field(default_factory=dict)

diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py
@@ -1,6 +1,4 @@
 from typing import List, Set, Dict, Any
-import csv
-from io import StringIO
 
 from dbt.config import RuntimeConfig
 from dbt.context.context_config import ContextConfig
@@ -61,25 +59,16 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
         # for selection.
         # Note: no depends_on, that's added later using input nodes
         name = f"{test_case.model}__{test_case.name}"
-        if test_case.expect.format == UnitTestFormat.Dict:
-            if isinstance(test_case.expect.rows, List):
-                expected_rows = test_case.expect.rows
-            else:
-                raise ParsingError("Wrong format for expected rows")
-        else:  # test_case.expect.format == UnitTestFormat.CSV:
-            # build a dictionary from the csv string
-            if isinstance(test_case.expect.rows, str):
-                expected_rows = self._build_rows_from_csv(test_case.expect.rows)
-            else:
-                raise ParsingError("Wrong format for expected rows")
         unit_test_node = UnitTestNode(
             name=name,
             resource_type=NodeType.Unit,
             package_name=package_name,
             path=get_pseudo_test_path(name, test_case.original_file_path),
             original_file_path=test_case.original_file_path,
             unique_id=test_case.unique_id,
-            config=UnitTestNodeConfig(materialized="unit", expected_rows=expected_rows),
+            config=UnitTestNodeConfig(
+                materialized="unit", expected_rows=test_case.expect.get_rows()
+            ),
             raw_code=actual_node.raw_code,
             database=actual_node.database,
             schema=actual_node.schema,
@@ -131,15 +120,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
             # TODO: package_name?
             input_name = f"{test_case.model}__{test_case.name}__{original_input_node.name}"
             input_unique_id = f"model.{package_name}.{input_name}"
-            rows: List[Dict[str, Any]]
-            if given.format == UnitTestFormat.CSV:
-                rows = self._build_rows_from_csv(given.rows)
-            else:  # format == "dict"
-                # Should always be a dictionary.
-                rows = given.rows  # type:ignore
-
             input_node = ModelNode(
-                raw_code=self._build_raw_code(rows, original_input_node_columns),
+                raw_code=self._build_raw_code(given.get_rows(), original_input_node_columns),
                 resource_type=NodeType.Model,
                 package_name=package_name,
                 path=original_input_node.path,
@@ -157,14 +139,6 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition):
             # Add unique ids of input_nodes to depends_on
             unit_test_node.depends_on.nodes.append(input_node.unique_id)
 
-    def _build_rows_from_csv(self, csv_string) -> List[Dict[str, Any]]:
-        dummy_file = StringIO(csv_string)
-        reader = csv.DictReader(dummy_file)
-        rows = []
-        for row in reader:
-            rows.append(row)
-        return rows
-
     def _build_raw_code(self, rows, column_name_to_data_types) -> str:
         return ("{{{{ get_fixture_sql({rows}, {column_name_to_data_types}) }}}}").format(
             rows=rows, column_name_to_data_types=column_name_to_data_types