Use fastjsonschema instead of jsonschema for validation

dbt-labs · Jun 1, 2024 · d580bfd
1 parent a34267f
commit d580bfd
Show file tree

Hide file tree

Showing 10 changed files with 27 additions and 20 deletions.
diff --git a/.changes/unreleased/Under the Hood-20240601-140218.yaml b/.changes/unreleased/Under the Hood-20240601-140218.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Replace jsonschema validation with fastjsonschema
+time: 2024-06-01T14:02:18.612839-04:00
+custom:
+  Author: gshank
+  Issue: "10248"
diff --git a/core/dbt/config/selectors.py b/core/dbt/config/selectors.py
@@ -38,7 +38,7 @@ def selectors_from_dict(cls, data: Dict[str, Any]) -> "SelectorConfig":
             validate_selector_default(selector_file)
             selectors = parse_from_selectors_definition(selector_file)
         except ValidationError as exc:
-            yaml_sel_cfg = yaml.dump(exc.instance)
+            yaml_sel_cfg = yaml.dump(exc.value)
             raise DbtSelectorsError(
                 f"Could not parse selector file data: \n{yaml_sel_cfg}\n"
                 f"Valid root-level selector definitions: "

diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
@@ -445,12 +445,14 @@ def parse(self) -> ParseResult:
         versioned_test_blocks: List[VersionedTestBlock] = []
 
         # get list of 'node' objects
-        # UnparsedNodeUpdate (TestablePatchParser, models, seeds, snapshots)
+        # UnparsedNodeUpdate (TestablePatchParser, seeds, snapshots)
         #      = HasColumnTests, HasTests
         # UnparsedAnalysisUpdate (UnparsedAnalysisParser, analyses)
         #      = HasColumnDocs, HasDocs
         # UnparsedMacroUpdate (MacroPatchParser, 'macros')
         #      = HasDocs
+        # UnparsedModelUpdate (ModelPatchParser, models)
+        #      = HasColumnTests, HasTests
         # correspond to this parser's 'key'
         for node in self.get_unparsed_target():
             # node_block is a TargetBlock (Macro or Analysis)
@@ -544,7 +546,7 @@ def validate_data_tests(self, data):
         # Raise a validation error if the user has defined both names
         def validate_and_rename(data):
             if data.get("tests"):
-                if "tests" in data and "data_tests" in data:
+                if data.get("data_tests"):
                     raise ValidationError(
                         "Invalid test config: cannot have both 'tests' and 'data_tests' defined"
                     )

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,6 +1,6 @@
 git+https://github.com/dbt-labs/dbt-adapters.git@main
 git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter
-git+https://github.com/dbt-labs/dbt-common.git@main
+git+https://github.com/dbt-labs/dbt-common.git@try_fastjsonschema
 git+https://github.com/dbt-labs/dbt-postgres.git@main
 black>=24.3.0,<25.0
 bumpversion

diff --git a/tests/functional/basic/test_project.py b/tests/functional/basic/test_project.py
@@ -64,9 +64,7 @@ def test_invalid_version(self, project):
         update_config_file({"version": "invalid"}, "dbt_project.yml")
         with pytest.raises(ProjectContractError) as excinfo:
             run_dbt()
-        assert "at path ['version']: 'invalid' is not valid under any of the given schemas" in str(
-            excinfo.value
-        )
+        assert "Invalid value 'invalid'" in str(excinfo.value)
 
 
 class TestProjectDbtCloudConfig:
@@ -113,9 +111,7 @@ def test_dbt_cloud_invalid(self, project):
         run_dbt()
         config = {"name": "test", "profile": "test", "dbt-cloud": "Some string"}
         update_config_file(config, "dbt_project.yml")
-        expected_err = (
-            "at path ['dbt-cloud']: 'Some string' is not valid under any of the given schemas"
-        )
+        expected_err = "Invalid value 'Some string'"
         with pytest.raises(ProjectContractError) as excinfo:
             run_dbt()
         assert expected_err in str(excinfo.value)

diff --git a/tests/functional/configs/test_disabled_model.py b/tests/functional/configs/test_disabled_model.py
@@ -394,9 +394,8 @@ def models(self):
             "my_model.sql": my_model,
         }
 
-    def test_invalis_config(self, project):
+    def test_invalid_config(self, project):
         with pytest.raises(ValidationError) as exc:
             run_dbt(["parse"])
-        exc_str = " ".join(str(exc.value).split())  # flatten all whitespace
-        expected_msg = "'True and False' is not of type 'boolean'"
-        assert expected_msg in exc_str
+        expected_msg = "Invalid value 'True and False': data.enabled must be boolean"
+        assert expected_msg in exc.value.msg
diff --git a/tests/functional/exposures/test_exposure_configs.py b/tests/functional/exposures/test_exposure_configs.py
@@ -128,5 +128,5 @@ def models(self):
     def test_exposure_config_yaml_level(self, project):
         with pytest.raises(ValidationError) as excinfo:
             run_dbt(["parse"])
-        expected_msg = "'True and False' is not of type 'boolean'"
+        expected_msg = "Invalid value 'True and False': data.enabled must be boolean"
         assert expected_msg in str(excinfo.value)
diff --git a/tests/functional/metrics/test_metric_configs.py b/tests/functional/metrics/test_metric_configs.py
@@ -172,7 +172,7 @@ def models(self):
     def test_invalid_config_metric(self, project):
         with pytest.raises(ValidationError) as excinfo:
             run_dbt(["parse"])
-        expected_msg = "'True and False' is not of type 'boolean'"
+        expected_msg = "Invalid value 'True and False': data.enabled must be boolean"
         assert expected_msg in str(excinfo.value)
 
 

diff --git a/tests/functional/postgres/test_postgres_indexes.py b/tests/functional/postgres/test_postgres_indexes.py
@@ -141,7 +141,11 @@ def models(self):
     def test_invalid_index_configs(self, project):
         results, output = run_dbt_and_capture(expect_pass=False)
         assert len(results) == 4
-        assert re.search(r"columns.*is not of type 'array'", output)
-        assert re.search(r"unique.*is not of type 'boolean'", output)
-        assert re.search(r"'columns' is a required property", output)
+        # Could not parse index config: Invalid value 'column_a, column_b': data.columns must be array
+        assert re.search(r"columns must be array", output)
+        # Could not parse index config: Invalid value 'yes': data.unique must be boolean
+        assert re.search(r"unique must be boolean", output)
+        # Could not parse index config: Invalid value '{'unique': True}': data must contain ['columns'] properties
+        assert re.search(r"data must contain \['columns'\] properties", output)
+        # Database Error in model invalid_type (models/invalid_type.sql) / access method "non_existent_type" does not exist
         assert re.search(r"Database Error in model invalid_type", output)
diff --git a/tests/functional/sources/test_source_configs.py b/tests/functional/sources/test_source_configs.py
@@ -177,5 +177,5 @@ def models(self):
     def test_invalid_config_source(self, project):
         with pytest.raises(ValidationError) as excinfo:
             run_dbt(["parse"])
-        expected_msg = "'True and False' is not of type 'boolean'"
+        expected_msg = "Invalid value 'True and False': data.enabled must be boolean"
         assert expected_msg in str(excinfo.value)