Skip to content

Commit

Permalink
Merge pull request #1499 from Giskard-AI/feature/gsk-1419-gsk-1383-make-dataset-and-model-as-suite-input
Browse files Browse the repository at this point in the history

[GSK-1419][GSK-1383] Make dataset, model as suite input in `Suite` and upload them automatically
  • Loading branch information
Inokinoki authored Oct 25, 2023
2 parents a5369b5 + c0b4f4d commit 63727f4
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 19 deletions.
21 changes: 18 additions & 3 deletions giskard/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,22 +311,28 @@ class Suite:
A mapping of suite parameters with their corresponding SuiteInput objects.
name : str
A string representing the name of the suite.
default_params : Dict[str, Any]
A dictionary containing the default parameters for the tests in the suite.
"""

id: int
tests: List[TestPartial]
name: str
default_params: Dict[str, Any]

def __init__(self, name=None) -> None:
def __init__(self, name=None, default_params=None) -> None:
    """Initialize an empty test suite.

    Parameters
    ----------
    name : str, optional
        The name of the test suite.
    default_params : dict, optional
        Any arguments passed will be applied to the tests in the suite, if runtime params with the same name are not set.
    """
    # No tests yet; they are registered later through `add_test`.
    self.tests = []
    self.name = name
    # Falsy (None or empty) defaults collapse to a fresh empty dict, so the
    # suite never shares a mutable default between instances.
    self.default_params = default_params or {}

def run(self, verbose: bool = True, **suite_run_args):
"""Execute all the tests that have been added to the test suite through the `add_test` method.
Expand All @@ -345,14 +351,17 @@ def run(self, verbose: bool = True, **suite_run_args):
TestSuiteResult
containing test execution information
"""
run_args = self.default_params.copy()
run_args.update(suite_run_args)

results: List[(str, TestResult, Dict[str, Any])] = list()
required_params = self.find_required_params()
undefined_params = {k: v for k, v in required_params.items() if k not in suite_run_args}
undefined_params = {k: v for k, v in required_params.items() if k not in run_args}
if len(undefined_params):
raise ValueError(f"Missing {len(undefined_params)} required parameters: {undefined_params}")

for test_partial in self.tests:
test_params = self.create_test_params(test_partial, suite_run_args)
test_params = self.create_test_params(test_partial, run_args)

try:
result = test_partial.giskard_test.get_builder()(**test_params).execute()
Expand Down Expand Up @@ -417,6 +426,12 @@ def upload(self, client: GiskardClient, project_key: str):
"""
if self.name is None:
self.name = "Unnamed test suite"

# Upload the default parameters if they are model or dataset
for arg in self.default_params.values():
if isinstance(arg, BaseModel) or isinstance(arg, Dataset):
arg.upload(client, project_key)

self.id = client.save_test_suite(self.to_dto(client, project_key))
project_id = client.get_project(project_key).project_id
print(f"Test suite has been saved: {client.host_url}/main/projects/{project_id}/test-suite/{self.id}/overview")
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/calibration/overconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,6 @@ def _generate_overconfidence_tests(issue):

tests = {
f"Overconfidence on data slice “{issue.slicing_fn}”": test_overconfidence_rate(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=abs_threshold,
p_threshold=issue.meta["p_threshold"],
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/calibration/underconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ def _generate_underconfidence_tests(issue):

tests = {
f"Underconfidence on data slice “{issue.slicing_fn}”": test_underconfidence_rate(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=abs_threshold,
p_threshold=issue.meta["p_threshold"],
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/correlation/spurious_correlation_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ def _generate_spurious_corr_tests(issue):

return {
f"{issue.meta['metric']} on data slice “{issue.slicing_fn}”": test_fn(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=issue.meta["threshold"],
)
Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/performance/performance_bias_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def _generate_performance_tests(issue: Issue):

return {
f"{metric.name} on data slice “{issue.slicing_fn}”": test_fn(
model=issue.model, dataset=issue.dataset, slicing_function=issue.slicing_fn, threshold=abs_threshold
slicing_function=issue.slicing_fn, threshold=abs_threshold
)
}

Expand Down
13 changes: 11 additions & 2 deletions giskard/scanner/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@


class ScanReport:
def __init__(self, issues, as_html: bool = True):
def __init__(self, issues, model=None, dataset=None, as_html: bool = True):
self.issues = issues
self.as_html = as_html
self.model = model
self.dataset = dataset

def has_issues(self):
return len(self.issues) > 0
Expand Down Expand Up @@ -91,7 +93,14 @@ def generate_tests(self, with_names=False):
def generate_test_suite(self, name=None):
from giskard import Suite

suite = Suite(name=name or "Test suite (generated by automatic scan)")
# Set suite-level default parameters if exists
suite_default_params = {}
if self.model:
suite_default_params.update({"model": self.model})
if self.dataset:
suite_default_params.update({"dataset": self.dataset})

suite = Suite(name=name or "Test suite (generated by automatic scan)", default_params=suite_default_params)
for test, test_name in self.generate_tests(with_names=True):
suite.add_test(test, test_name)

Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/robustness/base_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,6 @@ def _generate_robustness_tests(issue: Issue):
# Only generates a single metamorphic test
return {
f"Invariance to “{issue.transformation_fn}”": test_metamorphic_invariance(
model=issue.model,
dataset=issue.dataset,
transformation_function=issue.transformation_fn,
slicing_function=None,
threshold=1 - issue.meta["threshold"],
Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def analyze(
issues = self._postprocess(issues)
self._collect_analytics(model, dataset, issues, elapsed, model_validation_time)

return ScanReport(issues)
return ScanReport(issues, model=model, dataset=dataset)

def _run_detectors(self, detectors, model, dataset, verbose=True, raise_exceptions=False):
if not detectors:
Expand Down
9 changes: 7 additions & 2 deletions tests/scan/test_overconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,13 @@ def prediction_fn(df):

the_test = tests[0][0]
assert the_test.meta.name == "test_overconfidence_rate"
assert the_test.params["model"] == model
assert the_test.params["dataset"] == dataset

# model and dataset are set as default params in `Suite`
assert "model" not in the_test.params
the_test.params.update({"model": model})
assert "dataset" not in the_test.params
the_test.params.update({"dataset": dataset})

assert the_test.params["p_threshold"] == approx(0.5)

# Global rate is 50%, we accept a 10% deviation, thus up to 55%:
Expand Down
51 changes: 51 additions & 0 deletions tests/scan/test_suite_generation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest.mock import Mock
import pytest

from giskard.core.suite import Suite
from giskard.ml_worker.testing.registry.slicing_function import SlicingFunction
Expand Down Expand Up @@ -37,4 +38,54 @@ def test_generate_test_suite_from_scan_result(german_credit_data, german_credit_
assert test_suite.name == "Custom name"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
with pytest.raises(ValueError):
test_suite.run(model=german_credit_model)
with pytest.raises(ValueError):
test_suite.run(dataset=german_credit_data)
# Provide model and dataset
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with model
result = ScanReport(issues, model=german_credit_model)
test_suite = result.generate_test_suite("Custom name with model")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with model"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
with pytest.raises(ValueError):
test_suite.run(model=german_credit_model)
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with dataset
result = ScanReport(issues, dataset=german_credit_data)
test_suite = result.generate_test_suite("Custom name with dataset")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with dataset"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
test_suite.run(model=german_credit_model)
with pytest.raises(ValueError):
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with model and dataset
result = ScanReport(issues, model=german_credit_model, dataset=german_credit_data)
test_suite = result.generate_test_suite("Custom name with model and dataset")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with model and dataset"
assert len(test_suite.tests) == 1

test_suite.run()
test_suite.run(model=german_credit_model)
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)
9 changes: 7 additions & 2 deletions tests/scan/test_underconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,13 @@ def prediction_fn(df):

the_test = tests[0][0]
assert the_test.meta.name == "test_underconfidence_rate"
assert the_test.params["model"] == model
assert the_test.params["dataset"] == dataset

# model and dataset are set as default params in `Suite`
assert "model" not in the_test.params
the_test.params.update({"model": model})
assert "dataset" not in the_test.params
the_test.params.update({"dataset": dataset})

assert the_test.params["p_threshold"] == approx(0.94)

# Global rate is 33%, we accept a 10% deviation, thus up to 36.7%:
Expand Down

0 comments on commit 63727f4

Please sign in to comment.