Skip to content

Commit

Permalink
Merge pull request #1499 from Giskard-AI/feature/gsk-1419-gsk-1383-make-dataset-and-model-as-suite-input
Browse files Browse the repository at this point in the history

[GSK-1419][GSK-1383] Make dataset, model as suite input in `Suite` and upload them automatically
  • Loading branch information
Inokinoki authored Oct 25, 2023
2 parents a5369b5 + c0b4f4d commit 63727f4
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 19 deletions.
21 changes: 18 additions & 3 deletions giskard/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,22 +311,28 @@ class Suite:
A mapping of suite parameters with their corresponding SuiteInput objects.
name : str
A string representing the name of the suite.
default_params : Dict[str, Any]
A dictionary containing the default parameters for the tests in the suite.
"""

id: int
tests: List[TestPartial]
name: str
default_params: Dict[str, Any]

def __init__(self, name=None) -> None:
def __init__(self, name=None, default_params=None) -> None:
    """Initialize an empty test suite.

    Parameters
    ----------
    name : str, optional
        The name of the test suite.
    default_params : dict, optional
        Any arguments passed will be applied to the tests in the suite, if runtime params with the same name are not set.
    """
    # No tests yet; they are registered later through `add_test`.
    self.tests = []
    self.name = name
    # Falsy (None or empty) defaults collapse to a fresh empty dict, so the
    # suite never shares a mutable default between instances.
    self.default_params = default_params or {}

def run(self, verbose: bool = True, **suite_run_args):
"""Execute all the tests that have been added to the test suite through the `add_test` method.
Expand All @@ -345,14 +351,17 @@ def run(self, verbose: bool = True, **suite_run_args):
TestSuiteResult
containing test execution information
"""
run_args = self.default_params.copy()
run_args.update(suite_run_args)

results: List[(str, TestResult, Dict[str, Any])] = list()
required_params = self.find_required_params()
undefined_params = {k: v for k, v in required_params.items() if k not in suite_run_args}
undefined_params = {k: v for k, v in required_params.items() if k not in run_args}
if len(undefined_params):
raise ValueError(f"Missing {len(undefined_params)} required parameters: {undefined_params}")

for test_partial in self.tests:
test_params = self.create_test_params(test_partial, suite_run_args)
test_params = self.create_test_params(test_partial, run_args)

try:
result = test_partial.giskard_test.get_builder()(**test_params).execute()
Expand Down Expand Up @@ -417,6 +426,12 @@ def upload(self, client: GiskardClient, project_key: str):
"""
if self.name is None:
self.name = "Unnamed test suite"

# Upload the default parameters if they are model or dataset
for arg in self.default_params.values():
if isinstance(arg, BaseModel) or isinstance(arg, Dataset):
arg.upload(client, project_key)

self.id = client.save_test_suite(self.to_dto(client, project_key))
project_id = client.get_project(project_key).project_id
print(f"Test suite has been saved: {client.host_url}/main/projects/{project_id}/test-suite/{self.id}/overview")
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/calibration/overconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,6 @@ def _generate_overconfidence_tests(issue):

tests = {
f"Overconfidence on data slice “{issue.slicing_fn}”": test_overconfidence_rate(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=abs_threshold,
p_threshold=issue.meta["p_threshold"],
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/calibration/underconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ def _generate_underconfidence_tests(issue):

tests = {
f"Underconfidence on data slice “{issue.slicing_fn}”": test_underconfidence_rate(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=abs_threshold,
p_threshold=issue.meta["p_threshold"],
Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/correlation/spurious_correlation_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ def _generate_spurious_corr_tests(issue):

return {
f"{issue.meta['metric']} on data slice “{issue.slicing_fn}”": test_fn(
model=issue.model,
dataset=issue.dataset,
slicing_function=issue.slicing_fn,
threshold=issue.meta["threshold"],
)
Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/performance/performance_bias_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def _generate_performance_tests(issue: Issue):

return {
f"{metric.name} on data slice “{issue.slicing_fn}”": test_fn(
model=issue.model, dataset=issue.dataset, slicing_function=issue.slicing_fn, threshold=abs_threshold
slicing_function=issue.slicing_fn, threshold=abs_threshold
)
}

Expand Down
13 changes: 11 additions & 2 deletions giskard/scanner/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@


class ScanReport:
def __init__(self, issues, as_html: bool = True):
def __init__(self, issues, model=None, dataset=None, as_html: bool = True):
self.issues = issues
self.as_html = as_html
self.model = model
self.dataset = dataset

def has_issues(self):
return len(self.issues) > 0
Expand Down Expand Up @@ -91,7 +93,14 @@ def generate_tests(self, with_names=False):
def generate_test_suite(self, name=None):
from giskard import Suite

suite = Suite(name=name or "Test suite (generated by automatic scan)")
# Set suite-level default parameters if exists
suite_default_params = {}
if self.model:
suite_default_params.update({"model": self.model})
if self.dataset:
suite_default_params.update({"dataset": self.dataset})

suite = Suite(name=name or "Test suite (generated by automatic scan)", default_params=suite_default_params)
for test, test_name in self.generate_tests(with_names=True):
suite.add_test(test, test_name)

Expand Down
2 changes: 0 additions & 2 deletions giskard/scanner/robustness/base_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,6 @@ def _generate_robustness_tests(issue: Issue):
# Only generates a single metamorphic test
return {
f"Invariance to “{issue.transformation_fn}”": test_metamorphic_invariance(
model=issue.model,
dataset=issue.dataset,
transformation_function=issue.transformation_fn,
slicing_function=None,
threshold=1 - issue.meta["threshold"],
Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def analyze(
issues = self._postprocess(issues)
self._collect_analytics(model, dataset, issues, elapsed, model_validation_time)

return ScanReport(issues)
return ScanReport(issues, model=model, dataset=dataset)

def _run_detectors(self, detectors, model, dataset, verbose=True, raise_exceptions=False):
if not detectors:
Expand Down
9 changes: 7 additions & 2 deletions tests/scan/test_overconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,13 @@ def prediction_fn(df):

the_test = tests[0][0]
assert the_test.meta.name == "test_overconfidence_rate"
assert the_test.params["model"] == model
assert the_test.params["dataset"] == dataset

# model and dataset are set as default params in `Suite`
assert "model" not in the_test.params
the_test.params.update({"model": model})
assert "dataset" not in the_test.params
the_test.params.update({"dataset": dataset})

assert the_test.params["p_threshold"] == approx(0.5)

# Global rate is 50%, we accept a 10% deviation, thus up to 55%:
Expand Down
51 changes: 51 additions & 0 deletions tests/scan/test_suite_generation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest.mock import Mock
import pytest

from giskard.core.suite import Suite
from giskard.ml_worker.testing.registry.slicing_function import SlicingFunction
Expand Down Expand Up @@ -37,4 +38,54 @@ def test_generate_test_suite_from_scan_result(german_credit_data, german_credit_
assert test_suite.name == "Custom name"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
with pytest.raises(ValueError):
test_suite.run(model=german_credit_model)
with pytest.raises(ValueError):
test_suite.run(dataset=german_credit_data)
# Provide model and dataset
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with model
result = ScanReport(issues, model=german_credit_model)
test_suite = result.generate_test_suite("Custom name with model")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with model"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
with pytest.raises(ValueError):
test_suite.run(model=german_credit_model)
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with dataset
result = ScanReport(issues, dataset=german_credit_data)
test_suite = result.generate_test_suite("Custom name with dataset")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with dataset"
assert len(test_suite.tests) == 1

with pytest.raises(ValueError):
test_suite.run()
test_suite.run(model=german_credit_model)
with pytest.raises(ValueError):
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)

# Test ScanReport creation with model and dataset
result = ScanReport(issues, model=german_credit_model, dataset=german_credit_data)
test_suite = result.generate_test_suite("Custom name with model and dataset")

assert isinstance(test_suite, Suite)
assert test_suite.name == "Custom name with model and dataset"
assert len(test_suite.tests) == 1

test_suite.run()
test_suite.run(model=german_credit_model)
test_suite.run(dataset=german_credit_data)
test_suite.run(model=german_credit_model, dataset=german_credit_data)
9 changes: 7 additions & 2 deletions tests/scan/test_underconfidence_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,13 @@ def prediction_fn(df):

the_test = tests[0][0]
assert the_test.meta.name == "test_underconfidence_rate"
assert the_test.params["model"] == model
assert the_test.params["dataset"] == dataset

# model and dataset are set as default params in `Suite`
assert "model" not in the_test.params
the_test.params.update({"model": model})
assert "dataset" not in the_test.params
the_test.params.update({"dataset": dataset})

assert the_test.params["p_threshold"] == approx(0.94)

# Global rate is 33%, we accept a 10% deviation, thus up to 36.7%:
Expand Down

0 comments on commit 63727f4

Please sign in to comment.