From 65a2b6247084cb4caf4d5f4b20e91891d29c0a40 Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Wed, 9 Aug 2023 13:47:34 -0400
Subject: [PATCH 1/7] Added OD enum

---
 raiwidgets/raiwidgets/constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/raiwidgets/raiwidgets/constants.py b/raiwidgets/raiwidgets/constants.py
index 8c5ecb60a9..57d26309b2 100644
--- a/raiwidgets/raiwidgets/constants.py
+++ b/raiwidgets/raiwidgets/constants.py
@@ -162,6 +162,7 @@ class ModelTask(str, Enum):
 
     CLASSIFICATION = 'classification'
     REGRESSION = 'regression'
+    OBJECT_DETECTION = 'object_detection'
     UNKNOWN = 'unknown'
 
 

From 6f6f5bb4a2dc6480cc9a70d270d8f34baa47a582 Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Wed, 9 Aug 2023 13:48:31 -0400
Subject: [PATCH 2/7] Added OD metric case.

---
 raiwidgets/raiwidgets/error_analysis_dashboard_input.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/raiwidgets/raiwidgets/error_analysis_dashboard_input.py b/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
index 745d4df90d..5aa2e6d4d9 100644
--- a/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
+++ b/raiwidgets/raiwidgets/error_analysis_dashboard_input.py
@@ -346,6 +346,8 @@ def setup_local(self, explanation, model, dataset, true_y, classes,
                 metric = Metrics.ERROR_RATE
             else:
                 metric = self._error_analyzer.metric
+        elif self._error_analyzer.model_task == ModelTask.OBJECT_DETECTION:
+            metric = Metrics.ERROR_RATE
         else:
             if self._error_analyzer.metric is None:
                 metric = Metrics.MEAN_SQUARED_ERROR

From c0e77355754d614ef9d887c0888f348e857523fc Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Thu, 10 Aug 2023 10:46:11 -0400
Subject: [PATCH 3/7] test & lint update

---
 .../tests/test_error_analysis_dashboard.py    | 140 ++++++++++++++++++
 1 file changed, 140 insertions(+)

diff --git a/raiwidgets/tests/test_error_analysis_dashboard.py b/raiwidgets/tests/test_error_analysis_dashboard.py
index 33dda1c929..cd2fa2b1f3 100644
--- a/raiwidgets/tests/test_error_analysis_dashboard.py
+++ b/raiwidgets/tests/test_error_analysis_dashboard.py
@@ -1,8 +1,11 @@
 # Copyright (c) Microsoft Corporation
 # Licensed under the MIT License.
 
+from zipfile import ZipFile
 import numpy as np
+import os
 import pandas as pd
+import pytest
 import shap
 import sklearn
 from interpret.ext.blackbox import MimicExplainer
@@ -11,6 +14,17 @@
 from sklearn.datasets import load_iris, make_classification
 from sklearn.model_selection import train_test_split
 
+try:
+    import torch
+    import torchvision
+    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+    torch_installed = True
+except ImportError:
+    torch_installed = False
+
+import urllib.request as request_file
+import xml.etree.ElementTree as ET
+
 from erroranalysis._internal.constants import Metrics, metric_to_display_name
 from erroranalysis._internal.surrogate_error_tree import (
     DEFAULT_MAX_DEPTH, DEFAULT_MIN_CHILD_SAMPLES, DEFAULT_NUM_LEAVES)
@@ -158,6 +172,132 @@ def test_error_analysis_adult_census_numeric_feature_names(self):
 
         run_error_analysis_adult_census(X, y, cat_idxs)
 
+    @pytest.mark.skipif(not torch_installed,
+                        reason="requires torch & torchvision")
+    def test_error_analysis_fridge_object_detection(self):
+        model = get_object_detection_model()
+        dataset = load_fridge_object_detection_dataset()
+        classes = np.array(['can', 'carton', 'milk_bottle', 'water_bottle'])
+
+        X_test = dataset[["image"]]
+        y_test = dataset[["label"]]
+        ErrorAnalysisDashboard(model=model, dataset=X_test,
+                               true_y=y_test, classes=classes)
+
+
+def get_object_detection_model():
+    # download fine-tuned recycling model from url
+    def download_assets(filepath, force=False):
+        if force or not os.path.exists(filepath):
+            request_file.urlretrieve(
+                "https://publictestdatasets.blob.core.windows.net\
+                /models/fastrcnn.pt",
+                os.path.join(filepath))
+        else:
+            print('Found' + filepath)
+
+        return filepath
+
+    def get_instance_segmentation_model(num_classes):
+        # load an instance segmentation model pre-trained on COCO
+        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
+            pretrained=True
+        )
+        in_features = model.roi_heads.box_predictor.cls_score.in_features
+        # replace the pre-trained head with a new one
+        model.roi_heads.box_predictor = FastRCNNPredictor(
+            in_features,
+            num_classes
+        )
+        return model
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    num_classes = 5
+    model = get_instance_segmentation_model(num_classes)
+    _ = download_assets('Recycling_finetuned_FastRCNN.pt')
+    model.load_state_dict(
+        torch.load('Recycling_finetuned_FastRCNN.pt',
+                   map_location=device
+                   )
+    )
+
+    model.to(device)
+
+
+def load_fridge_object_detection_dataset_labels():
+
+    src_images = "./data/odFridgeObjects/"
+
+    # Path to the annotations
+    annotations_folder = os.path.join(src_images, "annotations")
+
+    labels = []
+    label_dict = {'can': 1, 'carton': 2, 'milk_bottle': 3, 'water_bottle': 4}
+
+    # Read each annotation
+    for i, filename in enumerate(os.listdir(annotations_folder)):
+        if filename.endswith(".xml"):
+            print("Parsing " + os.path.join(src_images, filename))
+
+            root = ET.parse(
+                os.path.join(annotations_folder, filename)
+            ).getroot()
+
+            # use if needed
+            # width = int(root.find("size/width").text)
+            # height = int(root.find("size/height").text)
+
+            image_labels = []
+            for object in root.findall("object"):
+                name = object.find("name").text
+                xmin = object.find("bndbox/xmin").text
+                ymin = object.find("bndbox/ymin").text
+                xmax = object.find("bndbox/xmax").text
+                ymax = object.find("bndbox/ymax").text
+                isCrowd = int(object.find("difficult").text)
+                image_labels.append([
+                    label_dict[name],  # label
+                    float(xmin),  # topX. To normalize, divide by width.
+                    float(ymin),  # topY. To normalize, divide by height.
+                    float(xmax),  # bottomX. To normalize, divide by width
+                    float(ymax),  # bottomY. To normalize, divide by height
+                    int(isCrowd)
+                ])
+            labels.append(image_labels)
+
+    return labels
+
+
+def load_fridge_object_detection_dataset():
+    # create data folder if it doesnt exist.
+    os.makedirs("data", exist_ok=True)
+
+    # download data
+    download_url = ("https://cvbp-secondary.z19.web.core.windows.net/" +
+                    "datasets/object_detection/odFridgeObjects.zip")
+    data_file = "./odFridgeObjects.zip"
+    request_file.urlretrieve(download_url, filename=data_file)
+
+    # extract files
+    with ZipFile(data_file, "r") as zip:
+        print("extracting files...")
+        zip.extractall(path="./data")
+        print("done")
+    # delete zip file
+    os.remove(data_file)
+
+    labels = load_fridge_object_detection_dataset_labels()
+
+    # get all file names into a pandas dataframe with the labels
+    data = pd.DataFrame(columns=["image", "label"])
+    for i, file in enumerate(os.listdir("./data/odFridgeObjects/" + "images")):
+        image_path = "./data/odFridgeObjects/" + "images" + "/" + file
+        data = data.append({"image": image_path,
+                            "label": labels[i]},  # folder
+                           ignore_index=True)
+
+    return data
+
 
 def run_error_analysis_adult_census(X, y, categorical_features):
     X, y = sklearn.utils.resample(

From d646af94fa1c9a040ac56c9842fc79749d38d25e Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Thu, 10 Aug 2023 11:01:31 -0400
Subject: [PATCH 4/7] flake fix

---
 raiwidgets/tests/test_error_analysis_dashboard.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/raiwidgets/tests/test_error_analysis_dashboard.py b/raiwidgets/tests/test_error_analysis_dashboard.py
index cd2fa2b1f3..6bbb30d4c5 100644
--- a/raiwidgets/tests/test_error_analysis_dashboard.py
+++ b/raiwidgets/tests/test_error_analysis_dashboard.py
@@ -235,7 +235,7 @@ def load_fridge_object_detection_dataset_labels():
     label_dict = {'can': 1, 'carton': 2, 'milk_bottle': 3, 'water_bottle': 4}
 
     # Read each annotation
-    for i, filename in enumerate(os.listdir(annotations_folder)):
+    for _, filename in enumerate(os.listdir(annotations_folder)):
         if filename.endswith(".xml"):
             print("Parsing " + os.path.join(src_images, filename))
 

From 250d01fd15c6b2ee871502224bdc9ddb738ffd02 Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Thu, 10 Aug 2023 19:12:51 -0400
Subject: [PATCH 5/7] import fix

---
 raiwidgets/tests/test_error_analysis_dashboard.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/raiwidgets/tests/test_error_analysis_dashboard.py b/raiwidgets/tests/test_error_analysis_dashboard.py
index 6bbb30d4c5..95599a7f53 100644
--- a/raiwidgets/tests/test_error_analysis_dashboard.py
+++ b/raiwidgets/tests/test_error_analysis_dashboard.py
@@ -1,9 +1,10 @@
 # Copyright (c) Microsoft Corporation
 # Licensed under the MIT License.
 
+import os
 from zipfile import ZipFile
+
 import numpy as np
-import os
 import pandas as pd
 import pytest
 import shap

From cfaad4bb6acf1a97e2dde33d780b85d2de3b27a2 Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Mon, 14 Aug 2023 10:22:08 -0400
Subject: [PATCH 6/7] OD model & data ckpt

---
 .../datasets/vision/__init__.py               | 10 +++
 .../vision/object_detection_data_utils.py     | 84 +++++++++++++++++++
 .../rai_test_utils/models/torch/__init__.py   |  8 ++
 .../models/torch/torch_model_utils.py         | 53 ++++++++++++
 4 files changed, 155 insertions(+)
 create mode 100644 rai_test_utils/rai_test_utils/datasets/vision/__init__.py
 create mode 100644 rai_test_utils/rai_test_utils/datasets/vision/object_detection_data_utils.py
 create mode 100644 rai_test_utils/rai_test_utils/models/torch/__init__.py
 create mode 100644 rai_test_utils/rai_test_utils/models/torch/torch_model_utils.py

diff --git a/rai_test_utils/rai_test_utils/datasets/vision/__init__.py b/rai_test_utils/rai_test_utils/datasets/vision/__init__.py
new file mode 100644
index 0000000000..e9a338b106
--- /dev/null
+++ b/rai_test_utils/rai_test_utils/datasets/vision/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+"""Namespace for vision datasets."""
+
+from .object_detection_data_utils import load_fridge_object_detection_dataset
+
+__all__ = [
+    "load_fridge_object_detection_dataset"
+]
diff --git a/rai_test_utils/rai_test_utils/datasets/vision/object_detection_data_utils.py b/rai_test_utils/rai_test_utils/datasets/vision/object_detection_data_utils.py
new file mode 100644
index 0000000000..b1344e8b86
--- /dev/null
+++ b/rai_test_utils/rai_test_utils/datasets/vision/object_detection_data_utils.py
@@ -0,0 +1,84 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+import os
+from zipfile import ZipFile
+import pandas as pd
+import xml.etree.ElementTree as ET
+
+import urllib.request as request_file
+
+
+def load_fridge_object_detection_dataset_labels():
+    """Parse the fridge dataset annotations into per-image label lists.
+
+    Annotation files are read in sorted filename order so the result lines
+    up with the sorted image listing in load_fridge_object_detection_dataset.
+
+    :return: One list per annotation file; each entry is
+        [class_id, xmin, ymin, xmax, ymax, difficult_flag] (un-normalized).
+    :rtype: list
+    """
+    src_images = "./data/odFridgeObjects/"
+
+    # Path to the annotations
+    annotations_folder = os.path.join(src_images, "annotations")
+
+    labels = []
+    label_dict = {'can': 1, 'carton': 2, 'milk_bottle': 3, 'water_bottle': 4}
+
+    # os.listdir order is arbitrary; sort so labels pair up with images
+    for filename in sorted(os.listdir(annotations_folder)):
+        if not filename.endswith(".xml"):
+            continue
+        annotation_path = os.path.join(annotations_folder, filename)
+        print("Parsing " + annotation_path)
+        root = ET.parse(annotation_path).getroot()
+
+        image_labels = []
+        for obj in root.findall("object"):
+            box = obj.find("bndbox")
+            image_labels.append([
+                label_dict[obj.find("name").text],  # label
+                float(box.find("xmin").text),  # topX
+                float(box.find("ymin").text),  # topY
+                float(box.find("xmax").text),  # bottomX
+                float(box.find("ymax").text),  # bottomY
+                int(obj.find("difficult").text)  # isCrowd
+            ])
+        labels.append(image_labels)
+
+    return labels
+
+
+def load_fridge_object_detection_dataset():
+    """Download the fridge objects dataset and pair images with labels.
+
+    :return: DataFrame with an "image" column (file path) and a "label"
+        column (the parsed annotation list for that image).
+    :rtype: pandas.DataFrame
+    """
+    # create data folder if it doesn't exist.
+    os.makedirs("data", exist_ok=True)
+
+    # download data
+    download_url = ("https://cvbp-secondary.z19.web.core.windows.net/" +
+                    "datasets/object_detection/odFridgeObjects.zip")
+    data_file = "./odFridgeObjects.zip"
+    request_file.urlretrieve(download_url, filename=data_file)
+
+    # extract files, then delete the archive
+    with ZipFile(data_file, "r") as archive:
+        print("extracting files...")
+        archive.extractall(path="./data")
+        print("done")
+    os.remove(data_file)
+
+    labels = load_fridge_object_detection_dataset_labels()
+
+    # Sorted to match the label order; rows are collected in a list because
+    # DataFrame.append was removed in pandas 2.0.
+    image_dir = "./data/odFridgeObjects/images"
+    rows = [{"image": image_dir + "/" + name, "label": labels[i]}
+            for i, name in enumerate(sorted(os.listdir(image_dir)))]
+    return pd.DataFrame(rows, columns=["image", "label"])
diff --git a/rai_test_utils/rai_test_utils/models/torch/__init__.py b/rai_test_utils/rai_test_utils/models/torch/__init__.py
new file mode 100644
index 0000000000..a41a2bc420
--- /dev/null
+++ b/rai_test_utils/rai_test_utils/models/torch/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+"""Namespace for torch models."""
+
+from .torch_model_utils import get_object_detection_fridge_model
+
+__all__ = ["get_object_detection_fridge_model"]
diff --git a/rai_test_utils/rai_test_utils/models/torch/torch_model_utils.py b/rai_test_utils/rai_test_utils/models/torch/torch_model_utils.py
new file mode 100644
index 0000000000..706214abc2
--- /dev/null
+++ b/rai_test_utils/rai_test_utils/models/torch/torch_model_utils.py
@@ -0,0 +1,53 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+import os
+
+import torch
+import torchvision
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+
+import urllib.request as request_file
+
+
+def download_assets(filepath, force=False):
+    """Download the fine-tuned Faster R-CNN weights to filepath if needed."""
+    if force or not os.path.exists(filepath):
+        # Adjacent string literals are used on purpose: a backslash
+        # continuation would embed the next line's indentation in the URL.
+        request_file.urlretrieve(
+            "https://publictestdatasets.blob.core.windows.net"
+            "/models/fastrcnn.pt", filepath)
+    else:
+        print('Found ' + filepath)
+    return filepath
+
+
+def get_instance_segmentation_model(num_classes):
+    # despite the name, this loads a pre-trained Faster R-CNN detector
+    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
+        pretrained=True
+    )
+    in_features = model.roi_heads.box_predictor.cls_score.in_features
+    # swap the pre-trained box head for one sized to num_classes outputs
+    model.roi_heads.box_predictor = FastRCNNPredictor(
+        in_features,
+        num_classes
+    )
+    return model
+
+
+def get_object_detection_fridge_model():
+    """Build the fridge detector and load its fine-tuned weights.
+
+    :return: the model, moved to CUDA when available, otherwise CPU.
+    """
+    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    detector = get_instance_segmentation_model(5)
+
+    # download_assets returns the path it was given
+    weights_path = download_assets('Recycling_finetuned_FastRCNN.pt')
+    detector.load_state_dict(torch.load(weights_path, map_location=target))
+
+    detector.to(target)
+    return detector

From 21d9cfd79e12d6d5cdf64d55aba6c348685d8849 Mon Sep 17 00:00:00 2001
From: Advitya Gemawat <agemawat@microsoft.com>
Date: Mon, 14 Aug 2023 10:51:50 -0400
Subject: [PATCH 7/7] test format tweak

---
 raiwidgets/requirements.txt                   |   1 +
 .../tests/test_error_analysis_dashboard.py    | 119 +-----------------
 2 files changed, 5 insertions(+), 115 deletions(-)

diff --git a/raiwidgets/requirements.txt b/raiwidgets/requirements.txt
index aa464b107f..f2f6bd8ea9 100644
--- a/raiwidgets/requirements.txt
+++ b/raiwidgets/requirements.txt
@@ -8,3 +8,4 @@ lightgbm>=2.0.11
 erroranalysis>=0.4.4
 fairlearn==0.7.0
 raiutils>=0.4.0
+rai-test-utils
diff --git a/raiwidgets/tests/test_error_analysis_dashboard.py b/raiwidgets/tests/test_error_analysis_dashboard.py
index 95599a7f53..fb46b185b7 100644
--- a/raiwidgets/tests/test_error_analysis_dashboard.py
+++ b/raiwidgets/tests/test_error_analysis_dashboard.py
@@ -15,6 +15,9 @@
 from sklearn.datasets import load_iris, make_classification
 from sklearn.model_selection import train_test_split
 
+from rai_test_utils.datasets.vision.object_detection_data_utils import load_fridge_object_detection_dataset
+from rai_test_utils.models.torch.torch_model_utils import get_object_detection_fridge_model
+
 try:
     import torch
     import torchvision
@@ -176,7 +179,7 @@ def test_error_analysis_adult_census_numeric_feature_names(self):
     @pytest.mark.skipif(not torch_installed,
                         reason="requires torch & torchvision")
     def test_error_analysis_fridge_object_detection(self):
-        model = get_object_detection_model()
+        model = get_object_detection_fridge_model()
         dataset = load_fridge_object_detection_dataset()
         classes = np.array(['can', 'carton', 'milk_bottle', 'water_bottle'])
 
@@ -186,120 +189,6 @@ def test_error_analysis_fridge_object_detection(self):
                                true_y=y_test, classes=classes)
 
 
-def get_object_detection_model():
-    # download fine-tuned recycling model from url
-    def download_assets(filepath, force=False):
-        if force or not os.path.exists(filepath):
-            request_file.urlretrieve(
-                "https://publictestdatasets.blob.core.windows.net\
-                /models/fastrcnn.pt",
-                os.path.join(filepath))
-        else:
-            print('Found' + filepath)
-
-        return filepath
-
-    def get_instance_segmentation_model(num_classes):
-        # load an instance segmentation model pre-trained on COCO
-        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
-            pretrained=True
-        )
-        in_features = model.roi_heads.box_predictor.cls_score.in_features
-        # replace the pre-trained head with a new one
-        model.roi_heads.box_predictor = FastRCNNPredictor(
-            in_features,
-            num_classes
-        )
-        return model
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-    num_classes = 5
-    model = get_instance_segmentation_model(num_classes)
-    _ = download_assets('Recycling_finetuned_FastRCNN.pt')
-    model.load_state_dict(
-        torch.load('Recycling_finetuned_FastRCNN.pt',
-                   map_location=device
-                   )
-    )
-
-    model.to(device)
-
-
-def load_fridge_object_detection_dataset_labels():
-
-    src_images = "./data/odFridgeObjects/"
-
-    # Path to the annotations
-    annotations_folder = os.path.join(src_images, "annotations")
-
-    labels = []
-    label_dict = {'can': 1, 'carton': 2, 'milk_bottle': 3, 'water_bottle': 4}
-
-    # Read each annotation
-    for _, filename in enumerate(os.listdir(annotations_folder)):
-        if filename.endswith(".xml"):
-            print("Parsing " + os.path.join(src_images, filename))
-
-            root = ET.parse(
-                os.path.join(annotations_folder, filename)
-            ).getroot()
-
-            # use if needed
-            # width = int(root.find("size/width").text)
-            # height = int(root.find("size/height").text)
-
-            image_labels = []
-            for object in root.findall("object"):
-                name = object.find("name").text
-                xmin = object.find("bndbox/xmin").text
-                ymin = object.find("bndbox/ymin").text
-                xmax = object.find("bndbox/xmax").text
-                ymax = object.find("bndbox/ymax").text
-                isCrowd = int(object.find("difficult").text)
-                image_labels.append([
-                    label_dict[name],  # label
-                    float(xmin),  # topX. To normalize, divide by width.
-                    float(ymin),  # topY. To normalize, divide by height.
-                    float(xmax),  # bottomX. To normalize, divide by width
-                    float(ymax),  # bottomY. To normalize, divide by height
-                    int(isCrowd)
-                ])
-            labels.append(image_labels)
-
-    return labels
-
-
-def load_fridge_object_detection_dataset():
-    # create data folder if it doesnt exist.
-    os.makedirs("data", exist_ok=True)
-
-    # download data
-    download_url = ("https://cvbp-secondary.z19.web.core.windows.net/" +
-                    "datasets/object_detection/odFridgeObjects.zip")
-    data_file = "./odFridgeObjects.zip"
-    request_file.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
-        print("extracting files...")
-        zip.extractall(path="./data")
-        print("done")
-    # delete zip file
-    os.remove(data_file)
-
-    labels = load_fridge_object_detection_dataset_labels()
-
-    # get all file names into a pandas dataframe with the labels
-    data = pd.DataFrame(columns=["image", "label"])
-    for i, file in enumerate(os.listdir("./data/odFridgeObjects/" + "images")):
-        image_path = "./data/odFridgeObjects/" + "images" + "/" + file
-        data = data.append({"image": image_path,
-                            "label": labels[i]},  # folder
-                           ignore_index=True)
-
-    return data
-
-
 def run_error_analysis_adult_census(X, y, categorical_features):
     X, y = sklearn.utils.resample(
         X, y, n_samples=1000, random_state=7, stratify=y)