DeepForest support for multiple annotation types (point, box, polygon). (#590)

Add support for multiple annotation types (point, box, polygon)

---------

Co-authored-by: henry senyondo <[email protected]>
weecology · Jul 29, 2024 · 81b029f · 81b029f
1 parent 290062c
commit 81b029f
Show file tree

Hide file tree

Showing 40 changed files with 1,831 additions and 758 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -4,6 +4,7 @@ DeepForest Change Log
 
 **1.3.5**
 
+Add Support for multiple annotation types (point, box, polygon).
 Create a utilities.download_ArcGIS_REST function to download tiles from ArcGIS REST API. For example to download NAIP imagery.
 
 **1.3.3**

diff --git a/deepforest/IoU.py b/deepforest/IoU.py
@@ -112,6 +112,6 @@ def compute_IoU(ground_truth, submission):
             }))
 
     iou_df = pd.concat(iou_df)
-    iou_df = iou_df.merge(ground_truth[["truth_id", "xmin", "xmax", "ymin", "ymax"]])
+    iou_df = iou_df.merge(ground_truth[["truth_id", "geometry"]])
 
     return iou_df
diff --git a/deepforest/data/australia.cpg b/deepforest/data/australia.cpg
@@ -0,0 +1 @@
+ISO-8859-1
diff --git a/deepforest/data/australia.dbf b/deepforest/data/australia.dbf
diff --git a/deepforest/data/australia.prj b/deepforest/data/australia.prj
@@ -0,0 +1 @@
+PROJCS["WGS_1984_UTM_Zone_53S",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",10000000.0],PARAMETER["Central_Meridian",135.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]
diff --git a/deepforest/data/australia.shp b/deepforest/data/australia.shp
diff --git a/deepforest/data/australia.shx b/deepforest/data/australia.shx
diff --git a/deepforest/data/australia.tif b/deepforest/data/australia.tif
diff --git a/deepforest/evaluate.py b/deepforest/evaluate.py
@@ -5,19 +5,21 @@
 import numpy as np
 import cv2
 from PIL import Image
+import warnings
 
 from deepforest import IoU
-from deepforest.utilities import check_file
 from deepforest import visualize
-import warnings
+from deepforest.utilities import determine_geometry_type
 
 
-def evaluate_image(predictions, ground_df, root_dir, savedir=None):
+def evaluate_image_boxes(predictions, ground_df, root_dir, savedir=None):
     """Compute intersection-over-union matching among prediction and ground
     truth boxes for one image.
 
     Args:
-        df: a pandas dataframe with columns name, xmin, xmax, ymin, ymax, label. The 'name' column should be the path relative to the location of the file.
+        df: a geopandas dataframe with geometry columns
+        predictions: a geopandas dataframe with geometry columns
+        ground_df: a geopandas dataframe with geometry columns
         summarize: Whether to group statistics by plot and overall score
         image_coordinates: Whether the current boxes are in coordinate system of the image, e.g. origin (0,0) upper left.
         root_dir: Where to search for image names in df
@@ -32,14 +34,6 @@ def evaluate_image(predictions, ground_df, root_dir, savedir=None):
     else:
         plot_name = plot_names[0]
 
-    predictions['geometry'] = predictions.apply(
-        lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
-    predictions = gpd.GeoDataFrame(predictions, geometry='geometry')
-
-    ground_df['geometry'] = ground_df.apply(
-        lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
-    ground_df = gpd.GeoDataFrame(ground_df, geometry='geometry')
-
     # match
     result = IoU.compute_IoU(ground_df, predictions)
 
@@ -101,25 +95,45 @@ def __evaluate_wrapper__(predictions,
                          iou_threshold,
                          numeric_to_label_dict,
                          savedir=None):
-    """Evaluate a set of predictions against a ground truth csv file.
-
-    Args:
-        predictions: a pandas dataframe, if supplied a root dir is needed to give the relative path of files in df.name. The labels in ground truth and predictions must match. If one is numeric, the other must be numeric.
-        csv_file: a csv file with columns xmin, ymin, xmax, ymax, label, image_path
-        root_dir: location of files in the dataframe 'name' column.
-        iou_threshold: intersection-over-union threshold, see deepforest.evaluate
-        savedir: optional directory to save image with overlaid predictions and annotations
-    Returns:
-        results: a dictionary of results with keys, results, box_recall, box_precision, class_recall
-    """
+    """Evaluate a set of predictions against a ground truth csv file
+        Args:   
+            predictions: a pandas dataframe, if supplied a root dir is needed to give the relative path of files in df.name. The labels in ground truth and predictions must match. If one is numeric, the other must be numeric.
+            root_dir: location of files in the dataframe 'name' column.
+            iou_threshold: intersection-over-union threshold, see deepforest.evaluate
+            savedir: optional directory to save image with overlaid predictions and annotations
+        Returns:
+            results: a dictionary of results with keys, results, box_recall, box_precision, class_recall
+        """
     # remove empty samples from ground truth
     ground_df = ground_df[~((ground_df.xmin == 0) & (ground_df.xmax == 0))]
 
-    results = evaluate(predictions=predictions,
-                       ground_df=ground_df,
-                       root_dir=root_dir,
-                       iou_threshold=iou_threshold,
-                       savedir=savedir)
+    # Default results for blank predictions
+    if predictions.empty:
+        results = {
+            "results": None,
+            "box_recall": 0,
+            "box_precision": np.nan,
+            "class_recall": None
+        }
+        return results
+
+    # Convert pandas to geopandas if needed
+    if not isinstance(predictions, gpd.GeoDataFrame):
+        warnings.warn("Converting predictions to GeoDataFrame using geometry column")
+        predictions = gpd.GeoDataFrame(predictions, geometry="geometry")
+
+    prediction_geometry = determine_geometry_type(predictions)
+    if prediction_geometry == "point":
+        raise NotImplementedError("Point evaluation is not yet implemented")
+    elif prediction_geometry == "box":
+        results = evaluate_boxes(predictions=predictions,
+                                 ground_df=ground_df,
+                                 root_dir=root_dir,
+                                 iou_threshold=iou_threshold,
+                                 savedir=savedir)
+    else:
+        raise NotImplementedError(
+            "Geometry type {} not implemented".format(prediction_geometry))
 
     # replace classes if not NUll
     if not results is None:
@@ -135,7 +149,7 @@ def __evaluate_wrapper__(predictions,
     return results
 
 
-def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
+def evaluate_boxes(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
     """Image annotated crown evaluation routine submission can be submitted as
     a .shp, existing pandas dataframe or .csv path.
 
@@ -151,10 +165,6 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
         box_precision: proportion of predictions that are true positive, regardless of class
         class_recall: a pandas dataframe of class level recall and precision with class sizes
     """
-
-    check_file(ground_df)
-    check_file(predictions)
-
     # Run evaluation on all plots
     results = []
     box_recalls = []
@@ -181,10 +191,10 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
             continue
         else:
             group = group.reset_index(drop=True)
-            result = evaluate_image(predictions=image_predictions,
-                                    ground_df=group,
-                                    root_dir=root_dir,
-                                    savedir=savedir)
+            result = evaluate_image_boxes(predictions=image_predictions,
+                                          ground_df=group,
+                                          root_dir=root_dir,
+                                          savedir=savedir)
 
         result["image_path"] = image_path
         result["match"] = result.IoU > iou_threshold
@@ -254,7 +264,9 @@ def _point_recall_image_(predictions, ground_df, root_dir=None, savedir=None):
             raise AttributeError("savedir is {}, but root dir is None".format(savedir))
         image = np.array(Image.open("{}/{}".format(root_dir, plot_name)))[:, :, ::-1]
         image = visualize.plot_predictions(image, df=predictions)
-        image = visualize.plot_points(image, df=ground_df, color=(0, 165, 255))
+        image = visualize.plot_points(image,
+                                      points=ground_df[["x", "y"]].values,
+                                      color=(0, 165, 255))
         cv2.imwrite("{}/{}".format(savedir, plot_name), image)
 
     return result
@@ -276,7 +288,6 @@ def point_recall(predictions, ground_df, root_dir=None, savedir=None):
         box_recall: proportion of true positives between predicted boxes and ground truth points, regardless of class
         class_recall: a pandas dataframe of class level recall and precision with class sizes
     """
-    check_file(predictions)
     if savedir:
         if root_dir is None:
             raise AttributeError("savedir is {}, but root dir is None".format(savedir))

diff --git a/deepforest/main.py b/deepforest/main.py
@@ -4,7 +4,7 @@
 import typing
 import warnings
 
-import cv2
+import geopandas as gpd
 import numpy as np
 import pandas as pd
 import pytorch_lightning as pl
@@ -406,7 +406,7 @@ def predict_file(self, csv_file, root_dir, savedir=None, color=None, thickness=1
         Returns:
             df: pandas dataframe with bounding boxes, label and scores for each image in the csv file
         """
-        df = pd.read_csv(csv_file)
+        df = utilities.read_file(csv_file)
         ds = dataset.TreeDataset(csv_file=csv_file,
                                  root_dir=root_dir,
                                  transforms=None,
@@ -595,7 +595,10 @@ def validation_step(self, batch, batch_idx):
             self.log("val_{}".format(key), value, on_epoch=True)
 
         for index, result in enumerate(preds):
-            boxes = visualize.format_boxes(result)
+            # Skip empty predictions
+            if result["boxes"].shape[0] == 0:
+                continue
+            boxes = visualize.format_geometry(result)
             boxes["image_path"] = path[index]
             self.predictions.append(boxes)
 
@@ -616,30 +619,52 @@ def on_validation_epoch_end(self):
         self.log_dict(output)
         self.mAP_metric.reset()
 
-        # Evaluate on validation data predictions
-        self.predictions_df = pd.concat(self.predictions)
-        ground_df = pd.read_csv(self.config["validation"]["csv_file"])
-        ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
+        if len(self.predictions) == 0:
+            return None
+        else:
+            self.predictions_df = pd.concat(self.predictions)
 
         #Evaluate every n epochs
         if self.current_epoch % self.config["validation"]["val_accuracy_interval"] == 0:
-            results = evaluate_iou.__evaluate_wrapper__(
-                predictions=self.predictions_df,
-                ground_df=ground_df,
-                root_dir=self.config["validation"]["root_dir"],
-                iou_threshold=self.config["validation"]["iou_threshold"],
-                savedir=None,
-                numeric_to_label_dict=self.numeric_to_label_dict)
-
-            self.log("box_recall", results["box_recall"])
-            self.log("box_precision", results["box_precision"])
-            if isinstance(results, pd.DataFrame):
-                for index, row in results["class_recall"].iterrows():
-                    self.log("{}_Recall".format(self.numeric_to_label_dict[row["label"]]),
-                             row["recall"])
-                    self.log(
-                        "{}_Precision".format(self.numeric_to_label_dict[row["label"]]),
-                        row["precision"])
+            #Create a geospatial column
+            ground_df = utilities.read_file(self.config["validation"]["csv_file"])
+            ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
+
+            if self.predictions_df.empty:
+                warnings.warn("No predictions made, skipping evaluation")
+                geom_type = utilities.determine_geometry_type(ground_df)
+                if geom_type == "box":
+                    result = {
+                        "box_recall": 0,
+                        "box_precision": 0,
+                        "class_recall": pd.DataFrame()
+                    }
+            else:
+                results = evaluate_iou.__evaluate_wrapper__(
+                    predictions=self.predictions_df,
+                    ground_df=ground_df,
+                    root_dir=self.config["validation"]["root_dir"],
+                    iou_threshold=self.config["validation"]["iou_threshold"],
+                    savedir=None,
+                    numeric_to_label_dict=self.numeric_to_label_dict)
+
+                # Log each key value pair of the results dict
+                for key, value in results.items():
+                    if key in ["class_recall"]:
+                        for index, row in value.iterrows():
+                            self.log(
+                                "{}_Recall".format(
+                                    self.numeric_to_label_dict[row["label"]]),
+                                row["recall"])
+                            self.log(
+                                "{}_Precision".format(
+                                    self.numeric_to_label_dict[row["label"]]),
+                                row["precision"])
+                    else:
+                        try:
+                            self.log(key, value)
+                        except:
+                            pass
 
     def predict_step(self, batch, batch_idx):
         batch_results = self.model(batch)
@@ -719,7 +744,7 @@ def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
         Returns:
             results: dict of ("results", "precision", "recall") for a given threshold
         """
-        ground_df = pd.read_csv(csv_file)
+        ground_df = utilities.read_file(csv_file)
         ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
         predictions = self.predict_file(csv_file=csv_file,
                                         root_dir=root_dir,

diff --git a/deepforest/predict.py b/deepforest/predict.py
@@ -3,15 +3,13 @@
 import pandas as pd
 import numpy as np
 import os
-from PIL import Image
-import warnings
 
 import torch
 from torchvision.ops import nms
 import typing
 
 from deepforest import visualize, dataset
-import rasterio
+from deepforest.utilities import read_file
 
 
 def _predict_image_(model,
@@ -182,6 +180,11 @@ def _dataloader_wrapper_(model,
         results.append(prediction)
 
     results = pd.concat(results, ignore_index=True)
+    if results.empty:
+        results["geometry"] = None
+        return results
+
+    results = read_file(results, root_dir)
 
     if savedir:
         visualize.plot_prediction_dataframe(results,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		PROJCS["WGS_1984_UTM_Zone_53S",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",10000000.0],PARAMETER["Central_Meridian",135.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]