feat: update FastLabel merger (#178)

* update configs
  Signed-off-by: Shunsuke Miura <[email protected]>
* add label converter to FastLabel2dToT4Converter
  Signed-off-by: Shunsuke Miura <[email protected]>
* fix bug in from_json of surface_ann
  Signed-off-by: Shunsuke Miura <[email protected]>
* add automatic_annotation key to sample_annotation/object_ann/surface_ann
  Signed-off-by: Shunsuke Miura <[email protected]>
* accelerate default object_mask generation
  Signed-off-by: Shunsuke Miura <[email protected]>
* feat: improve T4 updater functionality and directory handling
  Signed-off-by: Shunsuke Miura <[email protected]>
* feat: support polygon-to-bbox conversion and improve polygon-to-RLE conversion
* update object config
  Signed-off-by: Shunsuke Miura <[email protected]>
* feat: update annotation loader
* feat: only convert to bbox for objects labels
  Signed-off-by: Shunsuke Miura <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* fix: handling automatic_annotation entry
  Signed-off-by: Shunsuke Miura <[email protected]>
* fix: bug fix in rle_from_points
  Signed-off-by: Shunsuke Miura <[email protected]>
* fix: add default value for automatic_annotation
  Signed-off-by: Shunsuke Miura <[email protected]>
---------
Signed-off-by: Shunsuke Miura <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
tier4 · Dec 17, 2024 · c77c682 · c77c682
1 parent bc5c3fe
commit c77c682
Show file tree

Hide file tree

Showing 11 changed files with 173 additions and 49 deletions.
diff --git a/config/label/object.yaml b/config/label/object.yaml
@@ -4,6 +4,7 @@ bus: [bus, BUS, vehicle.bus, vehicle.bus (bendy & rigid)]
 car:
   [
     car,
+    cars,
     CAR,
     vehicle.car,
     vehicle.construction,
@@ -53,3 +54,40 @@ stroller: [stroller, STROLLER, pedestrian.stroller]
 police_officer: [police_officer, POLICE_OFFICER, pedestrian.police_officer]
 wheelchair: [wheelchair, WHEELCHAIR, pedestrian.wheelchair]
 forklift: [forklift, FORKLIFT]
+other_vehicle: [other_vehicle]
+other_pedestrian: [other_pedestrian]
+train: [train, TRAIN]
+cone: [cone]
+curb: [curb]
+gate: [gate]
+guide_post: [guide_post]
+construction: [constructions, construction]
+traffic_sign: [traffic_sign]
+road_debris: [road_debris]
+other_obstacle: [other_obstacle]
+obstacle_others: [obstacle_others]
+laneline_solid_white: [laneline_solid_white]
+laneline_dash_white: [laneline_dash_white]
+laneline_solid_yellow: [laneline_solid_yellow]
+laneline_dash_yellow: [laneline_dash_yellow]
+laneline_solid_green: [laneline_solid_green]
+laneline_solid_red: [laneline_solid_red]
+deceleration_line: [deceleration_line]
+dashed_lane_marking: [dashed_lane_markings, dash_lane_markings, dashed_lane_marking, dash_white_merge, dash_white_branch]
+stopline: [stopline]
+crosswalk: [crosswalk]
+marking_character: [marking_character]
+marking_arrow: [marking_arrow]
+striped_road_marking: [striped_road_markings, striped_road_marking]
+parking_lot: [parking_lot]
+marking_other: [marking_other]
+road: [road]
+road_paint_lane_solid_white: [road_paint_lane_solid_white]
+road_paint_lane_dash_white: [road_paint_lane_dash_white]
+sidewalk: [sidewalk]
+building: [building, buildling] # "buildling" is a misspelled variant accepted as an alias (presumably present in incoming labels)
+wall/fence: [wall_fence, wall/fence]
+pole: [pole]
+vegetation/terrain: [vegetation_terrain, vegetation/terrain]
+sky: [sky]
+traffic_light: [traffic_light]
diff --git a/config/label/surface.yaml b/config/label/surface.yaml
@@ -4,8 +4,10 @@
 - sidewalk
 - building
 - wall_fence
+- wall/fence
 - pole
 - vegetation_terrain
+- vegetation/terrain
 - sky
 - road_paint_lane_solid_white
 - road_paint_lane_dash_white
diff --git a/config/update_t4_with_fastlabel_sample.yaml b/config/update_t4_with_fastlabel_sample.yaml
@@ -6,17 +6,18 @@ description:
     partial: "The object is occluded by more than 50% (but not completely)."
     none: "The object is 90-100% occluded and no points/pixels are visible in the label."
   camera_index:
-    CAM_FRONT: 0
-    CAM_FRONT_RIGHT: 1
-    CAM_BACK_RIGHT: 2
-    CAM_BACK: 3
-    CAM_BACK_LEFT: 4
-    CAM_FRONT_LEFT: 5
+    CAM_FRONT_NARROW: 0
+    CAM_FRONT_WIDE: 1
+    CAM_FRONT_RIGHT: 2
+    CAM_BACK_RIGHT: 3
+    CAM_BACK_NARROW: 4
+    CAM_BACK_WIDE: 5
+    CAM_BACK_LEFT: 6
+    CAM_FRONT_LEFT: 7
+  surface_categories: ./config/label/surface.yaml
 
 conversion:
+  make_t4_dataset_dir: false # If true, output is written under "scene_dir"/t4_dataset/ (i.e. "scene_dir"/t4_dataset/data|annotation). If false, it is written directly under "scene_dir"/ (i.e. "scene_dir"/data|annotation).
   input_base: ./data/input_t4_format # could be non_annotated_t4_format or t4_format_3d_annotated
   input_anno_base: ./data/fastlabel
   output_base: ./data/output_t4_format # currently, this only includes the 2D annotations
-  dataset_corresponding:
-    # input t4dataset_name: FastLabel json file name
-    T4DatasetName: FastLabelAnnotationFile
diff --git a/perception_dataset/convert.py b/perception_dataset/convert.py
@@ -378,16 +378,16 @@ def main():
         input_base = config_dict["conversion"]["input_base"]
         output_base = config_dict["conversion"]["output_base"]
         input_anno_base = config_dict["conversion"]["input_anno_base"]
-        dataset_corresponding = config_dict["conversion"]["dataset_corresponding"]
         description = config_dict["description"]
+        make_t4_dataset_dir = config_dict["conversion"]["make_t4_dataset_dir"]
 
         converter = FastLabel2dToT4Updater(
             input_base=input_base,
             output_base=output_base,
             input_anno_base=input_anno_base,
-            dataset_corresponding=dataset_corresponding,
             overwrite_mode=args.overwrite,
             description=description,
+            make_t4_dataset_dir=make_t4_dataset_dir,
         )
         logger.info(
             f"[BEGIN] Updating T4 dataset ({input_base}) with FastLabel {input_anno_base} into T4 data ({output_base})"

diff --git a/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py
@@ -6,10 +6,13 @@
 import shutil
 from typing import Any, Dict, List, Optional, Union
 
+import numpy as np
 import pycocotools.mask as cocomask
 
+from perception_dataset.constants import LABEL_PATH_ENUM
 from perception_dataset.deepen.deepen_to_t4_converter import DeepenToT4Converter
 from perception_dataset.t4_dataset.annotation_files_generator import AnnotationFilesGenerator
+from perception_dataset.utils.label_converter import LabelConverter
 from perception_dataset.utils.logger import configure_logger
 
 logger = configure_logger(modname=__name__)
@@ -48,6 +51,10 @@ def __init__(
         self._input_anno_files: List[Path] = []
         for f in Path(input_anno_base).rglob("*.json"):
             self._input_anno_files.append(f)
+        self._label_converter = LabelConverter(
+            label_path=LABEL_PATH_ENUM.OBJECT_LABEL,
+            attribute_path=LABEL_PATH_ENUM.ATTRIBUTE,
+        )
 
     def convert(self):
         # Load and format Fastlabel annotations
@@ -184,6 +191,7 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any
                 for a in ann["annotations"]:
                     occlusion_state: str = "occlusion_state.none"
                     visibility: str = "Not available"
+                    instance_id = ""
                     for att in a["attributes"]:
                         if att["key"] == "id":
                             instance_id = att["value"]
@@ -197,8 +205,9 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any
                                         att["key"].split("_")[-1]
                                     )
                                     break
+                    category_label = self._label_converter.convert_label(a["title"])
                     label_t4_dict: Dict[str, Any] = {
-                        "category_name": a["title"],
+                        "category_name": category_label,
                         "instance_id": instance_id,
                         "attribute_names": [occlusion_state],
                         "visibility_name": visibility,
@@ -217,6 +226,10 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any
                                 "sensor_id": self._camera2idx[camera],
                             }
                         )
+                        if self._label_converter.is_object_label(category_label):
+                            label_t4_dict["two_d_box"] = _convert_polygon_to_bbox(
+                                a["points"][0][0]
+                            )
                     fl_annotations[dataset_name][file_id].append(label_t4_dict)
 
         return fl_annotations
@@ -247,11 +260,40 @@ def _rle_from_points(points: Points2DLike, width: int, height: int) -> Dict[str,
     Returns:
         Dict[str, Any]: RLE format mask.
     """
-    flattened = [[coord for p in point for coord in p] for point in points]
+    final_mask = np.zeros((height, width, 1), dtype=np.uint8)
 
-    rle_objects = cocomask.frPyObjects(flattened, height, width)
-    rle = cocomask.merge(rle_objects)
+    for polygon in points:
+        outer_polygon = polygon[0]  # outer points
 
+        outer_rle = cocomask.frPyObjects([outer_polygon], height, width)
+        outer_mask = cocomask.decode(outer_rle)
+        combined_mask = outer_mask
+        for i in range(1, len(polygon)):
+            hollow_polygon = polygon[i]  # hollowed out points
+            hollow_rle = cocomask.frPyObjects([hollow_polygon], height, width)
+            hollow_mask = cocomask.decode(hollow_rle)
+            combined_mask = combined_mask - hollow_mask
+        final_mask = np.maximum(final_mask, combined_mask)
+    # encode RLE
+    rle = cocomask.encode(np.asfortranarray(np.squeeze(final_mask)))
     rle["counts"] = base64.b64encode(rle["counts"]).decode("ascii")
-
     return rle
+
+
+def _convert_polygon_to_bbox(polygon: List[int]) -> List[float]:
+    """Convert polygon points to bounding box.
+
+    Args:
+        polygon: 2D points, such as `[x1, y1, x2, y2 ....]`.
+
+    Returns:
+        List[float]: Bounding box in [x1, y1, x2, y2] format.
+    """
+    x_coords = polygon[0::2]
+    y_coords = polygon[1::2]
+
+    xmin = min(x_coords)
+    xmax = max(x_coords)
+    ymin = min(y_coords)
+    ymax = max(y_coords)
+    return [xmin, ymin, xmax, ymax]
diff --git a/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_updater.py b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_updater.py
@@ -22,26 +22,28 @@ def __init__(
         input_base: str,
         output_base: str,
         input_anno_base: str,
-        dataset_corresponding: Dict[str, int],
         overwrite_mode: bool,
         description: Dict[str, Dict[str, str]],
+        make_t4_dataset_dir: bool = True,
     ):
         super().__init__(
             input_base,
             output_base,
             input_anno_base,
-            dataset_corresponding,
-            overwrite_mode,
-            description,
+            dataset_corresponding=None,
+            overwrite_mode=overwrite_mode,
+            description=description,
             input_bag_base=None,
             topic_list=None,
         )
+        self._make_t4_dataset_dir = make_t4_dataset_dir
 
     def convert(self) -> None:
-        anno_jsons_dict = self._load_annotation_jsons()
+        t4_datasets = sorted([d.name for d in self._input_base.iterdir() if d.is_dir()])
+        anno_jsons_dict = self._load_annotation_jsons(t4_datasets)
         fl_annotations = self._format_fastlabel_annotation(anno_jsons_dict)
 
-        for t4dataset_name in self._t4dataset_name_to_merge:
+        for t4dataset_name in t4_datasets:
             # Check if input directory exists
             input_dir = self._input_base / t4dataset_name
             input_annotation_dir = input_dir / "annotation"
@@ -50,7 +52,9 @@ def convert(self) -> None:
                 continue
 
             # Check if output directory already exists
-            output_dir = self._output_base / t4dataset_name / "t4_dataset"
+            output_dir = self._output_base / t4dataset_name
+            if self._make_t4_dataset_dir:
+                output_dir = output_dir / "t4_dataset"
             if self._input_bag_base is not None:
                 input_bag_dir = Path(self._input_bag_base) / t4dataset_name
 
@@ -74,6 +78,10 @@ def convert(self) -> None:
             else:
                 raise ValueError("If you want to overwrite files, use --overwrite option.")
 
+            if t4dataset_name not in fl_annotations.keys():
+                logger.warning(f"No annotation for {t4dataset_name}")
+                continue
+
             # Start updating annotations
             annotation_files_updater = AnnotationFilesUpdater(description=self._description)
             annotation_files_updater.convert_one_scene(
@@ -83,15 +91,15 @@ def convert(self) -> None:
                 dataset_name=t4dataset_name,
             )
 
-    def _load_annotation_jsons(self):
+    def _load_annotation_jsons(self, t4_datasets: list[str]) -> dict[str, list[dict[str, any]]]:
         anno_dict = {}
         for file in self._input_anno_files:
-            t4_dataset_name = None
-            for name, ann_filename in self._t4dataset_name_to_merge.items():
-                if ann_filename == file.name:
-                    t4_dataset_name = name
-
-            assert t4_dataset_name is not None
+            t4_dataset_name = file.name.split("_CAM")[0]
+            if t4_dataset_name not in t4_datasets:
+                continue
             with open(file) as f:
-                anno_dict[t4_dataset_name] = json.load(f)
+                one_label = json.load(f)
+                if t4_dataset_name not in anno_dict.keys():
+                    anno_dict[t4_dataset_name] = []
+                anno_dict[t4_dataset_name].extend(one_label)
         return anno_dict
diff --git a/perception_dataset/t4_dataset/annotation_files_generator.py b/perception_dataset/t4_dataset/annotation_files_generator.py
@@ -1,7 +1,7 @@
 import base64
 from collections import defaultdict
 import os.path as osp
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 from nptyping import NDArray
 from nuimages import NuImages
@@ -113,6 +113,8 @@ def convert_one_scene(
                     break
 
         if has_2d_annotation:
+            object_mask: NDArray = np.zeros((0, 0), dtype=np.uint8)
+            prev_wid_hgt: Tuple = (0, 0)
             # NOTE: num_cameras is always 6, because it is hard coded above.
             for frame_index_nuim, sample_nuim in enumerate(nuim.sample_data):
                 if (
@@ -126,13 +128,14 @@ def convert_one_scene(
                         {frame_index: sample_nuim["token"]}
                     )
 
-                    width: int = sample_nuim["width"]
-                    height: int = sample_nuim["height"]
-                    object_mask: NDArray = np.array(
-                        [[0 for _ in range(height)] for __ in range(width)], dtype=np.uint8
-                    )
-                    object_mask = cocomask.encode(np.asfortranarray(object_mask))
-                    object_mask["counts"] = repr(base64.b64encode(object_mask["counts"]))[2:]
+                    wid_hgt = (sample_nuim["width"], sample_nuim["height"])
+                    if wid_hgt != prev_wid_hgt:
+                        prev_wid_hgt = wid_hgt
+                        object_mask = np.zeros(wid_hgt, dtype=np.uint8)
+                        object_mask = cocomask.encode(np.asfortranarray(object_mask))
+                        object_mask["counts"] = base64.b64encode(object_mask["counts"]).decode(
+                            "ascii"
+                        )
                     mask[cam_idx].update({frame_index: object_mask})
 
         self.convert_annotations(
@@ -287,6 +290,7 @@ def _convert_to_t4_format(
                         rotation=anno_three_d_bbox["rotation"],
                         num_lidar_pts=anno["num_lidar_pts"],
                         num_radar_pts=anno["num_radar_pts"],
+                        automatic_annotation=False,
                     )
                     self._instance_token_to_annotation_token_list[instance_token].append(
                         sample_annotation_token
@@ -299,8 +303,10 @@ def _convert_to_t4_format(
                     sensor_id: int = int(anno["sensor_id"])
                     if frame_index not in frame_index_to_sample_data_token[sensor_id]:
                         continue
-                    anno_two_d_box: List[float] = self._clip_bbox(
-                        anno["two_d_box"], mask[sensor_id][frame_index]
+                    anno_two_d_box: List[float] = (
+                        self._clip_bbox(anno["two_d_box"], mask[sensor_id][frame_index])
+                        if "two_d_box" in anno.keys()
+                        else None
                     )
                     self._object_ann_table.insert_into_table(
                         sample_data_token=frame_index_to_sample_data_token[sensor_id][frame_index],
@@ -313,6 +319,7 @@ def _convert_to_t4_format(
                             if "two_d_segmentation" in anno.keys()
                             else mask[sensor_id][frame_index]
                         ),
+                        automatic_annotation=False,
                     )
 
                 # Surface Annotation
@@ -327,6 +334,7 @@ def _convert_to_t4_format(
                         category_token=category_token,
                         mask=anno["two_d_segmentation"],
                         sample_data_token=frame_index_to_sample_data_token[sensor_id][frame_index],
+                        automatic_annotation=False,
                     )
 
     def _clip_bbox(self, bbox: List[float], mask: Dict[str, Any]) -> List[float]: