diff --git a/config/update_t4_with_fastlabel_sample.yaml b/config/update_t4_with_fastlabel_sample.yaml new file mode 100644 index 00000000..635c2ca4 --- /dev/null +++ b/config/update_t4_with_fastlabel_sample.yaml @@ -0,0 +1,24 @@ +task: update_t4_with_fastlabel +description: + visibility: + full: "No occlusion of the object." + most: "Object is occluded, but by less than 50%." + partial: "The object is occluded by more than 50% (but not completely)." + none: "The object is 90-100% occluded and no points/pixels are visible in the label." + camera_index: + CAM_FRONT: 0 + CAM_FRONT_RIGHT: 1 + CAM_BACK_RIGHT: 2 + CAM_BACK: 3 + CAM_BACK_LEFT: 4 + CAM_FRONT_LEFT: 5 + +conversion: + input_base: ./data/input/t4_format_2d_annotated # could be non_annotated_t4_format or t4_format_3d_annotated + input_anno_base: ./data/fastlabel + input_bag_base: ./data/rosbag2 + output_base: ./data/output/t4_format_2d_annotated # this only includes the 2D annotations + topic_list: ./config/topic_list_sample.yaml + dataset_corresponding: + # input t4dataset_name: FastLabel json file name + DBv2.0-2-4: 2-4.json diff --git a/perception_dataset/convert.py b/perception_dataset/convert.py index c1dd0887..23c3b969 100644 --- a/perception_dataset/convert.py +++ b/perception_dataset/convert.py @@ -370,6 +370,39 @@ def main(): converter.convert() logger.info(f"[END] Converting Fastlabel data ({input_base}) to T4 data ({output_base})") + elif task == "update_t4_with_fastlabel": + from perception_dataset.fastlabel_to_t4.fastlabel_2d_to_t4_updater import ( + FastLabel2dToUpdater, + ) + + input_base = config_dict["conversion"]["input_base"] + output_base = config_dict["conversion"]["output_base"] + input_anno_base = config_dict["conversion"]["input_anno_base"] + dataset_corresponding = config_dict["conversion"]["dataset_corresponding"] + description = config_dict["description"] + input_bag_base = config_dict["conversion"]["input_bag_base"] + topic_list_yaml_path = config_dict["conversion"]["topic_list"] + 
with open(topic_list_yaml_path) as f: + topic_list_yaml = yaml.safe_load(f) + + converter = FastLabel2dToUpdater( + input_base=input_base, + output_base=output_base, + input_anno_base=input_anno_base, + dataset_corresponding=dataset_corresponding, + overwrite_mode=args.overwrite, + description=description, + input_bag_base=input_bag_base, + topic_list=topic_list_yaml, + ) + logger.info( + f"[BEGIN] Updating T4 dataset ({input_base}) with FastLabel {input_anno_base} into T4 data ({output_base})" + ) + converter.convert() + logger.info( + f"[DONE] Updating T4 dataset ({input_base}) with FastLabel {input_anno_base} into T4 data ({output_base})" + ) + elif task == "merge_2d_t4dataset_to_3d": from perception_dataset.t4_dataset.t4_dataset_2d3d_merger import T4dataset2D3DMerger diff --git a/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py index 1f352e2f..2e83be99 100644 --- a/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py +++ b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_converter.py @@ -1,3 +1,4 @@ +import base64 from collections import defaultdict import json import os.path as osp @@ -5,12 +6,16 @@ import shutil from typing import Any, Dict, List, Optional, Union +import pycocotools.mask as cocomask + from perception_dataset.deepen.deepen_to_t4_converter import DeepenToT4Converter from perception_dataset.t4_dataset.annotation_files_generator import AnnotationFilesGenerator from perception_dataset.utils.logger import configure_logger logger = configure_logger(modname=__name__) +Points2DLike = list[list[list[float]]] + class FastLabel2dToT4Converter(DeepenToT4Converter): def __init__( @@ -146,18 +151,14 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any ] } ], - "annotations": [ - { - "points": [ + "points": [ 1221.25, 488.44, 1275.38, 570.47 - ], - "rotation": 0, - "autogenerated": false - } - ] + ], + "rotation": 0, + 
"autogenerated": false }, }, .... @@ -177,6 +178,9 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any if dataset_name not in fl_annotations: fl_annotations[dataset_name] = defaultdict(list) + width: int = ann["width"] + height: int = ann["height"] + for a in ann["annotations"]: occlusion_state: str = "occlusion_state.none" visibility: str = "Not available" @@ -199,12 +203,55 @@ def _format_fastlabel_annotation(self, annotations: Dict[str, List[Dict[str, Any "attribute_names": [occlusion_state], "visibility_name": visibility, } - label_t4_dict.update( - { - "two_d_box": a["annotations"][0]["points"], - "sensor_id": self._camera2idx[camera], - } - ) + if a["type"] == "bbox": + label_t4_dict.update( + { + "two_d_box": a["points"], + "sensor_id": self._camera2idx[camera], + } + ) + elif a["type"] == "segmentation": + label_t4_dict.update( + { + "two_d_segmentation": _rle_from_points(a["points"], width, height), + "sensor_id": self._camera2idx[camera], + } + ) fl_annotations[dataset_name][file_id].append(label_t4_dict) return fl_annotations + + +def _rle_from_points(points: Points2DLike, width: int, height: int) -> Dict[str, Any]: + """Encode points to RLE format mask. + + Points format of 2D segmentation in FastLabel: + ``` + "points": [ + [ + [...], // outer points (1) + [...], // hollowed out points (1) + ], + [ + [...], // outer points (2) + [...], // hollowed out points (2) + ] + ], + ``` + + Args: + points (Points2DLike): 2D points, such as `[[[o1, o1, o2, o2, ...], [ho1, ho1, ho2, ho2, ...], ...]]`. + width (int): Image width. + height (int): Image height. + + Returns: + Dict[str, Any]: RLE format mask. 
+    """
+    flattened = [[coord for p in point for coord in p] for point in points]
+
+    rle_objects = cocomask.frPyObjects(flattened, height, width)
+    rle = cocomask.merge(rle_objects)
+
+    rle["counts"] = base64.b64encode(rle["counts"]).decode("ascii")
+
+    return rle
diff --git a/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_updater.py b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_updater.py
new file mode 100644
index 00000000..0df1fb85
--- /dev/null
+++ b/perception_dataset/fastlabel_to_t4/fastlabel_2d_to_t4_updater.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import os.path as osp
+from pathlib import Path
+import shutil
+from typing import Dict, List
+
+from perception_dataset.fastlabel_to_t4.fastlabel_2d_to_t4_converter import (
+    FastLabel2dToT4Converter,
+)
+from perception_dataset.t4_dataset.annotation_files_updater import AnnotationFilesUpdater
+from perception_dataset.utils.logger import configure_logger
+
+logger = configure_logger(modname=__name__)
+
+
+class FastLabel2dToUpdater(FastLabel2dToT4Converter):
+    def __init__(
+        self,
+        input_base: str,
+        output_base: str,
+        input_anno_base: str,
+        dataset_corresponding: Dict[str, int],
+        overwrite_mode: bool,
+        description: Dict[str, Dict[str, str]],
+        input_bag_base: str | None,
+        topic_list: Dict[str, List[str]] | List[str],
+    ):
+        super().__init__(
+            input_base,
+            output_base,
+            input_anno_base,
+            dataset_corresponding,
+            overwrite_mode,
+            description,
+            input_bag_base,
+            topic_list,
+        )
+
+    def convert(self) -> None:
+        anno_jsons_dict = self._load_annotation_jsons()
+        fl_annotations = self._format_fastlabel_annotation(anno_jsons_dict)
+
+        for t4dataset_name in self._t4dataset_name_to_merge:
+            # Check if input directory exists
+            input_dir = self._input_base / t4dataset_name
+            input_annotation_dir = input_dir / "annotation"
+            if not osp.exists(input_annotation_dir):
+                logger.warning(f"input_dir {input_dir} not exists.")
+                continue
+
+            # Check if output directory already exists
+            output_dir = self._output_base / t4dataset_name / "t4_dataset"
+            if self._input_bag_base is not None:
+                input_bag_dir = Path(self._input_bag_base) / t4dataset_name
+            is_dir_exist = osp.exists(output_dir)
+            if is_dir_exist:
+                logger.error(f"{output_dir} already exists.")
+            if self._overwrite_mode or not is_dir_exist:
+                # Remove existing output directory
+                shutil.rmtree(output_dir, ignore_errors=True)
+                # Copy input data to output directory
+                self._copy_data(input_dir, output_dir)
+                # Make rosbag
+                if self._input_bag_base is not None and not osp.exists(
+                    osp.join(output_dir, "input_bag")
+                ):
+                    self._find_start_end_time(input_dir)
+                    self._make_rosbag(str(input_bag_dir), str(output_dir))
+            else:
+                raise ValueError("If you want to overwrite files, use --overwrite option.")
+
+            # Start updating annotations
+            annotation_files_updater = AnnotationFilesUpdater(description=self._description)
+            annotation_files_updater.convert_one_scene(
+                input_dir=input_dir,
+                output_dir=output_dir,
+                scene_anno_dict=fl_annotations[t4dataset_name],
+                dataset_name=t4dataset_name,
+            )
diff --git a/perception_dataset/t4_dataset/annotation_files_updater.py b/perception_dataset/t4_dataset/annotation_files_updater.py
new file mode 100644
index 00000000..02f82d9f
--- /dev/null
+++ b/perception_dataset/t4_dataset/annotation_files_updater.py
@@ -0,0 +1,38 @@
+import os.path as osp
+from typing import Any
+
+from perception_dataset.t4_dataset.annotation_files_generator import AnnotationFilesGenerator
+
+
+class AnnotationFilesUpdater(AnnotationFilesGenerator):
+    def convert_one_scene(
+        self,
+        input_dir: str,
+        output_dir: str,
+        scene_anno_dict: dict[str, list[dict[str, Any]]],
+        dataset_name: str,
+    ) -> None:
+        anno_dir = osp.join(input_dir, "annotation")
+        if not osp.exists(anno_dir):
+            raise ValueError(f"Annotations files doesn't exist in {anno_dir}")
+
+        # Load existence annotation files
+        self._attribute_table.insert_from_json(osp.join(anno_dir, self._attribute_table.FILENAME))
+        
self._category_table.insert_from_json(osp.join(anno_dir, self._category_table.FILENAME))
+        self._instance_table.insert_from_json(osp.join(anno_dir, self._instance_table.FILENAME))
+        self._sample_annotation_table.insert_from_json(
+            osp.join(anno_dir, self._sample_annotation_table.FILENAME)
+        )
+        self._object_ann_table.insert_from_json(
+            osp.join(anno_dir, self._object_ann_table.FILENAME)
+        )
+        self._surface_ann_table.insert_from_json(
+            osp.join(anno_dir, self._surface_ann_table.FILENAME)
+        )
+
+        super().convert_one_scene(
+            input_dir=input_dir,
+            output_dir=output_dir,
+            scene_anno_dict=scene_anno_dict,
+            dataset_name=dataset_name,
+        )
diff --git a/perception_dataset/t4_dataset/classes/abstract_class.py b/perception_dataset/t4_dataset/classes/abstract_class.py
index 25e2793d..563d3863 100644
--- a/perception_dataset/t4_dataset/classes/abstract_class.py
+++ b/perception_dataset/t4_dataset/classes/abstract_class.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from abc import ABCMeta, abstractmethod
 import json
 import os.path as osp
@@ -14,10 +16,17 @@ def __init__(self):
     def token(self) -> str:
         return self._token
 
+    @token.setter
+    def token(self, token: str):
+        self._token = token
+
     @abstractmethod
     def to_dict(self) -> Dict[str, Any]:
         raise NotImplementedError()
 
+    def __eq__(self, value: T) -> bool:
+        return {k: v for k, v in self.__dict__.items() if k != "_token"} == {k: v for k, v in value.__dict__.items() if k != "_token"}
+
 
 T = TypeVar("T", bound=AbstractRecord)
 
@@ -37,6 +46,10 @@ def _to_record(self, **kwargs) -> T:
         raise NotImplementedError()
 
     def set_record_to_table(self, record: T):
+        same_tokens = [token for token, v in self._token_to_record.items() if v == record]
+        assert len(same_tokens) in (0, 1)
+        if len(same_tokens) == 1:
+            record.token = same_tokens[0]  # overwrite record token with the existing one
         self._token_to_record[record.token] = record
 
     def insert_into_table(self, **kwargs) -> str:
@@ -47,6 +60,19 @@
         self.set_record_to_table(record)
         return record.token
 
+    def insert_from_json(self, filepath: str):
+        with open(filepath, "r") as f:
+            table_data: List[Dict[str, Any]] = json.load(f)
+
+        for data in table_data:
+            token: str = data.pop("token")
+            record = self._to_record(**data)
+            record.token = token
+            assert isinstance(
+                record, AbstractRecord
+            ), "_to_record function must return the instance of RecordClass"
+            self.set_record_to_table(record)
+
     def select_record_from_token(self, token: str) -> T:
         assert (
             token in self._token_to_record