feat: camera images and 2d annotations generation #24

Merged · 10 commits · Nov 8, 2023
23 changes: 23 additions & 0 deletions config/rosbag2_to_t4/convert_synthetic_data_camera.yaml
@@ -0,0 +1,23 @@
task: convert_rosbag2_to_t4
description:
  scene: synthetic
conversion:
  # path to the rosbag dir output by the simulator
  input_base: .data/odaiba_synthetic_with_images/rosbags
  output_base: .data/odaiba_synthetic_with_images/t4dataset
  workers_number: 1
  skip_timestamp: 2.0
  num_load_frames: 50
  crop_frames_unit: 1 # crop frames from the end so that the number of frames is divisible by crop_frames_unit; set to 0 or 1 to keep all frames
  camera_latency_sec: 0.00
  object_topic_name: /ground_truth/objects
  object_msg_type: TrackedObjects
  world_frame_id: world
  lidar_sensor:
    topic: /lidar/concatenated/pointcloud
    channel: LIDAR_CONCAT
  camera_sensors: # keep the same order as the camera exposure timing
    - topic: /sensing/camera/camera4/image_rect_color
      channel: CAM_FRONT
    - topic: /sensing/camera/camera5/image_rect_color
      channel: CAM_BACK
23 changes: 23 additions & 0 deletions docs/tools_overview.md
@@ -158,6 +158,29 @@ Synthetic bag must contain ground truth objects, pointclouds and tf.
python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data.yaml
```

### Synthetic bag to T4 format with images and 2D image annotations

input: rosbag2
output: T4 format data

#### Messages

| Topic Name                                     | Required | Message Type                                       |
| ---------------------------------------------- | -------- | -------------------------------------------------- |
| `/ground_truth/objects`                        | o        | `autoware_auto_perception_msgs/msg/TrackedObjects` |
| `/sensing/lidar/concatenated/pointcloud`       | o        | `sensor_msgs/msg/PointCloud2`                      |
| `/tf`                                          | o        | `tf2_msgs/msg/TFMessage`                           |
| `/tf_static`                                   | o        | `tf2_msgs/msg/TFMessage`                           |
| `/sensing/camera/camera{ID}/image_rect_color`  | o        | `sensor_msgs/msg/Image`                            |
| `/sensing/camera/camera{ID}/camera_info`       | o        | `sensor_msgs/msg/CameraInfo`                       |

#### Script

```bash
python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data_camera.yaml --synthetic --generate-bbox-from-cuboid
```
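
Before running the conversion, it can help to confirm that the bag actually contains the required topics. Below is a minimal sketch using `rosbag2_py` (which ships with ROS 2); the bag path is taken from the config above, and the required-topic list should be adjusted to your camera IDs:

```python
# Sketch: check that a rosbag2 directory provides the topics required by the converter.
import rosbag2_py

reader = rosbag2_py.SequentialReader()
reader.open(
    rosbag2_py.StorageOptions(
        uri=".data/odaiba_synthetic_with_images/rosbags", storage_id="sqlite3"
    ),
    rosbag2_py.ConverterOptions(
        input_serialization_format="cdr", output_serialization_format="cdr"
    ),
)
available = {topic.name for topic in reader.get_all_topics_and_types()}

required = [
    "/ground_truth/objects",
    "/sensing/lidar/concatenated/pointcloud",
    "/tf",
    "/tf_static",
    "/sensing/camera/camera4/image_rect_color",
    "/sensing/camera/camera4/camera_info",
]
missing = [topic for topic in required if topic not in available]
print("missing topics:", missing or "none")
```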

### Pseudo-labeled bag to T4 format

#### Description
16 changes: 15 additions & 1 deletion perception_dataset/convert.py
@@ -2,7 +2,7 @@

import yaml

from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
from perception_dataset.utils.logger import configure_logger

logger = configure_logger(modname=__name__)
@@ -25,6 +25,16 @@ def main():
        action="store_true",
        help="do NOT compress rosbag/non-annotated-t4",
    )
    parser.add_argument(
        "--synthetic",
        action="store_true",
        help="convert synthetic data",
    )
    parser.add_argument(
        "--generate-bbox-from-cuboid",
        action="store_true",
        help="generate 2D image annotations from cuboid annotations",
    )
    args = parser.parse_args()

    with open(args.config, "r") as f:
@@ -118,9 +128,13 @@
"scene_description": config_dict["description"]["scene"],
"overwrite_mode": args.overwrite,
**config_dict["conversion"],
"generate_bbox_from_cuboid": args.generate_bbox_from_cuboid,
}
converter_params = Rosbag2ConverterParams(**param_args)

if args.synthetic:
converter_params.data_type = DataType.SYNTHETIC

converter = Rosbag2ToT4Converter(converter_params)

logger.info("[BEGIN] Converting ros2bag output by simulator/autoware --> T4 Format Data")
1 change: 1 addition & 0 deletions perception_dataset/rosbag2/converter_params.py
@@ -42,6 +42,7 @@ class Rosbag2ConverterParams(BaseModel):
    traffic_light_rois_topic_name: str = ""
    world_frame_id: str = "map"
    with_camera: bool = True
    generate_bbox_from_cuboid: bool = False

    # rosbag reader
    num_load_frames: int  # the number of frames to load; if the value isn't positive, all messages are read
perception_dataset/rosbag2/rosbag2_converter.py
@@ -11,6 +11,7 @@
import builtin_interfaces.msg
import cv2
import numpy as np
from pyquaternion import Quaternion
from radar_msgs.msg import RadarTracks
from sensor_msgs.msg import CompressedImage, PointCloud2

@@ -21,7 +22,7 @@
    SENSOR_MODALITY_ENUM,
    T4_FORMAT_DIRECTORY_NAME,
)
from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
from perception_dataset.rosbag2.rosbag2_reader import Rosbag2Reader
from perception_dataset.t4_dataset.classes.abstract_class import AbstractTable
from perception_dataset.t4_dataset.classes.attribute import AttributeTable
@@ -99,6 +100,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._camera_latency: float = params.camera_latency_sec
        self._start_timestamp: float = params.start_timestamp_sec
        self._end_timestamp: float = 0
        self._data_type: DataType = params.data_type
        self._ignore_no_ego_transform_at_rosbag_beginning: bool = (
            params.ignore_no_ego_transform_at_rosbag_beginning
        )
@@ -113,7 +115,9 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._calibrated_sensor_target_frame: str = "base_link"

        # Note: used to detect message dropout, with a delay tolerance on top of the 10 Hz rate.
        # Changed from 0.15 to 2.0 as a workaround for an issue with Unity's step execution.
        # TODO(mkotynia): decrease this once the Unity issue is fixed.
        self._TIMESTAMP_DIFF = 2.0

        self._lidar_sensor: Dict[str, str] = params.lidar_sensor
        self._radar_sensors: List[Dict[str, str]] = params.radar_sensors
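
How `_TIMESTAMP_DIFF` is consumed is outside this diff, so the following is only an illustrative sketch of a threshold-based dropout check, not the converter's actual logic:

```python
# Hypothetical illustration of a dropout check driven by a timestamp threshold.
from typing import List


def find_dropouts(stamps_sec: List[float], max_diff: float = 2.0) -> List[int]:
    """Return indices whose gap to the previous message exceeds max_diff seconds."""
    return [
        i
        for i in range(1, len(stamps_sec))
        if stamps_sec[i] - stamps_sec[i - 1] > max_diff
    ]


# At 10 Hz, consecutive stamps are ~0.1 s apart, so a 2.0 s threshold only flags
# large gaps such as the pauses caused by Unity's step execution.
print(find_dropouts([0.0, 0.1, 0.2, 3.5, 3.6]))  # [3]
```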
@@ -793,7 +797,23 @@ def _generate_calibrated_sensor(
                    camera_distortion=[],
                )
            elif modality == SENSOR_MODALITY_ENUM.CAMERA.value:
                cam_info_topic = topic_name.rsplit("/", 2)[0] + "/camera_info"
                if self._data_type.value == "synthetic":
                    # fix the orientation of the camera view
                    rotation = Quaternion(
                        rotation["w"], rotation["x"], rotation["y"], rotation["z"]
                    )
                    axes_fix_rotation = Quaternion(0.5, -0.5, 0.5, -0.5)
                    rotation = rotation * axes_fix_rotation

                    rotation = {
                        "w": rotation.w,
                        "x": rotation.x,
                        "y": rotation.y,
                        "z": rotation.z,
                    }

                topic_name_splitted = topic_name.split("/")
                cam_info_topic = "/".join(topic_name_splitted[:4]) + "/camera_info"
                info = self._bag_reader.camera_info.get(cam_info_topic)
                if info is None:
                    continue
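
For reference, `Quaternion(0.5, -0.5, 0.5, -0.5)` in pyquaternion's (w, x, y, z) order is the standard rotation between a ROS body frame (x forward, y left, z up) and the camera optical frame (z forward, x right, y down). A small sketch to verify this, assuming only pyquaternion and numpy:

```python
# Sketch: inspect the axes-fix quaternion used above (pyquaternion takes w, x, y, z).
import numpy as np
from pyquaternion import Quaternion

axes_fix = Quaternion(0.5, -0.5, 0.5, -0.5)
print(np.round(axes_fix.rotation_matrix).astype(int))
# [[ 0  0  1]
#  [-1  0  0]
#  [ 0 -1  0]]
# The optical z axis (view direction) maps to body x (forward), optical x maps
# to body -y (right), and optical y maps to body -z (down).
print(np.round(axes_fix.rotate(np.array([0.0, 0.0, 1.0]))))  # -> [1, 0, 0]
```

Right-multiplying by this quaternion, as the code above does, appends the body-to-optical correction to the camera pose reported by the simulator.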
22 changes: 22 additions & 0 deletions perception_dataset/rosbag2/rosbag2_to_t4_converter.py
@@ -21,6 +21,7 @@
from perception_dataset.t4_dataset.classes.scene import SceneTable
from perception_dataset.t4_dataset.classes.sensor import SensorTable
from perception_dataset.utils.calculate_num_points import calculate_num_points
from perception_dataset.utils.create_2d_annotations import create_2d_annotations
from perception_dataset.utils.logger import configure_logger
import perception_dataset.utils.rosbag2 as rosbag2_utils

@@ -98,6 +99,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._calibrated_sensor_target_frame: str = "base_link"

        self._annotation_files_generator = AnnotationFilesGenerator(with_camera=params.with_camera)
        self._generate_bbox_from_cuboid = params.generate_bbox_from_cuboid

    def _init_tables(self):
        # vehicle
@@ -133,6 +135,10 @@ def convert(self):
        self._annotation_files_generator.save_tables(self._output_anno_dir)
        # Calculate and overwrite num_lidar_pts in annotations
        self._calculate_num_points()
        if len(self._camera_sensors) > 0 and self._generate_bbox_from_cuboid:
            self._create_2d_annotations()
        elif len(self._camera_sensors) == 0 and self._generate_bbox_from_cuboid:
            logger.info("Skipping 2D annotation generation: no camera sensors found.")

    def _calculate_num_points(self):
        logger.info("Calculating number of points...")
@@ -144,6 +150,22 @@ def _calculate_num_points(self):
        )
        annotation_table.save_json(self._output_anno_dir)

    def _create_2d_annotations(self):
        logger.info("Creating 2d camera annotations...")
        object_ann_table = self._annotation_files_generator._object_ann_table
        create_2d_annotations(
            self._output_scene_dir,
            self._camera_sensors,
            self._annotation_files_generator._sample_annotation_table,
            self._sample_data_table,
            object_ann_table,
            self._annotation_files_generator._instance_table,
        )
        logger.info(
            f"object_ann.json created with {len(object_ann_table._token_to_record)} records"
        )
        object_ann_table.save_json(self._output_anno_dir)

    def _convert_objects(self, start_timestamp: float):
        """Read object bbox ground truth from rosbag."""
        start_time_in_time = rosbag2_utils.unix_timestamp_to_stamp(start_timestamp)
60 changes: 60 additions & 0 deletions perception_dataset/utils/create_2d_annotations.py
@@ -0,0 +1,60 @@
from typing import Dict, List

import numpy as np
from nuscenes import NuScenes

from perception_dataset.t4_dataset.classes.instance import InstanceTable
from perception_dataset.t4_dataset.classes.object_ann import ObjectAnnRecord, ObjectAnnTable
from perception_dataset.t4_dataset.classes.sample_annotation import SampleAnnotationTable
from perception_dataset.t4_dataset.classes.sample_data import SampleDataTable


def create_2d_annotations(
    dataroot: str,
    camera_sensor_channels: List[Dict[str, str]],
    annotation_table: SampleAnnotationTable,
    sample_data_table: SampleDataTable,
    object_ann_table: ObjectAnnTable,
    instance_table: InstanceTable,
):
    nusc = NuScenes(version="annotation", dataroot=dataroot, verbose=False)
    for sample in nusc.sample:
        for camera_sensor in camera_sensor_channels:
            camera_channel = camera_sensor["channel"]
            camera_token = sample["data"][camera_channel]
            # boxes are returned in camera coordinates, filtered to this camera's view
            _, boxes, camera_intrinsic = nusc.get_sample_data(camera_token)

            # map 3d cuboids to 2d bounding boxes
            for box in boxes:
                corners = box.corners()  # 3x8 matrix of cuboid corners
                x = corners[0, :]
                y = corners[1, :]
                z = corners[2, :]
                x_y_z = np.array((x, y, z))
                # project with the camera intrinsics, then apply the perspective divide
                orthographic = np.dot(camera_intrinsic, x_y_z)
                perspective_x = orthographic[0] / orthographic[2]
                perspective_y = orthographic[1] / orthographic[2]

                min_x = int(np.min(perspective_x))
                max_x = int(np.max(perspective_x))
                min_y = int(np.min(perspective_y))
                max_y = int(np.max(perspective_y))

                sample_annotation_record = annotation_table._token_to_record[box.token]
                instance_token = sample_annotation_record._instance_token
                attribute_tokens = sample_annotation_record._attribute_tokens
                category_token = instance_table._token_to_record[instance_token]._category_token
                imsize = [
                    sample_data_table._token_to_record[camera_token].width,
                    sample_data_table._token_to_record[camera_token].height,
                ]

                object_ann_record = ObjectAnnRecord(
                    sample_data_token=camera_token,
                    instance_token=instance_token,
                    category_token=category_token,
                    attribute_tokens=attribute_tokens,
                    bbox=[min_x, min_y, max_x, max_y],
                    mask={"size": imsize},
                )
                object_ann_table._token_to_record[object_ann_record.token] = object_ann_record
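
To make the projection math above concrete, here is a small worked example with a made-up intrinsic matrix and a single corner point in camera coordinates (x right, y down, z forward):

```python
# Sketch: perspective projection of one cuboid corner, mirroring the math above.
# The intrinsic matrix K is invented for illustration.
import numpy as np

K = np.array([
    [1000.0, 0.0, 960.0],  # fx, skew, cx
    [0.0, 1000.0, 540.0],  # fy, cy
    [0.0, 0.0, 1.0],
])
point_cam = np.array([2.0, -1.0, 10.0])  # metres, in camera coordinates

homogeneous = K @ point_cam          # [fx*x + cx*z, fy*y + cy*z, z]
u = homogeneous[0] / homogeneous[2]  # perspective divide
v = homogeneous[1] / homogeneous[2]
print(u, v)  # 1160.0 440.0
```

Note that corners behind the camera (z <= 0) would project to meaningless pixels; `nusc.get_sample_data` filters boxes by visibility before this step, which limits but does not fully eliminate that case.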
22 changes: 17 additions & 5 deletions perception_dataset/utils/rosbag2.py
@@ -162,11 +162,23 @@ def radar_tracks_msg_to_list(radar_tracks_msg: RadarTracks) -> List[Dict[str, Any]]:
    return radar_tracks


def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> NDArray:
    image_buf = np.ndarray(
        shape=(1, len(compressed_image_msg.data)), dtype=np.uint8, buffer=compressed_image_msg.data
    )
    image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> np.ndarray:
    # A message exposing `_encoding` is an uncompressed sensor_msgs/Image; reshape its
    # raw buffer directly (assumes a 3-channel encoding such as rgb8/bgr8).
    if hasattr(compressed_image_msg, "_encoding"):
        try:
            np_arr = np.frombuffer(compressed_image_msg.data, np.uint8)
            image = np.reshape(
                np_arr, (compressed_image_msg.height, compressed_image_msg.width, 3)
            )
        except Exception as e:
            print(e)
            return None
    else:
        # Otherwise decode the actual CompressedImage payload.
        image_buf = np.ndarray(
            shape=(1, len(compressed_image_msg.data)),
            dtype=np.uint8,
            buffer=compressed_image_msg.data,
        )
        image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
    return image
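
A minimal sketch exercising both branches with stand-in objects instead of real ROS messages (`types.SimpleNamespace` mimics just the attributes the function reads; on rclpy message classes the `encoding` field is backed by a private `_encoding` attribute, which is what the `hasattr` check relies on):

```python
# Sketch: drive compressed_msg_to_numpy through both branches without ROS.
from types import SimpleNamespace

import cv2
import numpy as np

raw = SimpleNamespace(
    _encoding="rgb8",
    height=4,
    width=4,
    data=np.zeros(4 * 4 * 3, np.uint8).tobytes(),
)
ok, jpeg = cv2.imencode(".jpg", np.zeros((4, 4, 3), np.uint8))
compressed = SimpleNamespace(data=jpeg.tobytes())

print(compressed_msg_to_numpy(raw).shape)         # (4, 4, 3) via reshape
print(compressed_msg_to_numpy(compressed).shape)  # (4, 4, 3) via cv2.imdecode
```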

