diff --git a/config/rosbag2_to_t4/convert_synthetic_data_camera.yaml b/config/rosbag2_to_t4/convert_synthetic_data_camera.yaml
new file mode 100644
index 00000000..665c9a31
--- /dev/null
+++ b/config/rosbag2_to_t4/convert_synthetic_data_camera.yaml
@@ -0,0 +1,23 @@
+task: convert_rosbag2_to_t4
+description:
+  scene: synthetic
+conversion:
+  # path to the rosbag directory output by the simulator
+  input_base: .data/odaiba_synthetic_with_images/rosbags
+  output_base: .data/odaiba_synthetic_with_images/t4dataset
+  workers_number: 1
+  skip_timestamp: 2.0
+  num_load_frames: 50
+  crop_frames_unit: 1 # crop frames from the end so that the number of frames is divisible by crop_frames_unit; set to 0 or 1 to keep all frames
+  camera_latency_sec: 0.00
+  object_topic_name: /ground_truth/objects
+  object_msg_type: TrackedObjects
+  world_frame_id: world
+  lidar_sensor:
+    topic: /lidar/concatenated/pointcloud
+    channel: LIDAR_CONCAT
+  camera_sensors: # Keep the same order as each camera's exposure timing
+    - topic: /sensing/camera/camera4/image_rect_color
+      channel: CAM_FRONT
+    - topic: /sensing/camera/camera5/image_rect_color
+      channel: CAM_BACK
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index ec5ceb46..affcd756 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -158,6 +158,28 @@ Synthetic bag must contain ground truth objects, pointclouds and tf.
 python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data.yaml
 ```

+### Synthetic bag to T4 format with images and 2D image annotations
+
+input: rosbag2
+output: T4 format data
+
+#### Messages
+
+| Topic Name                                     | Required | Message Type                                        |
+| ---------------------------------------------- | -------- | --------------------------------------------------- |
+| `/ground_truth/objects`                        | o        | `autoware_auto_perception_msgs/msg/TrackedObjects`  |
+| `/sensing/lidar/concatenated/pointcloud`       | o        | `sensor_msgs/msg/PointCloud2`                       |
+| `/tf`                                          | o        | `tf2_msgs/msg/TFMessage`                            |
+| `/tf_static`                                   | o        | `tf2_msgs/msg/TFMessage`                            |
+| `/sensing/camera/camera{ID}/image_rect_color`  | o        | `sensor_msgs/msg/Image`                             |
+| `/sensing/camera/camera{ID}/camera_info`       | o        | `sensor_msgs/msg/CameraInfo`                        |
+
+#### Script
+
+```bash
+python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data_camera.yaml --synthetic --generate-bbox-from-cuboid
+```
+
 ### Pseudo-labeled bag to T4 format

 #### Description
diff --git a/perception_dataset/convert.py b/perception_dataset/convert.py
index 7175b2a9..01ac22e6 100644
--- a/perception_dataset/convert.py
+++ b/perception_dataset/convert.py
@@ -2,7 +2,7 @@

 import yaml

-from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
+from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
 from perception_dataset.utils.logger import configure_logger

 logger = configure_logger(modname=__name__)
@@ -25,6 +25,16 @@ def main():
         action="store_true",
         help="do NOT compress rosbag/non-annotated-t4",
     )
+    parser.add_argument(
+        "--synthetic",
+        action="store_true",
+        help="convert synthetic data",
+    )
+    parser.add_argument(
+        "--generate-bbox-from-cuboid",
+        action="store_true",
+        help="generate 2D image annotations from cuboid annotations",
+    )
     args = parser.parse_args()

     with open(args.config, "r") as f:
@@ -118,9 +128,13 @@ def main():
             "scene_description": config_dict["description"]["scene"],
             "overwrite_mode": args.overwrite,
             **config_dict["conversion"],
+            "generate_bbox_from_cuboid": args.generate_bbox_from_cuboid,
         }
         converter_params = Rosbag2ConverterParams(**param_args)

+        if args.synthetic:
+            converter_params.data_type = DataType.SYNTHETIC
+
         converter = Rosbag2ToT4Converter(converter_params)

         logger.info("[BEGIN] Converting ros2bag output by simulator/autoware --> T4 Format Data")
diff --git a/perception_dataset/rosbag2/converter_params.py b/perception_dataset/rosbag2/converter_params.py
index 0775514e..77cf1433 100644
--- a/perception_dataset/rosbag2/converter_params.py
+++ b/perception_dataset/rosbag2/converter_params.py
@@ -42,6 +42,7 @@ class Rosbag2ConverterParams(BaseModel):
     traffic_light_rois_topic_name: str = ""
     world_frame_id: str = "map"
     with_camera: bool = True
+    generate_bbox_from_cuboid: bool = False

     # rosbag reader
     num_load_frames: int  # the number of frames to be loaded. if the value isn't positive, read all messages.
@@ -51,6 +52,7 @@ class Rosbag2ConverterParams(BaseModel):
     camera_latency_sec: float = (
         0.0  # camera latency in seconds between the header.stamp and shutter trigger
     )
+    timestamp_diff: float = 0.15
     topic_list: list = []  # topic list for input_bag
     # in synthetic data (from AWSIM) it may be the case that there is no ego transform available at the beginning of rosbag
     ignore_no_ego_transform_at_rosbag_beginning: bool = False
diff --git a/perception_dataset/rosbag2/rosbag2_to_non_annotated_t4_converter.py b/perception_dataset/rosbag2/rosbag2_to_non_annotated_t4_converter.py
index 2cbc0c33..716aedc9 100644
--- a/perception_dataset/rosbag2/rosbag2_to_non_annotated_t4_converter.py
+++ b/perception_dataset/rosbag2/rosbag2_to_non_annotated_t4_converter.py
@@ -11,6 +11,7 @@
 import builtin_interfaces.msg
 import cv2
 import numpy as np
+from pyquaternion import Quaternion
 from radar_msgs.msg import RadarTracks
 from sensor_msgs.msg import CompressedImage, PointCloud2

@@ -21,7 +22,7 @@
     SENSOR_MODALITY_ENUM,
     T4_FORMAT_DIRECTORY_NAME,
 )
-from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
+from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
 from perception_dataset.rosbag2.rosbag2_reader import Rosbag2Reader
 from perception_dataset.t4_dataset.classes.abstract_class import AbstractTable
 from perception_dataset.t4_dataset.classes.attribute import AttributeTable
@@ -99,6 +100,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
         self._camera_latency: float = params.camera_latency_sec
         self._start_timestamp: float = params.start_timestamp_sec
         self._end_timestamp: float = 0
+        self._data_type: DataType = params.data_type
         self._ignore_no_ego_transform_at_rosbag_beginning: bool = (
             params.ignore_no_ego_transform_at_rosbag_beginning
         )
@@ -113,7 +115,8 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
         self._calibrated_sensor_target_frame: str = "base_link"

         # Note: To determine if there is any message dropout, including a delay tolerance of 10Hz.
-        self._TIMESTAMP_DIFF = 0.15
+
+        self._TIMESTAMP_DIFF = params.timestamp_diff

         self._lidar_sensor: Dict[str, str] = params.lidar_sensor
         self._radar_sensors: List[Dict[str, str]] = params.radar_sensors
@@ -793,7 +796,23 @@ def _generate_calibrated_sensor(
                 camera_distortion=[],
             )
         elif modality == SENSOR_MODALITY_ENUM.CAMERA.value:
-            cam_info_topic = topic_name.rsplit("/", 2)[0] + "/camera_info"
+            if self._data_type.value == "synthetic":
+                # fix the orientation of the camera view
+                rotation = Quaternion(
+                    rotation["w"], rotation["x"], rotation["y"], rotation["z"]
+                )
+                axes_fix_rotation = Quaternion(0.5, -0.5, 0.5, -0.5)
+                rotation = rotation * axes_fix_rotation
+
+                rotation = {
+                    "w": rotation.w,
+                    "x": rotation.x,
+                    "y": rotation.y,
+                    "z": rotation.z,
+                }
+
+            topic_name_splitted = topic_name.split("/")
+            cam_info_topic = "/".join(topic_name_splitted[:4]) + "/camera_info"
             info = self._bag_reader.camera_info.get(cam_info_topic)
             if info is None:
                 continue
diff --git a/perception_dataset/rosbag2/rosbag2_to_t4_converter.py b/perception_dataset/rosbag2/rosbag2_to_t4_converter.py
index b249d072..41110328 100644
--- a/perception_dataset/rosbag2/rosbag2_to_t4_converter.py
+++ b/perception_dataset/rosbag2/rosbag2_to_t4_converter.py
@@ -21,6 +21,7 @@
 from perception_dataset.t4_dataset.classes.scene import SceneTable
 from perception_dataset.t4_dataset.classes.sensor import SensorTable
 from perception_dataset.utils.calculate_num_points import calculate_num_points
+from perception_dataset.utils.create_2d_annotations import create_2d_annotations
 from perception_dataset.utils.logger import configure_logger
 import perception_dataset.utils.rosbag2 as rosbag2_utils

@@ -98,6 +99,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
         self._calibrated_sensor_target_frame: str = "base_link"

         self._annotation_files_generator = AnnotationFilesGenerator(with_camera=params.with_camera)
+        self._generate_bbox_from_cuboid = params.generate_bbox_from_cuboid

     def _init_tables(self):
         # vehicle
@@ -133,6 +135,10 @@ def convert(self):
         self._annotation_files_generator.save_tables(self._output_anno_dir)
         # Calculate and overwrite num_lidar_prs in annotations
         self._calculate_num_points()
+        if len(self._camera_sensors) > 0 and self._generate_bbox_from_cuboid is True:
+            self._create_2d_annotations()
+        elif len(self._camera_sensors) == 0 and self._generate_bbox_from_cuboid is True:
+            logger.info("Skipping 2D annotation generation: no camera sensors found.")

     def _calculate_num_points(self):
         logger.info("Calculating number of points...")
@@ -144,6 +150,22 @@ def _calculate_num_points(self):
         )
         annotation_table.save_json(self._output_anno_dir)

+    def _create_2d_annotations(self):
+        logger.info("Creating 2D camera annotations...")
+        object_ann_table = self._annotation_files_generator._object_ann_table
+        create_2d_annotations(
+            self._output_scene_dir,
+            self._camera_sensors,
+            self._annotation_files_generator._sample_annotation_table,
+            self._sample_data_table,
+            object_ann_table,
+            self._annotation_files_generator._instance_table,
+        )
+        logger.info(
+            f"object_ann.json created with {len(object_ann_table._token_to_record)} records"
+        )
+        object_ann_table.save_json(self._output_anno_dir)
+
     def _convert_objects(self, start_timestamp: float):
         """read object bbox ground truth from rosbag"""
         start_time_in_time = rosbag2_utils.unix_timestamp_to_stamp(start_timestamp)
diff --git a/perception_dataset/utils/create_2d_annotations.py b/perception_dataset/utils/create_2d_annotations.py
new file mode 100644
index 00000000..92f22a38
--- /dev/null
+++ b/perception_dataset/utils/create_2d_annotations.py
@@ -0,0 +1,60 @@
+from typing import Dict, List
+
+import numpy as np
+from nuscenes import NuScenes
+
+from perception_dataset.t4_dataset.classes.instance import InstanceTable
+from perception_dataset.t4_dataset.classes.object_ann import ObjectAnnRecord, ObjectAnnTable
+from perception_dataset.t4_dataset.classes.sample_annotation import SampleAnnotationTable
+from perception_dataset.t4_dataset.classes.sample_data import SampleDataTable
+
+
+def create_2d_annotations(
+    dataroot: str,
+    camera_sensor_channels: List[Dict[str, str]],
+    annotation_table: SampleAnnotationTable,
+    sample_data_table: SampleDataTable,
+    object_ann_table: ObjectAnnTable,
+    instance_table: InstanceTable,
+):
+    nusc = NuScenes(version="annotation", dataroot=dataroot, verbose=False)
+    for sample in nusc.sample:
+        for camera_sensor in camera_sensor_channels:
+            camera_channel = camera_sensor["channel"]
+            camera_token = sample["data"][camera_channel]
+            _, boxes, camera_intrinsic = nusc.get_sample_data(camera_token)
+
+            # map 3d cuboids to 2d bounding boxes
+            for box in boxes:
+                corners = box.corners()
+                x = corners[0, :]
+                y = corners[1, :]
+                z = corners[2, :]
+                x_y_z = np.array((x, y, z))
+                orthographic = np.dot(camera_intrinsic, x_y_z)
+                perspective_x = orthographic[0] / orthographic[2]
+                perspective_y = orthographic[1] / orthographic[2]
+
+                min_x = int(np.min(perspective_x))
+                max_x = int(np.max(perspective_x))
+                min_y = int(np.min(perspective_y))
+                max_y = int(np.max(perspective_y))
+
+                sample_annotation_record = annotation_table._token_to_record[box.token]
+                instance_token = sample_annotation_record._instance_token
+                attribute_tokens = sample_annotation_record._attribute_tokens
+                category_token = instance_table._token_to_record[instance_token]._category_token
+                imsize = [
+                    sample_data_table._token_to_record[camera_token].width,
+                    sample_data_table._token_to_record[camera_token].height,
+                ]
+
+                object_ann_record = ObjectAnnRecord(
+                    sample_data_token=camera_token,
+                    instance_token=instance_token,
+                    category_token=category_token,
+                    attribute_tokens=attribute_tokens,
+                    bbox=[min_x, min_y, max_x, max_y],
+                    mask={"size": imsize},
+                )
+                object_ann_table._token_to_record[object_ann_record.token] = object_ann_record
diff --git a/perception_dataset/utils/rosbag2.py b/perception_dataset/utils/rosbag2.py
index 5b0c3191..0fc6bbda 100644
--- a/perception_dataset/utils/rosbag2.py
+++ b/perception_dataset/utils/rosbag2.py
@@ -162,11 +162,24 @@ def radar_tracks_msg_to_list(radar_tracks_msg: RadarTracks) -> List[Dict[str, Any]]:
     return radar_tracks


-def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> NDArray:
-    image_buf = np.ndarray(
-        shape=(1, len(compressed_image_msg.data)), dtype=np.uint8, buffer=compressed_image_msg.data
-    )
-    image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
+def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> np.ndarray:
+    if hasattr(compressed_image_msg, "_encoding"):
+        # Raw sensor_msgs/msg/Image (as in synthetic bags) exposes an `encoding` field; reshape its buffer directly instead of decoding.
+        try:
+            np_arr = np.frombuffer(compressed_image_msg.data, np.uint8)
+            image = np.reshape(
+                np_arr, (compressed_image_msg.height, compressed_image_msg.width, 3)
+            )
+        except Exception as e:
+            print(e)
+            return None
+    else:
+        image_buf = np.ndarray(
+            shape=(1, len(compressed_image_msg.data)),
+            dtype=np.uint8,
+            buffer=compressed_image_msg.data,
+        )
+        image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
     return image
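
Note (not part of the patch): for reviewers who want to sanity-check the cuboid-to-2D projection that `create_2d_annotations` performs, here is a minimal standalone sketch of the same math: project the eight cuboid corners with the camera intrinsic matrix, apply the perspective divide, and take the min/max of the pixel coordinates. The function name `cuboid_to_2d_bbox` and the example intrinsic/corner values are illustrative only; like the patch, it assumes all corners lie in front of the camera (z > 0).

```python
import numpy as np


def cuboid_to_2d_bbox(corners_3d: np.ndarray, camera_intrinsic: np.ndarray) -> list:
    """corners_3d: 3x8 cuboid corners in the camera frame; camera_intrinsic: 3x3 matrix K."""
    projected = camera_intrinsic @ corners_3d  # homogeneous image coordinates, shape 3x8
    u = projected[0] / projected[2]  # perspective divide -> pixel x
    v = projected[1] / projected[2]  # perspective divide -> pixel y
    return [int(u.min()), int(v.min()), int(u.max()), int(v.max())]  # [min_x, min_y, max_x, max_y]


if __name__ == "__main__":
    # Hypothetical camera: 1000 px focal length, principal point (960, 600).
    K = np.array([[1000.0, 0.0, 960.0], [0.0, 1000.0, 600.0], [0.0, 0.0, 1.0]])
    # Corners of a 2 m cube centered 10 m in front of the camera.
    x, y, z = np.meshgrid([-1.0, 1.0], [-1.0, 1.0], [9.0, 11.0])
    corners = np.vstack([x.ravel(), y.ravel(), z.ravel()])
    print(cuboid_to_2d_bbox(corners, K))
```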