feat: camera images and 2d annotations generation #24

Merged · 10 commits · Nov 8, 2023
23 changes: 23 additions & 0 deletions config/rosbag2_to_t4/convert_synthetic_data_camera.yaml
@@ -0,0 +1,23 @@
task: convert_rosbag2_to_t4
description:
  scene: synthetic
conversion:
  # path to the rosbag dir output by the simulator
  input_base: .data/odaiba_synthetic_with_images/rosbags
  output_base: .data/odaiba_synthetic_with_images/t4dataset
  workers_number: 1
  skip_timestamp: 2.0
  num_load_frames: 50
  crop_frames_unit: 1 # crop frames from the end so that the number of frames is divisible by crop_frames_unit; set to 0 or 1 to keep all frames
  camera_latency_sec: 0.00
  object_topic_name: /ground_truth/objects
  object_msg_type: TrackedObjects
  world_frame_id: world
  lidar_sensor:
    topic: /lidar/concatenated/pointcloud
    channel: LIDAR_CONCAT
  camera_sensors: # keep the same order as the camera exposure timing
    - topic: /sensing/camera/camera4/image_rect_color
      channel: CAM_FRONT
    - topic: /sensing/camera/camera5/image_rect_color
      channel: CAM_BACK
23 changes: 23 additions & 0 deletions docs/tools_overview.md
@@ -158,6 +158,29 @@ Synthetic bag must contain ground truth objects, pointclouds and tf.
python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data.yaml
```

### Synthetic bag to T4 format with images and 2D image annotations

input: rosbag2
output: T4 format data

#### Messages

| Topic Name                                     | Required | Message Type                                       |
| ---------------------------------------------- | -------- | -------------------------------------------------- |
| `/ground_truth/objects`                        | o        | `autoware_auto_perception_msgs/msg/TrackedObjects` |
| `/sensing/lidar/concatenated/pointcloud`       | o        | `sensor_msgs/msg/PointCloud2`                      |
| `/tf`                                          | o        | `tf2_msgs/msg/TFMessage`                           |
| `/tf_static`                                   | o        | `tf2_msgs/msg/TFMessage`                           |
| `/sensing/camera/camera{ID}/image_rect_color`  | o        | `sensor_msgs/msg/Image`                            |
| `/sensing/camera/camera{ID}/camera_info`       | o        | `sensor_msgs/msg/CameraInfo`                       |

#### Script

```bash
python -m perception_dataset.convert --config config/rosbag2_to_t4/convert_synthetic_data_camera.yaml --synthetic --generate-bbox-from-cuboid
```
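
Before running the conversion, it can help to confirm that the bag actually contains the required topics. Below is a minimal sketch using `rosbag2_py` (which ships with ROS 2); the bag path is taken from the config above, and the required-topic list should be adjusted to your camera IDs:

```python
# Sketch: check that a rosbag2 directory provides the topics required by the converter.
import rosbag2_py

reader = rosbag2_py.SequentialReader()
reader.open(
    rosbag2_py.StorageOptions(
        uri=".data/odaiba_synthetic_with_images/rosbags", storage_id="sqlite3"
    ),
    rosbag2_py.ConverterOptions(
        input_serialization_format="cdr", output_serialization_format="cdr"
    ),
)
available = {topic.name for topic in reader.get_all_topics_and_types()}

required = [
    "/ground_truth/objects",
    "/sensing/lidar/concatenated/pointcloud",
    "/tf",
    "/tf_static",
    "/sensing/camera/camera4/image_rect_color",
    "/sensing/camera/camera4/camera_info",
]
missing = [topic for topic in required if topic not in available]
print("missing topics:", missing or "none")
```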

### Pseudo-labeled bag to T4 format

#### Description
16 changes: 15 additions & 1 deletion perception_dataset/convert.py
@@ -2,7 +2,7 @@

import yaml

from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
from perception_dataset.utils.logger import configure_logger

logger = configure_logger(modname=__name__)
@@ -25,6 +25,16 @@ def main():
        action="store_true",
        help="do NOT compress rosbag/non-annotated-t4",
    )
    parser.add_argument(
        "--synthetic",
        action="store_true",
        help="convert synthetic data",
    )
    parser.add_argument(
        "--generate-bbox-from-cuboid",
        action="store_true",
        help="generate 2D image annotations from cuboid annotations",
    )
    args = parser.parse_args()

    with open(args.config, "r") as f:
@@ -118,9 +128,13 @@
"scene_description": config_dict["description"]["scene"],
"overwrite_mode": args.overwrite,
**config_dict["conversion"],
"generate_bbox_from_cuboid": args.generate_bbox_from_cuboid,
}
converter_params = Rosbag2ConverterParams(**param_args)

if args.synthetic:
converter_params.data_type = DataType.SYNTHETIC

converter = Rosbag2ToT4Converter(converter_params)

logger.info("[BEGIN] Converting ros2bag output by simulator/autoware --> T4 Format Data")
1 change: 1 addition & 0 deletions perception_dataset/rosbag2/converter_params.py
@@ -42,6 +42,7 @@ class Rosbag2ConverterParams(BaseModel):
    traffic_light_rois_topic_name: str = ""
    world_frame_id: str = "map"
    with_camera: bool = True
    generate_bbox_from_cuboid: bool = False

    # rosbag reader
    num_load_frames: int  # the number of frames to load; if the value isn't positive, all messages are read
perception_dataset/rosbag2/rosbag2_converter.py
@@ -11,6 +11,7 @@
import builtin_interfaces.msg
import cv2
import numpy as np
from pyquaternion import Quaternion
from radar_msgs.msg import RadarTracks
from sensor_msgs.msg import CompressedImage, PointCloud2

@@ -21,7 +22,7 @@
    SENSOR_MODALITY_ENUM,
    T4_FORMAT_DIRECTORY_NAME,
)
from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
from perception_dataset.rosbag2.converter_params import DataType, Rosbag2ConverterParams
from perception_dataset.rosbag2.rosbag2_reader import Rosbag2Reader
from perception_dataset.t4_dataset.classes.abstract_class import AbstractTable
from perception_dataset.t4_dataset.classes.attribute import AttributeTable
@@ -99,6 +100,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._camera_latency: float = params.camera_latency_sec
        self._start_timestamp: float = params.start_timestamp_sec
        self._end_timestamp: float = 0
        self._data_type: DataType = params.data_type
        self._ignore_no_ego_transform_at_rosbag_beginning: bool = (
            params.ignore_no_ego_transform_at_rosbag_beginning
        )
@@ -113,7 +115,9 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._calibrated_sensor_target_frame: str = "base_link"

        # Note: used to detect message dropout, with a delay tolerance on top of the 10 Hz rate.
        # Changed from 0.15 to 2.0 as a workaround for an issue with Unity's step execution.
        # TODO(mkotynia): decrease this once the Unity issue is fixed.
        self._TIMESTAMP_DIFF = 2.0

        self._lidar_sensor: Dict[str, str] = params.lidar_sensor
        self._radar_sensors: List[Dict[str, str]] = params.radar_sensors
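
How `_TIMESTAMP_DIFF` is consumed is outside this diff, so the following is only an illustrative sketch of a threshold-based dropout check, not the converter's actual logic:

```python
# Hypothetical illustration of a dropout check driven by a timestamp threshold.
from typing import List


def find_dropouts(stamps_sec: List[float], max_diff: float = 2.0) -> List[int]:
    """Return indices whose gap to the previous message exceeds max_diff seconds."""
    return [
        i
        for i in range(1, len(stamps_sec))
        if stamps_sec[i] - stamps_sec[i - 1] > max_diff
    ]


# At 10 Hz, consecutive stamps are ~0.1 s apart, so a 2.0 s threshold only flags
# large gaps such as the pauses caused by Unity's step execution.
print(find_dropouts([0.0, 0.1, 0.2, 3.5, 3.6]))  # [3]
```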
@@ -793,7 +797,23 @@ def _generate_calibrated_sensor(
                    camera_distortion=[],
                )
            elif modality == SENSOR_MODALITY_ENUM.CAMERA.value:
                cam_info_topic = topic_name.rsplit("/", 2)[0] + "/camera_info"
                if self._data_type.value == "synthetic":
                    # fix the orientation of the camera view
                    rotation = Quaternion(
                        rotation["w"], rotation["x"], rotation["y"], rotation["z"]
                    )
                    axes_fix_rotation = Quaternion(0.5, -0.5, 0.5, -0.5)
                    rotation = rotation * axes_fix_rotation

                    rotation = {
                        "w": rotation.w,
                        "x": rotation.x,
                        "y": rotation.y,
                        "z": rotation.z,
                    }

                topic_name_splitted = topic_name.split("/")
                cam_info_topic = "/".join(topic_name_splitted[:4]) + "/camera_info"
                info = self._bag_reader.camera_info.get(cam_info_topic)
                if info is None:
                    continue
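
For reference, `Quaternion(0.5, -0.5, 0.5, -0.5)` in pyquaternion's (w, x, y, z) order is the standard rotation between a ROS body frame (x forward, y left, z up) and the camera optical frame (z forward, x right, y down). A small sketch to verify this, assuming only pyquaternion and numpy:

```python
# Sketch: inspect the axes-fix quaternion used above (pyquaternion takes w, x, y, z).
import numpy as np
from pyquaternion import Quaternion

axes_fix = Quaternion(0.5, -0.5, 0.5, -0.5)
print(np.round(axes_fix.rotation_matrix).astype(int))
# [[ 0  0  1]
#  [-1  0  0]
#  [ 0 -1  0]]
# The optical z axis (view direction) maps to body x (forward), optical x maps
# to body -y (right), and optical y maps to body -z (down).
print(np.round(axes_fix.rotate(np.array([0.0, 0.0, 1.0]))))  # -> [1, 0, 0]
```

Right-multiplying by this quaternion, as the code above does, appends the body-to-optical correction to the camera pose reported by the simulator.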
22 changes: 22 additions & 0 deletions perception_dataset/rosbag2/rosbag2_to_t4_converter.py
@@ -21,6 +21,7 @@
from perception_dataset.t4_dataset.classes.scene import SceneTable
from perception_dataset.t4_dataset.classes.sensor import SensorTable
from perception_dataset.utils.calculate_num_points import calculate_num_points
from perception_dataset.utils.create_2d_annotations import create_2d_annotations
from perception_dataset.utils.logger import configure_logger
import perception_dataset.utils.rosbag2 as rosbag2_utils

@@ -98,6 +99,7 @@ def __init__(self, params: Rosbag2ConverterParams) -> None:
        self._calibrated_sensor_target_frame: str = "base_link"

        self._annotation_files_generator = AnnotationFilesGenerator(with_camera=params.with_camera)
        self._generate_bbox_from_cuboid = params.generate_bbox_from_cuboid

    def _init_tables(self):
        # vehicle
@@ -133,6 +135,10 @@ def convert(self):
        self._annotation_files_generator.save_tables(self._output_anno_dir)
        # Calculate and overwrite num_lidar_pts in annotations
        self._calculate_num_points()
        if len(self._camera_sensors) > 0 and self._generate_bbox_from_cuboid:
            self._create_2d_annotations()
        elif len(self._camera_sensors) == 0 and self._generate_bbox_from_cuboid:
            logger.info("Skipping 2D annotation generation: no camera sensors found.")

    def _calculate_num_points(self):
        logger.info("Calculating number of points...")
@@ -144,6 +150,22 @@ def _calculate_num_points(self):
        )
        annotation_table.save_json(self._output_anno_dir)

    def _create_2d_annotations(self):
        logger.info("Creating 2d camera annotations...")
        object_ann_table = self._annotation_files_generator._object_ann_table
        create_2d_annotations(
            self._output_scene_dir,
            self._camera_sensors,
            self._annotation_files_generator._sample_annotation_table,
            self._sample_data_table,
            object_ann_table,
            self._annotation_files_generator._instance_table,
        )
        logger.info(
            f"object_ann.json created with {len(object_ann_table._token_to_record)} records"
        )
        object_ann_table.save_json(self._output_anno_dir)

    def _convert_objects(self, start_timestamp: float):
        """Read object bbox ground truth from rosbag."""
        start_time_in_time = rosbag2_utils.unix_timestamp_to_stamp(start_timestamp)
60 changes: 60 additions & 0 deletions perception_dataset/utils/create_2d_annotations.py
@@ -0,0 +1,60 @@
from typing import Dict, List

import numpy as np
from nuscenes import NuScenes

from perception_dataset.t4_dataset.classes.instance import InstanceTable
from perception_dataset.t4_dataset.classes.object_ann import ObjectAnnRecord, ObjectAnnTable
from perception_dataset.t4_dataset.classes.sample_annotation import SampleAnnotationTable
from perception_dataset.t4_dataset.classes.sample_data import SampleDataTable


def create_2d_annotations(
    dataroot: str,
    camera_sensor_channels: List[Dict[str, str]],
    annotation_table: SampleAnnotationTable,
    sample_data_table: SampleDataTable,
    object_ann_table: ObjectAnnTable,
    instance_table: InstanceTable,
):
    nusc = NuScenes(version="annotation", dataroot=dataroot, verbose=False)
    for sample in nusc.sample:
        for camera_sensor in camera_sensor_channels:
            camera_channel = camera_sensor["channel"]
            camera_token = sample["data"][camera_channel]
            # boxes are returned in camera coordinates, filtered to this camera's view
            _, boxes, camera_intrinsic = nusc.get_sample_data(camera_token)

            # map 3d cuboids to 2d bounding boxes
            for box in boxes:
                corners = box.corners()  # 3x8 matrix of cuboid corners
                x = corners[0, :]
                y = corners[1, :]
                z = corners[2, :]
                x_y_z = np.array((x, y, z))
                # project with the camera intrinsics, then apply the perspective divide
                orthographic = np.dot(camera_intrinsic, x_y_z)
                perspective_x = orthographic[0] / orthographic[2]
                perspective_y = orthographic[1] / orthographic[2]

                min_x = int(np.min(perspective_x))
                max_x = int(np.max(perspective_x))
                min_y = int(np.min(perspective_y))
                max_y = int(np.max(perspective_y))

                sample_annotation_record = annotation_table._token_to_record[box.token]
                instance_token = sample_annotation_record._instance_token
                attribute_tokens = sample_annotation_record._attribute_tokens
                category_token = instance_table._token_to_record[instance_token]._category_token
                imsize = [
                    sample_data_table._token_to_record[camera_token].width,
                    sample_data_table._token_to_record[camera_token].height,
                ]

                object_ann_record = ObjectAnnRecord(
                    sample_data_token=camera_token,
                    instance_token=instance_token,
                    category_token=category_token,
                    attribute_tokens=attribute_tokens,
                    bbox=[min_x, min_y, max_x, max_y],
                    mask={"size": imsize},
                )
                object_ann_table._token_to_record[object_ann_record.token] = object_ann_record
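
To make the projection math above concrete, here is a small worked example with a made-up intrinsic matrix and a single corner point in camera coordinates (x right, y down, z forward):

```python
# Sketch: perspective projection of one cuboid corner, mirroring the math above.
# The intrinsic matrix K is invented for illustration.
import numpy as np

K = np.array([
    [1000.0, 0.0, 960.0],  # fx, skew, cx
    [0.0, 1000.0, 540.0],  # fy, cy
    [0.0, 0.0, 1.0],
])
point_cam = np.array([2.0, -1.0, 10.0])  # metres, in camera coordinates

homogeneous = K @ point_cam          # [fx*x + cx*z, fy*y + cy*z, z]
u = homogeneous[0] / homogeneous[2]  # perspective divide
v = homogeneous[1] / homogeneous[2]
print(u, v)  # 1160.0 440.0
```

Note that corners behind the camera (z <= 0) would project to meaningless pixels; `nusc.get_sample_data` filters boxes by visibility before this step, which limits but does not fully eliminate that case.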
22 changes: 17 additions & 5 deletions perception_dataset/utils/rosbag2.py
@@ -162,11 +162,23 @@ def radar_tracks_msg_to_list(radar_tracks_msg: RadarTracks) -> List[Dict[str, Any]]:
    return radar_tracks


def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> NDArray:
    image_buf = np.ndarray(
        shape=(1, len(compressed_image_msg.data)), dtype=np.uint8, buffer=compressed_image_msg.data
    )
    image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
def compressed_msg_to_numpy(compressed_image_msg: CompressedImage) -> np.ndarray:
    # A message exposing `_encoding` is an uncompressed sensor_msgs/Image; reshape its
    # raw buffer directly (assumes a 3-channel encoding such as rgb8/bgr8).
    if hasattr(compressed_image_msg, "_encoding"):
        try:
            np_arr = np.frombuffer(compressed_image_msg.data, np.uint8)
            image = np.reshape(
                np_arr, (compressed_image_msg.height, compressed_image_msg.width, 3)
            )
        except Exception as e:
            print(e)
            return None
    else:
        # Otherwise decode the actual CompressedImage payload.
        image_buf = np.ndarray(
            shape=(1, len(compressed_image_msg.data)),
            dtype=np.uint8,
            buffer=compressed_image_msg.data,
        )
        image = cv2.imdecode(image_buf, cv2.IMREAD_ANYCOLOR)
    return image
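
A minimal sketch exercising both branches with stand-in objects instead of real ROS messages (`types.SimpleNamespace` mimics just the attributes the function reads; on rclpy message classes the `encoding` field is backed by a private `_encoding` attribute, which is what the `hasattr` check relies on):

```python
# Sketch: drive compressed_msg_to_numpy through both branches without ROS.
from types import SimpleNamespace

import cv2
import numpy as np

raw = SimpleNamespace(
    _encoding="rgb8",
    height=4,
    width=4,
    data=np.zeros(4 * 4 * 3, np.uint8).tobytes(),
)
ok, jpeg = cv2.imencode(".jpg", np.zeros((4, 4, 3), np.uint8))
compressed = SimpleNamespace(data=jpeg.tobytes())

print(compressed_msg_to_numpy(raw).shape)         # (4, 4, 3) via reshape
print(compressed_msg_to_numpy(compressed).shape)  # (4, 4, 3) via cv2.imdecode
```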

