tier4 · ktro2828 · May 28, 2024 · May 9, 2023 · May 16, 2023 · Jun 27, 2023
diff --git a/perception_eval/perception_eval/common/dataset.py b/perception_eval/perception_eval/common/dataset.py
@@ -32,7 +32,6 @@
 from perception_eval.common.geometry import interpolate_homogeneous_matrix
 from perception_eval.common.geometry import interpolate_object_list
 from perception_eval.common.label import LabelConverter
-from perception_eval.common.object import DynamicObject
 from perception_eval.common.schema import FrameID
 from perception_eval.common.transform import HomogeneousMatrix
 from perception_eval.common.transform import TransformDict
@@ -48,15 +47,15 @@ def __init__(
         self,
         unix_time: int,
         frame_name: str,
-        objects: List[DynamicObject],
+        objects: List[ObjectType],
         transforms: TransformDictArgType = None,
         raw_data: Optional[Dict[FrameID, NDArray]] = None,
     ) -> None:
         """
         Args:
             unix_time (int): Current unix time.
             frame_name (str): The file name
-            objects (list[DynamicObject]): Ground truth objects.
+            objects (list[ObjectType]): Ground truth objects.
             transforms (TransformDict | None, optional): 4x4 transform matrices. Defaults to None.
             raw_data (dict[FrameID, NDArray] | None, optional): Raw data for each sensor. Defaults to None.
         """

diff --git a/perception_eval/perception_eval/common/dataset_utils.py b/perception_eval/perception_eval/common/dataset_utils.py
@@ -251,6 +251,7 @@ def _get_sample_boxes(nusc: NuScenes, sample_data_token: str, frame_id: FrameID)
 
 def _get_transforms(nusc: NuScenes, sample_data_token: str) -> List[HomogeneousMatrix]:
     """Load transform matrices.
+    Additionally, for traffic light cameras, add transforms from BASE_LINK to TRAFFIC_LIGHT.
 
     Args:
         nusc (NuScenes): NuScenes instance.
@@ -267,6 +268,8 @@ def _get_transforms(nusc: NuScenes, sample_data_token: str) -> List[HomogeneousM
     ego2map = HomogeneousMatrix(ego_position, ego_rotation, src=FrameID.BASE_LINK, dst=FrameID.MAP)
 
     matrices = [ego2map]
+    tlr_avg_pos: List[NDArray] = []
+    tlr_avg_quat: List[Quaternion] = []
     for cs_record in nusc.calibrated_sensor:
         sensor_position = cs_record["translation"]
         sensor_rotation = Quaternion(cs_record["rotation"])
@@ -275,6 +278,18 @@ def _get_transforms(nusc: NuScenes, sample_data_token: str) -> List[HomogeneousM
         sensor2ego = HomogeneousMatrix(sensor_position, sensor_rotation, src=sensor_frame_id, dst=FrameID.BASE_LINK)
         sensor2map = ego2map.dot(sensor2ego)
         matrices.extend((sensor2ego, sensor2map))
+        if "CAM_TRAFFIC_LIGHT" in sensor_frame_id.value.upper():
+            tlr_avg_pos.append(sensor_position)
+            tlr_avg_quat.append(sensor_rotation)
+
+    # NOTE: Average positions and rotations are used for matrices of cameras related to TLR.
+    if len(tlr_avg_pos) > 0 and len(tlr_avg_quat) > 0:
+        tlr_cam_pos: NDArray = np.mean(tlr_avg_pos, axis=0)
+        tlr_cam_rot: Quaternion = sum(tlr_avg_quat) / sum(tlr_avg_quat).norm
+        tlr2ego = HomogeneousMatrix(tlr_cam_pos, tlr_cam_rot, src=FrameID.TRAFFIC_LIGHT, dst=FrameID.BASE_LINK)
+        tlr2map = ego2map.dot(tlr2ego)
+        matrices.extend((tlr2ego, tlr2map))
+
     return matrices
 
 
@@ -477,19 +492,31 @@ def _sample_to_frame_2d(
 
     sample_data_tokens: List[str] = []
     frame_id_mapping: Dict[str, FrameID] = {}
-    raw_data: Optional[Dict[FrameID, np.ndarray]] = {} if load_raw_data else None
-    for frame_id in frame_ids:
-        camera_type: str = frame_id.value.upper()
+    transforms = None
+    for frame_id_ in frame_ids:
+        camera_type: str = frame_id_.value.upper()
+        if nusc_sample["data"].get(camera_type) is None:
+            continue
         sample_data_token = nusc_sample["data"][camera_type]
         sample_data_tokens.append(sample_data_token)
-        frame_id_mapping[sample_data_token] = frame_id
-    raw_data = _load_raw_data(nusc, sample_token)
+        frame_id_mapping[sample_data_token] = frame_id_
+
+        sd_record = nusc.get("sample_data", sample_data_token)
+        if sd_record["is_key_frame"]:
+            transforms = _get_transforms(nusc, sample_data_token)
+
+    if load_raw_data:
+        raw_data = _load_raw_data(nusc, sample_token)
+    else:
+        raw_data = None
 
     object_annotations: List[Dict[str, Any]] = [
         ann for ann in nuim.object_ann if ann["sample_data_token"] in sample_data_tokens
     ]
 
     objects_: List[DynamicObject2D] = []
+    # used to merge multiple traffic lights with same regulatory element ID
+    uuids: List[str] = []
     for ann in object_annotations:
         if evaluation_task in (EvaluationTask.DETECTION2D, EvaluationTask.TRACKING2D):
             bbox: List[float] = ann["bbox"]
@@ -507,31 +534,82 @@ def _sample_to_frame_2d(
         semantic_label: LabelType = label_converter.convert_label(category_info["name"], attributes)
 
         if label_converter.label_type == TrafficLightLabel:
+            # NOTE: Check whether Regulatory Element is used
+            # in scene.json => description: "TLR, regulatory_element"
             for instance_record in nusc.instance:
                 if instance_record["token"] == ann["instance_token"]:
                     instance_name: str = instance_record["instance_name"]
                     uuid: str = instance_name.split(":")[-1]
+                    break
+            uuids.append(uuid)
         else:
             uuid: str = ann["instance_token"]
 
-        visibility = None
-
         object_: DynamicObject2D = DynamicObject2D(
             unix_time=unix_time,
             frame_id=frame_id_mapping[ann["sample_data_token"]],
             semantic_score=1.0,
             semantic_label=semantic_label,
             roi=roi,
             uuid=uuid,
-            visibility=visibility,
+            visibility=None,
         )
         objects_.append(object_)
 
+    if label_converter.label_type == TrafficLightLabel and evaluation_task == EvaluationTask.CLASSIFICATION2D:
+        objects_ = _merge_duplicated_traffic_lights(unix_time, objects_, uuids)
+
     frame = dataset.FrameGroundTruth(
         unix_time=unix_time,
         frame_name=frame_name,
         objects=objects_,
+        transforms=transforms,
         raw_data=raw_data,
     )
 
     return frame
+
+
+def _merge_duplicated_traffic_lights(
+    unix_time: int,
+    objects: List[DynamicObject2D],
+    uuids: List[str],
+) -> List[DynamicObject2D]:
+    """Merge traffic light objects which have same uuids and set its frame id as `FrameID.TRAFFIC_LIGHT`.
+
+    Args:
+        unix_time (int): Current unix timestamp.
+        objects (List[DynamicObject2D]): List of traffic light objects.
+            It can contain the multiple traffic lights with same uuid.
+        uuids (List[str]): List of uuids.
+
+    Returns:
+        List[DynamicObject2D]: List of merged results.
+    """
+    uuids = set(uuids)
+    ret_objects: List[DynamicObject2D] = []
+    for uuid in uuids:
+        candidates = [obj for obj in objects if obj.uuid == uuid]
+        candidate_labels = [obj.semantic_label for obj in candidates]
+        if all([label == candidate_labels[0] for label in candidate_labels]):
+            # all unknown or not unknown
+            semantic_label = candidate_labels[0]
+        else:
+            unique_labels = set([obj.semantic_label.label for obj in candidates])
+            assert len(unique_labels) == 2, (
+                "If the same regulatory element ID is assigned to multiple traffic lights, "
+                f"it must annotated with only two labels: (unknown, another one). But got, {unique_labels}"
+            )
+            semantic_label = [label for label in candidate_labels if label.label != TrafficLightLabel.UNKNOWN][0]
+            assert semantic_label.label != TrafficLightLabel.UNKNOWN
+        merged_object = DynamicObject2D(
+            unix_time=unix_time,
+            frame_id=FrameID.TRAFFIC_LIGHT,
+            semantic_score=1.0,
+            semantic_label=semantic_label,
+            roi=None,
+            uuid=uuid,
+            visibility=None,
+        )
+        ret_objects.append(merged_object)
+    return ret_objects
diff --git a/perception_eval/perception_eval/common/label.py b/perception_eval/perception_eval/common/label.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# cspell: ignore leftdiagonal, rightdiagonal
+
 from __future__ import annotations
 
 from dataclasses import dataclass
@@ -52,17 +54,27 @@ class TrafficLightLabel(Enum):
     TRAFFIC_LIGHT = "traffic_light"
 
     # classification
+    # === for vehicle ===
     GREEN = "green"
-    RED = "red"
+    GREEN_STRAIGHT = "green_straight"
+    GREEN_LEFT = "green_left"
+    GREEN_RIGHT = "green_right"
     YELLOW = "yellow"
+    YELLOW_STRAIGHT = "yellow_straight"
+    YELLOW_LEFT = "yellow_left"
+    YELLOW_RIGHT = "yellow_right"
+    YELLOW_STRAIGHT_LEFT = "yellow_straight_left"
+    YELLOW_STRAIGHT_RIGHT = "yellow_straight_right"
+    YELLOW_STRAIGHT_LEFT_RIGHT = "yellow_straight_left_right"
+    RED = "red"
     RED_STRAIGHT = "red_straight"
     RED_LEFT = "red_left"
-    RED_LEFT_STRAIGHT = "red_left_straight"
-    RED_LEFT_DIAGONAL = "red_left_diagonal"
     RED_RIGHT = "red_right"
-    RED_RIGHT_STRAIGHT = "red_right_straight"
+    RED_STRAIGHT_LEFT = "red_straight_left"
+    RED_STRAIGHT_RIGHT = "red_straight_right"
+    RED_STRAIGHT_LEFT_RIGHT = "red_straight_left_right"
+    RED_LEFT_DIAGONAL = "red_left_diagonal"
     RED_RIGHT_DIAGONAL = "red_right_diagonal"
-    YELLOW_RIGHT = "yellow_right"
 
     # unknown is used in both detection and classification
     UNKNOWN = "unknown"
@@ -344,34 +356,60 @@ def _get_traffic_light_paris(
     if evaluation_task == EvaluationTask.CLASSIFICATION2D:
         pair_list: List[Tuple[TrafficLightLabel, str]] = [
             (TrafficLightLabel.GREEN, "green"),
-            (TrafficLightLabel.RED, "red"),
+            (TrafficLightLabel.GREEN_STRAIGHT, "green_straight"),
+            (TrafficLightLabel.GREEN_LEFT, "green_left"),
+            (TrafficLightLabel.GREEN_RIGHT, "green_right"),
             (TrafficLightLabel.YELLOW, "yellow"),
+            (TrafficLightLabel.YELLOW_STRAIGHT, "yellow_straight"),
+            (TrafficLightLabel.YELLOW_LEFT, "yellow_left"),
+            (TrafficLightLabel.YELLOW_RIGHT, "yellow_right"),
+            (TrafficLightLabel.YELLOW_STRAIGHT_LEFT, "yellow_straight_left"),
+            (TrafficLightLabel.YELLOW_STRAIGHT_LEFT_RIGHT, "yellow_straight_right"),
+            (TrafficLightLabel.RED, "red"),
             (TrafficLightLabel.RED_STRAIGHT, "red_straight"),
             (TrafficLightLabel.RED_LEFT, "red_left"),
-            (TrafficLightLabel.RED_LEFT_STRAIGHT, "red_left_straight"),
-            (TrafficLightLabel.RED_LEFT_DIAGONAL, "red_left_diagonal"),
             (TrafficLightLabel.RED_RIGHT, "red_right"),
-            (TrafficLightLabel.RED_RIGHT_STRAIGHT, "red_right_straight"),
+            (TrafficLightLabel.RED_STRAIGHT_LEFT, "red_straight_left"),
+            (TrafficLightLabel.RED_STRAIGHT_RIGHT, "red_straight_right"),
+            (TrafficLightLabel.RED_STRAIGHT_LEFT_RIGHT, "red_straight_left_right"),
             (TrafficLightLabel.RED_RIGHT_DIAGONAL, "red_right_diagonal"),
-            (TrafficLightLabel.YELLOW_RIGHT, "yellow_right"),
+            (TrafficLightLabel.RED_RIGHT_DIAGONAL, "red_rightdiagonal"),
+            (TrafficLightLabel.RED_LEFT_DIAGONAL, "red_left_diagonal"),
+            (TrafficLightLabel.RED_LEFT_DIAGONAL, "red_leftdiagonal"),
             (TrafficLightLabel.UNKNOWN, "unknown"),
+            (TrafficLightLabel.RED, "crosswalk_red"),
+            (TrafficLightLabel.GREEN, "crosswalk_green"),
+            (TrafficLightLabel.UNKNOWN, "crosswalk_unknown"),
             (TrafficLightLabel.FP, "false_positive"),
         ]
     else:
         pair_list: List[Tuple[TrafficLightLabel, str]] = [
             (TrafficLightLabel.TRAFFIC_LIGHT, "traffic_light"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "green"),
-            (TrafficLightLabel.TRAFFIC_LIGHT, "red"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "green_straight"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "green_left"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "green_right"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "yellow"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_straight"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_left"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_right"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_straight_left"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_straight_right"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "red_straight"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "red_left"),
-            (TrafficLightLabel.TRAFFIC_LIGHT, "red_left_straight"),
-            (TrafficLightLabel.TRAFFIC_LIGHT, "red_left_diagonal"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "red_right"),
-            (TrafficLightLabel.TRAFFIC_LIGHT, "red_right_straight"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_straight_left"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_straight_right"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_straight_left_right"),
             (TrafficLightLabel.TRAFFIC_LIGHT, "red_right_diagonal"),
-            (TrafficLightLabel.TRAFFIC_LIGHT, "yellow_right"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_rightdiagonal"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_left_diagonal"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "red_leftdiagonal"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "crosswalk_red"),
+            (TrafficLightLabel.TRAFFIC_LIGHT, "crosswalk_green"),
             (TrafficLightLabel.UNKNOWN, "unknown"),
+            (TrafficLightLabel.UNKNOWN, "crosswalk_unknown"),
             (TrafficLightLabel.FP, "false_positive"),
         ]
     return pair_list

diff --git a/perception_eval/perception_eval/common/object.py b/perception_eval/perception_eval/common/object.py
@@ -50,27 +50,27 @@ class ObjectState:
 
     def __init__(
         self,
-        position: Tuple[float, float, float],
-        orientation: Quaternion,
-        shape: Shape,
+        position: Optional[Tuple[float, float, float]],
+        orientation: Optional[Quaternion],
+        shape: Optional[Shape],
         velocity: Optional[Tuple[float, float, float]],
     ) -> None:
-        self.position: Tuple[float, float, float] = position
-        self.orientation: Quaternion = orientation
-        self.shape: Shape = shape
-        self.velocity: Optional[Tuple[float, float, float]] = velocity
+        self.position = position
+        self.orientation = orientation
+        self.shape = shape
+        self.velocity = velocity
 
     @property
-    def shape_type(self) -> ShapeType:
-        return self.shape.type
+    def shape_type(self) -> Optional[ShapeType]:
+        return self.shape.type if self.shape is not None else None
 
     @property
     def size(self) -> Tuple[float, float, float]:
-        return self.shape.size
+        return self.shape.size if self.shape is not None else None
 
     @property
     def footprint(self) -> Polygon:
-        return self.shape.footprint
+        return self.shape.footprint if self.shape is not None else None
 
 
 class DynamicObject: