Add traffic light launch file #80

Merged · 12 commits · Mar 8, 2024

This file was deleted.

9 changes: 0 additions & 9 deletions modules/dev_overrides/docker-compose.perception.yaml
@@ -32,15 +32,6 @@ services:
volumes:
- ${MONO_DIR}/src/perception/lidar_object_detection:/home/bolty/ament_ws/src/lidar_object_detection

-  traffic_light_detection:
-    <<: *fixuid
-    extends:
-      file: ../docker-compose.perception.yaml
-      service: traffic_light_detection
-    command: tail -F anything
-    volumes:
-      - ${MONO_DIR}/src/perception/traffic_light_detection:/home/bolty/ament_ws/src/traffic_light_detection

traffic_sign_detection:
<<: *fixuid
extends:
11 changes: 1 addition & 10 deletions modules/docker-compose.perception.yaml
@@ -31,6 +31,7 @@ services:
command: /bin/bash -c "ros2 launch camera_object_detection eve_launch.py"
volumes:
- /mnt/wato-drive2/perception_models/yolov8s.pt:/perception_models/yolov8s.pt
+      - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt

lidar_object_detection:
build:
@@ -42,16 +43,6 @@ services:
target: deploy
image: "${PERCEPTION_LIDAR_OBJECT_DETECTION_IMAGE}:${TAG}"
command: /bin/bash -c "ros2 launch lidar_object_detection lidar_object_detection.launch.py"
-  traffic_light_detection:
-    build:
-      context: ..
-      dockerfile: docker/perception/traffic_light_detection/traffic_light_detection.Dockerfile
-      cache_from:
-        - "${PERCEPTION_TRAFFIC_LIGHT_DETECTION_IMAGE}:${TAG}"
-        - "${PERCEPTION_TRAFFIC_LIGHT_DETECTION_IMAGE}:main"
-      target: deploy
-    image: "${PERCEPTION_TRAFFIC_LIGHT_DETECTION_IMAGE}:${TAG}"
-    command: /bin/bash -c "ros2 launch traffic_light_detection traffic_light_detection.launch.py"

traffic_sign_detection:
build:
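Taken together, the compose changes fold traffic light detection into the existing camera_object_detection service: the dedicated traffic_light_detection container and its dev override are removed, and the traffic_light.pt weights are instead mounted into the camera_object_detection container alongside yolov8s.pt.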
@@ -1,5 +1,6 @@
import rclpy
from rclpy.node import Node
+import os

from sensor_msgs.msg import Image, CompressedImage
from vision_msgs.msg import (
@@ -9,7 +10,7 @@
)

from ultralytics.nn.autobackend import AutoBackend
-from ultralytics.data.augment import LetterBox
+from ultralytics.data.augment import LetterBox, CenterCrop
from ultralytics.utils.ops import non_max_suppression
from ultralytics.utils.plotting import Annotator, colors

@@ -35,8 +36,10 @@ def __init__(self):
self.declare_parameter("publish_vis_topic", "/annotated_img")
self.declare_parameter("publish_detection_topic", "/detections")
self.declare_parameter("model_path", "/perception_models/yolov8s.pt")
self.declare_parameter("image_size", 480)
self.declare_parameter("image_size", 1024)
self.declare_parameter("compressed", False)
self.declare_parameter("crop_mode", "LetterBox")
self.declare_parameter("save_detections", False)

self.camera_topic = self.get_parameter("camera_topic").value
self.publish_vis_topic = self.get_parameter("publish_vis_topic").value
@@ -45,10 +48,15 @@ def __init__(self):
self.model_path = self.get_parameter("model_path").value
self.image_size = self.get_parameter("image_size").value
self.compressed = self.get_parameter("compressed").value
+        self.crop_mode = self.get_parameter("crop_mode").value
+        self.save_detections = bool(self.get_parameter("save_detections").value)
+        self.counter = 0  # For saving detections
+        if self.save_detections:
+            if not os.path.exists("detections"):
+                os.makedirs("detections")

self.line_thickness = 1
self.half = False
self.augment = False

self.subscription = self.create_subscription(
Image if not self.compressed else CompressedImage,
@@ -60,6 +68,9 @@ def __init__(self):
depth=10,
),
)

+        self.orig_image_width = None
+        self.orig_image_height = None

# set device
self.device = torch.device(
@@ -90,7 +101,51 @@ def __init__(self):
self.get_logger().info(
f"Successfully created node listening on camera topic: {self.camera_topic}...")

-    def preprocess_image(self, cv_image):
+    def crop_image(self, cv_image):
+        if self.crop_mode == "LetterBox":
+            img = LetterBox(self.image_size, stride=self.stride)(
+                image=cv_image)
+        elif self.crop_mode == "CenterCrop":
+            img = CenterCrop(self.image_size)(cv_image)
+        else:
+            raise Exception(
+                "Invalid crop mode, please choose either 'LetterBox' or 'CenterCrop'!")
+
+        return img
+
+    def convert_bboxes_to_orig_frame(self, bbox):
+        """
+        Converts bounding box coordinates from the scaled image frame back to the original image frame.
+
+        This function takes into account the original image dimensions and the scaling method used
+        (either "LetterBox" or "CenterCrop") to accurately map the bounding box coordinates back to
+        their original positions in the original image.
+
+        Parameters:
+        bbox (list): A list containing the bounding box coordinates in the format [x1, y1, w1, h1]
+        in the scaled image frame.
+
+        Returns:
+        list: A list containing the bounding box coordinates in the format [x1, y1, w1, h1]
+        in the original image frame.
+
+        """
+        width_scale = self.orig_image_width / self.image_size
+        height_scale = self.orig_image_height / self.image_size
+        if self.crop_mode == "LetterBox":
+            translation = (self.image_size - self.orig_image_height / width_scale) / 2
+            return [bbox[0] * width_scale,
+                    (bbox[1] - translation) * width_scale,
+                    bbox[2] * width_scale,
+                    bbox[3] * width_scale]
+        elif self.crop_mode == "CenterCrop":
+            translation = (self.orig_image_width / height_scale - self.image_size) / 2
+            return [(bbox[0] + translation) * height_scale,
+                    bbox[1] * height_scale,
+                    bbox[2] * height_scale,
+                    bbox[3] * height_scale]
+
+    def crop_and_convert_to_tensor(self, cv_image):
"""
Preprocess the image by resizing, padding and rearranging the dimensions.

@@ -100,9 +155,7 @@ def preprocess_image(self, cv_image):
Returns:
torch.Tensor image for model input of shape (1,3,w,h)
"""
-        # Padded resize
-        img = cv_image
-        img = LetterBox(self.image_size, stride=self.stride)(image=cv_image)
+        img = self.crop_image(cv_image)

# Convert
img = img.transpose(2, 0, 1)
@@ -143,10 +196,11 @@ def postprocess_detections(self, detections, annotator):
annotator_img = annotator.result()
return (processed_detections, annotator_img)

-    def publish_vis(self, annotated_img, feed):
+    def publish_vis(self, annotated_img, msg, feed):
# Publish visualizations
imgmsg = self.cv_bridge.cv2_to_imgmsg(annotated_img, "bgr8")
-        imgmsg.header.frame_id = "camera_{}_link".format(feed)
+        imgmsg.header.stamp = msg.header.stamp
+        imgmsg.header.frame_id = msg.header.frame_id
self.vis_publisher.publish(imgmsg)

def publish_detections(self, detections, msg, feed):
@@ -175,10 +229,13 @@ def publish_detections(self, detections, msg, feed):
detection2darray.detections.append(detection2d)

self.detection_publisher.publish(detection2darray)
-        return

def image_callback(self, msg):
self.get_logger().debug("Received image")
+        if (self.orig_image_width is None):
+            self.orig_image_width = msg.width
+            self.orig_image_height = msg.height

images = [msg] # msg is a single sensor image
startTime = time.time()
for image in images:
@@ -196,9 +253,7 @@ def image_callback(self, msg):
return

# preprocess image and run through prediction
-            img = self.preprocess_image(cv_image)
-            processed_cv_image = LetterBox(
-                self.image_size, stride=self.stride)(image=cv_image)
+            img = self.crop_and_convert_to_tensor(cv_image)
pred = self.model(img)

# nms function used same as yolov8 detect.py
@@ -217,6 +272,7 @@
xyxy[3] - xyxy[1],
]
bbox = [b.item() for b in bbox]
+                bbox = self.convert_bboxes_to_orig_frame(bbox)

detections.append(
{
@@ -228,7 +284,7 @@ def image_callback(self, msg):
self.get_logger().debug(f"{label}: {bbox}")

annotator = Annotator(
-                processed_cv_image,
+                cv_image,
line_width=self.line_thickness,
example=str(self.names),
)
@@ -237,8 +293,12 @@

# Currently we support a single camera so we pass an empty string
feed = ""
-            self.publish_vis(annotated_img, feed)
+            self.publish_vis(annotated_img, msg, feed)
self.publish_detections(detections, msg, feed)

+            if self.save_detections:
+                cv2.imwrite(f"detections/{self.counter}.jpg", annotated_img)
+                self.counter += 1

self.get_logger().info(
f"Finished in: {time.time() - startTime}, {1/(time.time() - startTime)} Hz")
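The inverse mapping in convert_bboxes_to_orig_frame is easiest to sanity-check with concrete numbers. The sketch below replays the same arithmetic outside ROS; the 1920x1080 source resolution and the sample box are hypothetical (the PR does not state the camera resolution), and like the node it assumes the source image is wider than it is tall.

# Standalone sketch of the back-projection math above, for checking only.
# Hypothetical numbers: 1920x1080 source frame, image_size=1024.

def to_orig_frame(bbox, orig_w, orig_h, image_size, crop_mode):
    """Map [x, y, w, h] from the model input frame back to the source frame."""
    width_scale = orig_w / image_size
    height_scale = orig_h / image_size
    if crop_mode == "LetterBox":
        # LetterBox shrinks the long (width) side to image_size and pads the
        # height symmetrically, so y must have the pad offset removed.
        pad = (image_size - orig_h / width_scale) / 2
        return [bbox[0] * width_scale, (bbox[1] - pad) * width_scale,
                bbox[2] * width_scale, bbox[3] * width_scale]
    # CenterCrop scales the short (height) side to image_size and crops the
    # width symmetrically, so x must have the cropped margin added back.
    margin = (orig_w / height_scale - image_size) / 2
    return [(bbox[0] + margin) * height_scale, bbox[1] * height_scale,
            bbox[2] * height_scale, bbox[3] * height_scale]

# 1080 / (1920/1024) = 576 image rows -> (1024 - 576) / 2 = 224 rows of pad,
# so a box at y=224 in the letterboxed frame maps to y=0 in the original.
print(to_orig_frame([512, 224, 100, 50], 1920, 1080, 1024, "LetterBox"))
# [960.0, 0.0, 187.5, 93.75]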
12 changes: 6 additions & 6 deletions src/perception/camera_object_detection/config/eve_config.yaml
@@ -1,23 +1,23 @@
left_camera_object_detection_node:
ros__parameters:
camera_topic: /camera/left/image_color
-    publish_vis_topic: /camera/left/annotated_img
-    publish_detection_topic: /camera/left/detections
+    publish_vis_topic: /camera/left/camera_detections_viz
+    publish_detection_topic: /camera/left/camera_detections
model_path: /perception_models/yolov8s.pt
image_size: 480

center_camera_object_detection_node:
ros__parameters:
camera_topic: /camera/center/image_color
-    publish_vis_topic: /camera/center/annotated_img
-    publish_detection_topic: /camera/center/detections
+    publish_vis_topic: /camera/center/camera_detections_viz
+    publish_detection_topic: /camera/center/camera_detections
model_path: /perception_models/yolov8s.pt
image_size: 480

right_camera_object_detection_node:
ros__parameters:
camera_topic: /camera/right/image_color
-    publish_vis_topic: /camera/right/annotated_img
-    publish_detection_topic: /camera/right/detections
+    publish_vis_topic: /camera/right/camera_detections_viz
+    publish_detection_topic: /camera/right/camera_detections
model_path: /perception_models/yolov8s.pt
image_size: 480
9 changes: 9 additions & 0 deletions src/perception/camera_object_detection/config/traffic_light_config.yaml
@@ -0,0 +1,9 @@
+traffic_light_node:
+  ros__parameters:
+    camera_topic: /camera/left/image_color
+    publish_vis_topic: /traffic_lights_viz
+    publish_detection_topic: /traffic_lights
+    model_path: /perception_models/traffic_light.pt
+    crop_mode: CenterCrop
+    image_size: 1024
+    save_detections: false
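Note that this config drives the same camera_object_detection node configured above: only the weights, topics, crop mode, and input size change, so traffic light detection needs no separate package (matching the compose cleanup earlier in the diff).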
26 changes: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+from ament_index_python.packages import get_package_share_directory
+import os
+
+
+def generate_launch_description():
+    ld = LaunchDescription()
+    config = os.path.join(
+        get_package_share_directory('camera_object_detection'),
+        'config',
+        'traffic_light_config.yaml'
+    )
+
+    # nodes
+    camera_object_detection_node = Node(
+        package='camera_object_detection',
+        executable='camera_object_detection_node',
+        name='traffic_light_node',
+        parameters=[config]
+    )
+
+    # finalize
+    ld.add_action(camera_object_detection_node)
+
+    return ld
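As a usage sketch, this launch description could be pulled into a larger bringup file with IncludeLaunchDescription. The installed filename ('traffic_light_launch.py') is an assumption; the diff does not show where the new launch file lives or how it is installed.

# Hypothetical include of the new launch file from another launch description.
# The 'traffic_light_launch.py' install name is assumed, not taken from the PR.
import os

from ament_index_python.packages import get_package_share_directory
from launch import LaunchDescription
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource


def generate_launch_description():
    traffic_light_launch = IncludeLaunchDescription(
        PythonLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('camera_object_detection'),
                'launch',
                'traffic_light_launch.py'  # assumed install path
            )
        )
    )
    return LaunchDescription([traffic_light_launch])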