From c054c5fa7a3cf88b6dcd57c2391d38c024392630 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 5 Nov 2024 21:19:59 +0000 Subject: [PATCH 01/37] Completed batching and creating tensrt engine --- .../yolov8_detection.py | 135 ++++++++++++++++-- .../camera_object_detection/requirements.txt | 3 +- watod-config.sh | 2 +- 3 files changed, 127 insertions(+), 13 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 05ceb431..0b73261d 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -1,14 +1,14 @@ import rclpy from rclpy.node import Node import os - +from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, Detection2D, Detection2DArray, ) - +from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend from ultralytics.data.augment import LetterBox, CenterCrop from ultralytics.utils.ops import non_max_suppression @@ -22,6 +22,8 @@ import time import torch +import onnx +import tensorrt as trt class CameraDetectionNode(Node): @@ -30,21 +32,65 @@ def __init__(self): torch.zeros(1).cuda() super().__init__("camera_object_detection_node") - self.get_logger().info("Creating camera detection node...") + self.get_logger().info("Creating batched camera detection node...") + + #Eve config params self.declare_parameter("camera_topic", "/camera/right/image_color") + self.declare_parameter("left_camera_topic", "/camera/left/image_color") + self.declare_parameter("center_camera_topic", "/camera/center/image_color") + + #Nuscenes + self.declare_parameter("front_center_camera_topic", "/CAM_FRONT/image_rect_compressed") + self.declare_parameter("front_right_camera_topic", "/CAM_FRONT_RIGHT/image_rect_compressed") + self.declare_parameter("front_left_camera_topic", "/CAM_FRONT_LEFT/image_rect_compressed") + + self.declare_parameter("back_center_camera_topic", "/CAM_BACK/image_rect_compressed") + self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") + self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") + + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") + self.declare_parameter("image_size", 1024) self.declare_parameter("compressed", False) self.declare_parameter("crop_mode", "LetterBox") self.declare_parameter("save_detections", False) - + + #Declare batch topic + self.declare_parameter("batch_inference_topic", "/batched_camera_message") + + + + #Batch inference topic + self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.onnx_model_path = self.get_parameter("onnx_model_path").value + self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value + + #Camera topics for eve self.camera_topic = self.get_parameter("camera_topic").value + self.left_camera_topic = self.get_parameter("left_camera_topic").value + self.center_camera_topic = 
self.get_parameter("center_camera_topic").value + + #Camera topics for nuscenes + self.front_center_camera_topic = self.get_parameter("front_center_camera_topic").value + self.front_right_camera_topic = self.get_parameter("front_right_camera_topic").value + self.front_left_camera_topic = self.get_parameter("front_left_camera_topic").value + + self.back_center_camera_topic = self.get_parameter("back_center_camera_topic").value + self.back_right_camera_topic = self.get_parameter("back_right_camera_topic").value + self.back_left_camera_topic = self.get_parameter("back_left_camera_topic").value + + + #Parameter for TensorRT Model + self.publish_vis_topic = self.get_parameter("publish_vis_topic").value self.publish_detection_topic = self.get_parameter("publish_detection_topic").value self.model_path = self.get_parameter("model_path").value + self.image_size = self.get_parameter("image_size").value self.compressed = self.get_parameter("compressed").value self.crop_mode = self.get_parameter("crop_mode").value @@ -57,6 +103,8 @@ def __init__(self): self.line_thickness = 1 self.half = False + + self.subscription = self.create_subscription( Image if not self.compressed else CompressedImage, self.camera_topic, @@ -67,11 +115,22 @@ def __init__(self): depth=10, ), ) + #Subscription for Nuscenes + self.front_center_camera_subscription = Subscriber(self, CompressedImage, self.front_center_camera_topic) + self.front_right_camera_subscription = Subscriber(self, CompressedImage, self.front_right_camera_topic) + self.front_left_camera_subscription = Subscriber(self, CompressedImage, self.front_left_camera_topic) + self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) + self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) + self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) + self.ats.registerCallback(self.batch_callback) + + self.orig_image_width = None self.orig_image_height = None - # set device + # set devicepublish_visDete self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): self.get_logger().info("Using GPU for inference") @@ -81,23 +140,77 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - # load yolov8 model - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) + self.logger = trt.Logger(trt.Logger.WARNING) + self.builder = trt.Builder(self.logger) + self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + self.parser = trt.OnnxParser(self.network, self.logger) + #Commented this out because only run when building the tensorRT engine + # #Reading the onnx file in the perception models directory + # with open(self.onnx_model_path, "rb") as model_file: + + # if not self.parser.parse(model_file.read()): + + # for error in range(parser.num_errors): + # print(self.parser.get_error(error)) + # raise RuntimeError("Failed to parse the ONNX file.") + + # #Building the tensorRT engine for inferencing + # self.config = self.builder.create_builder_config() + # self.total_memory = 
torch.cuda.get_device_properties(self.device).total_memory + # self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + # self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) + # assert self.engine_bytes is not None, "Failed to create engine" + # self.get_logger().info("BUILT THE ENGINE ") + + # #Writing to the perception models directory + # # Getting bug on these lines + # with open(self.tensorRT_model_path, "wb") as f: + # self.get_logger().info("WRITINTG THE ENGINE ") + # f.write(self.engine_bytes) + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names - self.stride = int(self.model.stride) # setup vis publishers + self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 ) - self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + image_list = [msg1,msg2,msg3,msg4,msg5,msg6] + batched_list = [] + for msg in image_list: + # Need to convert to numpy array and then make it have 3 color channels + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) + preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) + batched_list.append(preprocessedImage) + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}") + + + batch_tensor = torch.stack(batched_list) + self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") + + #Now need to pass it to yolov8 tensorRT model + # predictions = self.model(batch_tensor) + + + + #Converting to tensor for batching + def batch_convert_to_tensor(self, cv_image): + img = cv_image.transpose(2,0,1) + img = torch.from_numpy(img).to(self.device) + img = img.half() if self.half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + return img + def crop_image(self, cv_image): if self.crop_mode == "LetterBox": img = LetterBox(self.image_size, stride=self.stride)(image=cv_image) @@ -157,7 +270,7 @@ def crop_and_convert_to_tensor(self, cv_image): img = self.crop_image(cv_image) # Convert - img = img.transpose(2, 0, 1) + img = cv_image.transpose(2, 0, 1) # Further conversion img = torch.from_numpy(img).to(self.device) @@ -255,7 +368,7 @@ def image_callback(self, msg): pred = self.model(img) # nms function used same as yolov8 detect.py - pred = non_max_suppression(pred) + pred = non_max_suppression(pred) #Eliminates overlapping bounding boxes detections = [] for i, det in enumerate(pred): # per image if len(det): diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 0807da67..b4eb4248 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -4,4 +4,5 @@ setuptools==65.5.1 easydict huggingface huggingface_hub -numpy==1.24.2 \ No newline at end of file +numpy==1.24.2 +onnx \ No newline at end of file diff --git a/watod-config.sh b/watod-config.sh index 3255dd40..96033167 100755 --- a/watod-config.sh +++ b/watod-config.sh 
@@ -15,7 +15,7 @@ ## - simulation : starts simulation ## - samples : starts sample ROS2 pubsub nodes -# ACTIVE_MODULES="" +ACTIVE_MODULES="perception infrastructure" ################################# MODE OF OPERATION ################################# ## Possible modes of operation when running watod. From f1f9cee43a3fccd81e7bb2196cb7c7183463508b Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 5 Nov 2024 21:22:34 +0000 Subject: [PATCH 02/37] Mounted file and directory --- modules/docker-compose.perception.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 99dc9a49..4370e488 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,8 +32,11 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt + - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx + - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt + - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: build: From 359626b2c674aec3c7cf0aa1c5d2d81801a88350 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 03:38:20 +0000 Subject: [PATCH 03/37] Requirment txt changes --- src/perception/camera_object_detection/requirements.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index b4eb4248..9ae1f4cb 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -5,4 +5,9 @@ easydict huggingface huggingface_hub numpy==1.24.2 -onnx \ No newline at end of file +onnx +onnxsim +onnxruntime-gpu +tensorrt +tensorrt_lean +tensorrt_dispatch \ No newline at end of file From 5cb072ea36f947ee0863d26bc1ebe08bd768c604 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 14:46:45 +0000 Subject: [PATCH 04/37] Reafactored engine build to a function --- .../yolov8_detection.py | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 0b73261d..cffb5309 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,29 +144,10 @@ def __init__(self): self.builder = trt.Builder(self.logger) self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) self.parser = trt.OnnxParser(self.network, self.logger) - #Commented this out because only run when building the tensorRT engine - # #Reading the onnx file in the perception models directory - # with open(self.onnx_model_path, "rb") as model_file: - - # if not self.parser.parse(model_file.read()): - - # for error in range(parser.num_errors): - # print(self.parser.get_error(error)) - # raise RuntimeError("Failed to parse the ONNX file.") - - # #Building the tensorRT engine for inferencing - # 
self.config = self.builder.create_builder_config() - # self.total_memory = torch.cuda.get_device_properties(self.device).total_memory - # self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) - # self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) - # assert self.engine_bytes is not None, "Failed to create engine" - # self.get_logger().info("BUILT THE ENGINE ") - - # #Writing to the perception models directory - # # Getting bug on these lines - # with open(self.tensorRT_model_path, "wb") as f: - # self.get_logger().info("WRITINTG THE ENGINE ") - # f.write(self.engine_bytes) + + # #Commented this out because only run when building the tensorRT engine + # self.build_engine() + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -181,7 +162,33 @@ def __init__(self): self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) - + + def build_engine(self): + #Reading the onnx file in the perception models directory + with open(self.onnx_model_path, "rb") as model_file: + + if not self.parser.parse(model_file.read()): + + for error in range(parser.num_errors): + print(self.parser.get_error(error)) + raise RuntimeError("Failed to parse the ONNX file.") + + #Building the tensorRT engine for inferencing + self.config = self.builder.create_builder_config() + self.total_memory = torch.cuda.get_device_properties(self.device).total_memory + self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) + assert self.engine_bytes is not None, "Failed to create engine" + self.get_logger().info("BUILT THE ENGINE ") + + #Writing to the perception models directory + #Getting bug on these lines + with open(self.tensorRT_model_path, "wb") as f: + self.get_logger().info("WRITINTG THE ENGINE ") + f.write(self.engine_bytes) + self.get_logger().info("RETURNS 1") + + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 387e49199394c66d3f7db2ebee209d6dd2086f16 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:15:53 +0000 Subject: [PATCH 05/37] Fixed file error --- modules/docker-compose.perception.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 4370e488..13fc7f53 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -33,7 +33,7 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.engine + - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models From 61f48d84421a6d5e45acbac187377bc2c4842dc7 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:18:50 +0000 Subject: [PATCH 06/37] Changed docker compose file --- modules/docker-compose.perception.yaml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 13fc7f53..c82d5589 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,7 +32,6 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt From bea933d4376917d2ec9ba493489b417c1eb5f51f Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:20:59 +0000 Subject: [PATCH 07/37] Refactored docker changes --- modules/docker-compose.perception.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index c82d5589..13fc7f53 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,6 +32,7 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt + - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt From 4d4513dff98a160a7c5ba0e0bf0e45fd7674e012 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 21:53:19 +0000 Subject: [PATCH 08/37] initialized engine --- .../yolov8_detection.py | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index cffb5309..9dd19eb9 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -8,6 +8,7 @@ Detection2D, Detection2DArray, ) +from pathlib import Path from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend from ultralytics.data.augment import LetterBox, CenterCrop @@ -125,7 +126,7 @@ def __init__(self): self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) self.ats.registerCallback(self.batch_callback) - + self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None self.orig_image_height = None @@ -140,15 +141,14 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - self.logger = trt.Logger(trt.Logger.WARNING) - self.builder = trt.Builder(self.logger) - self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - self.parser = trt.OnnxParser(self.network, self.logger) + # #Commented this out because only run when 
building the tensorRT engine # self.build_engine() - - + + + + self.initialize_engine(self.tensorRT_model_path) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -165,6 +165,10 @@ def __init__(self): def build_engine(self): #Reading the onnx file in the perception models directory + self.logger = trt.Logger(trt.Logger.WARNING) + self.builder = trt.Builder(self.logger) + self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + self.parser = trt.OnnxParser(self.network, self.logger) with open(self.onnx_model_path, "rb") as model_file: if not self.parser.parse(model_file.read()): @@ -172,6 +176,8 @@ def build_engine(self): for error in range(parser.num_errors): print(self.parser.get_error(error)) raise RuntimeError("Failed to parse the ONNX file.") + + #Building the tensorRT engine for inferencing self.config = self.builder.create_builder_config() @@ -181,13 +187,40 @@ def build_engine(self): assert self.engine_bytes is not None, "Failed to create engine" self.get_logger().info("BUILT THE ENGINE ") + #Writing to the perception models directory #Getting bug on these lines with open(self.tensorRT_model_path, "wb") as f: self.get_logger().info("WRITINTG THE ENGINE ") f.write(self.engine_bytes) - self.get_logger().info("RETURNS 1") + + def initialize_engine(self, weight): + self.weight = Path(weight) if isinstance(weight, str) else weight + self.logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(self.logger, namespace ='') + with trt.Runtime(self.logger) as runtime: + self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) + self.execution_context = self.tensorRT_model.create_execution_context() + #self.names = [self.tensorRT_model.get_binding_name(i) for i in range(self.tensorRT_model.num_bindings)] + self.num_io_tensors = self.tensorRT_model.num_io_tensors + self.get_logger().info(f"BINDINGS: {self.num_io_tensors}") + self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] + self.num_io_tensors = self.tensorRT_model.num_io_tensors + self.bindings: List[int] = [0] * self.num_io_tensors + self.num_inputs = 0 + self.num_outputs = 0 + self.input_names = [] + self.output_names = [] + + for i in range(self.num_io_tensors): + self.tensor_name = self.tensorRT_model.get_tensor_name(i) + if self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.INPUT: + self.input_names.append(self.tensor_name) + self.num_inputs += 1 + else: + self.output_names.append(self.tensor_name) + self.num_outputs += 1 def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] @@ -199,7 +232,7 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) - # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}") + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a batch_tensor = torch.stack(batched_list) From 8074ca40bd1fc3d7f2d1a560665eb1099042f2b8 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 8 Nov 2024 14:37:33 +0000 Subject: [PATCH 09/37] Built and initliazed tensorRT engine --- .../yolov8_detection.py | 80 +++++++++++-------- 1 file changed, 
48 insertions(+), 32 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 9dd19eb9..91ff6e70 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -69,6 +69,7 @@ def __init__(self): #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value + self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value #Camera topics for eve @@ -140,14 +141,6 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - - - - # #Commented this out because only run when building the tensorRT engine - # self.build_engine() - - - self.initialize_engine(self.tensorRT_model_path) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -162,38 +155,56 @@ def __init__(self): self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) - + def build_engine(self): #Reading the onnx file in the perception models directory - self.logger = trt.Logger(trt.Logger.WARNING) + self.max_batch_size = 6 + self.channels = 3 + self.height = 640 + self.width = 640 + self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] + self.logger = trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) - self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - self.parser = trt.OnnxParser(self.network, self.logger) - with open(self.onnx_model_path, "rb") as model_file: - - if not self.parser.parse(model_file.read()): - - for error in range(parser.num_errors): - print(self.parser.get_error(error)) - raise RuntimeError("Failed to parse the ONNX file.") - - - - #Building the tensorRT engine for inferencing - self.config = self.builder.create_builder_config() + self.config = self.builder.create_builder_config() + self.cache = self.config.create_timing_cache(b"") + self.config.set_timing_cache(self.cache, ignore_mismatch=False) self.total_memory = torch.cuda.get_device_properties(self.device).total_memory self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + self.flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + self.network = self.builder.create_network(self.flag) + self.parser = trt.OnnxParser(self.network, self.logger) + with open(self.onnx_model_path, "rb") as f: + if not self.parser.parse(f.read()): + self.get_logger().info("ERROR: Cannot read ONNX FILE") + for error in range(self.parser.num_errors): + self.get_logger().info(f"{self.parser.get_error(error)}") + self.inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] + self.outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)] + for input in self.inputs: + self.get_logger().info(f"Model {input.name} shape:{input.shape} {input.dtype}") + for output in self.outputs: + self.get_logger().info(f"Model {output.name} shape: {output.shape} {output.dtype}") + if self.max_batch_size > 1: + self.profile = self.builder.create_optimization_profile() + self.min_shape = [1, self.channels, self.height, self.width] # Minimum batch size of 1 + 
self.opt_shape = [int(self.max_batch_size / 2), self.channels, self.height, self.width] # Optimal batch size (half of max, so 3) + self.max_shape = self.shape_input_model + + for input in self.inputs: + self.profile.set_shape(input.name, self.min_shape, self.opt_shape, self.max_shape) + self.config.add_optimization_profile(self.profile) + self.half = True + self.int8 = False + if self.half: + self.config.set_flag(trt.BuilderFlag.FP16) + elif self.int8: + self.config.set_flag(trt.BuilderFlag.INT8) self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) assert self.engine_bytes is not None, "Failed to create engine" self.get_logger().info("BUILT THE ENGINE ") - - - #Writing to the perception models directory - #Getting bug on these lines with open(self.tensorRT_model_path, "wb") as f: - self.get_logger().info("WRITINTG THE ENGINE ") f.write(self.engine_bytes) - + self.get_logger().info("FINISHED WRITING ") def initialize_engine(self, weight): self.weight = Path(weight) if isinstance(weight, str) else weight @@ -201,10 +212,15 @@ def initialize_engine(self, weight): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) + self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() - #self.names = [self.tensorRT_model.get_binding_name(i) for i in range(self.tensorRT_model.num_bindings)] + + self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.get_logger().info(f"BINDINGS: {self.num_io_tensors}") + self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) + self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) + self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) + self.get_logger().info(f"INPUT SHAPE:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings: List[int] = [0] * self.num_io_tensors From 9b4637e309454ea97db5d4153e67b9d0f774075c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 21:29:39 +0000 Subject: [PATCH 10/37] Allocated memory for inferencing --- modules/docker-compose.perception.yaml | 2 + .../yolov8_detection.py | 63 +++++++++++++++---- .../camera_object_detection/requirements.txt | 2 - 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 13fc7f53..0679b51a 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -33,7 +33,9 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx + - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/yolov8m_dynamic.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine + - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models diff --git 
a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 91ff6e70..8c00ef72 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -25,6 +25,7 @@ import torch import onnx import tensorrt as trt +# import pycuda.driver as cuda class CameraDetectionNode(Node): @@ -50,8 +51,8 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m_copy2.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -87,7 +88,7 @@ def __init__(self): self.back_left_camera_topic = self.get_parameter("back_left_camera_topic").value - #Parameter for TensorRT Model + self.publish_vis_topic = self.get_parameter("publish_vis_topic").value self.publish_detection_topic = self.get_parameter("publish_detection_topic").value @@ -141,7 +142,9 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() + # self.stream = cuda.Stream() self.initialize_engine(self.tensorRT_model_path) + self.initialize_tensors() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -157,6 +160,7 @@ def __init__(self): ) def build_engine(self): + #Only calling this function when we dont have an engine file #Reading the onnx file in the perception models directory self.max_batch_size = 6 self.channels = 3 @@ -220,24 +224,61 @@ def initialize_engine(self, weight): self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - self.get_logger().info(f"INPUT SHAPE:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.bindings: List[int] = [0] * self.num_io_tensors - self.num_inputs = 0 - self.num_outputs = 0 + self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") + self.bindings = [0] * self.num_io_tensors + self.num_inputs = 0 + self.num_outputs = 0 self.input_names = [] self.output_names = [] - for i in range(self.num_io_tensors): self.tensor_name = self.tensorRT_model.get_tensor_name(i) if self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.INPUT: self.input_names.append(self.tensor_name) self.num_inputs += 1 - else: + elif self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.OUTPUT: self.output_names.append(self.tensor_name) - self.num_outputs += 1 - + self.num_outputs += 1 + # self.get_logger().info(f"Inputs:{self.input_names}") + # self.get_logger().info(f"Outputs:{self.output_names}") 
+ self.num_inputs = len(self.input_names) + self.num_outputs = len(self.output_names) + # self.get_logger().info(f"Number of inputs:{self.num_inputs}") + # self.get_logger().info(f"Number of outputs:{self.num_outputs}") + self.input_names = self.names[:self.num_inputs] + self.output_names = [name for name in self.output_names if name not in self.input_names] + self.get_logger().info(f"INPUT NAMES:: {self.input_names}") + self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + + def initialize_tensors(self): + self.dynamic = True + self.input_info = [] + self.output_info = [] + self.output_ptrs = [] + self.input_device_bindings = [] + self.output_device_bindings = [] + + for i, name in enumerate(self.input_names): + if self.tensorRT_model.get_tensor_name(i) == name: + + self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") + self.get_logger().info(f"Tensor Datatype:{self.dtype}") + # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) + # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) + # self.input_device_bindings.append(self.input_device_memory) + + for i, name in enumerate(self.output_names): + if self.tensorRT_model.get_tensor_name(i) == name: + self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") + self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) + # self.output_device_bindings.append(self.output_device_memory) def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 9ae1f4cb..4373e3c7 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -9,5 +9,3 @@ onnx onnxsim onnxruntime-gpu tensorrt -tensorrt_lean -tensorrt_dispatch \ No newline at end of file From 72a58a22574754bb5e2fff14b09288cad07afef3 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 22:05:32 +0000 Subject: [PATCH 11/37] fixed tensor function --- .../yolov8_detection.py | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 8c00ef72..53795d93 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -25,7 +25,17 @@ import torch import onnx import tensorrt as trt +from typing import List, Optional, Tuple, Union +from dataclasses import dataclass +from numpy import ndarray # import pycuda.driver as cuda +@dataclass +class Tensor: + name:str + dtype:np.dtype + shape:Tuple + cpu:ndarray + gpu:int class CameraDetectionNode(Node): @@ -250,6 +260,7 @@ def initialize_engine(self, weight): self.output_names = [name for 
name in self.output_names if name not in self.input_names] self.get_logger().info(f"INPUT NAMES:: {self.input_names}") self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + def initialize_tensors(self): self.dynamic = True @@ -258,10 +269,20 @@ def initialize_tensors(self): self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] - + + for name in self.output_names: + self.get_logger().info(f"NAMES of outputs:{name}") + self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") + self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) + # self.output_device_bindings.append(self.output_device_memory) + for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: - + self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") @@ -270,15 +291,7 @@ def initialize_tensors(self): # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) - for i, name in enumerate(self.output_names): - if self.tensorRT_model.get_tensor_name(i) == name: - self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") - self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) - # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) - # self.output_device_bindings.append(self.output_device_memory) + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] @@ -289,12 +302,14 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) + + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a batch_tensor = torch.stack(batched_list) self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") - + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) From 46a0d38e5ee750952222cfc1f70bafd3894a5a67 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 22:53:59 +0000 Subject: [PATCH 12/37] Appended output tensor --- .../camera_object_detection/yolov8_detection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 53795d93..a8f19fd1 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -29,6 +29,7 @@ from dataclasses import 
dataclass from numpy import ndarray # import pycuda.driver as cuda + @dataclass class Tensor: name:str @@ -276,9 +277,13 @@ def initialize_tensors(self): self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + self.output_cpu = np.empty(self.tensorRT_output_shape, self.outputDtype) + self.gpu = 0 + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) # self.output_device_bindings.append(self.output_device_memory) + self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.cpu, self.gpu)) + for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: @@ -287,6 +292,7 @@ def initialize_tensors(self): self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") + self.input_cpu = 0 # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) @@ -299,7 +305,7 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) + resized_compressedImage = cv2.resize(compressedImage,(1920, 1088)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) From 2fffe0d1970c179429df14ed9d43b2e5f0f2b350 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 15:14:28 +0000 Subject: [PATCH 13/37] Set up tensors, focusing on gpu allocation --- .../yolov8_detection.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a8f19fd1..0d1f97a7 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -155,7 +155,11 @@ def __init__(self): self.cv_bridge = CvBridge() # self.stream = cuda.Stream() self.initialize_engine(self.tensorRT_model_path) - self.initialize_tensors() + self.input_info, self.output_info = self.initialize_tensors() + self.engine_gpu_allocation(self.input_info) + self.get_logger().info(f"Input Info:{self.input_info}") + self.get_logger().info(f"Output Info:{self.output_info}") + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -267,7 +271,6 @@ def initialize_tensors(self): self.dynamic = True self.input_info = [] self.output_info = [] - self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] @@ -277,13 +280,15 @@ def 
initialize_tensors(self): self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - self.output_cpu = np.empty(self.tensorRT_output_shape, self.outputDtype) - self.gpu = 0 + self.output_cpu = np.empty(0) + self.output_gpu = 0 + self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) # self.output_device_bindings.append(self.output_device_memory) - self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.cpu, self.gpu)) - + + + self.get_logger().info(f"Output Info:{self.output_info}") for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: @@ -292,12 +297,16 @@ def initialize_tensors(self): self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") - self.input_cpu = 0 + self.input_cpu = np.empty(0) + self.input_gpu = 0 + self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) + # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) - - + return self.input_info, self.output_info + def engine_gpu_allocation(self, input_info): + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 87f0785280efcb83321843af949dbcb02e697b61 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 22:42:33 +0000 Subject: [PATCH 14/37] Allocated memory --- .../yolov8_detection.py | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 0d1f97a7..95345111 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -63,7 +63,7 @@ def __init__(self): self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m_copy2.engine") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -170,7 +170,7 @@ def __init__(self): self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 ) - self.get_logger().info( + self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." 
) @@ -273,13 +273,13 @@ def initialize_tensors(self): self.output_info = [] self.input_device_bindings = [] self.output_device_bindings = [] + batch_size = 6 for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") - self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") self.output_cpu = np.empty(0) self.output_gpu = 0 self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) @@ -294,6 +294,7 @@ def initialize_tensors(self): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") @@ -305,8 +306,41 @@ def initialize_tensors(self): # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) return self.input_info, self.output_info + def engine_gpu_allocation(self, input_info): + + for i in range(10): + self.inputTensors = [] + for i in self.input_info: + self.inputTensors.append(i.cpu) + + assert len(self.inputTensors) == self.num_inputs, "does not match" + self.contiguous_inputs:List[ndarray] = [ + np.ascontiguousarray(i) for i in self.inputTensors + ] + self.get_logger().info(f"contigous shape:{self.contiguous_inputs}") + for i in range(self.num_inputs): + name = self.input_info[i].name + shape = self.input_info[i].shape + self.get_logger().info(f"input names:{name} & input shape:{shape}") + # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) + + # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) + self.bindings[i] = int(self.inp_info[i].gpu) + for i in range(self.num_outputs): + name = self.output_info[i].name + shape = self.output_info[i].shape + self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") + dtype = self.output_info[i].dtype + cpu = np.empty(shape, dtype=dtype) + # gpu = cuda.mem_alloc(cpu.nbytes) + # cuda.memcpy_htod_async(gpu,cpu, self.stream) + # self.outputCpuTensor.append(cpu) + + # self.execution_context.execute_async_v2() + # self.stream.synchronize() + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 592a877fb00c4738ceea27d53b4929525d8cd5ea Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 22:51:46 +0000 Subject: [PATCH 15/37] fixed small bug --- .../camera_object_detection/yolov8_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 95345111..4b9c80aa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -327,7 +327,7 @@ def engine_gpu_allocation(self, input_info): # gpu = 
self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) - self.bindings[i] = int(self.inp_info[i].gpu) + self.bindings[i] = int(self.input_info[i].gpu) for i in range(self.num_outputs): name = self.output_info[i].name shape = self.output_info[i].shape From ae4153ca8d06a4e952c69b71bfc6e0d6be9eb393 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 14 Nov 2024 21:05:33 +0000 Subject: [PATCH 16/37] Binded inp. & out., alloc. memory, setting up inf. --- modules/docker-compose.perception.yaml | 2 +- .../yolov8_detection.py | 94 ++++++++++--------- .../camera_object_detection/requirements.txt | 2 +- 3 files changed, 52 insertions(+), 46 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 0679b51a..f56b2e6b 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -39,7 +39,7 @@ services: - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models - + lidar_object_detection: build: context: .. diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 4b9c80aa..5d9cb749 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -137,7 +137,7 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) - self.ats.registerCallback(self.batch_callback) + self.ats.registerCallback(self.batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") @@ -157,8 +157,8 @@ def __init__(self): self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() self.engine_gpu_allocation(self.input_info) - self.get_logger().info(f"Input Info:{self.input_info}") - self.get_logger().info(f"Output Info:{self.output_info}") + + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -254,13 +254,9 @@ def initialize_engine(self, weight): self.num_inputs += 1 elif self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.OUTPUT: self.output_names.append(self.tensor_name) - self.num_outputs += 1 - # self.get_logger().info(f"Inputs:{self.input_names}") - # self.get_logger().info(f"Outputs:{self.output_names}") + self.num_outputs += 1 self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) - # self.get_logger().info(f"Number of inputs:{self.num_inputs}") - # self.get_logger().info(f"Number of outputs:{self.num_outputs}") self.input_names = self.names[:self.num_inputs] self.output_names = [name for name in self.output_names 
if name not in self.input_names] self.get_logger().info(f"INPUT NAMES:: {self.input_names}") @@ -271,6 +267,7 @@ def initialize_tensors(self): self.dynamic = True self.input_info = [] self.output_info = [] + self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] batch_size = 6 @@ -279,37 +276,28 @@ def initialize_tensors(self): self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] + self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.output_cpu = np.empty(0) + self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) self.output_gpu = 0 + # self.output_gpu = cuda.mem_alloc(self.output_cpu.nbytes) + # cyda.memcpy_htod_async(self.output_gpu, self.output_cpu, self.stream) + self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) - # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) - # self.output_device_bindings.append(self.output_device_memory) - - - self.get_logger().info(f"Output Info:{self.output_info}") - + for i, name in enumerate(self.input_names): - if self.tensorRT_model.get_tensor_name(i) == name: - + if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") - self.get_logger().info(f"Tensor Datatype:{self.dtype}") - self.input_cpu = np.empty(0) - self.input_gpu = 0 + self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) + self.input_gpu = 1 + # self.input_gpu = cuda.mem_alloc(self.input_cpu.nbytes) + # cuda.memcpy_htod_async(self.input_gpu, self.input_cpu, self.stream) self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) - - # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) - # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) - # self.input_device_bindings.append(self.input_device_memory) return self.input_info, self.output_info def engine_gpu_allocation(self, input_info): - - for i in range(10): self.inputTensors = [] for i in self.input_info: @@ -319,51 +307,69 @@ def engine_gpu_allocation(self, input_info): self.contiguous_inputs:List[ndarray] = [ np.ascontiguousarray(i) for i in self.inputTensors ] - self.get_logger().info(f"contigous shape:{self.contiguous_inputs}") + for i in range(self.num_inputs): name = self.input_info[i].name shape = self.input_info[i].shape + contigous_shape = self.contiguous_inputs[i].nbytes + self.get_logger().info(f"input names:{name} & input shape:{shape}") - # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) + + # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i].nbytes) # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) self.bindings[i] = int(self.input_info[i].gpu) + + self.output_gpu_ptrs:List[int] = [] + self.outputs_ndarray:List[ndarray] = [] for i in 
range(self.num_outputs): + j = i + self.num_inputs name = self.output_info[i].name shape = self.output_info[i].shape self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") dtype = self.output_info[i].dtype - cpu = np.empty(shape, dtype=dtype) + self.outputs_ndarray.append(self.output_cpu) + self.output_gpu_ptrs.append(self.output_gpu) # gpu = cuda.mem_alloc(cpu.nbytes) # cuda.memcpy_htod_async(gpu,cpu, self.stream) - # self.outputCpuTensor.append(cpu) + self.bindings[j] = int(self.output_info[i].gpu) + self.get_logger().info(f"Bindings Array:{self.bindings}") + self.get_logger().info(f"Outputs Array after append{self.output_gpu_ptrs}") - # self.execution_context.execute_async_v2() + #this is for the inferencing part + # self.execution_context.execute_async_v2(self.bindings, self.stream.handle) # self.stream.synchronize() + + # for i, o in enumerate(self.output_gpu_ptrs): + # cuda.memcpy_dtoh_async(self.outputs[i], o, self.stream) + - def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] for msg in image_list: # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(1920, 1088)) + resized_compressedImage = cv2.resize(compressedImage,(640, 640)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) - - - # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a - - - batch_tensor = torch.stack(batched_list) - self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") - + self.batch_tensor = torch.stack(batched_list) + self.batch_array = self.batch_tensor.cpu().numpy().astype(np.float32) + self.get_logger().info(f"batch array shape: {self.batch_array.shape}") + height, width = self.input_info[0].shape[-2:] + self.get_logger().info(f"HEIGHT:{height}") + self.get_logger().info(f"Width:{width}") + cuda.memcpy_htod_async(self.input_info[0].gpu, self.batch_array, self.stream) + self.execution_context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle) + self.stream.synchronize() # Wait for inference to complete + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) - + + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 4373e3c7..1bdf147b 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -8,4 +8,4 @@ numpy==1.24.2 onnx onnxsim onnxruntime-gpu -tensorrt +tensorrt \ No newline at end of file From 2252b0eb17daee7f73ce54e084f31f46e916f0de Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 18 Nov 2024 19:48:47 +0000 Subject: [PATCH 17/37] Finished Inference function and allocated gpu --- modules/docker-compose.perception.yaml | 2 +- .../yolov8_detection.py | 110 +++++++++--------- .../camera_object_detection/requirements.txt | 5 +- 3 files changed, 61 insertions(+), 56 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index f56b2e6b..74ce35d3 100644 --- a/modules/docker-compose.perception.yaml +++ 
b/modules/docker-compose.perception.yaml @@ -33,7 +33,7 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/yolov8m_dynamic.onnx + - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/ylov8m_dynamic.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 5d9cb749..a06303f8 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -29,7 +29,7 @@ from dataclasses import dataclass from numpy import ndarray # import pycuda.driver as cuda - +from cuda import cudart @dataclass class Tensor: name:str @@ -62,7 +62,7 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") @@ -153,13 +153,15 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - # self.stream = cuda.Stream() + + status, self.stream = cudart.cudaStreamCreate() + assert status.value == 0, "IS NOT ZERO" + + #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - self.engine_gpu_allocation(self.input_info) - - - + self.tensorRT_inferencing(self.input_info) + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -224,7 +226,7 @@ def build_engine(self): with open(self.tensorRT_model_path, "wb") as f: f.write(self.engine_bytes) self.get_logger().info("FINISHED WRITING ") - + def initialize_engine(self, weight): self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) @@ -237,11 +239,9 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") self.bindings = [0] * self.num_io_tensors self.num_inputs = 0 self.num_outputs = 0 @@ -268,20 +268,21 @@ def initialize_tensors(self): self.input_info = 
[] self.output_info = [] self.output_ptrs = [] - self.input_device_bindings = [] - self.output_device_bindings = [] batch_size = 6 for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + #Batch size changes from -1 to 6 self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] - self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + # self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) - self.output_gpu = 0 - # self.output_gpu = cuda.mem_alloc(self.output_cpu.nbytes) - # cyda.memcpy_htod_async(self.output_gpu, self.output_cpu, self.stream) + status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) + assert status.value == 0 + cudart.cudaMemcpyAsync(self.output_gpu, self.output_cpu.ctypes.data, self.output_cpu.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) @@ -291,64 +292,63 @@ def initialize_tensors(self): self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) - self.input_gpu = 1 - # self.input_gpu = cuda.mem_alloc(self.input_cpu.nbytes) - # cuda.memcpy_htod_async(self.input_gpu, self.input_cpu, self.stream) + status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) + assert status.value == 0, "DOES NOT MATCH" + cudart.cudaMemcpyAsync( + self.input_gpu, self.input_cpu.ctypes.data, self.input_cpu.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) + return self.input_info, self.output_info - def engine_gpu_allocation(self, input_info): + def tensorRT_inferencing(self, input_info): for i in range(10): self.inputTensors = [] for i in self.input_info: self.inputTensors.append(i.cpu) - assert len(self.inputTensors) == self.num_inputs, "does not match" self.contiguous_inputs:List[ndarray] = [ np.ascontiguousarray(i) for i in self.inputTensors ] - for i in range(self.num_inputs): name = self.input_info[i].name shape = self.input_info[i].shape contigous_shape = self.contiguous_inputs[i].nbytes + cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, + self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) + self.bindings[i] = self.input_info[i].gpu - self.get_logger().info(f"input names:{name} & input shape:{shape}") - - # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i].nbytes) - - # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) - self.bindings[i] = int(self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] self.outputs_ndarray:List[ndarray] = [] for i in range(self.num_outputs): j = i + self.num_inputs - name = self.output_info[i].name - shape = self.output_info[i].shape - self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") - dtype = self.output_info[i].dtype - self.outputs_ndarray.append(self.output_cpu) - 
self.output_gpu_ptrs.append(self.output_gpu) - # gpu = cuda.mem_alloc(cpu.nbytes) - # cuda.memcpy_htod_async(gpu,cpu, self.stream) - self.bindings[j] = int(self.output_info[i].gpu) - self.get_logger().info(f"Bindings Array:{self.bindings}") - self.get_logger().info(f"Outputs Array after append{self.output_gpu_ptrs}") - - #this is for the inferencing part - # self.execution_context.execute_async_v2(self.bindings, self.stream.handle) - # self.stream.synchronize() + cpu = self.output_info[i].cpu + gpu = self.output_info[i].gpu + self.outputs_ndarray.append(cpu) + self.output_gpu_ptrs.append(gpu) + self.bindings[j] = gpu + self.get_logger().info(f"Binding Gpu array:{self.bindings}") + for i in range(self.num_io_tensors): + # self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") + # self.get_logger().info(f"TENSOR NAMES: {self.tensorRT_model.get_tensor_name(i)}") + self.get_logger().info(f"Binding Gpu array:{self.bindings[i]}") + self.execution_context.set_tensor_address(self.tensorRT_model.get_tensor_name(i), self.bindings[i]) + + self.execution_context.execute_async_v3(self.stream) + cudart.cudaStreamSynchronize(self.stream) + # for i, o in enumerate(output_gpu_ptrs): + # cuda.memcpy_dtoh_async(outputs[i], o, self.stream) + + # return tuple(outputs) if len(outputs) > 1 else outputs[0] - # for i, o in enumerate(self.output_gpu_ptrs): - # cuda.memcpy_dtoh_async(self.outputs[i], o, self.stream) - def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] + + #Preprocessing for msg in image_list: - # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) resized_compressedImage = cv2.resize(compressedImage,(640, 640)) @@ -360,15 +360,17 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): height, width = self.input_info[0].shape[-2:] self.get_logger().info(f"HEIGHT:{height}") self.get_logger().info(f"Width:{width}") - cuda.memcpy_htod_async(self.input_info[0].gpu, self.batch_array, self.stream) - self.execution_context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle) - self.stream.synchronize() # Wait for inference to complete - + + #We have to call inferencing function here + + + #Bounding Boxes & oost processing detections + + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) - - + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 1bdf147b..877493f7 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -8,4 +8,7 @@ numpy==1.24.2 onnx onnxsim onnxruntime-gpu -tensorrt \ No newline at end of file +tensorrt==10.6.0 +tensorrt_lean +tensorrt_dispatch +cuda-python \ No newline at end of file From b27effbf5b27da4104e65a59c6749edb06a8c7eb Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 21 Nov 2024 20:55:22 +0000 Subject: [PATCH 18/37] Got Bounding boxes, socres, and predicted classes --- .../yolov8_detection.py | 133 +++++++++++------- 1 file changed, 84 insertions(+), 49 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py 
b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a06303f8..bf99facd 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -160,7 +160,7 @@ def __init__(self): #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - self.tensorRT_inferencing(self.input_info) + # self.tensorRT_inferencing(self.input_info) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -239,7 +239,10 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) + + self.execution_context.set_input_shape(self.input_tensor_name, (6,3,640,640)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) + self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -258,7 +261,8 @@ def initialize_engine(self, weight): self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) self.input_names = self.names[:self.num_inputs] - self.output_names = [name for name in self.output_names if name not in self.input_names] + #self.output_names = self.names[:self.num_outputs] #This includes the input name in output + self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it self.get_logger().info(f"INPUT NAMES:: {self.input_names}") self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") @@ -272,11 +276,9 @@ def initialize_tensors(self): for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") - self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - #Batch size changes from -1 to 6 - self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] - # self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") - + + self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) + self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -301,72 +303,105 @@ def initialize_tensors(self): return self.input_info, self.output_info - def tensorRT_inferencing(self, input_info): - for i in range(10): - self.inputTensors = [] - for i in self.input_info: - self.inputTensors.append(i.cpu) - assert len(self.inputTensors) == self.num_inputs, "does not match" - self.contiguous_inputs:List[ndarray] = [ - np.ascontiguousarray(i) for i in self.inputTensors - ] - for i in range(self.num_inputs): + def tensorRT_inferencing(self, batch_array): + assert batch_array.ndim == 4 + batch_size = batch_array.shape[0] + assert batch_size == self.input_info[0].shape[0] + #self.get_logger().info(f"Batch Array Stats: shape={batch_array.shape}, dtype={batch_array.dtype}, range=({batch_array.min()}, {batch_array.max()})") + #self.inputTensors = [batch_array] + 
self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] + #self.get_logger().info(f"Contiguous Inputs: dtype={self.contiguous_inputs[0].dtype}, shape={self.contiguous_inputs[0].shape}") + #for i, input_tensor in enumerate(self.contiguous_inputs): + #self.get_logger().info(f"Input {i}: shape={input_tensor.shape}, dtype={input_tensor.dtype}, range=({input_tensor.min()}, {input_tensor.max()})") + #for i, input_gpu in enumerate(self.input_info): + #self.get_logger().info(f"Input {i} GPU address: {input_gpu.gpu}") + + for i in range(self.num_inputs): name = self.input_info[i].name - shape = self.input_info[i].shape - contigous_shape = self.contiguous_inputs[i].nbytes cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.bindings[i] = self.input_info[i].gpu + self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] - self.outputs_ndarray:List[ndarray] = [] - for i in range(self.num_outputs): + self.output_gpu_ptrs:List[int] = [] + self.outputs_ndarray:List[ndarray] = [] + for i in range(self.num_outputs): j = i + self.num_inputs cpu = self.output_info[i].cpu gpu = self.output_info[i].gpu self.outputs_ndarray.append(cpu) self.output_gpu_ptrs.append(gpu) - self.bindings[j] = gpu - self.get_logger().info(f"Binding Gpu array:{self.bindings}") - for i in range(self.num_io_tensors): - # self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") - # self.get_logger().info(f"TENSOR NAMES: {self.tensorRT_model.get_tensor_name(i)}") - self.get_logger().info(f"Binding Gpu array:{self.bindings[i]}") - self.execution_context.set_tensor_address(self.tensorRT_model.get_tensor_name(i), self.bindings[i]) - - self.execution_context.execute_async_v3(self.stream) - cudart.cudaStreamSynchronize(self.stream) - # for i, o in enumerate(output_gpu_ptrs): - # cuda.memcpy_dtoh_async(outputs[i], o, self.stream) - - # return tuple(outputs) if len(outputs) > 1 else outputs[0] + self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) + self.execution_context.execute_async_v3(self.stream) + cudart.cudaStreamSynchronize(self.stream) + for i, o in enumerate(self.output_gpu_ptrs): + cudart.cudaMemcpyAsync( + self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream + ) + # for i, output in enumerate(self.outputs_ndarray): + + #for i, output in enumerate(self.outputs_ndarray): + + return self.outputs_ndarray def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] - #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) resized_compressedImage = cv2.resize(compressedImage,(640, 640)) - preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) - batched_list.append(preprocessedImage) - self.batch_tensor = torch.stack(batched_list) - self.batch_array = self.batch_tensor.cpu().numpy().astype(np.float32) - self.get_logger().info(f"batch array shape: {self.batch_array.shape}") - height, width = self.input_info[0].shape[-2:] - self.get_logger().info(f"HEIGHT:{height}") - self.get_logger().info(f"Width:{width}") - + rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = 
rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + #We have to call inferencing function here - + detections = self.tensorRT_inferencing(batched_images) - #Bounding Boxes & oost processing detections + #self.get_logger().info(f"detections[0] output:{detections[0][0]}") + # for i, output in enumerate(detections[0][0][:2]): + # self.get_logger().info(f"Output {i}: shape={output.shape}, dtype={output.dtype}, range=({output.min()}, {output.max()})") + """ + - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images + - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores + - Detection[0][0][0] represnets the first detection components ranging from 0 to 83 + - 8400 represents 8400 anchors + """ + batch_size = detections[0].shape[0] + for image_idx in range(batch_size): + image_detections = detections[0][image_idx] + + for anchor_idx in range(8400): + x_min = image_detections[0][anchor_idx] + y_min = image_detections[1][anchor_idx] + x_max = image_detections[2][anchor_idx] + y_max = image_detections[3][anchor_idx] + + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,84)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.5: + self.get_logger().info(f"Bounding Box: ({x_min}, {y_min}, {x_max}, {y_max})") + self.get_logger().info(f" Predicted Class: {predicted_class} (Probability: {predicted_prob})") + self.get_logger().info(f"Confidence:{confidence}") + #self.get_logger().info(f"SHAPE:{detections[0].shape[0]}") + + #self.get_logger().info(f"result detections shape:{results_detections[0].shape}") + #self.get_logger().info(f"result detections [0]:{results_detections[0]}") + + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) From 485d59227f65015d801ebf7f8ab1e678a682a686 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 17:07:43 +0000 Subject: [PATCH 19/37] Stored results, working on visualization --- modules/docker-compose.perception.yaml | 2 + .../yolov8_detection.py | 126 +++++++++++++----- 2 files changed, 94 insertions(+), 34 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 74ce35d3..db2efd1a 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -38,6 +38,8 @@ services: - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt + - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx + - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py 
b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index bf99facd..a5795c3f 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,7 +62,7 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") + self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") @@ -290,8 +290,8 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: - self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] + self.tensorRT_input_shape = tuple(self.inputShape) + self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -340,20 +340,19 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - # for i, output in enumerate(self.outputs_ndarray): - - #for i, output in enumerate(self.outputs_ndarray): - + return self.outputs_ndarray def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - #Taking msgs from all 6 ros2 subscribers + #Taking msgs from all 6 ros2 subscribers + imageHeight = self.input_info[0].shape[-1] + imageWidth = self.input_info[0].shape[-2] image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(640, 640)) + resized_compressedImage = cv2.resize(compressedImage,(imageWidth, imageHeight)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) @@ -362,14 +361,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): batched_list.append(tensor_image) batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - - #We have to call inferencing function here detections = self.tensorRT_inferencing(batched_images) - - #self.get_logger().info(f"detections[0] output:{detections[0][0]}") - # for i, output in enumerate(detections[0][0][:2]): - # self.get_logger().info(f"Output {i}: shape={output.shape}, dtype={output.dtype}, range=({output.min()}, {output.max()})") - """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores @@ -377,36 +369,102 @@ def 
batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ + results_dict = {i: [] for i in range(6)} batch_size = detections[0].shape[0] for image_idx in range(batch_size): + #self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") image_detections = detections[0][image_idx] + + if image_idx not in results_dict: + results_dict[image_idx] = [] for anchor_idx in range(8400): - x_min = image_detections[0][anchor_idx] - y_min = image_detections[1][anchor_idx] - x_max = image_detections[2][anchor_idx] - y_max = image_detections[3][anchor_idx] + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,84)] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] predicted_class = np.argmax(class_probs) predicted_prob = class_probs[predicted_class] - if confidence > 0.5: - self.get_logger().info(f"Bounding Box: ({x_min}, {y_min}, {x_max}, {y_max})") - self.get_logger().info(f" Predicted Class: {predicted_class} (Probability: {predicted_prob})") - self.get_logger().info(f"Confidence:{confidence}") - #self.get_logger().info(f"SHAPE:{detections[0].shape[0]}") - - #self.get_logger().info(f"result detections shape:{results_detections[0].shape}") - #self.get_logger().info(f"result detections [0]:{results_detections[0]}") - + if confidence > 0.4: - #Now need to pass it to yolov8 tensorRT model - # predictions = self.model(batch_tensor) - + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, imageHeight, imageWidth) + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + + # To store final results after NMS + + + + def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): + half_width = width /2 + half_height = height /2 + + x_min = x_center - half_width + y_min = y_center - half_height + x_max = x_center + half_width + y_max = y_center + half_height + return x_min, y_min, x_max, y_max + + def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): + #Confidence threshold + bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] + bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) + bbox_list_new = [] + while bboxes_sorted: + current_box = bboxes_sorted.pop(0) + bbox_list_new.append(current_box) + + # Filter boxes with IoU below the threshold + bboxes_sorted = [ + box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold + ] + + self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") + return bbox_list_new + + def get_iou(self, box1, box2): + x1, y1, x2, y2 = box1 + x3, y3, x4, y4 = box2 + + # Intersection coordinates + x_inter1 = max(x1, x3) + y_inter1 = max(y1, y3) + x_inter2 = min(x2, x4) + y_inter2 = min(y2, y4) + + # Width and height of the intersection + width_inter = abs(x_inter2 - x_inter1) + height_inter = abs(y_inter2 - y_inter1) + + # If there is no intersection + if x_inter2 < x_inter1 or y_inter2 < y_inter1: + return 0.0 + + # Area of intersection + area_inter = width_inter * height_inter + + # Areas of the two boxes + 
width_box1 = abs(x2 - x1) + height_box1 = abs(y2 - y1) + area_box1 = width_box1 * height_box1 + + width_box2 = abs(x4 - x3) + height_box2 = abs(y4 - y3) + area_box2 = width_box2 * height_box2 + + # Union area + area_union = area_box1 + area_box2 - area_inter + + # Intersection over Union + iou = area_inter / area_union + + return iou - #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) From 43434350d15a76cd943a633a1728903e06d2f3eb Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 18:01:19 +0000 Subject: [PATCH 20/37] Refactored code --- .../yolov8_detection.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a5795c3f..e0ae4eaa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -65,6 +65,8 @@ def __init__(self): self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") + self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") + self.declare_parameter("batch_publish_detection_topic", "/batch_detections") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -80,6 +82,8 @@ def __init__(self): #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.batch_publish_detection_topic = self.get_parameter("batch_publish_detection_topic").value + self.batch_publish_vis_topic = self.get_parameter("batch_publish_vis_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value @@ -168,6 +172,9 @@ def __init__(self): # setup vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) + self.batch_detection_publisher = self.create_publisher(Detection2DArray, batch_publish_detection_topic, 10) + self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_vis_publisher, 10) + self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 @@ -412,20 +419,20 @@ def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageW def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): #Confidence threshold - bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] - bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) - bbox_list_new = [] - while bboxes_sorted: - current_box = bboxes_sorted.pop(0) - bbox_list_new.append(current_box) + bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] + bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) + bbox_list_new = [] + while bboxes_sorted: + current_box = bboxes_sorted.pop(0) + bbox_list_new.append(current_box) # Filter boxes with IoU below the threshold - bboxes_sorted = [ - box for box in bboxes_sorted if 
self.get_iou(current_box[:4], box[:4]) < iou_threshold - ] + bboxes_sorted = [ + box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold + ] - self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") - return bbox_list_new + self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") + return bbox_list_new def get_iou(self, box1, box2): x1, y1, x2, y2 = box1 From c3551fcfb620658d050899a40493cbb6dfa299a7 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 18:08:26 +0000 Subject: [PATCH 21/37] Refactored publishers --- .../camera_object_detection/yolov8_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index e0ae4eaa..7f242caa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -172,8 +172,8 @@ def __init__(self): # setup vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) - self.batch_detection_publisher = self.create_publisher(Detection2DArray, batch_publish_detection_topic, 10) - self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_vis_publisher, 10) + self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) + self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( From e1c2e471df33943ae73aadd35e50f974a8816268 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 27 Nov 2024 20:12:15 +0000 Subject: [PATCH 22/37] Added eve configuration --- modules/docker-compose.perception.yaml | 3 +- .../yolov8_detection.py | 68 +++++++++++++++---- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index db2efd1a..1d03eb54 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -40,8 +40,9 @@ services: - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine + - /mnt/wato-drive2/perception_models/eve.onnx:/perception_models/eve.onnx - /mnt/wato-drive2/perception_models/:/perception_models - + lidar_object_detection: build: context: .. 
diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 7f242caa..ed297ce1 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,8 +62,8 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") + self.declare_parameter("onnx_model_path", "/perception_models/eve.onnx") #tensorRT. extensions + self.declare_parameter("tensorRT_model_path", "/perception_models/eve.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") self.declare_parameter("batch_publish_detection_topic", "/batch_detections") @@ -75,7 +75,7 @@ def __init__(self): self.declare_parameter("crop_mode", "LetterBox") self.declare_parameter("save_detections", False) - #Declare batch topic + #Declare batch topic self.declare_parameter("batch_inference_topic", "/batched_camera_message") @@ -143,6 +143,14 @@ def __init__(self): self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) self.ats.registerCallback(self.batch_inference_callback) + #Subscription for Eve cameras + self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + + #Create this callback function tomorrow + self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None @@ -164,17 +172,17 @@ def __init__(self): #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - # self.tensorRT_inferencing(self.input_info) - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) # setup vis publishers + #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) + #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 @@ -186,10 +194,10 @@ def __init__(self): def build_engine(self): #Only calling this function when we 
dont have an engine file #Reading the onnx file in the perception models directory - self.max_batch_size = 6 + self.max_batch_size = 3 self.channels = 3 - self.height = 640 - self.width = 640 + self.height = 1024 + self.width = 1024 self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] self.logger = trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) @@ -247,7 +255,7 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, (6,3,640,640)) + self.execution_context.set_input_shape(self.input_tensor_name, (3,3,1024,1024)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] @@ -349,6 +357,24 @@ def tensorRT_inferencing(self, batch_array): ) return self.outputs_ndarray + def eve_batch_inference_callback(self, msg1,msg2,msg3): + imageHeight = self.input_info[0].shape[-1] + imageWidth = self.input_info[0].shape[-2] + imageList = [msg1,msg2,msg3] + eve_batched_list = [] + for msg in imageList: + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage, (imageWidth, imageHeight)) + rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers imageHeight = self.input_info[0].shape[-1] @@ -405,8 +431,6 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): # To store final results after NMS - - def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 @@ -472,6 +496,26 @@ def get_iou(self, box1, box2): return iou + def publish_bounding_boxes(self, results_dict): + detection2darray = Detection2DArray() + for image_idx, bboxes in results_dict.items(): + detection2d = Detection2D() + x_min, y_min, x_max, y_max, confidence, class_id = bbox + center_x = (x_min + x_max) / 2 + center_y = (y_min + y_max) / 2 + size_x = x_max - x_min + size_y = y_max - y_min + detection2d.bbox.center.position.x = center_x + detection2d.bbox.center.position.y = center_y + detection2d.bbox.size_x = size_x + detection2d.bbox.size_y = size_y + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = class_id + detected_object.hypothesis.score = confidence + detection2darray.detections.append(detection2d) + + + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) @@ -583,7 +627,7 @@ def publish_vis(self, annotated_img, msg, feed): imgmsg.header.stamp = msg.header.stamp imgmsg.header.frame_id = msg.header.frame_id self.vis_publisher.publish(imgmsg) - + def publish_detections(self, detections, msg, feed): # Publish detections to an detectionList message detection2darray = Detection2DArray() From 
976ffb143d1d01edf3f199239242cd9dc279f2ce Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 27 Nov 2024 21:44:22 +0000 Subject: [PATCH 23/37] Eve configuiration completed for batch inf --- .../yolov8_detection.py | 90 +++++++++++++------ 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index ed297ce1..c19c1f0d 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,8 +62,9 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/eve.onnx") #tensorRT. extensions - self.declare_parameter("tensorRT_model_path", "/perception_models/eve.engine") + self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") #tensorRT. extensions + self.declare_parameter("tensorRT_model_path", "/perception_models/tensorRT.engine") + self.declare_parameter("eve_tensorRT_model_path", "/perception_models/eve.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") self.declare_parameter("batch_publish_detection_topic", "/batch_detections") @@ -87,7 +88,7 @@ def __init__(self): self.onnx_model_path = self.get_parameter("onnx_model_path").value self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value - + self.eve_tensorRT_model_path = self.get_parameter("eve_tensorRT_model_path").value #Camera topics for eve self.camera_topic = self.get_parameter("camera_topic").value self.left_camera_topic = self.get_parameter("left_camera_topic").value @@ -170,8 +171,8 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() - self.initialize_engine(self.tensorRT_model_path) - self.input_info, self.output_info = self.initialize_tensors() + # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) + # self.input_info, self.output_info = self.initialize_tensors() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -242,7 +243,7 @@ def build_engine(self): f.write(self.engine_bytes) self.get_logger().info("FINISHED WRITING ") - def initialize_engine(self, weight): + def initialize_engine(self, weight, batch_size, rgb, width, height): self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) trt.init_libnvinfer_plugins(self.logger, namespace ='') @@ -255,9 +256,9 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, (3,3,1024,1024)) + self.execution_context.set_input_shape(self.input_tensor_name, (batch_size, rgb, width, height)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - self.get_logger().info(f"tensor Shape:{self.inputShape}") + #self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = 
[self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -278,8 +279,8 @@ def initialize_engine(self, weight): self.input_names = self.names[:self.num_inputs] #self.output_names = self.names[:self.num_outputs] #This includes the input name in output self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it - self.get_logger().info(f"INPUT NAMES:: {self.input_names}") - self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + #self.get_logger().info(f"INPUT NAMES:: {self.input_names}") + #self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") def initialize_tensors(self): @@ -287,13 +288,13 @@ def initialize_tensors(self): self.input_info = [] self.output_info = [] self.output_ptrs = [] - batch_size = 6 + for name in self.output_names: - self.get_logger().info(f"NAMES of outputs:{name}") + #self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) - self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + #self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -306,7 +307,7 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) - self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") + #self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -357,15 +358,15 @@ def tensorRT_inferencing(self, batch_array): ) return self.outputs_ndarray + + # will be called with eve rosbag def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageHeight = self.input_info[0].shape[-1] - imageWidth = self.input_info[0].shape[-2] imageList = [msg1,msg2,msg3] eve_batched_list = [] for msg in imageList: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (imageWidth, imageHeight)) + resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) @@ -374,26 +375,59 @@ def eve_batch_inference_callback(self, msg1,msg2,msg3): batched_list.append(tensor_image) batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + batch_size = detections[0].shape[0] + anchors = detections[0].shape[2] + results_dict = {i: [] for i in range(batch_size)} + for image_idx in range(batch_size): + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + image_detections = detections[0][image_idx] + + if 
image_idx not in results_dict: + results_dict[image_idx] = [] + + for anchor_idx in range(anchors): + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.45: + + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + + + # will be called with nuscenes rosbag def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers - imageHeight = self.input_info[0].shape[-1] - imageWidth = self.input_info[0].shape[-2] image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(imageWidth, imageHeight)) + resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) tensor_image = torch.from_numpy(float_image).to('cuda') batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) + self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images @@ -402,10 +436,10 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ - results_dict = {i: [] for i in range(6)} batch_size = detections[0].shape[0] + results_dict = {i: [] for i in range(batch_size)} for image_idx in range(batch_size): - #self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") image_detections = detections[0][image_idx] if image_idx not in results_dict: @@ -422,13 +456,14 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): predicted_class = np.argmax(class_probs) predicted_prob = class_probs[predicted_class] - if confidence > 0.4: + if confidence > 0.45: #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, imageHeight, imageWidth) + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, 
predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + #self.get_logger().info(f"resultsdict: {results_dict}") # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): @@ -449,13 +484,10 @@ def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): while bboxes_sorted: current_box = bboxes_sorted.pop(0) bbox_list_new.append(current_box) - # Filter boxes with IoU below the threshold bboxes_sorted = [ box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold ] - - self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") return bbox_list_new def get_iou(self, box1, box2): From 9624d8d64db2e8f2451a739c5964a5a8ce35fdf6 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 28 Nov 2024 21:59:42 +0000 Subject: [PATCH 24/37] Created publishers and msgs --- .../yolov8_detection.py | 7 +++---- .../CMakeLists.txt | 21 +++++++++++++++++++ .../msg/BatchDetection.msg | 3 +++ .../camera_object_detection_msgs/package.xml | 20 ++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index c19c1f0d..1530bf38 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -249,7 +249,7 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) - self.get_logger().info(f"TENSORRT Model:{self.weight}") + #self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() @@ -460,16 +460,15 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - #self.get_logger().info(f"resultsdict: {results_dict}") + # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 - x_min = x_center - half_width y_min = y_center - half_height x_max = x_center + half_width diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt new file mode 100644 index 00000000..d4d41677 --- /dev/null +++ 
b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.8) +project(camera_objection_detection_msgs) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +#find dependencies +find_package(ament_cmake REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(std_msgs REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(vision_msgs REQUIRED) + + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/BatchDetection.msg" + DEPENDENCIES std_msgs sensor_msgs vision_msgs +) +ament_export_dependencies(rosidl_default_runtime) +ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg new file mode 100644 index 00000000..e8ecdf73 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg @@ -0,0 +1,3 @@ +std_msgs/Header header +sensor_msgs/CompressedImage[] images # Array of compressed images in the batch +vision_msgs/Detection2DArray[] detections # Array of detections for each image diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml new file mode 100644 index 00000000..4e218806 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -0,0 +1,20 @@ + + + + camera_object_detction_msgs + 0.0.0 + TODO: Package description + a95rahma + TODO: License declaration + ament_cmake + std_msgs + sensor_msgs + vision_msgs + + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + + ament_cmake + + \ No newline at end of file From d24695acaf9b481de913d834f5d0769413f45080 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 29 Nov 2024 22:28:18 +0000 Subject: [PATCH 25/37] Added msg file configs --- .../camera_object_detection.Dockerfile | 1 + .../yolov8_detection.py | 64 +++++++++++++------ .../camera_object_detection/package.xml | 3 +- .../CMakeLists.txt | 14 ++-- .../camera_object_detection_msgs/package.xml | 3 +- 5 files changed, 58 insertions(+), 27 deletions(-) diff --git a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile index 5c3ee385..242c2d40 100644 --- a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile +++ b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile @@ -7,6 +7,7 @@ WORKDIR ${AMENT_WS}/src # Copy in source code COPY src/perception/camera_object_detection camera_object_detection +COPY src/wato_msgs/perception_msgs/camera_object_detection_msgs camera_object_detection_msgs # Scan for rosdeps RUN apt-get -qq update && rosdep update && \ diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 1530bf38..54948b79 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -2,12 +2,15 @@ from rclpy.node import Node import os from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer +from camera_object_detection_msgs.msg import 
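Once the new interface package is built and the workspace is sourced (for example with colcon build --packages-select camera_object_detection_msgs), a BatchDetection message can be filled like any other generated ROS 2 type; a minimal sketch with placeholder field values:

from camera_object_detection_msgs.msg import BatchDetection
from sensor_msgs.msg import CompressedImage
from vision_msgs.msg import Detection2DArray

batch = BatchDetection()
batch.header.frame_id = "batch"                      # illustrative frame id
batch.images.append(CompressedImage(format="jpeg"))  # one entry per camera image
batch.detections.append(Detection2DArray())          # matching detections per image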
BatchDetection from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, Detection2D, Detection2DArray, ) + +from std_msgs.msg import Header from pathlib import Path from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend @@ -179,10 +182,10 @@ def __init__(self): # setup vis publishers #Batch vis publishers - self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) + self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) - + #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( @@ -460,7 +463,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) @@ -526,26 +529,47 @@ def get_iou(self, box1, box2): iou = area_inter / area_union return iou + def publish_batch(self, image_list, results_dict): + batch_msg = BatchDetection() + batch_msg.header = Header() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "camera_frame" + for image in image_list: + compressed_msg = CompressedImage() + compressed_msg.header.stamp = self.get_clock().now().to_msg() + compressed_msg.header.frame_id = "camera_frame" + compressed_msg.format = "jpeg" + compressed_image = cv2.imencode('.jpg', image)[1].tobytes() + compressed_msg.data = compressed_image + batch_msgs.images.append(compressed_msg) + for image_idx, bboxes in result_dict.items(): + detection_array_msg = Detection2DArray() + for bbox in bboxes: + x_min, y_min, x_max, y_max, confidence, class_id = bbox + detection_msg = Detection2D() + + # Populate bounding box + detection_msg.bbox.center.x = (x_min + x_max) / 2 + detection_msg.bbox.center.y = (y_min + y_max) / 2 + detection_msg.bbox.size_x = x_max - x_min + detection_msg.bbox.size_y = y_max - y_min - def publish_bounding_boxes(self, results_dict): - detection2darray = Detection2DArray() - for image_idx, bboxes in results_dict.items(): - detection2d = Detection2D() - x_min, y_min, x_max, y_max, confidence, class_id = bbox - center_x = (x_min + x_max) / 2 - center_y = (y_min + y_max) / 2 - size_x = x_max - x_min - size_y = y_max - y_min - detection2d.bbox.center.position.x = center_x - detection2d.bbox.center.position.y = center_y - detection2d.bbox.size_x = size_x - detection2d.bbox.size_y = size_y - detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = class_id - detected_object.hypothesis.score = confidence - detection2darray.detections.append(detection2d) + # Add class and confidence metadata + hypothesis = ObjectHypothesisWithPose() + hypothesis.id = int(class_id) + hypothesis.score = confidence + 
detection_msg.results.append(hypothesis) + + detection_array_msg.detections.append(detection_msg) + + batch_msg.detections.append(detection_array_msg) + self.batch_publisher.publish(batch_msg) + self.get_logger().info("Published batch message with images and detections.") + #we populate the compressedIMage array with the image list with raw images from nuscenes + #We populate the detection2d array with our results_dict + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): diff --git a/src/perception/camera_object_detection/package.xml b/src/perception/camera_object_detection/package.xml index a8b24405..d1dfe6b3 100755 --- a/src/perception/camera_object_detection/package.xml +++ b/src/perception/camera_object_detection/package.xml @@ -12,7 +12,8 @@ sensor_msgs vision_msgs std_msgs - + camera_object_detection_msgs + ament_copyright ament_flake8 ament_pep257 diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index d4d41677..eacbdaee 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -1,11 +1,15 @@ cmake_minimum_required(VERSION 3.8) -project(camera_objection_detection_msgs) +project(camera_object_detection_msgs) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) +endif() if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() -#find dependencies + find_package(ament_cmake REQUIRED) find_package(rosidl_default_generators REQUIRED) find_package(std_msgs REQUIRED) @@ -13,9 +17,9 @@ find_package(sensor_msgs REQUIRED) find_package(vision_msgs REQUIRED) + rosidl_generate_interfaces(${PROJECT_NAME} - "msg/BatchDetection.msg" - DEPENDENCIES std_msgs sensor_msgs vision_msgs -) + "msg/BatchDetection.msg" + DEPENDENCIES std_msgs sensor_msgs vision_msgs) ament_export_dependencies(rosidl_default_runtime) ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index 4e218806..72d71357 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -1,11 +1,12 @@ - camera_object_detction_msgs + camera_object_detection_msgs 0.0.0 TODO: Package description a95rahma TODO: License declaration + ament_cmake std_msgs sensor_msgs From b2cd80cd802d7aec226f21804a61c98fa974378c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 3 Dec 2024 17:39:12 +0000 Subject: [PATCH 26/37] Refactored publishing --- .../camera_object_detection.Dockerfile | 2 +- .../camera_object_detection/package.xml | 2 ++ .../CMakeLists.txt | 8 ++--- .../camera_object_detection_msgs/package.xml | 32 +++++++++---------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile index 242c2d40..933bddb6 100644 --- a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile +++ b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile @@ -33,7 +33,7 @@ RUN rm requirements.txt # Install Rosdep requirements COPY --from=source /tmp/colcon_install_list /tmp/colcon_install_list RUN 
apt-get update && apt-fast install -qq -y --no-install-recommends $(cat /tmp/colcon_install_list) - +RUN apt install ros-$ROS_DISTRO-tf-transformations -y # Copy in source code from source stage WORKDIR ${AMENT_WS} COPY --from=source ${AMENT_WS}/src src diff --git a/src/perception/camera_object_detection/package.xml b/src/perception/camera_object_detection/package.xml index d1dfe6b3..fc078392 100755 --- a/src/perception/camera_object_detection/package.xml +++ b/src/perception/camera_object_detection/package.xml @@ -9,10 +9,12 @@ TODO: License declaration cv_bridge + rclpy sensor_msgs vision_msgs std_msgs camera_object_detection_msgs + python3-pytest ament_copyright ament_flake8 diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index eacbdaee..21c77460 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -1,10 +1,6 @@ cmake_minimum_required(VERSION 3.8) project(camera_object_detection_msgs) -if(NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) -endif() - if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() @@ -20,6 +16,8 @@ find_package(vision_msgs REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/BatchDetection.msg" - DEPENDENCIES std_msgs sensor_msgs vision_msgs) + DEPENDENCIES std_msgs sensor_msgs vision_msgs +) + ament_export_dependencies(rosidl_default_runtime) ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index 72d71357..b4edda30 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -1,21 +1,21 @@ - camera_object_detection_msgs - 0.0.0 - TODO: Package description - a95rahma - TODO: License declaration - - ament_cmake - std_msgs - sensor_msgs - vision_msgs +camera_object_detection_msgs +0.0.0 +TODO: Package description +a95rahma +TODO: License declaration +ament_cmake - rosidl_default_generators - rosidl_default_runtime - rosidl_interface_packages - +std_msgs +vision_msgs +geometry_msgs + +rosidl_default_generators +rosidl_default_runtime +rosidl_interface_packages + ament_cmake - - \ No newline at end of file + + \ No newline at end of file From 6de65300578170b4b462493ca56d39122b2cff5c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 4 Dec 2024 20:06:11 +0000 Subject: [PATCH 27/37] Publishing detections on foxglove --- .../yolov8_detection.py | 93 +++++++++++-------- .../camera_object_detection_msgs/package.xml | 2 +- 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 54948b79..a9736031 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -420,6 +420,8 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = 
compressedImage.shape[:2] + self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 @@ -463,10 +465,10 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + self.publish_batch(image_list, results_dict) # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): @@ -529,49 +531,62 @@ def get_iou(self, box1, box2): iou = area_inter / area_union return iou + + def publish_batch(self, image_list, results_dict): batch_msg = BatchDetection() - batch_msg.header = Header() batch_msg.header.stamp = self.get_clock().now().to_msg() - batch_msg.header.frame_id = "camera_frame" - for image in image_list: - compressed_msg = CompressedImage() - compressed_msg.header.stamp = self.get_clock().now().to_msg() - compressed_msg.header.frame_id = "camera_frame" - compressed_msg.format = "jpeg" - compressed_image = cv2.imencode('.jpg', image)[1].tobytes() - compressed_msg.data = compressed_image - batch_msgs.images.append(compressed_msg) - for image_idx, bboxes in result_dict.items(): - detection_array_msg = Detection2DArray() - for bbox in bboxes: - x_min, y_min, x_max, y_max, confidence, class_id = bbox - detection_msg = Detection2D() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + compressed_image = CompressedImage() + compressed_image.header = img_msg.header + compressed_image.format = "jpeg" + compressed_image.data = img_msg.data # Original data + batch_msg.images.append(compressed_image) + + for idx in range(len(image_list)): + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"image_{idx}" + for bbox in results_dict[idx]: + detection = Detection2D() + x_min, y_min, x_max, y_max, confidence, predicted_class = bbox + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = x_max - x_min + detection.bbox.size_y = y_max - y_min + detected_object = ObjectHypothesisWithPose() - # Populate bounding box - detection_msg.bbox.center.x = (x_min + x_max) / 2 - detection_msg.bbox.center.y = (y_min + y_max) / 2 - detection_msg.bbox.size_x = x_max - x_min - detection_msg.bbox.size_y = y_max - y_min - - # Add class and confidence metadata - hypothesis = ObjectHypothesisWithPose() - hypothesis.id = int(class_id) - hypothesis.score = confidence - detection_msg.results.append(hypothesis) - - detection_array_msg.detections.append(detection_msg) + detected_object.hypothesis.class_id = str(int(predicted_class)) + # self.get_logger().info(f"Score: {confidence} (type: {type(confidence)})") + detected_object.hypothesis.score = float(confidence) + - 
batch_msg.detections.append(detection_array_msg) - self.batch_publisher.publish(batch_msg) - self.get_logger().info("Published batch message with images and detections.") - - - #we populate the compressedIMage array with the image list with raw images from nuscenes - #We populate the detection2d array with our results_dict - - + detection.results.append(detected_object) + detection_array.detections.append(detection) + + batch_msg.detections.append(detection_array) + self.batch_detection_publisher.publish(detection_array) + vis_image = self.draw_bounding_boxes(img_msg, results_dict[idx]) + vis_compressed_image = CompressedImage() + vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.format = "jpeg" + vis_compressed_image.data = cv2.imencode('.jpg', vis_image)[1].tobytes() + self.batch_vis_publisher.publish(vis_compressed_image) + self.batched_camera_message_publisher.publish(batch_msg) #Converting to tensor for batching + + def draw_bounding_boxes(self,img_msg, detections): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + for bbox in detections: + x_min, y_min, x_max, y_max, confidence, predicted_class = bbox + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + #self.get_logger().info(f"{label}") + cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2) + cv2.putText(image, label, (int(x_min), int(y_min) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + return image def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index b4edda30..62cae926 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -4,7 +4,7 @@ camera_object_detection_msgs 0.0.0 TODO: Package description -a95rahma +Arfan12630 TODO: License declaration ament_cmake From a85d45f2ddb4f2974b1e1a928a1b09036cd1f72f Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 5 Dec 2024 18:27:54 +0000 Subject: [PATCH 28/37] Showing every image in foxglove topic --- .../yolov8_detection.py | 133 +++++++++++------- 1 file changed, 86 insertions(+), 47 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a9736031..ce5f08ab 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,7 +144,7 @@ def __init__(self): self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, 
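On the consumer side (for a quick check outside Foxglove), the annotated CompressedImage published here can be turned back into an OpenCV image with the same cv2 calls the node already uses for its inputs; a small sketch:

import cv2
import numpy as np
from sensor_msgs.msg import CompressedImage

def decode_compressed(msg: CompressedImage) -> np.ndarray:
    # JPEG bytes -> BGR image, the inverse of the cv2.imencode('.jpg', ...) call above.
    buffer = np.frombuffer(msg.data, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)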
self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -421,15 +421,15 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) original_height, original_width = compressedImage.shape[:2] - self.get_logger().info(f"original Height:{original_height} original width: {original_width}") + #self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] + # tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(float_image) + # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) self.input_info, self.output_info = self.initialize_tensors() @@ -441,36 +441,64 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ + batch_size = detections[0].shape[0] results_dict = {i: [] for i in range(batch_size)} for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + # Extract detections for the current image image_detections = detections[0][image_idx] - - if image_idx not in results_dict: - results_dict[image_idx] = [] - for anchor_idx in range(8400): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] + # Extract individual components + x_centers = image_detections[0] + y_centers = image_detections[1] + widths = image_detections[2] + heights = image_detections[3] + confidences = image_detections[4] + class_probs = image_detections[5:18] # Shape: (13, 8400) + + # Filter by confidence threshold + valid_indices = confidences > 0.3 + if not np.any(valid_indices): + continue # Skip image if no detections pass confidence threshold + + # Filter relevant detections + x_centers = x_centers[valid_indices] + y_centers = y_centers[valid_indices] + widths = widths[valid_indices] + heights = heights[valid_indices] + confidences = confidences[valid_indices] + class_probs = class_probs[:, valid_indices] + + # Predict classes and probabilities + predicted_classes = np.argmax(class_probs, axis=0) + predicted_probs = class_probs[predicted_classes, range(class_probs.shape[1])] + + # Convert boxes to (x_min, y_min, x_max, y_max) + x_mins = x_centers - widths / 2 + y_mins = y_centers - heights / 2 + x_maxs = x_centers + widths / 2 + y_maxs = y_centers + heights / 2 + + # Combine detections into final format + 
filtered_detections = np.stack([x_mins, y_mins, x_maxs, y_maxs, confidences, predicted_classes], axis=-1) + #self.get_logger().info(f"Detections:{filtered_detections}") + + # Apply NMS + results_dict[image_idx] = self.nms(filtered_detections.tolist(), confidence_threshold=0.55, iou_threshold=0.5) + +#Log final results if needed + for image_idx, detections in results_dict.items(): + #self.get_logger().info(f"Image {image_idx}: {len(detections)} detections") + for detection in detections: + x_min, y_min, x_max, y_max, confidence, predicted_class = detection + self.get_logger().info( + f" Bounding Box: [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + f"Confidence: {confidence:.2f}, Class: {predicted_class}" + ) - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) self.publish_batch(image_list, results_dict) # To store final results after NMS - + def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 @@ -480,7 +508,7 @@ def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageW y_max = y_center + half_height return x_min, y_min, x_max, y_max - def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): + def nms(self,bboxes, confidence_threshold= 0.45,iou_threshold = 0.5 ): #Confidence threshold bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) @@ -539,42 +567,53 @@ def publish_batch(self, image_list, results_dict): batch_msg.header.frame_id = "batch" for idx, img_msg in enumerate(image_list): - compressed_image = CompressedImage() - compressed_image.header = img_msg.header - compressed_image.format = "jpeg" - compressed_image.data = img_msg.data # Original data - batch_msg.images.append(compressed_image) - - for idx in range(len(image_list)): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + height, width = image.shape[:2] + detection_array = Detection2DArray() detection_array = Detection2DArray() detection_array.header.stamp = batch_msg.header.stamp - detection_array.header.frame_id = f"image_{idx}" + detection_array.header.frame_id = f"camera_{idx}" + detections = results_dict[idx] for bbox in results_dict[idx]: + x_min, y_min, x_max, y_max , confidence, predicted_class = bbox + x_min = int(x_min * width / 640) + x_max = int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 640) + + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) + cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) detection = Detection2D() - x_min, y_min, x_max, y_max, confidence, predicted_class = bbox detection.bbox.center.position.x = (x_min + x_max) / 2 detection.bbox.center.position.y = (y_min + y_max) / 2 - detection.bbox.size_x = x_max - x_min - detection.bbox.size_y = y_max - y_min + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = 
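The scaling in publish_batch maps boxes from the 640x640 network input back to the original camera resolution; a small illustrative helper with the same arithmetic (the 640 default mirrors the resize used in preprocessing):

def rescale_box(box, orig_width, orig_height, model_size=640):
    # Box predicted in model space (model_size x model_size) -> pixel coords in the original image.
    x_min, y_min, x_max, y_max = box
    sx = orig_width / model_size
    sy = orig_height / model_size
    return int(x_min * sx), int(y_min * sy), int(x_max * sx), int(y_max * sy)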
float(y_max - y_min) + detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = str(int(predicted_class)) - # self.get_logger().info(f"Score: {confidence} (type: {type(confidence)})") detected_object.hypothesis.score = float(confidence) - - detection.results.append(detected_object) detection_array.detections.append(detection) - + + # Add detection array to batch message batch_msg.detections.append(detection_array) - self.batch_detection_publisher.publish(detection_array) - vis_image = self.draw_bounding_boxes(img_msg, results_dict[idx]) + + # Publish visualized image vis_compressed_image = CompressedImage() vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.header.frame_id = f"camera_{idx}" vis_compressed_image.format = "jpeg" - vis_compressed_image.data = cv2.imencode('.jpg', vis_image)[1].tobytes() + vis_compressed_image.data = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() self.batch_vis_publisher.publish(vis_compressed_image) + + # Publish Detection2DArray + self.batch_detection_publisher.publish(detection_array) + + # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) + #Converting to tensor for batching def draw_bounding_boxes(self,img_msg, detections): From c77e53f84d392f29a3ee3175ede97dcfdea2d506 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 6 Dec 2024 14:53:52 +0000 Subject: [PATCH 29/37] Getting better accuracy for detections --- .../camera_object_detection/yolov8_detection.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index ce5f08ab..497e13bf 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,7 +144,8 @@ def __init__(self): self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -176,6 +177,8 @@ def __init__(self): #self.build_engine() # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) # self.input_info, self.output_info = self.initialize_tensors() + self.last_publish_time = self.get_clock().now() + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = 
self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -412,9 +415,9 @@ def eve_batch_inference_callback(self, msg1,msg2,msg3): # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + def batch_inference_callback(self,msg1,msg2,msg3): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1,msg2,msg3,msg4,msg5,msg6] + image_list = [msg1,msg2,msg3] batched_list = [] #Preprocessing for msg in image_list: @@ -431,7 +434,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ @@ -562,6 +565,10 @@ def get_iou(self, box1, box2): def publish_batch(self, image_list, results_dict): + current_time = self.get_clock().now() + if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) + return + self.last_publish_time = current_time batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() batch_msg.header.frame_id = "batch" From e06c1aa89f2edc3081c58db2841bfddc2b4b63cf Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sat, 7 Dec 2024 00:23:08 +0000 Subject: [PATCH 30/37] Refactored detection parsing --- .../yolov8_detection.py | 329 ++++++++---------- 1 file changed, 149 insertions(+), 180 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 497e13bf..fa04bc73 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -145,7 +145,8 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) + self.ats = ApproximateTimeSynchronizer([self.front_right_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -321,8 +322,7 @@ def initialize_tensors(self): cudart.cudaMemcpyAsync( self.input_gpu, self.input_cpu.ctypes.data, self.input_cpu.nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) - + self.input_info.append(Tensor(name, self.dtype, 
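The publish throttle added here drops any publish that arrives less than 0.5 s after the previous one; the same pattern in isolation, as a hedged sketch using a standalone rclpy clock:

from rclpy.clock import Clock

clock = Clock()
last_publish_time = clock.now()

def should_publish(rate_hz=2.0):
    # Return True at most rate_hz times per second; otherwise skip this cycle.
    global last_publish_time
    now = clock.now()
    if (now - last_publish_time).nanoseconds < 1e9 / rate_hz:
        return False
    last_publish_time = now
    return True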
self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) return self.input_info, self.output_info def tensorRT_inferencing(self, batch_array): @@ -362,62 +362,14 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - - return self.outputs_ndarray - - # will be called with eve rosbag - def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageList = [msg1,msg2,msg3] - eve_batched_list = [] - for msg in imageList: - numpy_array = np.frombuffer(msg.data, np.uint8) - compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) - rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) - normalized_image = rgb_image / 255.0 - chw_image = np.transpose(normalized_image, (2,0,1)) - float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] - batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) - self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - batch_size = detections[0].shape[0] - anchors = detections[0].shape[2] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") - image_detections = detections[0][image_idx] - if image_idx not in results_dict: - results_dict[image_idx] = [] - - for anchor_idx in range(anchors): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] - - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1,msg2,msg3): + def batch_inference_callback(self,msg1): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1,msg2,msg3] + image_list = [msg1] batched_list = [] #Preprocessing for msg in image_list: @@ -434,7 +386,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ @@ -444,125 +396,104 @@ def batch_inference_callback(self,msg1,msg2,msg3): - 
8400 represents 8400 anchors """ - - batch_size = detections[0].shape[0] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # Extract detections for the current image - image_detections = detections[0][image_idx] + # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") + #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") + decoded_results = self.postprocess_detections_nuscenes(detections) + + + + + def postprocess_detections_nuscenes(self, detections, class_labels = None ): + detection_tensor = detections[0] # Assuming the first tensor corresponds to detections + batch_size = detection_tensor.shape[0] # Number of images in the batch + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) + decoded_results = [] # A list to hold decoded detections for each image + for batch_idx in range(batch_size): + image_detections = detection_tensor[batch_idx] # Detections for the current image + valid_detections = [] # Store valid detections for this image + for anchor_idx in range(num_anchors): # Loop through all anchors + detection = image_detections[:, anchor_idx] # Get data for one anchor + confidence = detection[4] # Extract confidence score + + # Only process detections with confidence above the threshold + if confidence > 0.5: + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[5:13] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + predicted_prob = class_probs[predicted_class] + + # Convert to (x_min, y_min, x_max, y_max) + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class + ]) + valid_detections = torch.tensor(valid_detections) + if valid_detections.shape[0] == 0: + decoded_results.append([]) # No detections for this image + continue + + # Apply Non-Maximum Suppression (NMS) + nms_results = non_max_suppression( + valid_detections.unsqueeze(0), # Add batch dimension for NMS + conf_thres=0.5, # Confidence threshold + iou_thres=0.45 # IoU threshold + ) - # Extract individual components - x_centers = image_detections[0] - y_centers = image_detections[1] - widths = image_detections[2] - heights = image_detections[3] - confidences = image_detections[4] - class_probs = image_detections[5:18] # Shape: (13, 8400) - - # Filter by confidence threshold - valid_indices = confidences > 0.3 - if not np.any(valid_indices): - continue # Skip image if no detections pass confidence threshold - - # Filter relevant detections - x_centers = x_centers[valid_indices] - y_centers = y_centers[valid_indices] - widths = widths[valid_indices] - heights = heights[valid_indices] - confidences = confidences[valid_indices] - class_probs = class_probs[:, valid_indices] - - # Predict classes and probabilities - predicted_classes = np.argmax(class_probs, axis=0) - predicted_probs = class_probs[predicted_classes, range(class_probs.shape[1])] - - # Convert boxes to (x_min, y_min, x_max, y_max) - x_mins = x_centers - widths / 2 - y_mins = y_centers - heights / 2 - x_maxs = x_centers + widths / 2 - y_maxs = y_centers + heights / 2 - - # Combine detections into final format - filtered_detections = np.stack([x_mins, y_mins, x_maxs, y_maxs, confidences, predicted_classes], axis=-1) - #self.get_logger().info(f"Detections:{filtered_detections}") - - # Apply NMS - results_dict[image_idx] = 
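Assuming the layout described in the docstring (rows 0-3 box xywh, row 4 a confidence score, remaining rows per-class scores, one column per anchor), slicing one image out of the (batch, 84, 8400) output looks roughly like this; the random array is only a stand-in for the real engine output:

import numpy as np

out = np.random.rand(6, 84, 8400).astype(np.float32)  # stand-in for detections[0]
img0 = out[0]                    # (84, 8400): one column per anchor
boxes_xywh = img0[:4].T          # (8400, 4) centre-x, centre-y, width, height
conf = img0[4]                   # (8400,) confidence per anchor, as this node interprets it
cls = img0[5:].argmax(axis=0)    # (8400,) index of the best-scoring class
keep = conf > 0.45
print(boxes_xywh[keep].shape, cls[keep].shape)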
self.nms(filtered_detections.tolist(), confidence_threshold=0.55, iou_threshold=0.5) - -#Log final results if needed - for image_idx, detections in results_dict.items(): - #self.get_logger().info(f"Image {image_idx}: {len(detections)} detections") - for detection in detections: - x_min, y_min, x_max, y_max, confidence, predicted_class = detection + final_detections = [] + for det in nms_results[0]: # Assuming only one set of results + x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() + class_name = class_labels.get(int(predicted_class), "unknown") if class_labels else str(int(predicted_class)) + final_detections.append({ + "bbox": [x_min, y_min, x_max, y_max], + "confidence": confidence, + "class": int(predicted_class), + "class_name": class_name + }) + + decoded_results.append(final_detections) + for batch_idx, batch_detections in enumerate(decoded_results): + if len(batch_detections) == 0: + self.get_logger().info(f"Batch {batch_idx}: No detections") + continue + + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + class_name = detection["class_name"] + + # Log detection details self.get_logger().info( - f" Bounding Box: [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - f"Confidence: {confidence:.2f}, Class: {predicted_class}" + f"Batch {batch_idx}, Anchor {anchor_idx}: " + f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" ) - self.publish_batch(image_list, results_dict) - # To store final results after NMS - - def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): - half_width = width /2 - half_height = height /2 - x_min = x_center - half_width - y_min = y_center - half_height - x_max = x_center + half_width - y_max = y_center + half_height - return x_min, y_min, x_max, y_max - - def nms(self,bboxes, confidence_threshold= 0.45,iou_threshold = 0.5 ): - #Confidence threshold - bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] - bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) - bbox_list_new = [] - while bboxes_sorted: - current_box = bboxes_sorted.pop(0) - bbox_list_new.append(current_box) - # Filter boxes with IoU below the threshold - bboxes_sorted = [ - box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold - ] - return bbox_list_new - - def get_iou(self, box1, box2): - x1, y1, x2, y2 = box1 - x3, y3, x4, y4 = box2 - - # Intersection coordinates - x_inter1 = max(x1, x3) - y_inter1 = max(y1, y3) - x_inter2 = min(x2, x4) - y_inter2 = min(y2, y4) - - # Width and height of the intersection - width_inter = abs(x_inter2 - x_inter1) - height_inter = abs(y_inter2 - y_inter1) - - # If there is no intersection - if x_inter2 < x_inter1 or y_inter2 < y_inter1: - return 0.0 - - # Area of intersection - area_inter = width_inter * height_inter - - # Areas of the two boxes - width_box1 = abs(x2 - x1) - height_box1 = abs(y2 - y1) - area_box1 = width_box1 * height_box1 - - width_box2 = abs(x4 - x3) - height_box2 = abs(y4 - y3) - area_box2 = width_box2 * height_box2 - - # Union area - area_union = area_box1 + area_box2 - area_inter - - # Intersection over Union - iou = area_inter / area_union - - return iou - + return decoded_results + # decoded_results.append(final_detections) + 
# class_name = class_labels.get(predicted_class, "unknown") if class_labels else str(predicted_class) + # self.get_logger().info( + # f"Batch {batch_idx}, Anchor {anchor_idx}: " + # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" + # ) + + # # Store the detection in a structured format + # valid_detections.append({ + # "bbox": [x_min, y_min, x_max, y_max], + # "confidence": confidence, + # "class": predicted_class, + # "class_name": class_name + # }) + + # # Log the number of valid detections for this image + # self.get_logger().info(f"Batch {batch_idx}: {len(valid_detections)} valid detections") + def publish_batch(self, image_list, results_dict): current_time = self.get_clock().now() @@ -622,17 +553,55 @@ def publish_batch(self, image_list, results_dict): self.batched_camera_message_publisher.publish(batch_msg) #Converting to tensor for batching + + # will be called with eve rosbag + def eve_batch_inference_callback(self, msg1,msg2,msg3): + imageList = [msg1,msg2,msg3] + eve_batched_list = [] + for msg in imageList: + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) + rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + batch_size = detections[0].shape[0] + anchors = detections[0].shape[2] + results_dict = {i: [] for i in range(batch_size)} + for image_idx in range(batch_size): + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + image_detections = detections[0][image_idx] + + if image_idx not in results_dict: + results_dict[image_idx] = [] + + for anchor_idx in range(anchors): + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] + + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.45: + + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - def draw_bounding_boxes(self,img_msg, detections): - numpy_image = np.frombuffer(img_msg.data, np.uint8) - image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) - for bbox in detections: - x_min, y_min, x_max, y_max, confidence, predicted_class = bbox - label = f"Class: {predicted_class}, Conf: {confidence:.2f}" - #self.get_logger().info(f"{label}") - 
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2) - cv2.putText(image, label, (int(x_min), int(y_min) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) - return image def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From a9822381a063dd892c3d5753a3554d0c2547e0a5 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 00:17:04 +0000 Subject: [PATCH 31/37] parsing multiple tensors --- .../yolov8_detection.py | 245 +++++++++++------- 1 file changed, 148 insertions(+), 97 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index fa04bc73..79edc685 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -146,17 +146,17 @@ def __init__(self): self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_right_camera_subscription], queue_size=10, slop=0.3) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) - #Subscription for Eve cameras - self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) - self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) - self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) - self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + # #Subscription for Eve cameras + # self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + # self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + # self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + # self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) - #Create this callback function tomorrow - self.eve_ats.registerCallback(self.eve_batch_inference_callback) + # #Create this callback function tomorrow + # self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None @@ -202,10 +202,10 @@ def __init__(self): def build_engine(self): #Only calling this function when we dont have an engine file #Reading the onnx file in the perception models directory - self.max_batch_size = 3 + self.max_batch_size = 6 self.channels = 3 - self.height = 1024 - self.width = 1024 + self.height = 640 + self.width = 640 self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] self.logger = 
trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) @@ -388,7 +388,9 @@ def batch_inference_callback(self,msg1): batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores @@ -398,8 +400,69 @@ def batch_inference_callback(self,msg1): """ # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") - decoded_results = self.postprocess_detections_nuscenes(detections) + # decoded_results = self.postprocess_detections_nuscenes(detections) + detection_tensor = detections[0] # Assuming the first tensor corresponds to detections + self.get_logger().info(f"Detection Tensor Length:{len(detection_tensor.shape)}") + self.get_logger().info(f"Detection Tensor Length of other tensors:{len(detections[1].shape)}") + batch_size = detection_tensor.shape[0] # Number of images in the batch + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) + # self.get_logger().info(f"detections shape:{detections[1].shape}") #80x80 + # self.get_logger().info(f"detections shape2:{detections[2][0][1][0].shape}") #40x40 + + decoded_results = [] # A list to hold decoded detections for each image + for batch_idx in range(batch_size): + image_detections = detection_tensor[batch_idx] # Detections for the current image + valid_detections = [] # Store valid detections for this image + for anchor_idx in range(num_anchors): # Loop through all anchors + detection = image_detections[:, anchor_idx] # Get data for one anchor + confidence = detection[4] # Extract confidence score + # Only process detections with confidence above the threshold + if confidence > 0.1: + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[5:13] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + predicted_prob = class_probs[predicted_class] + + # Convert to (x_min, y_min, x_max, y_max) + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class + ]) + valid_detections = torch.tensor(valid_detections) + if valid_detections.shape[0] == 0: + decoded_results.append([]) # No detections for this image + continue + + # Apply Non-Maximum Suppression (NMS) + nms_results = non_max_suppression( + valid_detections.unsqueeze(0), # Add batch dimension for NMS + conf_thres=0.2, # Confidence threshold + iou_thres=0.45 # IoU threshold + ) + + final_detections = [] + for img_idx, nms_result in enumerate(nms_results): # Loop through each image's NMS results + if nms_result is None or len(nms_result) == 0: + print(f"No detections for image {img_idx + 1}.") + decoded_results.append([]) # No detections for this image + continue + + final_detections = [] + for det in nms_result: # Loop through detections in this image + x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() + final_detections.append({ + "bbox": [x_min, y_min, x_max, y_max], + 
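build_engine only needs to run when the .engine file does not exist yet; a minimal offline sketch of the same ONNX-to-engine path, assuming TensorRT 8.x and that the exported ONNX input tensor is named "images" (the usual name for ultralytics exports; adjust if different):

import tensorrt as trt

def build_engine(onnx_path, engine_path, batch=6, size=640):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, "rb") as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError("Failed to parse the ONNX file")
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB workspace
    # Needed when the ONNX batch dimension is dynamic; "images" is an assumed input name.
    profile = builder.create_optimization_profile()
    profile.set_shape("images", (1, 3, size, size), (batch, 3, size, size), (batch, 3, size, size))
    config.add_optimization_profile(profile)
    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        raise RuntimeError("Engine build failed")
    with open(engine_path, "wb") as f:
        f.write(serialized)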
"confidence": confidence, + "class": int(predicted_class), + }) + + # Add detections for this image to the results + decoded_results.append(final_detections) + self.publish_batch_nuscenes(image_list, decoded_results) @@ -454,47 +517,82 @@ def postprocess_detections_nuscenes(self, detections, class_labels = None ): }) decoded_results.append(final_detections) - for batch_idx, batch_detections in enumerate(decoded_results): - if len(batch_detections) == 0: - self.get_logger().info(f"Batch {batch_idx}: No detections") - continue - - for anchor_idx, detection in enumerate(batch_detections): - # Extract values from the dictionary - bbox = detection["bbox"] - x_min, y_min, x_max, y_max = bbox - confidence = detection["confidence"] - predicted_class = detection["class"] - class_name = detection["class_name"] + # for batch_idx, batch_detections in enumerate(decoded_results): + # if len(batch_detections) == 0: + # self.get_logger().info(f"Batch {batch_idx}: No detections") + # continue + + # for anchor_idx, detection in enumerate(batch_detections): + # # Extract values from the dictionary + # bbox = detection["bbox"] + # x_min, y_min, x_max, y_max = bbox + # confidence = detection["confidence"] + # predicted_class = detection["class"] + # class_name = detection["class_name"] # Log detection details - self.get_logger().info( - f"Batch {batch_idx}, Anchor {anchor_idx}: " - f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - ) + # self.get_logger().info( + # f"Batch {batch_idx}, Anchor {anchor_idx}: " + # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" + # ) return decoded_results - # decoded_results.append(final_detections) - # class_name = class_labels.get(predicted_class, "unknown") if class_labels else str(predicted_class) - # self.get_logger().info( - # f"Batch {batch_idx}, Anchor {anchor_idx}: " - # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - # ) - - # # Store the detection in a structured format - # valid_detections.append({ - # "bbox": [x_min, y_min, x_max, y_max], - # "confidence": confidence, - # "class": predicted_class, - # "class_name": class_name - # }) - - # # Log the number of valid detections for this image - # self.get_logger().info(f"Batch {batch_idx}: {len(valid_detections)} valid detections") - - + def publish_batch_nuscenes(self, image_list, decoded_results): + batch_msg = BatchDetection() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + height, width = image.shape[:2] + annotator = Annotator(image, line_width=2, example="Class:0") + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"camera_{idx}" + for batch_idx, batch_detections in enumerate(decoded_results): + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + x_min = int(x_min * width / 640) + x_max = int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 
640) + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) + detection = Detection2D() + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = float(y_max - y_min) + + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = str(int(predicted_class)) + detected_object.hypothesis.score = float(confidence) + detection.results.append(detected_object) + detection_array.detections.append(detection) + batch_msg.detections.append(detection_array) + annotated_image = annotator.result() + vis_compressed_image = CompressedImage() + vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.header.frame_id = f"camera_{idx}" + vis_compressed_image.format = "jpeg" + vis_compressed_image.data = cv2.imencode('.jpg', annotated_image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() + self.batch_vis_publisher.publish(vis_compressed_image) + + # Publish Detection2DArray + self.batch_detection_publisher.publish(detection_array) + + # Publish batch detection message + self.batched_camera_message_publisher.publish(batch_msg) + + + + def publish_batch(self, image_list, results_dict): current_time = self.get_clock().now() if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) @@ -554,54 +652,7 @@ def publish_batch(self, image_list, results_dict): #Converting to tensor for batching - # will be called with eve rosbag - def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageList = [msg1,msg2,msg3] - eve_batched_list = [] - for msg in imageList: - numpy_array = np.frombuffer(msg.data, np.uint8) - compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) - rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) - normalized_image = rgb_image / 255.0 - chw_image = np.transpose(normalized_image, (2,0,1)) - float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] - batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) - self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - batch_size = detections[0].shape[0] - anchors = detections[0].shape[2] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") - image_detections = detections[0][image_idx] - - if image_idx not in results_dict: - results_dict[image_idx] = [] - - for anchor_idx in range(anchors): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] - - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding 
box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From 4a5cdd61c473c6e5bb180cb5b1671e564afe9601 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 06:11:38 +0000 Subject: [PATCH 32/37] Refactored Mounts --- modules/docker-compose.perception.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 1d03eb54..ef633077 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,16 +32,11 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/ylov8m_dynamic.onnx - - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine - /mnt/wato-drive2/perception_models/eve.onnx:/perception_models/eve.onnx - - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: build: From 5cea90fd86b38cad9897d1753eebb04417fe015e Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 15:41:48 +0000 Subject: [PATCH 33/37] Getting all detections --- .../yolov8_detection.py | 216 ++---------------- 1 file changed, 16 insertions(+), 200 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 79edc685..e7a8af22 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -176,8 +176,6 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() - # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) - # self.input_info, self.output_info = self.initialize_tensors() self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) @@ -329,21 +327,12 @@ def tensorRT_inferencing(self, batch_array): assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - #self.get_logger().info(f"Batch Array Stats: shape={batch_array.shape}, dtype={batch_array.dtype}, range=({batch_array.min()}, {batch_array.max()})") - #self.inputTensors = [batch_array] self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] - #self.get_logger().info(f"Contiguous Inputs: dtype={self.contiguous_inputs[0].dtype}, 
shape={self.contiguous_inputs[0].shape}") - #for i, input_tensor in enumerate(self.contiguous_inputs): - #self.get_logger().info(f"Input {i}: shape={input_tensor.shape}, dtype={input_tensor.dtype}, range=({input_tensor.min()}, {input_tensor.max()})") - #for i, input_gpu in enumerate(self.input_info): - #self.get_logger().info(f"Input {i} GPU address: {input_gpu.gpu}") - for i in range(self.num_inputs): name = self.input_info[i].name cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] self.outputs_ndarray:List[ndarray] = [] for i in range(self.num_outputs): @@ -353,8 +342,6 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray.append(cpu) self.output_gpu_ptrs.append(gpu) self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) - - self.execution_context.execute_async_v3(self.stream) cudart.cudaStreamSynchronize(self.stream) for i, o in enumerate(self.output_gpu_ptrs): @@ -362,7 +349,6 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] @@ -388,69 +374,39 @@ def batch_inference_callback(self,msg1): batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - - """ - - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores - - Detection[0][0][0] represnets the first detection components ranging from 0 to 83 - - 8400 represents 8400 anchors - - """ - # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") - #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") - # decoded_results = self.postprocess_detections_nuscenes(detections) detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - self.get_logger().info(f"Detection Tensor Length:{len(detection_tensor.shape)}") - self.get_logger().info(f"Detection Tensor Length of other tensors:{len(detections[1].shape)}") batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - # self.get_logger().info(f"detections shape:{detections[1].shape}") #80x80 - # self.get_logger().info(f"detections shape2:{detections[2][0][1][0].shape}") #40x40 - + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) decoded_results = [] # A list to hold decoded detections for each image for batch_idx in range(batch_size): image_detections = detection_tensor[batch_idx] # Detections for the current image valid_detections = [] # Store valid detections for this image for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - confidence = detection[4] # Extract confidence score - - # Only process detections with confidence 
above the threshold - if confidence > 0.1: - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[5:13] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class - predicted_prob = class_probs[predicted_class] - - # Convert to (x_min, y_min, x_max, y_max) - x_min = x_center - width / 2 - y_min = y_center - height / 2 - x_max = x_center + width / 2 - y_max = y_center + height / 2 - valid_detections.append([ - x_min, y_min, x_max, y_max, confidence, predicted_class + detection = image_detections[:, anchor_idx] # Get data for one anchor + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[4:] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + confidence = class_probs[predicted_class] + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) if valid_detections.shape[0] == 0: - decoded_results.append([]) # No detections for this image + decoded_results.append([]) continue - - # Apply Non-Maximum Suppression (NMS) nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.2, # Confidence threshold + conf_thres=0.45, # Confidence threshold iou_thres=0.45 # IoU threshold ) - final_detections = [] - for img_idx, nms_result in enumerate(nms_results): # Loop through each image's NMS results - if nms_result is None or len(nms_result) == 0: - print(f"No detections for image {img_idx + 1}.") - decoded_results.append([]) # No detections for this image - continue - + + for img_idx, nms_result in enumerate(nms_results): final_detections = [] for det in nms_result: # Loop through detections in this image x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() @@ -459,85 +415,8 @@ def batch_inference_callback(self,msg1): "confidence": confidence, "class": int(predicted_class), }) - - # Add detections for this image to the results decoded_results.append(final_detections) self.publish_batch_nuscenes(image_list, decoded_results) - - - - def postprocess_detections_nuscenes(self, detections, class_labels = None ): - detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - decoded_results = [] # A list to hold decoded detections for each image - for batch_idx in range(batch_size): - image_detections = detection_tensor[batch_idx] # Detections for the current image - valid_detections = [] # Store valid detections for this image - for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - confidence = detection[4] # Extract confidence score - - # Only process detections with confidence above the threshold - if confidence > 0.5: - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[5:13] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class - predicted_prob = class_probs[predicted_class] - - # Convert to (x_min, y_min, x_max, y_max) - x_min = x_center - width / 2 - y_min = y_center - height / 2 - x_max = x_center + width / 2 - y_max = y_center + height / 2 - 
valid_detections.append([ - x_min, y_min, x_max, y_max, confidence, predicted_class - ]) - valid_detections = torch.tensor(valid_detections) - if valid_detections.shape[0] == 0: - decoded_results.append([]) # No detections for this image - continue - - # Apply Non-Maximum Suppression (NMS) - nms_results = non_max_suppression( - valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.5, # Confidence threshold - iou_thres=0.45 # IoU threshold - ) - - final_detections = [] - for det in nms_results[0]: # Assuming only one set of results - x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() - class_name = class_labels.get(int(predicted_class), "unknown") if class_labels else str(int(predicted_class)) - final_detections.append({ - "bbox": [x_min, y_min, x_max, y_max], - "confidence": confidence, - "class": int(predicted_class), - "class_name": class_name - }) - - decoded_results.append(final_detections) - # for batch_idx, batch_detections in enumerate(decoded_results): - # if len(batch_detections) == 0: - # self.get_logger().info(f"Batch {batch_idx}: No detections") - # continue - - # for anchor_idx, detection in enumerate(batch_detections): - # # Extract values from the dictionary - # bbox = detection["bbox"] - # x_min, y_min, x_max, y_max = bbox - # confidence = detection["confidence"] - # predicted_class = detection["class"] - # class_name = detection["class_name"] - - # Log detection details - # self.get_logger().info( - # f"Batch {batch_idx}, Anchor {anchor_idx}: " - # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - # ) - - return decoded_results def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() @@ -590,69 +469,6 @@ def publish_batch_nuscenes(self, image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) - - - - def publish_batch(self, image_list, results_dict): - current_time = self.get_clock().now() - if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) - return - self.last_publish_time = current_time - batch_msg = BatchDetection() - batch_msg.header.stamp = self.get_clock().now().to_msg() - batch_msg.header.frame_id = "batch" - - for idx, img_msg in enumerate(image_list): - numpy_image = np.frombuffer(img_msg.data, np.uint8) - image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) - height, width = image.shape[:2] - detection_array = Detection2DArray() - detection_array = Detection2DArray() - detection_array.header.stamp = batch_msg.header.stamp - detection_array.header.frame_id = f"camera_{idx}" - detections = results_dict[idx] - for bbox in results_dict[idx]: - x_min, y_min, x_max, y_max , confidence, predicted_class = bbox - x_min = int(x_min * width / 640) - x_max = int(x_max * width / 640) - y_min = int(y_min * height / 640) - y_max = int(y_max * height / 640) - - label = f"Class: {predicted_class}, Conf: {confidence:.2f}" - cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) - cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) - detection = Detection2D() - detection.bbox.center.position.x = (x_min + x_max) / 2 - detection.bbox.center.position.y = (y_min + y_max) / 2 - detection.bbox.size_x = float(x_max - x_min) - detection.bbox.size_y = float(y_max - y_min) - - detected_object = 
ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = str(int(predicted_class)) - detected_object.hypothesis.score = float(confidence) - detection.results.append(detected_object) - detection_array.detections.append(detection) - - # Add detection array to batch message - batch_msg.detections.append(detection_array) - - # Publish visualized image - vis_compressed_image = CompressedImage() - vis_compressed_image.header.stamp = self.get_clock().now().to_msg() - vis_compressed_image.header.frame_id = f"camera_{idx}" - vis_compressed_image.format = "jpeg" - vis_compressed_image.data = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() - self.batch_vis_publisher.publish(vis_compressed_image) - - # Publish Detection2DArray - self.batch_detection_publisher.publish(detection_array) - - # Publish batch detection message - self.batched_camera_message_publisher.publish(batch_msg) - - #Converting to tensor for batching - - def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From 23bde0da7185260aa75ccf803076f1c076d50823 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 10 Dec 2024 23:25:13 +0000 Subject: [PATCH 34/37] Getting all detections from all angles nuscenes --- .../yolov8_detection.py | 128 +++++++++++------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index e7a8af22..070e3eb2 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -145,8 +145,8 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.1) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) # #Subscription for Eve cameras @@ -176,6 +176,7 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) @@ -185,8 +186,18 @@ def __init__(self): # setup vis publishers #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) - self.batch_detection_publisher = self.create_publisher(Detection2DArray, 
self.batch_publish_detection_topic, 10) - self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) + self.num_cameras = 3 # Adjust this based on the number of cameras + self.batch_vis_publishers = [ + self.create_publisher(CompressedImage, f"/camera_{i}/vis", 10) + for i in range(self.num_cameras) + ] + self.batch_detection_publishers = [ + self.create_publisher(Detection2DArray, f"/camera_{i}/detections", 10) + for i in range(self.num_cameras) + ] + + # self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) + # self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) @@ -228,10 +239,9 @@ def build_engine(self): self.get_logger().info(f"Model {output.name} shape: {output.shape} {output.dtype}") if self.max_batch_size > 1: self.profile = self.builder.create_optimization_profile() - self.min_shape = [1, self.channels, self.height, self.width] # Minimum batch size of 1 - self.opt_shape = [int(self.max_batch_size / 2), self.channels, self.height, self.width] # Optimal batch size (half of max, so 3) - self.max_shape = self.shape_input_model - + self.min_shape = [1, 3, 640, 640] + self.opt_shape = [3, 3, 640, 640] + self.max_shape = [6, 3, 640, 640] for input in self.inputs: self.profile.set_shape(input.name, self.min_shape, self.opt_shape, self.max_shape) self.config.add_optimization_profile(self.profile) @@ -327,35 +337,59 @@ def tensorRT_inferencing(self, batch_array): assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] + + # Handle input + self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): - name = self.input_info[i].name - cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, - self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] - self.outputs_ndarray:List[ndarray] = [] + name = self.input_info[i].name + cudart.cudaMemcpyAsync( + self.input_info[i].gpu, + self.contiguous_inputs[i].ctypes.data, + self.contiguous_inputs[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + self.stream + ) + self.execution_context.set_tensor_address(name, self.input_info[i].gpu) + + # Handle dynamic outputs + self.output_gpu_ptrs = [] + self.outputs_ndarray = [] for i in range(self.num_outputs): - j = i + self.num_inputs - cpu = self.output_info[i].cpu - gpu = self.output_info[i].gpu - self.outputs_ndarray.append(cpu) - self.output_gpu_ptrs.append(gpu) - self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) - self.execution_context.execute_async_v3(self.stream) + output_shape = self.execution_context.get_tensor_shape(self.output_info[i].name) + # Reallocate output buffer if shape changed + if self.output_info[i].cpu.shape != tuple(output_shape): + self.output_info[i].cpu = np.empty(output_shape, dtype=self.output_info[i].cpu.dtype) + + self.outputs_ndarray.append(self.output_info[i].cpu) + self.output_gpu_ptrs.append(self.output_info[i].gpu) + self.execution_context.set_tensor_address( + self.output_info[i].name, + self.output_info[i].gpu + ) + + # Execute inference + status = 
self.execution_context.execute_async_v3(self.stream) + assert status, "Inference execution failed" + + # Synchronize and copy results cudart.cudaStreamSynchronize(self.stream) - for i, o in enumerate(self.output_gpu_ptrs): - cudart.cudaMemcpyAsync( - self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream - ) + for i, gpu_ptr in enumerate(self.output_gpu_ptrs): + cudart.cudaMemcpyAsync( + self.outputs_ndarray[i].ctypes.data, + gpu_ptr, + self.outputs_ndarray[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + self.stream + ) + cudart.cudaStreamSynchronize(self.stream) + return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1): + def batch_inference_callback(self,msg1,msg2,msg3): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1] + image_list = [msg1,msg2,msg3] batched_list = [] #Preprocessing for msg in image_list: @@ -372,7 +406,8 @@ def batch_inference_callback(self,msg1): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) + self.get_logger().info(f"batched image shape:{batched_images.shape}") + # self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) detection_tensor = detections[0] # Assuming the first tensor corresponds to detections @@ -392,30 +427,29 @@ def batch_inference_callback(self,msg1): y_min = y_center - height / 2 x_max = x_center + width / 2 y_max = y_center + height / 2 + #if confidence > 0.5: + #self.get_logger().info(f"Camera {batch_idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") valid_detections.append([ x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) - if valid_detections.shape[0] == 0: - decoded_results.append([]) - continue + #self.get_logger().info(f"Pre-NMS Batch {batch_idx}: Valid detections shape {valid_detections.shape}") nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.45, # Confidence threshold + conf_thres=0.5, # Confidence threshold iou_thres=0.45 # IoU threshold ) - - - for img_idx, nms_result in enumerate(nms_results): - final_detections = [] - for det in nms_result: # Loop through detections in this image + final_detections = [] + for det in nms_results[0]: # Loop through detections in this image x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() final_detections.append({ "bbox": [x_min, y_min, x_max, y_max], "confidence": confidence, "class": int(predicted_class), }) - decoded_results.append(final_detections) + #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") + decoded_results.append(final_detections) + #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") self.publish_batch_nuscenes(image_list, decoded_results) def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() @@ -423,6 +457,7 @@ def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg.header.frame_id = "batch" for idx, img_msg in enumerate(image_list): + #self.get_logger().info(f"{idx}") numpy_image = np.frombuffer(img_msg.data, 
np.uint8) image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) height, width = image.shape[:2] @@ -430,8 +465,8 @@ def publish_batch_nuscenes(self, image_list, decoded_results): detection_array = Detection2DArray() detection_array.header.stamp = batch_msg.header.stamp detection_array.header.frame_id = f"camera_{idx}" - for batch_idx, batch_detections in enumerate(decoded_results): - for anchor_idx, detection in enumerate(batch_detections): + batch_detections = decoded_results[idx] + for anchor_idx, detection in enumerate(batch_detections): # Extract values from the dictionary bbox = detection["bbox"] x_min, y_min, x_max, y_max = bbox @@ -441,6 +476,7 @@ def publish_batch_nuscenes(self, image_list, decoded_results): x_max = int(x_max * width / 640) y_min = int(y_min * height / 640) y_max = int(y_max * height / 640) + #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") label = f"Class: {predicted_class}, Conf: {confidence:.2f}" annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) detection = Detection2D() @@ -454,17 +490,17 @@ def publish_batch_nuscenes(self, image_list, decoded_results): detected_object.hypothesis.score = float(confidence) detection.results.append(detected_object) detection_array.detections.append(detection) - batch_msg.detections.append(detection_array) + batch_msg.detections.append(detection_array) annotated_image = annotator.result() vis_compressed_image = CompressedImage() vis_compressed_image.header.stamp = self.get_clock().now().to_msg() vis_compressed_image.header.frame_id = f"camera_{idx}" vis_compressed_image.format = "jpeg" vis_compressed_image.data = cv2.imencode('.jpg', annotated_image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() - self.batch_vis_publisher.publish(vis_compressed_image) + self.batch_vis_publishers[idx].publish(vis_compressed_image) - # Publish Detection2DArray - self.batch_detection_publisher.publish(detection_array) + # Publish Detection2DArray + self.batch_detection_publishers[idx].publish(detection_array) # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) From bdf39a4154bff6a8490353846ebd4734e8a4c5b0 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 12 Dec 2024 19:54:03 +0000 Subject: [PATCH 35/37] Batching complete, eve test remaining --- .../yolov8_detection.py | 191 +++++++++++++----- .../CMakeLists.txt | 1 + .../msg/EveBatchDetection.msg | 3 + 3 files changed, 143 insertions(+), 52 deletions(-) create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 070e3eb2..79ad5be6 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -2,7 +2,7 @@ from rclpy.node import Node import os from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer -from camera_object_detection_msgs.msg import BatchDetection +from camera_object_detection_msgs.msg import BatchDetection, EveBatchDetection from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, @@ -81,11 +81,13 @@ def __init__(self): #Declare batch topic self.declare_parameter("batch_inference_topic", "/batched_camera_message") + 
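# Editor's note (not part of this patch): the topic and model-path parameters
# declared here can be overridden at launch time instead of editing the node.
# A minimal sketch of such an override is shown below; the real eve.launch.py
# referenced by the perception docker-compose file is not included in this
# series, so the executable name and values are illustrative assumptions.
from launch import LaunchDescription
from launch_ros.actions import Node

def generate_launch_description():
    return LaunchDescription([
        Node(
            package="camera_object_detection",
            executable="yolov8_detection",  # assumed entry point name
            parameters=[{
                "batch_inference_topic": "/batched_camera_message",
                "eve_batch_inference_topic": "/eve_batched_camera_message",
                "tensorRT_model_path": "/perception_models/tensorRT.engine",
            }],
        )
    ])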
self.declare_parameter("eve_batch_inference_topic", "/eve_batched_camera_message") #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.eve_batch_inference_topic = self.get_parameter("eve_batch_inference_topic").value self.batch_publish_detection_topic = self.get_parameter("batch_publish_detection_topic").value self.batch_publish_vis_topic = self.get_parameter("batch_publish_vis_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value @@ -145,24 +147,24 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.1) + self.ats = ApproximateTimeSynchronizer([self.front_left_camera_subscription,self.front_center_camera_subscription, self.front_right_camera_subscription ], queue_size=10, slop=0.1) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) - # #Subscription for Eve cameras - # self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) - # self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) - # self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) - # self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + #Subscription for Eve cameras + self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) # #Create this callback function tomorrow - # self.eve_ats.registerCallback(self.eve_batch_inference_callback) + self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None self.orig_image_height = None - # set devicepublish_visDete + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): self.get_logger().info("Using GPU for inference") @@ -171,31 +173,46 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - + + status, self.stream = cudart.cudaStreamCreate() assert status.value == 0, "IS NOT ZERO" #self.build_engine() - self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) - # setup vis publishers #Batch vis publishers 
self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) + self.eve_batched_camera_message_publisher = self.create_publisher(EveBatchDetection, self.eve_batch_inference_topic, 10) self.num_cameras = 3 # Adjust this based on the number of cameras + + #Nuscenes Publishers + self.nuscenes_camera_names = ["/CAM_FRONT_LEFT", "/CAM_FRONT", "/CAM_FRONT_RIGHT"] self.batch_vis_publishers = [ - self.create_publisher(CompressedImage, f"/camera_{i}/vis", 10) - for i in range(self.num_cameras) + self.create_publisher(CompressedImage, f"{nuscenes}/viz", 10) + for nuscenes in self.nuscenes_camera_names ] self.batch_detection_publishers = [ - self.create_publisher(Detection2DArray, f"/camera_{i}/detections", 10) - for i in range(self.num_cameras) + self.create_publisher(Detection2DArray, f"{nuscenes}/detections", 10) + for nuscenes in self.nuscenes_camera_names + ] + + #Eve Publishers + self.eve_camera_names = [self.left_camera_topic, self.center_camera_topic, self.camera_topic] + self.eve_batch_vis_publishers = [ + self.create_publisher(Image, f"{eve}/eve_viz", 10) + for eve in self.eve_camera_names + ] + self.eve_batch_detection_publishers = [ + self.create_publisher(Detection2DArray, f"{eve}/eve_detections", 10) + for eve in self.eve_camera_names ] + # self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) # self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) @@ -264,7 +281,6 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) - #self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() @@ -273,7 +289,6 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): self.execution_context.set_input_shape(self.input_tensor_name, (batch_size, rgb, width, height)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - #self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -292,10 +307,7 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) self.input_names = self.names[:self.num_inputs] - #self.output_names = self.names[:self.num_outputs] #This includes the input name in output self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it - #self.get_logger().info(f"INPUT NAMES:: {self.input_names}") - #self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") def initialize_tensors(self): @@ -305,11 +317,8 @@ def initialize_tensors(self): self.output_ptrs = [] - for name in self.output_names: - #self.get_logger().info(f"NAMES of outputs:{name}") - + for name in self.output_names: self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) - #self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = 
cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -322,7 +331,6 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) - #self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -338,7 +346,7 @@ def tensorRT_inferencing(self, batch_array): batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - # Handle input + self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): name = self.input_info[i].name @@ -351,7 +359,7 @@ def tensorRT_inferencing(self, batch_array): ) self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - # Handle dynamic outputs + self.output_gpu_ptrs = [] self.outputs_ndarray = [] for i in range(self.num_outputs): @@ -396,7 +404,6 @@ def batch_inference_callback(self,msg1,msg2,msg3): numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) original_height, original_width = compressedImage.shape[:2] - #self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 @@ -406,34 +413,35 @@ def batch_inference_callback(self,msg1,msg2,msg3): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.get_logger().info(f"batched image shape:{batched_images.shape}") - # self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + batch = batched_images.shape[0] + self.initialize_engine(self.tensorRT_model_path, batch,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) - detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - decoded_results = [] # A list to hold decoded detections for each image + decoded_results = self.parse_detections(detections) + self.publish_batch_nuscenes(image_list, decoded_results) + def parse_detections(self, detections): + detection_tensor = detections[0] + batch_size = detection_tensor.shape[0] + num_anchors = detection_tensor.shape[2] + decoded_results = [] for batch_idx in range(batch_size): - image_detections = detection_tensor[batch_idx] # Detections for the current image - valid_detections = [] # Store valid detections for this image - for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[4:] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class + image_detections = detection_tensor[batch_idx] + valid_detections = [] + for anchor_idx in range(num_anchors): + detection = image_detections[:, anchor_idx] + x_center, y_center, width, height = detection[:4] + class_probs = detection[4:] + 
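                # Editor's note (not part of the original patch): for a YOLOv8
                # detection head exported to ONNX/TensorRT, each anchor column is
                # laid out as [x_center, y_center, w, h, class_0_score, ...,
                # class_{N-1}_score] with no separate objectness term, so taking
                # the highest class score as the confidence (as done on the next
                # lines) matches the (84, 8400) layout described earlier,
                # assuming an 80-class COCO model.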
predicted_class = np.argmax(class_probs) confidence = class_probs[predicted_class] x_min = x_center - width / 2 y_min = y_center - height / 2 x_max = x_center + width / 2 y_max = y_center + height / 2 - #if confidence > 0.5: - #self.get_logger().info(f"Camera {batch_idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") + valid_detections.append([ x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) - #self.get_logger().info(f"Pre-NMS Batch {batch_idx}: Valid detections shape {valid_detections.shape}") nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS conf_thres=0.5, # Confidence threshold @@ -449,8 +457,9 @@ def batch_inference_callback(self,msg1,msg2,msg3): }) #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") decoded_results.append(final_detections) + return decoded_results #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") - self.publish_batch_nuscenes(image_list, decoded_results) + def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() @@ -504,13 +513,91 @@ def publish_batch_nuscenes(self, image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) + + def eve_batch_inference_callback(self, msg1,msg2,msg3): + image_list = [msg1,msg2,msg3] + batched_list = [] + for msg in image_list: + if self.compressed: + numpy_array = np.frombuffer(msg.data, np.uint8) + cv_image = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = cv_image.shape[:2] + else: + cv_image = self.cv_bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough") + resized_image = cv2.resize(cv_image, (640,640)) # can also be 1024, 1024 + rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + batched_list.append(float_image) + batched_images = np.stack(batched_list, axis = 0) + self.get_logger().info(f"batched image shape:{batched_images.shape}") + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + decoded_results = self.parse_detections(detections) + + + def publish_batch_eve(self,image_list, decoded_results): + batch_msg = BatchDetection() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + if self.compressed: + numpy_array = np.frombuffer(msg.data, np.uint8) + image = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = cv_image.shape[:2] + else: + image = self.cv_bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough") + height, width = image.shape[:2] + annotator = Annotator(image, line_width=2, example="Class:0") + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"camera_{idx}" + batch_detections = decoded_results[idx] + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + x_min = int(x_min * width / 640) + x_max = 
int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 640) + #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) + detection = Detection2D() + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = float(y_max - y_min) + + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = str(int(predicted_class)) + detected_object.hypothesis.score = float(confidence) + detection.results.append(detected_object) + detection_array.detections.append(detection) + batch_msg.detections.append(detection_array) + annotated_image = annotator.result() + + + vis_image = Image() + vis_image.header.stamp = self.get_clock().now().to_msg() + vis_image.header.frame_id = f"camera_{idx}" + vis_image.format = "jpeg" + vis_image.data = self.cv_bridge.cv2_to_imgmsg( annotated_image,encoding="bgr8") + self.batch_vis_publishers[idx].publish(vis_image) + + # Publish Detection2DArray + self.batch_detection_publishers[idx].publish(detection_array) + + # Publish batch detection message + self.batched_camera_message_publisher.publish(batch_msg) + - def batch_convert_to_tensor(self, cv_image): - img = cv_image.transpose(2,0,1) - img = torch.from_numpy(img).to(self.device) - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - return img def crop_image(self, cv_image): if self.crop_mode == "LetterBox": diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index 21c77460..7a9097e0 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -16,6 +16,7 @@ find_package(vision_msgs REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/BatchDetection.msg" + "msg/EveBatchDetection.msg" DEPENDENCIES std_msgs sensor_msgs vision_msgs ) diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg new file mode 100644 index 00000000..4d8f55e3 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg @@ -0,0 +1,3 @@ +std_msgs/Header header +sensor_msgs/Image[] images # Array of compressed images in the batch +vision_msgs/Detection2DArray[] detections # Array of detections for each image From a81ccbaf4a07afec963842f68b3b2ac1bd29431b Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 17 Dec 2024 15:40:13 +0000 Subject: [PATCH 36/37] Refactored Changes --- .../yolov8_detection.py | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 79ad5be6..b5a1e30b 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -204,11 +204,11 @@ def __init__(self): #Eve Publishers self.eve_camera_names = 
[self.left_camera_topic, self.center_camera_topic, self.camera_topic] self.eve_batch_vis_publishers = [ - self.create_publisher(Image, f"{eve}/eve_viz", 10) + self.create_publisher(Image, f"{eve}/eve_batch_viz", 10) for eve in self.eve_camera_names ] self.eve_batch_detection_publishers = [ - self.create_publisher(Detection2DArray, f"{eve}/eve_detections", 10) + self.create_publisher(Detection2DArray, f"{eve}/eve_batch_detections", 10) for eve in self.eve_camera_names ] @@ -276,6 +276,11 @@ def build_engine(self): self.get_logger().info("FINISHED WRITING ") def initialize_engine(self, weight, batch_size, rgb, width, height): + """ + Initializes engine file requirements + - takes in file path for tensorRT file, batch size, # of rgb channels, width & height + - includes input names, output names, and setting dimensions for model input shape + """ self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) trt.init_libnvinfer_plugins(self.logger, namespace ='') @@ -311,12 +316,17 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): def initialize_tensors(self): + """ + Initializes GPU from cuda to set up inferencing + - Assigns input names, and shape + - Assigns output names, and shapes + """ self.dynamic = True self.input_info = [] self.output_info = [] self.output_ptrs = [] - + # Initializing output tensors for name in self.output_names: self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) @@ -328,6 +338,7 @@ def initialize_tensors(self): self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) + #Initializes input tensors for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) @@ -342,11 +353,15 @@ def initialize_tensors(self): return self.input_info, self.output_info def tensorRT_inferencing(self, batch_array): + """ + Inferences through preprocessed batch images and gives data about detections + - Returns a contigious array of shape (3,84,8400) + """ assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - + #Intializing memory, and names self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): name = self.input_info[i].name @@ -396,6 +411,11 @@ def tensorRT_inferencing(self, batch_array): # will be called with nuscenes rosbag def batch_inference_callback(self,msg1,msg2,msg3): + """ + Returns final detections array for publishing to foxglove + - Preprocess and batch images + - Call tensorRT and parse through detections and send for visualization + """ #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3] batched_list = [] @@ -409,9 +429,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) - # tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(float_image) - # batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_list.append(float_image) batched_images = np.stack(batched_list, axis=0) batch = batched_images.shape[0] self.initialize_engine(self.tensorRT_model_path, batch,3,640,640) @@ -419,6 +437,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): 
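# Editor's sketch (not part of this patch): the per-anchor Python loop in
# parse_detections() below walks all 8400 anchors per image, which is slow in
# pure Python. Assuming the engine output keeps the
# (batch, 4 + num_classes, num_anchors) layout used throughout this file, the
# same decoding can be done with vectorized NumPy before handing the surviving
# boxes to non_max_suppression():
import numpy as np

def decode_yolov8_output(output, conf_thres=0.5):
    """Decode one (4 + num_classes, num_anchors) tensor into [x1, y1, x2, y2, conf, cls] rows."""
    boxes = output[:4, :]                      # (4, A): x_center, y_center, w, h
    scores = output[4:, :]                     # (num_classes, A)
    cls_ids = scores.argmax(axis=0)            # best class per anchor
    confs = scores[cls_ids, np.arange(scores.shape[1])]
    keep = confs > conf_thres                  # drop low-confidence anchors early
    xc, yc, w, h = boxes[:, keep]
    dets = np.stack([xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2,
                     confs[keep], cls_ids[keep].astype(np.float32)], axis=1)
    return dets                                # (num_kept, 6)

# Usage sketch: per_image = [decode_yolov8_output(detections[0][i]) for i in range(batch)]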
detections = self.tensorRT_inferencing(batched_images) decoded_results = self.parse_detections(detections) self.publish_batch_nuscenes(image_list, decoded_results) + def parse_detections(self, detections): detection_tensor = detections[0] batch_size = detection_tensor.shape[0] @@ -455,10 +474,8 @@ def parse_detections(self, detections): "confidence": confidence, "class": int(predicted_class), }) - #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") decoded_results.append(final_detections) return decoded_results - #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() @@ -485,7 +502,6 @@ def publish_batch_nuscenes(self, image_list, decoded_results): x_max = int(x_max * width / 640) y_min = int(y_min * height / 640) y_max = int(y_max * height / 640) - #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") label = f"Class: {predicted_class}, Conf: {confidence:.2f}" annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) detection = Detection2D() From 8a0a184de1453da6795a1faffa4bc47059dba308 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 20 Dec 2024 04:33:49 +0000 Subject: [PATCH 37/37] Cleaned up code --- .../yolov8_detection.py | 224 +----------------- 1 file changed, 1 insertion(+), 223 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index b5a1e30b..8bd0482b 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -128,17 +128,6 @@ def __init__(self): self.half = False - - self.subscription = self.create_subscription( - Image if not self.compressed else CompressedImage, - self.camera_topic, - self.image_callback, - qos_profile=QoSProfile( - reliability=QoSReliabilityPolicy.RELIABLE, - history=QoSHistoryPolicy.KEEP_LAST, - depth=10, - ), - ) #Subscription for Nuscenes self.front_center_camera_subscription = Subscriber(self, CompressedImage, self.front_center_camera_topic) self.front_right_camera_subscription = Subscriber(self, CompressedImage, self.front_right_camera_topic) @@ -181,9 +170,7 @@ def __init__(self): #self.build_engine() self.last_publish_time = self.get_clock().now() - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) - self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names - self.stride = int(self.model.stride) + #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) @@ -598,8 +585,6 @@ def publish_batch_eve(self,image_list, decoded_results): detection_array.detections.append(detection) batch_msg.detections.append(detection_array) annotated_image = annotator.result() - - vis_image = Image() vis_image.header.stamp = self.get_clock().now().to_msg() vis_image.header.frame_id = f"camera_{idx}" @@ -612,213 +597,6 @@ def publish_batch_eve(self,image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) - - - - def crop_image(self, cv_image): - if self.crop_mode == "LetterBox": - img = LetterBox(self.image_size, stride=self.stride)(image=cv_image) - elif self.crop_mode == "CenterCrop": - img 
= CenterCrop(self.image_size)(cv_image) - else: - raise Exception("Invalid crop mode, please choose either 'LetterBox' or 'CenterCrop'!") - - return img - - def convert_bboxes_to_orig_frame(self, bbox): - """ - Converts bounding box coordinates from the scaled image frame back to the original image frame. - - This function takes into account the original image dimensions and the scaling method used - (either "LetterBox" or "CenterCrop") to accurately map the bounding box coordinates back to - their original positions in the original image. - - Parameters: - bbox (list): A list containing the bounding box coordinates in the format [x1, y1, w1, h1] - in the scaled image frame. - - Returns: - list: A list containing the bounding box coordinates in the format [x1, y1, w1, h1] - in the original image frame. - - """ - width_scale = self.orig_image_width / self.image_size - height_scale = self.orig_image_height / self.image_size - if self.crop_mode == "LetterBox": - translation = (self.image_size - self.orig_image_height / width_scale) / 2 - return [ - bbox[0] * width_scale, - (bbox[1] - translation) * width_scale, - bbox[2] * width_scale, - bbox[3] * width_scale, - ] - elif self.crop_mode == "CenterCrop": - translation = (self.orig_image_width / height_scale - self.image_size) / 2 - return [ - (bbox[0] + translation) * height_scale, - bbox[1] * height_scale, - bbox[2] * height_scale, - bbox[3] * height_scale, - ] - - def crop_and_convert_to_tensor(self, cv_image): - """ - Preprocess the image by resizing, padding and rearranging the dimensions. - - Parameters: - cv_image: A numpy or cv2 image of shape (w,h,3) - - Returns: - torch.Tensor image for model input of shape (1,3,w,h) - """ - img = self.crop_image(cv_image) - - # Convert - img = cv_image.transpose(2, 0, 1) - - # Further conversion - img = torch.from_numpy(img).to(self.device) - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - img = img.unsqueeze(0) - - return img - - def postprocess_detections(self, detections, annotator): - """ - Post-process draws bouningboxes on camera image. 
- - Parameters: - detections: A list of dict with the format - { - "label": str, - "bbox": [float], - "conf": float - } - annotator: A ultralytics.yolo.utils.plotting.Annotator for the current image - - Returns: - processed_detections: filtered detections - annotator_img: image with bounding boxes drawn on - """ - processed_detections = detections - - for det in detections: - label = f'{det["label"]} {det["conf"]:.2f}' - x1, y1, w1, h1 = det["bbox"] - xyxy = [x1, y1, x1 + w1, y1 + h1] - annotator.box_label(xyxy, label, color=colors(1, True)) - - annotator_img = annotator.result() - return (processed_detections, annotator_img) - - def publish_vis(self, annotated_img, msg, feed): - # Publish visualizations - imgmsg = self.cv_bridge.cv2_to_imgmsg(annotated_img, "bgr8") - imgmsg.header.stamp = msg.header.stamp - imgmsg.header.frame_id = msg.header.frame_id - self.vis_publisher.publish(imgmsg) - - def publish_detections(self, detections, msg, feed): - # Publish detections to an detectionList message - detection2darray = Detection2DArray() - - # fill header for detection list - detection2darray.header.stamp = msg.header.stamp - detection2darray.header.frame_id = msg.header.frame_id - # populate detection list - if detections is not None and len(detections): - for detection in detections: - detection2d = Detection2D() - detection2d.header.stamp = msg.header.stamp - detection2d.header.frame_id = msg.header.frame_id - detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = detection["label"] - detected_object.hypothesis.score = detection["conf"] - detection2d.results.append(detected_object) - detection2d.bbox.center.position.x = detection["bbox"][0] - detection2d.bbox.center.position.y = detection["bbox"][1] - detection2d.bbox.size_x = detection["bbox"][2] - detection2d.bbox.size_y = detection["bbox"][3] - - # append detection to detection list - detection2darray.detections.append(detection2d) - - self.detection_publisher.publish(detection2darray) - - def image_callback(self, msg): - self.get_logger().debug("Received image") - if self.orig_image_width is None: - self.orig_image_width = msg.width - self.orig_image_height = msg.height - - images = [msg] # msg is a single sensor image - startTime = time.time() - for image in images: - - # convert ros Image to cv::Mat - if self.compressed: - np_arr = np.frombuffer(msg.data, np.uint8) - cv_image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) - else: - try: - cv_image = self.cv_bridge.imgmsg_to_cv2(image, desired_encoding="passthrough") - except CvBridgeError as e: - self.get_logger().error(str(e)) - return - - # preprocess image and run through prediction - img = self.crop_and_convert_to_tensor(cv_image) - pred = self.model(img) - - # nms function used same as yolov8 detect.py - pred = non_max_suppression(pred) #Eliminates overlapping bounding boxes - detections = [] - for i, det in enumerate(pred): # per image - if len(det): - # Write results - for *xyxy, conf, cls in reversed(det): - label = self.names[int(cls)] - - bbox = [ - xyxy[0], - xyxy[1], - xyxy[2] - xyxy[0], - xyxy[3] - xyxy[1], - ] - bbox = [b.item() for b in bbox] - bbox = self.convert_bboxes_to_orig_frame(bbox) - - detections.append( - { - "label": label, - "conf": conf.item(), - "bbox": bbox, - } - ) - self.get_logger().debug(f"{label}: {bbox}") - - annotator = Annotator( - cv_image, - line_width=self.line_thickness, - example=str(self.names), - ) - (detections, annotated_img) = self.postprocess_detections(detections, annotator) - - # Currently we support a single 
camera so we pass an empty string
-            feed = ""
-            self.publish_vis(annotated_img, msg, feed)
-            self.publish_detections(detections, msg, feed)
-
-            if self.save_detections:
-                cv2.imwrite(f"detections/{self.counter}.jpg", annotated_img)
-                self.counter += 1
-
-        self.get_logger().info(
-            f"Finished in: {time.time() - startTime}, {1/(time.time() - startTime)} Hz"
-        )
-
-
 def main(args=None):
     rclpy.init(args=args)
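
The batched path that survives this cleanup (batch_callback -> tensorRT_inferencing -> parse_detections -> publish_batch_nuscenes / publish_batch_eve) scales each box from the 640x640 network frame back to the camera's native resolution (x * width / 640, y * height / 640) and republishes it as a vision_msgs Detection2D. The sketch below is illustrative only, not code from the patch: the helper names rescale_box and to_detection2d are hypothetical, and it assumes the engine was run on a square 640x640 input, as the width/640 and height/640 scaling in publish_batch_nuscenes implies.

# Illustrative sketch, not part of this patch: box rescaling plus packing
# into vision_msgs types, mirroring publish_batch_nuscenes/publish_batch_eve.
from vision_msgs.msg import Detection2D, ObjectHypothesisWithPose

MODEL_SIZE = 640  # assumed square network input, per the width/640 scaling


def rescale_box(box, orig_width, orig_height, model_size=MODEL_SIZE):
    """Map [x_min, y_min, x_max, y_max] from the model frame to the original image."""
    x_min, y_min, x_max, y_max = box
    return [
        int(x_min * orig_width / model_size),
        int(y_min * orig_height / model_size),
        int(x_max * orig_width / model_size),
        int(y_max * orig_height / model_size),
    ]


def to_detection2d(box, confidence, class_id, stamp, frame_id):
    """Build a Detection2D (center/size convention) from a rescaled corner box."""
    x_min, y_min, x_max, y_max = box
    detection = Detection2D()
    detection.header.stamp = stamp
    detection.header.frame_id = frame_id

    hypothesis = ObjectHypothesisWithPose()
    hypothesis.hypothesis.class_id = str(class_id)
    hypothesis.hypothesis.score = float(confidence)
    detection.results.append(hypothesis)

    detection.bbox.center.position.x = (x_min + x_max) / 2.0
    detection.bbox.center.position.y = (y_min + y_max) / 2.0
    detection.bbox.size_x = float(x_max - x_min)
    detection.bbox.size_y = float(y_max - y_min)
    return detection

One design note on this choice: the deleted convert_bboxes_to_orig_frame also corrected for the LetterBox/CenterCrop translation, whereas a plain width/640, height/640 rescale like the one sketched here is exact only when the image is resized to the network frame without padding; if the batched preprocessing letterboxes with padding, the padding offset would need to be subtracted before scaling.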