From c054c5fa7a3cf88b6dcd57c2391d38c024392630 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 5 Nov 2024 21:19:59 +0000 Subject: [PATCH 01/37] Completed batching and creating tensrt engine --- .../yolov8_detection.py | 135 ++++++++++++++++-- .../camera_object_detection/requirements.txt | 3 +- watod-config.sh | 2 +- 3 files changed, 127 insertions(+), 13 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 05ceb431..0b73261d 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -1,14 +1,14 @@ import rclpy from rclpy.node import Node import os - +from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, Detection2D, Detection2DArray, ) - +from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend from ultralytics.data.augment import LetterBox, CenterCrop from ultralytics.utils.ops import non_max_suppression @@ -22,6 +22,8 @@ import time import torch +import onnx +import tensorrt as trt class CameraDetectionNode(Node): @@ -30,21 +32,65 @@ def __init__(self): torch.zeros(1).cuda() super().__init__("camera_object_detection_node") - self.get_logger().info("Creating camera detection node...") + self.get_logger().info("Creating batched camera detection node...") + + #Eve config params self.declare_parameter("camera_topic", "/camera/right/image_color") + self.declare_parameter("left_camera_topic", "/camera/left/image_color") + self.declare_parameter("center_camera_topic", "/camera/center/image_color") + + #Nuscenes + self.declare_parameter("front_center_camera_topic", "/CAM_FRONT/image_rect_compressed") + self.declare_parameter("front_right_camera_topic", "/CAM_FRONT_RIGHT/image_rect_compressed") + self.declare_parameter("front_left_camera_topic", "/CAM_FRONT_LEFT/image_rect_compressed") + + self.declare_parameter("back_center_camera_topic", "/CAM_BACK/image_rect_compressed") + self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") + self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") + + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") + self.declare_parameter("image_size", 1024) self.declare_parameter("compressed", False) self.declare_parameter("crop_mode", "LetterBox") self.declare_parameter("save_detections", False) - + + #Declare batch topic + self.declare_parameter("batch_inference_topic", "/batched_camera_message") + + + + #Batch inference topic + self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.onnx_model_path = self.get_parameter("onnx_model_path").value + self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value + + #Camera topics for eve self.camera_topic = self.get_parameter("camera_topic").value + self.left_camera_topic = self.get_parameter("left_camera_topic").value + self.center_camera_topic = 
self.get_parameter("center_camera_topic").value + + #Camera topics for nuscenes + self.front_center_camera_topic = self.get_parameter("front_center_camera_topic").value + self.front_right_camera_topic = self.get_parameter("front_right_camera_topic").value + self.front_left_camera_topic = self.get_parameter("front_left_camera_topic").value + + self.back_center_camera_topic = self.get_parameter("back_center_camera_topic").value + self.back_right_camera_topic = self.get_parameter("back_right_camera_topic").value + self.back_left_camera_topic = self.get_parameter("back_left_camera_topic").value + + + #Parameter for TensorRT Model + self.publish_vis_topic = self.get_parameter("publish_vis_topic").value self.publish_detection_topic = self.get_parameter("publish_detection_topic").value self.model_path = self.get_parameter("model_path").value + self.image_size = self.get_parameter("image_size").value self.compressed = self.get_parameter("compressed").value self.crop_mode = self.get_parameter("crop_mode").value @@ -57,6 +103,8 @@ def __init__(self): self.line_thickness = 1 self.half = False + + self.subscription = self.create_subscription( Image if not self.compressed else CompressedImage, self.camera_topic, @@ -67,11 +115,22 @@ def __init__(self): depth=10, ), ) + #Subscription for Nuscenes + self.front_center_camera_subscription = Subscriber(self, CompressedImage, self.front_center_camera_topic) + self.front_right_camera_subscription = Subscriber(self, CompressedImage, self.front_right_camera_topic) + self.front_left_camera_subscription = Subscriber(self, CompressedImage, self.front_left_camera_topic) + self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) + self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) + self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) + self.ats.registerCallback(self.batch_callback) + + self.orig_image_width = None self.orig_image_height = None - # set device + # set devicepublish_visDete self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): self.get_logger().info("Using GPU for inference") @@ -81,23 +140,77 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - # load yolov8 model - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) + self.logger = trt.Logger(trt.Logger.WARNING) + self.builder = trt.Builder(self.logger) + self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + self.parser = trt.OnnxParser(self.network, self.logger) + #Commented this out because only run when building the tensorRT engine + # #Reading the onnx file in the perception models directory + # with open(self.onnx_model_path, "rb") as model_file: + + # if not self.parser.parse(model_file.read()): + + # for error in range(parser.num_errors): + # print(self.parser.get_error(error)) + # raise RuntimeError("Failed to parse the ONNX file.") + + # #Building the tensorRT engine for inferencing + # self.config = self.builder.create_builder_config() + # self.total_memory = 
torch.cuda.get_device_properties(self.device).total_memory + # self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + # self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) + # assert self.engine_bytes is not None, "Failed to create engine" + # self.get_logger().info("BUILT THE ENGINE ") + + # #Writing to the perception models directory + # # Getting bug on these lines + # with open(self.tensorRT_model_path, "wb") as f: + # self.get_logger().info("WRITINTG THE ENGINE ") + # f.write(self.engine_bytes) + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names - self.stride = int(self.model.stride) # setup vis publishers + self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 ) - self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + image_list = [msg1,msg2,msg3,msg4,msg5,msg6] + batched_list = [] + for msg in image_list: + # Need to convert to numpy array and then make it have 3 color channels + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) + preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) + batched_list.append(preprocessedImage) + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}") + + + batch_tensor = torch.stack(batched_list) + self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") + + #Now need to pass it to yolov8 tensorRT model + # predictions = self.model(batch_tensor) + + + + #Converting to tensor for batching + def batch_convert_to_tensor(self, cv_image): + img = cv_image.transpose(2,0,1) + img = torch.from_numpy(img).to(self.device) + img = img.half() if self.half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + return img + def crop_image(self, cv_image): if self.crop_mode == "LetterBox": img = LetterBox(self.image_size, stride=self.stride)(image=cv_image) @@ -157,7 +270,7 @@ def crop_and_convert_to_tensor(self, cv_image): img = self.crop_image(cv_image) # Convert - img = img.transpose(2, 0, 1) + img = cv_image.transpose(2, 0, 1) # Further conversion img = torch.from_numpy(img).to(self.device) @@ -255,7 +368,7 @@ def image_callback(self, msg): pred = self.model(img) # nms function used same as yolov8 detect.py - pred = non_max_suppression(pred) + pred = non_max_suppression(pred) #Eliminates overlapping bounding boxes detections = [] for i, det in enumerate(pred): # per image if len(det): diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 0807da67..b4eb4248 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -4,4 +4,5 @@ setuptools==65.5.1 easydict huggingface huggingface_hub -numpy==1.24.2 \ No newline at end of file +numpy==1.24.2 +onnx \ No newline at end of file diff --git a/watod-config.sh b/watod-config.sh index 3255dd40..96033167 100755 --- a/watod-config.sh +++ b/watod-config.sh 
@@ -15,7 +15,7 @@ ## - simulation : starts simulation ## - samples : starts sample ROS2 pubsub nodes -# ACTIVE_MODULES="" +ACTIVE_MODULES="perception infrastructure" ################################# MODE OF OPERATION ################################# ## Possible modes of operation when running watod. From f1f9cee43a3fccd81e7bb2196cb7c7183463508b Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 5 Nov 2024 21:22:34 +0000 Subject: [PATCH 02/37] Mounted file and directory --- modules/docker-compose.perception.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 99dc9a49..4370e488 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,8 +32,11 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt + - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx + - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt + - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: build: From 359626b2c674aec3c7cf0aa1c5d2d81801a88350 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 03:38:20 +0000 Subject: [PATCH 03/37] Requirment txt changes --- src/perception/camera_object_detection/requirements.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index b4eb4248..9ae1f4cb 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -5,4 +5,9 @@ easydict huggingface huggingface_hub numpy==1.24.2 -onnx \ No newline at end of file +onnx +onnxsim +onnxruntime-gpu +tensorrt +tensorrt_lean +tensorrt_dispatch \ No newline at end of file From 5cb072ea36f947ee0863d26bc1ebe08bd768c604 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 14:46:45 +0000 Subject: [PATCH 04/37] Reafactored engine build to a function --- .../yolov8_detection.py | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 0b73261d..cffb5309 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,29 +144,10 @@ def __init__(self): self.builder = trt.Builder(self.logger) self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) self.parser = trt.OnnxParser(self.network, self.logger) - #Commented this out because only run when building the tensorRT engine - # #Reading the onnx file in the perception models directory - # with open(self.onnx_model_path, "rb") as model_file: - - # if not self.parser.parse(model_file.read()): - - # for error in range(parser.num_errors): - # print(self.parser.get_error(error)) - # raise RuntimeError("Failed to parse the ONNX file.") - - # #Building the tensorRT engine for inferencing - # 
self.config = self.builder.create_builder_config() - # self.total_memory = torch.cuda.get_device_properties(self.device).total_memory - # self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) - # self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) - # assert self.engine_bytes is not None, "Failed to create engine" - # self.get_logger().info("BUILT THE ENGINE ") - - # #Writing to the perception models directory - # # Getting bug on these lines - # with open(self.tensorRT_model_path, "wb") as f: - # self.get_logger().info("WRITINTG THE ENGINE ") - # f.write(self.engine_bytes) + + # #Commented this out because only run when building the tensorRT engine + # self.build_engine() + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -181,7 +162,33 @@ def __init__(self): self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) - + + def build_engine(self): + #Reading the onnx file in the perception models directory + with open(self.onnx_model_path, "rb") as model_file: + + if not self.parser.parse(model_file.read()): + + for error in range(parser.num_errors): + print(self.parser.get_error(error)) + raise RuntimeError("Failed to parse the ONNX file.") + + #Building the tensorRT engine for inferencing + self.config = self.builder.create_builder_config() + self.total_memory = torch.cuda.get_device_properties(self.device).total_memory + self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) + assert self.engine_bytes is not None, "Failed to create engine" + self.get_logger().info("BUILT THE ENGINE ") + + #Writing to the perception models directory + #Getting bug on these lines + with open(self.tensorRT_model_path, "wb") as f: + self.get_logger().info("WRITINTG THE ENGINE ") + f.write(self.engine_bytes) + self.get_logger().info("RETURNS 1") + + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 387e49199394c66d3f7db2ebee209d6dd2086f16 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:15:53 +0000 Subject: [PATCH 05/37] Fixed file error --- modules/docker-compose.perception.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 4370e488..13fc7f53 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -33,7 +33,7 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.engine + - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models From 61f48d84421a6d5e45acbac187377bc2c4842dc7 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:18:50 +0000 Subject: [PATCH 06/37] Changed docker compose file --- modules/docker-compose.perception.yaml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 13fc7f53..c82d5589 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,7 +32,6 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt From bea933d4376917d2ec9ba493489b417c1eb5f51f Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 16:20:59 +0000 Subject: [PATCH 07/37] Refactored docker changes --- modules/docker-compose.perception.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index c82d5589..13fc7f53 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,6 +32,7 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt + - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt From 4d4513dff98a160a7c5ba0e0bf0e45fd7674e012 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 6 Nov 2024 21:53:19 +0000 Subject: [PATCH 08/37] initialized engine --- .../yolov8_detection.py | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index cffb5309..9dd19eb9 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -8,6 +8,7 @@ Detection2D, Detection2DArray, ) +from pathlib import Path from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend from ultralytics.data.augment import LetterBox, CenterCrop @@ -125,7 +126,7 @@ def __init__(self): self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) self.ats.registerCallback(self.batch_callback) - + self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None self.orig_image_height = None @@ -140,15 +141,14 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - self.logger = trt.Logger(trt.Logger.WARNING) - self.builder = trt.Builder(self.logger) - self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - self.parser = trt.OnnxParser(self.network, self.logger) + # #Commented this out because only run when 
building the tensorRT engine # self.build_engine() - - + + + + self.initialize_engine(self.tensorRT_model_path) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -165,6 +165,10 @@ def __init__(self): def build_engine(self): #Reading the onnx file in the perception models directory + self.logger = trt.Logger(trt.Logger.WARNING) + self.builder = trt.Builder(self.logger) + self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + self.parser = trt.OnnxParser(self.network, self.logger) with open(self.onnx_model_path, "rb") as model_file: if not self.parser.parse(model_file.read()): @@ -172,6 +176,8 @@ def build_engine(self): for error in range(parser.num_errors): print(self.parser.get_error(error)) raise RuntimeError("Failed to parse the ONNX file.") + + #Building the tensorRT engine for inferencing self.config = self.builder.create_builder_config() @@ -181,13 +187,40 @@ def build_engine(self): assert self.engine_bytes is not None, "Failed to create engine" self.get_logger().info("BUILT THE ENGINE ") + #Writing to the perception models directory #Getting bug on these lines with open(self.tensorRT_model_path, "wb") as f: self.get_logger().info("WRITINTG THE ENGINE ") f.write(self.engine_bytes) - self.get_logger().info("RETURNS 1") + + def initialize_engine(self, weight): + self.weight = Path(weight) if isinstance(weight, str) else weight + self.logger = trt.Logger(trt.Logger.WARNING) + trt.init_libnvinfer_plugins(self.logger, namespace ='') + with trt.Runtime(self.logger) as runtime: + self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) + self.execution_context = self.tensorRT_model.create_execution_context() + #self.names = [self.tensorRT_model.get_binding_name(i) for i in range(self.tensorRT_model.num_bindings)] + self.num_io_tensors = self.tensorRT_model.num_io_tensors + self.get_logger().info(f"BINDINGS: {self.num_io_tensors}") + self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] + self.num_io_tensors = self.tensorRT_model.num_io_tensors + self.bindings: List[int] = [0] * self.num_io_tensors + self.num_inputs = 0 + self.num_outputs = 0 + self.input_names = [] + self.output_names = [] + + for i in range(self.num_io_tensors): + self.tensor_name = self.tensorRT_model.get_tensor_name(i) + if self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.INPUT: + self.input_names.append(self.tensor_name) + self.num_inputs += 1 + else: + self.output_names.append(self.tensor_name) + self.num_outputs += 1 def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] @@ -199,7 +232,7 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) - # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}") + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a batch_tensor = torch.stack(batched_list) From 8074ca40bd1fc3d7f2d1a560665eb1099042f2b8 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 8 Nov 2024 14:37:33 +0000 Subject: [PATCH 09/37] Built and initliazed tensorRT engine --- .../yolov8_detection.py | 80 +++++++++++-------- 1 file changed, 
48 insertions(+), 32 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 9dd19eb9..91ff6e70 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -69,6 +69,7 @@ def __init__(self): #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value + self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value #Camera topics for eve @@ -140,14 +141,6 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - - - - # #Commented this out because only run when building the tensorRT engine - # self.build_engine() - - - self.initialize_engine(self.tensorRT_model_path) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -162,38 +155,56 @@ def __init__(self): self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." ) - + def build_engine(self): #Reading the onnx file in the perception models directory - self.logger = trt.Logger(trt.Logger.WARNING) + self.max_batch_size = 6 + self.channels = 3 + self.height = 640 + self.width = 640 + self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] + self.logger = trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) - self.network = self.builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) - self.parser = trt.OnnxParser(self.network, self.logger) - with open(self.onnx_model_path, "rb") as model_file: - - if not self.parser.parse(model_file.read()): - - for error in range(parser.num_errors): - print(self.parser.get_error(error)) - raise RuntimeError("Failed to parse the ONNX file.") - - - - #Building the tensorRT engine for inferencing - self.config = self.builder.create_builder_config() + self.config = self.builder.create_builder_config() + self.cache = self.config.create_timing_cache(b"") + self.config.set_timing_cache(self.cache, ignore_mismatch=False) self.total_memory = torch.cuda.get_device_properties(self.device).total_memory self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, self.total_memory) + self.flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + self.network = self.builder.create_network(self.flag) + self.parser = trt.OnnxParser(self.network, self.logger) + with open(self.onnx_model_path, "rb") as f: + if not self.parser.parse(f.read()): + self.get_logger().info("ERROR: Cannot read ONNX FILE") + for error in range(self.parser.num_errors): + self.get_logger().info(f"{self.parser.get_error(error)}") + self.inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)] + self.outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)] + for input in self.inputs: + self.get_logger().info(f"Model {input.name} shape:{input.shape} {input.dtype}") + for output in self.outputs: + self.get_logger().info(f"Model {output.name} shape: {output.shape} {output.dtype}") + if self.max_batch_size > 1: + self.profile = self.builder.create_optimization_profile() + self.min_shape = [1, self.channels, self.height, self.width] # Minimum batch size of 1 + 
self.opt_shape = [int(self.max_batch_size / 2), self.channels, self.height, self.width] # Optimal batch size (half of max, so 3) + self.max_shape = self.shape_input_model + + for input in self.inputs: + self.profile.set_shape(input.name, self.min_shape, self.opt_shape, self.max_shape) + self.config.add_optimization_profile(self.profile) + self.half = True + self.int8 = False + if self.half: + self.config.set_flag(trt.BuilderFlag.FP16) + elif self.int8: + self.config.set_flag(trt.BuilderFlag.INT8) self.engine_bytes = self.builder.build_serialized_network(self.network, self.config) assert self.engine_bytes is not None, "Failed to create engine" self.get_logger().info("BUILT THE ENGINE ") - - - #Writing to the perception models directory - #Getting bug on these lines with open(self.tensorRT_model_path, "wb") as f: - self.get_logger().info("WRITINTG THE ENGINE ") f.write(self.engine_bytes) - + self.get_logger().info("FINISHED WRITING ") def initialize_engine(self, weight): self.weight = Path(weight) if isinstance(weight, str) else weight @@ -201,10 +212,15 @@ def initialize_engine(self, weight): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) + self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() - #self.names = [self.tensorRT_model.get_binding_name(i) for i in range(self.tensorRT_model.num_bindings)] + + self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.get_logger().info(f"BINDINGS: {self.num_io_tensors}") + self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) + self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) + self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) + self.get_logger().info(f"INPUT SHAPE:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings: List[int] = [0] * self.num_io_tensors From 9b4637e309454ea97db5d4153e67b9d0f774075c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 21:29:39 +0000 Subject: [PATCH 10/37] Allocated memory for inferencing --- modules/docker-compose.perception.yaml | 2 + .../yolov8_detection.py | 63 +++++++++++++++---- .../camera_object_detection/requirements.txt | 2 - 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 13fc7f53..0679b51a 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -33,7 +33,9 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx + - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/yolov8m_dynamic.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine + - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models diff --git 
a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 91ff6e70..8c00ef72 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -25,6 +25,7 @@ import torch import onnx import tensorrt as trt +# import pycuda.driver as cuda class CameraDetectionNode(Node): @@ -50,8 +51,8 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m_copy2.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -87,7 +88,7 @@ def __init__(self): self.back_left_camera_topic = self.get_parameter("back_left_camera_topic").value - #Parameter for TensorRT Model + self.publish_vis_topic = self.get_parameter("publish_vis_topic").value self.publish_detection_topic = self.get_parameter("publish_detection_topic").value @@ -141,7 +142,9 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() + # self.stream = cuda.Stream() self.initialize_engine(self.tensorRT_model_path) + self.initialize_tensors() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -157,6 +160,7 @@ def __init__(self): ) def build_engine(self): + #Only calling this function when we dont have an engine file #Reading the onnx file in the perception models directory self.max_batch_size = 6 self.channels = 3 @@ -220,24 +224,61 @@ def initialize_engine(self, weight): self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - self.get_logger().info(f"INPUT SHAPE:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.bindings: List[int] = [0] * self.num_io_tensors - self.num_inputs = 0 - self.num_outputs = 0 + self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") + self.bindings = [0] * self.num_io_tensors + self.num_inputs = 0 + self.num_outputs = 0 self.input_names = [] self.output_names = [] - for i in range(self.num_io_tensors): self.tensor_name = self.tensorRT_model.get_tensor_name(i) if self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.INPUT: self.input_names.append(self.tensor_name) self.num_inputs += 1 - else: + elif self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.OUTPUT: self.output_names.append(self.tensor_name) - self.num_outputs += 1 - + self.num_outputs += 1 + # self.get_logger().info(f"Inputs:{self.input_names}") + # self.get_logger().info(f"Outputs:{self.output_names}") 
+ self.num_inputs = len(self.input_names) + self.num_outputs = len(self.output_names) + # self.get_logger().info(f"Number of inputs:{self.num_inputs}") + # self.get_logger().info(f"Number of outputs:{self.num_outputs}") + self.input_names = self.names[:self.num_inputs] + self.output_names = [name for name in self.output_names if name not in self.input_names] + self.get_logger().info(f"INPUT NAMES:: {self.input_names}") + self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + + def initialize_tensors(self): + self.dynamic = True + self.input_info = [] + self.output_info = [] + self.output_ptrs = [] + self.input_device_bindings = [] + self.output_device_bindings = [] + + for i, name in enumerate(self.input_names): + if self.tensorRT_model.get_tensor_name(i) == name: + + self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") + self.get_logger().info(f"Tensor Datatype:{self.dtype}") + # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) + # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) + # self.input_device_bindings.append(self.input_device_memory) + + for i, name in enumerate(self.output_names): + if self.tensorRT_model.get_tensor_name(i) == name: + self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") + self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) + # self.output_device_bindings.append(self.output_device_memory) def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 9ae1f4cb..4373e3c7 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -9,5 +9,3 @@ onnx onnxsim onnxruntime-gpu tensorrt -tensorrt_lean -tensorrt_dispatch \ No newline at end of file From 72a58a22574754bb5e2fff14b09288cad07afef3 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 22:05:32 +0000 Subject: [PATCH 11/37] fixed tensor function --- .../yolov8_detection.py | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 8c00ef72..53795d93 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -25,7 +25,17 @@ import torch import onnx import tensorrt as trt +from typing import List, Optional, Tuple, Union +from dataclasses import dataclass +from numpy import ndarray # import pycuda.driver as cuda +@dataclass +class Tensor: + name:str + dtype:np.dtype + shape:Tuple + cpu:ndarray + gpu:int class CameraDetectionNode(Node): @@ -250,6 +260,7 @@ def initialize_engine(self, weight): self.output_names = [name for 
name in self.output_names if name not in self.input_names] self.get_logger().info(f"INPUT NAMES:: {self.input_names}") self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + def initialize_tensors(self): self.dynamic = True @@ -258,10 +269,20 @@ def initialize_tensors(self): self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] - + + for name in self.output_names: + self.get_logger().info(f"NAMES of outputs:{name}") + self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) + self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") + self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) + # self.output_device_bindings.append(self.output_device_memory) + for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: - + self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") @@ -270,15 +291,7 @@ def initialize_tensors(self): # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) - for i, name in enumerate(self.output_names): - if self.tensorRT_model.get_tensor_name(i) == name: - self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") - self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) - # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) - # self.output_device_bindings.append(self.output_device_memory) + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] @@ -289,12 +302,14 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) + + # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a batch_tensor = torch.stack(batched_list) self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") - + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) From 46a0d38e5ee750952222cfc1f70bafd3894a5a67 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 11 Nov 2024 22:53:59 +0000 Subject: [PATCH 12/37] Appended output tensor --- .../camera_object_detection/yolov8_detection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 53795d93..a8f19fd1 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -29,6 +29,7 @@ from dataclasses import 
dataclass from numpy import ndarray # import pycuda.driver as cuda + @dataclass class Tensor: name:str @@ -276,9 +277,13 @@ def initialize_tensors(self): self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) + self.output_cpu = np.empty(self.tensorRT_output_shape, self.outputDtype) + self.gpu = 0 + # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) # self.output_device_bindings.append(self.output_device_memory) + self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.cpu, self.gpu)) + for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: @@ -287,6 +292,7 @@ def initialize_tensors(self): self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") + self.input_cpu = 0 # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) @@ -299,7 +305,7 @@ def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(1920, 1080)) + resized_compressedImage = cv2.resize(compressedImage,(1920, 1088)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) From 2fffe0d1970c179429df14ed9d43b2e5f0f2b350 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 15:14:28 +0000 Subject: [PATCH 13/37] Set up tensors, focusing on gpu allocation --- .../yolov8_detection.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a8f19fd1..0d1f97a7 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -155,7 +155,11 @@ def __init__(self): self.cv_bridge = CvBridge() # self.stream = cuda.Stream() self.initialize_engine(self.tensorRT_model_path) - self.initialize_tensors() + self.input_info, self.output_info = self.initialize_tensors() + self.engine_gpu_allocation(self.input_info) + self.get_logger().info(f"Input Info:{self.input_info}") + self.get_logger().info(f"Output Info:{self.output_info}") + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -267,7 +271,6 @@ def initialize_tensors(self): self.dynamic = True self.input_info = [] self.output_info = [] - self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] @@ -277,13 +280,15 @@ def 
initialize_tensors(self): self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") - self.output_cpu = np.empty(self.tensorRT_output_shape, self.outputDtype) - self.gpu = 0 + self.output_cpu = np.empty(0) + self.output_gpu = 0 + self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) # self.output_device_bindings.append(self.output_device_memory) - self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.cpu, self.gpu)) - + + + self.get_logger().info(f"Output Info:{self.output_info}") for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: @@ -292,12 +297,16 @@ def initialize_tensors(self): self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") - self.input_cpu = 0 + self.input_cpu = np.empty(0) + self.input_gpu = 0 + self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) + # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) - - + return self.input_info, self.output_info + def engine_gpu_allocation(self, input_info): + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 87f0785280efcb83321843af949dbcb02e697b61 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 22:42:33 +0000 Subject: [PATCH 14/37] Allocated memory --- .../yolov8_detection.py | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 0d1f97a7..95345111 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -63,7 +63,7 @@ def __init__(self): self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m_copy2.engine") + self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -170,7 +170,7 @@ def __init__(self): self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 ) - self.get_logger().info( + self.get_logger().info( f"Successfully created node listening on camera topic: {self.camera_topic}..." 
) @@ -273,13 +273,13 @@ def initialize_tensors(self): self.output_info = [] self.input_device_bindings = [] self.output_device_bindings = [] + batch_size = 6 for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor Output shape:{self.tensorRT_output_shape}") - self.get_logger().info(f"Tensor Output Datatype:{self.outputDtype}") self.output_cpu = np.empty(0) self.output_gpu = 0 self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) @@ -294,6 +294,7 @@ def initialize_tensors(self): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") self.get_logger().info(f"Tensor Datatype:{self.dtype}") @@ -305,8 +306,41 @@ def initialize_tensors(self): # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) # self.input_device_bindings.append(self.input_device_memory) return self.input_info, self.output_info + def engine_gpu_allocation(self, input_info): + + for i in range(10): + self.inputTensors = [] + for i in self.input_info: + self.inputTensors.append(i.cpu) + + assert len(self.inputTensors) == self.num_inputs, "does not match" + self.contiguous_inputs:List[ndarray] = [ + np.ascontiguousarray(i) for i in self.inputTensors + ] + self.get_logger().info(f"contigous shape:{self.contiguous_inputs}") + for i in range(self.num_inputs): + name = self.input_info[i].name + shape = self.input_info[i].shape + self.get_logger().info(f"input names:{name} & input shape:{shape}") + # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) + + # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) + self.bindings[i] = int(self.inp_info[i].gpu) + for i in range(self.num_outputs): + name = self.output_info[i].name + shape = self.output_info[i].shape + self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") + dtype = self.output_info[i].dtype + cpu = np.empty(shape, dtype=dtype) + # gpu = cuda.mem_alloc(cpu.nbytes) + # cuda.memcpy_htod_async(gpu,cpu, self.stream) + # self.outputCpuTensor.append(cpu) + + # self.execution_context.execute_async_v2() + # self.stream.synchronize() + def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] From 592a877fb00c4738ceea27d53b4929525d8cd5ea Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 12 Nov 2024 22:51:46 +0000 Subject: [PATCH 15/37] fixed small bug --- .../camera_object_detection/yolov8_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 95345111..4b9c80aa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -327,7 +327,7 @@ def engine_gpu_allocation(self, input_info): # gpu = 
self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) - self.bindings[i] = int(self.inp_info[i].gpu) + self.bindings[i] = int(self.input_info[i].gpu) for i in range(self.num_outputs): name = self.output_info[i].name shape = self.output_info[i].shape From ae4153ca8d06a4e952c69b71bfc6e0d6be9eb393 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 14 Nov 2024 21:05:33 +0000 Subject: [PATCH 16/37] Binded inp. & out., alloc. memory, setting up inf. --- modules/docker-compose.perception.yaml | 2 +- .../yolov8_detection.py | 94 ++++++++++--------- .../camera_object_detection/requirements.txt | 2 +- 3 files changed, 52 insertions(+), 46 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 0679b51a..f56b2e6b 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -39,7 +39,7 @@ services: - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/:/perception_models - + lidar_object_detection: build: context: .. diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 4b9c80aa..5d9cb749 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -137,7 +137,7 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) - self.ats.registerCallback(self.batch_callback) + self.ats.registerCallback(self.batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") @@ -157,8 +157,8 @@ def __init__(self): self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() self.engine_gpu_allocation(self.input_info) - self.get_logger().info(f"Input Info:{self.input_info}") - self.get_logger().info(f"Output Info:{self.output_info}") + + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -254,13 +254,9 @@ def initialize_engine(self, weight): self.num_inputs += 1 elif self.tensorRT_model.get_tensor_mode(self.tensor_name) == trt.TensorIOMode.OUTPUT: self.output_names.append(self.tensor_name) - self.num_outputs += 1 - # self.get_logger().info(f"Inputs:{self.input_names}") - # self.get_logger().info(f"Outputs:{self.output_names}") + self.num_outputs += 1 self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) - # self.get_logger().info(f"Number of inputs:{self.num_inputs}") - # self.get_logger().info(f"Number of outputs:{self.num_outputs}") self.input_names = self.names[:self.num_inputs] self.output_names = [name for name in self.output_names 
if name not in self.input_names] self.get_logger().info(f"INPUT NAMES:: {self.input_names}") @@ -271,6 +267,7 @@ def initialize_tensors(self): self.dynamic = True self.input_info = [] self.output_info = [] + self.output_ptrs = [] self.input_device_bindings = [] self.output_device_bindings = [] batch_size = 6 @@ -279,37 +276,28 @@ def initialize_tensors(self): self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] + self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.output_cpu = np.empty(0) + self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) self.output_gpu = 0 + # self.output_gpu = cuda.mem_alloc(self.output_cpu.nbytes) + # cyda.memcpy_htod_async(self.output_gpu, self.output_cpu, self.stream) + self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) - # self.output_host_memory = cuda.pagelocked_empty(self.tensorRT_output_shape, self.outputDtype) - # self.output_device_memory = cuda.mem_alloc(self.output_host_memory.nbytes) - # self.output_device_bindings.append(self.output_device_memory) - - - self.get_logger().info(f"Output Info:{self.output_info}") - + for i, name in enumerate(self.input_names): - if self.tensorRT_model.get_tensor_name(i) == name: - + if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) - self.get_logger().info(f"Tensor shape:{self.tensorRT_input_shape}") - self.get_logger().info(f"Tensor Datatype:{self.dtype}") - self.input_cpu = np.empty(0) - self.input_gpu = 0 + self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) + self.input_gpu = 1 + # self.input_gpu = cuda.mem_alloc(self.input_cpu.nbytes) + # cuda.memcpy_htod_async(self.input_gpu, self.input_cpu, self.stream) self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) - - # self.input_host_memory = cuda.pagelocked_empty(self.tensorRT_input_shape, self.dtype) - # self.input_device_memory = cuda.mem_alloc(self.input_host_memory.nbytes) - # self.input_device_bindings.append(self.input_device_memory) return self.input_info, self.output_info def engine_gpu_allocation(self, input_info): - - for i in range(10): self.inputTensors = [] for i in self.input_info: @@ -319,51 +307,69 @@ def engine_gpu_allocation(self, input_info): self.contiguous_inputs:List[ndarray] = [ np.ascontiguousarray(i) for i in self.inputTensors ] - self.get_logger().info(f"contigous shape:{self.contiguous_inputs}") + for i in range(self.num_inputs): name = self.input_info[i].name shape = self.input_info[i].shape + contigous_shape = self.contiguous_inputs[i].nbytes + self.get_logger().info(f"input names:{name} & input shape:{shape}") - # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i]) + + # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i].nbytes) # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) self.bindings[i] = int(self.input_info[i].gpu) + + self.output_gpu_ptrs:List[int] = [] + self.outputs_ndarray:List[ndarray] = [] for i in 
range(self.num_outputs): + j = i + self.num_inputs name = self.output_info[i].name shape = self.output_info[i].shape self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") dtype = self.output_info[i].dtype - cpu = np.empty(shape, dtype=dtype) + self.outputs_ndarray.append(self.output_cpu) + self.output_gpu_ptrs.append(self.output_gpu) # gpu = cuda.mem_alloc(cpu.nbytes) # cuda.memcpy_htod_async(gpu,cpu, self.stream) - # self.outputCpuTensor.append(cpu) + self.bindings[j] = int(self.output_info[i].gpu) + self.get_logger().info(f"Bindings Array:{self.bindings}") + self.get_logger().info(f"Outputs Array after append{self.output_gpu_ptrs}") - # self.execution_context.execute_async_v2() + #this is for the inferencing part + # self.execution_context.execute_async_v2(self.bindings, self.stream.handle) # self.stream.synchronize() + + # for i, o in enumerate(self.output_gpu_ptrs): + # cuda.memcpy_dtoh_async(self.outputs[i], o, self.stream) + - def batch_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] for msg in image_list: # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(1920, 1088)) + resized_compressedImage = cv2.resize(compressedImage,(640, 640)) preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) batched_list.append(preprocessedImage) - - - # self.get_logger().info(f"Resized Image Shape:{resized_compressedImage.shape}")a - - - batch_tensor = torch.stack(batched_list) - self.get_logger().info(f"batch tensor shape: {batch_tensor.shape}") - + self.batch_tensor = torch.stack(batched_list) + self.batch_array = self.batch_tensor.cpu().numpy().astype(np.float32) + self.get_logger().info(f"batch array shape: {self.batch_array.shape}") + height, width = self.input_info[0].shape[-2:] + self.get_logger().info(f"HEIGHT:{height}") + self.get_logger().info(f"Width:{width}") + cuda.memcpy_htod_async(self.input_info[0].gpu, self.batch_array, self.stream) + self.execution_context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle) + self.stream.synchronize() # Wait for inference to complete + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) - + + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 4373e3c7..1bdf147b 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -8,4 +8,4 @@ numpy==1.24.2 onnx onnxsim onnxruntime-gpu -tensorrt +tensorrt \ No newline at end of file From 2252b0eb17daee7f73ce54e084f31f46e916f0de Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Mon, 18 Nov 2024 19:48:47 +0000 Subject: [PATCH 17/37] Finished Inference function and allocated gpu --- modules/docker-compose.perception.yaml | 2 +- .../yolov8_detection.py | 110 +++++++++--------- .../camera_object_detection/requirements.txt | 5 +- 3 files changed, 61 insertions(+), 56 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index f56b2e6b..74ce35d3 100644 --- a/modules/docker-compose.perception.yaml +++ 
b/modules/docker-compose.perception.yaml @@ -33,7 +33,7 @@ services: volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/yolov8m_dynamic.onnx + - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/ylov8m_dynamic.onnx - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 5d9cb749..a06303f8 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -29,7 +29,7 @@ from dataclasses import dataclass from numpy import ndarray # import pycuda.driver as cuda - +from cuda import cudart @dataclass class Tensor: name:str @@ -62,7 +62,7 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m_dynamic.onnx") + self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") @@ -153,13 +153,15 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - # self.stream = cuda.Stream() + + status, self.stream = cudart.cudaStreamCreate() + assert status.value == 0, "IS NOT ZERO" + + #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - self.engine_gpu_allocation(self.input_info) - - - + self.tensorRT_inferencing(self.input_info) + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -224,7 +226,7 @@ def build_engine(self): with open(self.tensorRT_model_path, "wb") as f: f.write(self.engine_bytes) self.get_logger().info("FINISHED WRITING ") - + def initialize_engine(self, weight): self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) @@ -237,11 +239,9 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, [6, 3, 1080, 1920]) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors - self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") self.bindings = [0] * self.num_io_tensors self.num_inputs = 0 self.num_outputs = 0 @@ -268,20 +268,21 @@ def initialize_tensors(self): self.input_info = 
[] self.output_info = [] self.output_ptrs = [] - self.input_device_bindings = [] - self.output_device_bindings = [] batch_size = 6 for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) + #Batch size changes from -1 to 6 self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] - self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + # self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) - self.output_gpu = 0 - # self.output_gpu = cuda.mem_alloc(self.output_cpu.nbytes) - # cyda.memcpy_htod_async(self.output_gpu, self.output_cpu, self.stream) + status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) + assert status.value == 0 + cudart.cudaMemcpyAsync(self.output_gpu, self.output_cpu.ctypes.data, self.output_cpu.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) @@ -291,64 +292,63 @@ def initialize_tensors(self): self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) - self.input_gpu = 1 - # self.input_gpu = cuda.mem_alloc(self.input_cpu.nbytes) - # cuda.memcpy_htod_async(self.input_gpu, self.input_cpu, self.stream) + status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) + assert status.value == 0, "DOES NOT MATCH" + cudart.cudaMemcpyAsync( + self.input_gpu, self.input_cpu.ctypes.data, self.input_cpu.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) + return self.input_info, self.output_info - def engine_gpu_allocation(self, input_info): + def tensorRT_inferencing(self, input_info): for i in range(10): self.inputTensors = [] for i in self.input_info: self.inputTensors.append(i.cpu) - assert len(self.inputTensors) == self.num_inputs, "does not match" self.contiguous_inputs:List[ndarray] = [ np.ascontiguousarray(i) for i in self.inputTensors ] - for i in range(self.num_inputs): name = self.input_info[i].name shape = self.input_info[i].shape contigous_shape = self.contiguous_inputs[i].nbytes + cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, + self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) + self.bindings[i] = self.input_info[i].gpu - self.get_logger().info(f"input names:{name} & input shape:{shape}") - - # gpu = self.input_info[i].gpu = cuda.mem_alloc(contiguous_inputs[i].nbytes) - - # cuda.memcpy_htod_async(self.inp_info[i].gpu, contiguous_inputs[i],self.stream) - self.bindings[i] = int(self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] self.outputs_ndarray:List[ndarray] = [] for i in range(self.num_outputs): j = i + self.num_inputs - name = self.output_info[i].name - shape = self.output_info[i].shape - self.get_logger().info(f"output names: {name}& output SHAPES:{shape}") - dtype = self.output_info[i].dtype - self.outputs_ndarray.append(self.output_cpu) - 
self.output_gpu_ptrs.append(self.output_gpu) - # gpu = cuda.mem_alloc(cpu.nbytes) - # cuda.memcpy_htod_async(gpu,cpu, self.stream) - self.bindings[j] = int(self.output_info[i].gpu) - self.get_logger().info(f"Bindings Array:{self.bindings}") - self.get_logger().info(f"Outputs Array after append{self.output_gpu_ptrs}") - - #this is for the inferencing part - # self.execution_context.execute_async_v2(self.bindings, self.stream.handle) - # self.stream.synchronize() + cpu = self.output_info[i].cpu + gpu = self.output_info[i].gpu + self.outputs_ndarray.append(cpu) + self.output_gpu_ptrs.append(gpu) + self.bindings[j] = gpu + self.get_logger().info(f"Binding Gpu array:{self.bindings}") + for i in range(self.num_io_tensors): + # self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") + # self.get_logger().info(f"TENSOR NAMES: {self.tensorRT_model.get_tensor_name(i)}") + self.get_logger().info(f"Binding Gpu array:{self.bindings[i]}") + self.execution_context.set_tensor_address(self.tensorRT_model.get_tensor_name(i), self.bindings[i]) + + self.execution_context.execute_async_v3(self.stream) + cudart.cudaStreamSynchronize(self.stream) + # for i, o in enumerate(output_gpu_ptrs): + # cuda.memcpy_dtoh_async(outputs[i], o, self.stream) + + # return tuple(outputs) if len(outputs) > 1 else outputs[0] - # for i, o in enumerate(self.output_gpu_ptrs): - # cuda.memcpy_dtoh_async(self.outputs[i], o, self.stream) - def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] + + #Preprocessing for msg in image_list: - # Need to convert to numpy array and then make it have 3 color channels numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) resized_compressedImage = cv2.resize(compressedImage,(640, 640)) @@ -360,15 +360,17 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): height, width = self.input_info[0].shape[-2:] self.get_logger().info(f"HEIGHT:{height}") self.get_logger().info(f"Width:{width}") - cuda.memcpy_htod_async(self.input_info[0].gpu, self.batch_array, self.stream) - self.execution_context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle) - self.stream.synchronize() # Wait for inference to complete - + + #We have to call inferencing function here + + + #Bounding Boxes & oost processing detections + + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) - - + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): diff --git a/src/perception/camera_object_detection/requirements.txt b/src/perception/camera_object_detection/requirements.txt index 1bdf147b..877493f7 100644 --- a/src/perception/camera_object_detection/requirements.txt +++ b/src/perception/camera_object_detection/requirements.txt @@ -8,4 +8,7 @@ numpy==1.24.2 onnx onnxsim onnxruntime-gpu -tensorrt \ No newline at end of file +tensorrt==10.6.0 +tensorrt_lean +tensorrt_dispatch +cuda-python \ No newline at end of file From b27effbf5b27da4104e65a59c6749edb06a8c7eb Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 21 Nov 2024 20:55:22 +0000 Subject: [PATCH 18/37] Got Bounding boxes, socres, and predicted classes --- .../yolov8_detection.py | 133 +++++++++++------- 1 file changed, 84 insertions(+), 49 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py 
b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a06303f8..bf99facd 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -160,7 +160,7 @@ def __init__(self): #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - self.tensorRT_inferencing(self.input_info) + # self.tensorRT_inferencing(self.input_info) self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names @@ -239,7 +239,10 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) + + self.execution_context.set_input_shape(self.input_tensor_name, (6,3,640,640)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) + self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -258,7 +261,8 @@ def initialize_engine(self, weight): self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) self.input_names = self.names[:self.num_inputs] - self.output_names = [name for name in self.output_names if name not in self.input_names] + #self.output_names = self.names[:self.num_outputs] #This includes the input name in output + self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it self.get_logger().info(f"INPUT NAMES:: {self.input_names}") self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") @@ -272,11 +276,9 @@ def initialize_tensors(self): for name in self.output_names: self.get_logger().info(f"NAMES of outputs:{name}") - self.tensorRT_output_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - #Batch size changes from -1 to 6 - self.tensorRT_output_shape = (batch_size,) + self.tensorRT_output_shape[1:] - # self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") - + + self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) + self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -301,72 +303,105 @@ def initialize_tensors(self): return self.input_info, self.output_info - def tensorRT_inferencing(self, input_info): - for i in range(10): - self.inputTensors = [] - for i in self.input_info: - self.inputTensors.append(i.cpu) - assert len(self.inputTensors) == self.num_inputs, "does not match" - self.contiguous_inputs:List[ndarray] = [ - np.ascontiguousarray(i) for i in self.inputTensors - ] - for i in range(self.num_inputs): + def tensorRT_inferencing(self, batch_array): + assert batch_array.ndim == 4 + batch_size = batch_array.shape[0] + assert batch_size == self.input_info[0].shape[0] + #self.get_logger().info(f"Batch Array Stats: shape={batch_array.shape}, dtype={batch_array.dtype}, range=({batch_array.min()}, {batch_array.max()})") + #self.inputTensors = [batch_array] + 
self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] + #self.get_logger().info(f"Contiguous Inputs: dtype={self.contiguous_inputs[0].dtype}, shape={self.contiguous_inputs[0].shape}") + #for i, input_tensor in enumerate(self.contiguous_inputs): + #self.get_logger().info(f"Input {i}: shape={input_tensor.shape}, dtype={input_tensor.dtype}, range=({input_tensor.min()}, {input_tensor.max()})") + #for i, input_gpu in enumerate(self.input_info): + #self.get_logger().info(f"Input {i} GPU address: {input_gpu.gpu}") + + for i in range(self.num_inputs): name = self.input_info[i].name - shape = self.input_info[i].shape - contigous_shape = self.contiguous_inputs[i].nbytes cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.bindings[i] = self.input_info[i].gpu + self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] - self.outputs_ndarray:List[ndarray] = [] - for i in range(self.num_outputs): + self.output_gpu_ptrs:List[int] = [] + self.outputs_ndarray:List[ndarray] = [] + for i in range(self.num_outputs): j = i + self.num_inputs cpu = self.output_info[i].cpu gpu = self.output_info[i].gpu self.outputs_ndarray.append(cpu) self.output_gpu_ptrs.append(gpu) - self.bindings[j] = gpu - self.get_logger().info(f"Binding Gpu array:{self.bindings}") - for i in range(self.num_io_tensors): - # self.get_logger().info(f"Number of tensors: {self.num_io_tensors}") - # self.get_logger().info(f"TENSOR NAMES: {self.tensorRT_model.get_tensor_name(i)}") - self.get_logger().info(f"Binding Gpu array:{self.bindings[i]}") - self.execution_context.set_tensor_address(self.tensorRT_model.get_tensor_name(i), self.bindings[i]) - - self.execution_context.execute_async_v3(self.stream) - cudart.cudaStreamSynchronize(self.stream) - # for i, o in enumerate(output_gpu_ptrs): - # cuda.memcpy_dtoh_async(outputs[i], o, self.stream) - - # return tuple(outputs) if len(outputs) > 1 else outputs[0] + self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) + self.execution_context.execute_async_v3(self.stream) + cudart.cudaStreamSynchronize(self.stream) + for i, o in enumerate(self.output_gpu_ptrs): + cudart.cudaMemcpyAsync( + self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream + ) + # for i, output in enumerate(self.outputs_ndarray): + + #for i, output in enumerate(self.outputs_ndarray): + + return self.outputs_ndarray def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] - #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) resized_compressedImage = cv2.resize(compressedImage,(640, 640)) - preprocessedImage = self.batch_convert_to_tensor(resized_compressedImage) - batched_list.append(preprocessedImage) - self.batch_tensor = torch.stack(batched_list) - self.batch_array = self.batch_tensor.cpu().numpy().astype(np.float32) - self.get_logger().info(f"batch array shape: {self.batch_array.shape}") - height, width = self.input_info[0].shape[-2:] - self.get_logger().info(f"HEIGHT:{height}") - self.get_logger().info(f"Width:{width}") - + rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = 
rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + #We have to call inferencing function here - + detections = self.tensorRT_inferencing(batched_images) - #Bounding Boxes & oost processing detections + #self.get_logger().info(f"detections[0] output:{detections[0][0]}") + # for i, output in enumerate(detections[0][0][:2]): + # self.get_logger().info(f"Output {i}: shape={output.shape}, dtype={output.dtype}, range=({output.min()}, {output.max()})") + """ + - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images + - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores + - Detection[0][0][0] represnets the first detection components ranging from 0 to 83 + - 8400 represents 8400 anchors + """ + batch_size = detections[0].shape[0] + for image_idx in range(batch_size): + image_detections = detections[0][image_idx] + + for anchor_idx in range(8400): + x_min = image_detections[0][anchor_idx] + y_min = image_detections[1][anchor_idx] + x_max = image_detections[2][anchor_idx] + y_max = image_detections[3][anchor_idx] + + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,84)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.5: + self.get_logger().info(f"Bounding Box: ({x_min}, {y_min}, {x_max}, {y_max})") + self.get_logger().info(f" Predicted Class: {predicted_class} (Probability: {predicted_prob})") + self.get_logger().info(f"Confidence:{confidence}") + #self.get_logger().info(f"SHAPE:{detections[0].shape[0]}") + + #self.get_logger().info(f"result detections shape:{results_detections[0].shape}") + #self.get_logger().info(f"result detections [0]:{results_detections[0]}") + + #Now need to pass it to yolov8 tensorRT model # predictions = self.model(batch_tensor) From 485d59227f65015d801ebf7f8ab1e678a682a686 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 17:07:43 +0000 Subject: [PATCH 19/37] Stored results, working on visualization --- modules/docker-compose.perception.yaml | 2 + .../yolov8_detection.py | 126 +++++++++++++----- 2 files changed, 94 insertions(+), 34 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 74ce35d3..db2efd1a 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -38,6 +38,8 @@ services: - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt + - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx + - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py 
b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index bf99facd..a5795c3f 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,7 +62,7 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/yolov8m.onnx") + self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("publish_detection_topic", "/detections") @@ -290,8 +290,8 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: - self.tensorRT_input_shape = tuple(self.tensorRT_model.get_tensor_shape(name)) - self.tensorRT_input_shape = (batch_size,) + self.tensorRT_input_shape[1:] + self.tensorRT_input_shape = tuple(self.inputShape) + self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -340,20 +340,19 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - # for i, output in enumerate(self.outputs_ndarray): - - #for i, output in enumerate(self.outputs_ndarray): - + return self.outputs_ndarray def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - #Taking msgs from all 6 ros2 subscribers + #Taking msgs from all 6 ros2 subscribers + imageHeight = self.input_info[0].shape[-1] + imageWidth = self.input_info[0].shape[-2] image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(640, 640)) + resized_compressedImage = cv2.resize(compressedImage,(imageWidth, imageHeight)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) @@ -362,14 +361,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): batched_list.append(tensor_image) batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - - #We have to call inferencing function here detections = self.tensorRT_inferencing(batched_images) - - #self.get_logger().info(f"detections[0] output:{detections[0][0]}") - # for i, output in enumerate(detections[0][0][:2]): - # self.get_logger().info(f"Output {i}: shape={output.shape}, dtype={output.dtype}, range=({output.min()}, {output.max()})") - """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores @@ -377,36 +369,102 @@ def 
batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ + results_dict = {i: [] for i in range(6)} batch_size = detections[0].shape[0] for image_idx in range(batch_size): + #self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") image_detections = detections[0][image_idx] + + if image_idx not in results_dict: + results_dict[image_idx] = [] for anchor_idx in range(8400): - x_min = image_detections[0][anchor_idx] - y_min = image_detections[1][anchor_idx] - x_max = image_detections[2][anchor_idx] - y_max = image_detections[3][anchor_idx] + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,84)] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] predicted_class = np.argmax(class_probs) predicted_prob = class_probs[predicted_class] - if confidence > 0.5: - self.get_logger().info(f"Bounding Box: ({x_min}, {y_min}, {x_max}, {y_max})") - self.get_logger().info(f" Predicted Class: {predicted_class} (Probability: {predicted_prob})") - self.get_logger().info(f"Confidence:{confidence}") - #self.get_logger().info(f"SHAPE:{detections[0].shape[0]}") - - #self.get_logger().info(f"result detections shape:{results_detections[0].shape}") - #self.get_logger().info(f"result detections [0]:{results_detections[0]}") - + if confidence > 0.4: - #Now need to pass it to yolov8 tensorRT model - # predictions = self.model(batch_tensor) - + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, imageHeight, imageWidth) + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + + # To store final results after NMS + + + + def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): + half_width = width /2 + half_height = height /2 + + x_min = x_center - half_width + y_min = y_center - half_height + x_max = x_center + half_width + y_max = y_center + half_height + return x_min, y_min, x_max, y_max + + def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): + #Confidence threshold + bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] + bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) + bbox_list_new = [] + while bboxes_sorted: + current_box = bboxes_sorted.pop(0) + bbox_list_new.append(current_box) + + # Filter boxes with IoU below the threshold + bboxes_sorted = [ + box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold + ] + + self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") + return bbox_list_new + + def get_iou(self, box1, box2): + x1, y1, x2, y2 = box1 + x3, y3, x4, y4 = box2 + + # Intersection coordinates + x_inter1 = max(x1, x3) + y_inter1 = max(y1, y3) + x_inter2 = min(x2, x4) + y_inter2 = min(y2, y4) + + # Width and height of the intersection + width_inter = abs(x_inter2 - x_inter1) + height_inter = abs(y_inter2 - y_inter1) + + # If there is no intersection + if x_inter2 < x_inter1 or y_inter2 < y_inter1: + return 0.0 + + # Area of intersection + area_inter = width_inter * height_inter + + # Areas of the two boxes + 
width_box1 = abs(x2 - x1) + height_box1 = abs(y2 - y1) + area_box1 = width_box1 * height_box1 + + width_box2 = abs(x4 - x3) + height_box2 = abs(y4 - y3) + area_box2 = width_box2 * height_box2 + + # Union area + area_union = area_box1 + area_box2 - area_inter + + # Intersection over Union + iou = area_inter / area_union + + return iou - #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) From 43434350d15a76cd943a633a1728903e06d2f3eb Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 18:01:19 +0000 Subject: [PATCH 20/37] Refactored code --- .../yolov8_detection.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a5795c3f..e0ae4eaa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -65,6 +65,8 @@ def __init__(self): self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") + self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") + self.declare_parameter("batch_publish_detection_topic", "/batch_detections") self.declare_parameter("publish_detection_topic", "/detections") self.declare_parameter("model_path", "/perception_models/yolov8m.pt") @@ -80,6 +82,8 @@ def __init__(self): #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.batch_publish_detection_topic = self.get_parameter("batch_publish_detection_topic").value + self.batch_publish_vis_topic = self.get_parameter("batch_publish_vis_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value @@ -168,6 +172,9 @@ def __init__(self): # setup vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) + self.batch_detection_publisher = self.create_publisher(Detection2DArray, batch_publish_detection_topic, 10) + self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_vis_publisher, 10) + self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 @@ -412,20 +419,20 @@ def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageW def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): #Confidence threshold - bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] - bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) - bbox_list_new = [] - while bboxes_sorted: - current_box = bboxes_sorted.pop(0) - bbox_list_new.append(current_box) + bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] + bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) + bbox_list_new = [] + while bboxes_sorted: + current_box = bboxes_sorted.pop(0) + bbox_list_new.append(current_box) # Filter boxes with IoU below the threshold - bboxes_sorted = [ - box for box in bboxes_sorted if 
self.get_iou(current_box[:4], box[:4]) < iou_threshold - ] + bboxes_sorted = [ + box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold + ] - self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") - return bbox_list_new + self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") + return bbox_list_new def get_iou(self, box1, box2): x1, y1, x2, y2 = box1 From c3551fcfb620658d050899a40493cbb6dfa299a7 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 26 Nov 2024 18:08:26 +0000 Subject: [PATCH 21/37] Refactored publishers --- .../camera_object_detection/yolov8_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index e0ae4eaa..7f242caa 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -172,8 +172,8 @@ def __init__(self): # setup vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) - self.batch_detection_publisher = self.create_publisher(Detection2DArray, batch_publish_detection_topic, 10) - self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_vis_publisher, 10) + self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) + self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( From e1c2e471df33943ae73aadd35e50f974a8816268 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 27 Nov 2024 20:12:15 +0000 Subject: [PATCH 22/37] Added eve configuration --- modules/docker-compose.perception.yaml | 3 +- .../yolov8_detection.py | 68 +++++++++++++++---- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index db2efd1a..1d03eb54 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -40,8 +40,9 @@ services: - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine + - /mnt/wato-drive2/perception_models/eve.onnx:/perception_models/eve.onnx - /mnt/wato-drive2/perception_models/:/perception_models - + lidar_object_detection: build: context: .. 
diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 7f242caa..ed297ce1 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,8 +62,8 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") - self.declare_parameter("tensorRT_model_path", "/perception_models/yolov8m.engine") + self.declare_parameter("onnx_model_path", "/perception_models/eve.onnx") #tensorRT. extensions + self.declare_parameter("tensorRT_model_path", "/perception_models/eve.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") self.declare_parameter("batch_publish_detection_topic", "/batch_detections") @@ -75,7 +75,7 @@ def __init__(self): self.declare_parameter("crop_mode", "LetterBox") self.declare_parameter("save_detections", False) - #Declare batch topic + #Declare batch topic self.declare_parameter("batch_inference_topic", "/batched_camera_message") @@ -143,6 +143,14 @@ def __init__(self): self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) self.ats.registerCallback(self.batch_inference_callback) + #Subscription for Eve cameras + self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + + #Create this callback function tomorrow + self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None @@ -164,17 +172,17 @@ def __init__(self): #self.build_engine() self.initialize_engine(self.tensorRT_model_path) self.input_info, self.output_info = self.initialize_tensors() - # self.tensorRT_inferencing(self.input_info) - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) # setup vis publishers + #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) + #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( Detection2DArray, self.publish_detection_topic, 10 @@ -186,10 +194,10 @@ def __init__(self): def build_engine(self): #Only calling this function when we 
dont have an engine file #Reading the onnx file in the perception models directory - self.max_batch_size = 6 + self.max_batch_size = 3 self.channels = 3 - self.height = 640 - self.width = 640 + self.height = 1024 + self.width = 1024 self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] self.logger = trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) @@ -247,7 +255,7 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, (6,3,640,640)) + self.execution_context.set_input_shape(self.input_tensor_name, (3,3,1024,1024)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] @@ -349,6 +357,24 @@ def tensorRT_inferencing(self, batch_array): ) return self.outputs_ndarray + def eve_batch_inference_callback(self, msg1,msg2,msg3): + imageHeight = self.input_info[0].shape[-1] + imageWidth = self.input_info[0].shape[-2] + imageList = [msg1,msg2,msg3] + eve_batched_list = [] + for msg in imageList: + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage, (imageWidth, imageHeight)) + rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers imageHeight = self.input_info[0].shape[-1] @@ -405,8 +431,6 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): # To store final results after NMS - - def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 @@ -472,6 +496,26 @@ def get_iou(self, box1, box2): return iou + def publish_bounding_boxes(self, results_dict): + detection2darray = Detection2DArray() + for image_idx, bboxes in results_dict.items(): + detection2d = Detection2D() + x_min, y_min, x_max, y_max, confidence, class_id = bbox + center_x = (x_min + x_max) / 2 + center_y = (y_min + y_max) / 2 + size_x = x_max - x_min + size_y = y_max - y_min + detection2d.bbox.center.position.x = center_x + detection2d.bbox.center.position.y = center_y + detection2d.bbox.size_x = size_x + detection2d.bbox.size_y = size_y + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = class_id + detected_object.hypothesis.score = confidence + detection2darray.detections.append(detection2d) + + + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) @@ -583,7 +627,7 @@ def publish_vis(self, annotated_img, msg, feed): imgmsg.header.stamp = msg.header.stamp imgmsg.header.frame_id = msg.header.frame_id self.vis_publisher.publish(imgmsg) - + def publish_detections(self, detections, msg, feed): # Publish detections to an detectionList message detection2darray = Detection2DArray() From 
976ffb143d1d01edf3f199239242cd9dc279f2ce Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 27 Nov 2024 21:44:22 +0000 Subject: [PATCH 23/37] Eve configuiration completed for batch inf --- .../yolov8_detection.py | 90 +++++++++++++------ 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index ed297ce1..c19c1f0d 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -62,8 +62,9 @@ def __init__(self): self.declare_parameter("back_right_camera_topic", "/CAM_BACK_RIGHT/image_rect_compressed") self.declare_parameter("back_left_camera_topic", "/CAM_BACK_LEFT/image_rect_compressed") - self.declare_parameter("onnx_model_path", "/perception_models/eve.onnx") #tensorRT. extensions - self.declare_parameter("tensorRT_model_path", "/perception_models/eve.engine") + self.declare_parameter("onnx_model_path", "/perception_models/tensorRT.onnx") #tensorRT. extensions + self.declare_parameter("tensorRT_model_path", "/perception_models/tensorRT.engine") + self.declare_parameter("eve_tensorRT_model_path", "/perception_models/eve.engine") self.declare_parameter("publish_vis_topic", "/annotated_img") self.declare_parameter("batch_publish_vis_topic", "/batch_annotated_img") self.declare_parameter("batch_publish_detection_topic", "/batch_detections") @@ -87,7 +88,7 @@ def __init__(self): self.onnx_model_path = self.get_parameter("onnx_model_path").value self.tensorRT_model_path = self.get_parameter("tensorRT_model_path").value - + self.eve_tensorRT_model_path = self.get_parameter("eve_tensorRT_model_path").value #Camera topics for eve self.camera_topic = self.get_parameter("camera_topic").value self.left_camera_topic = self.get_parameter("left_camera_topic").value @@ -170,8 +171,8 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() - self.initialize_engine(self.tensorRT_model_path) - self.input_info, self.output_info = self.initialize_tensors() + # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) + # self.input_info, self.output_info = self.initialize_tensors() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -242,7 +243,7 @@ def build_engine(self): f.write(self.engine_bytes) self.get_logger().info("FINISHED WRITING ") - def initialize_engine(self, weight): + def initialize_engine(self, weight, batch_size, rgb, width, height): self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) trt.init_libnvinfer_plugins(self.logger, namespace ='') @@ -255,9 +256,9 @@ def initialize_engine(self, weight): self.num_io_tensors = self.tensorRT_model.num_io_tensors self.input_tensor_name = self.tensorRT_model.get_tensor_name(0) - self.execution_context.set_input_shape(self.input_tensor_name, (3,3,1024,1024)) + self.execution_context.set_input_shape(self.input_tensor_name, (batch_size, rgb, width, height)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - self.get_logger().info(f"tensor Shape:{self.inputShape}") + #self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = 
[self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -278,8 +279,8 @@ def initialize_engine(self, weight): self.input_names = self.names[:self.num_inputs] #self.output_names = self.names[:self.num_outputs] #This includes the input name in output self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it - self.get_logger().info(f"INPUT NAMES:: {self.input_names}") - self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") + #self.get_logger().info(f"INPUT NAMES:: {self.input_names}") + #self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") def initialize_tensors(self): @@ -287,13 +288,13 @@ def initialize_tensors(self): self.input_info = [] self.output_info = [] self.output_ptrs = [] - batch_size = 6 + for name in self.output_names: - self.get_logger().info(f"NAMES of outputs:{name}") + #self.get_logger().info(f"NAMES of outputs:{name}") self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) - self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") + #self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -306,7 +307,7 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) - self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") + #self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -357,15 +358,15 @@ def tensorRT_inferencing(self, batch_array): ) return self.outputs_ndarray + + # will be called with eve rosbag def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageHeight = self.input_info[0].shape[-1] - imageWidth = self.input_info[0].shape[-2] imageList = [msg1,msg2,msg3] eve_batched_list = [] for msg in imageList: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (imageWidth, imageHeight)) + resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) @@ -374,26 +375,59 @@ def eve_batch_inference_callback(self, msg1,msg2,msg3): batched_list.append(tensor_image) batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + batch_size = detections[0].shape[0] + anchors = detections[0].shape[2] + results_dict = {i: [] for i in range(batch_size)} + for image_idx in range(batch_size): + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + image_detections = detections[0][image_idx] + + if 
image_idx not in results_dict: + results_dict[image_idx] = [] + + for anchor_idx in range(anchors): + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.45: + + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + + + # will be called with nuscenes rosbag def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Taking msgs from all 6 ros2 subscribers - imageHeight = self.input_info[0].shape[-1] - imageWidth = self.input_info[0].shape[-2] image_list = [msg1,msg2,msg3,msg4,msg5,msg6] batched_list = [] #Preprocessing for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage,(imageWidth, imageHeight)) + resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) tensor_image = torch.from_numpy(float_image).to('cuda') batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) + self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images @@ -402,10 +436,10 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ - results_dict = {i: [] for i in range(6)} batch_size = detections[0].shape[0] + results_dict = {i: [] for i in range(batch_size)} for image_idx in range(batch_size): - #self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") image_detections = detections[0][image_idx] if image_idx not in results_dict: @@ -422,13 +456,14 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): predicted_class = np.argmax(class_probs) predicted_prob = class_probs[predicted_class] - if confidence > 0.4: + if confidence > 0.45: #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, imageHeight, imageWidth) + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, 
predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + #self.get_logger().info(f"resultsdict: {results_dict}") # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): @@ -449,13 +484,10 @@ def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): while bboxes_sorted: current_box = bboxes_sorted.pop(0) bbox_list_new.append(current_box) - # Filter boxes with IoU below the threshold bboxes_sorted = [ box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold ] - - self.get_logger().info(f"Filtered Boxes After NMS: {bbox_list_new}") return bbox_list_new def get_iou(self, box1, box2): From 9624d8d64db2e8f2451a739c5964a5a8ce35fdf6 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 28 Nov 2024 21:59:42 +0000 Subject: [PATCH 24/37] Created publishers and msgs --- .../yolov8_detection.py | 7 +++---- .../CMakeLists.txt | 21 +++++++++++++++++++ .../msg/BatchDetection.msg | 3 +++ .../camera_object_detection_msgs/package.xml | 20 ++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index c19c1f0d..1530bf38 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -249,7 +249,7 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) - self.get_logger().info(f"TENSORRT Model:{self.weight}") + #self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() @@ -460,16 +460,15 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - #self.get_logger().info(f"resultsdict: {results_dict}") + # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 - x_min = x_center - half_width y_min = y_center - half_height x_max = x_center + half_width diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt new file mode 100644 index 00000000..d4d41677 --- /dev/null +++ 
b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.8) +project(camera_objection_detection_msgs) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +#find dependencies +find_package(ament_cmake REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(std_msgs REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(vision_msgs REQUIRED) + + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/BatchDetection.msg" + DEPENDENCIES std_msgs sensor_msgs vision_msgs +) +ament_export_dependencies(rosidl_default_runtime) +ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg new file mode 100644 index 00000000..e8ecdf73 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/BatchDetection.msg @@ -0,0 +1,3 @@ +std_msgs/Header header +sensor_msgs/CompressedImage[] images # Array of compressed images in the batch +vision_msgs/Detection2DArray[] detections # Array of detections for each image diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml new file mode 100644 index 00000000..4e218806 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -0,0 +1,20 @@ + + + + camera_object_detction_msgs + 0.0.0 + TODO: Package description + a95rahma + TODO: License declaration + ament_cmake + std_msgs + sensor_msgs + vision_msgs + + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + + ament_cmake + + \ No newline at end of file From d24695acaf9b481de913d834f5d0769413f45080 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 29 Nov 2024 22:28:18 +0000 Subject: [PATCH 25/37] Added msg file configs --- .../camera_object_detection.Dockerfile | 1 + .../yolov8_detection.py | 64 +++++++++++++------ .../camera_object_detection/package.xml | 3 +- .../CMakeLists.txt | 14 ++-- .../camera_object_detection_msgs/package.xml | 3 +- 5 files changed, 58 insertions(+), 27 deletions(-) diff --git a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile index 5c3ee385..242c2d40 100644 --- a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile +++ b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile @@ -7,6 +7,7 @@ WORKDIR ${AMENT_WS}/src # Copy in source code COPY src/perception/camera_object_detection camera_object_detection +COPY src/wato_msgs/perception_msgs/camera_object_detection_msgs camera_object_detection_msgs # Scan for rosdeps RUN apt-get -qq update && rosdep update && \ diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 1530bf38..54948b79 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -2,12 +2,15 @@ from rclpy.node import Node import os from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer +from camera_object_detection_msgs.msg import 
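Once the new interface package is built and the workspace is sourced (for example with colcon build --packages-select camera_object_detection_msgs), a BatchDetection message can be filled like any other generated ROS 2 type; a minimal sketch with placeholder field values:

from camera_object_detection_msgs.msg import BatchDetection
from sensor_msgs.msg import CompressedImage
from vision_msgs.msg import Detection2DArray

batch = BatchDetection()
batch.header.frame_id = "batch"                      # illustrative frame id
batch.images.append(CompressedImage(format="jpeg"))  # one entry per camera image
batch.detections.append(Detection2DArray())          # matching detections per image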
BatchDetection from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, Detection2D, Detection2DArray, ) + +from std_msgs.msg import Header from pathlib import Path from ultralytics import YOLO from ultralytics.nn.autobackend import AutoBackend @@ -179,10 +182,10 @@ def __init__(self): # setup vis publishers #Batch vis publishers - self.batched_camera_message_publisher = self.create_publisher(Image,self.batch_inference_topic, 10) + self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) - + #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) self.detection_publisher = self.create_publisher( @@ -460,7 +463,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) @@ -526,26 +529,47 @@ def get_iou(self, box1, box2): iou = area_inter / area_union return iou + def publish_batch(self, image_list, results_dict): + batch_msg = BatchDetection() + batch_msg.header = Header() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "camera_frame" + for image in image_list: + compressed_msg = CompressedImage() + compressed_msg.header.stamp = self.get_clock().now().to_msg() + compressed_msg.header.frame_id = "camera_frame" + compressed_msg.format = "jpeg" + compressed_image = cv2.imencode('.jpg', image)[1].tobytes() + compressed_msg.data = compressed_image + batch_msgs.images.append(compressed_msg) + for image_idx, bboxes in result_dict.items(): + detection_array_msg = Detection2DArray() + for bbox in bboxes: + x_min, y_min, x_max, y_max, confidence, class_id = bbox + detection_msg = Detection2D() + + # Populate bounding box + detection_msg.bbox.center.x = (x_min + x_max) / 2 + detection_msg.bbox.center.y = (y_min + y_max) / 2 + detection_msg.bbox.size_x = x_max - x_min + detection_msg.bbox.size_y = y_max - y_min - def publish_bounding_boxes(self, results_dict): - detection2darray = Detection2DArray() - for image_idx, bboxes in results_dict.items(): - detection2d = Detection2D() - x_min, y_min, x_max, y_max, confidence, class_id = bbox - center_x = (x_min + x_max) / 2 - center_y = (y_min + y_max) / 2 - size_x = x_max - x_min - size_y = y_max - y_min - detection2d.bbox.center.position.x = center_x - detection2d.bbox.center.position.y = center_y - detection2d.bbox.size_x = size_x - detection2d.bbox.size_y = size_y - detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = class_id - detected_object.hypothesis.score = confidence - detection2darray.detections.append(detection2d) + # Add class and confidence metadata + hypothesis = ObjectHypothesisWithPose() + hypothesis.id = int(class_id) + hypothesis.score = confidence + 
detection_msg.results.append(hypothesis) + + detection_array_msg.detections.append(detection_msg) + + batch_msg.detections.append(detection_array_msg) + self.batch_publisher.publish(batch_msg) + self.get_logger().info("Published batch message with images and detections.") + #we populate the compressedIMage array with the image list with raw images from nuscenes + #We populate the detection2d array with our results_dict + #Converting to tensor for batching def batch_convert_to_tensor(self, cv_image): diff --git a/src/perception/camera_object_detection/package.xml b/src/perception/camera_object_detection/package.xml index a8b24405..d1dfe6b3 100755 --- a/src/perception/camera_object_detection/package.xml +++ b/src/perception/camera_object_detection/package.xml @@ -12,7 +12,8 @@ sensor_msgs vision_msgs std_msgs - + camera_object_detection_msgs + ament_copyright ament_flake8 ament_pep257 diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index d4d41677..eacbdaee 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -1,11 +1,15 @@ cmake_minimum_required(VERSION 3.8) -project(camera_objection_detection_msgs) +project(camera_object_detection_msgs) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) +endif() if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() -#find dependencies + find_package(ament_cmake REQUIRED) find_package(rosidl_default_generators REQUIRED) find_package(std_msgs REQUIRED) @@ -13,9 +17,9 @@ find_package(sensor_msgs REQUIRED) find_package(vision_msgs REQUIRED) + rosidl_generate_interfaces(${PROJECT_NAME} - "msg/BatchDetection.msg" - DEPENDENCIES std_msgs sensor_msgs vision_msgs -) + "msg/BatchDetection.msg" + DEPENDENCIES std_msgs sensor_msgs vision_msgs) ament_export_dependencies(rosidl_default_runtime) ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index 4e218806..72d71357 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -1,11 +1,12 @@ - camera_object_detction_msgs + camera_object_detection_msgs 0.0.0 TODO: Package description a95rahma TODO: License declaration + ament_cmake std_msgs sensor_msgs From b2cd80cd802d7aec226f21804a61c98fa974378c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 3 Dec 2024 17:39:12 +0000 Subject: [PATCH 26/37] Refactored publishing --- .../camera_object_detection.Dockerfile | 2 +- .../camera_object_detection/package.xml | 2 ++ .../CMakeLists.txt | 8 ++--- .../camera_object_detection_msgs/package.xml | 32 +++++++++---------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile index 242c2d40..933bddb6 100644 --- a/docker/perception/camera_object_detection/camera_object_detection.Dockerfile +++ b/docker/perception/camera_object_detection/camera_object_detection.Dockerfile @@ -33,7 +33,7 @@ RUN rm requirements.txt # Install Rosdep requirements COPY --from=source /tmp/colcon_install_list /tmp/colcon_install_list RUN 
apt-get update && apt-fast install -qq -y --no-install-recommends $(cat /tmp/colcon_install_list) - +RUN apt install ros-$ROS_DISTRO-tf-transformations -y # Copy in source code from source stage WORKDIR ${AMENT_WS} COPY --from=source ${AMENT_WS}/src src diff --git a/src/perception/camera_object_detection/package.xml b/src/perception/camera_object_detection/package.xml index d1dfe6b3..fc078392 100755 --- a/src/perception/camera_object_detection/package.xml +++ b/src/perception/camera_object_detection/package.xml @@ -9,10 +9,12 @@ TODO: License declaration cv_bridge + rclpy sensor_msgs vision_msgs std_msgs camera_object_detection_msgs + python3-pytest ament_copyright ament_flake8 diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index eacbdaee..21c77460 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -1,10 +1,6 @@ cmake_minimum_required(VERSION 3.8) project(camera_object_detection_msgs) -if(NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) -endif() - if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() @@ -20,6 +16,8 @@ find_package(vision_msgs REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/BatchDetection.msg" - DEPENDENCIES std_msgs sensor_msgs vision_msgs) + DEPENDENCIES std_msgs sensor_msgs vision_msgs +) + ament_export_dependencies(rosidl_default_runtime) ament_package() \ No newline at end of file diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index 72d71357..b4edda30 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -1,21 +1,21 @@ - camera_object_detection_msgs - 0.0.0 - TODO: Package description - a95rahma - TODO: License declaration - - ament_cmake - std_msgs - sensor_msgs - vision_msgs +camera_object_detection_msgs +0.0.0 +TODO: Package description +a95rahma +TODO: License declaration +ament_cmake - rosidl_default_generators - rosidl_default_runtime - rosidl_interface_packages - +std_msgs +vision_msgs +geometry_msgs + +rosidl_default_generators +rosidl_default_runtime +rosidl_interface_packages + ament_cmake - - \ No newline at end of file + + \ No newline at end of file From 6de65300578170b4b462493ca56d39122b2cff5c Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Wed, 4 Dec 2024 20:06:11 +0000 Subject: [PATCH 27/37] Publishing detections on foxglove --- .../yolov8_detection.py | 93 +++++++++++-------- .../camera_object_detection_msgs/package.xml | 2 +- 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 54948b79..a9736031 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -420,6 +420,8 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): for msg in image_list: numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = 
compressedImage.shape[:2] + self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 @@ -463,10 +465,10 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): #Convert from chw to tlbr x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + self.publish_batch(image_list, results_dict) # To store final results after NMS def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): @@ -529,49 +531,62 @@ def get_iou(self, box1, box2): iou = area_inter / area_union return iou + + def publish_batch(self, image_list, results_dict): batch_msg = BatchDetection() - batch_msg.header = Header() batch_msg.header.stamp = self.get_clock().now().to_msg() - batch_msg.header.frame_id = "camera_frame" - for image in image_list: - compressed_msg = CompressedImage() - compressed_msg.header.stamp = self.get_clock().now().to_msg() - compressed_msg.header.frame_id = "camera_frame" - compressed_msg.format = "jpeg" - compressed_image = cv2.imencode('.jpg', image)[1].tobytes() - compressed_msg.data = compressed_image - batch_msgs.images.append(compressed_msg) - for image_idx, bboxes in result_dict.items(): - detection_array_msg = Detection2DArray() - for bbox in bboxes: - x_min, y_min, x_max, y_max, confidence, class_id = bbox - detection_msg = Detection2D() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + compressed_image = CompressedImage() + compressed_image.header = img_msg.header + compressed_image.format = "jpeg" + compressed_image.data = img_msg.data # Original data + batch_msg.images.append(compressed_image) + + for idx in range(len(image_list)): + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"image_{idx}" + for bbox in results_dict[idx]: + detection = Detection2D() + x_min, y_min, x_max, y_max, confidence, predicted_class = bbox + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = x_max - x_min + detection.bbox.size_y = y_max - y_min + detected_object = ObjectHypothesisWithPose() - # Populate bounding box - detection_msg.bbox.center.x = (x_min + x_max) / 2 - detection_msg.bbox.center.y = (y_min + y_max) / 2 - detection_msg.bbox.size_x = x_max - x_min - detection_msg.bbox.size_y = y_max - y_min - - # Add class and confidence metadata - hypothesis = ObjectHypothesisWithPose() - hypothesis.id = int(class_id) - hypothesis.score = confidence - detection_msg.results.append(hypothesis) - - detection_array_msg.detections.append(detection_msg) + detected_object.hypothesis.class_id = str(int(predicted_class)) + # self.get_logger().info(f"Score: {confidence} (type: {type(confidence)})") + detected_object.hypothesis.score = float(confidence) + - 
batch_msg.detections.append(detection_array_msg) - self.batch_publisher.publish(batch_msg) - self.get_logger().info("Published batch message with images and detections.") - - - #we populate the compressedIMage array with the image list with raw images from nuscenes - #We populate the detection2d array with our results_dict - - + detection.results.append(detected_object) + detection_array.detections.append(detection) + + batch_msg.detections.append(detection_array) + self.batch_detection_publisher.publish(detection_array) + vis_image = self.draw_bounding_boxes(img_msg, results_dict[idx]) + vis_compressed_image = CompressedImage() + vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.format = "jpeg" + vis_compressed_image.data = cv2.imencode('.jpg', vis_image)[1].tobytes() + self.batch_vis_publisher.publish(vis_compressed_image) + self.batched_camera_message_publisher.publish(batch_msg) #Converting to tensor for batching + + def draw_bounding_boxes(self,img_msg, detections): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + for bbox in detections: + x_min, y_min, x_max, y_max, confidence, predicted_class = bbox + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + #self.get_logger().info(f"{label}") + cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2) + cv2.putText(image, label, (int(x_min), int(y_min) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + return image def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml index b4edda30..62cae926 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/package.xml @@ -4,7 +4,7 @@ camera_object_detection_msgs 0.0.0 TODO: Package description -a95rahma +Arfan12630 TODO: License declaration ament_cmake From a85d45f2ddb4f2974b1e1a928a1b09036cd1f72f Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 5 Dec 2024 18:27:54 +0000 Subject: [PATCH 28/37] Showing every image in foxglove topic --- .../yolov8_detection.py | 133 +++++++++++------- 1 file changed, 86 insertions(+), 47 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index a9736031..ce5f08ab 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,7 +144,7 @@ def __init__(self): self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.1) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, 
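On the consumer side (for a quick check outside Foxglove), the annotated CompressedImage published here can be turned back into an OpenCV image with the same cv2 calls the node already uses for its inputs; a small sketch:

import cv2
import numpy as np
from sensor_msgs.msg import CompressedImage

def decode_compressed(msg: CompressedImage) -> np.ndarray:
    # JPEG bytes -> BGR image, the inverse of the cv2.imencode('.jpg', ...) call above.
    buffer = np.frombuffer(msg.data, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)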
self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -421,15 +421,15 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) original_height, original_width = compressedImage.shape[:2] - self.get_logger().info(f"original Height:{original_height} original width: {original_width}") + #self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] + # tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(float_image) + # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) self.input_info, self.output_info = self.initialize_tensors() @@ -441,36 +441,64 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): - 8400 represents 8400 anchors """ + batch_size = detections[0].shape[0] results_dict = {i: [] for i in range(batch_size)} for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + # Extract detections for the current image image_detections = detections[0][image_idx] - - if image_idx not in results_dict: - results_dict[image_idx] = [] - for anchor_idx in range(8400): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] + # Extract individual components + x_centers = image_detections[0] + y_centers = image_detections[1] + widths = image_detections[2] + heights = image_detections[3] + confidences = image_detections[4] + class_probs = image_detections[5:18] # Shape: (13, 8400) + + # Filter by confidence threshold + valid_indices = confidences > 0.3 + if not np.any(valid_indices): + continue # Skip image if no detections pass confidence threshold + + # Filter relevant detections + x_centers = x_centers[valid_indices] + y_centers = y_centers[valid_indices] + widths = widths[valid_indices] + heights = heights[valid_indices] + confidences = confidences[valid_indices] + class_probs = class_probs[:, valid_indices] + + # Predict classes and probabilities + predicted_classes = np.argmax(class_probs, axis=0) + predicted_probs = class_probs[predicted_classes, range(class_probs.shape[1])] + + # Convert boxes to (x_min, y_min, x_max, y_max) + x_mins = x_centers - widths / 2 + y_mins = y_centers - heights / 2 + x_maxs = x_centers + widths / 2 + y_maxs = y_centers + heights / 2 + + # Combine detections into final format + 
filtered_detections = np.stack([x_mins, y_mins, x_maxs, y_maxs, confidences, predicted_classes], axis=-1) + #self.get_logger().info(f"Detections:{filtered_detections}") + + # Apply NMS + results_dict[image_idx] = self.nms(filtered_detections.tolist(), confidence_threshold=0.55, iou_threshold=0.5) + +#Log final results if needed + for image_idx, detections in results_dict.items(): + #self.get_logger().info(f"Image {image_idx}: {len(detections)} detections") + for detection in detections: + x_min, y_min, x_max, y_max, confidence, predicted_class = detection + self.get_logger().info( + f" Bounding Box: [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + f"Confidence: {confidence:.2f}, Class: {predicted_class}" + ) - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - #self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) self.publish_batch(image_list, results_dict) # To store final results after NMS - + def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): half_width = width /2 half_height = height /2 @@ -480,7 +508,7 @@ def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageW y_max = y_center + half_height return x_min, y_min, x_max, y_max - def nms(self,bboxes, confidence_threshold= 0.55,iou_threshold = 0.5 ): + def nms(self,bboxes, confidence_threshold= 0.45,iou_threshold = 0.5 ): #Confidence threshold bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) @@ -539,42 +567,53 @@ def publish_batch(self, image_list, results_dict): batch_msg.header.frame_id = "batch" for idx, img_msg in enumerate(image_list): - compressed_image = CompressedImage() - compressed_image.header = img_msg.header - compressed_image.format = "jpeg" - compressed_image.data = img_msg.data # Original data - batch_msg.images.append(compressed_image) - - for idx in range(len(image_list)): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + height, width = image.shape[:2] + detection_array = Detection2DArray() detection_array = Detection2DArray() detection_array.header.stamp = batch_msg.header.stamp - detection_array.header.frame_id = f"image_{idx}" + detection_array.header.frame_id = f"camera_{idx}" + detections = results_dict[idx] for bbox in results_dict[idx]: + x_min, y_min, x_max, y_max , confidence, predicted_class = bbox + x_min = int(x_min * width / 640) + x_max = int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 640) + + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) + cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) detection = Detection2D() - x_min, y_min, x_max, y_max, confidence, predicted_class = bbox detection.bbox.center.position.x = (x_min + x_max) / 2 detection.bbox.center.position.y = (y_min + y_max) / 2 - detection.bbox.size_x = x_max - x_min - detection.bbox.size_y = y_max - y_min + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = 
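The scaling in publish_batch maps boxes from the 640x640 network input back to the original camera resolution; a small illustrative helper with the same arithmetic (the 640 default mirrors the resize used in preprocessing):

def rescale_box(box, orig_width, orig_height, model_size=640):
    # Box predicted in model space (model_size x model_size) -> pixel coords in the original image.
    x_min, y_min, x_max, y_max = box
    sx = orig_width / model_size
    sy = orig_height / model_size
    return int(x_min * sx), int(y_min * sy), int(x_max * sx), int(y_max * sy)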
float(y_max - y_min) + detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = str(int(predicted_class)) - # self.get_logger().info(f"Score: {confidence} (type: {type(confidence)})") detected_object.hypothesis.score = float(confidence) - - detection.results.append(detected_object) detection_array.detections.append(detection) - + + # Add detection array to batch message batch_msg.detections.append(detection_array) - self.batch_detection_publisher.publish(detection_array) - vis_image = self.draw_bounding_boxes(img_msg, results_dict[idx]) + + # Publish visualized image vis_compressed_image = CompressedImage() vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.header.frame_id = f"camera_{idx}" vis_compressed_image.format = "jpeg" - vis_compressed_image.data = cv2.imencode('.jpg', vis_image)[1].tobytes() + vis_compressed_image.data = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() self.batch_vis_publisher.publish(vis_compressed_image) + + # Publish Detection2DArray + self.batch_detection_publisher.publish(detection_array) + + # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) + #Converting to tensor for batching def draw_bounding_boxes(self,img_msg, detections): From c77e53f84d392f29a3ee3175ede97dcfdea2d506 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 6 Dec 2024 14:53:52 +0000 Subject: [PATCH 29/37] Getting better accuracy for detections --- .../camera_object_detection/yolov8_detection.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index ce5f08ab..497e13bf 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -144,7 +144,8 @@ def __init__(self): self.back_center_camera_subscription = Subscriber(self, CompressedImage, self.back_center_camera_topic) self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -176,6 +177,8 @@ def __init__(self): #self.build_engine() # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) # self.input_info, self.output_info = self.initialize_tensors() + self.last_publish_time = self.get_clock().now() + self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = 
self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) @@ -412,9 +415,9 @@ def eve_batch_inference_callback(self, msg1,msg2,msg3): # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): + def batch_inference_callback(self,msg1,msg2,msg3): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1,msg2,msg3,msg4,msg5,msg6] + image_list = [msg1,msg2,msg3] batched_list = [] #Preprocessing for msg in image_list: @@ -431,7 +434,7 @@ def batch_inference_callback(self,msg1,msg2,msg3, msg4,msg5,msg6): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 6,3,640,640) + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ @@ -562,6 +565,10 @@ def get_iou(self, box1, box2): def publish_batch(self, image_list, results_dict): + current_time = self.get_clock().now() + if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) + return + self.last_publish_time = current_time batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() batch_msg.header.frame_id = "batch" From e06c1aa89f2edc3081c58db2841bfddc2b4b63cf Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sat, 7 Dec 2024 00:23:08 +0000 Subject: [PATCH 30/37] Refactored detection parsing --- .../yolov8_detection.py | 329 ++++++++---------- 1 file changed, 149 insertions(+), 180 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 497e13bf..fa04bc73 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -145,7 +145,8 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) + self.ats = ApproximateTimeSynchronizer([self.front_right_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) #Subscription for Eve cameras @@ -321,8 +322,7 @@ def initialize_tensors(self): cudart.cudaMemcpyAsync( self.input_gpu, self.input_cpu.ctypes.data, self.input_cpu.nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.input_info.append(Tensor(name, self.dtype, self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) - + self.input_info.append(Tensor(name, self.dtype, 
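The publish throttle added here drops any publish that arrives less than 0.5 s after the previous one; the same pattern in isolation, as a hedged sketch using a standalone rclpy clock:

from rclpy.clock import Clock

clock = Clock()
last_publish_time = clock.now()

def should_publish(rate_hz=2.0):
    # Return True at most rate_hz times per second; otherwise skip this cycle.
    global last_publish_time
    now = clock.now()
    if (now - last_publish_time).nanoseconds < 1e9 / rate_hz:
        return False
    last_publish_time = now
    return True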
self.tensorRT_input_shape, self.input_cpu, self.input_gpu)) return self.input_info, self.output_info def tensorRT_inferencing(self, batch_array): @@ -362,62 +362,14 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - - return self.outputs_ndarray - - # will be called with eve rosbag - def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageList = [msg1,msg2,msg3] - eve_batched_list = [] - for msg in imageList: - numpy_array = np.frombuffer(msg.data, np.uint8) - compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) - rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) - normalized_image = rgb_image / 255.0 - chw_image = np.transpose(normalized_image, (2,0,1)) - float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] - batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) - self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - batch_size = detections[0].shape[0] - anchors = detections[0].shape[2] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") - image_detections = detections[0][image_idx] - if image_idx not in results_dict: - results_dict[image_idx] = [] - - for anchor_idx in range(anchors): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] - - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) + return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1,msg2,msg3): + def batch_inference_callback(self,msg1): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1,msg2,msg3] + image_list = [msg1] batched_list = [] #Preprocessing for msg in image_list: @@ -434,7 +386,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) """ @@ -444,125 +396,104 @@ def batch_inference_callback(self,msg1,msg2,msg3): - 
8400 represents 8400 anchors """ - - batch_size = detections[0].shape[0] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # Extract detections for the current image - image_detections = detections[0][image_idx] + # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") + #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") + decoded_results = self.postprocess_detections_nuscenes(detections) + + + + + def postprocess_detections_nuscenes(self, detections, class_labels = None ): + detection_tensor = detections[0] # Assuming the first tensor corresponds to detections + batch_size = detection_tensor.shape[0] # Number of images in the batch + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) + decoded_results = [] # A list to hold decoded detections for each image + for batch_idx in range(batch_size): + image_detections = detection_tensor[batch_idx] # Detections for the current image + valid_detections = [] # Store valid detections for this image + for anchor_idx in range(num_anchors): # Loop through all anchors + detection = image_detections[:, anchor_idx] # Get data for one anchor + confidence = detection[4] # Extract confidence score + + # Only process detections with confidence above the threshold + if confidence > 0.5: + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[5:13] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + predicted_prob = class_probs[predicted_class] + + # Convert to (x_min, y_min, x_max, y_max) + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class + ]) + valid_detections = torch.tensor(valid_detections) + if valid_detections.shape[0] == 0: + decoded_results.append([]) # No detections for this image + continue + + # Apply Non-Maximum Suppression (NMS) + nms_results = non_max_suppression( + valid_detections.unsqueeze(0), # Add batch dimension for NMS + conf_thres=0.5, # Confidence threshold + iou_thres=0.45 # IoU threshold + ) - # Extract individual components - x_centers = image_detections[0] - y_centers = image_detections[1] - widths = image_detections[2] - heights = image_detections[3] - confidences = image_detections[4] - class_probs = image_detections[5:18] # Shape: (13, 8400) - - # Filter by confidence threshold - valid_indices = confidences > 0.3 - if not np.any(valid_indices): - continue # Skip image if no detections pass confidence threshold - - # Filter relevant detections - x_centers = x_centers[valid_indices] - y_centers = y_centers[valid_indices] - widths = widths[valid_indices] - heights = heights[valid_indices] - confidences = confidences[valid_indices] - class_probs = class_probs[:, valid_indices] - - # Predict classes and probabilities - predicted_classes = np.argmax(class_probs, axis=0) - predicted_probs = class_probs[predicted_classes, range(class_probs.shape[1])] - - # Convert boxes to (x_min, y_min, x_max, y_max) - x_mins = x_centers - widths / 2 - y_mins = y_centers - heights / 2 - x_maxs = x_centers + widths / 2 - y_maxs = y_centers + heights / 2 - - # Combine detections into final format - filtered_detections = np.stack([x_mins, y_mins, x_maxs, y_maxs, confidences, predicted_classes], axis=-1) - #self.get_logger().info(f"Detections:{filtered_detections}") - - # Apply NMS - results_dict[image_idx] = 
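Assuming the layout described in the docstring (rows 0-3 box xywh, row 4 a confidence score, remaining rows per-class scores, one column per anchor), slicing one image out of the (batch, 84, 8400) output looks roughly like this; the random array is only a stand-in for the real engine output:

import numpy as np

out = np.random.rand(6, 84, 8400).astype(np.float32)  # stand-in for detections[0]
img0 = out[0]                    # (84, 8400): one column per anchor
boxes_xywh = img0[:4].T          # (8400, 4) centre-x, centre-y, width, height
conf = img0[4]                   # (8400,) confidence per anchor, as this node interprets it
cls = img0[5:].argmax(axis=0)    # (8400,) index of the best-scoring class
keep = conf > 0.45
print(boxes_xywh[keep].shape, cls[keep].shape)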
self.nms(filtered_detections.tolist(), confidence_threshold=0.55, iou_threshold=0.5) - -#Log final results if needed - for image_idx, detections in results_dict.items(): - #self.get_logger().info(f"Image {image_idx}: {len(detections)} detections") - for detection in detections: - x_min, y_min, x_max, y_max, confidence, predicted_class = detection + final_detections = [] + for det in nms_results[0]: # Assuming only one set of results + x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() + class_name = class_labels.get(int(predicted_class), "unknown") if class_labels else str(int(predicted_class)) + final_detections.append({ + "bbox": [x_min, y_min, x_max, y_max], + "confidence": confidence, + "class": int(predicted_class), + "class_name": class_name + }) + + decoded_results.append(final_detections) + for batch_idx, batch_detections in enumerate(decoded_results): + if len(batch_detections) == 0: + self.get_logger().info(f"Batch {batch_idx}: No detections") + continue + + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + class_name = detection["class_name"] + + # Log detection details self.get_logger().info( - f" Bounding Box: [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - f"Confidence: {confidence:.2f}, Class: {predicted_class}" + f"Batch {batch_idx}, Anchor {anchor_idx}: " + f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" ) - self.publish_batch(image_list, results_dict) - # To store final results after NMS - - def convert_to_xyxy(self, x_center, y_center, width, height, imageHeight, imageWidth): - half_width = width /2 - half_height = height /2 - x_min = x_center - half_width - y_min = y_center - half_height - x_max = x_center + half_width - y_max = y_center + half_height - return x_min, y_min, x_max, y_max - - def nms(self,bboxes, confidence_threshold= 0.45,iou_threshold = 0.5 ): - #Confidence threshold - bboxes_thresholded = [bbox for bbox in bboxes if bbox[4] > confidence_threshold] - bboxes_sorted = sorted(bboxes_thresholded, key=lambda x: x[4], reverse=True) - bbox_list_new = [] - while bboxes_sorted: - current_box = bboxes_sorted.pop(0) - bbox_list_new.append(current_box) - # Filter boxes with IoU below the threshold - bboxes_sorted = [ - box for box in bboxes_sorted if self.get_iou(current_box[:4], box[:4]) < iou_threshold - ] - return bbox_list_new - - def get_iou(self, box1, box2): - x1, y1, x2, y2 = box1 - x3, y3, x4, y4 = box2 - - # Intersection coordinates - x_inter1 = max(x1, x3) - y_inter1 = max(y1, y3) - x_inter2 = min(x2, x4) - y_inter2 = min(y2, y4) - - # Width and height of the intersection - width_inter = abs(x_inter2 - x_inter1) - height_inter = abs(y_inter2 - y_inter1) - - # If there is no intersection - if x_inter2 < x_inter1 or y_inter2 < y_inter1: - return 0.0 - - # Area of intersection - area_inter = width_inter * height_inter - - # Areas of the two boxes - width_box1 = abs(x2 - x1) - height_box1 = abs(y2 - y1) - area_box1 = width_box1 * height_box1 - - width_box2 = abs(x4 - x3) - height_box2 = abs(y4 - y3) - area_box2 = width_box2 * height_box2 - - # Union area - area_union = area_box1 + area_box2 - area_inter - - # Intersection over Union - iou = area_inter / area_union - - return iou - + return decoded_results + # decoded_results.append(final_detections) + 
# class_name = class_labels.get(predicted_class, "unknown") if class_labels else str(predicted_class) + # self.get_logger().info( + # f"Batch {batch_idx}, Anchor {anchor_idx}: " + # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" + # ) + + # # Store the detection in a structured format + # valid_detections.append({ + # "bbox": [x_min, y_min, x_max, y_max], + # "confidence": confidence, + # "class": predicted_class, + # "class_name": class_name + # }) + + # # Log the number of valid detections for this image + # self.get_logger().info(f"Batch {batch_idx}: {len(valid_detections)} valid detections") + def publish_batch(self, image_list, results_dict): current_time = self.get_clock().now() @@ -622,17 +553,55 @@ def publish_batch(self, image_list, results_dict): self.batched_camera_message_publisher.publish(batch_msg) #Converting to tensor for batching + + # will be called with eve rosbag + def eve_batch_inference_callback(self, msg1,msg2,msg3): + imageList = [msg1,msg2,msg3] + eve_batched_list = [] + for msg in imageList: + numpy_array = np.frombuffer(msg.data, np.uint8) + compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) + rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255.0 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + tensor_image = torch.from_numpy(float_image).to('cuda') + batched_list.append(tensor_image) + batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_images = np.stack(batched_list, axis=0) + self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + batch_size = detections[0].shape[0] + anchors = detections[0].shape[2] + results_dict = {i: [] for i in range(batch_size)} + for image_idx in range(batch_size): + # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") + image_detections = detections[0][image_idx] + + if image_idx not in results_dict: + results_dict[image_idx] = [] + + for anchor_idx in range(anchors): + x_center = image_detections[0][anchor_idx] + y_center = image_detections[1][anchor_idx] + width = image_detections[2][anchor_idx] + height = image_detections[3][anchor_idx] + + confidence = image_detections[4][anchor_idx] + class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] + predicted_class = np.argmax(class_probs) + predicted_prob = class_probs[predicted_class] + + if confidence > 0.45: + + #Convert from chw to tlbr + x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) + self.get_logger().info(f"Added bounding box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") + results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) + results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - def draw_bounding_boxes(self,img_msg, detections): - numpy_image = np.frombuffer(img_msg.data, np.uint8) - image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) - for bbox in detections: - x_min, y_min, x_max, y_max, confidence, predicted_class = bbox - label = f"Class: {predicted_class}, Conf: {confidence:.2f}" - #self.get_logger().info(f"{label}") - 
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2) - cv2.putText(image, label, (int(x_min), int(y_min) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) - return image def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From a9822381a063dd892c3d5753a3554d0c2547e0a5 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 00:17:04 +0000 Subject: [PATCH 31/37] parsing multiple tensors --- .../yolov8_detection.py | 245 +++++++++++------- 1 file changed, 148 insertions(+), 97 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index fa04bc73..79edc685 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -146,17 +146,17 @@ def __init__(self): self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_right_camera_subscription], queue_size=10, slop=0.3) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) - #Subscription for Eve cameras - self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) - self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) - self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) - self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + # #Subscription for Eve cameras + # self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + # self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + # self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + # self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) - #Create this callback function tomorrow - self.eve_ats.registerCallback(self.eve_batch_inference_callback) + # #Create this callback function tomorrow + # self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None @@ -202,10 +202,10 @@ def __init__(self): def build_engine(self): #Only calling this function when we dont have an engine file #Reading the onnx file in the perception models directory - self.max_batch_size = 3 + self.max_batch_size = 6 self.channels = 3 - self.height = 1024 - self.width = 1024 + self.height = 640 + self.width = 640 self.shape_input_model = [self.max_batch_size, self.channels, self.height, self.width] self.logger = 
trt.Logger(trt.Logger.VERBOSE) self.builder = trt.Builder(self.logger) @@ -388,7 +388,9 @@ def batch_inference_callback(self,msg1): batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + """ - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores @@ -398,8 +400,69 @@ def batch_inference_callback(self,msg1): """ # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") - decoded_results = self.postprocess_detections_nuscenes(detections) + # decoded_results = self.postprocess_detections_nuscenes(detections) + detection_tensor = detections[0] # Assuming the first tensor corresponds to detections + self.get_logger().info(f"Detection Tensor Length:{len(detection_tensor.shape)}") + self.get_logger().info(f"Detection Tensor Length of other tensors:{len(detections[1].shape)}") + batch_size = detection_tensor.shape[0] # Number of images in the batch + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) + # self.get_logger().info(f"detections shape:{detections[1].shape}") #80x80 + # self.get_logger().info(f"detections shape2:{detections[2][0][1][0].shape}") #40x40 + + decoded_results = [] # A list to hold decoded detections for each image + for batch_idx in range(batch_size): + image_detections = detection_tensor[batch_idx] # Detections for the current image + valid_detections = [] # Store valid detections for this image + for anchor_idx in range(num_anchors): # Loop through all anchors + detection = image_detections[:, anchor_idx] # Get data for one anchor + confidence = detection[4] # Extract confidence score + # Only process detections with confidence above the threshold + if confidence > 0.1: + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[5:13] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + predicted_prob = class_probs[predicted_class] + + # Convert to (x_min, y_min, x_max, y_max) + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class + ]) + valid_detections = torch.tensor(valid_detections) + if valid_detections.shape[0] == 0: + decoded_results.append([]) # No detections for this image + continue + + # Apply Non-Maximum Suppression (NMS) + nms_results = non_max_suppression( + valid_detections.unsqueeze(0), # Add batch dimension for NMS + conf_thres=0.2, # Confidence threshold + iou_thres=0.45 # IoU threshold + ) + + final_detections = [] + for img_idx, nms_result in enumerate(nms_results): # Loop through each image's NMS results + if nms_result is None or len(nms_result) == 0: + print(f"No detections for image {img_idx + 1}.") + decoded_results.append([]) # No detections for this image + continue + + final_detections = [] + for det in nms_result: # Loop through detections in this image + x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() + final_detections.append({ + "bbox": [x_min, y_min, x_max, y_max], + 
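build_engine only needs to run when the .engine file does not exist yet; a minimal offline sketch of the same ONNX-to-engine path, assuming TensorRT 8.x and that the exported ONNX input tensor is named "images" (the usual name for ultralytics exports; adjust if different):

import tensorrt as trt

def build_engine(onnx_path, engine_path, batch=6, size=640):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, "rb") as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError("Failed to parse the ONNX file")
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB workspace
    # Needed when the ONNX batch dimension is dynamic; "images" is an assumed input name.
    profile = builder.create_optimization_profile()
    profile.set_shape("images", (1, 3, size, size), (batch, 3, size, size), (batch, 3, size, size))
    config.add_optimization_profile(profile)
    serialized = builder.build_serialized_network(network, config)
    if serialized is None:
        raise RuntimeError("Engine build failed")
    with open(engine_path, "wb") as f:
        f.write(serialized)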
"confidence": confidence, + "class": int(predicted_class), + }) + + # Add detections for this image to the results + decoded_results.append(final_detections) + self.publish_batch_nuscenes(image_list, decoded_results) @@ -454,47 +517,82 @@ def postprocess_detections_nuscenes(self, detections, class_labels = None ): }) decoded_results.append(final_detections) - for batch_idx, batch_detections in enumerate(decoded_results): - if len(batch_detections) == 0: - self.get_logger().info(f"Batch {batch_idx}: No detections") - continue - - for anchor_idx, detection in enumerate(batch_detections): - # Extract values from the dictionary - bbox = detection["bbox"] - x_min, y_min, x_max, y_max = bbox - confidence = detection["confidence"] - predicted_class = detection["class"] - class_name = detection["class_name"] + # for batch_idx, batch_detections in enumerate(decoded_results): + # if len(batch_detections) == 0: + # self.get_logger().info(f"Batch {batch_idx}: No detections") + # continue + + # for anchor_idx, detection in enumerate(batch_detections): + # # Extract values from the dictionary + # bbox = detection["bbox"] + # x_min, y_min, x_max, y_max = bbox + # confidence = detection["confidence"] + # predicted_class = detection["class"] + # class_name = detection["class_name"] # Log detection details - self.get_logger().info( - f"Batch {batch_idx}, Anchor {anchor_idx}: " - f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - ) + # self.get_logger().info( + # f"Batch {batch_idx}, Anchor {anchor_idx}: " + # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " + # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" + # ) return decoded_results - # decoded_results.append(final_detections) - # class_name = class_labels.get(predicted_class, "unknown") if class_labels else str(predicted_class) - # self.get_logger().info( - # f"Batch {batch_idx}, Anchor {anchor_idx}: " - # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - # ) - - # # Store the detection in a structured format - # valid_detections.append({ - # "bbox": [x_min, y_min, x_max, y_max], - # "confidence": confidence, - # "class": predicted_class, - # "class_name": class_name - # }) - - # # Log the number of valid detections for this image - # self.get_logger().info(f"Batch {batch_idx}: {len(valid_detections)} valid detections") - - + def publish_batch_nuscenes(self, image_list, decoded_results): + batch_msg = BatchDetection() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + numpy_image = np.frombuffer(img_msg.data, np.uint8) + image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) + height, width = image.shape[:2] + annotator = Annotator(image, line_width=2, example="Class:0") + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"camera_{idx}" + for batch_idx, batch_detections in enumerate(decoded_results): + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + x_min = int(x_min * width / 640) + x_max = int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 
640) + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) + detection = Detection2D() + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = float(y_max - y_min) + + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = str(int(predicted_class)) + detected_object.hypothesis.score = float(confidence) + detection.results.append(detected_object) + detection_array.detections.append(detection) + batch_msg.detections.append(detection_array) + annotated_image = annotator.result() + vis_compressed_image = CompressedImage() + vis_compressed_image.header.stamp = self.get_clock().now().to_msg() + vis_compressed_image.header.frame_id = f"camera_{idx}" + vis_compressed_image.format = "jpeg" + vis_compressed_image.data = cv2.imencode('.jpg', annotated_image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() + self.batch_vis_publisher.publish(vis_compressed_image) + + # Publish Detection2DArray + self.batch_detection_publisher.publish(detection_array) + + # Publish batch detection message + self.batched_camera_message_publisher.publish(batch_msg) + + + + def publish_batch(self, image_list, results_dict): current_time = self.get_clock().now() if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) @@ -554,54 +652,7 @@ def publish_batch(self, image_list, results_dict): #Converting to tensor for batching - # will be called with eve rosbag - def eve_batch_inference_callback(self, msg1,msg2,msg3): - imageList = [msg1,msg2,msg3] - eve_batched_list = [] - for msg in imageList: - numpy_array = np.frombuffer(msg.data, np.uint8) - compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) - resized_compressedImage = cv2.resize(compressedImage, (1024, 1024)) - rgb_image = cv2.color(resized_compressedImage, cv2.COLOR_BGR2RGB) - normalized_image = rgb_image / 255.0 - chw_image = np.transpose(normalized_image, (2,0,1)) - float_image = chw_image.astype(np.float32) - tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(tensor_image) - batched_list = [tensor.cpu().numpy() for tensor in batched_list] - batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.eve_tensorRT_model_path, 3,3,1024,1024) - self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - batch_size = detections[0].shape[0] - anchors = detections[0].shape[2] - results_dict = {i: [] for i in range(batch_size)} - for image_idx in range(batch_size): - # self.get_logger().info(f"[{time.time()}] Processing image index: {image_idx}") - image_detections = detections[0][image_idx] - - if image_idx not in results_dict: - results_dict[image_idx] = [] - - for anchor_idx in range(anchors): - x_center = image_detections[0][anchor_idx] - y_center = image_detections[1][anchor_idx] - width = image_detections[2][anchor_idx] - height = image_detections[3][anchor_idx] - - confidence = image_detections[4][anchor_idx] - class_probs = [image_detections[class_idx][anchor_idx] for class_idx in range(5,18)] - predicted_class = np.argmax(class_probs) - predicted_prob = class_probs[predicted_class] - - if confidence > 0.45: - - #Convert from chw to tlbr - x_min, y_min, x_max, y_max = self.convert_to_xyxy(x_center, y_center, width, height, 640, 640) - self.get_logger().info(f"Added bounding 
box for image {image_idx}: {x_min, y_min, x_max, y_max}, class:{predicted_class}") - results_dict[image_idx].append([x_min, y_min, x_max, y_max, confidence, predicted_class]) - results_dict[image_idx] = self.nms(results_dict[image_idx], confidence_threshold = 0.55, iou_threshold=0.5) - + def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From 4a5cdd61c473c6e5bb180cb5b1671e564afe9601 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 06:11:38 +0000 Subject: [PATCH 32/37] Refactored Mounts --- modules/docker-compose.perception.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/docker-compose.perception.yaml b/modules/docker-compose.perception.yaml index 1d03eb54..ef633077 100644 --- a/modules/docker-compose.perception.yaml +++ b/modules/docker-compose.perception.yaml @@ -32,16 +32,11 @@ services: command: /bin/bash -c "ros2 launch camera_object_detection eve.launch.py" volumes: - /mnt/wato-drive2/perception_models/yolov8m.pt:/perception_models/yolov8m.pt - - /mnt/wato-drive2/perception_models/yolov8m.onnx:/perception_models/yolov8m.onnx - - /mnt/wato-drive2/perception_models/yolov8m_dynamic.onnx:/perception_models/ylov8m_dynamic.onnx - - /mnt/wato-drive2/perception_models/yolov8m.engine:/perception_models/yolov8m.engine - - /mnt/wato-drive2/perception_models/yolov8m_copy2.engine:/perception_models/yolov8m_copy2.engine - /mnt/wato-drive2/perception_models/traffic_light.pt:/perception_models/traffic_light.pt - /mnt/wato-drive2/perception_models/traffic_signs_v0.pt:/perception_models/traffic_signs_v1.pt - /mnt/wato-drive2/perception_models/tensorRT.onnx:/perception_models/tensorRT.onnx - /mnt/wato-drive2/perception_models/tensorRT.engine:/perception_models/tensorRT.engine - /mnt/wato-drive2/perception_models/eve.onnx:/perception_models/eve.onnx - - /mnt/wato-drive2/perception_models/:/perception_models lidar_object_detection: build: From 5cea90fd86b38cad9897d1753eebb04417fe015e Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Sun, 8 Dec 2024 15:41:48 +0000 Subject: [PATCH 33/37] Getting all detections --- .../yolov8_detection.py | 216 ++---------------- 1 file changed, 16 insertions(+), 200 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 79edc685..e7a8af22 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -176,8 +176,6 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() - # self.initialize_engine(self.tensorRT_model_path, 3,3,1024,1024) - # self.input_info, self.output_info = self.initialize_tensors() self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) @@ -329,21 +327,12 @@ def tensorRT_inferencing(self, batch_array): assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - #self.get_logger().info(f"Batch Array Stats: shape={batch_array.shape}, dtype={batch_array.dtype}, range=({batch_array.min()}, {batch_array.max()})") - #self.inputTensors = [batch_array] self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] - #self.get_logger().info(f"Contiguous Inputs: dtype={self.contiguous_inputs[0].dtype}, 
shape={self.contiguous_inputs[0].shape}") - #for i, input_tensor in enumerate(self.contiguous_inputs): - #self.get_logger().info(f"Input {i}: shape={input_tensor.shape}, dtype={input_tensor.dtype}, range=({input_tensor.min()}, {input_tensor.max()})") - #for i, input_gpu in enumerate(self.input_info): - #self.get_logger().info(f"Input {i} GPU address: {input_gpu.gpu}") - for i in range(self.num_inputs): name = self.input_info[i].name cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] self.outputs_ndarray:List[ndarray] = [] for i in range(self.num_outputs): @@ -353,8 +342,6 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray.append(cpu) self.output_gpu_ptrs.append(gpu) self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) - - self.execution_context.execute_async_v3(self.stream) cudart.cudaStreamSynchronize(self.stream) for i, o in enumerate(self.output_gpu_ptrs): @@ -362,7 +349,6 @@ def tensorRT_inferencing(self, batch_array): self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream ) - return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] @@ -388,69 +374,39 @@ def batch_inference_callback(self,msg1): batched_images = np.stack(batched_list, axis=0) self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) self.input_info, self.output_info = self.initialize_tensors() - detections = self.tensorRT_inferencing(batched_images) - - """ - - Detection[0] represents the first output name, 'outputs' with shape(6,84,8400) -> 6 representing 6 images - - Detection[0][0] represents the first image can range from 0 to 5, and the shape would be (84,8400) -> 84 representing 84 detection components for bounding boxes, classes and confidence scores - - Detection[0][0][0] represnets the first detection components ranging from 0 to 83 - - 8400 represents 8400 anchors - - """ - # self.get_logger().info(f"Confidence Scores{detections[0][0][4][:10]}") - #self.get_logger().info(f"Classes{detections[0][0][5][:5]}") - # decoded_results = self.postprocess_detections_nuscenes(detections) detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - self.get_logger().info(f"Detection Tensor Length:{len(detection_tensor.shape)}") - self.get_logger().info(f"Detection Tensor Length of other tensors:{len(detections[1].shape)}") batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - # self.get_logger().info(f"detections shape:{detections[1].shape}") #80x80 - # self.get_logger().info(f"detections shape2:{detections[2][0][1][0].shape}") #40x40 - + num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) decoded_results = [] # A list to hold decoded detections for each image for batch_idx in range(batch_size): image_detections = detection_tensor[batch_idx] # Detections for the current image valid_detections = [] # Store valid detections for this image for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - confidence = detection[4] # Extract confidence score - - # Only process detections with confidence 
above the threshold - if confidence > 0.1: - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[5:13] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class - predicted_prob = class_probs[predicted_class] - - # Convert to (x_min, y_min, x_max, y_max) - x_min = x_center - width / 2 - y_min = y_center - height / 2 - x_max = x_center + width / 2 - y_max = y_center + height / 2 - valid_detections.append([ - x_min, y_min, x_max, y_max, confidence, predicted_class + detection = image_detections[:, anchor_idx] # Get data for one anchor + x_center, y_center, width, height = detection[:4] # Bounding box info + class_probs = detection[4:] # Class probabilities + predicted_class = np.argmax(class_probs) # Most likely class + confidence = class_probs[predicted_class] + x_min = x_center - width / 2 + y_min = y_center - height / 2 + x_max = x_center + width / 2 + y_max = y_center + height / 2 + valid_detections.append([ + x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) if valid_detections.shape[0] == 0: - decoded_results.append([]) # No detections for this image + decoded_results.append([]) continue - - # Apply Non-Maximum Suppression (NMS) nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.2, # Confidence threshold + conf_thres=0.45, # Confidence threshold iou_thres=0.45 # IoU threshold ) - final_detections = [] - for img_idx, nms_result in enumerate(nms_results): # Loop through each image's NMS results - if nms_result is None or len(nms_result) == 0: - print(f"No detections for image {img_idx + 1}.") - decoded_results.append([]) # No detections for this image - continue - + + for img_idx, nms_result in enumerate(nms_results): final_detections = [] for det in nms_result: # Loop through detections in this image x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() @@ -459,85 +415,8 @@ def batch_inference_callback(self,msg1): "confidence": confidence, "class": int(predicted_class), }) - - # Add detections for this image to the results decoded_results.append(final_detections) self.publish_batch_nuscenes(image_list, decoded_results) - - - - def postprocess_detections_nuscenes(self, detections, class_labels = None ): - detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - decoded_results = [] # A list to hold decoded detections for each image - for batch_idx in range(batch_size): - image_detections = detection_tensor[batch_idx] # Detections for the current image - valid_detections = [] # Store valid detections for this image - for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - confidence = detection[4] # Extract confidence score - - # Only process detections with confidence above the threshold - if confidence > 0.5: - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[5:13] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class - predicted_prob = class_probs[predicted_class] - - # Convert to (x_min, y_min, x_max, y_max) - x_min = x_center - width / 2 - y_min = y_center - height / 2 - x_max = x_center + width / 2 - y_max = y_center + height / 2 - 
valid_detections.append([ - x_min, y_min, x_max, y_max, confidence, predicted_class - ]) - valid_detections = torch.tensor(valid_detections) - if valid_detections.shape[0] == 0: - decoded_results.append([]) # No detections for this image - continue - - # Apply Non-Maximum Suppression (NMS) - nms_results = non_max_suppression( - valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.5, # Confidence threshold - iou_thres=0.45 # IoU threshold - ) - - final_detections = [] - for det in nms_results[0]: # Assuming only one set of results - x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() - class_name = class_labels.get(int(predicted_class), "unknown") if class_labels else str(int(predicted_class)) - final_detections.append({ - "bbox": [x_min, y_min, x_max, y_max], - "confidence": confidence, - "class": int(predicted_class), - "class_name": class_name - }) - - decoded_results.append(final_detections) - # for batch_idx, batch_detections in enumerate(decoded_results): - # if len(batch_detections) == 0: - # self.get_logger().info(f"Batch {batch_idx}: No detections") - # continue - - # for anchor_idx, detection in enumerate(batch_detections): - # # Extract values from the dictionary - # bbox = detection["bbox"] - # x_min, y_min, x_max, y_max = bbox - # confidence = detection["confidence"] - # predicted_class = detection["class"] - # class_name = detection["class_name"] - - # Log detection details - # self.get_logger().info( - # f"Batch {batch_idx}, Anchor {anchor_idx}: " - # f"BBox [{x_min:.2f}, {y_min:.2f}, {x_max:.2f}, {y_max:.2f}], " - # f"Class {class_name} ({predicted_class}), Confidence {confidence:.2f}" - # ) - - return decoded_results def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() @@ -590,69 +469,6 @@ def publish_batch_nuscenes(self, image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) - - - - def publish_batch(self, image_list, results_dict): - current_time = self.get_clock().now() - if (current_time - self.last_publish_time).nanoseconds < 1e9 / 2: # Publish at 2 Hz (0.5 seconds) - return - self.last_publish_time = current_time - batch_msg = BatchDetection() - batch_msg.header.stamp = self.get_clock().now().to_msg() - batch_msg.header.frame_id = "batch" - - for idx, img_msg in enumerate(image_list): - numpy_image = np.frombuffer(img_msg.data, np.uint8) - image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) - height, width = image.shape[:2] - detection_array = Detection2DArray() - detection_array = Detection2DArray() - detection_array.header.stamp = batch_msg.header.stamp - detection_array.header.frame_id = f"camera_{idx}" - detections = results_dict[idx] - for bbox in results_dict[idx]: - x_min, y_min, x_max, y_max , confidence, predicted_class = bbox - x_min = int(x_min * width / 640) - x_max = int(x_max * width / 640) - y_min = int(y_min * height / 640) - y_max = int(y_max * height / 640) - - label = f"Class: {predicted_class}, Conf: {confidence:.2f}" - cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2) - cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) - detection = Detection2D() - detection.bbox.center.position.x = (x_min + x_max) / 2 - detection.bbox.center.position.y = (y_min + y_max) / 2 - detection.bbox.size_x = float(x_max - x_min) - detection.bbox.size_y = float(y_max - y_min) - - detected_object = 
ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = str(int(predicted_class)) - detected_object.hypothesis.score = float(confidence) - detection.results.append(detected_object) - detection_array.detections.append(detection) - - # Add detection array to batch message - batch_msg.detections.append(detection_array) - - # Publish visualized image - vis_compressed_image = CompressedImage() - vis_compressed_image.header.stamp = self.get_clock().now().to_msg() - vis_compressed_image.header.frame_id = f"camera_{idx}" - vis_compressed_image.format = "jpeg" - vis_compressed_image.data = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() - self.batch_vis_publisher.publish(vis_compressed_image) - - # Publish Detection2DArray - self.batch_detection_publisher.publish(detection_array) - - # Publish batch detection message - self.batched_camera_message_publisher.publish(batch_msg) - - #Converting to tensor for batching - - def batch_convert_to_tensor(self, cv_image): img = cv_image.transpose(2,0,1) img = torch.from_numpy(img).to(self.device) From 23bde0da7185260aa75ccf803076f1c076d50823 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 10 Dec 2024 23:25:13 +0000 Subject: [PATCH 34/37] Getting all detections from all angles nuscenes --- .../yolov8_detection.py | 128 +++++++++++------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index e7a8af22..070e3eb2 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -145,8 +145,8 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) + self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.1) + #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) # #Subscription for Eve cameras @@ -176,6 +176,7 @@ def __init__(self): assert status.value == 0, "IS NOT ZERO" #self.build_engine() + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) @@ -185,8 +186,18 @@ def __init__(self): # setup vis publishers #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) - self.batch_detection_publisher = self.create_publisher(Detection2DArray, 
self.batch_publish_detection_topic, 10) - self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) + self.num_cameras = 3 # Adjust this based on the number of cameras + self.batch_vis_publishers = [ + self.create_publisher(CompressedImage, f"/camera_{i}/vis", 10) + for i in range(self.num_cameras) + ] + self.batch_detection_publishers = [ + self.create_publisher(Detection2DArray, f"/camera_{i}/detections", 10) + for i in range(self.num_cameras) + ] + + # self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) + # self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) #vis publishers self.vis_publisher = self.create_publisher(Image, self.publish_vis_topic, 10) @@ -228,10 +239,9 @@ def build_engine(self): self.get_logger().info(f"Model {output.name} shape: {output.shape} {output.dtype}") if self.max_batch_size > 1: self.profile = self.builder.create_optimization_profile() - self.min_shape = [1, self.channels, self.height, self.width] # Minimum batch size of 1 - self.opt_shape = [int(self.max_batch_size / 2), self.channels, self.height, self.width] # Optimal batch size (half of max, so 3) - self.max_shape = self.shape_input_model - + self.min_shape = [1, 3, 640, 640] + self.opt_shape = [3, 3, 640, 640] + self.max_shape = [6, 3, 640, 640] for input in self.inputs: self.profile.set_shape(input.name, self.min_shape, self.opt_shape, self.max_shape) self.config.add_optimization_profile(self.profile) @@ -327,35 +337,59 @@ def tensorRT_inferencing(self, batch_array): assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - self.contiguous_inputs = [np.ascontiguousarray(i) for i in batch_array] + + # Handle input + self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): - name = self.input_info[i].name - cudart.cudaMemcpyAsync(self.input_info[i].gpu, self.contiguous_inputs[i].ctypes.data, - self.contiguous_inputs[i].nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, self.stream) - self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - self.output_gpu_ptrs:List[int] = [] - self.outputs_ndarray:List[ndarray] = [] + name = self.input_info[i].name + cudart.cudaMemcpyAsync( + self.input_info[i].gpu, + self.contiguous_inputs[i].ctypes.data, + self.contiguous_inputs[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + self.stream + ) + self.execution_context.set_tensor_address(name, self.input_info[i].gpu) + + # Handle dynamic outputs + self.output_gpu_ptrs = [] + self.outputs_ndarray = [] for i in range(self.num_outputs): - j = i + self.num_inputs - cpu = self.output_info[i].cpu - gpu = self.output_info[i].gpu - self.outputs_ndarray.append(cpu) - self.output_gpu_ptrs.append(gpu) - self.execution_context.set_tensor_address(self.output_info[i].name, self.output_info[i].gpu) - self.execution_context.execute_async_v3(self.stream) + output_shape = self.execution_context.get_tensor_shape(self.output_info[i].name) + # Reallocate output buffer if shape changed + if self.output_info[i].cpu.shape != tuple(output_shape): + self.output_info[i].cpu = np.empty(output_shape, dtype=self.output_info[i].cpu.dtype) + + self.outputs_ndarray.append(self.output_info[i].cpu) + self.output_gpu_ptrs.append(self.output_info[i].gpu) + self.execution_context.set_tensor_address( + self.output_info[i].name, + self.output_info[i].gpu + ) + + # Execute inference + status = 
self.execution_context.execute_async_v3(self.stream) + assert status, "Inference execution failed" + + # Synchronize and copy results cudart.cudaStreamSynchronize(self.stream) - for i, o in enumerate(self.output_gpu_ptrs): - cudart.cudaMemcpyAsync( - self.outputs_ndarray[i].ctypes.data, o, self.outputs_ndarray[i].nbytes, - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, self.stream - ) + for i, gpu_ptr in enumerate(self.output_gpu_ptrs): + cudart.cudaMemcpyAsync( + self.outputs_ndarray[i].ctypes.data, + gpu_ptr, + self.outputs_ndarray[i].nbytes, + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + self.stream + ) + cudart.cudaStreamSynchronize(self.stream) + return tuple(self.outputs_ndarray) if len(self.outputs_ndarray) > 1 else self.outputs_ndarray[0] # will be called with nuscenes rosbag - def batch_inference_callback(self,msg1): + def batch_inference_callback(self,msg1,msg2,msg3): #Taking msgs from all 6 ros2 subscribers - image_list = [msg1] + image_list = [msg1,msg2,msg3] batched_list = [] #Preprocessing for msg in image_list: @@ -372,7 +406,8 @@ def batch_inference_callback(self,msg1): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.initialize_engine(self.tensorRT_model_path, 1,3,640,640) + self.get_logger().info(f"batched image shape:{batched_images.shape}") + # self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) detection_tensor = detections[0] # Assuming the first tensor corresponds to detections @@ -392,30 +427,29 @@ def batch_inference_callback(self,msg1): y_min = y_center - height / 2 x_max = x_center + width / 2 y_max = y_center + height / 2 + #if confidence > 0.5: + #self.get_logger().info(f"Camera {batch_idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") valid_detections.append([ x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) - if valid_detections.shape[0] == 0: - decoded_results.append([]) - continue + #self.get_logger().info(f"Pre-NMS Batch {batch_idx}: Valid detections shape {valid_detections.shape}") nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS - conf_thres=0.45, # Confidence threshold + conf_thres=0.5, # Confidence threshold iou_thres=0.45 # IoU threshold ) - - - for img_idx, nms_result in enumerate(nms_results): - final_detections = [] - for det in nms_result: # Loop through detections in this image + final_detections = [] + for det in nms_results[0]: # Loop through detections in this image x_min, y_min, x_max, y_max, confidence, predicted_class = det.cpu().numpy() final_detections.append({ "bbox": [x_min, y_min, x_max, y_max], "confidence": confidence, "class": int(predicted_class), }) - decoded_results.append(final_detections) + #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") + decoded_results.append(final_detections) + #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") self.publish_batch_nuscenes(image_list, decoded_results) def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() @@ -423,6 +457,7 @@ def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg.header.frame_id = "batch" for idx, img_msg in enumerate(image_list): + #self.get_logger().info(f"{idx}") numpy_image = np.frombuffer(img_msg.data, 
np.uint8) image = cv2.imdecode(numpy_image, cv2.IMREAD_COLOR) height, width = image.shape[:2] @@ -430,8 +465,8 @@ def publish_batch_nuscenes(self, image_list, decoded_results): detection_array = Detection2DArray() detection_array.header.stamp = batch_msg.header.stamp detection_array.header.frame_id = f"camera_{idx}" - for batch_idx, batch_detections in enumerate(decoded_results): - for anchor_idx, detection in enumerate(batch_detections): + batch_detections = decoded_results[idx] + for anchor_idx, detection in enumerate(batch_detections): # Extract values from the dictionary bbox = detection["bbox"] x_min, y_min, x_max, y_max = bbox @@ -441,6 +476,7 @@ def publish_batch_nuscenes(self, image_list, decoded_results): x_max = int(x_max * width / 640) y_min = int(y_min * height / 640) y_max = int(y_max * height / 640) + #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") label = f"Class: {predicted_class}, Conf: {confidence:.2f}" annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) detection = Detection2D() @@ -454,17 +490,17 @@ def publish_batch_nuscenes(self, image_list, decoded_results): detected_object.hypothesis.score = float(confidence) detection.results.append(detected_object) detection_array.detections.append(detection) - batch_msg.detections.append(detection_array) + batch_msg.detections.append(detection_array) annotated_image = annotator.result() vis_compressed_image = CompressedImage() vis_compressed_image.header.stamp = self.get_clock().now().to_msg() vis_compressed_image.header.frame_id = f"camera_{idx}" vis_compressed_image.format = "jpeg" vis_compressed_image.data = cv2.imencode('.jpg', annotated_image, [cv2.IMWRITE_JPEG_QUALITY, 70])[1].tobytes() - self.batch_vis_publisher.publish(vis_compressed_image) + self.batch_vis_publishers[idx].publish(vis_compressed_image) - # Publish Detection2DArray - self.batch_detection_publisher.publish(detection_array) + # Publish Detection2DArray + self.batch_detection_publishers[idx].publish(detection_array) # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) From bdf39a4154bff6a8490353846ebd4734e8a4c5b0 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Thu, 12 Dec 2024 19:54:03 +0000 Subject: [PATCH 35/37] Batching complete, eve test remaining --- .../yolov8_detection.py | 191 +++++++++++++----- .../CMakeLists.txt | 1 + .../msg/EveBatchDetection.msg | 3 + 3 files changed, 143 insertions(+), 52 deletions(-) create mode 100644 src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 070e3eb2..79ad5be6 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -2,7 +2,7 @@ from rclpy.node import Node import os from message_filters import Subscriber, TimeSynchronizer, ApproximateTimeSynchronizer -from camera_object_detection_msgs.msg import BatchDetection +from camera_object_detection_msgs.msg import BatchDetection, EveBatchDetection from sensor_msgs.msg import Image, CompressedImage from vision_msgs.msg import ( ObjectHypothesisWithPose, @@ -81,11 +81,13 @@ def __init__(self): #Declare batch topic self.declare_parameter("batch_inference_topic", "/batched_camera_message") + 
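# Editor's note (not part of this patch): the topic and model-path parameters
# declared here can be overridden at launch time instead of editing the node.
# A minimal sketch of such an override is shown below; the real eve.launch.py
# referenced by the perception docker-compose file is not included in this
# series, so the executable name and values are illustrative assumptions.
from launch import LaunchDescription
from launch_ros.actions import Node

def generate_launch_description():
    return LaunchDescription([
        Node(
            package="camera_object_detection",
            executable="yolov8_detection",  # assumed entry point name
            parameters=[{
                "batch_inference_topic": "/batched_camera_message",
                "eve_batch_inference_topic": "/eve_batched_camera_message",
                "tensorRT_model_path": "/perception_models/tensorRT.engine",
            }],
        )
    ])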
self.declare_parameter("eve_batch_inference_topic", "/eve_batched_camera_message") #Batch inference topic self.batch_inference_topic = self.get_parameter("batch_inference_topic").value + self.eve_batch_inference_topic = self.get_parameter("eve_batch_inference_topic").value self.batch_publish_detection_topic = self.get_parameter("batch_publish_detection_topic").value self.batch_publish_vis_topic = self.get_parameter("batch_publish_vis_topic").value self.onnx_model_path = self.get_parameter("onnx_model_path").value @@ -145,24 +147,24 @@ def __init__(self): self.back_right_camera_subscription = Subscriber(self, CompressedImage, self.back_right_camera_topic) self.back_left_camera_subscription = Subscriber(self, CompressedImage, self.back_left_camera_topic) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.back_center_camera_subscription, self.front_right_camera_subscription, self.back_right_camera_subscription, self.front_left_camera_subscription, self.back_left_camera_subscription], queue_size=10, slop=0.5) - self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription, self.front_right_camera_subscription, self.front_left_camera_subscription], queue_size=10, slop=0.1) + self.ats = ApproximateTimeSynchronizer([self.front_left_camera_subscription,self.front_center_camera_subscription, self.front_right_camera_subscription ], queue_size=10, slop=0.1) #self.ats = ApproximateTimeSynchronizer([self.front_center_camera_subscription], queue_size=10, slop=0.3) self.ats.registerCallback(self.batch_inference_callback) - # #Subscription for Eve cameras - # self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) - # self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) - # self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) - # self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) + #Subscription for Eve cameras + self.right_camera_subscription = Subscriber(self, Image, self.camera_topic) + self.left_camera_subscription = Subscriber(self, Image, self.left_camera_topic) + self.center_camera_subscription = Subscriber(self, Image, self.center_camera_topic) + self.eve_ats = ApproximateTimeSynchronizer([self.right_camera_subscription, self.left_camera_subscription,self.center_camera_subscription], queue_size=10, slop=0.1) # #Create this callback function tomorrow - # self.eve_ats.registerCallback(self.eve_batch_inference_callback) + self.eve_ats.registerCallback(self.eve_batch_inference_callback) self.get_logger().info(f"TENSORT VERSION:{trt.__version__}") self.orig_image_width = None self.orig_image_height = None - # set devicepublish_visDete + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): self.get_logger().info("Using GPU for inference") @@ -171,31 +173,46 @@ def __init__(self): # CV bridge self.cv_bridge = CvBridge() - + + status, self.stream = cudart.cudaStreamCreate() assert status.value == 0, "IS NOT ZERO" #self.build_engine() - self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) self.last_publish_time = self.get_clock().now() self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names self.stride = int(self.model.stride) - # setup vis publishers #Batch vis publishers 
self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) + self.eve_batched_camera_message_publisher = self.create_publisher(EveBatchDetection, self.eve_batch_inference_topic, 10) self.num_cameras = 3 # Adjust this based on the number of cameras + + #Nuscenes Publishers + self.nuscenes_camera_names = ["/CAM_FRONT_LEFT", "/CAM_FRONT", "/CAM_FRONT_RIGHT"] self.batch_vis_publishers = [ - self.create_publisher(CompressedImage, f"/camera_{i}/vis", 10) - for i in range(self.num_cameras) + self.create_publisher(CompressedImage, f"{nuscenes}/viz", 10) + for nuscenes in self.nuscenes_camera_names ] self.batch_detection_publishers = [ - self.create_publisher(Detection2DArray, f"/camera_{i}/detections", 10) - for i in range(self.num_cameras) + self.create_publisher(Detection2DArray, f"{nuscenes}/detections", 10) + for nuscenes in self.nuscenes_camera_names + ] + + #Eve Publishers + self.eve_camera_names = [self.left_camera_topic, self.center_camera_topic, self.camera_topic] + self.eve_batch_vis_publishers = [ + self.create_publisher(Image, f"{eve}/eve_viz", 10) + for eve in self.eve_camera_names + ] + self.eve_batch_detection_publishers = [ + self.create_publisher(Detection2DArray, f"{eve}/eve_detections", 10) + for eve in self.eve_camera_names ] + # self.batch_detection_publisher = self.create_publisher(Detection2DArray, self.batch_publish_detection_topic, 10) # self.batch_vis_publisher = self.create_publisher(CompressedImage, self.batch_publish_vis_topic, 10) @@ -264,7 +281,6 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): trt.init_libnvinfer_plugins(self.logger, namespace ='') with trt.Runtime(self.logger) as runtime: self.tensorRT_model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) - #self.get_logger().info(f"TENSORRT Model:{self.weight}") self.execution_context = self.tensorRT_model.create_execution_context() @@ -273,7 +289,6 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): self.execution_context.set_input_shape(self.input_tensor_name, (batch_size, rgb, width, height)) self.inputShape = self.execution_context.get_tensor_shape(self.input_tensor_name) - #self.get_logger().info(f"tensor Shape:{self.inputShape}") self.names = [self.tensorRT_model.get_tensor_name(i) for i in range(self.num_io_tensors)] self.num_io_tensors = self.tensorRT_model.num_io_tensors self.bindings = [0] * self.num_io_tensors @@ -292,10 +307,7 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): self.num_inputs = len(self.input_names) self.num_outputs = len(self.output_names) self.input_names = self.names[:self.num_inputs] - #self.output_names = self.names[:self.num_outputs] #This includes the input name in output self.output_names = [name for name in self.output_names if name not in self.input_names] # This line removes it - #self.get_logger().info(f"INPUT NAMES:: {self.input_names}") - #self.get_logger().info(f"OUTPUT NAMES:: {self.output_names}") def initialize_tensors(self): @@ -305,11 +317,8 @@ def initialize_tensors(self): self.output_ptrs = [] - for name in self.output_names: - #self.get_logger().info(f"NAMES of outputs:{name}") - + for name in self.output_names: self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) - #self.get_logger().info(f"Tensor shape:{self.tensorRT_output_shape}") self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.output_cpu = np.empty(self.tensorRT_output_shape, dtype = self.outputDtype) status, self.output_gpu = 
cudart.cudaMallocAsync(self.output_cpu.nbytes, self.stream) @@ -322,7 +331,6 @@ def initialize_tensors(self): for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) - #self.get_logger().info(f"Final Input Shape:{self.tensorRT_input_shape}") self.dtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) self.input_cpu = np.empty(self.tensorRT_input_shape, self.dtype) status, self.input_gpu = cudart.cudaMallocAsync(self.input_cpu.nbytes, self.stream) @@ -338,7 +346,7 @@ def tensorRT_inferencing(self, batch_array): batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - # Handle input + self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): name = self.input_info[i].name @@ -351,7 +359,7 @@ def tensorRT_inferencing(self, batch_array): ) self.execution_context.set_tensor_address(name, self.input_info[i].gpu) - # Handle dynamic outputs + self.output_gpu_ptrs = [] self.outputs_ndarray = [] for i in range(self.num_outputs): @@ -396,7 +404,6 @@ def batch_inference_callback(self,msg1,msg2,msg3): numpy_array = np.frombuffer(msg.data, np.uint8) compressedImage = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) original_height, original_width = compressedImage.shape[:2] - #self.get_logger().info(f"original Height:{original_height} original width: {original_width}") resized_compressedImage = cv2.resize(compressedImage,(640, 640)) rgb_image = cv2.cvtColor(resized_compressedImage, cv2.COLOR_BGR2RGB) normalized_image = rgb_image / 255.0 @@ -406,34 +413,35 @@ def batch_inference_callback(self,msg1,msg2,msg3): batched_list.append(float_image) # batched_list = [tensor.cpu().numpy() for tensor in batched_list] batched_images = np.stack(batched_list, axis=0) - self.get_logger().info(f"batched image shape:{batched_images.shape}") - # self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + batch = batched_images.shape[0] + self.initialize_engine(self.tensorRT_model_path, batch,3,640,640) self.input_info, self.output_info = self.initialize_tensors() detections = self.tensorRT_inferencing(batched_images) - detection_tensor = detections[0] # Assuming the first tensor corresponds to detections - batch_size = detection_tensor.shape[0] # Number of images in the batch - num_anchors = detection_tensor.shape[2] # Number of anchors (e.g., 8400) - decoded_results = [] # A list to hold decoded detections for each image + decoded_results = self.parse_detections(detections) + self.publish_batch_nuscenes(image_list, decoded_results) + def parse_detections(self, detections): + detection_tensor = detections[0] + batch_size = detection_tensor.shape[0] + num_anchors = detection_tensor.shape[2] + decoded_results = [] for batch_idx in range(batch_size): - image_detections = detection_tensor[batch_idx] # Detections for the current image - valid_detections = [] # Store valid detections for this image - for anchor_idx in range(num_anchors): # Loop through all anchors - detection = image_detections[:, anchor_idx] # Get data for one anchor - x_center, y_center, width, height = detection[:4] # Bounding box info - class_probs = detection[4:] # Class probabilities - predicted_class = np.argmax(class_probs) # Most likely class + image_detections = detection_tensor[batch_idx] + valid_detections = [] + for anchor_idx in range(num_anchors): + detection = image_detections[:, anchor_idx] + x_center, y_center, width, height = detection[:4] + class_probs = detection[4:] + 
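                # Editor's note (not part of the original patch): for a YOLOv8
                # detection head exported to ONNX/TensorRT, each anchor column is
                # laid out as [x_center, y_center, w, h, class_0_score, ...,
                # class_{N-1}_score] with no separate objectness term, so taking
                # the highest class score as the confidence (as done on the next
                # lines) matches the (84, 8400) layout described earlier,
                # assuming an 80-class COCO model.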
predicted_class = np.argmax(class_probs) confidence = class_probs[predicted_class] x_min = x_center - width / 2 y_min = y_center - height / 2 x_max = x_center + width / 2 y_max = y_center + height / 2 - #if confidence > 0.5: - #self.get_logger().info(f"Camera {batch_idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") + valid_detections.append([ x_min, y_min, x_max, y_max, confidence, predicted_class ]) valid_detections = torch.tensor(valid_detections) - #self.get_logger().info(f"Pre-NMS Batch {batch_idx}: Valid detections shape {valid_detections.shape}") nms_results = non_max_suppression( valid_detections.unsqueeze(0), # Add batch dimension for NMS conf_thres=0.5, # Confidence threshold @@ -449,8 +457,9 @@ def batch_inference_callback(self,msg1,msg2,msg3): }) #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") decoded_results.append(final_detections) + return decoded_results #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") - self.publish_batch_nuscenes(image_list, decoded_results) + def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() batch_msg.header.stamp = self.get_clock().now().to_msg() @@ -504,13 +513,91 @@ def publish_batch_nuscenes(self, image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) + + def eve_batch_inference_callback(self, msg1,msg2,msg3): + image_list = [msg1,msg2,msg3] + batched_list = [] + for msg in image_list: + if self.compressed: + numpy_array = np.frombuffer(msg.data, np.uint8) + cv_image = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = cv_image.shape[:2] + else: + cv_image = self.cv_bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough") + resized_image = cv2.resize(cv_image, (640,640)) # can also be 1024, 1024 + rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB) + normalized_image = rgb_image / 255 + chw_image = np.transpose(normalized_image, (2,0,1)) + float_image = chw_image.astype(np.float32) + batched_list.append(float_image) + batched_images = np.stack(batched_list, axis = 0) + self.get_logger().info(f"batched image shape:{batched_images.shape}") + self.initialize_engine(self.tensorRT_model_path, 3,3,640,640) + self.input_info, self.output_info = self.initialize_tensors() + detections = self.tensorRT_inferencing(batched_images) + decoded_results = self.parse_detections(detections) + + + def publish_batch_eve(self,image_list, decoded_results): + batch_msg = BatchDetection() + batch_msg.header.stamp = self.get_clock().now().to_msg() + batch_msg.header.frame_id = "batch" + + for idx, img_msg in enumerate(image_list): + if self.compressed: + numpy_array = np.frombuffer(msg.data, np.uint8) + image = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR) + original_height, original_width = cv_image.shape[:2] + else: + image = self.cv_bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough") + height, width = image.shape[:2] + annotator = Annotator(image, line_width=2, example="Class:0") + detection_array = Detection2DArray() + detection_array.header.stamp = batch_msg.header.stamp + detection_array.header.frame_id = f"camera_{idx}" + batch_detections = decoded_results[idx] + for anchor_idx, detection in enumerate(batch_detections): + # Extract values from the dictionary + bbox = detection["bbox"] + x_min, y_min, x_max, y_max = bbox + confidence = detection["confidence"] + predicted_class = detection["class"] + x_min = int(x_min * width / 640) + x_max = 
int(x_max * width / 640) + y_min = int(y_min * height / 640) + y_max = int(y_max * height / 640) + #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") + label = f"Class: {predicted_class}, Conf: {confidence:.2f}" + annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) + detection = Detection2D() + detection.bbox.center.position.x = (x_min + x_max) / 2 + detection.bbox.center.position.y = (y_min + y_max) / 2 + detection.bbox.size_x = float(x_max - x_min) + detection.bbox.size_y = float(y_max - y_min) + + detected_object = ObjectHypothesisWithPose() + detected_object.hypothesis.class_id = str(int(predicted_class)) + detected_object.hypothesis.score = float(confidence) + detection.results.append(detected_object) + detection_array.detections.append(detection) + batch_msg.detections.append(detection_array) + annotated_image = annotator.result() + + + vis_image = Image() + vis_image.header.stamp = self.get_clock().now().to_msg() + vis_image.header.frame_id = f"camera_{idx}" + vis_image.format = "jpeg" + vis_image.data = self.cv_bridge.cv2_to_imgmsg( annotated_image,encoding="bgr8") + self.batch_vis_publishers[idx].publish(vis_image) + + # Publish Detection2DArray + self.batch_detection_publishers[idx].publish(detection_array) + + # Publish batch detection message + self.batched_camera_message_publisher.publish(batch_msg) + - def batch_convert_to_tensor(self, cv_image): - img = cv_image.transpose(2,0,1) - img = torch.from_numpy(img).to(self.device) - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - return img def crop_image(self, cv_image): if self.crop_mode == "LetterBox": diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt index 21c77460..7a9097e0 100644 --- a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/CMakeLists.txt @@ -16,6 +16,7 @@ find_package(vision_msgs REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/BatchDetection.msg" + "msg/EveBatchDetection.msg" DEPENDENCIES std_msgs sensor_msgs vision_msgs ) diff --git a/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg new file mode 100644 index 00000000..4d8f55e3 --- /dev/null +++ b/src/wato_msgs/perception_msgs/camera_object_detection_msgs/msg/EveBatchDetection.msg @@ -0,0 +1,3 @@ +std_msgs/Header header +sensor_msgs/Image[] images # Array of compressed images in the batch +vision_msgs/Detection2DArray[] detections # Array of detections for each image From a81ccbaf4a07afec963842f68b3b2ac1bd29431b Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Tue, 17 Dec 2024 15:40:13 +0000 Subject: [PATCH 36/37] Refactored Changes --- .../yolov8_detection.py | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index 79ad5be6..b5a1e30b 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -204,11 +204,11 @@ def __init__(self): #Eve Publishers self.eve_camera_names = 
[self.left_camera_topic, self.center_camera_topic, self.camera_topic] self.eve_batch_vis_publishers = [ - self.create_publisher(Image, f"{eve}/eve_viz", 10) + self.create_publisher(Image, f"{eve}/eve_batch_viz", 10) for eve in self.eve_camera_names ] self.eve_batch_detection_publishers = [ - self.create_publisher(Detection2DArray, f"{eve}/eve_detections", 10) + self.create_publisher(Detection2DArray, f"{eve}/eve_batch_detections", 10) for eve in self.eve_camera_names ] @@ -276,6 +276,11 @@ def build_engine(self): self.get_logger().info("FINISHED WRITING ") def initialize_engine(self, weight, batch_size, rgb, width, height): + """ + Initializes engine file requirements + - takes in file path for tensorRT file, batch size, # of rgb channels, width & height + - includes input names, output names, and setting dimensions for model input shape + """ self.weight = Path(weight) if isinstance(weight, str) else weight self.logger = trt.Logger(trt.Logger.WARNING) trt.init_libnvinfer_plugins(self.logger, namespace ='') @@ -311,12 +316,17 @@ def initialize_engine(self, weight, batch_size, rgb, width, height): def initialize_tensors(self): + """ + Initializes GPU from cuda to set up inferencing + - Assigns input names, and shape + - Assigns output names, and shapes + """ self.dynamic = True self.input_info = [] self.output_info = [] self.output_ptrs = [] - + # Initializing output tensors for name in self.output_names: self.tensorRT_output_shape = self.execution_context.get_tensor_shape(name) self.outputDtype = trt.nptype(self.tensorRT_model.get_tensor_dtype(name)) @@ -328,6 +338,7 @@ def initialize_tensors(self): self.output_ptrs.append(self.output_gpu) self.output_info.append(Tensor(name, self.outputDtype, self.tensorRT_output_shape, self.output_cpu, self.output_gpu)) + #Initializes input tensors for i, name in enumerate(self.input_names): if self.tensorRT_model.get_tensor_name(i) == name: self.tensorRT_input_shape = tuple(self.inputShape) @@ -342,11 +353,15 @@ def initialize_tensors(self): return self.input_info, self.output_info def tensorRT_inferencing(self, batch_array): + """ + Inferences through preprocessed batch images and gives data about detections + - Returns a contigious array of shape (3,84,8400) + """ assert batch_array.ndim == 4 batch_size = batch_array.shape[0] assert batch_size == self.input_info[0].shape[0] - + #Intializing memory, and names self.contiguous_inputs = [np.ascontiguousarray(batch_array)] for i in range(self.num_inputs): name = self.input_info[i].name @@ -396,6 +411,11 @@ def tensorRT_inferencing(self, batch_array): # will be called with nuscenes rosbag def batch_inference_callback(self,msg1,msg2,msg3): + """ + Returns final detections array for publishing to foxglove + - Preprocess and batch images + - Call tensorRT and parse through detections and send for visualization + """ #Taking msgs from all 6 ros2 subscribers image_list = [msg1,msg2,msg3] batched_list = [] @@ -409,9 +429,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): normalized_image = rgb_image / 255.0 chw_image = np.transpose(normalized_image, (2,0,1)) float_image = chw_image.astype(np.float32) - # tensor_image = torch.from_numpy(float_image).to('cuda') - batched_list.append(float_image) - # batched_list = [tensor.cpu().numpy() for tensor in batched_list] + batched_list.append(float_image) batched_images = np.stack(batched_list, axis=0) batch = batched_images.shape[0] self.initialize_engine(self.tensorRT_model_path, batch,3,640,640) @@ -419,6 +437,7 @@ def batch_inference_callback(self,msg1,msg2,msg3): 
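# Editor's sketch (not part of this patch): the per-anchor Python loop in
# parse_detections() below walks all 8400 anchors per image, which is slow in
# pure Python. Assuming the engine output keeps the
# (batch, 4 + num_classes, num_anchors) layout used throughout this file, the
# same decoding can be done with vectorized NumPy before handing the surviving
# boxes to non_max_suppression():
import numpy as np

def decode_yolov8_output(output, conf_thres=0.5):
    """Decode one (4 + num_classes, num_anchors) tensor into [x1, y1, x2, y2, conf, cls] rows."""
    boxes = output[:4, :]                      # (4, A): x_center, y_center, w, h
    scores = output[4:, :]                     # (num_classes, A)
    cls_ids = scores.argmax(axis=0)            # best class per anchor
    confs = scores[cls_ids, np.arange(scores.shape[1])]
    keep = confs > conf_thres                  # drop low-confidence anchors early
    xc, yc, w, h = boxes[:, keep]
    dets = np.stack([xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2,
                     confs[keep], cls_ids[keep].astype(np.float32)], axis=1)
    return dets                                # (num_kept, 6)

# Usage sketch: per_image = [decode_yolov8_output(detections[0][i]) for i in range(batch)]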
detections = self.tensorRT_inferencing(batched_images) decoded_results = self.parse_detections(detections) self.publish_batch_nuscenes(image_list, decoded_results) + def parse_detections(self, detections): detection_tensor = detections[0] batch_size = detection_tensor.shape[0] @@ -455,10 +474,8 @@ def parse_detections(self, detections): "confidence": confidence, "class": int(predicted_class), }) - #self.get_logger().info(f"Post-NMS Batch {batch_idx}: {len(nms_results[0])} detections") decoded_results.append(final_detections) return decoded_results - #self.get_logger().info(f"Length decoded results:{len(decoded_results[1])}") def publish_batch_nuscenes(self, image_list, decoded_results): batch_msg = BatchDetection() @@ -485,7 +502,6 @@ def publish_batch_nuscenes(self, image_list, decoded_results): x_max = int(x_max * width / 640) y_min = int(y_min * height / 640) y_max = int(y_max * height / 640) - #self.get_logger().info(f"Camera {idx}: bbox: {x_min, y_min, x_max, y_max}, conf: {confidence}") label = f"Class: {predicted_class}, Conf: {confidence:.2f}" annotator.box_label((x_min, y_min, x_max, y_max), label, color=(0, 255, 0)) detection = Detection2D() From 8a0a184de1453da6795a1faffa4bc47059dba308 Mon Sep 17 00:00:00 2001 From: Arfan12630 Date: Fri, 20 Dec 2024 04:33:49 +0000 Subject: [PATCH 37/37] Cleaned up code --- .../yolov8_detection.py | 224 +----------------- 1 file changed, 1 insertion(+), 223 deletions(-) diff --git a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py index b5a1e30b..8bd0482b 100755 --- a/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py +++ b/src/perception/camera_object_detection/camera_object_detection/yolov8_detection.py @@ -128,17 +128,6 @@ def __init__(self): self.half = False - - self.subscription = self.create_subscription( - Image if not self.compressed else CompressedImage, - self.camera_topic, - self.image_callback, - qos_profile=QoSProfile( - reliability=QoSReliabilityPolicy.RELIABLE, - history=QoSHistoryPolicy.KEEP_LAST, - depth=10, - ), - ) #Subscription for Nuscenes self.front_center_camera_subscription = Subscriber(self, CompressedImage, self.front_center_camera_topic) self.front_right_camera_subscription = Subscriber(self, CompressedImage, self.front_right_camera_topic) @@ -181,9 +170,7 @@ def __init__(self): #self.build_engine() self.last_publish_time = self.get_clock().now() - self.model = AutoBackend(self.model_path, device=self.device, dnn=False, fp16=False) - self.names = self.model.module.names if hasattr(self.model, "module") else self.model.names - self.stride = int(self.model.stride) + #Batch vis publishers self.batched_camera_message_publisher = self.create_publisher(BatchDetection,self.batch_inference_topic, 10) @@ -598,8 +585,6 @@ def publish_batch_eve(self,image_list, decoded_results): detection_array.detections.append(detection) batch_msg.detections.append(detection_array) annotated_image = annotator.result() - - vis_image = Image() vis_image.header.stamp = self.get_clock().now().to_msg() vis_image.header.frame_id = f"camera_{idx}" @@ -612,213 +597,6 @@ def publish_batch_eve(self,image_list, decoded_results): # Publish batch detection message self.batched_camera_message_publisher.publish(batch_msg) - - - - def crop_image(self, cv_image): - if self.crop_mode == "LetterBox": - img = LetterBox(self.image_size, stride=self.stride)(image=cv_image) - elif self.crop_mode == "CenterCrop": - img 
= CenterCrop(self.image_size)(cv_image) - else: - raise Exception("Invalid crop mode, please choose either 'LetterBox' or 'CenterCrop'!") - - return img - - def convert_bboxes_to_orig_frame(self, bbox): - """ - Converts bounding box coordinates from the scaled image frame back to the original image frame. - - This function takes into account the original image dimensions and the scaling method used - (either "LetterBox" or "CenterCrop") to accurately map the bounding box coordinates back to - their original positions in the original image. - - Parameters: - bbox (list): A list containing the bounding box coordinates in the format [x1, y1, w1, h1] - in the scaled image frame. - - Returns: - list: A list containing the bounding box coordinates in the format [x1, y1, w1, h1] - in the original image frame. - - """ - width_scale = self.orig_image_width / self.image_size - height_scale = self.orig_image_height / self.image_size - if self.crop_mode == "LetterBox": - translation = (self.image_size - self.orig_image_height / width_scale) / 2 - return [ - bbox[0] * width_scale, - (bbox[1] - translation) * width_scale, - bbox[2] * width_scale, - bbox[3] * width_scale, - ] - elif self.crop_mode == "CenterCrop": - translation = (self.orig_image_width / height_scale - self.image_size) / 2 - return [ - (bbox[0] + translation) * height_scale, - bbox[1] * height_scale, - bbox[2] * height_scale, - bbox[3] * height_scale, - ] - - def crop_and_convert_to_tensor(self, cv_image): - """ - Preprocess the image by resizing, padding and rearranging the dimensions. - - Parameters: - cv_image: A numpy or cv2 image of shape (w,h,3) - - Returns: - torch.Tensor image for model input of shape (1,3,w,h) - """ - img = self.crop_image(cv_image) - - # Convert - img = cv_image.transpose(2, 0, 1) - - # Further conversion - img = torch.from_numpy(img).to(self.device) - img = img.half() if self.half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - img = img.unsqueeze(0) - - return img - - def postprocess_detections(self, detections, annotator): - """ - Post-process draws bouningboxes on camera image. 
- - Parameters: - detections: A list of dict with the format - { - "label": str, - "bbox": [float], - "conf": float - } - annotator: A ultralytics.yolo.utils.plotting.Annotator for the current image - - Returns: - processed_detections: filtered detections - annotator_img: image with bounding boxes drawn on - """ - processed_detections = detections - - for det in detections: - label = f'{det["label"]} {det["conf"]:.2f}' - x1, y1, w1, h1 = det["bbox"] - xyxy = [x1, y1, x1 + w1, y1 + h1] - annotator.box_label(xyxy, label, color=colors(1, True)) - - annotator_img = annotator.result() - return (processed_detections, annotator_img) - - def publish_vis(self, annotated_img, msg, feed): - # Publish visualizations - imgmsg = self.cv_bridge.cv2_to_imgmsg(annotated_img, "bgr8") - imgmsg.header.stamp = msg.header.stamp - imgmsg.header.frame_id = msg.header.frame_id - self.vis_publisher.publish(imgmsg) - - def publish_detections(self, detections, msg, feed): - # Publish detections to an detectionList message - detection2darray = Detection2DArray() - - # fill header for detection list - detection2darray.header.stamp = msg.header.stamp - detection2darray.header.frame_id = msg.header.frame_id - # populate detection list - if detections is not None and len(detections): - for detection in detections: - detection2d = Detection2D() - detection2d.header.stamp = msg.header.stamp - detection2d.header.frame_id = msg.header.frame_id - detected_object = ObjectHypothesisWithPose() - detected_object.hypothesis.class_id = detection["label"] - detected_object.hypothesis.score = detection["conf"] - detection2d.results.append(detected_object) - detection2d.bbox.center.position.x = detection["bbox"][0] - detection2d.bbox.center.position.y = detection["bbox"][1] - detection2d.bbox.size_x = detection["bbox"][2] - detection2d.bbox.size_y = detection["bbox"][3] - - # append detection to detection list - detection2darray.detections.append(detection2d) - - self.detection_publisher.publish(detection2darray) - - def image_callback(self, msg): - self.get_logger().debug("Received image") - if self.orig_image_width is None: - self.orig_image_width = msg.width - self.orig_image_height = msg.height - - images = [msg] # msg is a single sensor image - startTime = time.time() - for image in images: - - # convert ros Image to cv::Mat - if self.compressed: - np_arr = np.frombuffer(msg.data, np.uint8) - cv_image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) - else: - try: - cv_image = self.cv_bridge.imgmsg_to_cv2(image, desired_encoding="passthrough") - except CvBridgeError as e: - self.get_logger().error(str(e)) - return - - # preprocess image and run through prediction - img = self.crop_and_convert_to_tensor(cv_image) - pred = self.model(img) - - # nms function used same as yolov8 detect.py - pred = non_max_suppression(pred) #Eliminates overlapping bounding boxes - detections = [] - for i, det in enumerate(pred): # per image - if len(det): - # Write results - for *xyxy, conf, cls in reversed(det): - label = self.names[int(cls)] - - bbox = [ - xyxy[0], - xyxy[1], - xyxy[2] - xyxy[0], - xyxy[3] - xyxy[1], - ] - bbox = [b.item() for b in bbox] - bbox = self.convert_bboxes_to_orig_frame(bbox) - - detections.append( - { - "label": label, - "conf": conf.item(), - "bbox": bbox, - } - ) - self.get_logger().debug(f"{label}: {bbox}") - - annotator = Annotator( - cv_image, - line_width=self.line_thickness, - example=str(self.names), - ) - (detections, annotated_img) = self.postprocess_detections(detections, annotator) - - # Currently we support a single 
camera so we pass an empty string
-            feed = ""
-            self.publish_vis(annotated_img, msg, feed)
-            self.publish_detections(detections, msg, feed)
-
-            if self.save_detections:
-                cv2.imwrite(f"detections/{self.counter}.jpg", annotated_img)
-                self.counter += 1
-
-        self.get_logger().info(
-            f"Finished in: {time.time() - startTime}, {1/(time.time() - startTime)} Hz"
-        )
-
-
 def main(args=None):
     rclpy.init(args=args)
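
The batched path that survives this cleanup (batch_callback -> tensorRT_inferencing -> parse_detections -> publish_batch_nuscenes / publish_batch_eve) scales each box from the 640x640 network frame back to the camera's native resolution (x * width / 640, y * height / 640) and republishes it as a vision_msgs Detection2D. The sketch below is illustrative only, not code from the patch: the helper names rescale_box and to_detection2d are hypothetical, and it assumes the engine was run on a square 640x640 input, as the width/640 and height/640 scaling in publish_batch_nuscenes implies.

# Illustrative sketch, not part of this patch: box rescaling plus packing
# into vision_msgs types, mirroring publish_batch_nuscenes/publish_batch_eve.
from vision_msgs.msg import Detection2D, ObjectHypothesisWithPose

MODEL_SIZE = 640  # assumed square network input, per the width/640 scaling


def rescale_box(box, orig_width, orig_height, model_size=MODEL_SIZE):
    """Map [x_min, y_min, x_max, y_max] from the model frame to the original image."""
    x_min, y_min, x_max, y_max = box
    return [
        int(x_min * orig_width / model_size),
        int(y_min * orig_height / model_size),
        int(x_max * orig_width / model_size),
        int(y_max * orig_height / model_size),
    ]


def to_detection2d(box, confidence, class_id, stamp, frame_id):
    """Build a Detection2D (center/size convention) from a rescaled corner box."""
    x_min, y_min, x_max, y_max = box
    detection = Detection2D()
    detection.header.stamp = stamp
    detection.header.frame_id = frame_id

    hypothesis = ObjectHypothesisWithPose()
    hypothesis.hypothesis.class_id = str(class_id)
    hypothesis.hypothesis.score = float(confidence)
    detection.results.append(hypothesis)

    detection.bbox.center.position.x = (x_min + x_max) / 2.0
    detection.bbox.center.position.y = (y_min + y_max) / 2.0
    detection.bbox.size_x = float(x_max - x_min)
    detection.bbox.size_y = float(y_max - y_min)
    return detection

One design note on this choice: the deleted convert_bboxes_to_orig_frame also corrected for the LetterBox/CenterCrop translation, whereas a plain width/640, height/640 rescale like the one sketched here is exact only when the image is resized to the network frame without padding; if the batched preprocessing letterboxes with padding, the padding offset would need to be subtracted before scaling.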