diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml index 9c3e815e8abb4..7e389f5790051 100644 --- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml +++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml @@ -5,7 +5,7 @@ - + diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml index f941bc8d771b1..274b39be681b8 100644 --- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml +++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml @@ -2,16 +2,41 @@ - + - + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/launch/tier4_perception_launch/launch/perception.launch.xml b/launch/tier4_perception_launch/launch/perception.launch.xml index e1a2260f3ee6d..3ce78e3b29f29 100644 --- a/launch/tier4_perception_launch/launch/perception.launch.xml +++ b/launch/tier4_perception_launch/launch/perception.launch.xml @@ -47,7 +47,7 @@ - + diff --git a/perception/lidar_transfusion/CMakeLists.txt b/perception/lidar_transfusion/CMakeLists.txt new file mode 100644 index 0000000000000..c0e2d85f27e62 --- /dev/null +++ b/perception/lidar_transfusion/CMakeLists.txt @@ -0,0 +1,156 @@ +cmake_minimum_required(VERSION 3.14) +project(lidar_transfusion) + +find_package(autoware_cmake REQUIRED) +autoware_package() + +add_compile_options(-Wno-deprecated-declarations) + +option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF) + +# set flags for CUDA availability +option(CUDA_AVAIL "CUDA available" OFF) +find_package(CUDA) +if(CUDA_FOUND) + find_library(CUBLAS_LIBRARIES cublas HINTS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + if(CUDA_VERBOSE) + message("CUDA is available!") + message("CUDA Libs: ${CUDA_LIBRARIES}") + message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") + endif() + # Note: cublas_device was depreciated in CUDA version 9.2 + # https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4 + # In LibTorch, CUDA_cublas_device_LIBRARY is used. + unset(CUDA_cublas_device_LIBRARY CACHE) + set(CUDA_AVAIL ON) +else() + message("CUDA NOT FOUND") + set(CUDA_AVAIL OFF) +endif() + +# set flags for TensorRT availability +option(TRT_AVAIL "TensorRT available" OFF) +# try to find the tensorRT modules +find_library(NVINFER nvinfer) +find_library(NVONNXPARSER nvonnxparser) +if(NVINFER AND NVONNXPARSER) + if(CUDA_VERBOSE) + message("TensorRT is available!") + message("NVINFER: ${NVINFER}") + message("NVONNXPARSER: ${NVONNXPARSER}") + endif() + set(TRT_AVAIL ON) +else() + message("TensorRT is NOT Available") + set(TRT_AVAIL OFF) +endif() + +# set flags for CUDNN availability +option(CUDNN_AVAIL "CUDNN available" OFF) +# try to find the CUDNN module +find_library(CUDNN_LIBRARY +NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} +PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} +PATH_SUFFIXES lib lib64 bin +DOC "CUDNN library." +) +if(CUDNN_LIBRARY) + if(CUDA_VERBOSE) + message(STATUS "CUDNN is available!") + message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}") + endif() + set(CUDNN_AVAIL ON) +else() + message("CUDNN is NOT Available") + set(CUDNN_AVAIL OFF) +endif() + +if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) + find_package(ament_cmake_auto REQUIRED) + ament_auto_find_build_dependencies() + + include_directories( + include + ${CUDA_INCLUDE_DIRS} + ) + + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-g -G") + set(CUDA_NVCC_FLAGS "-g -G") + endif() + + ament_auto_add_library(transfusion_lib SHARED + lib/detection_class_remapper.cpp + lib/network/network_trt.cpp + lib/postprocess/non_maximum_suppression.cpp + lib/preprocess/voxel_generator.cpp + lib/preprocess/pointcloud_densification.cpp + lib/ros_utils.cpp + lib/transfusion_trt.cpp + ) + + cuda_add_library(transfusion_cuda_lib SHARED + lib/postprocess/circle_nms_kernel.cu + lib/postprocess/postprocess_kernel.cu + lib/preprocess/preprocess_kernel.cu + ) + + target_link_libraries(transfusion_lib + ${NVINFER} + ${NVONNXPARSER} + ${CUDA_LIBRARIES} + ${CUBLAS_LIBRARIES} + ${CUDA_curand_LIBRARY} + ${CUDNN_LIBRARY} + transfusion_cuda_lib + ) + + target_include_directories(transfusion_lib + PUBLIC + $ + $ + ) + + # To suppress unknown-pragmas error. The root-cause is CUB library in CUDA 11.6. + # This issue was fixed by https://github.com/NVIDIA/cub/commit/7d608bf1dc14553e2fb219eabeed80b76621b6fe + target_include_directories(transfusion_lib + SYSTEM PUBLIC + ${CUDA_INCLUDE_DIRS} + ) + + ament_auto_add_library(lidar_transfusion_component SHARED + src/lidar_transfusion_node.cpp + ) + + target_link_libraries(lidar_transfusion_component + transfusion_lib + ) + + rclcpp_components_register_node(lidar_transfusion_component + PLUGIN "lidar_transfusion::LidarTransfusionNode" + EXECUTABLE lidar_transfusion_node + ) + + if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() + endif() + + ament_auto_package( + INSTALL_TO_SHARE + launch + config + ) + +else() + find_package(ament_cmake_auto REQUIRED) + ament_auto_find_build_dependencies() + + ament_auto_package( + INSTALL_TO_SHARE + launch + ) +endif() diff --git a/perception/lidar_transfusion/README.md b/perception/lidar_transfusion/README.md new file mode 100644 index 0000000000000..7974714720c32 --- /dev/null +++ b/perception/lidar_transfusion/README.md @@ -0,0 +1,113 @@ +# lidar_transfusion + +## Purpose + +The `lidar_transfusion` package is used for 3D object detection based on lidar data (x, y, z, intensity). + +## Inner-workings / Algorithms + +The implementation bases on TransFusion [1] work. It uses TensorRT library for data process and network inference. + +We trained the models using . + +## Inputs / Outputs + +### Input + +| Name | Type | Description | +| -------------------- | ------------------------------- | ----------------- | +| `~/input/pointcloud` | `sensor_msgs::msg::PointCloud2` | Input pointcloud. | + +### Output + +| Name | Type | Description | +| -------------------------------------- | ------------------------------------------------ | --------------------------- | +| `~/output/objects` | `autoware_perception_msgs::msg::DetectedObjects` | Detected objects. | +| `debug/cyclic_time_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Cyclic time (ms). | +| `debug/pipeline_latency_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Pipeline latency time (ms). | +| `debug/processing_time/preprocess_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Preprocess (ms). | +| `debug/processing_time/inference_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Inference time (ms). | +| `debug/processing_time/postprocess_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Postprocess time (ms). | +| `debug/processing_time/total_ms` | `tier4_debug_msgs::msg::Float64Stamped` | Total processing time (ms). | + +## Parameters + +### TransFusion + +{{ json_to_markdown("perception/lidar_transfusion/schema/transfusion.schema.json") }} + +### Detection class remapper + +{{ json_to_markdown("perception/lidar_transfusion/schema/detection_class_remapper.schema.json") }} + +### The `build_only` option + +The `lidar_transfusion` node has `build_only` option to build the TensorRT engine file from the ONNX file. +Although it is preferred to move all the ROS parameters in `.param.yaml` file in Autoware Universe, the `build_only` option is not moved to the `.param.yaml` file for now, because it may be used as a flag to execute the build as a pre-task. You can execute with the following command: + +```bash +ros2 launch lidar_transfusion lidar_transfusion.launch.xml build_only:=true +``` + +### The `log_level` option + +The default logging severity level for `lidar_transfusion` is `info`. For debugging purposes, the developer may decrease severity level using `log_level` parameter: + +```bash +ros2 launch lidar_transfusion lidar_transfusion.launch.xml log_level:=debug +``` + +## Assumptions / Known limits + +## Trained Models + +You can download the onnx format of trained models by clicking on the links below. + +- TransFusion: [transfusion.onnx](https://awf.ml.dev.web.auto/perception/models/transfusion/v1/transfusion.onnx) + +The model was trained in TIER IV's internal database (~11k lidar frames) for 20 epochs. + +### Changelog + +## (Optional) Error detection and handling + + + +## (Optional) Performance characterization + + + +## References/External links + +[1] Xuyang Bai, Zeyu Hu, Xinge Zhu, Qingqiu Huang, Yilun Chen, Hongbo Fu and Chiew-Lan Tai. "TransFusion: Robust LiDAR-Camera Fusion for 3D Object Detection with Transformers." arXiv preprint arXiv:2203.11496 (2022). + +[2] + +[3] + +[4] + +[5] + +## (Optional) Future extensions / Unimplemented parts + + diff --git a/perception/lidar_transfusion/config/detection_class_remapper.param.yaml b/perception/lidar_transfusion/config/detection_class_remapper.param.yaml new file mode 100644 index 0000000000000..baea087c96bca --- /dev/null +++ b/perception/lidar_transfusion/config/detection_class_remapper.param.yaml @@ -0,0 +1,38 @@ +/**: + ros__parameters: + allow_remapping_by_area_matrix: + # NOTE(kl): We turn all vehicles into trailers if they go over 3x12 [m^2]. + # NOTE(kl): We turn cars into trucks if they have an area between 2.2 x 5.5 and 3.0 * 12.0 [m^2] + # row: original class. column: class to remap to + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE,PEDESTRIAN + [0, 0, 0, 0, 0, 0, 0, 0, #UNKNOWN + 0, 0, 1, 0, 1, 0, 0, 0, #CAR + 0, 0, 0, 0, 1, 0, 0, 0, #TRUCK + 0, 0, 0, 0, 1, 0, 0, 0, #BUS + 0, 0, 0, 0, 0, 0, 0, 0, #TRAILER + 0, 0, 0, 0, 0, 0, 0, 0, #MOTORBIKE + 0, 0, 0, 0, 0, 0, 0, 0, #BICYCLE + 0, 0, 0, 0, 0, 0, 0, 0] #PEDESTRIAN + + min_area_matrix: + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE, PEDESTRIAN + [ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #UNKNOWN + 0.000, 0.000, 12.100, 0.000, 36.000, 0.000, 0.000, 0.000, #CAR + 0.000, 0.000, 0.000, 0.000, 36.000, 0.000, 0.000, 0.000, #TRUCK + 0.000, 0.000, 0.000, 0.000, 36.000, 0.000, 0.000, 0.000, #BUS + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #TRAILER + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #MOTORBIKE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #BICYCLE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] #PEDESTRIAN + + + max_area_matrix: + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE, PEDESTRIAN + [ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #UNKNOWN + 0.000, 0.000, 36.000, 0.000, 999.999, 0.000, 0.000, 0.000, #CAR + 0.000, 0.000, 0.000, 0.000, 999.999, 0.000, 0.000, 0.000, #TRUCK + 0.000, 0.000, 0.000, 0.000, 999.999, 0.000, 0.000, 0.000, #BUS + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #TRAILER + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #MOTORBIKE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #BICYCLE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] #PEDESTRIAN diff --git a/perception/lidar_transfusion/config/transfusion.param.yaml b/perception/lidar_transfusion/config/transfusion.param.yaml new file mode 100644 index 0000000000000..feabe359caf1f --- /dev/null +++ b/perception/lidar_transfusion/config/transfusion.param.yaml @@ -0,0 +1,20 @@ +/**: + ros__parameters: + # network + class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"] + trt_precision: fp16 + voxels_num: [5000, 30000, 60000] # [min, opt, max] + point_cloud_range: [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0] # [x_min, y_min, z_min, x_max, y_max, z_max] + voxel_size: [0.3, 0.3, 8.0] # [x, y, z] + onnx_path: "$(var model_path)/transfusion.onnx" + engine_path: "$(var model_path)/transfusion.engine" + # pre-process params + densification_num_past_frames: 1 + densification_world_frame_id: map + # post-process params + circle_nms_dist_threshold: 0.5 + iou_nms_target_class_names: ["CAR"] + iou_nms_search_distance_2d: 10.0 + iou_nms_threshold: 0.1 + yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0] # refers to the class_names + score_threshold: 0.2 diff --git a/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp new file mode 100644 index 0000000000000..5c25a936d5392 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp @@ -0,0 +1,126 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * This code is licensed under CC0 1.0 Universal (Public Domain). + * You can use this without any limitation. + * https://creativecommons.org/publicdomain/zero/1.0/deed.en + */ + +#ifndef LIDAR_TRANSFUSION__CUDA_UTILS_HPP_ +#define LIDAR_TRANSFUSION__CUDA_UTILS_HPP_ + +#include + +#include +#include +#include +#include + +#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__)) + +namespace cuda +{ +inline void check_error(const ::cudaError_t e, const char * f, int n) +{ + if (e != ::cudaSuccess) { + ::std::stringstream s; + s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": " + << ::cudaGetErrorString(e); + throw ::std::runtime_error{s.str()}; + } +} + +struct deleter +{ + void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); } +}; + +template +using unique_ptr = ::std::unique_ptr; + +template +typename ::std::enable_if<::std::is_array::value, cuda::unique_ptr>::type make_unique( + const ::std::size_t n) +{ + using U = typename ::std::remove_extent::type; + U * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(U) * n)); + return cuda::unique_ptr{p}; +} + +template +cuda::unique_ptr make_unique() +{ + T * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(T))); + return cuda::unique_ptr{p}; +} + +constexpr size_t CUDA_ALIGN = 256; + +template +inline size_t get_size_aligned(size_t num_elem) +{ + size_t size = num_elem * sizeof(T); + size_t extra_align = 0; + if (size % CUDA_ALIGN != 0) { + extra_align = CUDA_ALIGN - size % CUDA_ALIGN; + } + return size + extra_align; +} + +template +inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size) +{ + size_t size = get_size_aligned(num_elem); + if (size > workspace_size) { + throw ::std::runtime_error("Workspace is too small!"); + } + workspace_size -= size; + T * ptr = reinterpret_cast(workspace); + workspace = reinterpret_cast(reinterpret_cast(workspace) + size); + return ptr; +} + +template +void clear_async(T * ptr, size_t num_elem, cudaStream_t stream) +{ + CHECK_CUDA_ERROR(::cudaMemsetAsync(ptr, 0, sizeof(T) * num_elem, stream)); +} + +} // namespace cuda + +#endif // LIDAR_TRANSFUSION__CUDA_UTILS_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp b/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp new file mode 100644 index 0000000000000..ace7dc5ff3405 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp @@ -0,0 +1,47 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_ +#define LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_ + +#include + +#include +#include +#include +#include + +#include + +namespace lidar_transfusion +{ + +class DetectionClassRemapper +{ +public: + void setParameters( + const std::vector & allow_remapping_by_area_matrix, + const std::vector & min_area_matrix, const std::vector & max_area_matrix); + void mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg); + +protected: + Eigen::Matrix allow_remapping_by_area_matrix_; + Eigen::MatrixXd min_area_matrix_; + Eigen::MatrixXd max_area_matrix_; + int num_labels_; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp b/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp new file mode 100644 index 0000000000000..86303618d7877 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp @@ -0,0 +1,73 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +//    http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_ +#define LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_ + +#include "lidar_transfusion/detection_class_remapper.hpp" +#include "lidar_transfusion/postprocess/non_maximum_suppression.hpp" +#include "lidar_transfusion/preprocess/pointcloud_densification.hpp" +#include "lidar_transfusion/transfusion_trt.hpp" +#include "lidar_transfusion/visibility_control.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace lidar_transfusion +{ + +class LIDAR_TRANSFUSION_PUBLIC LidarTransfusionNode : public rclcpp::Node +{ +public: + explicit LidarTransfusionNode(const rclcpp::NodeOptions & options); + +private: + void cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr msg); + + rclcpp::Subscription::ConstSharedPtr cloud_sub_{nullptr}; + rclcpp::Publisher::SharedPtr objects_pub_{ + nullptr}; + + tf2_ros::Buffer tf_buffer_; + tf2_ros::TransformListener tf_listener_{tf_buffer_}; + + DetectionClassRemapper detection_class_remapper_; + float score_threshold_{0.0}; + std::vector class_names_; + + NonMaximumSuppression iou_bev_nms_; + + std::unique_ptr detector_ptr_{nullptr}; + + // debugger + std::unique_ptr> stop_watch_ptr_{ + nullptr}; + std::unique_ptr debug_publisher_ptr_{nullptr}; + std::unique_ptr published_time_pub_{nullptr}; +}; +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp b/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp new file mode 100644 index 0000000000000..3f8f32e346b0a --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp @@ -0,0 +1,87 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_ +#define LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_ + +#include "lidar_transfusion/transfusion_config.hpp" +#include "lidar_transfusion/utils.hpp" + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace lidar_transfusion +{ + +struct ProfileDimension +{ + nvinfer1::Dims min; + nvinfer1::Dims opt; + nvinfer1::Dims max; + + bool operator!=(const ProfileDimension & rhs) const + { + return min.nbDims != rhs.min.nbDims || opt.nbDims != rhs.opt.nbDims || + max.nbDims != rhs.max.nbDims || !std::equal(min.d, min.d + min.nbDims, rhs.min.d) || + !std::equal(opt.d, opt.d + opt.nbDims, rhs.opt.d) || + !std::equal(max.d, max.d + max.nbDims, rhs.max.d); + } +}; + +class NetworkTRT +{ +public: + explicit NetworkTRT(const TransfusionConfig & config); + ~NetworkTRT(); + + bool init( + const std::string & onnx_path, const std::string & engine_path, const std::string & precision); + const char * getTensorName(NetworkIO name); + + tensorrt_common::TrtUniquePtr engine{nullptr}; + tensorrt_common::TrtUniquePtr context{nullptr}; + +private: + bool parseONNX( + const std::string & onnx_path, const std::string & engine_path, const std::string & precision, + size_t workspace_size = (1ULL << 30)); + bool saveEngine(const std::string & engine_path); + bool loadEngine(const std::string & engine_path); + bool createContext(); + bool setProfile( + nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network, + nvinfer1::IBuilderConfig & config); + bool validateNetworkIO(); + nvinfer1::Dims validateTensorShape(NetworkIO name, const std::vector shape); + + tensorrt_common::TrtUniquePtr runtime_{nullptr}; + tensorrt_common::TrtUniquePtr plan_{nullptr}; + tensorrt_common::Logger logger_; + TransfusionConfig config_; + std::vector tensors_names_; + + std::array in_profile_dims_; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp new file mode 100644 index 0000000000000..e231482652b98 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ +#define LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ + +#include "lidar_transfusion/utils.hpp" + +#include + +namespace lidar_transfusion +{ +// Non-maximum suppression (NMS) uses the distance on the xy plane instead of +// intersection over union (IoU) to suppress overlapped objects. +std::size_t circleNMS( + thrust::device_vector & boxes3d, const float distance_threshold, + thrust::device_vector & keep_mask, cudaStream_t stream); + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp new file mode 100644 index 0000000000000..daf2cc7c1fac1 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp @@ -0,0 +1,80 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ +#define LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ + +#include "lidar_transfusion/ros_utils.hpp" + +#include + +#include + +#include +#include + +namespace lidar_transfusion +{ +using autoware_perception_msgs::msg::DetectedObject; + +enum class NMS_TYPE { + IoU_BEV + // IoU_3D + // Distance_2D + // Distance_3D +}; + +struct NMSParams +{ + NMS_TYPE nms_type_{}; + std::vector target_class_names_{}; + double search_distance_2d_{}; + double iou_threshold_{}; + // double distance_threshold_{}; +}; + +std::vector classNamesToBooleanMask(const std::vector & class_names) +{ + std::vector mask; + constexpr std::size_t num_object_classification = 8; + mask.resize(num_object_classification); + for (const auto & class_name : class_names) { + const auto semantic_type = getSemanticType(class_name); + mask.at(semantic_type) = true; + } + + return mask; +} + +class NonMaximumSuppression +{ +public: + void setParameters(const NMSParams &); + + std::vector apply(const std::vector &); + +private: + bool isTargetLabel(const std::uint8_t); + + bool isTargetPairObject(const DetectedObject &, const DetectedObject &); + + Eigen::MatrixXd generateIoUMatrix(const std::vector &); + + NMSParams params_{}; + std::vector target_class_mask_{}; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp new file mode 100644 index 0000000000000..01435424aa248 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp @@ -0,0 +1,50 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ +#define LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ + +#include "lidar_transfusion/transfusion_config.hpp" +#include "lidar_transfusion/utils.hpp" + +#include +#include +#include + +#include + +namespace lidar_transfusion +{ + +class PostprocessCuda +{ +public: + explicit PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream); + + cudaError_t generateDetectedBoxes3D_launch( + const float * cls_output, const float * box_output, const float * dir_cls_output, + std::vector & det_boxes3d, cudaStream_t stream); + +private: + TransfusionConfig config_; + cudaStream_t stream_; + cudaStream_t stream_event_; + cudaEvent_t start_, stop_; + thrust::device_vector boxes3d_d_; + thrust::device_vector yaw_norm_thresholds_d_; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp new file mode 100644 index 0000000000000..25095f4dc9c0b --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp @@ -0,0 +1,101 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ +#define LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ + +#include "lidar_transfusion/cuda_utils.hpp" + +#include +#include +#ifdef ROS_DISTRO_GALACTIC +#include +#else +#include +#endif + +#include +#include +#include + +namespace lidar_transfusion +{ + +class DensificationParam +{ +public: + DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames) + : world_frame_id_(std::move(world_frame_id)), + pointcloud_cache_size_(num_past_frames + /*current frame*/ 1) + { + } + + std::string world_frame_id() const { return world_frame_id_; } + unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; } + +private: + std::string world_frame_id_; + unsigned int pointcloud_cache_size_{1}; +}; + +struct PointCloudWithTransform +{ + cuda::unique_ptr points_d{nullptr}; + std_msgs::msg::Header header; + size_t num_points{0}; + Eigen::Affine3f affine_past2world; +}; + +class PointCloudDensification +{ +public: + explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream); + + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer); + + double getCurrentTimestamp() const { return current_timestamp_; } + Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; } + std::list::iterator getPointCloudCacheIter() + { + return pointcloud_cache_.begin(); + } + bool isCacheEnd(std::list::iterator iter) + { + return iter == pointcloud_cache_.end(); + } + size_t getIdx(std::list::iterator iter) + { + return std::distance(pointcloud_cache_.begin(), iter); + } + size_t getCacheSize() + { + return std::distance(pointcloud_cache_.begin(), pointcloud_cache_.end()); + } + unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); } + +private: + void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine); + void dequeue(); + + DensificationParam param_; + double current_timestamp_{0.0}; + Eigen::Affine3f affine_world2current_; + std::list pointcloud_cache_; + cudaStream_t stream_; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp new file mode 100644 index 0000000000000..c571990d84b51 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp @@ -0,0 +1,77 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + * All rights reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ +#define LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ + +#include "lidar_transfusion/cuda_utils.hpp" +#include "lidar_transfusion/transfusion_config.hpp" +#include "lidar_transfusion/utils.hpp" + +#include + +namespace lidar_transfusion +{ + +class PreprocessCuda +{ +public: + PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream); + + void generateVoxels( + float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features, + unsigned int * voxel_num, unsigned int * voxel_idxs); + + cudaError_t generateVoxels_random_launch( + float * points, unsigned int points_size, unsigned int * mask, float * voxels); + + cudaError_t generateBaseFeatures_launch( + unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features, + unsigned int * voxel_num, unsigned int * voxel_idxs); + + // cudaError_t generateVoxelsInput_launch( + // uint8_t * cloud_data, CloudInfo & cloud_info, unsigned int points_agg, unsigned int + // points_size, float time_lag, float * affine_transform, float * points); + + cudaError_t generateSweepPoints_launch( + const float * input_points, size_t points_size, int input_point_step, float time_lag, + const float * transform, float * output_points); + +private: + TransfusionConfig config_; + cudaStream_t stream_; + cuda::unique_ptr mask_{nullptr}; + cuda::unique_ptr voxels_{nullptr}; + unsigned int mask_size_; + unsigned int voxels_size_; +}; +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp new file mode 100644 index 0000000000000..3e3660e238473 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp @@ -0,0 +1,70 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ +#define LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ + +#include "lidar_transfusion/cuda_utils.hpp" +#include "lidar_transfusion/preprocess/pointcloud_densification.hpp" +#include "lidar_transfusion/preprocess/preprocess_kernel.hpp" +#include "lidar_transfusion/transfusion_config.hpp" +#include "lidar_transfusion/utils.hpp" + +#ifdef ROS_DISTRO_GALACTIC +#include +#else +#include +#endif + +#include + +#include +#include +#include +#include + +namespace lidar_transfusion +{ +constexpr size_t AFF_MAT_SIZE = 16; // 4x4 matrix +constexpr size_t MAX_CLOUD_STEP_SIZE = 35; // PointXYZIRCADT + +class VoxelGenerator +{ +public: + explicit VoxelGenerator( + const DensificationParam & densification_param, const TransfusionConfig & config, + cudaStream_t & stream); + std::size_t generateSweepPoints( + const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr & points_d); + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer); + void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg); + std::tuple getFieldInfo( + const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name); + +private: + std::unique_ptr pd_ptr_{nullptr}; + std::unique_ptr pre_ptr_{nullptr}; + TransfusionConfig config_; + cuda::unique_ptr cloud_data_d_{nullptr}; + cuda::unique_ptr affine_past2current_d_{nullptr}; + std::vector points_; + cudaStream_t stream_; + CloudInfo cloud_info_; + bool is_initialized_{false}; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp new file mode 100644 index 0000000000000..f013a02404a29 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp @@ -0,0 +1,39 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__ROS_UTILS_HPP_ +#define LIDAR_TRANSFUSION__ROS_UTILS_HPP_ + +#include "lidar_transfusion/utils.hpp" + +#include +#include +#include +#include + +#include +#include + +namespace lidar_transfusion +{ + +void box3DToDetectedObject( + const Box3D & box3d, const std::vector & class_names, + autoware_perception_msgs::msg::DetectedObject & obj); + +uint8_t getSemanticType(const std::string & class_name); + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__ROS_UTILS_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp new file mode 100644 index 0000000000000..31976de56a9da --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp @@ -0,0 +1,161 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_ +#define LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_ + +#include +#include + +namespace lidar_transfusion +{ + +class TransfusionConfig +{ +public: + TransfusionConfig( + const std::vector & voxels_num, const std::vector & point_cloud_range, + const std::vector & voxel_size, const float circle_nms_dist_threshold, + const std::vector & yaw_norm_thresholds, const float score_threshold) + { + if (voxels_num.size() == 3) { + max_voxels_ = voxels_num[2]; + + voxels_num_[0] = voxels_num[0]; + voxels_num_[1] = voxels_num[1]; + voxels_num_[2] = voxels_num[2]; + + min_voxel_size_ = voxels_num[0]; + opt_voxel_size_ = voxels_num[1]; + max_voxel_size_ = voxels_num[2]; + + min_points_size_ = voxels_num[0]; + opt_points_size_ = voxels_num[1]; + max_points_size_ = voxels_num[2]; + + min_coors_size_ = voxels_num[0]; + opt_coors_size_ = voxels_num[1]; + max_coors_size_ = voxels_num[2]; + } + if (point_cloud_range.size() == 6) { + min_x_range_ = static_cast(point_cloud_range[0]); + min_y_range_ = static_cast(point_cloud_range[1]); + min_z_range_ = static_cast(point_cloud_range[2]); + max_x_range_ = static_cast(point_cloud_range[3]); + max_y_range_ = static_cast(point_cloud_range[4]); + max_z_range_ = static_cast(point_cloud_range[5]); + } + if (voxel_size.size() == 3) { + voxel_x_size_ = static_cast(voxel_size[0]); + voxel_y_size_ = static_cast(voxel_size[1]); + voxel_z_size_ = static_cast(voxel_size[2]); + } + if (score_threshold > 0.0) { + score_threshold_ = score_threshold; + } + if (circle_nms_dist_threshold > 0.0) { + circle_nms_dist_threshold_ = circle_nms_dist_threshold; + } + yaw_norm_thresholds_ = + std::vector(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end()); + for (auto & yaw_norm_threshold : yaw_norm_thresholds_) { + yaw_norm_threshold = + (yaw_norm_threshold >= 0.0 && yaw_norm_threshold < 1.0) ? yaw_norm_threshold : 0.0; + } + grid_x_size_ = static_cast((max_x_range_ - min_x_range_) / voxel_x_size_); + grid_y_size_ = static_cast((max_y_range_ - min_y_range_) / voxel_y_size_); + grid_z_size_ = static_cast((max_z_range_ - min_z_range_) / voxel_z_size_); + } + + ///// INPUT PARAMETERS ///// + const std::size_t cloud_capacity_{1000000}; + ///// KERNEL PARAMETERS ///// + const std::size_t threads_for_voxel_{256}; // threads number for a block + const std::size_t points_per_voxel_{20}; + const std::size_t warp_size_{32}; // one warp(32 threads) for one pillar + const std::size_t pillars_per_block_{64}; // one thread deals with one pillar + // and a block has pillars_per_block threads + const std::size_t pillar_feature_size_{64}; + std::size_t max_voxels_{60000}; + + ///// NETWORK PARAMETERS ///// + const std::size_t batch_size_{1}; + const std::size_t num_classes_{5}; + const std::size_t num_point_feature_size_{5}; // x, y, z, intensity, lag + // the dimension of the input cloud + float min_x_range_{-76.8}; + float max_x_range_{76.8}; + float min_y_range_{-76.8}; + float max_y_range_{76.8}; + float min_z_range_{-3.0}; + float max_z_range_{5.0}; + // the size of a pillar + float voxel_x_size_{0.3}; + float voxel_y_size_{0.3}; + float voxel_z_size_{8.0}; + const std::size_t out_size_factor_{4}; + const std::size_t max_num_points_per_pillar_{points_per_voxel_}; + const std::size_t num_point_values_{4}; + const std::size_t num_proposals_{200}; + // the number of feature maps for pillar scatter + const std::size_t num_feature_scatter_{pillar_feature_size_}; + // the score threshold for classification + float score_threshold_{0.2}; + float circle_nms_dist_threshold_{0.5}; + std::vector yaw_norm_thresholds_{0.3, 0.3, 0.3, 0.3, 0.0}; + std::size_t max_num_pillars_{max_voxels_}; + const std::size_t pillar_points_bev_{max_num_points_per_pillar_ * max_num_pillars_}; + // the detected boxes result decode by (x, y, z, w, l, h, yaw) + const std::size_t num_box_values_{8}; + // the input size of the 2D backbone network + std::size_t grid_x_size_{512}; + std::size_t grid_y_size_{512}; + std::size_t grid_z_size_{1}; + // the output size of the 2D backbone network + std::size_t feature_x_size_{grid_x_size_ / out_size_factor_}; + std::size_t feature_y_size_{grid_y_size_ / out_size_factor_}; + + ///// RUNTIME DIMENSIONS ///// + std::vector voxels_num_{5000, 30000, 60000}; + // voxels + std::size_t min_voxel_size_{voxels_num_[0]}; + std::size_t opt_voxel_size_{voxels_num_[1]}; + std::size_t max_voxel_size_{voxels_num_[2]}; + + std::size_t min_point_in_voxel_size_{points_per_voxel_}; + std::size_t opt_point_in_voxel_size_{points_per_voxel_}; + std::size_t max_point_in_voxel_size_{points_per_voxel_}; + + std::size_t min_network_feature_size_{num_point_feature_size_}; + std::size_t opt_network_feature_size_{num_point_feature_size_}; + std::size_t max_network_feature_size_{num_point_feature_size_}; + + // num_points + std::size_t min_points_size_{voxels_num_[0]}; + std::size_t opt_points_size_{voxels_num_[1]}; + std::size_t max_points_size_{voxels_num_[2]}; + + // coors + std::size_t min_coors_size_{voxels_num_[0]}; + std::size_t opt_coors_size_{voxels_num_[1]}; + std::size_t max_coors_size_{voxels_num_[2]}; + + std::size_t min_coors_dim_size_{num_point_values_}; + std::size_t opt_coors_dim_size_{num_point_values_}; + std::size_t max_coors_dim_size_{num_point_values_}; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp new file mode 100644 index 0000000000000..a94c3c1871136 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp @@ -0,0 +1,114 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_ +#define LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_ + +#include "lidar_transfusion/cuda_utils.hpp" +#include "lidar_transfusion/network/network_trt.hpp" +#include "lidar_transfusion/postprocess/postprocess_kernel.hpp" +#include "lidar_transfusion/preprocess/pointcloud_densification.hpp" +#include "lidar_transfusion/preprocess/preprocess_kernel.hpp" +#include "lidar_transfusion/preprocess/voxel_generator.hpp" +#include "lidar_transfusion/utils.hpp" +#include "lidar_transfusion/visibility_control.hpp" + +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace lidar_transfusion +{ + +class NetworkParam +{ +public: + NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision) + : onnx_path_(std::move(onnx_path)), + engine_path_(std::move(engine_path)), + trt_precision_(std::move(trt_precision)) + { + } + + std::string onnx_path() const { return onnx_path_; } + std::string engine_path() const { return engine_path_; } + std::string trt_precision() const { return trt_precision_; } + +private: + std::string onnx_path_; + std::string engine_path_; + std::string trt_precision_; +}; + +class LIDAR_TRANSFUSION_PUBLIC TransfusionTRT +{ +public: + explicit TransfusionTRT( + const NetworkParam & network_param, const DensificationParam & densification_param, + const TransfusionConfig & config); + virtual ~TransfusionTRT(); + + bool detect( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer, + std::vector & det_boxes3d, std::unordered_map & proc_timing); + +protected: + void initPtr(); + + bool preprocess(const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer); + + bool inference(); + + bool postprocess(std::vector & det_boxes3d); + + std::unique_ptr network_trt_ptr_{nullptr}; + std::unique_ptr vg_ptr_{nullptr}; + std::unique_ptr> stop_watch_ptr_{ + nullptr}; + std::unique_ptr pre_ptr_{nullptr}; + std::unique_ptr post_ptr_{nullptr}; + cudaStream_t stream_{nullptr}; + + TransfusionConfig config_; + + // input of pre-process + + unsigned int voxel_features_size_{0}; + unsigned int voxel_num_size_{0}; + unsigned int voxel_idxs_size_{0}; + unsigned int cls_size_{0}; + unsigned int box_size_{0}; + unsigned int dir_cls_size_{0}; + cuda::unique_ptr points_d_{nullptr}; + cuda::unique_ptr params_input_d_{nullptr}; + cuda::unique_ptr voxel_features_d_{nullptr}; + cuda::unique_ptr voxel_num_d_{nullptr}; + cuda::unique_ptr voxel_idxs_d_{nullptr}; + cuda::unique_ptr cls_output_d_{nullptr}; + cuda::unique_ptr box_output_d_{nullptr}; + cuda::unique_ptr dir_cls_output_d_{nullptr}; +}; + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp new file mode 100644 index 0000000000000..e73fe7f055953 --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp @@ -0,0 +1,103 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__UTILS_HPP_ +#define LIDAR_TRANSFUSION__UTILS_HPP_ + +#include +#include +#include +#include + +namespace lidar_transfusion +{ + +struct Box3D +{ + int label; + float score; + float x; + float y; + float z; + float width; + float length; + float height; + float yaw; +}; + +struct CloudInfo +{ + uint32_t x_offset; + uint32_t y_offset; + uint32_t z_offset; + uint32_t intensity_offset; + uint8_t x_datatype; + uint8_t y_datatype; + uint8_t z_datatype; + uint8_t intensity_datatype; + uint8_t x_count; + uint8_t y_count; + uint8_t z_count; + uint8_t intensity_count; + uint32_t point_step; + bool is_bigendian; + + CloudInfo() + : x_offset(0), + y_offset(4), + z_offset(8), + intensity_offset(12), + x_datatype(7), + y_datatype(7), + z_datatype(7), + intensity_datatype(7), + x_count(1), + y_count(1), + z_count(1), + intensity_count(1), + point_step(16), + is_bigendian(false) + { + } + + bool operator!=(const CloudInfo & rhs) const + { + return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset || + intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype || + y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype || + intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count || + y_count != rhs.y_count || z_count != rhs.z_count || + intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian; + } +}; + +enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE }; + +// cspell: ignore divup +template +unsigned int divup(const T1 a, const T2 b) +{ + if (a == 0) { + throw std::runtime_error("A dividend of divup isn't positive."); + } + if (b == 0) { + throw std::runtime_error("A divisor of divup isn't positive."); + } + + return (a + b - 1) / b; +} + +} // namespace lidar_transfusion + +#endif // LIDAR_TRANSFUSION__UTILS_HPP_ diff --git a/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp b/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp new file mode 100644 index 0000000000000..aeab20906628a --- /dev/null +++ b/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp @@ -0,0 +1,37 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +//    http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_ +#define LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_ + +//////////////////////////////////////////////////////////////////////////////// +#if defined(__WIN32) +#if defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS) +#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllexport) +#define LIDAR_TRANSFUSION_LOCAL +#else // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS) +#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllimport) +#define LIDAR_TRANSFUSION_LOCAL +#endif // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS) +#elif defined(__linux__) +#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default"))) +#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden"))) +#elif defined(__APPLE__) +#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default"))) +#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden"))) +#else +#error "Unsupported Build Configuration" +#endif + +#endif // LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_ diff --git a/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml b/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml new file mode 100644 index 0000000000000..492eb8c4d59b7 --- /dev/null +++ b/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/perception/lidar_transfusion/lib/detection_class_remapper.cpp b/perception/lidar_transfusion/lib/detection_class_remapper.cpp new file mode 100644 index 0000000000000..e093637402448 --- /dev/null +++ b/perception/lidar_transfusion/lib/detection_class_remapper.cpp @@ -0,0 +1,71 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace lidar_transfusion +{ + +void DetectionClassRemapper::setParameters( + const std::vector & allow_remapping_by_area_matrix, + const std::vector & min_area_matrix, const std::vector & max_area_matrix) +{ + assert(allow_remapping_by_area_matrix.size() == min_area_matrix.size()); + assert(allow_remapping_by_area_matrix.size() == max_area_matrix.size()); + assert( + std::pow(static_cast(std::sqrt(min_area_matrix.size())), 2) == min_area_matrix.size()); + + num_labels_ = static_cast(std::sqrt(min_area_matrix.size())); + + Eigen::Map> + allow_remapping_by_area_matrix_tmp( + allow_remapping_by_area_matrix.data(), num_labels_, num_labels_); + allow_remapping_by_area_matrix_ = allow_remapping_by_area_matrix_tmp.transpose() + .cast(); // Eigen is column major by default + + Eigen::Map min_area_matrix_tmp( + min_area_matrix.data(), num_labels_, num_labels_); + min_area_matrix_ = min_area_matrix_tmp.transpose(); // Eigen is column major by default + + Eigen::Map max_area_matrix_tmp( + max_area_matrix.data(), num_labels_, num_labels_); + max_area_matrix_ = max_area_matrix_tmp.transpose(); // Eigen is column major by default + + min_area_matrix_ = min_area_matrix_.unaryExpr( + [](double v) { return std::isfinite(v) ? v : std::numeric_limits::max(); }); + max_area_matrix_ = max_area_matrix_.unaryExpr( + [](double v) { return std::isfinite(v) ? v : std::numeric_limits::max(); }); +} + +void DetectionClassRemapper::mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg) +{ + for (auto & object : msg.objects) { + const float bev_area = object.shape.dimensions.x * object.shape.dimensions.y; + + for (auto & classification : object.classification) { + auto & label = classification.label; + + for (int i = 0; i < num_labels_; ++i) { + if ( + allow_remapping_by_area_matrix_(label, i) && bev_area >= min_area_matrix_(label, i) && + bev_area <= max_area_matrix_(label, i)) { + label = i; + break; + } + } + } + } +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/network/network_trt.cpp b/perception/lidar_transfusion/lib/network/network_trt.cpp new file mode 100644 index 0000000000000..a36890facceec --- /dev/null +++ b/perception/lidar_transfusion/lib/network/network_trt.cpp @@ -0,0 +1,332 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/network/network_trt.hpp" + +#include + +#include +#include +#include + +namespace lidar_transfusion +{ + +std::ostream & operator<<(std::ostream & os, const ProfileDimension & profile) +{ + std::string delim = ""; + os << "min->["; + for (int i = 0; i < profile.min.nbDims; ++i) { + os << delim << profile.min.d[i]; + delim = ", "; + } + os << "], opt->["; + delim = ""; + for (int i = 0; i < profile.opt.nbDims; ++i) { + os << delim << profile.opt.d[i]; + delim = ", "; + } + os << "], max->["; + delim = ""; + for (int i = 0; i < profile.max.nbDims; ++i) { + os << delim << profile.max.d[i]; + delim = ", "; + } + os << "]"; + return os; +} + +NetworkTRT::NetworkTRT(const TransfusionConfig & config) : config_(config) +{ + ProfileDimension voxels_dims = { + nvinfer1::Dims3( + config_.min_voxel_size_, config_.min_point_in_voxel_size_, config_.min_network_feature_size_), + nvinfer1::Dims3( + config_.opt_voxel_size_, config_.opt_point_in_voxel_size_, config_.opt_network_feature_size_), + nvinfer1::Dims3( + config_.max_voxel_size_, config_.max_point_in_voxel_size_, + config_.max_network_feature_size_)}; + ProfileDimension num_points_dims = { + nvinfer1::Dims{1, {static_cast(config_.min_points_size_)}}, + nvinfer1::Dims{1, {static_cast(config_.opt_points_size_)}}, + nvinfer1::Dims{1, {static_cast(config_.max_points_size_)}}}; + ProfileDimension coors_dims = { + nvinfer1::Dims2(config_.min_coors_size_, config_.min_coors_dim_size_), + nvinfer1::Dims2(config_.opt_coors_size_, config_.opt_coors_dim_size_), + nvinfer1::Dims2(config_.max_coors_size_, config_.max_coors_dim_size_)}; + in_profile_dims_ = {voxels_dims, num_points_dims, coors_dims}; +} + +NetworkTRT::~NetworkTRT() +{ + context.reset(); + runtime_.reset(); + plan_.reset(); + engine.reset(); +} + +bool NetworkTRT::init( + const std::string & onnx_path, const std::string & engine_path, const std::string & precision) +{ + runtime_ = + tensorrt_common::TrtUniquePtr(nvinfer1::createInferRuntime(logger_)); + if (!runtime_) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create runtime" << std::endl; + return false; + } + + bool success; + std::ifstream engine_file(engine_path); + if (engine_file.is_open()) { + success = loadEngine(engine_path); + } else { + auto log_thread = logger_.log_throttle( + nvinfer1::ILogger::Severity::kINFO, + "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5); + success = parseONNX(onnx_path, engine_path, precision); + logger_.stop_throttle(log_thread); + } + success &= createContext(); + + return success; +} + +bool NetworkTRT::setProfile( + nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network, + nvinfer1::IBuilderConfig & config) +{ + auto profile = builder.createOptimizationProfile(); + + auto voxels_name = network.getInput(NetworkIO::voxels)->getName(); + auto num_points_name = network.getInput(NetworkIO::num_points)->getName(); + auto coors_name = network.getInput(NetworkIO::coors)->getName(); + + profile->setDimensions( + voxels_name, nvinfer1::OptProfileSelector::kMIN, in_profile_dims_[NetworkIO::voxels].min); + profile->setDimensions( + voxels_name, nvinfer1::OptProfileSelector::kOPT, in_profile_dims_[NetworkIO::voxels].opt); + profile->setDimensions( + voxels_name, nvinfer1::OptProfileSelector::kMAX, in_profile_dims_[NetworkIO::voxels].max); + + profile->setDimensions( + num_points_name, nvinfer1::OptProfileSelector::kMIN, + in_profile_dims_[NetworkIO::num_points].min); + profile->setDimensions( + num_points_name, nvinfer1::OptProfileSelector::kOPT, + in_profile_dims_[NetworkIO::num_points].opt); + profile->setDimensions( + num_points_name, nvinfer1::OptProfileSelector::kMAX, + in_profile_dims_[NetworkIO::num_points].max); + + profile->setDimensions( + coors_name, nvinfer1::OptProfileSelector::kMIN, in_profile_dims_[NetworkIO::coors].min); + profile->setDimensions( + coors_name, nvinfer1::OptProfileSelector::kOPT, in_profile_dims_[NetworkIO::coors].opt); + profile->setDimensions( + coors_name, nvinfer1::OptProfileSelector::kMAX, in_profile_dims_[NetworkIO::coors].max); + + config.addOptimizationProfile(profile); + return true; +} + +bool NetworkTRT::createContext() +{ + if (!engine) { + tensorrt_common::LOG_ERROR(logger_) + << "Failed to create context: Engine was not created" << std::endl; + return false; + } + + context = + tensorrt_common::TrtUniquePtr(engine->createExecutionContext()); + if (!context) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create context" << std::endl; + return false; + } + + return true; +} + +bool NetworkTRT::parseONNX( + const std::string & onnx_path, const std::string & engine_path, const std::string & precision, + const size_t workspace_size) +{ + auto builder = + tensorrt_common::TrtUniquePtr(nvinfer1::createInferBuilder(logger_)); + if (!builder) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create builder" << std::endl; + return false; + } + + auto config = + tensorrt_common::TrtUniquePtr(builder->createBuilderConfig()); + if (!config) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create config" << std::endl; + return false; + } +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400 + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size); +#else + config->setMaxWorkspaceSize(workspace_size); +#endif + if (precision == "fp16") { + if (builder->platformHasFastFp16()) { + tensorrt_common::LOG_INFO(logger_) << "Using TensorRT FP16 Inference" << std::endl; + config->setFlag(nvinfer1::BuilderFlag::kFP16); + } else { + tensorrt_common::LOG_INFO(logger_) + << "TensorRT FP16 Inference isn't supported in this environment" << std::endl; + } + } + + const auto flag = + 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + auto network = + tensorrt_common::TrtUniquePtr(builder->createNetworkV2(flag)); + if (!network) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create network" << std::endl; + return false; + } + + auto parser = tensorrt_common::TrtUniquePtr( + nvonnxparser::createParser(*network, logger_)); + parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kERROR)); + + if (!setProfile(*builder, *network, *config)) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to set profile" << std::endl; + return false; + } + + plan_ = tensorrt_common::TrtUniquePtr( + builder->buildSerializedNetwork(*network, *config)); + if (!plan_) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create serialized network" << std::endl; + return false; + } + engine = tensorrt_common::TrtUniquePtr( + runtime_->deserializeCudaEngine(plan_->data(), plan_->size())); + if (!engine) { + tensorrt_common::LOG_ERROR(logger_) << "Failed to create engine" << std::endl; + return false; + } + + return saveEngine(engine_path); +} + +bool NetworkTRT::saveEngine(const std::string & engine_path) +{ + tensorrt_common::LOG_INFO(logger_) << "Writing to " << engine_path << std::endl; + std::ofstream file(engine_path, std::ios::out | std::ios::binary); + file.write(reinterpret_cast(plan_->data()), plan_->size()); + return validateNetworkIO(); +} + +bool NetworkTRT::loadEngine(const std::string & engine_path) +{ + std::ifstream engine_file(engine_path); + std::stringstream engine_buffer; + engine_buffer << engine_file.rdbuf(); + std::string engine_str = engine_buffer.str(); + engine = tensorrt_common::TrtUniquePtr(runtime_->deserializeCudaEngine( + reinterpret_cast(engine_str.data()), engine_str.size())); + tensorrt_common::LOG_INFO(logger_) << "Loaded engine from " << engine_path << std::endl; + return validateNetworkIO(); +} + +bool NetworkTRT::validateNetworkIO() +{ + // Whether the number of IO match the expected size + if (engine->getNbIOTensors() != NetworkIO::ENUM_SIZE) { + tensorrt_common::LOG_ERROR(logger_) + << "Invalid network IO. Expected size: " << NetworkIO::ENUM_SIZE + << ". Actual size: " << engine->getNbIOTensors() << "." << std::endl; + throw std::runtime_error("Failed to initialize TRT network."); + } + for (int i = 0; i < NetworkIO::ENUM_SIZE; ++i) { + tensors_names_.push_back(engine->getIOTensorName(i)); + } + + // Log the network IO + std::string tensors = std::accumulate( + tensors_names_.begin(), tensors_names_.end(), std::string(), + [](const std::string & a, const std::string & b) -> std::string { return a + b + " "; }); + tensorrt_common::LOG_INFO(logger_) << "Network IO: " << tensors << std::endl; + + // Whether the current engine input profile match the config input profile + for (int i = 0; i <= NetworkIO::coors; ++i) { + ProfileDimension engine_dims{ + engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kMIN), + engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kOPT), + engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kMAX)}; + + tensorrt_common::LOG_INFO(logger_) + << "Profile for " << tensors_names_[i] << ": " << engine_dims << std::endl; + + if (engine_dims != in_profile_dims_[i]) { + tensorrt_common::LOG_ERROR(logger_) + << "Invalid network input dimension. Config: " << in_profile_dims_[i] + << ". Please change the input profile or delete the engine file and build engine again." + << std::endl; + throw std::runtime_error("Failed to initialize TRT network."); + } + } + + // Whether the IO tensor shapes match the network config, -1 for dynamic size + validateTensorShape( + NetworkIO::voxels, {-1, static_cast(config_.points_per_voxel_), + static_cast(config_.num_point_feature_size_)}); + validateTensorShape(NetworkIO::num_points, {-1}); + validateTensorShape(NetworkIO::coors, {-1, static_cast(config_.num_point_values_)}); + auto cls_score = validateTensorShape( + NetworkIO::cls_score, + {static_cast(config_.batch_size_), static_cast(config_.num_classes_), + static_cast(config_.num_proposals_)}); + tensorrt_common::LOG_INFO(logger_) << "Network num classes: " << cls_score.d[1] << std::endl; + validateTensorShape( + NetworkIO::dir_pred, + {static_cast(config_.batch_size_), 2, static_cast(config_.num_proposals_)}); // x, y + validateTensorShape( + NetworkIO::bbox_pred, + {static_cast(config_.batch_size_), static_cast(config_.num_box_values_), + static_cast(config_.num_proposals_)}); + + return true; +} + +const char * NetworkTRT::getTensorName(NetworkIO name) +{ + return tensors_names_.at(name); +} + +nvinfer1::Dims NetworkTRT::validateTensorShape(NetworkIO name, const std::vector shape) +{ + auto tensor_shape = engine->getTensorShape(tensors_names_[name]); + if (tensor_shape.nbDims != static_cast(shape.size())) { + tensorrt_common::LOG_ERROR(logger_) + << "Invalid tensor shape for " << tensors_names_[name] << ". Expected size: " << shape.size() + << ". Actual size: " << tensor_shape.nbDims << "." << std::endl; + throw std::runtime_error("Failed to initialize TRT network."); + } + for (int i = 0; i < tensor_shape.nbDims; ++i) { + if (tensor_shape.d[i] != static_cast(shape[i])) { + tensorrt_common::LOG_ERROR(logger_) + << "Invalid tensor shape for " << tensors_names_[name] << ". Expected: " << shape[i] + << ". Actual: " << tensor_shape.d[i] << "." << std::endl; + throw std::runtime_error("Failed to initialize TRT network."); + } + } + return tensor_shape; +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu b/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu new file mode 100644 index 0000000000000..dcb60831825bb --- /dev/null +++ b/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu @@ -0,0 +1,144 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Modified from +// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu + +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. +*/ + +#include "lidar_transfusion/cuda_utils.hpp" +#include "lidar_transfusion/postprocess/circle_nms_kernel.hpp" +#include "lidar_transfusion/utils.hpp" + +#include + +namespace +{ +const std::size_t THREADS_PER_BLOCK_NMS = 16; +} // namespace + +namespace lidar_transfusion +{ + +__device__ inline float dist2dPow(const Box3D * a, const Box3D * b) +{ + return powf(a->x - b->x, 2) + powf(a->y - b->y, 2); +} + +// cspell: ignore divup +__global__ void circleNMS_Kernel( + const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks, + const float dist2d_pow_threshold, std::uint64_t * mask) +{ + // params: boxes (N,) + // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS)) + + const auto row_start = blockIdx.y; + const auto col_start = blockIdx.x; + + if (row_start > col_start) return; + + const std::size_t row_size = + fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + const std::size_t col_size = + fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + + __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const Box3D * cur_box = boxes + cur_box_idx; + + std::uint64_t t = 0; + std::size_t start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (std::size_t i = start; i < col_size; i++) { + if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) { + t |= 1ULL << i; + } + } + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +cudaError_t circleNMS_launch( + const thrust::device_vector & boxes3d, const std::size_t num_boxes3d, + std::size_t col_blocks, const float distance_threshold, + thrust::device_vector & mask, cudaStream_t stream) +{ + const float dist2d_pow_thres = powf(distance_threshold, 2); + + dim3 blocks(col_blocks, col_blocks); + dim3 threads(THREADS_PER_BLOCK_NMS); + circleNMS_Kernel<<>>( + thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres, + thrust::raw_pointer_cast(mask.data())); + + return cudaGetLastError(); +} + +std::size_t circleNMS( + thrust::device_vector & boxes3d, const float distance_threshold, + thrust::device_vector & keep_mask, cudaStream_t stream) +{ + const auto num_boxes3d = boxes3d.size(); + const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS); + thrust::device_vector mask_d(num_boxes3d * col_blocks); + + CHECK_CUDA_ERROR( + circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream)); + + // memcpy device to host + thrust::host_vector mask_h(mask_d.size()); + thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin()); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream)); + + // generate keep_mask + std::vector remv_h(col_blocks); + thrust::host_vector keep_mask_h(keep_mask.size()); + std::size_t num_to_keep = 0; + for (std::size_t i = 0; i < num_boxes3d; i++) { + auto nblock = i / THREADS_PER_BLOCK_NMS; + auto inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_h[nblock] & (1ULL << inblock))) { + keep_mask_h[i] = true; + num_to_keep++; + std::uint64_t * p = &mask_h[0] + i * col_blocks; + for (std::size_t j = nblock; j < col_blocks; j++) { + remv_h[j] |= p[j]; + } + } else { + keep_mask_h[i] = false; + } + } + + // memcpy host to device + keep_mask = keep_mask_h; + + return num_to_keep; +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp b/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp new file mode 100644 index 0000000000000..c1179a3f6673d --- /dev/null +++ b/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp @@ -0,0 +1,104 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/postprocess/non_maximum_suppression.hpp" + +#include +#include +#include + +namespace lidar_transfusion +{ + +void NonMaximumSuppression::setParameters(const NMSParams & params) +{ + assert(params.search_distance_2d_ >= 0.0); + assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0); + + params_ = params; + target_class_mask_ = classNamesToBooleanMask(params.target_class_names_); +} + +bool NonMaximumSuppression::isTargetLabel(const uint8_t label) +{ + if (label >= target_class_mask_.size()) { + return false; + } + return target_class_mask_.at(label); +} + +bool NonMaximumSuppression::isTargetPairObject( + const DetectedObject & object1, const DetectedObject & object2) +{ + const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification); + const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification); + + if (isTargetLabel(label1) && isTargetLabel(label2)) { + return true; + } + + const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_; + const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d( + object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2)); + return sqr_dist_2d <= search_sqr_dist_2d; +} + +Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix( + const std::vector & input_objects) +{ + // NOTE: row = target objects to be suppressed, col = source objects to be compared + Eigen::MatrixXd triangular_matrix = + Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size()); + for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) { + for (std::size_t source_i = 0; source_i < target_i; ++source_i) { + const auto & target_obj = input_objects.at(target_i); + const auto & source_obj = input_objects.at(source_i); + if (!isTargetPairObject(target_obj, source_obj)) { + continue; + } + + if (params_.nms_type_ == NMS_TYPE::IoU_BEV) { + const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj); + triangular_matrix(target_i, source_i) = iou; + // NOTE: If the target object has any objects with iou > iou_threshold, it + // will be suppressed regardless of later results. + if (iou > params_.iou_threshold_) { + break; + } + } + } + } + + return triangular_matrix; +} + +std::vector NonMaximumSuppression::apply( + const std::vector & input_objects) +{ + Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects); + + std::vector output_objects; + output_objects.reserve(input_objects.size()); + for (std::size_t i = 0; i < input_objects.size(); ++i) { + const auto value = iou_matrix.row(i).maxCoeff(); + if (params_.nms_type_ == NMS_TYPE::IoU_BEV) { + if (value <= params_.iou_threshold_) { + output_objects.emplace_back(input_objects.at(i)); + } + } + } + + return output_objects; +} +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu b/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu new file mode 100644 index 0000000000000..bca23e9961b40 --- /dev/null +++ b/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu @@ -0,0 +1,145 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/postprocess/circle_nms_kernel.hpp" +#include "lidar_transfusion/postprocess/postprocess_kernel.hpp" + +#include +#include +#include + +namespace lidar_transfusion +{ +const size_t THREADS_PER_BLOCK = 256; + +struct is_score_greater +{ + is_score_greater(float t) : t_(t) {} + + __device__ bool operator()(const Box3D & b) { return b.score > t_; } + +private: + float t_{0.0}; +}; + +struct is_kept +{ + __device__ bool operator()(const bool keep) { return keep; } +}; + +struct score_greater +{ + __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; } +}; + +__device__ inline float sigmoid(float x) +{ + return 1.0f / (1.0f + expf(-x)); +} + +__global__ void generateBoxes3D_kernel( + const float * __restrict__ cls_output, const float * __restrict__ box_output, + const float * __restrict__ dir_cls_output, const float voxel_size_x, const float voxel_size_y, + const float min_x_range, const float min_y_range, const int num_proposals, const int num_classes, + const int num_point_values, const float * __restrict__ yaw_norm_thresholds, + Box3D * __restrict__ det_boxes3d) +{ + int point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx >= num_proposals) { + return; + } + + int class_id = 0; + float max_score = cls_output[point_idx]; + +#pragma unroll + for (int i = 1; i < num_classes; i++) { + float score = cls_output[i * num_proposals + point_idx]; + if (score > max_score) { + max_score = score; + class_id = i; + } + } + + // yaw validation + const float yaw_sin = dir_cls_output[point_idx]; + const float yaw_cos = dir_cls_output[point_idx + num_proposals]; + const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos); + + det_boxes3d[point_idx].label = class_id; + det_boxes3d[point_idx].score = yaw_norm >= yaw_norm_thresholds[class_id] ? max_score : 0.f; + det_boxes3d[point_idx].x = box_output[point_idx] * num_point_values * voxel_size_x + min_x_range; + det_boxes3d[point_idx].y = + box_output[point_idx + num_proposals] * num_point_values * voxel_size_y + min_y_range; + det_boxes3d[point_idx].z = box_output[point_idx + 2 * num_proposals]; + det_boxes3d[point_idx].width = expf(box_output[point_idx + 3 * num_proposals]); + det_boxes3d[point_idx].length = expf(box_output[point_idx + 4 * num_proposals]); + det_boxes3d[point_idx].height = expf(box_output[point_idx + 5 * num_proposals]); + det_boxes3d[point_idx].yaw = + atan2f(dir_cls_output[point_idx], dir_cls_output[point_idx + num_proposals]); +} + +PostprocessCuda::PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream) +: config_(config), stream_(stream) +{ + boxes3d_d_ = thrust::device_vector(config_.num_proposals_); + yaw_norm_thresholds_d_ = thrust::device_vector( + config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end()); +} + +// cspell: ignore divup +cudaError_t PostprocessCuda::generateDetectedBoxes3D_launch( + const float * cls_output, const float * box_output, const float * dir_cls_output, + std::vector & det_boxes3d, cudaStream_t stream) +{ + dim3 threads = {THREADS_PER_BLOCK}; + dim3 blocks = {divup(config_.num_proposals_, threads.x)}; + + generateBoxes3D_kernel<<>>( + cls_output, box_output, dir_cls_output, config_.voxel_x_size_, config_.voxel_y_size_, + config_.min_x_range_, config_.min_y_range_, config_.num_proposals_, config_.num_classes_, + config_.num_point_values_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()), + thrust::raw_pointer_cast(boxes3d_d_.data())); + + // suppress by score + const auto num_det_boxes3d = thrust::count_if( + thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), + is_score_greater(config_.score_threshold_)); + if (num_det_boxes3d == 0) { + return cudaGetLastError(); + } + thrust::device_vector det_boxes3d_d(num_det_boxes3d); + thrust::copy_if( + thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(), + is_score_greater(config_.score_threshold_)); + + // sort by score + thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater()); + + // supress by NMS + thrust::device_vector final_keep_mask_d(num_det_boxes3d); + const auto num_final_det_boxes3d = + circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream); + thrust::device_vector final_det_boxes3d_d(num_final_det_boxes3d); + thrust::copy_if( + thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(), + final_det_boxes3d_d.begin(), is_kept()); + + // memcpy device to host + det_boxes3d.resize(num_final_det_boxes3d); + thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin()); + return cudaGetLastError(); +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp new file mode 100644 index 0000000000000..774b3a05d71c1 --- /dev/null +++ b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp @@ -0,0 +1,117 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/preprocess/pointcloud_densification.hpp" + +#include + +#include +#ifdef ROS_DISTRO_GALACTIC +#include +#else +#include +#endif + +#include + +#include +#include + +namespace +{ + +boost::optional getTransform( + const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id, + const std::string & source_frame_id, const rclcpp::Time & time) +{ + try { + geometry_msgs::msg::TransformStamped transform_stamped; + transform_stamped = tf_buffer.lookupTransform( + target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5)); + return transform_stamped.transform; + } catch (tf2::TransformException & ex) { + RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what()); + return boost::none; + } +} + +Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t) +{ + Eigen::Affine3f a; + a.matrix() = tf2::transformToEigen(t).matrix().cast(); + return a; +} + +} // namespace + +namespace lidar_transfusion +{ + +PointCloudDensification::PointCloudDensification( + const DensificationParam & param, cudaStream_t & stream) +: param_(param), stream_(stream) +{ +} + +bool PointCloudDensification::enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer) +{ + const auto header = pointcloud_msg.header; + + if (param_.pointcloud_cache_size() > 1) { + auto transform_world2current = + getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp); + if (!transform_world2current) { + return false; + } + auto affine_world2current = transformToEigen(transform_world2current.get()); + + enqueue(pointcloud_msg, affine_world2current); + } else { + enqueue(pointcloud_msg, Eigen::Affine3f::Identity()); + } + + dequeue(); + + return true; +} + +void PointCloudDensification::enqueue( + const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current) +{ + affine_world2current_ = affine_world2current; + current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds(); + + assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 1); + auto points_d = cuda::make_unique( + sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float)); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step, + cudaMemcpyHostToDevice, stream_)); + + PointCloudWithTransform pointcloud = { + std::move(points_d), msg.header, msg.width * msg.height, affine_world2current.inverse()}; + + pointcloud_cache_.push_front(std::move(pointcloud)); +} + +void PointCloudDensification::dequeue() +{ + if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) { + pointcloud_cache_.pop_back(); + } +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu new file mode 100644 index 0000000000000..b8f9ae998fd4f --- /dev/null +++ b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu @@ -0,0 +1,208 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + * All rights reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lidar_transfusion/cuda_utils.hpp" +#include "lidar_transfusion/preprocess/preprocess_kernel.hpp" + +namespace lidar_transfusion +{ + +PreprocessCuda::PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream) +: stream_(stream), config_(config) +{ + mask_size_ = config_.grid_z_size_ * config_.grid_y_size_ * config_.grid_x_size_; + voxels_size_ = config_.grid_z_size_ * config_.grid_y_size_ * config_.grid_x_size_ * + config_.max_num_points_per_pillar_ * config_.num_point_feature_size_ + + 1; + mask_ = cuda::make_unique(mask_size_); + voxels_ = cuda::make_unique(voxels_size_); +} + +void PreprocessCuda::generateVoxels( + float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features, + unsigned int * voxel_num, unsigned int * voxel_idxs) +{ + cuda::clear_async(mask_.get(), mask_size_, stream_); + cuda::clear_async(voxels_.get(), voxels_size_, stream_); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + CHECK_CUDA_ERROR(generateVoxels_random_launch(points, points_size, mask_.get(), voxels_.get())); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + CHECK_CUDA_ERROR(generateBaseFeatures_launch( + mask_.get(), voxels_.get(), pillar_num, voxel_features, voxel_num, voxel_idxs)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); +} + +__global__ void generateVoxels_random_kernel( + float * points, unsigned int points_size, float min_x_range, float max_x_range, float min_y_range, + float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size, + float pillar_z_size, int grid_y_size, int grid_x_size, int points_per_voxel, unsigned int * mask, + float * voxels) +{ + int point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx >= points_size) return; + + float x = points[point_idx * 5]; + float y = points[point_idx * 5 + 1]; + float z = points[point_idx * 5 + 2]; + float i = points[point_idx * 5 + 3]; + float t = points[point_idx * 5 + 4]; + + if ( + x <= min_x_range || x >= max_x_range || y <= min_y_range || y >= max_y_range || + z <= min_z_range || z >= max_z_range) + return; + + int voxel_idx = floorf((x - min_x_range) / pillar_x_size); + int voxel_idy = floorf((y - min_y_range) / pillar_y_size); + unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx; + + unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1); + + if (point_id >= points_per_voxel) return; + float * address = voxels + (voxel_index * points_per_voxel + point_id) * 5; + atomicExch(address + 0, x); + atomicExch(address + 1, y); + atomicExch(address + 2, z); + atomicExch(address + 3, i); + atomicExch(address + 4, t); +} + +cudaError_t PreprocessCuda::generateVoxels_random_launch( + float * points, unsigned int points_size, unsigned int * mask, float * voxels) +{ + int threadNum = config_.threads_for_voxel_; + dim3 blocks((points_size + threadNum - 1) / threadNum); + dim3 threads(threadNum); + generateVoxels_random_kernel<<>>( + points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_, + config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_, + config_.voxel_y_size_, config_.voxel_z_size_, config_.grid_y_size_, config_.grid_x_size_, + config_.points_per_voxel_, mask, voxels); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void generateBaseFeatures_kernel( + unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, float points_per_voxel, + float max_voxels, unsigned int * pillar_num, float * voxel_features, unsigned int * voxel_num, + unsigned int * voxel_idxs) +{ + unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y; + + if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return; + + unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx; + unsigned int count = mask[voxel_index]; + if (!(count > 0)) return; + count = count < points_per_voxel ? count : points_per_voxel; + + unsigned int current_pillarId = 0; + current_pillarId = atomicAdd(pillar_num, 1); + if (current_pillarId >= max_voxels) return; + + voxel_num[current_pillarId] = count; + + uint4 idx = {0, 0, voxel_idy, voxel_idx}; + ((uint4 *)voxel_idxs)[current_pillarId] = idx; + + for (int i = 0; i < count; i++) { + int inIndex = voxel_index * points_per_voxel + i; + int outIndex = current_pillarId * points_per_voxel + i; + voxel_features[outIndex * 5] = voxels[inIndex * 5]; + voxel_features[outIndex * 5 + 1] = voxels[inIndex * 5 + 1]; + voxel_features[outIndex * 5 + 2] = voxels[inIndex * 5 + 2]; + voxel_features[outIndex * 5 + 3] = voxels[inIndex * 5 + 3]; + voxel_features[outIndex * 5 + 4] = voxels[inIndex * 5 + 4]; + } + + // clear buffer for next infer + atomicExch(mask + voxel_index, 0); +} + +// create 4 channels +cudaError_t PreprocessCuda::generateBaseFeatures_launch( + unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features, + unsigned int * voxel_num, unsigned int * voxel_idxs) +{ + dim3 threads = {32, 32}; + dim3 blocks = {divup(config_.grid_x_size_, threads.x), divup(config_.grid_y_size_, threads.y)}; + + generateBaseFeatures_kernel<<>>( + mask, voxels, config_.grid_y_size_, config_.grid_x_size_, config_.points_per_voxel_, + config_.max_voxels_, pillar_num, voxel_features, voxel_num, voxel_idxs); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void generateSweepPoints_kernel( + const float * input_points, size_t points_size, int input_point_step, float time_lag, + const float * transform_array, int num_features, float * output_points) +{ + int point_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (point_idx >= points_size) return; + + const float input_x = input_points[point_idx * input_point_step + 0]; + const float input_y = input_points[point_idx * input_point_step + 1]; + const float input_z = input_points[point_idx * input_point_step + 2]; + const float intensity = input_points[point_idx * input_point_step + 3]; + + output_points[point_idx * num_features] = transform_array[0] * input_x + + transform_array[4] * input_y + + transform_array[8] * input_z + transform_array[12]; + output_points[point_idx * num_features + 1] = transform_array[1] * input_x + + transform_array[5] * input_y + + transform_array[9] * input_z + transform_array[13]; + output_points[point_idx * num_features + 2] = transform_array[2] * input_x + + transform_array[6] * input_y + + transform_array[10] * input_z + transform_array[14]; + output_points[point_idx * num_features + 3] = intensity; + output_points[point_idx * num_features + 4] = time_lag; +} + +cudaError_t PreprocessCuda::generateSweepPoints_launch( + const float * input_points, size_t points_size, int input_point_step, float time_lag, + const float * transform_array, float * output_points) +{ + int threadNum = config_.threads_for_voxel_; + dim3 blocks((points_size + threadNum - 1) / threadNum); + dim3 threads(threadNum); + + generateSweepPoints_kernel<<>>( + input_points, points_size, input_point_step, time_lag, transform_array, + config_.num_point_feature_size_, output_points); + + cudaError_t err = cudaGetLastError(); + return err; +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp new file mode 100644 index 0000000000000..7072e8491af66 --- /dev/null +++ b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp @@ -0,0 +1,142 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/preprocess/voxel_generator.hpp" + +#include "lidar_transfusion/preprocess/preprocess_kernel.hpp" + +#include + +#include + +namespace lidar_transfusion +{ + +std::ostream & operator<<(std::ostream & os, const CloudInfo & info) +{ + os << "x_offset: " << static_cast(info.x_offset) << std::endl; + os << "y_offset: " << static_cast(info.y_offset) << std::endl; + os << "z_offset: " << static_cast(info.z_offset) << std::endl; + os << "intensity_offset: " << static_cast(info.intensity_offset) << std::endl; + os << "x_datatype: " << static_cast(info.x_datatype) << std::endl; + os << "y_datatype: " << static_cast(info.y_datatype) << std::endl; + os << "z_datatype: " << static_cast(info.z_datatype) << std::endl; + os << "intensity_datatype: " << static_cast(info.intensity_datatype) << std::endl; + os << "x_count: " << static_cast(info.x_count) << std::endl; + os << "y_count: " << static_cast(info.y_count) << std::endl; + os << "z_count: " << static_cast(info.z_count) << std::endl; + os << "intensity_count: " << static_cast(info.intensity_count) << std::endl; + os << "point_step: " << static_cast(info.point_step) << std::endl; + os << "is_bigendian: " << static_cast(info.is_bigendian) << std::endl; + return os; +} + +VoxelGenerator::VoxelGenerator( + const DensificationParam & densification_param, const TransfusionConfig & config, + cudaStream_t & stream) +: config_(config), stream_(stream) +{ + pd_ptr_ = std::make_unique(densification_param, stream_); + pre_ptr_ = std::make_unique(config_, stream_); + cloud_data_d_ = cuda::make_unique(config_.cloud_capacity_ * MAX_CLOUD_STEP_SIZE); + affine_past2current_d_ = cuda::make_unique(AFF_MAT_SIZE); +} + +bool VoxelGenerator::enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer) +{ + return pd_ptr_->enqueuePointCloud(msg, tf_buffer); +} + +std::size_t VoxelGenerator::generateSweepPoints( + const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr & points_d) +{ + if (!is_initialized_) { + initCloudInfo(msg); + std::stringstream ss; + ss << "Input point cloud information: " << std::endl << cloud_info_; + RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str()); + + CloudInfo default_cloud_info; + if (cloud_info_ != default_cloud_info) { + ss << "Expected point cloud information: " << std::endl << default_cloud_info; + RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str()); + throw std::runtime_error("Input point cloud has unsupported format"); + } + is_initialized_ = true; + } + + size_t point_counter{0}; + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter); + pc_cache_iter++) { + auto sweep_num_points = pc_cache_iter->num_points; + if (point_counter + sweep_num_points >= config_.cloud_capacity_) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used " + << pd_ptr_->getIdx(pc_cache_iter) << " out of " + << pd_ptr_->getCacheSize() << " sweep(s)"); + break; + } + auto shift = point_counter * config_.num_point_feature_size_; + + auto affine_past2current = + pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world; + static_assert(std::is_same::value); + static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major."); + + float time_lag = static_cast( + pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds()); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float), + cudaMemcpyHostToDevice, stream_)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + pre_ptr_->generateSweepPoints_launch( + pc_cache_iter->points_d.get(), sweep_num_points, cloud_info_.point_step / sizeof(float), + time_lag, affine_past2current_d_.get(), points_d.get() + shift); + point_counter += sweep_num_points; + } + + return point_counter; +} + +void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg) +{ + std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) = + getFieldInfo(msg, "x"); + std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) = + getFieldInfo(msg, "y"); + std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) = + getFieldInfo(msg, "z"); + std::tie( + cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) = + getFieldInfo(msg, "intensity"); + cloud_info_.point_step = msg.point_step; + cloud_info_.is_bigendian = msg.is_bigendian; +} + +std::tuple VoxelGenerator::getFieldInfo( + const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name) +{ + for (const auto & field : msg.fields) { + if (field.name == field_name) { + return std::make_tuple(field.offset, field.datatype, field.count); + } + } + throw std::runtime_error("Missing field: " + field_name); +} +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/ros_utils.cpp b/perception/lidar_transfusion/lib/ros_utils.cpp new file mode 100644 index 0000000000000..0ecb34faf732d --- /dev/null +++ b/perception/lidar_transfusion/lib/ros_utils.cpp @@ -0,0 +1,83 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/ros_utils.hpp" + +#include +#include +#include + +namespace lidar_transfusion +{ + +using Label = autoware_perception_msgs::msg::ObjectClassification; + +void box3DToDetectedObject( + const Box3D & box3d, const std::vector & class_names, + autoware_perception_msgs::msg::DetectedObject & obj) +{ + obj.existence_probability = box3d.score; + + // classification + autoware_perception_msgs::msg::ObjectClassification classification; + classification.probability = 1.0f; + if (box3d.label >= 0 && static_cast(box3d.label) < class_names.size()) { + classification.label = getSemanticType(class_names[box3d.label]); + } else { + classification.label = Label::UNKNOWN; + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Unexpected label: UNKNOWN is set."); + } + + if (object_recognition_utils::isCarLikeVehicle(classification.label)) { + obj.kinematics.orientation_availability = + autoware_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN; + } + + obj.classification.emplace_back(classification); + + // pose and shape + // mmdet3d yaw format to ros yaw format + float yaw = box3d.yaw + tier4_autoware_utils::pi / 2; + obj.kinematics.pose_with_covariance.pose.position = + tier4_autoware_utils::createPoint(box3d.x, box3d.y, box3d.z); + obj.kinematics.pose_with_covariance.pose.orientation = + tier4_autoware_utils::createQuaternionFromYaw(yaw); + obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX; + obj.shape.dimensions = + tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height); +} + +uint8_t getSemanticType(const std::string & class_name) +{ + if (class_name == "CAR") { + return Label::CAR; + } else if (class_name == "TRUCK") { + return Label::TRUCK; + } else if (class_name == "BUS") { + return Label::BUS; + } else if (class_name == "TRAILER") { + return Label::TRAILER; + } else if (class_name == "MOTORCYCLE") { + return Label::MOTORCYCLE; + } else if (class_name == "BICYCLE") { + return Label::BICYCLE; + } else if (class_name == "PEDESTRIAN") { + return Label::PEDESTRIAN; + } else { // CONSTRUCTION_VEHICLE, BARRIER, TRAFFIC_CONE + return Label::UNKNOWN; + } +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/lib/transfusion_trt.cpp b/perception/lidar_transfusion/lib/transfusion_trt.cpp new file mode 100644 index 0000000000000..c876c0906162b --- /dev/null +++ b/perception/lidar_transfusion/lib/transfusion_trt.cpp @@ -0,0 +1,205 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/transfusion_trt.hpp" + +#include "lidar_transfusion/preprocess/preprocess_kernel.hpp" +#include "lidar_transfusion/transfusion_config.hpp" + +#include + +#include +#include +#include +#include + +namespace lidar_transfusion +{ + +TransfusionTRT::TransfusionTRT( + const NetworkParam & network_param, const DensificationParam & densification_param, + const TransfusionConfig & config) +: config_(config) +{ + network_trt_ptr_ = std::make_unique(config_); + + network_trt_ptr_->init( + network_param.onnx_path(), network_param.engine_path(), network_param.trt_precision()); + vg_ptr_ = std::make_unique(densification_param, config_, stream_); + stop_watch_ptr_ = std::make_unique>(); + stop_watch_ptr_->tic("processing/inner"); + initPtr(); + + CHECK_CUDA_ERROR(cudaStreamCreate(&stream_)); +} + +TransfusionTRT::~TransfusionTRT() +{ + if (stream_) { + cudaStreamSynchronize(stream_); + cudaStreamDestroy(stream_); + } +} + +void TransfusionTRT::initPtr() +{ + // point cloud to voxels + voxel_features_size_ = + config_.max_voxels_ * config_.max_num_points_per_pillar_ * config_.num_point_feature_size_; + voxel_num_size_ = config_.max_voxels_; + voxel_idxs_size_ = config_.max_voxels_ * config_.num_point_values_; + + // output of TRT -- input of post-process + cls_size_ = config_.num_proposals_ * config_.num_classes_; + box_size_ = config_.num_proposals_ * config_.num_box_values_; + dir_cls_size_ = config_.num_proposals_ * 2; // x, y + cls_output_d_ = cuda::make_unique(cls_size_); + box_output_d_ = cuda::make_unique(box_size_); + dir_cls_output_d_ = cuda::make_unique(dir_cls_size_); + + params_input_d_ = cuda::make_unique(); + voxel_features_d_ = cuda::make_unique(voxel_features_size_); + voxel_num_d_ = cuda::make_unique(voxel_num_size_); + voxel_idxs_d_ = cuda::make_unique(voxel_idxs_size_); + points_d_ = cuda::make_unique(config_.cloud_capacity_ * config_.num_point_feature_size_); + pre_ptr_ = std::make_unique(config_, stream_); + post_ptr_ = std::make_unique(config_, stream_); +} + +bool TransfusionTRT::detect( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer, + std::vector & det_boxes3d, std::unordered_map & proc_timing) +{ + stop_watch_ptr_->toc("processing/inner", true); + if (!preprocess(msg, tf_buffer)) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Fail to preprocess and skip to detect."); + return false; + } + proc_timing.emplace( + "debug/processing_time/preprocess_ms", stop_watch_ptr_->toc("processing/inner", true)); + + if (!inference()) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Fail to inference and skip to detect."); + return false; + } + proc_timing.emplace( + "debug/processing_time/inference_ms", stop_watch_ptr_->toc("processing/inner", true)); + + if (!postprocess(det_boxes3d)) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Fail to postprocess and skip to detect."); + return false; + } + proc_timing.emplace( + "debug/processing_time/postprocess_ms", stop_watch_ptr_->toc("processing/inner", true)); + + return true; +} + +bool TransfusionTRT::preprocess( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer) +{ + if (!vg_ptr_->enqueuePointCloud(msg, tf_buffer)) { + return false; + } + + cuda::clear_async(cls_output_d_.get(), cls_size_, stream_); + cuda::clear_async(box_output_d_.get(), box_size_, stream_); + cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_); + cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_); + cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_); + cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_); + cuda::clear_async(params_input_d_.get(), 1, stream_); + cuda::clear_async( + points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + const auto count = vg_ptr_->generateSweepPoints(msg, points_d_); + RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count); + + pre_ptr_->generateVoxels( + points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(), + voxel_idxs_d_.get()); + unsigned int params_input; + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + ¶ms_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + if (params_input < config_.min_voxel_size_) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), + "Too few voxels (" << params_input << ") for actual optimization profile (" + << config_.min_voxel_size_ << ")"); + return false; + } + + if (params_input > config_.max_voxels_) { + params_input = config_.max_voxels_; + } + RCLCPP_DEBUG_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input); + + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get()); + network_trt_ptr_->context->setInputShape( + network_trt_ptr_->getTensorName(NetworkIO::voxels), + nvinfer1::Dims3{ + static_cast(params_input), static_cast(config_.max_num_points_per_pillar_), + static_cast(config_.num_point_feature_size_)}); + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get()); + network_trt_ptr_->context->setInputShape( + network_trt_ptr_->getTensorName(NetworkIO::num_points), + nvinfer1::Dims{1, {static_cast(params_input)}}); + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get()); + network_trt_ptr_->context->setInputShape( + network_trt_ptr_->getTensorName(NetworkIO::coors), + nvinfer1::Dims2{ + static_cast(params_input), static_cast(config_.num_point_values_)}); + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get()); + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get()); + network_trt_ptr_->context->setTensorAddress( + network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get()); + return true; +} + +bool TransfusionTRT::inference() +{ + auto status = network_trt_ptr_->context->enqueueV3(stream_); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + if (!status) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), "Fail to enqueue and skip to detect."); + return false; + } + return true; +} + +bool TransfusionTRT::postprocess(std::vector & det_boxes3d) +{ + CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch( + cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + return true; +} + +} // namespace lidar_transfusion diff --git a/perception/lidar_transfusion/package.xml b/perception/lidar_transfusion/package.xml new file mode 100644 index 0000000000000..29643d0957eb1 --- /dev/null +++ b/perception/lidar_transfusion/package.xml @@ -0,0 +1,34 @@ + + + + lidar_transfusion + 1.0.0 + The lidar_transfusion package + Amadeusz Szymko + Kenzo Lobos-Tsunekawa + Satoshi Tanaka + Apache License 2.0 + + ament_cmake_auto + autoware_cmake + + autoware_perception_msgs + launch_ros + object_recognition_utils + pcl_ros + rclcpp + rclcpp_components + tensorrt_common + tf2_eigen + tf2_geometry_msgs + tf2_sensor_msgs + tier4_autoware_utils + + ament_cmake_ros + ament_lint_auto + autoware_lint_common + + + ament_cmake + + diff --git a/perception/lidar_transfusion/schema/detection_class_remapper.schema.json b/perception/lidar_transfusion/schema/detection_class_remapper.schema.json new file mode 100644 index 0000000000000..8aaefa295fcc0 --- /dev/null +++ b/perception/lidar_transfusion/schema/detection_class_remapper.schema.json @@ -0,0 +1,72 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Parameters for Detection Class Remapper", + "type": "object", + "definitions": { + "detection_class_remapper": { + "type": "object", + "properties": { + "allow_remapping_by_area_matrix": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "Whether to allow remapping of classes. The order of 8x8 matrix classes comes from ObjectClassification msg.", + "default": [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + ], + "minItems": 64, + "maxItems": 64 + }, + "min_area_matrix": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Minimum area for specific class to consider class remapping.", + "default": [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.1, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "minItems": 64, + "maxItems": 64 + }, + "max_area_matrix": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Maximum area for specific class to consider class remapping.", + "default": [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 999.999, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "minItems": 64, + "maxItems": 64 + } + }, + "required": ["allow_remapping_by_area_matrix", "min_area_matrix", "max_area_matrix"], + "additionalProperties": false + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/detection_class_remapper" + } + }, + "required": ["ros__parameters"], + "additionalProperties": false + } + }, + "required": ["/**"], + "additionalProperties": false +} diff --git a/perception/lidar_transfusion/schema/transfusion.schema.json b/perception/lidar_transfusion/schema/transfusion.schema.json new file mode 100644 index 0000000000000..41d8d887236a8 --- /dev/null +++ b/perception/lidar_transfusion/schema/transfusion.schema.json @@ -0,0 +1,160 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Parameters for LiDAR TransFusion Node", + "type": "object", + "definitions": { + "transfusion": { + "type": "object", + "properties": { + "class_names": { + "type": "array", + "items": { + "type": "string" + }, + "description": "An array of class names will be predicted.", + "default": ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"], + "uniqueItems": true + }, + "trt_precision": { + "type": "string", + "description": "A precision of TensorRT engine.", + "default": "fp16", + "enum": ["fp16", "fp32"] + }, + "voxels_num": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "A maximum number of voxels [min, opt, max].", + "default": [5000, 30000, 60000] + }, + "point_cloud_range": { + "type": "array", + "items": { + "type": "number" + }, + "description": "An array of distance ranges of each class.", + "default": [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0], + "minItems": 6, + "maxItems": 6 + }, + "voxel_size": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Voxels size [x, y, z].", + "default": [0.3, 0.3, 8.0], + "minItems": 3, + "maxItems": 3 + }, + "onnx_path": { + "type": "string", + "description": "A path to ONNX model file.", + "default": "$(var model_path)/transfusion.onnx", + "pattern": "\\.onnx$" + }, + "engine_path": { + "type": "string", + "description": "A path to TensorRT engine file.", + "default": "$(var model_path)/transfusion.engine", + "pattern": "\\.engine$" + }, + "down_sample_factor": { + "type": "integer", + "description": "A scale factor of downsampling points", + "default": 1, + "minimum": 1 + }, + "densification_num_past_frames": { + "type": "integer", + "description": "A number of past frames to be considered as same input frame.", + "default": 1, + "minimum": 0 + }, + "densification_world_frame_id": { + "type": "string", + "description": "A name of frame id where world coordinates system is defined with respect to.", + "default": "map" + }, + "circle_nms_dist_threshold": { + "type": "number", + "description": "A distance threshold between detections in NMS.", + "default": 0.5, + "minimum": 0.0 + }, + "iou_nms_target_class_names": { + "type": "array", + "items": { + "type": "string" + }, + "description": "An array of class names to be target in NMS.", + "default": ["CAR"], + "uniqueItems": true + }, + "iou_nms_search_distance_2d": { + "type": "number", + "description": "A maximum distance value to search the nearest objects.", + "default": 10.0, + "minimum": 0.0 + }, + "iou_nms_threshold": { + "type": "number", + "description": "A threshold value of NMS using IoU score.", + "default": 0.1, + "minimum": 0.0, + "maximum": 1.0 + }, + "yaw_norm_thresholds": { + "type": "array", + "items": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + "description": "A thresholds array of direction vectors norm, all of objects with vector norm less than this threshold are ignored.", + "default": [0.3, 0.3, 0.3, 0.3, 0.0] + }, + "score_threshold": { + "type": "number", + "description": "A threshold value of confidence score, all of objects with score less than this threshold are ignored.", + "default": 0.2, + "minimum": 0.0 + } + }, + "required": [ + "class_names", + "trt_precision", + "voxels_num", + "point_cloud_range", + "voxel_size", + "onnx_path", + "engine_path", + "densification_num_past_frames", + "densification_world_frame_id", + "circle_nms_dist_threshold", + "iou_nms_target_class_names", + "iou_nms_search_distance_2d", + "iou_nms_threshold", + "yaw_norm_thresholds", + "score_threshold" + ], + "additionalProperties": false + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/transfusion" + } + }, + "required": ["ros__parameters"], + "additionalProperties": false + } + }, + "required": ["/**"], + "additionalProperties": false +} diff --git a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp new file mode 100644 index 0000000000000..9313015002973 --- /dev/null +++ b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp @@ -0,0 +1,178 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +//    http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lidar_transfusion/lidar_transfusion_node.hpp" + +#include "lidar_transfusion/utils.hpp" + +namespace lidar_transfusion +{ + +LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options) +: Node("lidar_transfusion", options), tf_buffer_(this->get_clock()) +{ + auto descriptor = rcl_interfaces::msg::ParameterDescriptor{}.set__read_only(true); + // network + class_names_ = this->declare_parameter>("class_names", descriptor); + const std::string trt_precision = + this->declare_parameter("trt_precision", descriptor); + const auto voxels_num = this->declare_parameter>("voxels_num", descriptor); + const auto point_cloud_range = + this->declare_parameter>("point_cloud_range", descriptor); + const auto voxel_size = this->declare_parameter>("voxel_size", descriptor); + const std::string onnx_path = this->declare_parameter("onnx_path", descriptor); + const std::string engine_path = this->declare_parameter("engine_path", descriptor); + + if (point_cloud_range.size() != 6) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), + "The size of point_cloud_range != 6: use the default parameters."); + } + if (voxel_size.size() != 3) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_transfusion"), + "The size of voxel_size != 3: use the default parameters."); + } + + // pre-process + const std::string densification_world_frame_id = + this->declare_parameter("densification_world_frame_id", descriptor); + const int densification_num_past_frames = + this->declare_parameter("densification_num_past_frames", descriptor); + + // post-process + const float circle_nms_dist_threshold = + static_cast(this->declare_parameter("circle_nms_dist_threshold", descriptor)); + { // IoU NMS + NMSParams p; + p.nms_type_ = NMS_TYPE::IoU_BEV; + p.target_class_names_ = + this->declare_parameter>("iou_nms_target_class_names", descriptor); + p.search_distance_2d_ = + this->declare_parameter("iou_nms_search_distance_2d", descriptor); + p.iou_threshold_ = this->declare_parameter("iou_nms_threshold", descriptor); + iou_bev_nms_.setParameters(p); + } + const auto yaw_norm_thresholds = + this->declare_parameter>("yaw_norm_thresholds", descriptor); + const float score_threshold = + static_cast(this->declare_parameter("score_threshold", descriptor)); + + NetworkParam network_param(onnx_path, engine_path, trt_precision); + DensificationParam densification_param( + densification_world_frame_id, densification_num_past_frames); + TransfusionConfig config( + voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds, + score_threshold); + + const auto allow_remapping_by_area_matrix = + this->declare_parameter>("allow_remapping_by_area_matrix", descriptor); + const auto min_area_matrix = + this->declare_parameter>("min_area_matrix", descriptor); + const auto max_area_matrix = + this->declare_parameter>("max_area_matrix", descriptor); + detection_class_remapper_.setParameters( + allow_remapping_by_area_matrix, min_area_matrix, max_area_matrix); + + detector_ptr_ = std::make_unique(network_param, densification_param, config); + + cloud_sub_ = this->create_subscription( + "~/input/pointcloud", rclcpp::SensorDataQoS{}.keep_last(1), + std::bind(&LidarTransfusionNode::cloudCallback, this, std::placeholders::_1)); + + objects_pub_ = this->create_publisher( + "~/output/objects", rclcpp::QoS(1)); + + published_time_pub_ = std::make_unique(this); + + // initialize debug tool + { + using tier4_autoware_utils::DebugPublisher; + using tier4_autoware_utils::StopWatch; + stop_watch_ptr_ = std::make_unique>(); + debug_publisher_ptr_ = std::make_unique(this, this->get_name()); + stop_watch_ptr_->tic("cyclic"); + stop_watch_ptr_->tic("processing/total"); + } + + if (this->declare_parameter("build_only", false, descriptor)) { + RCLCPP_INFO(this->get_logger(), "TensorRT engine is built and shutdown node."); + rclcpp::shutdown(); + } +} + +void LidarTransfusionNode::cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr msg) +{ + const auto objects_sub_count = + objects_pub_->get_subscription_count() + objects_pub_->get_intra_process_subscription_count(); + if (objects_sub_count < 1) { + return; + } + + if (stop_watch_ptr_) { + stop_watch_ptr_->toc("processing/total", true); + } + + std::vector det_boxes3d; + std::unordered_map proc_timing; + bool is_success = detector_ptr_->detect(*msg, tf_buffer_, det_boxes3d, proc_timing); + if (!is_success) { + return; + } + + std::vector raw_objects; + raw_objects.reserve(det_boxes3d.size()); + for (const auto & box3d : det_boxes3d) { + autoware_perception_msgs::msg::DetectedObject obj; + box3DToDetectedObject(box3d, class_names_, obj); + raw_objects.emplace_back(obj); + } + + autoware_perception_msgs::msg::DetectedObjects output_msg; + output_msg.header = msg->header; + output_msg.objects = iou_bev_nms_.apply(raw_objects); + + detection_class_remapper_.mapClasses(output_msg); + + if (objects_sub_count > 0) { + objects_pub_->publish(output_msg); + published_time_pub_->publish_if_subscribed(objects_pub_, output_msg.header.stamp); + } + + // add processing time for debug + if (debug_publisher_ptr_ && stop_watch_ptr_) { + const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic", true); + const double processing_time_ms = stop_watch_ptr_->toc("processing/total", true); + const double pipeline_latency_ms = + std::chrono::duration( + std::chrono::nanoseconds( + (this->get_clock()->now() - output_msg.header.stamp).nanoseconds())) + .count(); + debug_publisher_ptr_->publish( + "debug/cyclic_time_ms", cyclic_time_ms); + debug_publisher_ptr_->publish( + "debug/pipeline_latency_ms", pipeline_latency_ms); + debug_publisher_ptr_->publish( + "debug/processing_time/total_ms", processing_time_ms); + for (const auto & [topic, time_ms] : proc_timing) { + debug_publisher_ptr_->publish(topic, time_ms); + } + } +} + +} // namespace lidar_transfusion + +#include "rclcpp_components/register_node_macro.hpp" + +RCLCPP_COMPONENTS_REGISTER_NODE(lidar_transfusion::LidarTransfusionNode)