diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
index 9c3e815e8abb4..7e389f5790051 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
@@ -5,7 +5,7 @@
 
   <!-- Lidar parameters -->
   <arg name="input/pointcloud"/>
-  <arg name="lidar_detection_model" default="centerpoint" description="options: `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
+  <arg name="lidar_detection_model" default="centerpoint" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
   <arg name="use_object_filter" default="true" description="use object filter"/>
   <arg name="pointcloud_container_name" default="pointcloud_container"/>
 
diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml
index f941bc8d771b1..274b39be681b8 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml
@@ -2,16 +2,41 @@
 <launch>
   <!-- Lidar parameters -->
   <arg name="input/pointcloud"/>
-  <arg name="lidar_detection_model" default="centerpoint" description="options: `centerpoint`, `apollo`, `clustering`"/>
+  <arg name="lidar_detection_model" default="centerpoint" description="options: `transfusion`, `centerpoint`, `apollo`, `clustering`"/>
 
   <!-- Lidar detector centerpoint parameters -->
   <arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint`, `centerpoint_tiny` or `centerpoint_sigma`"/>
   <arg name="centerpoint_model_path" default="$(var data_path)/lidar_centerpoint"/>
-  <arg name="lidar_model_param_path" default="$(find-pkg-share lidar_centerpoint)/config"/>
+
+  <!-- Lidar detector transfusion parameters -->
+  <arg name="transfusion_model_name" default="transfusion" description="options: `transfusion`"/>
+  <arg name="transfusion_model_path" default="$(var data_path)/lidar_transfusion"/>
+
+  <!-- TransFusion -->
+  <group if="$(eval &quot;'$(var lidar_detection_model)'=='transfusion'&quot;)">
+    <push-ros-namespace namespace="transfusion"/>
+    <arg name="lidar_model_param_path" default="$(find-pkg-share lidar_transfusion)/config"/>
+
+    <group>
+      <include file="$(find-pkg-share lidar_transfusion)/launch/lidar_transfusion.launch.xml">
+        <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
+        <arg name="output/objects" value="objects"/>
+        <arg name="model_name" value="$(var transfusion_model_name)"/>
+        <arg name="model_path" value="$(var transfusion_model_path)"/>
+        <arg name="model_param_path" value="$(var lidar_model_param_path)/$(var transfusion_model_name).param.yaml"/>
+        <arg name="class_remapper_param_path" value="$(var lidar_model_param_path)/detection_class_remapper.param.yaml"/>
+
+        <arg name="use_pointcloud_container" value="true"/>
+        <arg name="pointcloud_container_name" value="$(var pointcloud_container_name)"/>
+      </include>
+    </group>
+  </group>
 
   <!-- CenterPoint -->
   <group if="$(eval &quot;'$(var lidar_detection_model)'=='centerpoint'&quot;)">
     <push-ros-namespace namespace="centerpoint"/>
+    <arg name="lidar_model_param_path" default="$(find-pkg-share lidar_centerpoint)/config"/>
+
     <group>
       <include file="$(find-pkg-share lidar_centerpoint)/launch/lidar_centerpoint.launch.xml">
         <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
diff --git a/launch/tier4_perception_launch/launch/perception.launch.xml b/launch/tier4_perception_launch/launch/perception.launch.xml
index e1a2260f3ee6d..3ce78e3b29f29 100644
--- a/launch/tier4_perception_launch/launch/perception.launch.xml
+++ b/launch/tier4_perception_launch/launch/perception.launch.xml
@@ -47,7 +47,7 @@
   <arg name="input/pointcloud" default="/sensing/lidar/concatenated/pointcloud" description="The topic will be used in the detection module"/>
   <arg name="mode" default="camera_lidar_fusion" description="options: `camera_lidar_radar_fusion`, `camera_lidar_fusion`, `lidar_radar_fusion`, `lidar` or `radar`"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
-  <arg name="lidar_detection_model" default="centerpoint" description="options: `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
+  <arg name="lidar_detection_model" default="centerpoint" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
   <arg name="image_raw0" default="/sensing/camera/camera0/image_rect_color" description="image raw topic name"/>
   <arg name="camera_info0" default="/sensing/camera/camera0/camera_info" description="camera info topic name"/>
   <arg name="detection_rois0" default="/perception/object_recognition/detection/rois0" description="detection rois output topic name"/>
diff --git a/perception/lidar_transfusion/CMakeLists.txt b/perception/lidar_transfusion/CMakeLists.txt
new file mode 100644
index 0000000000000..c0e2d85f27e62
--- /dev/null
+++ b/perception/lidar_transfusion/CMakeLists.txt
@@ -0,0 +1,156 @@
+cmake_minimum_required(VERSION 3.14)
+project(lidar_transfusion)
+
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+add_compile_options(-Wno-deprecated-declarations)
+
+option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF)
+
+# set flags for CUDA availability
+option(CUDA_AVAIL "CUDA available" OFF)
+find_package(CUDA)
+if(CUDA_FOUND)
+  find_library(CUBLAS_LIBRARIES cublas HINTS
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib
+  )
+  if(CUDA_VERBOSE)
+    message("CUDA is available!")
+    message("CUDA Libs: ${CUDA_LIBRARIES}")
+    message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  endif()
+  # Note: cublas_device was depreciated in CUDA version 9.2
+  #       https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4
+  #       In LibTorch, CUDA_cublas_device_LIBRARY is used.
+  unset(CUDA_cublas_device_LIBRARY CACHE)
+  set(CUDA_AVAIL ON)
+else()
+  message("CUDA NOT FOUND")
+  set(CUDA_AVAIL OFF)
+endif()
+
+# set flags for TensorRT availability
+option(TRT_AVAIL "TensorRT available" OFF)
+# try to find the tensorRT modules
+find_library(NVINFER nvinfer)
+find_library(NVONNXPARSER nvonnxparser)
+if(NVINFER AND NVONNXPARSER)
+  if(CUDA_VERBOSE)
+    message("TensorRT is available!")
+    message("NVINFER: ${NVINFER}")
+    message("NVONNXPARSER: ${NVONNXPARSER}")
+  endif()
+  set(TRT_AVAIL ON)
+else()
+  message("TensorRT is NOT Available")
+  set(TRT_AVAIL OFF)
+endif()
+
+# set flags for CUDNN availability
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# try to find the CUDNN module
+find_library(CUDNN_LIBRARY
+NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+PATH_SUFFIXES lib lib64 bin
+DOC "CUDNN library."
+)
+if(CUDNN_LIBRARY)
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDNN is available!")
+    message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
+  endif()
+  set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
+endif()
+
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
+  find_package(ament_cmake_auto REQUIRED)
+  ament_auto_find_build_dependencies()
+
+  include_directories(
+    include
+    ${CUDA_INCLUDE_DIRS}
+  )
+
+  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-g -G")
+    set(CUDA_NVCC_FLAGS "-g -G")
+  endif()
+
+  ament_auto_add_library(transfusion_lib SHARED
+    lib/detection_class_remapper.cpp
+    lib/network/network_trt.cpp
+    lib/postprocess/non_maximum_suppression.cpp
+    lib/preprocess/voxel_generator.cpp
+    lib/preprocess/pointcloud_densification.cpp
+    lib/ros_utils.cpp
+    lib/transfusion_trt.cpp
+  )
+
+  cuda_add_library(transfusion_cuda_lib SHARED
+    lib/postprocess/circle_nms_kernel.cu
+    lib/postprocess/postprocess_kernel.cu
+    lib/preprocess/preprocess_kernel.cu
+  )
+
+  target_link_libraries(transfusion_lib
+    ${NVINFER}
+    ${NVONNXPARSER}
+    ${CUDA_LIBRARIES}
+    ${CUBLAS_LIBRARIES}
+    ${CUDA_curand_LIBRARY}
+    ${CUDNN_LIBRARY}
+    transfusion_cuda_lib
+  )
+
+  target_include_directories(transfusion_lib
+    PUBLIC
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+      $<INSTALL_INTERFACE:include>
+  )
+
+  # To suppress unknown-pragmas error. The root-cause is CUB library in CUDA 11.6.
+  # This issue was fixed by https://github.com/NVIDIA/cub/commit/7d608bf1dc14553e2fb219eabeed80b76621b6fe
+  target_include_directories(transfusion_lib
+    SYSTEM PUBLIC
+    ${CUDA_INCLUDE_DIRS}
+  )
+
+  ament_auto_add_library(lidar_transfusion_component SHARED
+    src/lidar_transfusion_node.cpp
+  )
+
+  target_link_libraries(lidar_transfusion_component
+    transfusion_lib
+  )
+
+  rclcpp_components_register_node(lidar_transfusion_component
+    PLUGIN "lidar_transfusion::LidarTransfusionNode"
+    EXECUTABLE lidar_transfusion_node
+  )
+
+  if(BUILD_TESTING)
+    find_package(ament_lint_auto REQUIRED)
+    ament_lint_auto_find_test_dependencies()
+  endif()
+
+  ament_auto_package(
+    INSTALL_TO_SHARE
+      launch
+      config
+  )
+
+else()
+  find_package(ament_cmake_auto REQUIRED)
+  ament_auto_find_build_dependencies()
+
+  ament_auto_package(
+    INSTALL_TO_SHARE
+      launch
+  )
+endif()
diff --git a/perception/lidar_transfusion/README.md b/perception/lidar_transfusion/README.md
new file mode 100644
index 0000000000000..7974714720c32
--- /dev/null
+++ b/perception/lidar_transfusion/README.md
@@ -0,0 +1,113 @@
+# lidar_transfusion
+
+## Purpose
+
+The `lidar_transfusion` package is used for 3D object detection based on lidar data (x, y, z, intensity).
+
+## Inner-workings / Algorithms
+
+The implementation bases on TransFusion [1] work. It uses TensorRT library for data process and network inference.
+
+We trained the models using <https://github.com/open-mmlab/mmdetection3d>.
+
+## Inputs / Outputs
+
+### Input
+
+| Name                 | Type                            | Description       |
+| -------------------- | ------------------------------- | ----------------- |
+| `~/input/pointcloud` | `sensor_msgs::msg::PointCloud2` | Input pointcloud. |
+
+### Output
+
+| Name                                   | Type                                             | Description                 |
+| -------------------------------------- | ------------------------------------------------ | --------------------------- |
+| `~/output/objects`                     | `autoware_perception_msgs::msg::DetectedObjects` | Detected objects.           |
+| `debug/cyclic_time_ms`                 | `tier4_debug_msgs::msg::Float64Stamped`          | Cyclic time (ms).           |
+| `debug/pipeline_latency_ms`            | `tier4_debug_msgs::msg::Float64Stamped`          | Pipeline latency time (ms). |
+| `debug/processing_time/preprocess_ms`  | `tier4_debug_msgs::msg::Float64Stamped`          | Preprocess (ms).            |
+| `debug/processing_time/inference_ms`   | `tier4_debug_msgs::msg::Float64Stamped`          | Inference time (ms).        |
+| `debug/processing_time/postprocess_ms` | `tier4_debug_msgs::msg::Float64Stamped`          | Postprocess time (ms).      |
+| `debug/processing_time/total_ms`       | `tier4_debug_msgs::msg::Float64Stamped`          | Total processing time (ms). |
+
+## Parameters
+
+### TransFusion
+
+{{ json_to_markdown("perception/lidar_transfusion/schema/transfusion.schema.json") }}
+
+### Detection class remapper
+
+{{ json_to_markdown("perception/lidar_transfusion/schema/detection_class_remapper.schema.json") }}
+
+### The `build_only` option
+
+The `lidar_transfusion` node has `build_only` option to build the TensorRT engine file from the ONNX file.
+Although it is preferred to move all the ROS parameters in `.param.yaml` file in Autoware Universe, the `build_only` option is not moved to the `.param.yaml` file for now, because it may be used as a flag to execute the build as a pre-task. You can execute with the following command:
+
+```bash
+ros2 launch lidar_transfusion lidar_transfusion.launch.xml build_only:=true
+```
+
+### The `log_level` option
+
+The default logging severity level for `lidar_transfusion` is `info`. For debugging purposes, the developer may decrease severity level using `log_level` parameter:
+
+```bash
+ros2 launch lidar_transfusion lidar_transfusion.launch.xml log_level:=debug
+```
+
+## Assumptions / Known limits
+
+## Trained Models
+
+You can download the onnx format of trained models by clicking on the links below.
+
+- TransFusion: [transfusion.onnx](https://awf.ml.dev.web.auto/perception/models/transfusion/v1/transfusion.onnx)
+
+The model was trained in TIER IV's internal database (~11k lidar frames) for 20 epochs.
+
+### Changelog
+
+## (Optional) Error detection and handling
+
+<!-- Write how to detect errors and how to recover from them.
+
+Example:
+  This package can handle up to 20 obstacles. If more obstacles found, this node will give up and raise diagnostic errors.
+-->
+
+## (Optional) Performance characterization
+
+<!-- Write performance information like complexity. If it wouldn't be the bottleneck, not necessary.
+
+Example:
+  ### Complexity
+
+  This algorithm is O(N).
+
+  ### Processing time
+
+  ...
+-->
+
+## References/External links
+
+[1] Xuyang Bai, Zeyu Hu, Xinge Zhu, Qingqiu Huang, Yilun Chen, Hongbo Fu and Chiew-Lan Tai. "TransFusion: Robust LiDAR-Camera Fusion for 3D Object Detection with Transformers." arXiv preprint arXiv:2203.11496 (2022). <!-- cspell:disable-line -->
+
+[2] <https://github.com/wep21/CUDA-TransFusion>
+
+[3] <https://github.com/open-mmlab/mmdetection3d>
+
+[4] <https://github.com/open-mmlab/OpenPCDet>
+
+[5] <https://www.nuscenes.org/nuscenes>
+
+## (Optional) Future extensions / Unimplemented parts
+
+<!-- Write future extensions of this package.
+
+Example:
+  Currently, this package can't handle the chattering obstacles well. We plan to add some probabilistic filters in the perception layer to improve it.
+  Also, there are some parameters that should be global(e.g. vehicle size, max steering, etc.). These will be refactored and defined as global parameters so that we can share the same parameters between different nodes.
+-->
diff --git a/perception/lidar_transfusion/config/detection_class_remapper.param.yaml b/perception/lidar_transfusion/config/detection_class_remapper.param.yaml
new file mode 100644
index 0000000000000..baea087c96bca
--- /dev/null
+++ b/perception/lidar_transfusion/config/detection_class_remapper.param.yaml
@@ -0,0 +1,38 @@
+/**:
+  ros__parameters:
+    allow_remapping_by_area_matrix:
+      # NOTE(kl): We turn all vehicles into trailers if they go over 3x12 [m^2].
+      # NOTE(kl): We turn cars into trucks if they have an area between 2.2 x 5.5 and 3.0 * 12.0 [m^2]
+      # row: original class. column: class to remap to
+      #UNKNOWN, CAR, TRUCK, BUS,  TRAILER, MOTORBIKE, BICYCLE,PEDESTRIAN
+      [0,       0,   0,     0,    0,       0,         0,      0,         #UNKNOWN
+       0,       0,   1,     0,    1,       0,         0,      0,         #CAR
+       0,       0,   0,     0,    1,       0,         0,      0,         #TRUCK
+       0,       0,   0,     0,    1,       0,         0,      0,         #BUS
+       0,       0,   0,     0,    0,       0,         0,      0,         #TRAILER
+       0,       0,   0,     0,    0,       0,         0,      0,         #MOTORBIKE
+       0,       0,   0,     0,    0,       0,         0,      0,         #BICYCLE
+       0,       0,   0,     0,    0,       0,         0,      0]         #PEDESTRIAN
+
+    min_area_matrix:
+      #UNKNOWN, CAR,   TRUCK,   BUS,   TRAILER,  MOTORBIKE, BICYCLE, PEDESTRIAN
+      [ 0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #UNKNOWN
+        0.000,  0.000, 12.100,  0.000, 36.000,   0.000,     0.000,   0.000,      #CAR
+        0.000,  0.000,  0.000,  0.000, 36.000,   0.000,     0.000,   0.000,      #TRUCK
+        0.000,  0.000,  0.000,  0.000, 36.000,   0.000,     0.000,   0.000,      #BUS
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #TRAILER
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #MOTORBIKE
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #BICYCLE
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000]      #PEDESTRIAN
+
+
+    max_area_matrix:
+      #UNKNOWN, CAR,   TRUCK,  BUS,    TRAILER,   MOTORBIKE, BICYCLE, PEDESTRIAN
+      [ 0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #UNKNOWN
+        0.000,  0.000, 36.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #CAR
+        0.000,  0.000,  0.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #TRUCK
+        0.000,  0.000,  0.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #BUS
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #TRAILER
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #MOTORBIKE
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #BICYCLE
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000]      #PEDESTRIAN
diff --git a/perception/lidar_transfusion/config/transfusion.param.yaml b/perception/lidar_transfusion/config/transfusion.param.yaml
new file mode 100644
index 0000000000000..feabe359caf1f
--- /dev/null
+++ b/perception/lidar_transfusion/config/transfusion.param.yaml
@@ -0,0 +1,20 @@
+/**:
+  ros__parameters:
+    # network
+    class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"]
+    trt_precision: fp16
+    voxels_num: [5000, 30000, 60000]  # [min, opt, max]
+    point_cloud_range: [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0]  # [x_min, y_min, z_min, x_max, y_max, z_max]
+    voxel_size: [0.3, 0.3, 8.0]  # [x, y, z]
+    onnx_path: "$(var model_path)/transfusion.onnx"
+    engine_path: "$(var model_path)/transfusion.engine"
+    # pre-process params
+    densification_num_past_frames: 1
+    densification_world_frame_id: map
+    # post-process params
+    circle_nms_dist_threshold: 0.5
+    iou_nms_target_class_names: ["CAR"]
+    iou_nms_search_distance_2d: 10.0
+    iou_nms_threshold: 0.1
+    yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]  # refers to the class_names
+    score_threshold: 0.2
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp
new file mode 100644
index 0000000000000..5c25a936d5392
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/cuda_utils.hpp
@@ -0,0 +1,126 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This code is licensed under CC0 1.0 Universal (Public Domain).
+ * You can use this without any limitation.
+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
+ */
+
+#ifndef LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
+#define LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <type_traits>
+
+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
+
+namespace cuda
+{
+inline void check_error(const ::cudaError_t e, const char * f, int n)
+{
+  if (e != ::cudaSuccess) {
+    ::std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw ::std::runtime_error{s.str()};
+  }
+}
+
+struct deleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+
+template <typename T>
+using unique_ptr = ::std::unique_ptr<T, deleter>;
+
+template <typename T>
+typename ::std::enable_if<::std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
+  const ::std::size_t n)
+{
+  using U = typename ::std::remove_extent<T>::type;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return cuda::unique_ptr<T>{p};
+}
+
+template <typename T>
+cuda::unique_ptr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return cuda::unique_ptr<T>{p};
+}
+
+constexpr size_t CUDA_ALIGN = 256;
+
+template <typename T>
+inline size_t get_size_aligned(size_t num_elem)
+{
+  size_t size = num_elem * sizeof(T);
+  size_t extra_align = 0;
+  if (size % CUDA_ALIGN != 0) {
+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
+  }
+  return size + extra_align;
+}
+
+template <typename T>
+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
+{
+  size_t size = get_size_aligned<T>(num_elem);
+  if (size > workspace_size) {
+    throw ::std::runtime_error("Workspace is too small!");
+  }
+  workspace_size -= size;
+  T * ptr = reinterpret_cast<T *>(workspace);
+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
+  return ptr;
+}
+
+template <typename T>
+void clear_async(T * ptr, size_t num_elem, cudaStream_t stream)
+{
+  CHECK_CUDA_ERROR(::cudaMemsetAsync(ptr, 0, sizeof(T) * num_elem, stream));
+}
+
+}  // namespace cuda
+
+#endif  // LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp b/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp
new file mode 100644
index 0000000000000..ace7dc5ff3405
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/detection_class_remapper.hpp
@@ -0,0 +1,47 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_
+#define LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_
+
+#include <Eigen/Core>
+
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_perception_msgs/msg/object_classification.hpp>
+#include <autoware_perception_msgs/msg/shape.hpp>
+
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class DetectionClassRemapper
+{
+public:
+  void setParameters(
+    const std::vector<int64_t> & allow_remapping_by_area_matrix,
+    const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix);
+  void mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg);
+
+protected:
+  Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> allow_remapping_by_area_matrix_;
+  Eigen::MatrixXd min_area_matrix_;
+  Eigen::MatrixXd max_area_matrix_;
+  int num_labels_;
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__DETECTION_CLASS_REMAPPER_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp b/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp
new file mode 100644
index 0000000000000..86303618d7877
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/lidar_transfusion_node.hpp
@@ -0,0 +1,73 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_
+#define LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_
+
+#include "lidar_transfusion/detection_class_remapper.hpp"
+#include "lidar_transfusion/postprocess/non_maximum_suppression.hpp"
+#include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
+#include "lidar_transfusion/transfusion_trt.hpp"
+#include "lidar_transfusion/visibility_control.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+#include <tier4_autoware_utils/ros/debug_publisher.hpp>
+#include <tier4_autoware_utils/ros/published_time_publisher.hpp>
+#include <tier4_autoware_utils/system/stop_watch.hpp>
+
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_perception_msgs/msg/object_classification.hpp>
+#include <autoware_perception_msgs/msg/shape.hpp>
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class LIDAR_TRANSFUSION_PUBLIC LidarTransfusionNode : public rclcpp::Node
+{
+public:
+  explicit LidarTransfusionNode(const rclcpp::NodeOptions & options);
+
+private:
+  void cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr msg);
+
+  rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::ConstSharedPtr cloud_sub_{nullptr};
+  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_{
+    nullptr};
+
+  tf2_ros::Buffer tf_buffer_;
+  tf2_ros::TransformListener tf_listener_{tf_buffer_};
+
+  DetectionClassRemapper detection_class_remapper_;
+  float score_threshold_{0.0};
+  std::vector<std::string> class_names_;
+
+  NonMaximumSuppression iou_bev_nms_;
+
+  std::unique_ptr<TransfusionTRT> detector_ptr_{nullptr};
+
+  // debugger
+  std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
+    nullptr};
+  std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_ptr_{nullptr};
+  std::unique_ptr<tier4_autoware_utils::PublishedTimePublisher> published_time_pub_{nullptr};
+};
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__LIDAR_TRANSFUSION_NODE_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp b/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp
new file mode 100644
index 0000000000000..3f8f32e346b0a
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/network/network_trt.hpp
@@ -0,0 +1,87 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
+#define LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
+
+#include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInfer.h>
+
+#include <array>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+struct ProfileDimension
+{
+  nvinfer1::Dims min;
+  nvinfer1::Dims opt;
+  nvinfer1::Dims max;
+
+  bool operator!=(const ProfileDimension & rhs) const
+  {
+    return min.nbDims != rhs.min.nbDims || opt.nbDims != rhs.opt.nbDims ||
+           max.nbDims != rhs.max.nbDims || !std::equal(min.d, min.d + min.nbDims, rhs.min.d) ||
+           !std::equal(opt.d, opt.d + opt.nbDims, rhs.opt.d) ||
+           !std::equal(max.d, max.d + max.nbDims, rhs.max.d);
+  }
+};
+
+class NetworkTRT
+{
+public:
+  explicit NetworkTRT(const TransfusionConfig & config);
+  ~NetworkTRT();
+
+  bool init(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
+  const char * getTensorName(NetworkIO name);
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
+
+private:
+  bool parseONNX(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+    size_t workspace_size = (1ULL << 30));
+  bool saveEngine(const std::string & engine_path);
+  bool loadEngine(const std::string & engine_path);
+  bool createContext();
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config);
+  bool validateNetworkIO();
+  nvinfer1::Dims validateTensorShape(NetworkIO name, const std::vector<int> shape);
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime> runtime_{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory> plan_{nullptr};
+  tensorrt_common::Logger logger_;
+  TransfusionConfig config_;
+  std::vector<const char *> tensors_names_;
+
+  std::array<ProfileDimension, 3> in_profile_dims_;
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp
new file mode 100644
index 0000000000000..e231482652b98
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/circle_nms_kernel.hpp
@@ -0,0 +1,32 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+#define LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+
+#include "lidar_transfusion/utils.hpp"
+
+#include <thrust/device_vector.h>
+
+namespace lidar_transfusion
+{
+// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
+// intersection over union (IoU) to suppress overlapped objects.
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp
new file mode 100644
index 0000000000000..daf2cc7c1fac1
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/non_maximum_suppression.hpp
@@ -0,0 +1,80 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+#define LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+
+#include "lidar_transfusion/ros_utils.hpp"
+
+#include <Eigen/Eigen>
+
+#include <autoware_perception_msgs/msg/detected_object.hpp>
+
+#include <string>
+#include <vector>
+
+namespace lidar_transfusion
+{
+using autoware_perception_msgs::msg::DetectedObject;
+
+enum class NMS_TYPE {
+  IoU_BEV
+  // IoU_3D
+  // Distance_2D
+  // Distance_3D
+};
+
+struct NMSParams
+{
+  NMS_TYPE nms_type_{};
+  std::vector<std::string> target_class_names_{};
+  double search_distance_2d_{};
+  double iou_threshold_{};
+  // double distance_threshold_{};
+};
+
+std::vector<bool> classNamesToBooleanMask(const std::vector<std::string> & class_names)
+{
+  std::vector<bool> mask;
+  constexpr std::size_t num_object_classification = 8;
+  mask.resize(num_object_classification);
+  for (const auto & class_name : class_names) {
+    const auto semantic_type = getSemanticType(class_name);
+    mask.at(semantic_type) = true;
+  }
+
+  return mask;
+}
+
+class NonMaximumSuppression
+{
+public:
+  void setParameters(const NMSParams &);
+
+  std::vector<DetectedObject> apply(const std::vector<DetectedObject> &);
+
+private:
+  bool isTargetLabel(const std::uint8_t);
+
+  bool isTargetPairObject(const DetectedObject &, const DetectedObject &);
+
+  Eigen::MatrixXd generateIoUMatrix(const std::vector<DetectedObject> &);
+
+  NMSParams params_{};
+  std::vector<bool> target_class_mask_{};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp
new file mode 100644
index 0000000000000..01435424aa248
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/postprocess/postprocess_kernel.hpp
@@ -0,0 +1,50 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+#define LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+
+#include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <thrust/device_vector.h>
+
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class PostprocessCuda
+{
+public:
+  explicit PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream);
+
+  cudaError_t generateDetectedBoxes3D_launch(
+    const float * cls_output, const float * box_output, const float * dir_cls_output,
+    std::vector<Box3D> & det_boxes3d, cudaStream_t stream);
+
+private:
+  TransfusionConfig config_;
+  cudaStream_t stream_;
+  cudaStream_t stream_event_;
+  cudaEvent_t start_, stop_;
+  thrust::device_vector<Box3D> boxes3d_d_;
+  thrust::device_vector<float> yaw_norm_thresholds_d_;
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
new file mode 100644
index 0000000000000..25095f4dc9c0b
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
@@ -0,0 +1,101 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+#define LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+
+#include "lidar_transfusion/cuda_utils.hpp"
+
+#include <tf2_ros/buffer.h>
+#include <tf2_ros/transform_listener.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
+#else
+#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
+#endif
+
+#include <list>
+#include <string>
+#include <utility>
+
+namespace lidar_transfusion
+{
+
+class DensificationParam
+{
+public:
+  DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames)
+  : world_frame_id_(std::move(world_frame_id)),
+    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
+  {
+  }
+
+  std::string world_frame_id() const { return world_frame_id_; }
+  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }
+
+private:
+  std::string world_frame_id_;
+  unsigned int pointcloud_cache_size_{1};
+};
+
+struct PointCloudWithTransform
+{
+  cuda::unique_ptr<float[]> points_d{nullptr};
+  std_msgs::msg::Header header;
+  size_t num_points{0};
+  Eigen::Affine3f affine_past2world;
+};
+
+class PointCloudDensification
+{
+public:
+  explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream);
+
+  bool enqueuePointCloud(
+    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+
+  double getCurrentTimestamp() const { return current_timestamp_; }
+  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
+  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
+  {
+    return pointcloud_cache_.begin();
+  }
+  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
+  {
+    return iter == pointcloud_cache_.end();
+  }
+  size_t getIdx(std::list<PointCloudWithTransform>::iterator iter)
+  {
+    return std::distance(pointcloud_cache_.begin(), iter);
+  }
+  size_t getCacheSize()
+  {
+    return std::distance(pointcloud_cache_.begin(), pointcloud_cache_.end());
+  }
+  unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }
+
+private:
+  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
+  void dequeue();
+
+  DensificationParam param_;
+  double current_timestamp_{0.0};
+  Eigen::Affine3f affine_world2current_;
+  std::list<PointCloudWithTransform> pointcloud_cache_;
+  cudaStream_t stream_;
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
new file mode 100644
index 0000000000000..c571990d84b51
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
@@ -0,0 +1,77 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved. SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
+#define LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
+
+#include "lidar_transfusion/cuda_utils.hpp"
+#include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#include <cuda_runtime_api.h>
+
+namespace lidar_transfusion
+{
+
+class PreprocessCuda
+{
+public:
+  PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream);
+
+  void generateVoxels(
+    float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features,
+    unsigned int * voxel_num, unsigned int * voxel_idxs);
+
+  cudaError_t generateVoxels_random_launch(
+    float * points, unsigned int points_size, unsigned int * mask, float * voxels);
+
+  cudaError_t generateBaseFeatures_launch(
+    unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
+    unsigned int * voxel_num, unsigned int * voxel_idxs);
+
+  // cudaError_t generateVoxelsInput_launch(
+  //   uint8_t * cloud_data, CloudInfo & cloud_info, unsigned int points_agg, unsigned int
+  //   points_size, float time_lag, float * affine_transform, float * points);
+
+  cudaError_t generateSweepPoints_launch(
+    const float * input_points, size_t points_size, int input_point_step, float time_lag,
+    const float * transform, float * output_points);
+
+private:
+  TransfusionConfig config_;
+  cudaStream_t stream_;
+  cuda::unique_ptr<unsigned int[]> mask_{nullptr};
+  cuda::unique_ptr<float[]> voxels_{nullptr};
+  unsigned int mask_size_;
+  unsigned int voxels_size_;
+};
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
new file mode 100644
index 0000000000000..3e3660e238473
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
@@ -0,0 +1,70 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
+#define LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
+
+#include "lidar_transfusion/cuda_utils.hpp"
+#include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
+#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_eigen/tf2_eigen.h>
+#else
+#include <tf2_eigen/tf2_eigen.hpp>
+#endif
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace lidar_transfusion
+{
+constexpr size_t AFF_MAT_SIZE = 16;         // 4x4 matrix
+constexpr size_t MAX_CLOUD_STEP_SIZE = 35;  // PointXYZIRCADT
+
+class VoxelGenerator
+{
+public:
+  explicit VoxelGenerator(
+    const DensificationParam & densification_param, const TransfusionConfig & config,
+    cudaStream_t & stream);
+  std::size_t generateSweepPoints(
+    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
+  bool enqueuePointCloud(
+    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
+  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(
+    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
+
+private:
+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};
+  TransfusionConfig config_;
+  cuda::unique_ptr<unsigned char[]> cloud_data_d_{nullptr};
+  cuda::unique_ptr<float[]> affine_past2current_d_{nullptr};
+  std::vector<float> points_;
+  cudaStream_t stream_;
+  CloudInfo cloud_info_;
+  bool is_initialized_{false};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
new file mode 100644
index 0000000000000..f013a02404a29
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
@@ -0,0 +1,39 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__ROS_UTILS_HPP_
+#define LIDAR_TRANSFUSION__ROS_UTILS_HPP_
+
+#include "lidar_transfusion/utils.hpp"
+
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_perception_msgs/msg/object_classification.hpp>
+#include <autoware_perception_msgs/msg/shape.hpp>
+
+#include <string>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+void box3DToDetectedObject(
+  const Box3D & box3d, const std::vector<std::string> & class_names,
+  autoware_perception_msgs::msg::DetectedObject & obj);
+
+uint8_t getSemanticType(const std::string & class_name);
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__ROS_UTILS_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp
new file mode 100644
index 0000000000000..31976de56a9da
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp
@@ -0,0 +1,161 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
+#define LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
+
+#include <cstdint>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class TransfusionConfig
+{
+public:
+  TransfusionConfig(
+    const std::vector<int64_t> & voxels_num, const std::vector<double> & point_cloud_range,
+    const std::vector<double> & voxel_size, const float circle_nms_dist_threshold,
+    const std::vector<double> & yaw_norm_thresholds, const float score_threshold)
+  {
+    if (voxels_num.size() == 3) {
+      max_voxels_ = voxels_num[2];
+
+      voxels_num_[0] = voxels_num[0];
+      voxels_num_[1] = voxels_num[1];
+      voxels_num_[2] = voxels_num[2];
+
+      min_voxel_size_ = voxels_num[0];
+      opt_voxel_size_ = voxels_num[1];
+      max_voxel_size_ = voxels_num[2];
+
+      min_points_size_ = voxels_num[0];
+      opt_points_size_ = voxels_num[1];
+      max_points_size_ = voxels_num[2];
+
+      min_coors_size_ = voxels_num[0];
+      opt_coors_size_ = voxels_num[1];
+      max_coors_size_ = voxels_num[2];
+    }
+    if (point_cloud_range.size() == 6) {
+      min_x_range_ = static_cast<float>(point_cloud_range[0]);
+      min_y_range_ = static_cast<float>(point_cloud_range[1]);
+      min_z_range_ = static_cast<float>(point_cloud_range[2]);
+      max_x_range_ = static_cast<float>(point_cloud_range[3]);
+      max_y_range_ = static_cast<float>(point_cloud_range[4]);
+      max_z_range_ = static_cast<float>(point_cloud_range[5]);
+    }
+    if (voxel_size.size() == 3) {
+      voxel_x_size_ = static_cast<float>(voxel_size[0]);
+      voxel_y_size_ = static_cast<float>(voxel_size[1]);
+      voxel_z_size_ = static_cast<float>(voxel_size[2]);
+    }
+    if (score_threshold > 0.0) {
+      score_threshold_ = score_threshold;
+    }
+    if (circle_nms_dist_threshold > 0.0) {
+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
+    }
+    yaw_norm_thresholds_ =
+      std::vector<float>(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end());
+    for (auto & yaw_norm_threshold : yaw_norm_thresholds_) {
+      yaw_norm_threshold =
+        (yaw_norm_threshold >= 0.0 && yaw_norm_threshold < 1.0) ? yaw_norm_threshold : 0.0;
+    }
+    grid_x_size_ = static_cast<std::size_t>((max_x_range_ - min_x_range_) / voxel_x_size_);
+    grid_y_size_ = static_cast<std::size_t>((max_y_range_ - min_y_range_) / voxel_y_size_);
+    grid_z_size_ = static_cast<std::size_t>((max_z_range_ - min_z_range_) / voxel_z_size_);
+  }
+
+  ///// INPUT PARAMETERS /////
+  const std::size_t cloud_capacity_{1000000};
+  ///// KERNEL PARAMETERS /////
+  const std::size_t threads_for_voxel_{256};  // threads number for a block
+  const std::size_t points_per_voxel_{20};
+  const std::size_t warp_size_{32};          // one warp(32 threads) for one pillar
+  const std::size_t pillars_per_block_{64};  // one thread deals with one pillar
+                                             // and a block has pillars_per_block threads
+  const std::size_t pillar_feature_size_{64};
+  std::size_t max_voxels_{60000};
+
+  ///// NETWORK PARAMETERS /////
+  const std::size_t batch_size_{1};
+  const std::size_t num_classes_{5};
+  const std::size_t num_point_feature_size_{5};  // x, y, z, intensity, lag
+  // the dimension of the input cloud
+  float min_x_range_{-76.8};
+  float max_x_range_{76.8};
+  float min_y_range_{-76.8};
+  float max_y_range_{76.8};
+  float min_z_range_{-3.0};
+  float max_z_range_{5.0};
+  // the size of a pillar
+  float voxel_x_size_{0.3};
+  float voxel_y_size_{0.3};
+  float voxel_z_size_{8.0};
+  const std::size_t out_size_factor_{4};
+  const std::size_t max_num_points_per_pillar_{points_per_voxel_};
+  const std::size_t num_point_values_{4};
+  const std::size_t num_proposals_{200};
+  // the number of feature maps for pillar scatter
+  const std::size_t num_feature_scatter_{pillar_feature_size_};
+  // the score threshold for classification
+  float score_threshold_{0.2};
+  float circle_nms_dist_threshold_{0.5};
+  std::vector<float> yaw_norm_thresholds_{0.3, 0.3, 0.3, 0.3, 0.0};
+  std::size_t max_num_pillars_{max_voxels_};
+  const std::size_t pillar_points_bev_{max_num_points_per_pillar_ * max_num_pillars_};
+  // the detected boxes result decode by (x, y, z, w, l, h, yaw)
+  const std::size_t num_box_values_{8};
+  // the input size of the 2D backbone network
+  std::size_t grid_x_size_{512};
+  std::size_t grid_y_size_{512};
+  std::size_t grid_z_size_{1};
+  // the output size of the 2D backbone network
+  std::size_t feature_x_size_{grid_x_size_ / out_size_factor_};
+  std::size_t feature_y_size_{grid_y_size_ / out_size_factor_};
+
+  ///// RUNTIME DIMENSIONS /////
+  std::vector<std::size_t> voxels_num_{5000, 30000, 60000};
+  // voxels
+  std::size_t min_voxel_size_{voxels_num_[0]};
+  std::size_t opt_voxel_size_{voxels_num_[1]};
+  std::size_t max_voxel_size_{voxels_num_[2]};
+
+  std::size_t min_point_in_voxel_size_{points_per_voxel_};
+  std::size_t opt_point_in_voxel_size_{points_per_voxel_};
+  std::size_t max_point_in_voxel_size_{points_per_voxel_};
+
+  std::size_t min_network_feature_size_{num_point_feature_size_};
+  std::size_t opt_network_feature_size_{num_point_feature_size_};
+  std::size_t max_network_feature_size_{num_point_feature_size_};
+
+  // num_points
+  std::size_t min_points_size_{voxels_num_[0]};
+  std::size_t opt_points_size_{voxels_num_[1]};
+  std::size_t max_points_size_{voxels_num_[2]};
+
+  // coors
+  std::size_t min_coors_size_{voxels_num_[0]};
+  std::size_t opt_coors_size_{voxels_num_[1]};
+  std::size_t max_coors_size_{voxels_num_[2]};
+
+  std::size_t min_coors_dim_size_{num_point_values_};
+  std::size_t opt_coors_dim_size_{num_point_values_};
+  std::size_t max_coors_dim_size_{num_point_values_};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp
new file mode 100644
index 0000000000000..a94c3c1871136
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_trt.hpp
@@ -0,0 +1,114 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
+#define LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
+
+#include "lidar_transfusion/cuda_utils.hpp"
+#include "lidar_transfusion/network/network_trt.hpp"
+#include "lidar_transfusion/postprocess/postprocess_kernel.hpp"
+#include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
+#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "lidar_transfusion/preprocess/voxel_generator.hpp"
+#include "lidar_transfusion/utils.hpp"
+#include "lidar_transfusion/visibility_control.hpp"
+
+#include <tier4_autoware_utils/system/stop_watch.hpp>
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class NetworkParam
+{
+public:
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
+  : onnx_path_(std::move(onnx_path)),
+    engine_path_(std::move(engine_path)),
+    trt_precision_(std::move(trt_precision))
+  {
+  }
+
+  std::string onnx_path() const { return onnx_path_; }
+  std::string engine_path() const { return engine_path_; }
+  std::string trt_precision() const { return trt_precision_; }
+
+private:
+  std::string onnx_path_;
+  std::string engine_path_;
+  std::string trt_precision_;
+};
+
+class LIDAR_TRANSFUSION_PUBLIC TransfusionTRT
+{
+public:
+  explicit TransfusionTRT(
+    const NetworkParam & network_param, const DensificationParam & densification_param,
+    const TransfusionConfig & config);
+  virtual ~TransfusionTRT();
+
+  bool detect(
+    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
+    std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing);
+
+protected:
+  void initPtr();
+
+  bool preprocess(const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+
+  bool inference();
+
+  bool postprocess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<NetworkTRT> network_trt_ptr_{nullptr};
+  std::unique_ptr<VoxelGenerator> vg_ptr_{nullptr};
+  std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
+    nullptr};
+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};
+  std::unique_ptr<PostprocessCuda> post_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  TransfusionConfig config_;
+
+  // input of pre-process
+
+  unsigned int voxel_features_size_{0};
+  unsigned int voxel_num_size_{0};
+  unsigned int voxel_idxs_size_{0};
+  unsigned int cls_size_{0};
+  unsigned int box_size_{0};
+  unsigned int dir_cls_size_{0};
+  cuda::unique_ptr<float[]> points_d_{nullptr};
+  cuda::unique_ptr<unsigned int> params_input_d_{nullptr};
+  cuda::unique_ptr<float[]> voxel_features_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_num_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_idxs_d_{nullptr};
+  cuda::unique_ptr<float[]> cls_output_d_{nullptr};
+  cuda::unique_ptr<float[]> box_output_d_{nullptr};
+  cuda::unique_ptr<float[]> dir_cls_output_d_{nullptr};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
new file mode 100644
index 0000000000000..e73fe7f055953
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
@@ -0,0 +1,103 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__UTILS_HPP_
+#define LIDAR_TRANSFUSION__UTILS_HPP_
+
+#include <cstddef>
+#include <iostream>
+#include <stdexcept>
+#include <unordered_map>
+
+namespace lidar_transfusion
+{
+
+struct Box3D
+{
+  int label;
+  float score;
+  float x;
+  float y;
+  float z;
+  float width;
+  float length;
+  float height;
+  float yaw;
+};
+
+struct CloudInfo
+{
+  uint32_t x_offset;
+  uint32_t y_offset;
+  uint32_t z_offset;
+  uint32_t intensity_offset;
+  uint8_t x_datatype;
+  uint8_t y_datatype;
+  uint8_t z_datatype;
+  uint8_t intensity_datatype;
+  uint8_t x_count;
+  uint8_t y_count;
+  uint8_t z_count;
+  uint8_t intensity_count;
+  uint32_t point_step;
+  bool is_bigendian;
+
+  CloudInfo()
+  : x_offset(0),
+    y_offset(4),
+    z_offset(8),
+    intensity_offset(12),
+    x_datatype(7),
+    y_datatype(7),
+    z_datatype(7),
+    intensity_datatype(7),
+    x_count(1),
+    y_count(1),
+    z_count(1),
+    intensity_count(1),
+    point_step(16),
+    is_bigendian(false)
+  {
+  }
+
+  bool operator!=(const CloudInfo & rhs) const
+  {
+    return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
+           intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
+           y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
+           intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
+           y_count != rhs.y_count || z_count != rhs.z_count ||
+           intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
+  }
+};
+
+enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE };
+
+// cspell: ignore divup
+template <typename T1, typename T2>
+unsigned int divup(const T1 a, const T2 b)
+{
+  if (a == 0) {
+    throw std::runtime_error("A dividend of divup isn't positive.");
+  }
+  if (b == 0) {
+    throw std::runtime_error("A divisor of divup isn't positive.");
+  }
+
+  return (a + b - 1) / b;
+}
+
+}  // namespace lidar_transfusion
+
+#endif  // LIDAR_TRANSFUSION__UTILS_HPP_
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp b/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp
new file mode 100644
index 0000000000000..aeab20906628a
--- /dev/null
+++ b/perception/lidar_transfusion/include/lidar_transfusion/visibility_control.hpp
@@ -0,0 +1,37 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
+#define LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
+
+////////////////////////////////////////////////////////////////////////////////
+#if defined(__WIN32)
+#if defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllexport)
+#define LIDAR_TRANSFUSION_LOCAL
+#else  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllimport)
+#define LIDAR_TRANSFUSION_LOCAL
+#endif  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#elif defined(__linux__)
+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
+#elif defined(__APPLE__)
+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
+#else
+#error "Unsupported Build Configuration"
+#endif
+
+#endif  // LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
diff --git a/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml b/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml
new file mode 100644
index 0000000000000..492eb8c4d59b7
--- /dev/null
+++ b/perception/lidar_transfusion/launch/lidar_transfusion.launch.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<launch>
+  <arg name="input/pointcloud" default="/sensing/lidar/pointcloud"/>
+  <arg name="output/objects" default="objects"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
+  <arg name="model_path" default="$(var data_path)/lidar_transfusion"/>
+  <arg name="model_param_path" default="$(find-pkg-share lidar_transfusion)/config/transfusion.param.yaml"/>
+  <arg name="class_remapper_param_path" default="$(find-pkg-share lidar_transfusion)/config/detection_class_remapper.param.yaml"/>
+  <arg name="build_only" default="false" description="shutdown node after TensorRT engine file is built"/>
+  <arg name="log_level" default="info"/>
+
+  <arg name="use_pointcloud_container" default="false" description="use pointcloud_container"/>
+  <arg name="pointcloud_container_name" default="pointcloud_container" description="pointcloud_container name"/>
+
+  <group if="$(var use_pointcloud_container)">
+    <load_composable_node target="$(var pointcloud_container_name)">
+      <composable_node pkg="lidar_transfusion" plugin="lidar_transfusion::LidarTransfusionNode" name="lidar_transfusion">
+        <remap from="~/input/pointcloud" to="$(var input/pointcloud)"/>
+        <remap from="~/output/objects" to="$(var output/objects)"/>
+        <param from="$(var model_param_path)" allow_substs="true"/>
+        <param from="$(var class_remapper_param_path)"/>
+
+        <!-- This parameter shall NOT be included in param file. See also: https://github.com/autowarefoundation/autoware.universe/pull/6167 -->
+        <param name="build_only" value="$(var build_only)"/>
+      </composable_node>
+    </load_composable_node>
+  </group>
+  <group unless="$(var use_pointcloud_container)">
+    <node pkg="lidar_transfusion" exec="lidar_transfusion_node" name="lidar_transfusion" output="screen" args="--ros-args --log-level $(var log_level)">
+      <remap from="~/input/pointcloud" to="$(var input/pointcloud)"/>
+      <remap from="~/output/objects" to="$(var output/objects)"/>
+      <param from="$(var model_param_path)" allow_substs="true"/>
+      <param from="$(var class_remapper_param_path)"/>
+
+      <!-- This parameter shall NOT be included in param file. See also: https://github.com/autowarefoundation/autoware.universe/pull/6167 -->
+      <param name="build_only" value="$(var build_only)"/>
+    </node>
+  </group>
+</launch>
diff --git a/perception/lidar_transfusion/lib/detection_class_remapper.cpp b/perception/lidar_transfusion/lib/detection_class_remapper.cpp
new file mode 100644
index 0000000000000..e093637402448
--- /dev/null
+++ b/perception/lidar_transfusion/lib/detection_class_remapper.cpp
@@ -0,0 +1,71 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <lidar_transfusion/detection_class_remapper.hpp>
+
+namespace lidar_transfusion
+{
+
+void DetectionClassRemapper::setParameters(
+  const std::vector<int64_t> & allow_remapping_by_area_matrix,
+  const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix)
+{
+  assert(allow_remapping_by_area_matrix.size() == min_area_matrix.size());
+  assert(allow_remapping_by_area_matrix.size() == max_area_matrix.size());
+  assert(
+    std::pow(static_cast<int>(std::sqrt(min_area_matrix.size())), 2) == min_area_matrix.size());
+
+  num_labels_ = static_cast<int>(std::sqrt(min_area_matrix.size()));
+
+  Eigen::Map<const Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic>>
+    allow_remapping_by_area_matrix_tmp(
+      allow_remapping_by_area_matrix.data(), num_labels_, num_labels_);
+  allow_remapping_by_area_matrix_ = allow_remapping_by_area_matrix_tmp.transpose()
+                                      .cast<bool>();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> min_area_matrix_tmp(
+    min_area_matrix.data(), num_labels_, num_labels_);
+  min_area_matrix_ = min_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> max_area_matrix_tmp(
+    max_area_matrix.data(), num_labels_, num_labels_);
+  max_area_matrix_ = max_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  min_area_matrix_ = min_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+  max_area_matrix_ = max_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+}
+
+void DetectionClassRemapper::mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg)
+{
+  for (auto & object : msg.objects) {
+    const float bev_area = object.shape.dimensions.x * object.shape.dimensions.y;
+
+    for (auto & classification : object.classification) {
+      auto & label = classification.label;
+
+      for (int i = 0; i < num_labels_; ++i) {
+        if (
+          allow_remapping_by_area_matrix_(label, i) && bev_area >= min_area_matrix_(label, i) &&
+          bev_area <= max_area_matrix_(label, i)) {
+          label = i;
+          break;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/network/network_trt.cpp b/perception/lidar_transfusion/lib/network/network_trt.cpp
new file mode 100644
index 0000000000000..a36890facceec
--- /dev/null
+++ b/perception/lidar_transfusion/lib/network/network_trt.cpp
@@ -0,0 +1,332 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/network/network_trt.hpp"
+
+#include <NvOnnxParser.h>
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+namespace lidar_transfusion
+{
+
+std::ostream & operator<<(std::ostream & os, const ProfileDimension & profile)
+{
+  std::string delim = "";
+  os << "min->[";
+  for (int i = 0; i < profile.min.nbDims; ++i) {
+    os << delim << profile.min.d[i];
+    delim = ", ";
+  }
+  os << "], opt->[";
+  delim = "";
+  for (int i = 0; i < profile.opt.nbDims; ++i) {
+    os << delim << profile.opt.d[i];
+    delim = ", ";
+  }
+  os << "], max->[";
+  delim = "";
+  for (int i = 0; i < profile.max.nbDims; ++i) {
+    os << delim << profile.max.d[i];
+    delim = ", ";
+  }
+  os << "]";
+  return os;
+}
+
+NetworkTRT::NetworkTRT(const TransfusionConfig & config) : config_(config)
+{
+  ProfileDimension voxels_dims = {
+    nvinfer1::Dims3(
+      config_.min_voxel_size_, config_.min_point_in_voxel_size_, config_.min_network_feature_size_),
+    nvinfer1::Dims3(
+      config_.opt_voxel_size_, config_.opt_point_in_voxel_size_, config_.opt_network_feature_size_),
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_,
+      config_.max_network_feature_size_)};
+  ProfileDimension num_points_dims = {
+    nvinfer1::Dims{1, {static_cast<int32_t>(config_.min_points_size_)}},
+    nvinfer1::Dims{1, {static_cast<int32_t>(config_.opt_points_size_)}},
+    nvinfer1::Dims{1, {static_cast<int32_t>(config_.max_points_size_)}}};
+  ProfileDimension coors_dims = {
+    nvinfer1::Dims2(config_.min_coors_size_, config_.min_coors_dim_size_),
+    nvinfer1::Dims2(config_.opt_coors_size_, config_.opt_coors_dim_size_),
+    nvinfer1::Dims2(config_.max_coors_size_, config_.max_coors_dim_size_)};
+  in_profile_dims_ = {voxels_dims, num_points_dims, coors_dims};
+}
+
+NetworkTRT::~NetworkTRT()
+{
+  context.reset();
+  runtime_.reset();
+  plan_.reset();
+  engine.reset();
+}
+
+bool NetworkTRT::init(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
+{
+  runtime_ =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  if (!runtime_) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create runtime" << std::endl;
+    return false;
+  }
+
+  bool success;
+  std::ifstream engine_file(engine_path);
+  if (engine_file.is_open()) {
+    success = loadEngine(engine_path);
+  } else {
+    auto log_thread = logger_.log_throttle(
+      nvinfer1::ILogger::Severity::kINFO,
+      "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5);
+    success = parseONNX(onnx_path, engine_path, precision);
+    logger_.stop_throttle(log_thread);
+  }
+  success &= createContext();
+
+  return success;
+}
+
+bool NetworkTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  auto profile = builder.createOptimizationProfile();
+
+  auto voxels_name = network.getInput(NetworkIO::voxels)->getName();
+  auto num_points_name = network.getInput(NetworkIO::num_points)->getName();
+  auto coors_name = network.getInput(NetworkIO::coors)->getName();
+
+  profile->setDimensions(
+    voxels_name, nvinfer1::OptProfileSelector::kMIN, in_profile_dims_[NetworkIO::voxels].min);
+  profile->setDimensions(
+    voxels_name, nvinfer1::OptProfileSelector::kOPT, in_profile_dims_[NetworkIO::voxels].opt);
+  profile->setDimensions(
+    voxels_name, nvinfer1::OptProfileSelector::kMAX, in_profile_dims_[NetworkIO::voxels].max);
+
+  profile->setDimensions(
+    num_points_name, nvinfer1::OptProfileSelector::kMIN,
+    in_profile_dims_[NetworkIO::num_points].min);
+  profile->setDimensions(
+    num_points_name, nvinfer1::OptProfileSelector::kOPT,
+    in_profile_dims_[NetworkIO::num_points].opt);
+  profile->setDimensions(
+    num_points_name, nvinfer1::OptProfileSelector::kMAX,
+    in_profile_dims_[NetworkIO::num_points].max);
+
+  profile->setDimensions(
+    coors_name, nvinfer1::OptProfileSelector::kMIN, in_profile_dims_[NetworkIO::coors].min);
+  profile->setDimensions(
+    coors_name, nvinfer1::OptProfileSelector::kOPT, in_profile_dims_[NetworkIO::coors].opt);
+  profile->setDimensions(
+    coors_name, nvinfer1::OptProfileSelector::kMAX, in_profile_dims_[NetworkIO::coors].max);
+
+  config.addOptimizationProfile(profile);
+  return true;
+}
+
+bool NetworkTRT::createContext()
+{
+  if (!engine) {
+    tensorrt_common::LOG_ERROR(logger_)
+      << "Failed to create context: Engine was not created" << std::endl;
+    return false;
+  }
+
+  context =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine->createExecutionContext());
+  if (!context) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create context" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+bool NetworkTRT::parseONNX(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+  const size_t workspace_size)
+{
+  auto builder =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create builder" << std::endl;
+    return false;
+  }
+
+  auto config =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create config" << std::endl;
+    return false;
+  }
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size);
+#else
+  config->setMaxWorkspaceSize(workspace_size);
+#endif
+  if (precision == "fp16") {
+    if (builder->platformHasFastFp16()) {
+      tensorrt_common::LOG_INFO(logger_) << "Using TensorRT FP16 Inference" << std::endl;
+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    } else {
+      tensorrt_common::LOG_INFO(logger_)
+        << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
+    }
+  }
+
+  const auto flag =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network =
+    tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
+  if (!network) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create network" << std::endl;
+    return false;
+  }
+
+  auto parser = tensorrt_common::TrtUniquePtr<nvonnxparser::IParser>(
+    nvonnxparser::createParser(*network, logger_));
+  parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
+
+  if (!setProfile(*builder, *network, *config)) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to set profile" << std::endl;
+    return false;
+  }
+
+  plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
+    builder->buildSerializedNetwork(*network, *config));
+  if (!plan_) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create serialized network" << std::endl;
+    return false;
+  }
+  engine = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
+  if (!engine) {
+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create engine" << std::endl;
+    return false;
+  }
+
+  return saveEngine(engine_path);
+}
+
+bool NetworkTRT::saveEngine(const std::string & engine_path)
+{
+  tensorrt_common::LOG_INFO(logger_) << "Writing to " << engine_path << std::endl;
+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
+  file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
+  return validateNetworkIO();
+}
+
+bool NetworkTRT::loadEngine(const std::string & engine_path)
+{
+  std::ifstream engine_file(engine_path);
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  tensorrt_common::LOG_INFO(logger_) << "Loaded engine from " << engine_path << std::endl;
+  return validateNetworkIO();
+}
+
+bool NetworkTRT::validateNetworkIO()
+{
+  // Whether the number of IO match the expected size
+  if (engine->getNbIOTensors() != NetworkIO::ENUM_SIZE) {
+    tensorrt_common::LOG_ERROR(logger_)
+      << "Invalid network IO. Expected size: " << NetworkIO::ENUM_SIZE
+      << ". Actual size: " << engine->getNbIOTensors() << "." << std::endl;
+    throw std::runtime_error("Failed to initialize TRT network.");
+  }
+  for (int i = 0; i < NetworkIO::ENUM_SIZE; ++i) {
+    tensors_names_.push_back(engine->getIOTensorName(i));
+  }
+
+  // Log the network IO
+  std::string tensors = std::accumulate(
+    tensors_names_.begin(), tensors_names_.end(), std::string(),
+    [](const std::string & a, const std::string & b) -> std::string { return a + b + " "; });
+  tensorrt_common::LOG_INFO(logger_) << "Network IO: " << tensors << std::endl;
+
+  // Whether the current engine input profile match the config input profile
+  for (int i = 0; i <= NetworkIO::coors; ++i) {
+    ProfileDimension engine_dims{
+      engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kMIN),
+      engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kOPT),
+      engine->getProfileShape(tensors_names_[i], 0, nvinfer1::OptProfileSelector::kMAX)};
+
+    tensorrt_common::LOG_INFO(logger_)
+      << "Profile for " << tensors_names_[i] << ": " << engine_dims << std::endl;
+
+    if (engine_dims != in_profile_dims_[i]) {
+      tensorrt_common::LOG_ERROR(logger_)
+        << "Invalid network input dimension. Config: " << in_profile_dims_[i]
+        << ". Please change the input profile or delete the engine file and build engine again."
+        << std::endl;
+      throw std::runtime_error("Failed to initialize TRT network.");
+    }
+  }
+
+  // Whether the IO tensor shapes match the network config, -1 for dynamic size
+  validateTensorShape(
+    NetworkIO::voxels, {-1, static_cast<int>(config_.points_per_voxel_),
+                        static_cast<int>(config_.num_point_feature_size_)});
+  validateTensorShape(NetworkIO::num_points, {-1});
+  validateTensorShape(NetworkIO::coors, {-1, static_cast<int>(config_.num_point_values_)});
+  auto cls_score = validateTensorShape(
+    NetworkIO::cls_score,
+    {static_cast<int>(config_.batch_size_), static_cast<int>(config_.num_classes_),
+     static_cast<int>(config_.num_proposals_)});
+  tensorrt_common::LOG_INFO(logger_) << "Network num classes: " << cls_score.d[1] << std::endl;
+  validateTensorShape(
+    NetworkIO::dir_pred,
+    {static_cast<int>(config_.batch_size_), 2, static_cast<int>(config_.num_proposals_)});  // x, y
+  validateTensorShape(
+    NetworkIO::bbox_pred,
+    {static_cast<int>(config_.batch_size_), static_cast<int>(config_.num_box_values_),
+     static_cast<int>(config_.num_proposals_)});
+
+  return true;
+}
+
+const char * NetworkTRT::getTensorName(NetworkIO name)
+{
+  return tensors_names_.at(name);
+}
+
+nvinfer1::Dims NetworkTRT::validateTensorShape(NetworkIO name, const std::vector<int> shape)
+{
+  auto tensor_shape = engine->getTensorShape(tensors_names_[name]);
+  if (tensor_shape.nbDims != static_cast<int>(shape.size())) {
+    tensorrt_common::LOG_ERROR(logger_)
+      << "Invalid tensor shape for " << tensors_names_[name] << ". Expected size: " << shape.size()
+      << ". Actual size: " << tensor_shape.nbDims << "." << std::endl;
+    throw std::runtime_error("Failed to initialize TRT network.");
+  }
+  for (int i = 0; i < tensor_shape.nbDims; ++i) {
+    if (tensor_shape.d[i] != static_cast<int>(shape[i])) {
+      tensorrt_common::LOG_ERROR(logger_)
+        << "Invalid tensor shape for " << tensors_names_[name] << ". Expected: " << shape[i]
+        << ". Actual: " << tensor_shape.d[i] << "." << std::endl;
+      throw std::runtime_error("Failed to initialize TRT network.");
+    }
+  }
+  return tensor_shape;
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu b/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu
new file mode 100644
index 0000000000000..dcb60831825bb
--- /dev/null
+++ b/perception/lidar_transfusion/lib/postprocess/circle_nms_kernel.cu
@@ -0,0 +1,144 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Modified from
+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
+
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include "lidar_transfusion/cuda_utils.hpp"
+#include "lidar_transfusion/postprocess/circle_nms_kernel.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#include <thrust/host_vector.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK_NMS = 16;
+}  // namespace
+
+namespace lidar_transfusion
+{
+
+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
+{
+  return powf(a->x - b->x, 2) + powf(a->y - b->y, 2);
+}
+
+// cspell: ignore divup
+__global__ void circleNMS_Kernel(
+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
+  const float dist2d_pow_threshold, std::uint64_t * mask)
+{
+  // params: boxes (N,)
+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
+
+  const auto row_start = blockIdx.y;
+  const auto col_start = blockIdx.x;
+
+  if (row_start > col_start) return;
+
+  const std::size_t row_size =
+    fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const std::size_t col_size =
+    fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const Box3D * cur_box = boxes + cur_box_idx;
+
+    std::uint64_t t = 0;
+    std::size_t start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (std::size_t i = start; i < col_size; i++) {
+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
+        t |= 1ULL << i;
+      }
+    }
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+cudaError_t circleNMS_launch(
+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
+  std::size_t col_blocks, const float distance_threshold,
+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
+{
+  const float dist2d_pow_thres = powf(distance_threshold, 2);
+
+  dim3 blocks(col_blocks, col_blocks);
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
+    thrust::raw_pointer_cast(mask.data()));
+
+  return cudaGetLastError();
+}
+
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
+{
+  const auto num_boxes3d = boxes3d.size();
+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
+
+  CHECK_CUDA_ERROR(
+    circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
+
+  // memcpy device to host
+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
+
+  // generate keep_mask
+  std::vector<std::uint64_t> remv_h(col_blocks);
+  thrust::host_vector<bool> keep_mask_h(keep_mask.size());
+  std::size_t num_to_keep = 0;
+  for (std::size_t i = 0; i < num_boxes3d; i++) {
+    auto nblock = i / THREADS_PER_BLOCK_NMS;
+    auto inblock = i % THREADS_PER_BLOCK_NMS;
+
+    if (!(remv_h[nblock] & (1ULL << inblock))) {
+      keep_mask_h[i] = true;
+      num_to_keep++;
+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
+      for (std::size_t j = nblock; j < col_blocks; j++) {
+        remv_h[j] |= p[j];
+      }
+    } else {
+      keep_mask_h[i] = false;
+    }
+  }
+
+  // memcpy host to device
+  keep_mask = keep_mask_h;
+
+  return num_to_keep;
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp b/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp
new file mode 100644
index 0000000000000..c1179a3f6673d
--- /dev/null
+++ b/perception/lidar_transfusion/lib/postprocess/non_maximum_suppression.cpp
@@ -0,0 +1,104 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/postprocess/non_maximum_suppression.hpp"
+
+#include <object_recognition_utils/geometry.hpp>
+#include <object_recognition_utils/object_recognition_utils.hpp>
+#include <tier4_autoware_utils/geometry/geometry.hpp>
+
+namespace lidar_transfusion
+{
+
+void NonMaximumSuppression::setParameters(const NMSParams & params)
+{
+  assert(params.search_distance_2d_ >= 0.0);
+  assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0);
+
+  params_ = params;
+  target_class_mask_ = classNamesToBooleanMask(params.target_class_names_);
+}
+
+bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
+{
+  if (label >= target_class_mask_.size()) {
+    return false;
+  }
+  return target_class_mask_.at(label);
+}
+
+bool NonMaximumSuppression::isTargetPairObject(
+  const DetectedObject & object1, const DetectedObject & object2)
+{
+  const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification);
+  const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification);
+
+  if (isTargetLabel(label1) && isTargetLabel(label2)) {
+    return true;
+  }
+
+  const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
+  const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
+    object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2));
+  return sqr_dist_2d <= search_sqr_dist_2d;
+}
+
+Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
+  const std::vector<DetectedObject> & input_objects)
+{
+  // NOTE: row = target objects to be suppressed, col = source objects to be compared
+  Eigen::MatrixXd triangular_matrix =
+    Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size());
+  for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) {
+    for (std::size_t source_i = 0; source_i < target_i; ++source_i) {
+      const auto & target_obj = input_objects.at(target_i);
+      const auto & source_obj = input_objects.at(source_i);
+      if (!isTargetPairObject(target_obj, source_obj)) {
+        continue;
+      }
+
+      if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+        const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj);
+        triangular_matrix(target_i, source_i) = iou;
+        // NOTE: If the target object has any objects with iou > iou_threshold, it
+        // will be suppressed regardless of later results.
+        if (iou > params_.iou_threshold_) {
+          break;
+        }
+      }
+    }
+  }
+
+  return triangular_matrix;
+}
+
+std::vector<DetectedObject> NonMaximumSuppression::apply(
+  const std::vector<DetectedObject> & input_objects)
+{
+  Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects);
+
+  std::vector<DetectedObject> output_objects;
+  output_objects.reserve(input_objects.size());
+  for (std::size_t i = 0; i < input_objects.size(); ++i) {
+    const auto value = iou_matrix.row(i).maxCoeff();
+    if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+      if (value <= params_.iou_threshold_) {
+        output_objects.emplace_back(input_objects.at(i));
+      }
+    }
+  }
+
+  return output_objects;
+}
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu b/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu
new file mode 100644
index 0000000000000..bca23e9961b40
--- /dev/null
+++ b/perception/lidar_transfusion/lib/postprocess/postprocess_kernel.cu
@@ -0,0 +1,145 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/postprocess/circle_nms_kernel.hpp"
+#include "lidar_transfusion/postprocess/postprocess_kernel.hpp"
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+
+namespace lidar_transfusion
+{
+const size_t THREADS_PER_BLOCK = 256;
+
+struct is_score_greater
+{
+  is_score_greater(float t) : t_(t) {}
+
+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
+
+private:
+  float t_{0.0};
+};
+
+struct is_kept
+{
+  __device__ bool operator()(const bool keep) { return keep; }
+};
+
+struct score_greater
+{
+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
+};
+
+__device__ inline float sigmoid(float x)
+{
+  return 1.0f / (1.0f + expf(-x));
+}
+
+__global__ void generateBoxes3D_kernel(
+  const float * __restrict__ cls_output, const float * __restrict__ box_output,
+  const float * __restrict__ dir_cls_output, const float voxel_size_x, const float voxel_size_y,
+  const float min_x_range, const float min_y_range, const int num_proposals, const int num_classes,
+  const int num_point_values, const float * __restrict__ yaw_norm_thresholds,
+  Box3D * __restrict__ det_boxes3d)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= num_proposals) {
+    return;
+  }
+
+  int class_id = 0;
+  float max_score = cls_output[point_idx];
+
+#pragma unroll
+  for (int i = 1; i < num_classes; i++) {
+    float score = cls_output[i * num_proposals + point_idx];
+    if (score > max_score) {
+      max_score = score;
+      class_id = i;
+    }
+  }
+
+  // yaw validation
+  const float yaw_sin = dir_cls_output[point_idx];
+  const float yaw_cos = dir_cls_output[point_idx + num_proposals];
+  const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos);
+
+  det_boxes3d[point_idx].label = class_id;
+  det_boxes3d[point_idx].score = yaw_norm >= yaw_norm_thresholds[class_id] ? max_score : 0.f;
+  det_boxes3d[point_idx].x = box_output[point_idx] * num_point_values * voxel_size_x + min_x_range;
+  det_boxes3d[point_idx].y =
+    box_output[point_idx + num_proposals] * num_point_values * voxel_size_y + min_y_range;
+  det_boxes3d[point_idx].z = box_output[point_idx + 2 * num_proposals];
+  det_boxes3d[point_idx].width = expf(box_output[point_idx + 3 * num_proposals]);
+  det_boxes3d[point_idx].length = expf(box_output[point_idx + 4 * num_proposals]);
+  det_boxes3d[point_idx].height = expf(box_output[point_idx + 5 * num_proposals]);
+  det_boxes3d[point_idx].yaw =
+    atan2f(dir_cls_output[point_idx], dir_cls_output[point_idx + num_proposals]);
+}
+
+PostprocessCuda::PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream)
+: config_(config), stream_(stream)
+{
+  boxes3d_d_ = thrust::device_vector<Box3D>(config_.num_proposals_);
+  yaw_norm_thresholds_d_ = thrust::device_vector<float>(
+    config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end());
+}
+
+// cspell: ignore divup
+cudaError_t PostprocessCuda::generateDetectedBoxes3D_launch(
+  const float * cls_output, const float * box_output, const float * dir_cls_output,
+  std::vector<Box3D> & det_boxes3d, cudaStream_t stream)
+{
+  dim3 threads = {THREADS_PER_BLOCK};
+  dim3 blocks = {divup(config_.num_proposals_, threads.x)};
+
+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
+    cls_output, box_output, dir_cls_output, config_.voxel_x_size_, config_.voxel_y_size_,
+    config_.min_x_range_, config_.min_y_range_, config_.num_proposals_, config_.num_classes_,
+    config_.num_point_values_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
+    thrust::raw_pointer_cast(boxes3d_d_.data()));
+
+  // suppress by score
+  const auto num_det_boxes3d = thrust::count_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
+    is_score_greater(config_.score_threshold_));
+  if (num_det_boxes3d == 0) {
+    return cudaGetLastError();
+  }
+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
+    is_score_greater(config_.score_threshold_));
+
+  // sort by score
+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
+
+  // supress by NMS
+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
+  const auto num_final_det_boxes3d =
+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
+    final_det_boxes3d_d.begin(), is_kept());
+
+  // memcpy device to host
+  det_boxes3d.resize(num_final_det_boxes3d);
+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
+  return cudaGetLastError();
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
new file mode 100644
index 0000000000000..774b3a05d71c1
--- /dev/null
+++ b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
@@ -0,0 +1,117 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
+
+#include <pcl_ros/transforms.hpp>
+
+#include <pcl_conversions/pcl_conversions.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_eigen/tf2_eigen.h>
+#else
+#include <tf2_eigen/tf2_eigen.hpp>
+#endif
+
+#include <boost/optional.hpp>
+
+#include <string>
+#include <utility>
+
+namespace
+{
+
+boost::optional<geometry_msgs::msg::Transform> getTransform(
+  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+  const std::string & source_frame_id, const rclcpp::Time & time)
+{
+  try {
+    geometry_msgs::msg::TransformStamped transform_stamped;
+    transform_stamped = tf_buffer.lookupTransform(
+      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+    return transform_stamped.transform;
+  } catch (tf2::TransformException & ex) {
+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
+    return boost::none;
+  }
+}
+
+Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+{
+  Eigen::Affine3f a;
+  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+  return a;
+}
+
+}  // namespace
+
+namespace lidar_transfusion
+{
+
+PointCloudDensification::PointCloudDensification(
+  const DensificationParam & param, cudaStream_t & stream)
+: param_(param), stream_(stream)
+{
+}
+
+bool PointCloudDensification::enqueuePointCloud(
+  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+{
+  const auto header = pointcloud_msg.header;
+
+  if (param_.pointcloud_cache_size() > 1) {
+    auto transform_world2current =
+      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+    if (!transform_world2current) {
+      return false;
+    }
+    auto affine_world2current = transformToEigen(transform_world2current.get());
+
+    enqueue(pointcloud_msg, affine_world2current);
+  } else {
+    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
+  }
+
+  dequeue();
+
+  return true;
+}
+
+void PointCloudDensification::enqueue(
+  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+{
+  affine_world2current_ = affine_world2current;
+  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+
+  assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 1);
+  auto points_d = cuda::make_unique<float[]>(
+    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float));
+
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
+    cudaMemcpyHostToDevice, stream_));
+
+  PointCloudWithTransform pointcloud = {
+    std::move(points_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+
+  pointcloud_cache_.push_front(std::move(pointcloud));
+}
+
+void PointCloudDensification::dequeue()
+{
+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
+    pointcloud_cache_.pop_back();
+  }
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
new file mode 100644
index 0000000000000..b8f9ae998fd4f
--- /dev/null
+++ b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
@@ -0,0 +1,208 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved. SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lidar_transfusion/cuda_utils.hpp"
+#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+
+namespace lidar_transfusion
+{
+
+PreprocessCuda::PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream)
+: stream_(stream), config_(config)
+{
+  mask_size_ = config_.grid_z_size_ * config_.grid_y_size_ * config_.grid_x_size_;
+  voxels_size_ = config_.grid_z_size_ * config_.grid_y_size_ * config_.grid_x_size_ *
+                   config_.max_num_points_per_pillar_ * config_.num_point_feature_size_ +
+                 1;
+  mask_ = cuda::make_unique<unsigned int[]>(mask_size_);
+  voxels_ = cuda::make_unique<float[]>(voxels_size_);
+}
+
+void PreprocessCuda::generateVoxels(
+  float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features,
+  unsigned int * voxel_num, unsigned int * voxel_idxs)
+{
+  cuda::clear_async(mask_.get(), mask_size_, stream_);
+  cuda::clear_async(voxels_.get(), voxels_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  CHECK_CUDA_ERROR(generateVoxels_random_launch(points, points_size, mask_.get(), voxels_.get()));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  CHECK_CUDA_ERROR(generateBaseFeatures_launch(
+    mask_.get(), voxels_.get(), pillar_num, voxel_features, voxel_num, voxel_idxs));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+}
+
+__global__ void generateVoxels_random_kernel(
+  float * points, unsigned int points_size, float min_x_range, float max_x_range, float min_y_range,
+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
+  float pillar_z_size, int grid_y_size, int grid_x_size, int points_per_voxel, unsigned int * mask,
+  float * voxels)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= points_size) return;
+
+  float x = points[point_idx * 5];
+  float y = points[point_idx * 5 + 1];
+  float z = points[point_idx * 5 + 2];
+  float i = points[point_idx * 5 + 3];
+  float t = points[point_idx * 5 + 4];
+
+  if (
+    x <= min_x_range || x >= max_x_range || y <= min_y_range || y >= max_y_range ||
+    z <= min_z_range || z >= max_z_range)
+    return;
+
+  int voxel_idx = floorf((x - min_x_range) / pillar_x_size);
+  int voxel_idy = floorf((y - min_y_range) / pillar_y_size);
+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
+
+  unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1);
+
+  if (point_id >= points_per_voxel) return;
+  float * address = voxels + (voxel_index * points_per_voxel + point_id) * 5;
+  atomicExch(address + 0, x);
+  atomicExch(address + 1, y);
+  atomicExch(address + 2, z);
+  atomicExch(address + 3, i);
+  atomicExch(address + 4, t);
+}
+
+cudaError_t PreprocessCuda::generateVoxels_random_launch(
+  float * points, unsigned int points_size, unsigned int * mask, float * voxels)
+{
+  int threadNum = config_.threads_for_voxel_;
+  dim3 blocks((points_size + threadNum - 1) / threadNum);
+  dim3 threads(threadNum);
+  generateVoxels_random_kernel<<<blocks, threads, 0, stream_>>>(
+    points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_,
+    config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_,
+    config_.voxel_y_size_, config_.voxel_z_size_, config_.grid_y_size_, config_.grid_x_size_,
+    config_.points_per_voxel_, mask, voxels);
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
+
+__global__ void generateBaseFeatures_kernel(
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, float points_per_voxel,
+  float max_voxels, unsigned int * pillar_num, float * voxel_features, unsigned int * voxel_num,
+  unsigned int * voxel_idxs)
+{
+  unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
+
+  if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return;
+
+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
+  unsigned int count = mask[voxel_index];
+  if (!(count > 0)) return;
+  count = count < points_per_voxel ? count : points_per_voxel;
+
+  unsigned int current_pillarId = 0;
+  current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId >= max_voxels) return;
+
+  voxel_num[current_pillarId] = count;
+
+  uint4 idx = {0, 0, voxel_idy, voxel_idx};
+  ((uint4 *)voxel_idxs)[current_pillarId] = idx;
+
+  for (int i = 0; i < count; i++) {
+    int inIndex = voxel_index * points_per_voxel + i;
+    int outIndex = current_pillarId * points_per_voxel + i;
+    voxel_features[outIndex * 5] = voxels[inIndex * 5];
+    voxel_features[outIndex * 5 + 1] = voxels[inIndex * 5 + 1];
+    voxel_features[outIndex * 5 + 2] = voxels[inIndex * 5 + 2];
+    voxel_features[outIndex * 5 + 3] = voxels[inIndex * 5 + 3];
+    voxel_features[outIndex * 5 + 4] = voxels[inIndex * 5 + 4];
+  }
+
+  // clear buffer for next infer
+  atomicExch(mask + voxel_index, 0);
+}
+
+// create 4 channels
+cudaError_t PreprocessCuda::generateBaseFeatures_launch(
+  unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
+  unsigned int * voxel_num, unsigned int * voxel_idxs)
+{
+  dim3 threads = {32, 32};
+  dim3 blocks = {divup(config_.grid_x_size_, threads.x), divup(config_.grid_y_size_, threads.y)};
+
+  generateBaseFeatures_kernel<<<blocks, threads, 0, stream_>>>(
+    mask, voxels, config_.grid_y_size_, config_.grid_x_size_, config_.points_per_voxel_,
+    config_.max_voxels_, pillar_num, voxel_features, voxel_num, voxel_idxs);
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
+
+__global__ void generateSweepPoints_kernel(
+  const float * input_points, size_t points_size, int input_point_step, float time_lag,
+  const float * transform_array, int num_features, float * output_points)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= points_size) return;
+
+  const float input_x = input_points[point_idx * input_point_step + 0];
+  const float input_y = input_points[point_idx * input_point_step + 1];
+  const float input_z = input_points[point_idx * input_point_step + 2];
+  const float intensity = input_points[point_idx * input_point_step + 3];
+
+  output_points[point_idx * num_features] = transform_array[0] * input_x +
+                                            transform_array[4] * input_y +
+                                            transform_array[8] * input_z + transform_array[12];
+  output_points[point_idx * num_features + 1] = transform_array[1] * input_x +
+                                                transform_array[5] * input_y +
+                                                transform_array[9] * input_z + transform_array[13];
+  output_points[point_idx * num_features + 2] = transform_array[2] * input_x +
+                                                transform_array[6] * input_y +
+                                                transform_array[10] * input_z + transform_array[14];
+  output_points[point_idx * num_features + 3] = intensity;
+  output_points[point_idx * num_features + 4] = time_lag;
+}
+
+cudaError_t PreprocessCuda::generateSweepPoints_launch(
+  const float * input_points, size_t points_size, int input_point_step, float time_lag,
+  const float * transform_array, float * output_points)
+{
+  int threadNum = config_.threads_for_voxel_;
+  dim3 blocks((points_size + threadNum - 1) / threadNum);
+  dim3 threads(threadNum);
+
+  generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
+    input_points, points_size, input_point_step, time_lag, transform_array,
+    config_.num_point_feature_size_, output_points);
+
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp
new file mode 100644
index 0000000000000..7072e8491af66
--- /dev/null
+++ b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp
@@ -0,0 +1,142 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/preprocess/voxel_generator.hpp"
+
+#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+
+#include <sensor_msgs/point_cloud2_iterator.hpp>
+
+#include <type_traits>
+
+namespace lidar_transfusion
+{
+
+std::ostream & operator<<(std::ostream & os, const CloudInfo & info)
+{
+  os << "x_offset: " << static_cast<int>(info.x_offset) << std::endl;
+  os << "y_offset: " << static_cast<int>(info.y_offset) << std::endl;
+  os << "z_offset: " << static_cast<int>(info.z_offset) << std::endl;
+  os << "intensity_offset: " << static_cast<int>(info.intensity_offset) << std::endl;
+  os << "x_datatype: " << static_cast<int>(info.x_datatype) << std::endl;
+  os << "y_datatype: " << static_cast<int>(info.y_datatype) << std::endl;
+  os << "z_datatype: " << static_cast<int>(info.z_datatype) << std::endl;
+  os << "intensity_datatype: " << static_cast<int>(info.intensity_datatype) << std::endl;
+  os << "x_count: " << static_cast<int>(info.x_count) << std::endl;
+  os << "y_count: " << static_cast<int>(info.y_count) << std::endl;
+  os << "z_count: " << static_cast<int>(info.z_count) << std::endl;
+  os << "intensity_count: " << static_cast<int>(info.intensity_count) << std::endl;
+  os << "point_step: " << static_cast<int>(info.point_step) << std::endl;
+  os << "is_bigendian: " << static_cast<int>(info.is_bigendian) << std::endl;
+  return os;
+}
+
+VoxelGenerator::VoxelGenerator(
+  const DensificationParam & densification_param, const TransfusionConfig & config,
+  cudaStream_t & stream)
+: config_(config), stream_(stream)
+{
+  pd_ptr_ = std::make_unique<PointCloudDensification>(densification_param, stream_);
+  pre_ptr_ = std::make_unique<PreprocessCuda>(config_, stream_);
+  cloud_data_d_ = cuda::make_unique<unsigned char[]>(config_.cloud_capacity_ * MAX_CLOUD_STEP_SIZE);
+  affine_past2current_d_ = cuda::make_unique<float[]>(AFF_MAT_SIZE);
+}
+
+bool VoxelGenerator::enqueuePointCloud(
+  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
+{
+  return pd_ptr_->enqueuePointCloud(msg, tf_buffer);
+}
+
+std::size_t VoxelGenerator::generateSweepPoints(
+  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
+{
+  if (!is_initialized_) {
+    initCloudInfo(msg);
+    std::stringstream ss;
+    ss << "Input point cloud information: " << std::endl << cloud_info_;
+    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
+
+    CloudInfo default_cloud_info;
+    if (cloud_info_ != default_cloud_info) {
+      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
+      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
+      throw std::runtime_error("Input point cloud has unsupported format");
+    }
+    is_initialized_ = true;
+  }
+
+  size_t point_counter{0};
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+       pc_cache_iter++) {
+    auto sweep_num_points = pc_cache_iter->num_points;
+    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
+      RCLCPP_WARN_STREAM(
+        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
+                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
+                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
+      break;
+    }
+    auto shift = point_counter * config_.num_point_feature_size_;
+
+    auto affine_past2current =
+      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
+    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
+    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
+
+    float time_lag = static_cast<float>(
+      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
+
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
+      cudaMemcpyHostToDevice, stream_));
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    pre_ptr_->generateSweepPoints_launch(
+      pc_cache_iter->points_d.get(), sweep_num_points, cloud_info_.point_step / sizeof(float),
+      time_lag, affine_past2current_d_.get(), points_d.get() + shift);
+    point_counter += sweep_num_points;
+  }
+
+  return point_counter;
+}
+
+void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
+{
+  std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) =
+    getFieldInfo(msg, "x");
+  std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) =
+    getFieldInfo(msg, "y");
+  std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) =
+    getFieldInfo(msg, "z");
+  std::tie(
+    cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) =
+    getFieldInfo(msg, "intensity");
+  cloud_info_.point_step = msg.point_step;
+  cloud_info_.is_bigendian = msg.is_bigendian;
+}
+
+std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
+  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
+{
+  for (const auto & field : msg.fields) {
+    if (field.name == field_name) {
+      return std::make_tuple(field.offset, field.datatype, field.count);
+    }
+  }
+  throw std::runtime_error("Missing field: " + field_name);
+}
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/ros_utils.cpp b/perception/lidar_transfusion/lib/ros_utils.cpp
new file mode 100644
index 0000000000000..0ecb34faf732d
--- /dev/null
+++ b/perception/lidar_transfusion/lib/ros_utils.cpp
@@ -0,0 +1,83 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/ros_utils.hpp"
+
+#include <object_recognition_utils/object_recognition_utils.hpp>
+#include <tier4_autoware_utils/geometry/geometry.hpp>
+#include <tier4_autoware_utils/math/constants.hpp>
+
+namespace lidar_transfusion
+{
+
+using Label = autoware_perception_msgs::msg::ObjectClassification;
+
+void box3DToDetectedObject(
+  const Box3D & box3d, const std::vector<std::string> & class_names,
+  autoware_perception_msgs::msg::DetectedObject & obj)
+{
+  obj.existence_probability = box3d.score;
+
+  // classification
+  autoware_perception_msgs::msg::ObjectClassification classification;
+  classification.probability = 1.0f;
+  if (box3d.label >= 0 && static_cast<size_t>(box3d.label) < class_names.size()) {
+    classification.label = getSemanticType(class_names[box3d.label]);
+  } else {
+    classification.label = Label::UNKNOWN;
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"), "Unexpected label: UNKNOWN is set.");
+  }
+
+  if (object_recognition_utils::isCarLikeVehicle(classification.label)) {
+    obj.kinematics.orientation_availability =
+      autoware_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN;
+  }
+
+  obj.classification.emplace_back(classification);
+
+  // pose and shape
+  // mmdet3d yaw format to ros yaw format
+  float yaw = box3d.yaw + tier4_autoware_utils::pi / 2;
+  obj.kinematics.pose_with_covariance.pose.position =
+    tier4_autoware_utils::createPoint(box3d.x, box3d.y, box3d.z);
+  obj.kinematics.pose_with_covariance.pose.orientation =
+    tier4_autoware_utils::createQuaternionFromYaw(yaw);
+  obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+  obj.shape.dimensions =
+    tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height);
+}
+
+uint8_t getSemanticType(const std::string & class_name)
+{
+  if (class_name == "CAR") {
+    return Label::CAR;
+  } else if (class_name == "TRUCK") {
+    return Label::TRUCK;
+  } else if (class_name == "BUS") {
+    return Label::BUS;
+  } else if (class_name == "TRAILER") {
+    return Label::TRAILER;
+  } else if (class_name == "MOTORCYCLE") {
+    return Label::MOTORCYCLE;
+  } else if (class_name == "BICYCLE") {
+    return Label::BICYCLE;
+  } else if (class_name == "PEDESTRIAN") {
+    return Label::PEDESTRIAN;
+  } else {  // CONSTRUCTION_VEHICLE, BARRIER, TRAFFIC_CONE
+    return Label::UNKNOWN;
+  }
+}
+
+}  // namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/lib/transfusion_trt.cpp b/perception/lidar_transfusion/lib/transfusion_trt.cpp
new file mode 100644
index 0000000000000..c876c0906162b
--- /dev/null
+++ b/perception/lidar_transfusion/lib/transfusion_trt.cpp
@@ -0,0 +1,205 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/transfusion_trt.hpp"
+
+#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "lidar_transfusion/transfusion_config.hpp"
+
+#include <tier4_autoware_utils/math/constants.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+TransfusionTRT::TransfusionTRT(
+  const NetworkParam & network_param, const DensificationParam & densification_param,
+  const TransfusionConfig & config)
+: config_(config)
+{
+  network_trt_ptr_ = std::make_unique<NetworkTRT>(config_);
+
+  network_trt_ptr_->init(
+    network_param.onnx_path(), network_param.engine_path(), network_param.trt_precision());
+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_, stream_);
+  stop_watch_ptr_ = std::make_unique<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>>();
+  stop_watch_ptr_->tic("processing/inner");
+  initPtr();
+
+  CHECK_CUDA_ERROR(cudaStreamCreate(&stream_));
+}
+
+TransfusionTRT::~TransfusionTRT()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
+void TransfusionTRT::initPtr()
+{
+  // point cloud to voxels
+  voxel_features_size_ =
+    config_.max_voxels_ * config_.max_num_points_per_pillar_ * config_.num_point_feature_size_;
+  voxel_num_size_ = config_.max_voxels_;
+  voxel_idxs_size_ = config_.max_voxels_ * config_.num_point_values_;
+
+  // output of TRT -- input of post-process
+  cls_size_ = config_.num_proposals_ * config_.num_classes_;
+  box_size_ = config_.num_proposals_ * config_.num_box_values_;
+  dir_cls_size_ = config_.num_proposals_ * 2;  // x, y
+  cls_output_d_ = cuda::make_unique<float[]>(cls_size_);
+  box_output_d_ = cuda::make_unique<float[]>(box_size_);
+  dir_cls_output_d_ = cuda::make_unique<float[]>(dir_cls_size_);
+
+  params_input_d_ = cuda::make_unique<unsigned int>();
+  voxel_features_d_ = cuda::make_unique<float[]>(voxel_features_size_);
+  voxel_num_d_ = cuda::make_unique<unsigned int[]>(voxel_num_size_);
+  voxel_idxs_d_ = cuda::make_unique<unsigned int[]>(voxel_idxs_size_);
+  points_d_ = cuda::make_unique<float[]>(config_.cloud_capacity_ * config_.num_point_feature_size_);
+  pre_ptr_ = std::make_unique<PreprocessCuda>(config_, stream_);
+  post_ptr_ = std::make_unique<PostprocessCuda>(config_, stream_);
+}
+
+bool TransfusionTRT::detect(
+  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
+  std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing)
+{
+  stop_watch_ptr_->toc("processing/inner", true);
+  if (!preprocess(msg, tf_buffer)) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"), "Fail to preprocess and skip to detect.");
+    return false;
+  }
+  proc_timing.emplace(
+    "debug/processing_time/preprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+  if (!inference()) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"), "Fail to inference and skip to detect.");
+    return false;
+  }
+  proc_timing.emplace(
+    "debug/processing_time/inference_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+  if (!postprocess(det_boxes3d)) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"), "Fail to postprocess and skip to detect.");
+    return false;
+  }
+  proc_timing.emplace(
+    "debug/processing_time/postprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+  return true;
+}
+
+bool TransfusionTRT::preprocess(
+  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
+{
+  if (!vg_ptr_->enqueuePointCloud(msg, tf_buffer)) {
+    return false;
+  }
+
+  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+  cuda::clear_async(params_input_d_.get(), 1, stream_);
+  cuda::clear_async(
+    points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  const auto count = vg_ptr_->generateSweepPoints(msg, points_d_);
+  RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count);
+
+  pre_ptr_->generateVoxels(
+    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+    voxel_idxs_d_.get());
+  unsigned int params_input;
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  if (params_input < config_.min_voxel_size_) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"),
+      "Too few voxels (" << params_input << ") for actual optimization profile ("
+                         << config_.min_voxel_size_ << ")");
+    return false;
+  }
+
+  if (params_input > config_.max_voxels_) {
+    params_input = config_.max_voxels_;
+  }
+  RCLCPP_DEBUG_STREAM(
+    rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input);
+
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get());
+  network_trt_ptr_->context->setInputShape(
+    network_trt_ptr_->getTensorName(NetworkIO::voxels),
+    nvinfer1::Dims3{
+      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.max_num_points_per_pillar_),
+      static_cast<int32_t>(config_.num_point_feature_size_)});
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get());
+  network_trt_ptr_->context->setInputShape(
+    network_trt_ptr_->getTensorName(NetworkIO::num_points),
+    nvinfer1::Dims{1, {static_cast<int32_t>(params_input)}});
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get());
+  network_trt_ptr_->context->setInputShape(
+    network_trt_ptr_->getTensorName(NetworkIO::coors),
+    nvinfer1::Dims2{
+      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.num_point_values_)});
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get());
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get());
+  network_trt_ptr_->context->setTensorAddress(
+    network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get());
+  return true;
+}
+
+bool TransfusionTRT::inference()
+{
+  auto status = network_trt_ptr_->context->enqueueV3(stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  if (!status) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"), "Fail to enqueue and skip to detect.");
+    return false;
+  }
+  return true;
+}
+
+bool TransfusionTRT::postprocess(std::vector<Box3D> & det_boxes3d)
+{
+  CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+  return true;
+}
+
+}  //  namespace lidar_transfusion
diff --git a/perception/lidar_transfusion/package.xml b/perception/lidar_transfusion/package.xml
new file mode 100644
index 0000000000000..29643d0957eb1
--- /dev/null
+++ b/perception/lidar_transfusion/package.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>lidar_transfusion</name>
+  <version>1.0.0</version>
+  <description>The lidar_transfusion package</description>
+  <maintainer email="amadeusz.szymko.2@tier4.jp">Amadeusz Szymko</maintainer>
+  <maintainer email="kenzo.lobos@tier4.jp">Kenzo Lobos-Tsunekawa</maintainer>
+  <maintainer email="satoshi.tanaka@tier4.jp">Satoshi Tanaka</maintainer>
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+  <buildtool_depend>autoware_cmake</buildtool_depend>
+
+  <depend>autoware_perception_msgs</depend>
+  <depend>launch_ros</depend>
+  <depend>object_recognition_utils</depend>
+  <depend>pcl_ros</depend>
+  <depend>rclcpp</depend>
+  <depend>rclcpp_components</depend>
+  <depend>tensorrt_common</depend>
+  <depend>tf2_eigen</depend>
+  <depend>tf2_geometry_msgs</depend>
+  <depend>tf2_sensor_msgs</depend>
+  <depend>tier4_autoware_utils</depend>
+
+  <test_depend>ament_cmake_ros</test_depend>
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/perception/lidar_transfusion/schema/detection_class_remapper.schema.json b/perception/lidar_transfusion/schema/detection_class_remapper.schema.json
new file mode 100644
index 0000000000000..8aaefa295fcc0
--- /dev/null
+++ b/perception/lidar_transfusion/schema/detection_class_remapper.schema.json
@@ -0,0 +1,72 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for Detection Class Remapper",
+  "type": "object",
+  "definitions": {
+    "detection_class_remapper": {
+      "type": "object",
+      "properties": {
+        "allow_remapping_by_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          },
+          "description": "Whether to allow remapping of classes. The order of 8x8 matrix classes comes from ObjectClassification msg.",
+          "default": [
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        },
+        "min_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "Minimum area for specific class to consider class remapping.",
+          "default": [
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.1, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        },
+        "max_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "Maximum area for specific class to consider class remapping.",
+          "default": [
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 999.999, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        }
+      },
+      "required": ["allow_remapping_by_area_matrix", "min_area_matrix", "max_area_matrix"],
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "/**": {
+      "type": "object",
+      "properties": {
+        "ros__parameters": {
+          "$ref": "#/definitions/detection_class_remapper"
+        }
+      },
+      "required": ["ros__parameters"],
+      "additionalProperties": false
+    }
+  },
+  "required": ["/**"],
+  "additionalProperties": false
+}
diff --git a/perception/lidar_transfusion/schema/transfusion.schema.json b/perception/lidar_transfusion/schema/transfusion.schema.json
new file mode 100644
index 0000000000000..41d8d887236a8
--- /dev/null
+++ b/perception/lidar_transfusion/schema/transfusion.schema.json
@@ -0,0 +1,160 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for LiDAR TransFusion Node",
+  "type": "object",
+  "definitions": {
+    "transfusion": {
+      "type": "object",
+      "properties": {
+        "class_names": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "An array of class names will be predicted.",
+          "default": ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"],
+          "uniqueItems": true
+        },
+        "trt_precision": {
+          "type": "string",
+          "description": "A precision of TensorRT engine.",
+          "default": "fp16",
+          "enum": ["fp16", "fp32"]
+        },
+        "voxels_num": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          },
+          "description": "A maximum number of voxels [min, opt, max].",
+          "default": [5000, 30000, 60000]
+        },
+        "point_cloud_range": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "An array of distance ranges of each class.",
+          "default": [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0],
+          "minItems": 6,
+          "maxItems": 6
+        },
+        "voxel_size": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "Voxels size [x, y, z].",
+          "default": [0.3, 0.3, 8.0],
+          "minItems": 3,
+          "maxItems": 3
+        },
+        "onnx_path": {
+          "type": "string",
+          "description": "A path to ONNX model file.",
+          "default": "$(var model_path)/transfusion.onnx",
+          "pattern": "\\.onnx$"
+        },
+        "engine_path": {
+          "type": "string",
+          "description": "A path to TensorRT engine file.",
+          "default": "$(var model_path)/transfusion.engine",
+          "pattern": "\\.engine$"
+        },
+        "down_sample_factor": {
+          "type": "integer",
+          "description": "A scale factor of downsampling points",
+          "default": 1,
+          "minimum": 1
+        },
+        "densification_num_past_frames": {
+          "type": "integer",
+          "description": "A number of past frames to be considered as same input frame.",
+          "default": 1,
+          "minimum": 0
+        },
+        "densification_world_frame_id": {
+          "type": "string",
+          "description": "A name of frame id where world coordinates system is defined with respect to.",
+          "default": "map"
+        },
+        "circle_nms_dist_threshold": {
+          "type": "number",
+          "description": "A distance threshold between detections in NMS.",
+          "default": 0.5,
+          "minimum": 0.0
+        },
+        "iou_nms_target_class_names": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "An array of class names to be target in NMS.",
+          "default": ["CAR"],
+          "uniqueItems": true
+        },
+        "iou_nms_search_distance_2d": {
+          "type": "number",
+          "description": "A maximum distance value to search the nearest objects.",
+          "default": 10.0,
+          "minimum": 0.0
+        },
+        "iou_nms_threshold": {
+          "type": "number",
+          "description": "A threshold value of NMS using IoU score.",
+          "default": 0.1,
+          "minimum": 0.0,
+          "maximum": 1.0
+        },
+        "yaw_norm_thresholds": {
+          "type": "array",
+          "items": {
+            "type": "number",
+            "minimum": 0.0,
+            "maximum": 1.0
+          },
+          "description": "A thresholds array of direction vectors norm, all of objects with vector norm less than this threshold are ignored.",
+          "default": [0.3, 0.3, 0.3, 0.3, 0.0]
+        },
+        "score_threshold": {
+          "type": "number",
+          "description": "A threshold value of confidence score, all of objects with score less than this threshold are ignored.",
+          "default": 0.2,
+          "minimum": 0.0
+        }
+      },
+      "required": [
+        "class_names",
+        "trt_precision",
+        "voxels_num",
+        "point_cloud_range",
+        "voxel_size",
+        "onnx_path",
+        "engine_path",
+        "densification_num_past_frames",
+        "densification_world_frame_id",
+        "circle_nms_dist_threshold",
+        "iou_nms_target_class_names",
+        "iou_nms_search_distance_2d",
+        "iou_nms_threshold",
+        "yaw_norm_thresholds",
+        "score_threshold"
+      ],
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "/**": {
+      "type": "object",
+      "properties": {
+        "ros__parameters": {
+          "$ref": "#/definitions/transfusion"
+        }
+      },
+      "required": ["ros__parameters"],
+      "additionalProperties": false
+    }
+  },
+  "required": ["/**"],
+  "additionalProperties": false
+}
diff --git a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp
new file mode 100644
index 0000000000000..9313015002973
--- /dev/null
+++ b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp
@@ -0,0 +1,178 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/lidar_transfusion_node.hpp"
+
+#include "lidar_transfusion/utils.hpp"
+
+namespace lidar_transfusion
+{
+
+LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options)
+: Node("lidar_transfusion", options), tf_buffer_(this->get_clock())
+{
+  auto descriptor = rcl_interfaces::msg::ParameterDescriptor{}.set__read_only(true);
+  // network
+  class_names_ = this->declare_parameter<std::vector<std::string>>("class_names", descriptor);
+  const std::string trt_precision =
+    this->declare_parameter<std::string>("trt_precision", descriptor);
+  const auto voxels_num = this->declare_parameter<std::vector<int64_t>>("voxels_num", descriptor);
+  const auto point_cloud_range =
+    this->declare_parameter<std::vector<double>>("point_cloud_range", descriptor);
+  const auto voxel_size = this->declare_parameter<std::vector<double>>("voxel_size", descriptor);
+  const std::string onnx_path = this->declare_parameter<std::string>("onnx_path", descriptor);
+  const std::string engine_path = this->declare_parameter<std::string>("engine_path", descriptor);
+
+  if (point_cloud_range.size() != 6) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"),
+      "The size of point_cloud_range != 6: use the default parameters.");
+  }
+  if (voxel_size.size() != 3) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_transfusion"),
+      "The size of voxel_size != 3: use the default parameters.");
+  }
+
+  // pre-process
+  const std::string densification_world_frame_id =
+    this->declare_parameter<std::string>("densification_world_frame_id", descriptor);
+  const int densification_num_past_frames =
+    this->declare_parameter<int64_t>("densification_num_past_frames", descriptor);
+
+  // post-process
+  const float circle_nms_dist_threshold =
+    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold", descriptor));
+  {  // IoU NMS
+    NMSParams p;
+    p.nms_type_ = NMS_TYPE::IoU_BEV;
+    p.target_class_names_ =
+      this->declare_parameter<std::vector<std::string>>("iou_nms_target_class_names", descriptor);
+    p.search_distance_2d_ =
+      this->declare_parameter<double>("iou_nms_search_distance_2d", descriptor);
+    p.iou_threshold_ = this->declare_parameter<double>("iou_nms_threshold", descriptor);
+    iou_bev_nms_.setParameters(p);
+  }
+  const auto yaw_norm_thresholds =
+    this->declare_parameter<std::vector<double>>("yaw_norm_thresholds", descriptor);
+  const float score_threshold =
+    static_cast<float>(this->declare_parameter<double>("score_threshold", descriptor));
+
+  NetworkParam network_param(onnx_path, engine_path, trt_precision);
+  DensificationParam densification_param(
+    densification_world_frame_id, densification_num_past_frames);
+  TransfusionConfig config(
+    voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds,
+    score_threshold);
+
+  const auto allow_remapping_by_area_matrix =
+    this->declare_parameter<std::vector<int64_t>>("allow_remapping_by_area_matrix", descriptor);
+  const auto min_area_matrix =
+    this->declare_parameter<std::vector<double>>("min_area_matrix", descriptor);
+  const auto max_area_matrix =
+    this->declare_parameter<std::vector<double>>("max_area_matrix", descriptor);
+  detection_class_remapper_.setParameters(
+    allow_remapping_by_area_matrix, min_area_matrix, max_area_matrix);
+
+  detector_ptr_ = std::make_unique<TransfusionTRT>(network_param, densification_param, config);
+
+  cloud_sub_ = this->create_subscription<sensor_msgs::msg::PointCloud2>(
+    "~/input/pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
+    std::bind(&LidarTransfusionNode::cloudCallback, this, std::placeholders::_1));
+
+  objects_pub_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjects>(
+    "~/output/objects", rclcpp::QoS(1));
+
+  published_time_pub_ = std::make_unique<tier4_autoware_utils::PublishedTimePublisher>(this);
+
+  // initialize debug tool
+  {
+    using tier4_autoware_utils::DebugPublisher;
+    using tier4_autoware_utils::StopWatch;
+    stop_watch_ptr_ = std::make_unique<StopWatch<std::chrono::milliseconds>>();
+    debug_publisher_ptr_ = std::make_unique<DebugPublisher>(this, this->get_name());
+    stop_watch_ptr_->tic("cyclic");
+    stop_watch_ptr_->tic("processing/total");
+  }
+
+  if (this->declare_parameter<bool>("build_only", false, descriptor)) {
+    RCLCPP_INFO(this->get_logger(), "TensorRT engine is built and shutdown node.");
+    rclcpp::shutdown();
+  }
+}
+
+void LidarTransfusionNode::cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr msg)
+{
+  const auto objects_sub_count =
+    objects_pub_->get_subscription_count() + objects_pub_->get_intra_process_subscription_count();
+  if (objects_sub_count < 1) {
+    return;
+  }
+
+  if (stop_watch_ptr_) {
+    stop_watch_ptr_->toc("processing/total", true);
+  }
+
+  std::vector<Box3D> det_boxes3d;
+  std::unordered_map<std::string, double> proc_timing;
+  bool is_success = detector_ptr_->detect(*msg, tf_buffer_, det_boxes3d, proc_timing);
+  if (!is_success) {
+    return;
+  }
+
+  std::vector<autoware_perception_msgs::msg::DetectedObject> raw_objects;
+  raw_objects.reserve(det_boxes3d.size());
+  for (const auto & box3d : det_boxes3d) {
+    autoware_perception_msgs::msg::DetectedObject obj;
+    box3DToDetectedObject(box3d, class_names_, obj);
+    raw_objects.emplace_back(obj);
+  }
+
+  autoware_perception_msgs::msg::DetectedObjects output_msg;
+  output_msg.header = msg->header;
+  output_msg.objects = iou_bev_nms_.apply(raw_objects);
+
+  detection_class_remapper_.mapClasses(output_msg);
+
+  if (objects_sub_count > 0) {
+    objects_pub_->publish(output_msg);
+    published_time_pub_->publish_if_subscribed(objects_pub_, output_msg.header.stamp);
+  }
+
+  // add processing time for debug
+  if (debug_publisher_ptr_ && stop_watch_ptr_) {
+    const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic", true);
+    const double processing_time_ms = stop_watch_ptr_->toc("processing/total", true);
+    const double pipeline_latency_ms =
+      std::chrono::duration<double, std::milli>(
+        std::chrono::nanoseconds(
+          (this->get_clock()->now() - output_msg.header.stamp).nanoseconds()))
+        .count();
+    debug_publisher_ptr_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/cyclic_time_ms", cyclic_time_ms);
+    debug_publisher_ptr_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/pipeline_latency_ms", pipeline_latency_ms);
+    debug_publisher_ptr_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/processing_time/total_ms", processing_time_ms);
+    for (const auto & [topic, time_ms] : proc_timing) {
+      debug_publisher_ptr_->publish<tier4_debug_msgs::msg::Float64Stamped>(topic, time_ms);
+    }
+  }
+}
+
+}  // namespace lidar_transfusion
+
+#include "rclcpp_components/register_node_macro.hpp"
+
+RCLCPP_COMPONENTS_REGISTER_NODE(lidar_transfusion::LidarTransfusionNode)