diff --git a/perception/autoware_image_projection_based_fusion/CMakeLists.txt b/perception/autoware_image_projection_based_fusion/CMakeLists.txt
index 73d305d2ab2a8..423ed0222de5a 100644
--- a/perception/autoware_image_projection_based_fusion/CMakeLists.txt
+++ b/perception/autoware_image_projection_based_fusion/CMakeLists.txt
@@ -24,6 +24,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED
   src/roi_detected_object_fusion/node.cpp
   src/segmentation_pointcloud_fusion/node.cpp
   src/roi_pointcloud_fusion/node.cpp
+  src/mask_cluster_fusion/node.cpp
 )
 
 target_link_libraries(${PROJECT_NAME}
@@ -51,6 +52,11 @@ rclcpp_components_register_node(${PROJECT_NAME}
   EXECUTABLE roi_pointcloud_fusion_node
 )
 
+rclcpp_components_register_node(${PROJECT_NAME}
+  PLUGIN "autoware::image_projection_based_fusion::MaskClusterFusionNode"
+  EXECUTABLE mask_cluster_fusion_node
+)
+
 set(CUDA_VERBOSE OFF)
 
 # set flags for CUDA availability
diff --git a/perception/autoware_image_projection_based_fusion/README.md b/perception/autoware_image_projection_based_fusion/README.md
index c976697b0396d..d71c90f6fd5ee 100644
--- a/perception/autoware_image_projection_based_fusion/README.md
+++ b/perception/autoware_image_projection_based_fusion/README.md
@@ -68,9 +68,10 @@ The rclcpp::TimerBase timer could not break a for loop, therefore even if time i
 
 ### Detail description of each fusion's algorithm is in the following links
 
-| Fusion Name                | Description                                                                                      | Detail                                       |
-| -------------------------- | ------------------------------------------------------------------------------------------------ | -------------------------------------------- |
-| roi_cluster_fusion         | Overwrite a classification label of clusters by that of ROIs from a 2D object detector.           | [link](./docs/roi-cluster-fusion.md)          |
-| roi_detected_object_fusion | Overwrite a classification label of detected objects by that of ROIs from a 2D object detector.   | [link](./docs/roi-detected-object-fusion.md)  |
-| pointpainting_fusion       | Paint the point cloud with the ROIs from a 2D object detector and feed to a 3D object detector.   | [link](./docs/pointpainting-fusion.md)        |
-| roi_pointcloud_fusion      | Matching pointcloud with ROIs from a 2D object detector to detect unknown-labeled objects         | [link](./docs/roi-pointcloud-fusion.md)       |
+| Fusion Name                | Description                                                                                         | Detail                                        |
+| -------------------------- | --------------------------------------------------------------------------------------------------- | --------------------------------------------- |
+| roi_cluster_fusion         | Overwrite a classification label of clusters by that of ROIs from a 2D object detector.              | [link](./docs/roi-cluster-fusion.md)          |
+| roi_detected_object_fusion | Overwrite a classification label of detected objects by that of ROIs from a 2D object detector.      | [link](./docs/roi-detected-object-fusion.md)  |
+| pointpainting_fusion       | Paint the point cloud with the ROIs from a 2D object detector and feed to a 3D object detector.      | [link](./docs/pointpainting-fusion.md)        |
+| roi_pointcloud_fusion      | Matching pointcloud with ROIs from a 2D object detector to detect unknown-labeled objects            | [link](./docs/roi-pointcloud-fusion.md)       |
+| mask_cluster_fusion        | Overwrite a classification label of clusters by that of masks from an instance segmentation model.   | [link](./docs/mask-cluster-fusion.md)         |
diff --git a/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml b/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml
new file mode 100644
index 0000000000000..25af5703bb265
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml
@@ -0,0 +1,5 @@
+/**:
+  ros__parameters:
+    fusion_distance: 100.0
+    fusion_ratio: 0.4
+    remove_unknown: false
diff --git a/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png b/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png
new file mode 100644
index 0000000000000..7c9a06f82b89e
Binary files /dev/null and b/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png differ
diff --git a/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md b/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md
new file mode 100644
index 0000000000000..d76ac010fb955
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md
@@ -0,0 +1,38 @@
+# mask_cluster_fusion
+
+## Purpose
+
+The `mask_cluster_fusion` package assigns the label of a mask to a pointcloud cluster, using the mask information produced by a 2D instance segmentation model.
+
+## Inner-workings / Algorithms
+
+1. The clusters are projected onto the image plane.
+2. For each cluster, find the mask it overlaps best, i.e. the mask that receives the most projected points.
+3. Assign the label of that mask to the cluster, provided the points falling on the mask account for a sufficient share (`fusion_ratio`) of the cluster's projected points.
+4. If several clusters end up matched to the same mask, keep the label only on the cluster with the largest overlap; the other clusters stay unlabeled (see the sketch below).
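+
+The per-mask deduplication in steps 2-4 can be illustrated with a minimal standalone sketch. The tallies below are hypothetical, and the node itself works on projected pointcloud points and ROS messages rather than these bare structs:
+
+```cpp
+#include <cstddef>
+#include <iostream>
+#include <map>
+#include <utility>
+#include <vector>
+
+// For every cluster: how many of its points project into the image at all,
+// and how many of those land on each instance mask.
+struct ClusterTally
+{
+  double total_projected_points;
+  std::vector<double> points_on_mask;
+};
+
+int main()
+{
+  const double fusion_ratio = 0.4;  // same meaning as the node parameter
+
+  const std::vector<ClusterTally> clusters = {
+    {100.0, {60.0, 0.0}},  // cluster 0: 60 % of its points on mask 0
+    {50.0, {30.0, 5.0}},   // cluster 1: also mostly mask 0, but fewer points
+    {40.0, {0.0, 10.0}},   // cluster 2: only 25 % on mask 1 -> below the ratio
+  };
+
+  // mask index -> (cluster index, points on that mask); a later cluster
+  // replaces an earlier one only if it puts more points on the same mask.
+  std::map<std::size_t, std::pair<std::size_t, double>> best_match;
+  for (std::size_t i = 0; i < clusters.size(); ++i) {
+    const auto & tally = clusters[i].points_on_mask;
+    std::size_t best_mask = 0;
+    for (std::size_t m = 1; m < tally.size(); ++m) {
+      if (tally[m] > tally[best_mask]) best_mask = m;
+    }
+    if (tally[best_mask] == 0.0) continue;  // no overlap with any mask
+    if (tally[best_mask] / clusters[i].total_projected_points < fusion_ratio) {
+      continue;  // overlap too small to trust the label
+    }
+    const auto it = best_match.find(best_mask);
+    if (it == best_match.end() || it->second.second < tally[best_mask]) {
+      best_match[best_mask] = {i, tally[best_mask]};
+    }
+  }
+
+  for (const auto & [mask, match] : best_match) {
+    std::cout << "mask " << mask << " labels cluster " << match.first << "\n";
+  }
+  // Prints "mask 0 labels cluster 0": cluster 1 lost the tie for mask 0,
+  // and cluster 2 never passed the fusion_ratio gate.
+}
+```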
+
+![mask_cluster_fusion_image](./images/mask_cluster_fusion.png)
+
+## Inputs / Outputs
+
+### Input
+
+| Name                     | Type                                                        | Description                                                |
+| ------------------------ | ----------------------------------------------------------- | ---------------------------------------------------------- |
+| `input`                  | `tier4_perception_msgs::msg::DetectedObjectsWithFeature`    | clustered pointcloud                                       |
+| `input/camera_info[0-7]` | `sensor_msgs::msg::CameraInfo`                              | camera information to project 3d points onto image planes  |
+| `input/masks[0-7]`       | `autoware_internal_perception_msgs::msg::SegmentationMask`  | masks from each image                                      |
+| `input/image_raw[0-7]`   | `sensor_msgs::msg::Image`                                   | images for visualization                                   |
+
+### Output
+
+| Name                     | Type                                                      | Description                |
+| ------------------------ | --------------------------------------------------------- | -------------------------- |
+| `output`                 | `tier4_perception_msgs::msg::DetectedObjectsWithFeature`  | labeled cluster pointcloud |
+| `~/debug/image_raw[0-7]` | `sensor_msgs::msg::Image`                                 | images for visualization   |
+
+## Parameters
+
+{{ json_to_markdown("perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json") }}
+
+## Assumptions / Known limits
diff --git a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
index c9b4f1f7b903f..3f035ba0201eb 100644
--- a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
+++ b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
@@ -20,6 +20,7 @@
 #include
 
 #include
+#include <autoware_internal_perception_msgs/msg/segmentation_mask.hpp>
 #include
 #include
 #include
@@ -54,6 +55,7 @@
 using sensor_msgs::msg::PointCloud2;
 using tier4_perception_msgs::msg::DetectedObjectsWithFeature;
 using tier4_perception_msgs::msg::DetectedObjectWithFeature;
 using PointCloud = pcl::PointCloud<pcl::PointXYZ>;
+using autoware_internal_perception_msgs::msg::SegmentationMask;
 using autoware_perception_msgs::msg::ObjectClassification;
 template <class TargetMsg3D, class ObjType, class Msg2D>
diff --git a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp
new file mode 100644
index 0000000000000..857e3a0257341
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp
@@ -0,0 +1,62 @@
+// Copyright 2024 The Autoware Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
+#define AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
+
+#include "autoware/image_projection_based_fusion/fusion_node.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+#include <sensor_msgs/msg/region_of_interest.hpp>
+
+#include <string>
+#include <vector>
+
+namespace autoware::image_projection_based_fusion
+{
+struct MatchedCluster
+{
+  uint32_t mask_index{};
+  uint32_t cluster_index{};
+  uint32_t valid_point_number{};
+  sensor_msgs::msg::RegionOfInterest roi;
+};
+using MatchedClusters = std::vector<MatchedCluster>;
+
+class MaskClusterFusionNode
+: public FusionNode<DetectedObjectsWithFeature, DetectedObjectWithFeature, SegmentationMask>
+{
+public:
+  explicit MaskClusterFusionNode(const rclcpp::NodeOptions & options);
+
+protected:
+  void preprocess(DetectedObjectsWithFeature & output_msg) override;
+
+  void postprocess(DetectedObjectsWithFeature & output_msg) override;
+
+  void fuseOnSingleImage(
+    const DetectedObjectsWithFeature & input_cluster_msg, const std::size_t image_id,
+    const SegmentationMask & input_mask_msg, const sensor_msgs::msg::CameraInfo & camera_info,
+    DetectedObjectsWithFeature & output_object_msg) override;
+
+  bool out_of_scope(const DetectedObjectWithFeature & obj) override;
+
+  double fusion_distance_;
+  double fusion_ratio_;
+  bool remove_unknown_;
+};
+}  // namespace autoware::image_projection_based_fusion
+
+#endif  // AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
diff --git a/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml b/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml
new file mode 100644
index 0000000000000..3bbce2c5fd9c5
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml
@@ -0,0 +1,72 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/perception/autoware_image_projection_based_fusion/package.xml b/perception/autoware_image_projection_based_fusion/package.xml
index 335c4712071e0..c0542e52159ce 100644
--- a/perception/autoware_image_projection_based_fusion/package.xml
+++ b/perception/autoware_image_projection_based_fusion/package.xml
@@ -17,6 +17,7 @@
   <buildtool_depend>autoware_cmake</buildtool_depend>
 
   <depend>autoware_euclidean_cluster</depend>
+  <depend>autoware_internal_perception_msgs</depend>
   <depend>autoware_lidar_centerpoint</depend>
   <depend>autoware_object_recognition_utils</depend>
   <depend>autoware_perception_msgs</depend>
diff --git a/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json b/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json
new file mode 100644
index 0000000000000..2435d00aeb7f7
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json
@@ -0,0 +1,42 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for Mask Cluster Fusion Node",
+  "type": "object",
+  "definitions": {
+    "mask_cluster_fusion": {
+      "type": "object",
+      "properties": {
+        "fusion_distance": {
+          "type": "number",
+          "description": "If the detected object's distance is less than this value, the fusion will be processed.",
+          "default": 100.0,
+          "minimum": 0.0
+        },
+        "fusion_ratio": {
+          "type": "number",
+          "description": "The minimum ratio between the cluster points projected onto the mask and the cluster's total projected points.",
+          "default": 0.4,
+          "minimum": 0.0
+        },
+        "remove_unknown": {
+          "type": "boolean",
+          "description": "If this parameter is true, all objects labeled UNKNOWN will be removed in post-process.",
+          "default": false
+        }
+      },
"required": ["fusion_distance", "fusion_ratio", "remove_unknown"] + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/mask_cluster_fusion" + } + }, + "required": ["ros__parameters"] + } + }, + "required": ["/**"] +} diff --git a/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp b/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp index 067deb5d03a87..de250692a2636 100644 --- a/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp +++ b/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp @@ -442,4 +442,5 @@ template class FusionNode< template class FusionNode; template class FusionNode; template class FusionNode; +template class FusionNode; } // namespace autoware::image_projection_based_fusion diff --git a/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp b/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp new file mode 100644 index 0000000000000..9af211c28faeb --- /dev/null +++ b/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp @@ -0,0 +1,277 @@ +// Copyright 2024 The Autoware Contributors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp" + +#include "autoware/euclidean_cluster/utils.hpp" +#include "autoware/image_projection_based_fusion/utils/geometry.hpp" +#include "autoware/image_projection_based_fusion/utils/utils.hpp" + +#include +#include +#include + +namespace autoware::image_projection_based_fusion +{ +bool is_far_enough(const DetectedObjectWithFeature & obj, const double distance_threshold) +{ + const auto & position = obj.object.kinematics.pose_with_covariance.pose.position; + return position.x * position.x + position.y * position.y > + distance_threshold * distance_threshold; +} + +MaskClusterFusionNode::MaskClusterFusionNode(const rclcpp::NodeOptions & options) +: FusionNode( + "mask_cluster_fusion", options) +{ + fusion_distance_ = declare_parameter("fusion_distance"); + fusion_ratio_ = declare_parameter("fusion_ratio"); + remove_unknown_ = declare_parameter("remove_unknown"); +} + +void MaskClusterFusionNode::preprocess(__attribute__((unused)) + DetectedObjectsWithFeature & output_msg) +{ + return; +} + +void MaskClusterFusionNode::postprocess(__attribute__((unused)) + DetectedObjectsWithFeature & output_msg) +{ + if (!remove_unknown_) { + return; + } + DetectedObjectsWithFeature known_objects; + known_objects.feature_objects.reserve(output_msg.feature_objects.size()); + for (auto & feature_object : output_msg.feature_objects) { + bool is_roi_label_known = feature_object.object.classification.front().label != + autoware_perception_msgs::msg::ObjectClassification::UNKNOWN; + if (is_roi_label_known) { + known_objects.feature_objects.push_back(feature_object); + } + } + output_msg.feature_objects = known_objects.feature_objects; +} + +void MaskClusterFusionNode::fuseOnSingleImage( + __attribute__((unused)) const DetectedObjectsWithFeature & input_cluster_msg, + __attribute__((unused)) const std::size_t image_id, + __attribute__((unused)) const SegmentationMask & input_mask_msg, + __attribute__((unused)) const sensor_msgs::msg::CameraInfo & camera_info, + __attribute__((unused)) DetectedObjectsWithFeature & output_object_msg) +{ + if (!checkCameraInfo(camera_info)) return; + + if (input_mask_msg.image.height == 0 || input_mask_msg.image.width == 0) { + return; + } + + // Convert mask to cv::Mat - Resize mask to the same size as the camera image + cv_bridge::CvImagePtr in_image_ptr; + try { + in_image_ptr = cv_bridge::toCvCopy( + std::make_shared(input_mask_msg.image), + input_mask_msg.image.encoding); + } catch (const std::exception & e) { + RCLCPP_ERROR(this->get_logger(), "cv_bridge exception:%s", e.what()); + return; + } + cv::Mat mask = in_image_ptr->image; + const int orig_width = camera_info.width; + const int orig_height = camera_info.height; + // resize mask to the same size as the camera image + cv::resize(mask, mask, cv::Size(orig_width, orig_height), 0, 0, cv::INTER_NEAREST); + + image_geometry::PinholeCameraModel pinhole_camera_model; + pinhole_camera_model.fromCameraInfo(camera_info); + + // get transform from cluster frame id to camera optical frame id + geometry_msgs::msg::TransformStamped transform_stamped; + { + const auto transform_stamped_optional = getTransformStamped( + tf_buffer_, /*target*/ camera_info.header.frame_id, + /*source*/ input_cluster_msg.header.frame_id, camera_info.header.stamp); + if (!transform_stamped_optional) { + RCLCPP_WARN_STREAM( + get_logger(), "Failed to get transform from " << input_cluster_msg.header.frame_id << " to " + << camera_info.header.frame_id); + return; + } + transform_stamped 
+  }
+
+  MatchedClusters matched_clusters;
+  for (std::size_t i = 0; i < input_cluster_msg.feature_objects.size(); ++i) {
+    if (input_cluster_msg.feature_objects.at(i).feature.cluster.data.empty()) {
+      continue;
+    }
+
+    if (is_far_enough(input_cluster_msg.feature_objects.at(i), fusion_distance_)) {
+      continue;
+    }
+
+    // filter point out of scope
+    if (debugger_ && out_of_scope(input_cluster_msg.feature_objects.at(i))) {
+      continue;
+    }
+
+    sensor_msgs::msg::PointCloud2 transformed_cluster;
+    tf2::doTransform(
+      input_cluster_msg.feature_objects.at(i).feature.cluster, transformed_cluster,
+      transform_stamped);
+
+    auto min_x(static_cast<int32_t>(camera_info.width));
+    auto min_y(static_cast<int32_t>(camera_info.height));
+    int32_t max_x(0);
+    int32_t max_y(0);
+
+    double total_projected_points = 0;
+    std::vector<double> point_counter_each_class(input_mask_msg.classification.size(), 0.0);
+
+    for (sensor_msgs::PointCloud2ConstIterator<float> iter_x(transformed_cluster, "x"),
+         iter_y(transformed_cluster, "y"), iter_z(transformed_cluster, "z");
+         iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) {
+      if (*iter_z <= 0.0) {
+        continue;
+      }
+
+      Eigen::Vector2d projected_point =
+        calcRawImageProjectedPoint(pinhole_camera_model, cv::Point3d(*iter_x, *iter_y, *iter_z));
+
+      if (
+        0 >= static_cast<int32_t>(projected_point.x()) ||
+        static_cast<int32_t>(projected_point.x()) >= static_cast<int32_t>(camera_info.width) - 1 ||
+        0 >= static_cast<int32_t>(projected_point.y()) ||
+        static_cast<int32_t>(projected_point.y()) >= static_cast<int32_t>(camera_info.height) - 1) {
+        continue;
+      }
+
+      const uint8_t pixel_value = mask.at<uint8_t>(
+        static_cast<int>(projected_point.y()), static_cast<int>(projected_point.x()));
+      if (pixel_value != 0) {
+        min_x = std::min(static_cast<int32_t>(projected_point.x()), min_x);
+        min_y = std::min(static_cast<int32_t>(projected_point.y()), min_y);
+        max_x = std::max(static_cast<int32_t>(projected_point.x()), max_x);
+        max_y = std::max(static_cast<int32_t>(projected_point.y()), max_y);
+
+        // pixel value k marks the k-th mask, so the mask index is (k - 1)
+        point_counter_each_class[static_cast<std::size_t>(pixel_value - 1)]++;
+      }
+      total_projected_points++;
+    }
+
+    // If no point of the cluster lands on any mask, skip the cluster
+    const bool all_zero = std::all_of(
+      point_counter_each_class.begin(), point_counter_each_class.end(),
+      [](double value) { return value == 0; });
+    if (all_zero) {
+      continue;
+    }
+
+    // Find the mask with the most projected points
+    const auto max_object_it =
+      std::max_element(point_counter_each_class.begin(), point_counter_each_class.end());
+    const auto max_object_index =
+      static_cast<uint32_t>(std::distance(point_counter_each_class.begin(), max_object_it));
+
+    RCLCPP_DEBUG(get_logger(), "Points on the best mask: %f", *max_object_it);
+    RCLCPP_DEBUG(get_logger(), "Total projected points: %f", total_projected_points);
+    RCLCPP_DEBUG(get_logger(), "Fusion ratio: %f", *max_object_it / total_projected_points);
+
+    // check if the share of points on the mask is smaller than the fusion ratio
+    if (*max_object_it / total_projected_points < fusion_ratio_) {
+      continue;
+    }
+
+    // skip if another cluster already matches the same mask with more points
+    const bool is_any_bigger_ratio_with_same_mask = std::any_of(
+      matched_clusters.begin(), matched_clusters.end(),
+      [max_object_it, max_object_index](const MatchedCluster & matched_cluster) {
+        return matched_cluster.mask_index == max_object_index &&
+               matched_cluster.valid_point_number > *max_object_it;
+      });
+    if (is_any_bigger_ratio_with_same_mask) {
+      continue;
+    }
+
+    // remove the previously matched cluster with the same mask if it exists
+    matched_clusters.erase(
+      std::remove_if(
+        matched_clusters.begin(),
+        matched_clusters.end(),
+        [max_object_index](const MatchedCluster & matched_cluster) {
+          return matched_cluster.mask_index == max_object_index;
+        }),
+      matched_clusters.end());
+
+    MatchedCluster matched_cluster;
+    matched_cluster.mask_index = max_object_index;
+    matched_cluster.cluster_index = static_cast<uint32_t>(i);
+    matched_cluster.valid_point_number = static_cast<uint32_t>(*max_object_it);
+
+    // Set roi of the object
+    sensor_msgs::msg::RegionOfInterest roi;
+    roi.x_offset = min_x;
+    roi.y_offset = min_y;
+    roi.width = max_x - min_x;
+    roi.height = max_y - min_y;
+    matched_cluster.roi = roi;
+
+    matched_clusters.push_back(matched_cluster);
+  }
+
+  for (const auto & matched_cluster : matched_clusters) {
+    // Set classification of the object
+    std::vector<autoware_perception_msgs::msg::ObjectClassification> classification;
+    autoware_perception_msgs::msg::ObjectClassification object_classification;
+    object_classification.label =
+      static_cast<uint8_t>(input_mask_msg.classification[matched_cluster.mask_index].label);
+    object_classification.probability = 1.0;
+    classification.push_back(object_classification);
+    output_object_msg.feature_objects.at(matched_cluster.cluster_index).object.classification =
+      classification;
+  }
+}
+
+bool MaskClusterFusionNode::out_of_scope(const DetectedObjectWithFeature & obj)
+{
+  const auto & cluster = obj.feature.cluster;
+  bool is_out = false;
+  auto valid_point = [](float p, float min_num, float max_num) -> bool {
+    return (p > min_num) && (p < max_num);
+  };
+
+  for (sensor_msgs::PointCloud2ConstIterator<float> iter_x(cluster, "x"), iter_y(cluster, "y"),
+       iter_z(cluster, "z");
+       iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) {
+    if (!valid_point(*iter_x, filter_scope_min_x_, filter_scope_max_x_)) {
+      is_out = true;
+      break;
+    }
+
+    if (!valid_point(*iter_y, filter_scope_min_y_, filter_scope_max_y_)) {
+      is_out = true;
+      break;
+    }
+
+    if (!valid_point(*iter_z, filter_scope_min_z_, filter_scope_max_z_)) {
+      is_out = true;
+      break;
+    }
+  }
+
+  return is_out;
+}
+}  // namespace autoware::image_projection_based_fusion
+
+#include <rclcpp_components/register_node_macro.hpp>
+
+RCLCPP_COMPONENTS_REGISTER_NODE(autoware::image_projection_based_fusion::MaskClusterFusionNode)
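
A side note on the mask lookup in `fuseOnSingleImage`: pixel value `0` is background and pixel value `k` selects mask `k - 1`, and this stays true after resizing to the camera resolution only because `cv::INTER_NEAREST` copies instance IDs instead of blending them. A minimal standalone sketch of that behavior (the 4x4 mask and the 8x8 target size are made-up values, not part of the patch):

```cpp
#include <cstdint>
#include <iostream>
#include <opencv2/opencv.hpp>

int main()
{
  // 4x4 instance mask: 0 = background, value k = the (k - 1)-th mask.
  const cv::Mat mask = (cv::Mat_<uint8_t>(4, 4) <<
    0, 0, 1, 1,
    0, 0, 1, 1,
    2, 2, 0, 0,
    2, 2, 0, 0);

  // Upscale to the (pretend) camera resolution. INTER_NEAREST copies pixel
  // values verbatim; a smoothing interpolation would invent bogus IDs along
  // the border between instances 1 and 2.
  cv::Mat resized;
  cv::resize(mask, resized, cv::Size(8, 8), 0, 0, cv::INTER_NEAREST);

  // A projected point at (row 2, col 5) falls inside the first instance.
  const uint8_t pixel_value = resized.at<uint8_t>(2, 5);
  if (pixel_value != 0) {
    std::cout << "point hits mask index " << pixel_value - 1 << "\n";
    // prints: point hits mask index 0
  }
}
```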