diff --git a/perception/autoware_image_projection_based_fusion/CMakeLists.txt b/perception/autoware_image_projection_based_fusion/CMakeLists.txt
index 73d305d2ab2a8..423ed0222de5a 100644
--- a/perception/autoware_image_projection_based_fusion/CMakeLists.txt
+++ b/perception/autoware_image_projection_based_fusion/CMakeLists.txt
@@ -24,6 +24,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED
   src/roi_detected_object_fusion/node.cpp
   src/segmentation_pointcloud_fusion/node.cpp
   src/roi_pointcloud_fusion/node.cpp
+  src/mask_cluster_fusion/node.cpp
 )
 
 target_link_libraries(${PROJECT_NAME}
@@ -51,6 +52,11 @@ rclcpp_components_register_node(${PROJECT_NAME}
   EXECUTABLE roi_pointcloud_fusion_node
 )
 
+rclcpp_components_register_node(${PROJECT_NAME}
+  PLUGIN "autoware::image_projection_based_fusion::MaskClusterFusionNode"
+  EXECUTABLE mask_cluster_fusion_node
+)
+
 set(CUDA_VERBOSE OFF)
 
 # set flags for CUDA availability
diff --git a/perception/autoware_image_projection_based_fusion/README.md b/perception/autoware_image_projection_based_fusion/README.md
index c976697b0396d..d71c90f6fd5ee 100644
--- a/perception/autoware_image_projection_based_fusion/README.md
+++ b/perception/autoware_image_projection_based_fusion/README.md
@@ -68,9 +68,10 @@ The rclcpp::TimerBase timer could not break a for loop, therefore even if time i
 
 ### Detail description of each fusion's algorithm is in the following links
 
-| Fusion Name                | Description                                                                                      | Detail                                       |
-| -------------------------- | ------------------------------------------------------------------------------------------------ | -------------------------------------------- |
-| roi_cluster_fusion         | Overwrite a classification label of clusters by that of ROIs from a 2D object detector.           | [link](./docs/roi-cluster-fusion.md)          |
-| roi_detected_object_fusion | Overwrite a classification label of detected objects by that of ROIs from a 2D object detector.   | [link](./docs/roi-detected-object-fusion.md)  |
-| pointpainting_fusion       | Paint the point cloud with the ROIs from a 2D object detector and feed to a 3D object detector.   | [link](./docs/pointpainting-fusion.md)        |
-| roi_pointcloud_fusion      | Matching pointcloud with ROIs from a 2D object detector to detect unknown-labeled objects         | [link](./docs/roi-pointcloud-fusion.md)       |
+| Fusion Name                | Description                                                                                         | Detail                                        |
+| -------------------------- | --------------------------------------------------------------------------------------------------- | --------------------------------------------- |
+| roi_cluster_fusion         | Overwrite a classification label of clusters by that of ROIs from a 2D object detector.              | [link](./docs/roi-cluster-fusion.md)          |
+| roi_detected_object_fusion | Overwrite a classification label of detected objects by that of ROIs from a 2D object detector.      | [link](./docs/roi-detected-object-fusion.md)  |
+| pointpainting_fusion       | Paint the point cloud with the ROIs from a 2D object detector and feed to a 3D object detector.      | [link](./docs/pointpainting-fusion.md)        |
+| roi_pointcloud_fusion      | Matching pointcloud with ROIs from a 2D object detector to detect unknown-labeled objects            | [link](./docs/roi-pointcloud-fusion.md)       |
+| mask_cluster_fusion        | Overwrite a classification label of clusters by that of masks from an instance segmentation model.   | [link](./docs/mask-cluster-fusion.md)         |
diff --git a/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml b/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml
new file mode 100644
index 0000000000000..25af5703bb265
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/config/mask_cluster_fusion.param.yaml
@@ -0,0 +1,5 @@
+/**:
+  ros__parameters:
+    fusion_distance: 100.0
+    fusion_ratio: 0.4
+    remove_unknown: false
diff --git a/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png b/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png
new file mode 100644
index 0000000000000..7c9a06f82b89e
Binary files /dev/null and b/perception/autoware_image_projection_based_fusion/docs/images/mask_cluster_fusion.png differ
diff --git a/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md b/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md
new file mode 100644
index 0000000000000..d76ac010fb955
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/docs/mask-cluster-fusion.md
@@ -0,0 +1,38 @@
+# mask_cluster_fusion
+
+## Purpose
+
+The `mask_cluster_fusion` package assigns the label of a mask to a pointcloud cluster, using the mask information produced by a 2D instance segmentation model.
+
+## Inner-workings / Algorithms
+
+1. The clusters are projected onto the image plane.
+2. For each cluster, find the mask it overlaps best, i.e. the mask that receives the most projected points.
+3. Assign the label of that mask to the cluster, provided the points falling on the mask account for a sufficient share (`fusion_ratio`) of the cluster's projected points.
+4. If several clusters end up matched to the same mask, keep the label only on the cluster with the largest overlap; the other clusters stay unlabeled (see the sketch below).
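+
+The per-mask deduplication in steps 2-4 can be illustrated with a minimal standalone sketch. The tallies below are hypothetical, and the node itself works on projected pointcloud points and ROS messages rather than these bare structs:
+
+```cpp
+#include <cstddef>
+#include <iostream>
+#include <map>
+#include <utility>
+#include <vector>
+
+// For every cluster: how many of its points project into the image at all,
+// and how many of those land on each instance mask.
+struct ClusterTally
+{
+  double total_projected_points;
+  std::vector<double> points_on_mask;
+};
+
+int main()
+{
+  const double fusion_ratio = 0.4;  // same meaning as the node parameter
+
+  const std::vector<ClusterTally> clusters = {
+    {100.0, {60.0, 0.0}},  // cluster 0: 60 % of its points on mask 0
+    {50.0, {30.0, 5.0}},   // cluster 1: also mostly mask 0, but fewer points
+    {40.0, {0.0, 10.0}},   // cluster 2: only 25 % on mask 1 -> below the ratio
+  };
+
+  // mask index -> (cluster index, points on that mask); a later cluster
+  // replaces an earlier one only if it puts more points on the same mask.
+  std::map<std::size_t, std::pair<std::size_t, double>> best_match;
+  for (std::size_t i = 0; i < clusters.size(); ++i) {
+    const auto & tally = clusters[i].points_on_mask;
+    std::size_t best_mask = 0;
+    for (std::size_t m = 1; m < tally.size(); ++m) {
+      if (tally[m] > tally[best_mask]) best_mask = m;
+    }
+    if (tally[best_mask] == 0.0) continue;  // no overlap with any mask
+    if (tally[best_mask] / clusters[i].total_projected_points < fusion_ratio) {
+      continue;  // overlap too small to trust the label
+    }
+    const auto it = best_match.find(best_mask);
+    if (it == best_match.end() || it->second.second < tally[best_mask]) {
+      best_match[best_mask] = {i, tally[best_mask]};
+    }
+  }
+
+  for (const auto & [mask, match] : best_match) {
+    std::cout << "mask " << mask << " labels cluster " << match.first << "\n";
+  }
+  // Prints "mask 0 labels cluster 0": cluster 1 lost the tie for mask 0,
+  // and cluster 2 never passed the fusion_ratio gate.
+}
+```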
+
+![mask_cluster_fusion_image](./images/mask_cluster_fusion.png)
+
+## Inputs / Outputs
+
+### Input
+
+| Name                     | Type                                                        | Description                                                |
+| ------------------------ | ----------------------------------------------------------- | ---------------------------------------------------------- |
+| `input`                  | `tier4_perception_msgs::msg::DetectedObjectsWithFeature`    | clustered pointcloud                                       |
+| `input/camera_info[0-7]` | `sensor_msgs::msg::CameraInfo`                              | camera information to project 3d points onto image planes  |
+| `input/masks[0-7]`       | `autoware_internal_perception_msgs::msg::SegmentationMask`  | masks from each image                                      |
+| `input/image_raw[0-7]`   | `sensor_msgs::msg::Image`                                   | images for visualization                                   |
+
+### Output
+
+| Name                     | Type                                                      | Description                |
+| ------------------------ | --------------------------------------------------------- | -------------------------- |
+| `output`                 | `tier4_perception_msgs::msg::DetectedObjectsWithFeature`  | labeled cluster pointcloud |
+| `~/debug/image_raw[0-7]` | `sensor_msgs::msg::Image`                                 | images for visualization   |
+
+## Parameters
+
+{{ json_to_markdown("perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json") }}
+
+## Assumptions / Known limits
diff --git a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
index c9b4f1f7b903f..3f035ba0201eb 100644
--- a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
+++ b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/fusion_node.hpp
@@ -20,6 +20,7 @@
 #include
 
 #include
+#include <autoware_internal_perception_msgs/msg/segmentation_mask.hpp>
 #include
 #include
 #include
@@ -54,6 +55,7 @@
 using sensor_msgs::msg::PointCloud2;
 using tier4_perception_msgs::msg::DetectedObjectsWithFeature;
 using tier4_perception_msgs::msg::DetectedObjectWithFeature;
 using PointCloud = pcl::PointCloud<pcl::PointXYZ>;
+using autoware_internal_perception_msgs::msg::SegmentationMask;
 using autoware_perception_msgs::msg::ObjectClassification;
 template <class TargetMsg3D, class ObjType, class Msg2D>
diff --git a/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp
new file mode 100644
index 0000000000000..857e3a0257341
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/include/autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp
@@ -0,0 +1,62 @@
+// Copyright 2024 The Autoware Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
+#define AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
+
+#include "autoware/image_projection_based_fusion/fusion_node.hpp"
+
+#include <rclcpp/rclcpp.hpp>
+
+#include <sensor_msgs/msg/region_of_interest.hpp>
+
+#include <string>
+#include <vector>
+
+namespace autoware::image_projection_based_fusion
+{
+struct MatchedCluster
+{
+  uint32_t mask_index{};
+  uint32_t cluster_index{};
+  uint32_t valid_point_number{};
+  sensor_msgs::msg::RegionOfInterest roi;
+};
+using MatchedClusters = std::vector<MatchedCluster>;
+
+class MaskClusterFusionNode
+: public FusionNode<DetectedObjectsWithFeature, DetectedObjectWithFeature, SegmentationMask>
+{
+public:
+  explicit MaskClusterFusionNode(const rclcpp::NodeOptions & options);
+
+protected:
+  void preprocess(DetectedObjectsWithFeature & output_msg) override;
+
+  void postprocess(DetectedObjectsWithFeature & output_msg) override;
+
+  void fuseOnSingleImage(
+    const DetectedObjectsWithFeature & input_cluster_msg, const std::size_t image_id,
+    const SegmentationMask & input_mask_msg, const sensor_msgs::msg::CameraInfo & camera_info,
+    DetectedObjectsWithFeature & output_object_msg) override;
+
+  bool out_of_scope(const DetectedObjectWithFeature & obj) override;
+
+  double fusion_distance_;
+  double fusion_ratio_;
+  bool remove_unknown_;
+};
+}  // namespace autoware::image_projection_based_fusion
+
+#endif  // AUTOWARE__IMAGE_PROJECTION_BASED_FUSION__MASK_CLUSTER_FUSION__NODE_HPP_
diff --git a/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml b/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml
new file mode 100644
index 0000000000000..3bbce2c5fd9c5
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/launch/mask_cluster_fusion.launch.xml
@@ -0,0 +1,72 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/perception/autoware_image_projection_based_fusion/package.xml b/perception/autoware_image_projection_based_fusion/package.xml
index 335c4712071e0..c0542e52159ce 100644
--- a/perception/autoware_image_projection_based_fusion/package.xml
+++ b/perception/autoware_image_projection_based_fusion/package.xml
@@ -17,6 +17,7 @@
   <buildtool_depend>autoware_cmake</buildtool_depend>
 
   <depend>autoware_euclidean_cluster</depend>
+  <depend>autoware_internal_perception_msgs</depend>
   <depend>autoware_lidar_centerpoint</depend>
   <depend>autoware_object_recognition_utils</depend>
   <depend>autoware_perception_msgs</depend>
diff --git a/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json b/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json
new file mode 100644
index 0000000000000..2435d00aeb7f7
--- /dev/null
+++ b/perception/autoware_image_projection_based_fusion/schema/mask_cluster_fusion.schema.json
@@ -0,0 +1,42 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for Mask Cluster Fusion Node",
+  "type": "object",
+  "definitions": {
+    "mask_cluster_fusion": {
+      "type": "object",
+      "properties": {
+        "fusion_distance": {
+          "type": "number",
+          "description": "If the detected object's distance is less than this value, the fusion will be processed.",
+          "default": 100.0,
+          "minimum": 0.0
+        },
+        "fusion_ratio": {
+          "type": "number",
+          "description": "The minimum ratio between the cluster points projected onto the mask and the cluster's total projected points.",
+          "default": 0.4,
+          "minimum": 0.0
+        },
+        "remove_unknown": {
+          "type": "boolean",
+          "description": "If this parameter is true, all objects labeled UNKNOWN will be removed in post-process.",
+          "default": false
+        }
+      },
"required": ["fusion_distance", "fusion_ratio", "remove_unknown"] + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/mask_cluster_fusion" + } + }, + "required": ["ros__parameters"] + } + }, + "required": ["/**"] +} diff --git a/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp b/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp index 067deb5d03a87..de250692a2636 100644 --- a/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp +++ b/perception/autoware_image_projection_based_fusion/src/fusion_node.cpp @@ -442,4 +442,5 @@ template class FusionNode< template class FusionNode; template class FusionNode; template class FusionNode; +template class FusionNode; } // namespace autoware::image_projection_based_fusion diff --git a/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp b/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp new file mode 100644 index 0000000000000..9af211c28faeb --- /dev/null +++ b/perception/autoware_image_projection_based_fusion/src/mask_cluster_fusion/node.cpp @@ -0,0 +1,277 @@ +// Copyright 2024 The Autoware Contributors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "autoware/image_projection_based_fusion/mask_cluster_fusion/node.hpp" + +#include "autoware/euclidean_cluster/utils.hpp" +#include "autoware/image_projection_based_fusion/utils/geometry.hpp" +#include "autoware/image_projection_based_fusion/utils/utils.hpp" + +#include +#include +#include + +namespace autoware::image_projection_based_fusion +{ +bool is_far_enough(const DetectedObjectWithFeature & obj, const double distance_threshold) +{ + const auto & position = obj.object.kinematics.pose_with_covariance.pose.position; + return position.x * position.x + position.y * position.y > + distance_threshold * distance_threshold; +} + +MaskClusterFusionNode::MaskClusterFusionNode(const rclcpp::NodeOptions & options) +: FusionNode( + "mask_cluster_fusion", options) +{ + fusion_distance_ = declare_parameter("fusion_distance"); + fusion_ratio_ = declare_parameter("fusion_ratio"); + remove_unknown_ = declare_parameter("remove_unknown"); +} + +void MaskClusterFusionNode::preprocess(__attribute__((unused)) + DetectedObjectsWithFeature & output_msg) +{ + return; +} + +void MaskClusterFusionNode::postprocess(__attribute__((unused)) + DetectedObjectsWithFeature & output_msg) +{ + if (!remove_unknown_) { + return; + } + DetectedObjectsWithFeature known_objects; + known_objects.feature_objects.reserve(output_msg.feature_objects.size()); + for (auto & feature_object : output_msg.feature_objects) { + bool is_roi_label_known = feature_object.object.classification.front().label != + autoware_perception_msgs::msg::ObjectClassification::UNKNOWN; + if (is_roi_label_known) { + known_objects.feature_objects.push_back(feature_object); + } + } + output_msg.feature_objects = known_objects.feature_objects; +} + +void MaskClusterFusionNode::fuseOnSingleImage( + __attribute__((unused)) const DetectedObjectsWithFeature & input_cluster_msg, + __attribute__((unused)) const std::size_t image_id, + __attribute__((unused)) const SegmentationMask & input_mask_msg, + __attribute__((unused)) const sensor_msgs::msg::CameraInfo & camera_info, + __attribute__((unused)) DetectedObjectsWithFeature & output_object_msg) +{ + if (!checkCameraInfo(camera_info)) return; + + if (input_mask_msg.image.height == 0 || input_mask_msg.image.width == 0) { + return; + } + + // Convert mask to cv::Mat - Resize mask to the same size as the camera image + cv_bridge::CvImagePtr in_image_ptr; + try { + in_image_ptr = cv_bridge::toCvCopy( + std::make_shared(input_mask_msg.image), + input_mask_msg.image.encoding); + } catch (const std::exception & e) { + RCLCPP_ERROR(this->get_logger(), "cv_bridge exception:%s", e.what()); + return; + } + cv::Mat mask = in_image_ptr->image; + const int orig_width = camera_info.width; + const int orig_height = camera_info.height; + // resize mask to the same size as the camera image + cv::resize(mask, mask, cv::Size(orig_width, orig_height), 0, 0, cv::INTER_NEAREST); + + image_geometry::PinholeCameraModel pinhole_camera_model; + pinhole_camera_model.fromCameraInfo(camera_info); + + // get transform from cluster frame id to camera optical frame id + geometry_msgs::msg::TransformStamped transform_stamped; + { + const auto transform_stamped_optional = getTransformStamped( + tf_buffer_, /*target*/ camera_info.header.frame_id, + /*source*/ input_cluster_msg.header.frame_id, camera_info.header.stamp); + if (!transform_stamped_optional) { + RCLCPP_WARN_STREAM( + get_logger(), "Failed to get transform from " << input_cluster_msg.header.frame_id << " to " + << camera_info.header.frame_id); + return; + } + transform_stamped 
+  }
+
+  MatchedClusters matched_clusters;
+  for (std::size_t i = 0; i < input_cluster_msg.feature_objects.size(); ++i) {
+    if (input_cluster_msg.feature_objects.at(i).feature.cluster.data.empty()) {
+      continue;
+    }
+
+    if (is_far_enough(input_cluster_msg.feature_objects.at(i), fusion_distance_)) {
+      continue;
+    }
+
+    // filter point out of scope
+    if (debugger_ && out_of_scope(input_cluster_msg.feature_objects.at(i))) {
+      continue;
+    }
+
+    sensor_msgs::msg::PointCloud2 transformed_cluster;
+    tf2::doTransform(
+      input_cluster_msg.feature_objects.at(i).feature.cluster, transformed_cluster,
+      transform_stamped);
+
+    auto min_x(static_cast<int32_t>(camera_info.width));
+    auto min_y(static_cast<int32_t>(camera_info.height));
+    int32_t max_x(0);
+    int32_t max_y(0);
+
+    double total_projected_points = 0;
+    std::vector<double> point_counter_each_class(input_mask_msg.classification.size(), 0.0);
+
+    for (sensor_msgs::PointCloud2ConstIterator<float> iter_x(transformed_cluster, "x"),
+         iter_y(transformed_cluster, "y"), iter_z(transformed_cluster, "z");
+         iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) {
+      if (*iter_z <= 0.0) {
+        continue;
+      }
+
+      Eigen::Vector2d projected_point =
+        calcRawImageProjectedPoint(pinhole_camera_model, cv::Point3d(*iter_x, *iter_y, *iter_z));
+
+      if (
+        0 >= static_cast<int32_t>(projected_point.x()) ||
+        static_cast<int32_t>(projected_point.x()) >= static_cast<int32_t>(camera_info.width) - 1 ||
+        0 >= static_cast<int32_t>(projected_point.y()) ||
+        static_cast<int32_t>(projected_point.y()) >= static_cast<int32_t>(camera_info.height) - 1) {
+        continue;
+      }
+
+      const uint8_t pixel_value = mask.at<uint8_t>(
+        static_cast<int>(projected_point.y()), static_cast<int>(projected_point.x()));
+      if (pixel_value != 0) {
+        min_x = std::min(static_cast<int32_t>(projected_point.x()), min_x);
+        min_y = std::min(static_cast<int32_t>(projected_point.y()), min_y);
+        max_x = std::max(static_cast<int32_t>(projected_point.x()), max_x);
+        max_y = std::max(static_cast<int32_t>(projected_point.y()), max_y);
+
+        // pixel value k marks the k-th mask, so the mask index is (k - 1)
+        point_counter_each_class[static_cast<std::size_t>(pixel_value - 1)]++;
+      }
+      total_projected_points++;
+    }
+
+    // If no point of the cluster lands on any mask, skip the cluster
+    const bool all_zero = std::all_of(
+      point_counter_each_class.begin(), point_counter_each_class.end(),
+      [](double value) { return value == 0; });
+    if (all_zero) {
+      continue;
+    }
+
+    // Find the mask with the most projected points
+    const auto max_object_it =
+      std::max_element(point_counter_each_class.begin(), point_counter_each_class.end());
+    const auto max_object_index =
+      static_cast<uint32_t>(std::distance(point_counter_each_class.begin(), max_object_it));
+
+    RCLCPP_DEBUG(get_logger(), "Points on the best mask: %f", *max_object_it);
+    RCLCPP_DEBUG(get_logger(), "Total projected points: %f", total_projected_points);
+    RCLCPP_DEBUG(get_logger(), "Fusion ratio: %f", *max_object_it / total_projected_points);
+
+    // check if the share of points on the mask is smaller than the fusion ratio
+    if (*max_object_it / total_projected_points < fusion_ratio_) {
+      continue;
+    }
+
+    // skip if another cluster already matches the same mask with more points
+    const bool is_any_bigger_ratio_with_same_mask = std::any_of(
+      matched_clusters.begin(), matched_clusters.end(),
+      [max_object_it, max_object_index](const MatchedCluster & matched_cluster) {
+        return matched_cluster.mask_index == max_object_index &&
+               matched_cluster.valid_point_number > *max_object_it;
+      });
+    if (is_any_bigger_ratio_with_same_mask) {
+      continue;
+    }
+
+    // remove the previously matched cluster with the same mask if it exists
+    matched_clusters.erase(
+      std::remove_if(
+        matched_clusters.begin(),
+        matched_clusters.end(),
+        [max_object_index](const MatchedCluster & matched_cluster) {
+          return matched_cluster.mask_index == max_object_index;
+        }),
+      matched_clusters.end());
+
+    MatchedCluster matched_cluster;
+    matched_cluster.mask_index = max_object_index;
+    matched_cluster.cluster_index = static_cast<uint32_t>(i);
+    matched_cluster.valid_point_number = static_cast<uint32_t>(*max_object_it);
+
+    // Set roi of the object
+    sensor_msgs::msg::RegionOfInterest roi;
+    roi.x_offset = min_x;
+    roi.y_offset = min_y;
+    roi.width = max_x - min_x;
+    roi.height = max_y - min_y;
+    matched_cluster.roi = roi;
+
+    matched_clusters.push_back(matched_cluster);
+  }
+
+  for (const auto & matched_cluster : matched_clusters) {
+    // Set classification of the object
+    std::vector<autoware_perception_msgs::msg::ObjectClassification> classification;
+    autoware_perception_msgs::msg::ObjectClassification object_classification;
+    object_classification.label =
+      static_cast<uint8_t>(input_mask_msg.classification[matched_cluster.mask_index].label);
+    object_classification.probability = 1.0;
+    classification.push_back(object_classification);
+    output_object_msg.feature_objects.at(matched_cluster.cluster_index).object.classification =
+      classification;
+  }
+}
+
+bool MaskClusterFusionNode::out_of_scope(const DetectedObjectWithFeature & obj)
+{
+  const auto & cluster = obj.feature.cluster;
+  bool is_out = false;
+  auto valid_point = [](float p, float min_num, float max_num) -> bool {
+    return (p > min_num) && (p < max_num);
+  };
+
+  for (sensor_msgs::PointCloud2ConstIterator<float> iter_x(cluster, "x"), iter_y(cluster, "y"),
+       iter_z(cluster, "z");
+       iter_x != iter_x.end(); ++iter_x, ++iter_y, ++iter_z) {
+    if (!valid_point(*iter_x, filter_scope_min_x_, filter_scope_max_x_)) {
+      is_out = true;
+      break;
+    }
+
+    if (!valid_point(*iter_y, filter_scope_min_y_, filter_scope_max_y_)) {
+      is_out = true;
+      break;
+    }
+
+    if (!valid_point(*iter_z, filter_scope_min_z_, filter_scope_max_z_)) {
+      is_out = true;
+      break;
+    }
+  }
+
+  return is_out;
+}
+}  // namespace autoware::image_projection_based_fusion
+
+#include <rclcpp_components/register_node_macro.hpp>
+
+RCLCPP_COMPONENTS_REGISTER_NODE(autoware::image_projection_based_fusion::MaskClusterFusionNode)
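
A side note on the mask lookup in `fuseOnSingleImage`: pixel value `0` is background and pixel value `k` selects mask `k - 1`, and this stays true after resizing to the camera resolution only because `cv::INTER_NEAREST` copies instance IDs instead of blending them. A minimal standalone sketch of that behavior (the 4x4 mask and the 8x8 target size are made-up values, not part of the patch):

```cpp
#include <cstdint>
#include <iostream>
#include <opencv2/opencv.hpp>

int main()
{
  // 4x4 instance mask: 0 = background, value k = the (k - 1)-th mask.
  const cv::Mat mask = (cv::Mat_<uint8_t>(4, 4) <<
    0, 0, 1, 1,
    0, 0, 1, 1,
    2, 2, 0, 0,
    2, 2, 0, 0);

  // Upscale to the (pretend) camera resolution. INTER_NEAREST copies pixel
  // values verbatim; a smoothing interpolation would invent bogus IDs along
  // the border between instances 1 and 2.
  cv::Mat resized;
  cv::resize(mask, resized, cv::Size(8, 8), 0, 0, cv::INTER_NEAREST);

  // A projected point at (row 2, col 5) falls inside the first instance.
  const uint8_t pixel_value = resized.at<uint8_t>(2, 5);
  if (pixel_value != 0) {
    std::cout << "point hits mask index " << pixel_value - 1 << "\n";
    // prints: point hits mask index 0
  }
}
```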