feat(lidar_centerpoint): output the covariance of pose and twist #6573

Merged · 13 commits · Apr 16, 2024
@@ -5,7 +5,7 @@
<arg name="lidar_detection_model" default="centerpoint" description="options: `centerpoint`, `apollo`, `clustering`"/>

<!-- Lidar detector centerpoint parameters -->
<arg name="centerpoint_model_name" default="centerpoint_tiny"/>
<arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint`, `centerpoint_tiny` or `centerpoint_sigma`"/>
<arg name="centerpoint_model_path" default="$(var data_path)/lidar_centerpoint"/>
<arg name="lidar_model_param_path" default="$(find-pkg-share lidar_centerpoint)/config"/>

@@ -36,7 +36,7 @@
<arg name="traffic_light_arbiter_param_path"/>

<!-- CenterPoint model parameters -->
<arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint` or `centerpoint_tiny`"/>
<arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint`, `centerpoint_tiny` or `centerpoint_sigma`"/>
<arg name="centerpoint_model_path" default="$(var data_path)/lidar_centerpoint"/>

<!-- PointPainting model parameters -->
@@ -16,6 +16,7 @@
downsample_factor: 1
encoder_in_feature_size: 12
yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
has_variance: false
has_twist: false
densification_params:
world_frame_id: "map"
@@ -60,6 +60,7 @@ class PointPaintingFusionNode
std::map<std::string, float> class_index_;
std::map<std::string, std::function<bool(int)>> isClassTable_;
std::vector<double> pointcloud_range;
bool has_variance_{false};
bool has_twist_{false};

centerpoint::NonMaximumSuppression iou_bev_nms_;
@@ -60,6 +60,11 @@
"minimum": 0.0,
"maximum": 1.0
},
"has_variance": {
"type": "boolean",
"description": "Indicates whether the model outputs variance value.",
"default": false
},
"has_twist": {
"type": "boolean",
"description": "Indicates whether the model outputs twist value.",
@@ -140,6 +140,7 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt
isClassTable_.erase(cls);
}
}
has_variance_ = this->declare_parameter<bool>("has_variance");
has_twist_ = this->declare_parameter<bool>("model_params.has_twist");
const std::size_t point_feature_size = static_cast<std::size_t>(
this->declare_parameter<std::int64_t>("model_params.point_feature_size"));
@@ -189,7 +190,7 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt
centerpoint::CenterPointConfig config(
class_names_.size(), point_feature_size, max_voxel_size, pointcloud_range, voxel_size,
downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
yaw_norm_thresholds);
yaw_norm_thresholds, has_variance_);

// create detector
detector_ptr_ = std::make_unique<image_projection_based_fusion::PointPaintingTRT>(
@@ -401,7 +402,7 @@ void PointPaintingFusionNode::postprocess(sensor_msgs::msg::PointCloud2 & painte
raw_objects.reserve(det_boxes3d.size());
for (const auto & box3d : det_boxes3d) {
autoware_auto_perception_msgs::msg::DetectedObject obj;
box3DToDetectedObject(box3d, class_names_, has_twist_, obj);
box3DToDetectedObject(box3d, class_names_, has_twist_, has_variance_, obj);
raw_objects.emplace_back(obj);
}

1 change: 1 addition & 0 deletions perception/lidar_centerpoint/config/centerpoint.param.yaml
@@ -14,6 +14,7 @@
iou_nms_threshold: 0.1
yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
score_threshold: 0.35
has_variance: false
has_twist: false
trt_precision: fp16
densification_num_past_frames: 1
27 changes: 27 additions & 0 deletions perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml
@@ -0,0 +1,27 @@
/**:
ros__parameters:
class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"]
point_feature_size: 4
max_voxel_size: 40000
point_cloud_range: [-76.8, -76.8, -4.0, 76.8, 76.8, 6.0]
voxel_size: [0.32, 0.32, 10.0]
downsample_factor: 1
encoder_in_feature_size: 9
# post-process params
circle_nms_dist_threshold: 0.5
iou_nms_target_class_names: ["CAR"]
iou_nms_search_distance_2d: 10.0
iou_nms_threshold: 0.1
yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
score_threshold: 0.35
has_variance: true
has_twist: true
trt_precision: fp16
densification_num_past_frames: 1
densification_world_frame_id: map

# weight files
encoder_onnx_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).onnx"
encoder_engine_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).engine"
head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx"
head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine"
@@ -14,6 +14,7 @@
iou_nms_threshold: 0.1
yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
score_threshold: 0.35
has_variance: false
has_twist: false
trt_precision: fp16
densification_num_past_frames: 1
@@ -28,7 +28,7 @@ class CenterPointConfig
const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
const float score_threshold, const float circle_nms_dist_threshold,
const std::vector<double> yaw_norm_thresholds)
const std::vector<double> yaw_norm_thresholds, const bool has_variance)
{
class_size_ = class_size;
point_feature_size_ = point_feature_size;
@@ -49,6 +49,15 @@ class CenterPointConfig
downsample_factor_ = downsample_factor;
encoder_in_feature_size_ = encoder_in_feature_size;

if (has_variance) {
has_variance_ = true;
head_out_offset_size_ = 4;
head_out_z_size_ = 2;
head_out_dim_size_ = 6;
head_out_rot_size_ = 4;
head_out_vel_size_ = 4;
}

if (score_threshold > 0 && score_threshold < 1) {
score_threshold_ = score_threshold;
}
@@ -97,11 +106,12 @@ class CenterPointConfig
std::size_t encoder_in_feature_size_{9};
const std::size_t encoder_out_feature_size_{32};
const std::size_t head_out_size_{6};
const std::size_t head_out_offset_size_{2};
const std::size_t head_out_z_size_{1};
const std::size_t head_out_dim_size_{3};
const std::size_t head_out_rot_size_{2};
const std::size_t head_out_vel_size_{2};
bool has_variance_{false};
std::size_t head_out_offset_size_{2};
std::size_t head_out_z_size_{1};
std::size_t head_out_dim_size_{3};
std::size_t head_out_rot_size_{2};
std::size_t head_out_vel_size_{2};

// post-process params
float score_threshold_{0.35f};
@@ -53,6 +53,7 @@ class LidarCenterPointNode : public rclcpp::Node
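When `has_variance` is enabled, every quantity regressed by the detection head gains a paired variance channel, which is why the per-head channel counts above double (offset 2→4, z 1→2, dim 3→6, rot 2→4, vel 2→4). A minimal sketch of that sizing rule (illustrative only, not the actual `CenterPointConfig` code):

```cpp
#include <cstddef>

// Sketch only (not the Autoware CenterPointConfig): with variance estimation
// enabled, each regressed quantity carries one extra channel holding its
// predicted variance, so every head's channel count doubles.
constexpr std::size_t head_channels(std::size_t num_quantities, bool has_variance)
{
  return has_variance ? 2 * num_quantities : num_quantities;
}

static_assert(head_channels(2, false) == 2, "offset (x, y)");
static_assert(head_channels(2, true) == 4, "offset (x, y) plus variances");
static_assert(head_channels(1, true) == 2, "z plus variance");
static_assert(head_channels(3, true) == 6, "dim (l, w, h) plus variances");
```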

float score_threshold_{0.0};
std::vector<std::string> class_names_;
bool has_variance_{false};
bool has_twist_{false};

NonMaximumSuppression iou_bev_nms_;
@@ -32,10 +32,14 @@ namespace centerpoint

void box3DToDetectedObject(
const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
autoware_auto_perception_msgs::msg::DetectedObject & obj);
const bool has_variance, autoware_auto_perception_msgs::msg::DetectedObject & obj);

uint8_t getSemanticType(const std::string & class_name);

std::array<double, 36> convertPoseCovarianceMatrix(const Box3D & box3d);

std::array<double, 36> convertTwistCovarianceMatrix(const Box3D & box3d);

bool isCarLikeVehicleLabel(const uint8_t label);

} // namespace centerpoint
11 changes: 11 additions & 0 deletions perception/lidar_centerpoint/include/lidar_centerpoint/utils.hpp
@@ -33,6 +33,17 @@ struct Box3D
float yaw;
float vel_x;
float vel_y;

// variance
float x_variance;
float y_variance;
float z_variance;
float length_variance;
float width_variance;
float height_variance;
float yaw_variance;
float vel_x_variance;
float vel_y_variance;
};

// cspell: ignore divup
32 changes: 30 additions & 2 deletions perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
@@ -58,7 +58,7 @@ __global__ void generateBoxes3D_kernel(
const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
const float * yaw_norm_thresholds, Box3D * det_boxes3d)
const bool has_variance, const float * yaw_norm_thresholds, Box3D * det_boxes3d)
{
// generate boxes3d from the outputs of the network.
// shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
@@ -107,6 +107,34 @@
det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
det_boxes3d[idx].vel_x = vel_x;
det_boxes3d[idx].vel_y = vel_y;

if (has_variance) {
const float offset_x_variance = out_offset[down_grid_size * 2 + idx];
const float offset_y_variance = out_offset[down_grid_size * 3 + idx];
const float z_variance = out_z[down_grid_size * 1 + idx];
const float w_variance = out_dim[down_grid_size * 3 + idx];
const float l_variance = out_dim[down_grid_size * 4 + idx];
const float h_variance = out_dim[down_grid_size * 5 + idx];
const float yaw_sin_log_variance = out_rot[down_grid_size * 2 + idx];
const float yaw_cos_log_variance = out_rot[down_grid_size * 3 + idx];
const float vel_x_variance = out_vel[down_grid_size * 2 + idx];
const float vel_y_variance = out_vel[down_grid_size * 3 + idx];

det_boxes3d[idx].x_variance = voxel_size_x * downsample_factor * expf(offset_x_variance);
det_boxes3d[idx].y_variance = voxel_size_x * downsample_factor * expf(offset_y_variance);
det_boxes3d[idx].z_variance = expf(z_variance);
det_boxes3d[idx].length_variance = expf(l_variance);
det_boxes3d[idx].width_variance = expf(w_variance);
det_boxes3d[idx].height_variance = expf(h_variance);
const float yaw_sin_sq = yaw_sin * yaw_sin;
const float yaw_cos_sq = yaw_cos * yaw_cos;
const float yaw_norm_sq = (yaw_sin_sq + yaw_cos_sq) * (yaw_sin_sq + yaw_cos_sq);
det_boxes3d[idx].yaw_variance =
(yaw_cos_sq * expf(yaw_sin_log_variance) + yaw_sin_sq * expf(yaw_cos_log_variance)) /
yaw_norm_sq;
det_boxes3d[idx].vel_x_variance = expf(vel_x_variance);
det_boxes3d[idx].vel_y_variance = expf(vel_y_variance);
}
}

PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
@@ -131,7 +159,7 @@ cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
thrust::raw_pointer_cast(boxes3d_d_.data()));

// suppress by score
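The yaw-variance expression in the kernel above is a first-order (delta-method) propagation of the predicted sin/cos variances through yaw = atan2(s, c); reading the `expf` calls as decoding log-variances, the quantity being computed is:

```latex
% yaw = atan2(s, c),  d(yaw)/ds = c / (s^2 + c^2),  d(yaw)/dc = -s / (s^2 + c^2)
\operatorname{Var}(\theta) \approx
  \left(\frac{\partial\theta}{\partial s}\right)^{2} \sigma_s^{2}
  + \left(\frac{\partial\theta}{\partial c}\right)^{2} \sigma_c^{2}
  = \frac{c^{2} e^{\ell_s} + s^{2} e^{\ell_c}}{\left(s^{2} + c^{2}\right)^{2}}
```

with `l_s` and `l_c` the raw log-variance channels, which matches `(yaw_cos_sq * expf(yaw_sin_log_variance) + yaw_sin_sq * expf(yaw_cos_log_variance)) / yaw_norm_sq` for `yaw_norm_sq = (s^2 + c^2)^2`.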
30 changes: 29 additions & 1 deletion perception/lidar_centerpoint/lib/ros_utils.cpp
@@ -25,49 +25,57 @@

void box3DToDetectedObject(
const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
autoware_auto_perception_msgs::msg::DetectedObject & obj)
const bool has_variance, autoware_auto_perception_msgs::msg::DetectedObject & obj)
{
// TODO(yukke42): the value of classification confidence of DNN, not probability.
obj.existence_probability = box3d.score;

// classification
autoware_auto_perception_msgs::msg::ObjectClassification classification;
classification.probability = 1.0f;
if (box3d.label >= 0 && static_cast<size_t>(box3d.label) < class_names.size()) {
classification.label = getSemanticType(class_names[box3d.label]);
} else {
classification.label = Label::UNKNOWN;
RCLCPP_WARN_STREAM(
rclcpp::get_logger("lidar_centerpoint"), "Unexpected label: UNKNOWN is set.");
}

if (object_recognition_utils::isCarLikeVehicle(classification.label)) {
obj.kinematics.orientation_availability =
autoware_auto_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN;
}

obj.classification.emplace_back(classification);

// pose and shape
// mmdet3d yaw format to ros yaw format
float yaw = -box3d.yaw - tier4_autoware_utils::pi / 2;
obj.kinematics.pose_with_covariance.pose.position =
tier4_autoware_utils::createPoint(box3d.x, box3d.y, box3d.z);
obj.kinematics.pose_with_covariance.pose.orientation =
tier4_autoware_utils::createQuaternionFromYaw(yaw);
obj.shape.type = autoware_auto_perception_msgs::msg::Shape::BOUNDING_BOX;
obj.shape.dimensions =
tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height);
if (has_variance) {
obj.kinematics.has_position_covariance = has_variance;
obj.kinematics.pose_with_covariance.covariance = convertPoseCovarianceMatrix(box3d);
}

// twist
if (has_twist) {
float vel_x = box3d.vel_x;
float vel_y = box3d.vel_y;
geometry_msgs::msg::Twist twist;
twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - yaw);
obj.kinematics.twist_with_covariance.twist = twist;
obj.kinematics.has_twist = has_twist;
if (has_variance) {
obj.kinematics.has_twist_covariance = has_variance;
obj.kinematics.twist_with_covariance.covariance = convertTwistCovarianceMatrix(box3d);
}

}
}

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main), check warning on line 78 in perception/lidar_centerpoint/lib/ros_utils.cpp: ❌ New issue: Excess Number of Function Arguments. box3DToDetectedObject has 5 arguments, threshold = 4. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.

@@ -92,4 +92,24 @@
}
}

std::array<double, 36> convertPoseCovarianceMatrix(const Box3D & box3d)
{
using POSE_IDX = tier4_autoware_utils::xyzrpy_covariance_index::XYZRPY_COV_IDX;
std::array<double, 36> pose_covariance{};
pose_covariance[POSE_IDX::X_X] = box3d.x_variance;
pose_covariance[POSE_IDX::Y_Y] = box3d.y_variance;
pose_covariance[POSE_IDX::Z_Z] = box3d.z_variance;
pose_covariance[POSE_IDX::YAW_YAW] = box3d.yaw_variance;
return pose_covariance;
}

std::array<double, 36> convertTwistCovarianceMatrix(const Box3D & box3d)
{
using POSE_IDX = tier4_autoware_utils::xyzrpy_covariance_index::XYZRPY_COV_IDX;
std::array<double, 36> twist_covariance{};
twist_covariance[POSE_IDX::X_X] = box3d.vel_x_variance;
twist_covariance[POSE_IDX::Y_Y] = box3d.vel_y_variance;
return twist_covariance;
}

} // namespace centerpoint
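The covariance arrays filled above are the standard ROS 6x6 row-major matrices over (x, y, z, roll, pitch, yaw); only the diagonal entries the network estimates are populated (X, Y, Z, YAW for pose; X, Y for twist). A small illustrative sketch of the index arithmetic, not the real `XYZRPY_COV_IDX` definition from tier4_autoware_utils:

```cpp
#include <array>
#include <cstddef>

// Illustration only: in a row-major 6x6 covariance (the layout used by
// geometry_msgs/PoseWithCovariance), the diagonal entry for axis i is i * 6 + i.
constexpr std::size_t diag(std::size_t axis) { return axis * 6 + axis; }

std::array<double, 36> make_diagonal_pose_covariance(
  double var_x, double var_y, double var_z, double var_yaw)
{
  std::array<double, 36> cov{};  // off-diagonal terms stay zero
  cov[diag(0)] = var_x;          // X_X     -> index 0
  cov[diag(1)] = var_y;          // Y_Y     -> index 7
  cov[diag(2)] = var_z;          // Z_Z     -> index 14
  cov[diag(5)] = var_yaw;        // YAW_YAW -> index 35
  return cov;
}
```

The twist covariance is filled the same way, using only the X and Y diagonal entries.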
7 changes: 4 additions & 3 deletions perception/lidar_centerpoint/src/node.cpp
@@ -55,54 +55,55 @@
const std::string head_onnx_path = this->declare_parameter<std::string>("head_onnx_path");
const std::string head_engine_path = this->declare_parameter<std::string>("head_engine_path");
class_names_ = this->declare_parameter<std::vector<std::string>>("class_names");
has_twist_ = this->declare_parameter("has_twist", false);
has_variance_ = this->declare_parameter<bool>("has_variance");
has_twist_ = this->declare_parameter<bool>("has_twist");
const std::size_t point_feature_size =
static_cast<std::size_t>(this->declare_parameter<std::int64_t>("point_feature_size"));
const std::size_t max_voxel_size =
static_cast<std::size_t>(this->declare_parameter<std::int64_t>("max_voxel_size"));
const auto point_cloud_range = this->declare_parameter<std::vector<double>>("point_cloud_range");
const auto voxel_size = this->declare_parameter<std::vector<double>>("voxel_size");
const std::size_t downsample_factor =
static_cast<std::size_t>(this->declare_parameter<std::int64_t>("downsample_factor"));
const std::size_t encoder_in_feature_size =
static_cast<std::size_t>(this->declare_parameter<std::int64_t>("encoder_in_feature_size"));
const auto allow_remapping_by_area_matrix =
this->declare_parameter<std::vector<int64_t>>("allow_remapping_by_area_matrix");
const auto min_area_matrix = this->declare_parameter<std::vector<double>>("min_area_matrix");
const auto max_area_matrix = this->declare_parameter<std::vector<double>>("max_area_matrix");

detection_class_remapper_.setParameters(
allow_remapping_by_area_matrix, min_area_matrix, max_area_matrix);

{
NMSParams p;
p.nms_type_ = NMS_TYPE::IoU_BEV;
p.target_class_names_ =
this->declare_parameter<std::vector<std::string>>("iou_nms_target_class_names");
p.search_distance_2d_ = this->declare_parameter<double>("iou_nms_search_distance_2d");
p.iou_threshold_ = this->declare_parameter<double>("iou_nms_threshold");
iou_bev_nms_.setParameters(p);
}

NetworkParam encoder_param(encoder_onnx_path, encoder_engine_path, trt_precision);
NetworkParam head_param(head_onnx_path, head_engine_path, trt_precision);
DensificationParam densification_param(
densification_world_frame_id, densification_num_past_frames);

if (point_cloud_range.size() != 6) {
RCLCPP_WARN_STREAM(
rclcpp::get_logger("lidar_centerpoint"),
"The size of point_cloud_range != 6: use the default parameters.");
}
if (voxel_size.size() != 3) {
RCLCPP_WARN_STREAM(
rclcpp::get_logger("lidar_centerpoint"),
"The size of voxel_size != 3: use the default parameters.");
}
CenterPointConfig config(
class_names_.size(), point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
yaw_norm_thresholds);
yaw_norm_thresholds, has_variance_);

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main), check warning on line 106 in perception/lidar_centerpoint/src/node.cpp: ❌ Getting worse: Large Method. LidarCenterPointNode::LidarCenterPointNode increases from 85 to 86 lines of code, threshold = 70. Large functions with many lines of code are generally harder to understand and lower the code health. Avoid adding more lines to this function.
detector_ptr_ =
std::make_unique<CenterPointTRT>(encoder_param, head_param, densification_param, config);

Expand Down Expand Up @@ -152,7 +153,7 @@
raw_objects.reserve(det_boxes3d.size());
for (const auto & box3d : det_boxes3d) {
autoware_auto_perception_msgs::msg::DetectedObject obj;
box3DToDetectedObject(box3d, class_names_, has_twist_, obj);
box3DToDetectedObject(box3d, class_names_, has_twist_, has_variance_, obj);
raw_objects.emplace_back(obj);
}

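With `has_variance: true` in the model config, downstream nodes can read the published uncertainty directly from `DetectedObject.kinematics`. A minimal consumer-side sketch; the message fields are the ones used in this diff, everything else is illustrative:

```cpp
#include <cmath>

#include <autoware_auto_perception_msgs/msg/detected_object.hpp>

// Returns the 1-sigma position uncertainty along x, or a negative value when
// the detector did not publish a covariance (has_position_covariance == false).
double position_sigma_x(const autoware_auto_perception_msgs::msg::DetectedObject & obj)
{
  if (!obj.kinematics.has_position_covariance) {
    return -1.0;
  }
  // Row-major 6x6 pose covariance; index 0 is Var(x).
  return std::sqrt(obj.kinematics.pose_with_covariance.covariance[0]);
}
```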