Skip to content

Commit

Permalink
feat(tensorrt yolox): inference and publish mask image from yolox mod…
Browse files Browse the repository at this point in the history
…el with semantic segmentation header (#5553)

* add segmentation model

Signed-off-by: badai-nguyen <[email protected]>

fix: add multitask for segment

Signed-off-by: badai-nguyen <[email protected]>

* fix: publish mask

Signed-off-by: badai-nguyen <[email protected]>

* feat: publish colorized mask

Signed-off-by: badai-nguyen <[email protected]>

* fix: resize yolox mask

Signed-off-by: badai-nguyen <[email protected]>

* fix: add memory allocate operations

Signed-off-by: Manato HIRABAYASHI <[email protected]>

* refactor: remove underscore for a local variable

Signed-off-by: Manato HIRABAYASHI <[email protected]>

* chore: add condition to check the number of subscriber for newly added topics

Signed-off-by: Manato HIRABAYASHI <[email protected]>

* chore: pre-commit

Signed-off-by: badai-nguyen <[email protected]>

* fix: add roi overlapping segment

Signed-off-by: badai-nguyen <[email protected]>

* fix: roi overlay segment

Signed-off-by: badai-nguyen <[email protected]>

* chore: refactor

Signed-off-by: badai-nguyen <[email protected]>

* docs: update readme

Signed-off-by: badai-nguyen <[email protected]>

* fix: update model name

Signed-off-by: badai-nguyen <[email protected]>

* fix: add utils into tensorrt_yolox

Signed-off-by: badai-nguyen <[email protected]>

* fix: launch file

Signed-off-by: badai-nguyen <[email protected]>

* chore: remove unnecessary depend

Signed-off-by: badai-nguyen <[email protected]>

* chore: fix yaml file

Signed-off-by: badai-nguyen <[email protected]>

* chore: remove duplicated param in launch

Signed-off-by: badai-nguyen <[email protected]>

* fix: semantic class

Signed-off-by: badai-nguyen <[email protected]>

* docs: update readme

Signed-off-by: badai-nguyen <[email protected]>

* chore: update default param

Signed-off-by: badai-nguyen <[email protected]>

* fix: add processing time topic

Signed-off-by: badai-nguyen <[email protected]>

* chore: typo

Signed-off-by: badai-nguyen <[email protected]>

* style(pre-commit): autofix

* chore: fix cspell error

Signed-off-by: badai-nguyen <[email protected]>

* fix: yolox default param

Signed-off-by: badai-nguyen <[email protected]>

* chore: rename debug topics

Signed-off-by: badai-nguyen <[email protected]>

* chore: rename debug topics

Signed-off-by: badai-nguyen <[email protected]>

* docs: update model description

Signed-off-by: badai-nguyen <[email protected]>

* style(pre-commit): autofix

* fix: launch

Signed-off-by: badai-nguyen <[email protected]>

* refactor: unpublish mask for single task

Signed-off-by: badai-nguyen <[email protected]>

* Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <[email protected]>

* Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <[email protected]>

* Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <[email protected]>

* style(pre-commit): autofix

* docs: update reamde

Signed-off-by: badai-nguyen <[email protected]>

* style(pre-commit): autofix

* fix: skip mask size as yolox output

Signed-off-by: badai-nguyen <[email protected]>

---------

Signed-off-by: badai-nguyen <[email protected]>
Signed-off-by: Manato HIRABAYASHI <[email protected]>
Co-authored-by: Manato HIRABAYASHI <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Manato Hirabayashi <[email protected]>
  • Loading branch information
4 people authored Jun 24, 2024
1 parent 60b05b0 commit 2ca276f
Show file tree
Hide file tree
Showing 11 changed files with 614 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_

#ifndef YOLOX_STANDALONE
#include <rclcpp/rclcpp.hpp>
#endif

#include <NvInfer.h>
#include <NvOnnxParser.h>
Expand Down Expand Up @@ -86,6 +88,7 @@ struct BuildConfig
profile_per_layer(profile_per_layer),
clip_value(clip_value)
{
#ifndef YOLOX_STANDALONE
if (
std::find(valid_calib_type.begin(), valid_calib_type.end(), calib_type_str) ==
valid_calib_type.end()) {
Expand All @@ -95,6 +98,7 @@ struct BuildConfig
<< "Default calibration type will be used: MinMax" << std::endl;
std::cerr << message.str();
}
#endif
}
};

Expand Down
84 changes: 61 additions & 23 deletions perception/tensorrt_yolox/README.md

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
# cspell: ignore semseg
/**:
ros__parameters:
# refine segmentation mask by overlay roi class
# disable when sematic segmentation accuracy is good enough
is_roi_overlap_segment: true

# minimum existence_probability of detected roi considered to replace segmentation
overlap_roi_score_threshold: 0.3

# publish color mask for result visualization
is_publish_color_mask: false

roi_overlay_segment_label:
UNKNOWN : true
CAR : false
TRUCK : false
BUS : false
MOTORCYCLE : true
BICYCLE : true
PEDESTRIAN : true
ANIMAL: true

model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
label_path: "$(var data_path)/tensorrt_yolox/label.txt"
color_map_path: "$(var data_path)/tensorrt_yolox/semseg_color_map.csv"
score_threshold: 0.35
nms_threshold: 0.7
precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].
Expand Down
17 changes: 16 additions & 1 deletion perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,21 @@ extern void crop_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h,
int s_c, int batch, float norm, cudaStream_t stream);
} // namespace tensorrt_yolox

/**
* @brief Argmax on GPU
* @param[out] dst processed image
* @param[in] src probability map
* @param[in] d_w width for output
* @param[in] d_h height for output
* @param[in] s_w width for input
* @param[in] s_h height for input
* @param[in] s_c channel for input
* @param[in] batch batch size
* @param[in] stream cuda stream
*/
extern void argmax_gpu(
unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
cudaStream_t stream);
} // namespace tensorrt_yolox
#endif // TENSORRT_YOLOX__PREPROCESS_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ struct GridAndStride
int stride;
};

typedef struct Colormap_
{
int id;
std::string name;
std::vector<unsigned char> color;
} Colormap;

/**
* @class TrtYoloX
* @brief TensorRT YOLOX for faster inference
Expand Down Expand Up @@ -85,7 +92,7 @@ class TrtYoloX
const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
const size_t max_workspace_size = (1 << 30));
const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
/**
* @brief Deconstruct TrtYoloX
*/
Expand All @@ -96,7 +103,9 @@ class TrtYoloX
* @param[out] objects results for object detection
* @param[in] images batched images
*/
bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
Expand Down Expand Up @@ -130,6 +139,22 @@ class TrtYoloX
*/
void printProfiling(void);

/**
* @brief get num for multitask heads
*/
int getMultitaskNum(void);

/**
* @brief get colorized masks from index using specific colormap
* @param[out] cmask colorized mask
* @param[in] index multitask index
* @param[in] colormap colormap for masks
*/
void getColorizedMask(
const std::vector<tensorrt_yolox::Colormap> & colormap, const cv::Mat & mask,
cv::Mat & colorized_mask);
inline std::vector<Colormap> getColorMap() { return sematic_color_map_; }

private:
/**
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
Expand Down Expand Up @@ -177,7 +202,9 @@ class TrtYoloX
const cv::Mat & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
const int target_w, const int target_h, const std::vector<int> & strides,
Expand Down Expand Up @@ -206,6 +233,26 @@ class TrtYoloX
void nmsSortedBboxes(
const ObjectArray & face_objects, std::vector<int> & picked, float nms_threshold) const;

/**
* @brief get a mask image for a segmentation head
* @param[out] argmax argmax results
* @param[in] prob probability map
* @param[in] dims dimension for probability map
* @param[in] out_w mask width excluding letterbox
* @param[in] out_h mask height excluding letterbox
*/
cv::Mat getMaskImage(float * prob, nvinfer1::Dims dims, int out_w, int out_h);

/**
* @brief get a mask image on GPUs for a segmentation head
* @param[out] mask image
* @param[in] prob probability map on device
* @param[in] out_w mask width excluding letterbox
* @param[in] out_h mask height excluding letterbox
* @param[in] b current batch
*/
cv::Mat getMaskImageGpu(float * d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);

std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;

std::vector<float> input_h_;
Expand Down Expand Up @@ -249,6 +296,20 @@ class TrtYoloX
CudaUniquePtrHost<Roi[]> roi_h_;
// device pointer for ROI
CudaUniquePtr<Roi[]> roi_d_;

// flag whether model has multitasks
int multitask_;
// buff size for segmentation heads
CudaUniquePtr<float[]> segmentation_out_prob_d_;
CudaUniquePtrHost<float[]> segmentation_out_prob_h_;
size_t segmentation_out_elem_num_;
size_t segmentation_out_elem_num_per_batch_;
std::vector<cv::Mat> segmentation_masks_;
// host buffer for argmax postprocessing on GPU
CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
// device buffer for argmax postprocessing on GPU
CudaUniquePtr<unsigned char[]> argmax_buf_d_;
std::vector<tensorrt_yolox::Colormap> sematic_color_map_;
};

} // namespace tensorrt_yolox
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_

#include "object_recognition_utils/object_recognition_utils.hpp"

#include <autoware/universe_utils/ros/debug_publisher.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
#include <image_transport/image_transport.hpp>
Expand All @@ -25,6 +27,7 @@
#include <sensor_msgs/msg/image.hpp>
#include <std_msgs/msg/header.hpp>
#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
#include <tier4_perception_msgs/msg/semantic.hpp>

#if __has_include(<cv_bridge/cv_bridge.hpp>)
#include <cv_bridge/cv_bridge.hpp>
Expand All @@ -41,10 +44,30 @@

namespace tensorrt_yolox
{
// cspell: ignore Semseg
using LabelMap = std::map<int, std::string>;

using Label = tier4_perception_msgs::msg::Semantic;
class TrtYoloXNode : public rclcpp::Node
{
struct RoiOverlaySemsegLabel
{
bool UNKNOWN;
bool CAR;
bool TRUCK;
bool BUS;
bool MOTORCYCLE;
bool BICYCLE;
bool PEDESTRIAN;
bool ANIMAL;
bool isOverlay(const uint8_t label) const
{
return (label == Label::UNKNOWN && UNKNOWN) || (label == Label::CAR && CAR) ||
(label == Label::TRUCK && TRUCK) || (label == Label::BUS && BUS) ||
(label == Label::ANIMAL && ANIMAL) || (label == Label::MOTORBIKE && MOTORCYCLE) ||
(label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
};
}; // struct RoiOverlaySemsegLabel

public:
explicit TrtYoloXNode(const rclcpp::NodeOptions & node_options);

Expand All @@ -53,8 +76,13 @@ class TrtYoloXNode : public rclcpp::Node
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
bool readLabelFile(const std::string & label_path);
void replaceLabelMap();

void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;
image_transport::Publisher color_mask_pub_;

rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;

image_transport::Subscriber image_sub_;
Expand All @@ -63,6 +91,21 @@ class TrtYoloXNode : public rclcpp::Node

LabelMap label_map_;
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
float overlap_roi_score_threshold_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
{"UNKNOWN", 3}, // other
{"ANIMAL", 0}, // other
{"PEDESTRIAN", 6}, // person
{"CAR", 7}, // car
{"TRUCK", 7}, // truck
{"BUS", 7}, // bus
{"BICYCLE", 8}, // bicycle
{"MOTORBIKE", 8}, // motorcycle
};
RoiOverlaySemsegLabel roi_overlay_segment_labels_;
std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_;
std::unique_ptr<autoware::universe_utils::DebugPublisher> debug_publisher_;
};
Expand Down
12 changes: 10 additions & 2 deletions perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
<?xml version="1.0"?>
<launch>
<!-- cspell:ignore finetune -->
<!-- cspell: ignore semseg, finetune -->
<arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
<arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
<arg name="model_name" default="yolox-sPlus-T4-960x960-pseudo-finetune"/>
<arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
<arg
name="model_name"
default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"
description="options `yolox-sPlus-T4-960x960-pseudo-finetune` if only detection is needed, `yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls` if sematic segmentation is also needed"
/>
<arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
<arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
<arg name="use_decompress" default="true" description="use image decompress"/>
<arg name="build_only" default="false" description="exit after trt engine is built"/>

<arg name="param_file" default="$(find-pkg-share image_transport_decompressor)/config/image_transport_decompressor.param.yaml"/>
<node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
<remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
<remap from="~/output/raw_image" to="$(var input/image)"/>
<param from="$(var param_file)"/>
</node>

<node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">
<remap from="~/in/image" to="$(var input/image)"/>
<remap from="~/out/objects" to="$(var output/objects)"/>
<remap from="~/out/mask" to="$(var output/mask)"/>
<param from="$(var yolox_param_path)" allow_substs="true"/>
<param name="build_only" value="$(var build_only)"/>
</node>
Expand Down
35 changes: 35 additions & 0 deletions perception/tensorrt_yolox/src/preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -594,4 +594,39 @@ void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
cuda_gridsize(N), block, 0, stream>>>(N, dst, src, d_h, d_w, s_h, s_w, d_roi, norm, batch);
}
__global__ void argmax_gpu_kernel(
int N, unsigned char * dst, float * src, int dst_h, int dst_w, int src_c, int src_h, int src_w,
int batch)
{
// NHWC
int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
if (index >= N) return;
int c = 0;
int w = index % dst_w;
int h = index / (dst_w);
int b;
for (b = 0; b < batch; b++) {
float max_prob = 0.0;
int max_index = 0;
int dst_index = w + dst_w * h + b * dst_h * dst_w;
for (c = 0; c < src_c; c++) {
int src_index = w + src_w * h + c * src_h * src_w + b * src_c * src_h * src_w;
max_index = max_prob < src[src_index] ? c : max_index;
max_prob = max_prob < src[src_index] ? src[src_index] : max_prob;
}
dst[dst_index] = max_index;
}
}
void argmax_gpu(
unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
cudaStream_t stream)
{
int N = d_w * d_h;
argmax_gpu_kernel<<<cuda_gridsize(N), block, 0, stream>>>(
N, dst, src, d_h, d_w, s_c, s_h, s_w, batch);
}
} // namespace tensorrt_yolox
Loading

0 comments on commit 2ca276f

Please sign in to comment.