Skip to content

Commit

Permalink
dav2: Working on jetson, output weird
Browse files Browse the repository at this point in the history
  • Loading branch information
marcojob committed Nov 12, 2024
1 parent fc36a4a commit a2db862
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 65 deletions.
65 changes: 51 additions & 14 deletions include/usb_cam/learning/depth_anything_v2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@

class DepthAnythingV2 : public LearningInterface {
public:
DepthAnythingV2(ros::NodeHandle* nh, std::string model_path, std::string topic) {
DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic, std::string& vis_topic) {
_INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH);
_model_path = model_path;
_load_model();

if (nh != nullptr) {
_depth_publication = nh->advertise<sensor_msgs::Image>(topic, 1);
if (!metric_topic.empty()) {
_depth_metric_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1);
}
if (!vis_topic.empty()) {
_depth_vis_pub = nh->advertise<sensor_msgs::Image>(vis_topic, 1);
}

}
}

Expand All @@ -35,27 +41,58 @@ class DepthAnythingV2 : public LearningInterface {
}
cv::Mat float_image;
cv::merge(channels, float_image);
_input_data = float_image.reshape(1, 1).ptr<float>(0);

// Normalize the image using the mean and std for each channel
// Mean and std are typically for [R, G, B] channels respectively
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> std = {0.229f, 0.224f, 0.225f};
for (int c = 0; c < 3; ++c) {
cv::Mat channel = channels[c];
channel = (channel - mean[c]) / std[c];
}

cv::merge(channels, float_image);
memcpy(_input_data, float_image.data, _input_size_float);
}

void publish() override {
if (_depth_publication.getTopic() != "") {
cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data);
void publish() override {
cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data);
if (_depth_metric_pub.getTopic() != "") {
// Raw depth prediction (in meters, CV_32FC1)
cv_bridge::CvImage depth_image;
depth_image.header.stamp = ros::Time::now();
depth_image.header.frame_id = "depth_frame";
depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; // 32-bit float
depth_image.image = depth_prediction;
_depth_metric_pub.publish(depth_image.toImageMsg());

cv_bridge::CvImage depth_image;
depth_image.header.stamp = ros::Time::now();
depth_image.header.frame_id = "depth_frame";
depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1;
depth_image.image = depth_prediction;
_depth_publication.publish(depth_image.toImageMsg());
}
}

if (_depth_vis_pub.getTopic() != "") {
cv::Mat depth_visualized;
double min_val, max_val;
cv::minMaxLoc(depth_prediction, &min_val, &max_val);

// Normalize depth image for visualization
// Ignore values <= 0 (i.e., no depth / invalid depth)
cv::normalize(depth_prediction, depth_visualized, 0, 255, cv::NORM_MINMAX);
depth_visualized.convertTo(depth_visualized, CV_8UC1);

cv_bridge::CvImage visualized_image;
visualized_image.header.stamp = ros::Time::now();
visualized_image.header.frame_id = "depth_frame";
visualized_image.encoding = sensor_msgs::image_encodings::MONO8; // 8-bit grayscale for visualization
visualized_image.image = depth_visualized;
_depth_vis_pub.publish(visualized_image.toImageMsg());
}
}

private:
const size_t _HEIGHT = 518;
const size_t _WIDTH = 518;
cv::Size _INPUT_SIZE;
ros::Publisher _depth_publication;
ros::Publisher _depth_metric_pub;
ros::Publisher _depth_vis_pub;
};

#endif // DEPTH_ANYTHING_HPP_
6 changes: 4 additions & 2 deletions include/usb_cam/learning/interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,17 @@ class LearningInterface {
nvinfer1::IExecutionContext* _context;
nvinfer1::INetworkDefinition* _network;
nvinfer1::IRuntime* _runtime;
size_t _input_c, _input_h, _input_w, _input_size_float;
size_t _output_c, _output_h, _output_w, _output_size_float;
std::string _model_path;

void _load_model();

private:
static constexpr size_t JETSON_MEM_LIMIT_B{3ULL * 1024 * 1024 * 1024};
static std::mutex predict_mutex;
void* _buffers[2] = { nullptr, nullptr };

// Global mutex for prediction calls
static std::mutex predict_mutex;

// TODO: static?
class Logger : public nvinfer1::ILogger {
Expand Down
3 changes: 2 additions & 1 deletion include/usb_cam/usb_cam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ typedef struct
std::string pixel_format_name;
std::string av_device_format;
std::string dav2_file;
std::string dav2_topic;
std::string dav2_metric_topic;
std::string dav2_vis_topic;
int image_width;
int image_height;
int framerate;
Expand Down
10 changes: 6 additions & 4 deletions launch/two_mipi_rgb.launch
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
<param name="wb_green_gain" value="0.88"/>
<param name="wb_red_gain" value="1.18"/>

<param name="depth_anything_v2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx" />
<param name="depth_anything_v2_topic" value="/depth0" />
<param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_vits14.onnx" />
<param name="dav2_metric_topic" value="/depth0/relative" />
<param name="dav2_vis_topic" value="/depth0/vis" />
</node>

<node name="cam1" pkg="usb_cam" type="usb_cam_node" output="screen" >
Expand All @@ -39,7 +40,8 @@
<param name="wb_green_gain" value="0.88"/>
<param name="wb_red_gain" value="1.18"/>

<!-- <param name="depth_anything_v2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx" /> -->
<!-- <param name="depth_anything_v2_topic" value="/depth1" /> -->
<!-- <param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits_16_indoor.onnx" /> -->
<!-- <param name="dav2_metric_topic" value="/depth1/relative" /> -->
<!-- <param name="dav2_vis_topic" value="/depth1/vis" /> -->
</node>
</launch>
88 changes: 48 additions & 40 deletions src/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,44 @@ std::mutex LearningInterface::predict_mutex;
void LearningInterface::_load_model() {
std::ifstream file_check(_model_path);
if (!file_check.good()) {
std::cerr << "Error: " << _model_path << "' does not exist." << std::endl;
std::cerr << "Error: " << _model_path << " does not exist." << std::endl;
throw std::runtime_error("Model file not found");
}
file_check.close();

if (_model_path.find(".onnx") == std::string::npos) {
std::ifstream engine_stream(_model_path, std::ios::binary);
engine_stream.seekg(0, std::ios::end);
// Define the expected .engine path based on the .onnx model path
std::string engine_path = _model_path.substr(0, _model_path.find_last_of('.')) + ".engine";

const size_t model_size = engine_stream.tellg();
engine_stream.seekg(0, std::ios::beg);
if (_model_path.find(".onnx") != std::string::npos) {
// Check if the engine file already exists
std::ifstream engine_check(engine_path, std::ios::binary);

std::unique_ptr<char[]> engine_data(new char[model_size]);
engine_stream.read(engine_data.get(), model_size);
engine_stream.close();
if (engine_check.good()) {
engine_check.seekg(0, std::ios::end);
const size_t model_size = engine_check.tellg();
engine_check.seekg(0, std::ios::beg);

// Create tensorrt model
_runtime = nvinfer1::createInferRuntime(_logger);
_engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size);
_context = _engine->createExecutionContext();
std::unique_ptr<char[]> engine_data(new char[model_size]);
engine_check.read(engine_data.get(), model_size);
engine_check.close();

// Create TensorRT runtime and load engine
_runtime = nvinfer1::createInferRuntime(_logger);
_engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size);
_context = _engine->createExecutionContext();

} else {
// Build an engine from the .onnx model and save it as .engine
_build(_model_path);
_save_engine(engine_path);
}

} else {
// Build an engine from an onnx model
_build(_model_path);
_save_engine(_model_path);
std::cerr << "Error: Only .onnx model files are supported." << std::endl;
throw std::runtime_error("Unsupported model format");
}

// Define input dimensions
// TODO: Check dimensions, set as members
#if NV_TENSORRT_MAJOR < 10
auto input_dims = _engine->getBindingDimensions(0);
auto output_dims = _engine->getBindingDimensions(1);
Expand All @@ -46,23 +55,33 @@ void LearningInterface::_load_model() {
auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1));
#endif

const size_t input_h = input_dims.d[2];
const size_t input_w = input_dims.d[3];
ROS_INFO("%ld, %ld", input_h, input_w);
// TODO: THis does not generalize so well
_input_c = input_dims.d[1];
_input_h = input_dims.d[2];
_input_w = input_dims.d[3];
_output_c = output_dims.d[0];
_output_h = output_dims.d[1];
_output_w = output_dims.d[2];
_input_size_float = _input_c * _input_h * _input_w * sizeof(float);
_output_size_float = _output_c * _output_h * _output_w * sizeof(float);

cudaStreamCreate(&_stream);

cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float));
cudaMalloc(&_buffers[1], input_h * input_w * sizeof(float));
cudaMalloc(&_buffers[0], _input_size_float);
cudaMalloc(&_buffers[1], _output_size_float);

_output_data = new float[input_h * input_w];
_input_data = new float[_input_c * _input_h * _input_w];
_output_data = new float[_output_c * _output_h * _output_w];
}

void LearningInterface::_build(std::string onnx_path) {
auto builder = createInferBuilder(_logger);
const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
IBuilderConfig* config = builder->createBuilderConfig();

// TODO: What about different hardware?
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B);
config->setFlag(BuilderFlag::kFP16);
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger);
bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO));
Expand All @@ -78,16 +97,7 @@ void LearningInterface::_build(std::string onnx_path) {
delete plan;
}

bool LearningInterface::_save_engine(const std::string& onnx_path) {
std::string engine_path;
size_t dot_index = onnx_path.find_last_of(".");
if (dot_index != std::string::npos) {
engine_path = onnx_path.substr(0, dot_index) + ".engine";

} else {
return false;
}

bool LearningInterface::_save_engine(const std::string& engine_path) {
if (_engine) {
nvinfer1::IHostMemory* data = _engine->serialize();
std::ofstream file;
Expand All @@ -107,9 +117,7 @@ bool LearningInterface::_save_engine(const std::string& onnx_path) {

void LearningInterface::predict() {
if (predict_mutex.try_lock()) {
ROS_INFO("INSIDE PREDICT");
// TODO: Use input, output size members to set memcpy size correctly
cudaMemcpyAsync(_buffers[0], _input_data, 0, cudaMemcpyHostToDevice, _stream);
cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream);

#if NV_TENSORRT_MAJOR < 10
_context->enqueueV2(_buffers, _stream, nullptr);
Expand All @@ -118,17 +126,17 @@ void LearningInterface::predict() {
#endif

cudaStreamSynchronize(_stream);
// TODO: Use input, output size members to set memcpy size correctly
cudaMemcpyAsync(_output_data, _buffers[1], 0, cudaMemcpyDeviceToHost);
cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost);
predict_mutex.unlock();

} else {
ROS_INFO("predict in progress, skipping");
}
}

LearningInterface::~LearningInterface() {
cudaFree(_stream);
cudaFree(_buffers[0]);
cudaFree(_buffers[1]);

delete[] _input_data;
delete[] _output_data;
}
9 changes: 5 additions & 4 deletions src/usb_cam_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,12 @@ class UsbCamNode {
}

// Setup the network that outputs derivates of the image captured
m_node.param("depth_anything_v2_file", m_parameters.dav2_file, std::string(""));
m_node.param("depth_anything_v2_topic", m_parameters.dav2_topic, std::string(""));
m_node.param("dav2_file", m_parameters.dav2_file, std::string(""));
m_node.param("dav2_metric_topic", m_parameters.dav2_metric_topic, std::string(""));
m_node.param("dav2_vis_topic", m_parameters.dav2_vis_topic, std::string(""));

if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) {
networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic));
if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_metric_topic.empty()) {
networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_metric_topic, m_parameters.dav2_vis_topic));
}

ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS",
Expand Down

0 comments on commit a2db862

Please sign in to comment.