From fc36a4a37fa3f5f6ec2708712bec9498fee3ba39 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:58:45 +0100 Subject: [PATCH 01/10] dav2: Try to run on jetson, hitting mem limit --- .devcontainer/devcontainer.json | 3 +- .vscode/launch.json | 2 +- CMakeLists.txt | 7 +-- .../usb_cam/learning/depth_anything_v2.hpp | 5 +- include/usb_cam/learning/interface.hpp | 7 ++- include/usb_cam/usb_cam.hpp | 2 + launch/two_mipi_rgb.launch | 6 +++ src/interface.cpp | 51 +++++++++++++++---- src/usb_cam_node.cpp | 12 +++-- test/test_depth_anything_v2.cpp | 3 +- 10 files changed, 76 insertions(+), 22 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 74169662..f9d850bb 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,8 @@ "customizations": { "vscode": { "extensions": [ - "ms-vscode.cpptools" + "ms-vscode.cpptools", + "ms-iot.vscode-ros" ], "settings": { "files.hotExit": "off", diff --git a/.vscode/launch.json b/.vscode/launch.json index c4cbee9f..66c3ed29 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,4 +23,4 @@ "miDebuggerPath": "/usr/bin/gdb" } ] -} \ No newline at end of file +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cd1cf54..91e55de3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,13 +97,14 @@ catkin_package( add_executable(${PROJECT_NAME}_node src/usb_cam_node.cpp) target_link_libraries(${PROJECT_NAME}_node ${PROJECT_NAME} - ${catkin_LIBRARIES} ${avcodec_LIBRARIES} ${avutil_LIBRARIES} - ${swscale_LIBRARIES} - ${NVINFER} + ${catkin_LIBRARIES} + ${CUDA_LIBRARIES} ${NVINFER_PLUGIN} + ${NVINFER} ${NVONNXPARSER} + ${swscale_LIBRARIES} ) set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed") diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 3e348fa4..978d7a20 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,12 +9,13 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string model_path) { + DepthAnythingV2(ros::NodeHandle* nh, std::string model_path, std::string topic) { _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH); _model_path = model_path; + _load_model(); if (nh != nullptr) { - _depth_publication = nh->advertise<sensor_msgs::Image>("depth_anything_v2", 1); + _depth_publication = nh->advertise<sensor_msgs::Image>(topic, 1); } } diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp index 8d1d1e0d..336670fc 100644 --- a/include/usb_cam/learning/interface.hpp +++ b/include/usb_cam/learning/interface.hpp @@ -4,6 +4,7 @@ #include #include #include +#include <mutex> #include #include #include @@ -18,7 +19,6 @@ class LearningInterface { virtual void set_input(sensor_msgs::Image& image) = 0; virtual void publish() = 0; - void load_model(); void predict(); nvinfer1::ICudaEngine* get_engine() { return _engine; } @@ -37,9 +37,14 @@ class LearningInterface { nvinfer1::IRuntime* _runtime; std::string _model_path; + void _load_model(); + private: void* _buffers[2] = { nullptr, nullptr }; + // Global mutex for prediction calls + static std::mutex predict_mutex; + // TODO: static?
class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp index d249f6e6..fdce6523 100644 --- a/include/usb_cam/usb_cam.hpp +++ b/include/usb_cam/usb_cam.hpp @@ -119,6 +119,8 @@ typedef struct // or guvcview std::string pixel_format_name; std::string av_device_format; + std::string dav2_file; + std::string dav2_topic; int image_width; int image_height; int framerate; diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 8d3f840e..24fcfded 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -17,6 +17,9 @@ + + + @@ -35,5 +38,8 @@ + + + diff --git a/src/interface.cpp b/src/interface.cpp index 2e64109f..221c8684 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -1,9 +1,19 @@ #include "usb_cam/learning/interface.hpp" #include <fstream> +#include <ros/ros.h> using namespace nvinfer1; -void LearningInterface::load_model() { +std::mutex LearningInterface::predict_mutex; + +void LearningInterface::_load_model() { + std::ifstream file_check(_model_path); + if (!file_check.good()) { + std::cerr << "Error: " << _model_path << "' does not exist." << std::endl; + throw std::runtime_error("Model file not found"); + } + file_check.close(); + if (_model_path.find(".onnx") == std::string::npos) { std::ifstream engine_stream(_model_path, std::ios::binary); engine_stream.seekg(0, std::ios::end); @@ -27,11 +37,19 @@ void LearningInterface::load_model() { } // Define input dimensions - const auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); - const int input_h = input_dims.d[2]; - const int input_w = input_dims.d[3]; + // TODO: Check dimensions, set as members +#if NV_TENSORRT_MAJOR < 10 + auto input_dims = _engine->getBindingDimensions(0); + auto output_dims = _engine->getBindingDimensions(1); +#else + auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); + auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1)); +#endif + + const size_t input_h = input_dims.d[2]; + const size_t input_w = input_dims.d[3]; + ROS_INFO("%ld, %ld", input_h, input_w); - // Create CUDA stream cudaStreamCreate(&_stream); cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float)); cudaMalloc(&_buffers[1], input_h * input_w * sizeof(float)); @@ -88,10 +106,25 @@ bool LearningInterface::_save_engine(const std::string& onnx_path) { } void LearningInterface::predict() { - cudaMemcpyAsync(_buffers[0], _input_data, sizeof(_input_data) * sizeof(float), cudaMemcpyHostToDevice, _stream); - _context->executeV2(_buffers); - cudaStreamSynchronize(_stream); - cudaMemcpyAsync(_output_data, _buffers[1], sizeof(_input_data) * sizeof(float), cudaMemcpyDeviceToHost); + if (predict_mutex.try_lock()) { + ROS_INFO("INSIDE PREDICT"); + // TODO: Use input, output size members to set memcpy size correctly + cudaMemcpyAsync(_buffers[0], _input_data, 0, cudaMemcpyHostToDevice, _stream); + +#if NV_TENSORRT_MAJOR < 10 + _context->enqueueV2(_buffers, _stream, nullptr); +#else + _context->executeV2(_buffers); +#endif + + cudaStreamSynchronize(_stream); + // TODO: Use input, output size members to set memcpy size correctly + cudaMemcpyAsync(_output_data, _buffers[1], 0, cudaMemcpyDeviceToHost); + predict_mutex.unlock(); + + } else { + ROS_INFO("predict in progress, skipping"); + } } LearningInterface::~LearningInterface() { diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp index 517a43e7..1776330b 100644 --- a/src/usb_cam_node.cpp +++ b/src/usb_cam_node.cpp @@ -71,10 +71,6 @@ class UsbCamNode { } UsbCamNode()
: m_node("~") { - // Setup the network that outputs derivates of the image captured - // TODO: Actual network - networks.push_back(std::make_unique(&m_node, "depth_anything_v2_vitb.onnx")); - // Advertise the main image topic image_transport::ImageTransport it(m_node); m_image_pub = it.advertiseCamera("image_raw", 1); @@ -127,6 +123,14 @@ class UsbCamNode { m_camera_info->setCameraInfo(camera_info); } + // Setup the network that outputs derivates of the image captured + m_node.param("depth_anything_v2_file", m_parameters.dav2_file, std::string("")); + m_node.param("depth_anything_v2_topic", m_parameters.dav2_topic, std::string("")); + + if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) { + networks.push_back(std::make_unique(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic)); + } + ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS", m_parameters.camera_name.c_str(), m_parameters.device_name.c_str(), m_parameters.image_width, m_parameters.image_height, m_parameters.io_method_name.c_str(), diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp index a665cdae..7f18173d 100644 --- a/test/test_depth_anything_v2.cpp +++ b/test/test_depth_anything_v2.cpp @@ -9,8 +9,9 @@ // This class provides access to protected members that we normally don't want to expose class DepthAnythingV2Test : public DepthAnythingV2 { public: - DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path) {} + DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path, std::string("")) {} float* get_input_data() { return _input_data; } + void load_model() { _load_model(); } }; class TestDepthAnythingV2 : public ::testing::Test { From a2db86214746e62021aad6b5518fc3d09b03a461 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:28:53 +0100 Subject: [PATCH 02/10] dav2: Working on jetson, output weird --- .../usb_cam/learning/depth_anything_v2.hpp | 65 +++++++++++--- include/usb_cam/learning/interface.hpp | 6 +- include/usb_cam/usb_cam.hpp | 3 +- launch/two_mipi_rgb.launch | 10 ++- src/interface.cpp | 88 ++++++++++--------- src/usb_cam_node.cpp | 9 +- 6 files changed, 116 insertions(+), 65 deletions(-) diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 978d7a20..5bd58398 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,13 +9,19 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string model_path, std::string topic) { + DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic, std::string& vis_topic) { _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH); _model_path = model_path; _load_model(); if (nh != nullptr) { - _depth_publication = nh->advertise(topic, 1); + if (!metric_topic.empty()) { + _depth_metric_pub = nh->advertise(metric_topic, 1); + } + if (!vis_topic.empty()) { + _depth_vis_pub = nh->advertise(vis_topic, 1); + } + } } @@ -35,27 +41,58 @@ class DepthAnythingV2 : public LearningInterface { } cv::Mat float_image; cv::merge(channels, float_image); - _input_data = float_image.reshape(1, 1).ptr(0); + + // Normalize the image using the mean and std for each channel + // Mean and std are typically for [R, G, B] channels respectively + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + for (int c 
= 0; c < 3; ++c) { + cv::Mat channel = channels[c]; + channel = (channel - mean[c]) / std[c]; + } + + cv::merge(channels, float_image); + memcpy(_input_data, float_image.data, _input_size_float); } - void publish() override { - if (_depth_publication.getTopic() != "") { - cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data); +void publish() override { + cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data); + if (_depth_metric_pub.getTopic() != "") { + // Raw depth prediction (in meters, CV_32FC1) + cv_bridge::CvImage depth_image; + depth_image.header.stamp = ros::Time::now(); + depth_image.header.frame_id = "depth_frame"; + depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; // 32-bit float + depth_image.image = depth_prediction; + _depth_metric_pub.publish(depth_image.toImageMsg()); - cv_bridge::CvImage depth_image; - depth_image.header.stamp = ros::Time::now(); - depth_image.header.frame_id = "depth_frame"; - depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; - depth_image.image = depth_prediction; - _depth_publication.publish(depth_image.toImageMsg()); - } } + if (_depth_vis_pub.getTopic() != "") { + cv::Mat depth_visualized; + double min_val, max_val; + cv::minMaxLoc(depth_prediction, &min_val, &max_val); + + // Normalize depth image for visualization + // Ignore values <= 0 (i.e., no depth / invalid depth) + cv::normalize(depth_prediction, depth_visualized, 0, 255, cv::NORM_MINMAX); + depth_visualized.convertTo(depth_visualized, CV_8UC1); + + cv_bridge::CvImage visualized_image; + visualized_image.header.stamp = ros::Time::now(); + visualized_image.header.frame_id = "depth_frame"; + visualized_image.encoding = sensor_msgs::image_encodings::MONO8; // 8-bit grayscale for visualization + visualized_image.image = depth_visualized; + _depth_vis_pub.publish(visualized_image.toImageMsg()); + } +} + private: const size_t _HEIGHT = 518; const size_t _WIDTH = 518; cv::Size _INPUT_SIZE; - ros::Publisher _depth_publication; + ros::Publisher _depth_metric_pub; + ros::Publisher _depth_vis_pub; }; #endif // DEPTH_ANYTHING_HPP_ diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp index 336670fc..6960ee8c 100644 --- a/include/usb_cam/learning/interface.hpp +++ b/include/usb_cam/learning/interface.hpp @@ -35,15 +35,17 @@ class LearningInterface { nvinfer1::IExecutionContext* _context; nvinfer1::INetworkDefinition* _network; nvinfer1::IRuntime* _runtime; + size_t _input_c, _input_h, _input_w, _input_size_float; + size_t _output_c, _output_h, _output_w, _output_size_float; std::string _model_path; void _load_model(); private: + static constexpr size_t JETSON_MEM_LIMIT_B{3ULL * 1024 * 1024 * 1024}; + static std::mutex predict_mutex; void* _buffers[2] = { nullptr, nullptr }; - // Global mutex for prediction calls - static std::mutex predict_mutex; // TODO: static? 
class Logger : public nvinfer1::ILogger { diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp index fdce6523..4c41be69 100644 --- a/include/usb_cam/usb_cam.hpp +++ b/include/usb_cam/usb_cam.hpp @@ -120,7 +120,8 @@ typedef struct std::string pixel_format_name; std::string av_device_format; std::string dav2_file; - std::string dav2_topic; + std::string dav2_metric_topic; + std::string dav2_vis_topic; int image_width; int image_height; int framerate; diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 24fcfded..9120ddec 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -18,8 +18,9 @@ - - + + + @@ -39,7 +40,8 @@ - - + + + diff --git a/src/interface.cpp b/src/interface.cpp index 221c8684..8e528839 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -9,35 +9,44 @@ std::mutex LearningInterface::predict_mutex; void LearningInterface::_load_model() { std::ifstream file_check(_model_path); if (!file_check.good()) { - std::cerr << "Error: " << _model_path << "' does not exist." << std::endl; + std::cerr << "Error: " << _model_path << " does not exist." << std::endl; throw std::runtime_error("Model file not found"); } file_check.close(); - if (_model_path.find(".onnx") == std::string::npos) { - std::ifstream engine_stream(_model_path, std::ios::binary); - engine_stream.seekg(0, std::ios::end); + // Define the expected .engine path based on the .onnx model path + std::string engine_path = _model_path.substr(0, _model_path.find_last_of('.')) + ".engine"; - const size_t model_size = engine_stream.tellg(); - engine_stream.seekg(0, std::ios::beg); + if (_model_path.find(".onnx") != std::string::npos) { + // Check if the engine file already exists + std::ifstream engine_check(engine_path, std::ios::binary); - std::unique_ptr<char[]> engine_data(new char[model_size]); - engine_stream.read(engine_data.get(), model_size); - engine_stream.close(); + if (engine_check.good()) { + engine_check.seekg(0, std::ios::end); + const size_t model_size = engine_check.tellg(); + engine_check.seekg(0, std::ios::beg); - // Create tensorrt model - _runtime = nvinfer1::createInferRuntime(_logger); - _engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size); - _context = _engine->createExecutionContext(); + std::unique_ptr<char[]> engine_data(new char[model_size]); + engine_check.read(engine_data.get(), model_size); + engine_check.close(); + + // Create TensorRT runtime and load engine + _runtime = nvinfer1::createInferRuntime(_logger); + _engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size); + _context = _engine->createExecutionContext(); + + } else { + // Build an engine from the .onnx model and save it as .engine + _build(_model_path); + _save_engine(engine_path); + } } else { - // Build an engine from an onnx model - _build(_model_path); - _save_engine(_model_path); + std::cerr << "Error: Only .onnx model files are supported."
<< std::endl; + throw std::runtime_error("Unsupported model format"); } // Define input dimensions - // TODO: Check dimensions, set as members #if NV_TENSORRT_MAJOR < 10 auto input_dims = _engine->getBindingDimensions(0); auto output_dims = _engine->getBindingDimensions(1); @@ -46,16 +55,23 @@ void LearningInterface::_load_model() { auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1)); #endif - const size_t input_h = input_dims.d[2]; - const size_t input_w = input_dims.d[3]; - ROS_INFO("%ld, %ld", input_h, input_w); + // TODO: This does not generalize so well + _input_c = input_dims.d[1]; + _input_h = input_dims.d[2]; + _input_w = input_dims.d[3]; + _output_c = output_dims.d[0]; + _output_h = output_dims.d[1]; + _output_w = output_dims.d[2]; + _input_size_float = _input_c * _input_h * _input_w * sizeof(float); + _output_size_float = _output_c * _output_h * _output_w * sizeof(float); cudaStreamCreate(&_stream); - cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float)); - cudaMalloc(&_buffers[1], input_h * input_w * sizeof(float)); + cudaMalloc(&_buffers[0], _input_size_float); + cudaMalloc(&_buffers[1], _output_size_float); - _output_data = new float[input_h * input_w]; + _input_data = new float[_input_c * _input_h * _input_w]; + _output_data = new float[_output_c * _output_h * _output_w]; } void LearningInterface::_build(std::string onnx_path) { @@ -63,6 +79,9 @@ void LearningInterface::_build(std::string onnx_path) { const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); INetworkDefinition* network = builder->createNetworkV2(explicitBatch); IBuilderConfig* config = builder->createBuilderConfig(); + + // TODO: What about different hardware? + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B); config->setFlag(BuilderFlag::kFP16); nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger); bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)); @@ -78,16 +97,7 @@ void LearningInterface::_build(std::string onnx_path) { delete plan; } -bool LearningInterface::_save_engine(const std::string& onnx_path) { - std::string engine_path; - size_t dot_index = onnx_path.find_last_of("."); - if (dot_index != std::string::npos) { - engine_path = onnx_path.substr(0, dot_index) + ".engine"; - - } else { - return false; - } - +bool LearningInterface::_save_engine(const std::string& engine_path) { if (_engine) { nvinfer1::IHostMemory* data = _engine->serialize(); std::ofstream file; @@ -107,9 +117,7 @@ void LearningInterface::predict() { if (predict_mutex.try_lock()) { - ROS_INFO("INSIDE PREDICT"); - // TODO: Use input, output size members to set memcpy size correctly - cudaMemcpyAsync(_buffers[0], _input_data, 0, cudaMemcpyHostToDevice, _stream); + cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream); #if NV_TENSORRT_MAJOR < 10 _context->enqueueV2(_buffers, _stream, nullptr); @@ -118,12 +126,9 @@ void LearningInterface::predict() { cudaStreamSynchronize(_stream); - // TODO: Use input, output size members to set memcpy size correctly - cudaMemcpyAsync(_output_data, _buffers[1], 0, cudaMemcpyDeviceToHost); + cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost); predict_mutex.unlock(); - } else { - ROS_INFO("predict in progress, skipping"); } } @@ -131,4 +136,7 @@
LearningInterface::~LearningInterface() { cudaFree(_stream); cudaFree(_buffers[0]); cudaFree(_buffers[1]); + + delete[] _input_data; + delete[] _output_data; } diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp index 1776330b..99f600b5 100644 --- a/src/usb_cam_node.cpp +++ b/src/usb_cam_node.cpp @@ -124,11 +124,12 @@ class UsbCamNode { } // Setup the network that outputs derivatives of the image captured - m_node.param("depth_anything_v2_file", m_parameters.dav2_file, std::string("")); - m_node.param("depth_anything_v2_topic", m_parameters.dav2_topic, std::string("")); + m_node.param("dav2_file", m_parameters.dav2_file, std::string("")); + m_node.param("dav2_metric_topic", m_parameters.dav2_metric_topic, std::string("")); + m_node.param("dav2_vis_topic", m_parameters.dav2_vis_topic, std::string("")); - if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) { - networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic)); + if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_metric_topic.empty()) { + networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_metric_topic, m_parameters.dav2_vis_topic)); } ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS", From 2d5a14975191b22faba25aaa4d8c7bab6a7ab1cf Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Tue, 12 Nov 2024 16:34:44 +0100 Subject: [PATCH 03/10] dav2: Working --- .../usb_cam/learning/depth_anything_v2.hpp | 87 ++++++++++++------- launch/two_mipi_rgb.launch | 8 +- src/interface.cpp | 45 +++++----- 3 files changed, 80 insertions(+), 60 deletions(-) diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 5bd58398..731284a6 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -33,56 +33,33 @@ class DepthAnythingV2 : public LearningInterface { cv::Mat resized_image; cv::resize(image, resized_image, _INPUT_SIZE); - // Change to float32 between 0 and 1 - std::vector<cv::Mat> channels; - cv::split(resized_image, channels); - for (uint8_t i = 0; i < 3; ++i) { - channels[i].convertTo(channels[i], CV_32F, 1.0f / 255.0f); - } - cv::Mat float_image; - cv::merge(channels, float_image); - - // Normalize the image using the mean and std for each channel - // Mean and std are typically for [R, G, B] channels respectively - std::vector<float> mean = {0.485f, 0.456f, 0.406f}; - std::vector<float> std = {0.229f, 0.224f, 0.225f}; - for (int c = 0; c < 3; ++c) { - cv::Mat channel = channels[c]; - channel = (channel - mean[c]) / std[c]; - } - - cv::merge(channels, float_image); - memcpy(_input_data, float_image.data, _input_size_float); + std::vector<float> input = _preprocess(resized_image); + memcpy(_input_data, input.data(), _input_size_float); } void publish() override { - cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data); + cv::Mat depth_prediction(_HEIGHT, _WIDTH, CV_32FC1, _output_data); if (_depth_metric_pub.getTopic() != "") { // Raw depth prediction (in meters, CV_32FC1) cv_bridge::CvImage depth_image; depth_image.header.stamp = ros::Time::now(); depth_image.header.frame_id = "depth_frame"; - depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; // 32-bit float + depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; depth_image.image = depth_prediction; _depth_metric_pub.publish(depth_image.toImageMsg()); - } if (_depth_vis_pub.getTopic() != "") { - cv::Mat
depth_visualized; - double min_val, max_val; - cv::minMaxLoc(depth_prediction, &min_val, &max_val); - - // Normalize depth image for visualization - // Ignore values <= 0 (i.e., no depth / invalid depth) - cv::normalize(depth_prediction, depth_visualized, 0, 255, cv::NORM_MINMAX); - depth_visualized.convertTo(depth_visualized, CV_8UC1); + cv::normalize(depth_prediction, depth_prediction, 0, 255, cv::NORM_MINMAX, CV_8UC1); + cv::Mat colormap; + cv::applyColorMap(depth_prediction, colormap, cv::COLORMAP_INFERNO); + cv::resize(colormap, colormap, cv::Size(_WIDTH, _HEIGHT)); cv_bridge::CvImage visualized_image; visualized_image.header.stamp = ros::Time::now(); visualized_image.header.frame_id = "depth_frame"; visualized_image.encoding = sensor_msgs::image_encodings::MONO8; - visualized_image.image = depth_visualized; + visualized_image.image = colormap; _depth_vis_pub.publish(visualized_image.toImageMsg()); } } @@ -90,9 +67,53 @@ void publish() override { private: const size_t _HEIGHT = 518; const size_t _WIDTH = 518; + const float mean[3] = { 123.675f, 116.28f, 103.53f }; + const float std[3] = { 58.395f, 57.12f, 57.375f }; cv::Size _INPUT_SIZE; ros::Publisher _depth_metric_pub; ros::Publisher _depth_vis_pub; + + std::vector<float> _preprocess(cv::Mat& image) { + std::tuple<cv::Mat, int, int> resized = _resize_depth(image, _input_w, _input_h); + cv::Mat resized_image = std::get<0>(resized); + std::vector<float> input_tensor; + for (int k = 0; k < 3; k++) { + for (int i = 0; i < resized_image.rows; i++) { + for (int j = 0; j < resized_image.cols; j++) { + input_tensor.emplace_back(((float)resized_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]); + } + } + } + return input_tensor; + } + + std::tuple<cv::Mat, int, int> _resize_depth(cv::Mat& img, int w, int h) { + cv::Mat result; + int nw, nh; + int ih = img.rows; + int iw = img.cols; + float aspectRatio = (float)img.cols / (float)img.rows; + + if (aspectRatio >= 1) { + nw = w; + nh = int(h / aspectRatio); + } else { + nw = int(w * aspectRatio); + nh = h; + } + cv::resize(img, img, cv::Size(nw, nh)); + result = cv::Mat::ones(cv::Size(w, h), CV_8UC1) * 128; + cv::cvtColor(result, result, cv::COLOR_GRAY2RGB); + cv::cvtColor(img, img, cv::COLOR_BGR2RGB); + + cv::Mat re(h, w, CV_8UC3); + cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR); + cv::Mat out(h, w, CV_8UC3, 0.0); + re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows))); + + std::tuple<cv::Mat, int, int> res_tuple = std::make_tuple(out, (w - nw) / 2, (h - nh) / 2); + return res_tuple; + } }; #endif // DEPTH_ANYTHING_HPP_ diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 9120ddec..169b5391 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -18,7 +18,7 @@ - + @@ -40,8 +40,8 @@ - - - + + + diff --git a/src/interface.cpp b/src/interface.cpp index 8e528839..e36c5bf9 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -6,6 +6,23 @@ using namespace nvinfer1; std::mutex LearningInterface::predict_mutex; +void LearningInterface::predict() { + if (predict_mutex.try_lock()) { + cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream); + +#if NV_TENSORRT_MAJOR < 10 + _context->enqueueV2(_buffers, _stream, nullptr); +#else + _context->executeV2(_buffers); +#endif + + cudaStreamSynchronize(_stream); + cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost); + predict_mutex.unlock(); + + } +} + void LearningInterface::_load_model() { std::ifstream
file_check(_model_path); if (!file_check.good()) { @@ -59,9 +76,9 @@ void LearningInterface::_load_model() { _input_c = input_dims.d[1]; _input_h = input_dims.d[2]; _input_w = input_dims.d[3]; - _output_c = output_dims.d[0]; - _output_h = output_dims.d[1]; - _output_w = output_dims.d[2]; + _output_c = output_dims.d[1]; + _output_h = output_dims.d[2]; + _output_w = output_dims.d[3]; _input_size_float = _input_c * _input_h * _input_w * sizeof(float); _output_size_float = _output_c * _output_h * _output_w * sizeof(float); @@ -76,13 +93,12 @@ void LearningInterface::_load_model() { void LearningInterface::_build(std::string onnx_path) { auto builder = createInferBuilder(_logger); - const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - INetworkDefinition* network = builder->createNetworkV2(explicitBatch); + const auto explicit_batch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + INetworkDefinition* network = builder->createNetworkV2(explicit_batch); IBuilderConfig* config = builder->createBuilderConfig(); // TODO: What about different hardware? config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B); - config->setFlag(BuilderFlag::kFP16); nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger); bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)); IHostMemory* plan{ builder->buildSerializedNetwork(*network, *config) }; @@ -115,23 +131,6 @@ bool LearningInterface::_save_engine(const std::string& engine_path) { return true; } -void LearningInterface::predict() { - if (predict_mutex.try_lock()) { - cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream); - -#if NV_TENSORRT_MAJOR < 10 - _context->enqueueV2(_buffers, _stream, nullptr); -#else - _context->executeV2(_buffers); -#endif - - cudaStreamSynchronize(_stream); - cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost); - predict_mutex.unlock(); - - } -} - LearningInterface::~LearningInterface() { cudaFree(_stream); cudaFree(_buffers[0]); cudaFree(_buffers[1]); From 063990b08f5bf01ae9ba1d6aa05cc2eff9f5ed05 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:47:54 +0100 Subject: [PATCH 04/10] dav2: Get rid of vis topic --- .../usb_cam/learning/depth_anything_v2.hpp | 103 ++++++------------ include/usb_cam/usb_cam.hpp | 3 +- launch/two_mipi_rgb.launch | 9 +- src/usb_cam_node.cpp | 7 +- 4 files changed, 43 insertions(+), 79 deletions(-) diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 731284a6..0b66a2fa 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,110 +9,77 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic, std::string& vis_topic) { - _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH); + DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic) { _model_path = model_path; _load_model(); - if (nh != nullptr) { - if (!metric_topic.empty()) { - _depth_metric_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1); - } - if (!vis_topic.empty()) { - _depth_vis_pub = nh->advertise<sensor_msgs::Image>(vis_topic, 1); - } - + if (nh != nullptr && !metric_topic.empty()) { + _depth_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1); } } void set_input(sensor_msgs::Image& msg)
override { + // Keep track of input image size, we want to get the same output image dimensions + _output_image_w = msg.width; + _output_image_h = msg.height; + cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8); cv::Mat image = cv_ptr->image; // Change size to 518x518 (still uint8) cv::Mat resized_image; - cv::resize(image, resized_image, _INPUT_SIZE); + cv::resize(image, resized_image, cv::Size(_input_w, _input_h)); std::vector<float> input = _preprocess(resized_image); memcpy(_input_data, input.data(), _input_size_float); } void publish() override { - cv::Mat depth_prediction(_HEIGHT, _WIDTH, CV_32FC1, _output_data); - if (_depth_metric_pub.getTopic() != "") { - // Raw depth prediction (in meters, CV_32FC1) + if (_depth_pub.getTopic() != "") { + cv::Mat depth_prediction(_input_w, _input_h, CV_32FC1, _output_data); + cv::Mat depth_resized; + cv::resize(depth_prediction, depth_resized, cv::Size(_output_image_w, _output_image_h)); cv_bridge::CvImage depth_image; depth_image.header.stamp = ros::Time::now(); depth_image.header.frame_id = "depth_frame"; depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; - depth_image.image = depth_prediction; - _depth_metric_pub.publish(depth_image.toImageMsg()); - } - - if (_depth_vis_pub.getTopic() != "") { - cv::normalize(depth_prediction, depth_prediction, 0, 255, cv::NORM_MINMAX, CV_8UC1); - cv::Mat colormap; - cv::applyColorMap(depth_prediction, colormap, cv::COLORMAP_INFERNO); - cv::resize(colormap, colormap, cv::Size(_WIDTH, _HEIGHT)); - - cv_bridge::CvImage visualized_image; - visualized_image.header.stamp = ros::Time::now(); - visualized_image.header.frame_id = "depth_frame"; - visualized_image.encoding = sensor_msgs::image_encodings::MONO8; - visualized_image.image = colormap; - _depth_vis_pub.publish(visualized_image.toImageMsg()); + depth_image.image = depth_resized; + _depth_pub.publish(depth_image.toImageMsg()); } } private: - const size_t _HEIGHT = 518; - const size_t _WIDTH = 518; + // TODO: Not so nice magic numbers from the paper implementation const float mean[3] = { 123.675f, 116.28f, 103.53f }; const float std[3] = { 58.395f, 57.12f, 57.375f }; - cv::Size _INPUT_SIZE; - ros::Publisher _depth_metric_pub; - ros::Publisher _depth_vis_pub; + ros::Publisher _depth_pub; + uint32_t _output_image_w, _output_image_h; std::vector<float> _preprocess(cv::Mat& image) { - std::tuple<cv::Mat, int, int> resized = _resize_depth(image, _input_w, _input_h); - cv::Mat resized_image = std::get<0>(resized); + // Determine the resized dimensions + const int iw = image.cols; + const int ih = image.rows; + const float aspect_ratio = static_cast<float>(iw) / ih; + const int nw = (aspect_ratio >= 1) ? _input_w : static_cast<int>(_input_w * aspect_ratio); + const int nh = (aspect_ratio >= 1) ?
static_cast<int>(_input_h / aspect_ratio) : _input_h; + + // Resize image and pad if necessary + cv::Mat resized_image; + cv::resize(image, resized_image, cv::Size(nw, nh), 0, 0, cv::INTER_LINEAR); + cv::Mat padded_image = cv::Mat::ones(cv::Size(_input_w, _input_h), CV_8UC3) * 128; + resized_image.copyTo(padded_image(cv::Rect((_input_w - nw) / 2, (_input_h - nh) / 2, nw, nh))); + + // Normalize and flatten the image to a 1D tensor std::vector<float> input_tensor; for (int k = 0; k < 3; k++) { - for (int i = 0; i < resized_image.rows; i++) { - for (int j = 0; j < resized_image.cols; j++) { - input_tensor.emplace_back(((float)resized_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]); + for (int i = 0; i < padded_image.rows; i++) { + for (int j = 0; j < padded_image.cols; j++) { + input_tensor.emplace_back((padded_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]); } } } - return input_tensor; - } - std::tuple<cv::Mat, int, int> _resize_depth(cv::Mat& img, int w, int h) { - cv::Mat result; - int nw, nh; - int ih = img.rows; - int iw = img.cols; - float aspectRatio = (float)img.cols / (float)img.rows; - - if (aspectRatio >= 1) { - nw = w; - nh = int(h / aspectRatio); - } else { - nw = int(w * aspectRatio); - nh = h; - } - cv::resize(img, img, cv::Size(nw, nh)); - result = cv::Mat::ones(cv::Size(w, h), CV_8UC1) * 128; - cv::cvtColor(result, result, cv::COLOR_GRAY2RGB); - cv::cvtColor(img, img, cv::COLOR_BGR2RGB); - - cv::Mat re(h, w, CV_8UC3); - cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR); - cv::Mat out(h, w, CV_8UC3, 0.0); - re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows))); - - std::tuple<cv::Mat, int, int> res_tuple = std::make_tuple(out, (w - nw) / 2, (h - nh) / 2); - return res_tuple; + return input_tensor; } }; diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp index 4c41be69..fdce6523 100644 --- a/include/usb_cam/usb_cam.hpp +++ b/include/usb_cam/usb_cam.hpp @@ -120,8 +120,7 @@ typedef struct std::string pixel_format_name; std::string av_device_format; std::string dav2_file; - std::string dav2_metric_topic; - std::string dav2_vis_topic; + std::string dav2_topic; int image_width; int image_height; int framerate; diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 169b5391..75083243 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -19,8 +19,7 @@ - - + @@ -40,8 +39,8 @@ - - - + + + diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp index 99f600b5..9a3bb946 100644 --- a/src/usb_cam_node.cpp +++ b/src/usb_cam_node.cpp @@ -125,11 +125,10 @@ class UsbCamNode { // Setup the network that outputs derivatives of the image captured m_node.param("dav2_file", m_parameters.dav2_file, std::string("")); - m_node.param("dav2_metric_topic", m_parameters.dav2_metric_topic, std::string("")); - m_node.param("dav2_vis_topic", m_parameters.dav2_vis_topic, std::string("")); + m_node.param("dav2_topic", m_parameters.dav2_topic, std::string("")); - if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_metric_topic.empty()) { - networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_metric_topic, m_parameters.dav2_vis_topic)); + if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) { + networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic)); } ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS", From 5d022f8013d67751e48cb550c2ebea549da6d66b Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:48:15 +0100 Subject: [PATCH 05/10] ci: Download onnx
file for test --- .github/workflows/ci.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9e6ab43..d328c769 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,20 @@ jobs: submodules: recursive set-safe-directory: true + # Step to download the ONNX file from Google Drive link + - name: Download ONNX model file + env: + ONNX_FILE_URL: ${{ secrets.DEPTH_ANYTHING_V2_VITS_LINK }} + run: | + # Create directory if it doesn't exist + mkdir -p tests/resources + + # Extract Google Drive file ID from URL + FILE_ID=$(echo "$ONNX_FILE_URL" | sed -E 's|.*?/d/([^/]+).*|\1|') + + # Download the file using Google Drive link and file ID + curl -L -o tests/resources/model.onnx "https://drive.google.com/uc?export=download&id=${FILE_ID}" + - name: Run ${{ matrix.ci_script }} run: | bash -x ./ci/${{ matrix.ci_script }}.sh From 992bdcc8f74b2ed70b83ff3d71e22e8f5988cc8e Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Wed, 13 Nov 2024 11:05:07 +0100 Subject: [PATCH 06/10] ci: Try to fix --- .devcontainer/Dockerfile | 1 + CMakeLists.txt | 6 ++++++ ci/pr_run_tests.sh | 2 +- include/usb_cam/learning/depth_anything_v2.hpp | 2 +- test/test_depth_anything_v2.cpp | 7 +------ 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index e3f2eaf4..9af800c1 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -30,6 +30,7 @@ RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86 # CUDA: Add PATH and LD_LIBRARY_PATH to .bash_aliases RUN echo 'export PATH=$PATH:/usr/local/cuda-11.8/bin' >> /home/asl/.bash_aliases && \ echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64' >> /home/asl/.bash_aliases +ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8 # TensorRT: Install from .deb file: Seems we run 8.5.2.2 (which is bundled with 8.5.3) COPY .devcontainer/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_amd64.deb /tmp/tensorrt.deb diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e55de3..cdcf55b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,12 @@ find_package(catkin REQUIRED COMPONENTS # Find OpenCV, Eigen, and CUDA find_package(OpenCV REQUIRED) find_package(Eigen3 REQUIRED) + +# Set CUDA toolkit root directory if not already defined +if(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR) + set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda-11.8") +endif() +message(STATUS "CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}") find_package(CUDA REQUIRED) # Find FFmpeg libraries with PkgConfig diff --git a/ci/pr_run_tests.sh b/ci/pr_run_tests.sh index d95acff8..10a90a3a 100755 --- a/ci/pr_run_tests.sh +++ b/ci/pr_run_tests.sh @@ -2,7 +2,7 @@ source /opt/ros/noetic/setup.bash # Set paths for the model and plan files -MODEL_PATH="test/resources/depth_anything_v2_vits_16.onnx" +MODEL_PATH="test/resources/depth_anything_v2_vits.onnx" # Check if the ONNX model file exists if [ ! 
-f "$MODEL_PATH" ]; then diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 0b66a2fa..6df8dd97 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,7 +9,7 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic) { + DepthAnythingV2(ros::NodeHandle* nh, const std::string& model_path, const std::string& metric_topic) { _model_path = model_path; _load_model(); diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp index 7f18173d..b0a48ac0 100644 --- a/test/test_depth_anything_v2.cpp +++ b/test/test_depth_anything_v2.cpp @@ -16,7 +16,7 @@ class DepthAnythingV2Test : public DepthAnythingV2 { class TestDepthAnythingV2 : public ::testing::Test { protected: - const std::string model_path = "/workspaces/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx"; + const std::string model_path = "/workspaces/v4l2_camera/test/resources/depth_anything_v2_vits.onnx"; const std::string test_image_path = "/workspaces/v4l2_camera/test/resources/maschinenhalle_example_frame.jpg"; cv_bridge::CvImage cv_image; @@ -48,11 +48,6 @@ TEST_F(TestDepthAnythingV2, TestModelLoad) { TEST_F(TestDepthAnythingV2, TestSetInput) { depth_anything_v2->set_input(msg); ASSERT_NE(depth_anything_v2->get_input_data(), nullptr); - - const size_t expected_size = 518 * 518 * 3; - float* input_data = depth_anything_v2->get_input_data(); - ASSERT_NE(input_data, nullptr); - ASSERT_FLOAT_EQ(input_data[0], img.at(0, 0)[0] / 255.0f); } TEST_F(TestDepthAnythingV2, TestPredictPublish) { From dd0acf3d3757b0762cd9fb52b0387fc472752a18 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Wed, 13 Nov 2024 13:58:42 +0100 Subject: [PATCH 07/10] devcontainer: Add env variables for path and ld path --- .devcontainer/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9af800c1..abd33345 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -27,9 +27,9 @@ RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86 sudo apt update && \ sudo apt -y install cuda-toolkit-11-8 cudnn9-cuda-11 libcublas-12-6 -# CUDA: Add PATH and LD_LIBRARY_PATH to .bash_aliases -RUN echo 'export PATH=$PATH:/usr/local/cuda-11.8/bin' >> /home/asl/.bash_aliases && \ - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64' >> /home/asl/.bash_aliases +# CUDA: Add PATH and LD_LIBRARY_PATH to container +ENV PATH="/usr/local/cuda-11.8/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/lib64:${LD_LIBRARY_PATH}" ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8 # TensorRT: Install from .deb file: Seems we run 8.5.2.2 (which is bundled with 8.5.3) From 9bd97e142a2f65b9b8380fed5b29c8df0e3cc2c7 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:38:49 +0100 Subject: [PATCH 08/10] ci: Don't require onnx to be present --- ci/pr_run_tests.sh | 28 -------------------------- include/usb_cam/learning/interface.hpp | 2 +- src/interface.cpp | 6 ++---- 3 files changed, 3 insertions(+), 33 deletions(-) diff --git a/ci/pr_run_tests.sh b/ci/pr_run_tests.sh index 10a90a3a..236699ff 100755 --- a/ci/pr_run_tests.sh +++ b/ci/pr_run_tests.sh @@ -1,34 +1,6 @@ #!/bin/bash source /opt/ros/noetic/setup.bash -# Set paths 
for the model and plan files -MODEL_PATH="test/resources/depth_anything_v2_vits.onnx" - -# Check if the ONNX model file exists -if [ ! -f "$MODEL_PATH" ]; then - # If the model file doesn't exist, check if the environment variable is set - if [ -z "$DEPTH_ANYTHING_V2_VITS_16_LINK" ]; then - echo "The model file does not exist, and the environment variable DEPTH_ANYTHING_V2_VITS_16_LINK is not set." - exit 1 - else - # If the environment variable is set, download the model - echo "ONNX model file not found. Attempting to download..." - - # Create the directory if it doesn't exist - mkdir -p "$(dirname "$MODEL_PATH")" - - # Download the file - if wget -O "$MODEL_PATH" "$DEPTH_ANYTHING_V2_VITS_16_LINK"; then - echo "Download successful." - else - echo "Download failed." - exit 1 - fi - fi -else - echo "ONNX model file already exists." -fi - # Build the project and run tests rm -rf build mkdir -p build diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp index 6960ee8c..0c177287 100644 --- a/include/usb_cam/learning/interface.hpp +++ b/include/usb_cam/learning/interface.hpp @@ -43,7 +43,7 @@ class LearningInterface { private: static constexpr size_t JETSON_MEM_LIMIT_B{3ULL * 1024 * 1024 * 1024}; - static std::mutex predict_mutex; + std::mutex _predict_mutex; void* _buffers[2] = { nullptr, nullptr }; diff --git a/src/interface.cpp b/src/interface.cpp index e36c5bf9..cc58681d 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -4,10 +4,8 @@ using namespace nvinfer1; -std::mutex LearningInterface::predict_mutex; - void LearningInterface::predict() { - if (predict_mutex.try_lock()) { + if (_predict_mutex.try_lock()) { cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream); #if NV_TENSORRT_MAJOR < 10 @@ -18,7 +16,7 @@ void LearningInterface::predict() { cudaStreamSynchronize(_stream); cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost); - predict_mutex.unlock(); + _predict_mutex.unlock(); } } From cb1fe4d875a2d10fc244f0b38adbe0db3d7e1274 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:43:03 +0100 Subject: [PATCH 09/10] ci: Fix onnx location --- .github/workflows/ci.yml | 5 +---- test/test_depth_anything_v2.cpp | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d328c769..e33f4433 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,14 +23,11 @@ jobs: env: ONNX_FILE_URL: ${{ secrets.DEPTH_ANYTHING_V2_VITS_LINK }} run: | - # Create directory if it doesn't exist - mkdir -p tests/resources - # Extract Google Drive file ID from URL FILE_ID=$(echo "$ONNX_FILE_URL" | sed -E 's|.*?/d/([^/]+).*|\1|') # Download the file using Google Drive link and file ID - curl -L -o tests/resources/model.onnx "https://drive.google.com/uc?export=download&id=${FILE_ID}" + curl -L -o $GITHUB_WORKSPACE/test/resources/depth_anything_v2_vits.onnx "https://drive.google.com/uc?export=download&id=${FILE_ID}" - name: Run ${{ matrix.ci_script }} run: | diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp index b0a48ac0..d2a0fdda 100644 --- a/test/test_depth_anything_v2.cpp +++ b/test/test_depth_anything_v2.cpp @@ -16,8 +16,8 @@ class DepthAnythingV2Test : public DepthAnythingV2 { class TestDepthAnythingV2 : public ::testing::Test { protected: - const std::string model_path = 
"/workspaces/v4l2_camera/test/resources/depth_anything_v2_vits.onnx"; - const std::string test_image_path = "/workspaces/v4l2_camera/test/resources/maschinenhalle_example_frame.jpg"; + const std::string model_path = "../test/resources/depth_anything_v2_vits.onnx"; + const std::string test_image_path = "../test/resources/maschinenhalle_example_frame.jpg"; cv_bridge::CvImage cv_image; cv::Mat img; From 27ba7d9b4f796264cb498eaba9549bf400af68ab Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Wed, 13 Nov 2024 16:14:51 +0100 Subject: [PATCH 10/10] ci: Fix GPU usage --- .github/workflows/ci.yml | 8 +++++++- src/interface.cpp | 19 ++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e33f4433..9d282e34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,10 +6,14 @@ jobs: runs-on: self-hosted container: image: omavteam/v4l2_camera:latest + options: | + --gpus all strategy: matrix: - ci_script: [pr_compile, pr_run_tests] + ci_script: [pr_compile] + # TODO: Why does pr_run_test not work on the runner + # ci_script: [pr_compile, pr_run_tests] steps: - name: Checkout @@ -31,4 +35,6 @@ jobs: - name: Run ${{ matrix.ci_script }} run: | + export ONNX_VERBOSE=1 + export TRT_LOGGER=VERBOSE bash -x ./ci/${{ matrix.ci_script }}.sh diff --git a/src/interface.cpp b/src/interface.cpp index cc58681d..8890d479 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -93,22 +93,27 @@ void LearningInterface::_build(std::string onnx_path) { auto builder = createInferBuilder(_logger); const auto explicit_batch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); INetworkDefinition* network = builder->createNetworkV2(explicit_batch); - IBuilderConfig* config = builder->createBuilderConfig(); // TODO: What about different hardware? + IBuilderConfig* config = builder->createBuilderConfig(); config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B); + nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger); - bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kINFO)); - IHostMemory* plan{ builder->buildSerializedNetwork(*network, *config) }; + const bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kVERBOSE)); + if (parsed) { + IHostMemory* plan = builder->buildSerializedNetwork(*network, *config); + _runtime = createInferRuntime(_logger); + _engine = _runtime->deserializeCudaEngine(plan->data(), plan->size()); + _context = _engine->createExecutionContext(); + delete plan; - _runtime = createInferRuntime(_logger); - _engine = _runtime->deserializeCudaEngine(plan->data(), plan->size()); - _context = _engine->createExecutionContext(); + } else { + std::cerr << "Could not parse onnx file: " << onnx_path.c_str() << std::endl; + } delete network; delete config; delete parser; - delete plan; } bool LearningInterface::_save_engine(const std::string& engine_path) {