From fc36a4a37fa3f5f6ec2708712bec9498fee3ba39 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:58:45 +0100 Subject: [PATCH] dav2: Try to run on jetson, hitting mem limit --- .devcontainer/devcontainer.json | 3 +- .vscode/launch.json | 2 +- CMakeLists.txt | 7 +-- .../usb_cam/learning/depth_anything_v2.hpp | 5 +- include/usb_cam/learning/interface.hpp | 7 ++- include/usb_cam/usb_cam.hpp | 2 + launch/two_mipi_rgb.launch | 6 +++ src/interface.cpp | 51 +++++++++++++++---- src/usb_cam_node.cpp | 12 +++-- test/test_depth_anything_v2.cpp | 3 +- 10 files changed, 76 insertions(+), 22 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 74169662..f9d850bb 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,8 @@ "customizations": { "vscode": { "extensions": [ - "ms-vscode.cpptools" + "ms-vscode.cpptools", + "ms-iot.vscode-ros" ], "settings": { "files.hotExit": "off", diff --git a/.vscode/launch.json b/.vscode/launch.json index c4cbee9f..66c3ed29 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,4 +23,4 @@ "miDebuggerPath": "/usr/bin/gdb" } ] -} \ No newline at end of file +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cd1cf54..91e55de3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,13 +97,14 @@ catkin_package( add_executable(${PROJECT_NAME}_node src/usb_cam_node.cpp) target_link_libraries(${PROJECT_NAME}_node ${PROJECT_NAME} - ${catkin_LIBRARIES} ${avcodec_LIBRARIES} ${avutil_LIBRARIES} - ${swscale_LIBRARIES} - ${NVINFER} + ${catkin_LIBRARIES} + ${CUDA_LIBRARIES} ${NVINFER_PLUGIN} + ${NVINFER} ${NVONNXPARSER} + ${swscale_LIBRARIES} ) set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed") diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 3e348fa4..978d7a20 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,12 +9,13 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string model_path) { + DepthAnythingV2(ros::NodeHandle* nh, std::string model_path, std::string topic) { _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH); _model_path = model_path; + _load_model(); if (nh != nullptr) { - _depth_publication = nh->advertise("depth_anything_v2", 1); + _depth_publication = nh->advertise(topic, 1); } } diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp index 8d1d1e0d..336670fc 100644 --- a/include/usb_cam/learning/interface.hpp +++ b/include/usb_cam/learning/interface.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ class LearningInterface { virtual void set_input(sensor_msgs::Image& image) = 0; virtual void publish() = 0; - void load_model(); void predict(); nvinfer1::ICudaEngine* get_engine() { return _engine; } @@ -37,9 +37,14 @@ class LearningInterface { nvinfer1::IRuntime* _runtime; std::string _model_path; + void _load_model(); + private: void* _buffers[2] = { nullptr, nullptr }; + // Global mutex for prediction calls + static std::mutex predict_mutex; + // TODO: static? class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp index d249f6e6..fdce6523 100644 --- a/include/usb_cam/usb_cam.hpp +++ b/include/usb_cam/usb_cam.hpp @@ -119,6 +119,8 @@ typedef struct // or guvcview std::string pixel_format_name; std::string av_device_format; + std::string dav2_file; + std::string dav2_topic; int image_width; int image_height; int framerate; diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 8d3f840e..24fcfded 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -17,6 +17,9 @@ + + + @@ -35,5 +38,8 @@ + + + diff --git a/src/interface.cpp b/src/interface.cpp index 2e64109f..221c8684 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -1,9 +1,19 @@ #include "usb_cam/learning/interface.hpp" #include +#include using namespace nvinfer1; -void LearningInterface::load_model() { +std::mutex LearningInterface::predict_mutex; + +void LearningInterface::_load_model() { + std::ifstream file_check(_model_path); + if (!file_check.good()) { + std::cerr << "Error: " << _model_path << "' does not exist." << std::endl; + throw std::runtime_error("Model file not found"); + } + file_check.close(); + if (_model_path.find(".onnx") == std::string::npos) { std::ifstream engine_stream(_model_path, std::ios::binary); engine_stream.seekg(0, std::ios::end); @@ -27,11 +37,19 @@ void LearningInterface::load_model() { } // Define input dimensions - const auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); - const int input_h = input_dims.d[2]; - const int input_w = input_dims.d[3]; + // TODO: Check dimensions, set as members +#if NV_TENSORRT_MAJOR < 10 + auto input_dims = _engine->getBindingDimensions(0); + auto output_dims = _engine->getBindingDimensions(1); +#else + auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); + auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1)); +#endif + + const size_t input_h = input_dims.d[2]; + const size_t input_w = input_dims.d[3]; + ROS_INFO("%ld, %ld", input_h, input_w); - // Create CUDA stream cudaStreamCreate(&_stream); cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float)); @@ -88,10 +106,25 @@ bool LearningInterface::_save_engine(const std::string& onnx_path) { } void LearningInterface::predict() { - cudaMemcpyAsync(_buffers[0], _input_data, sizeof(_input_data) * sizeof(float), cudaMemcpyHostToDevice, _stream); - _context->executeV2(_buffers); - cudaStreamSynchronize(_stream); - cudaMemcpyAsync(_output_data, _buffers[1], sizeof(_input_data) * sizeof(float), cudaMemcpyDeviceToHost); + if (predict_mutex.try_lock()) { + ROS_INFO("INSIDE PREDICT"); + // TODO: Use input, output size members to set memcpy size correctly + cudaMemcpyAsync(_buffers[0], _input_data, 0, cudaMemcpyHostToDevice, _stream); + +#if NV_TENSORRT_MAJOR < 10 + _context->enqueueV2(_buffers, _stream, nullptr); +#else + _context->executeV2(_buffers); +#endif + + cudaStreamSynchronize(_stream); + // TODO: Use input, output size members to set memcpy size correctly + cudaMemcpyAsync(_output_data, _buffers[1], 0, cudaMemcpyDeviceToHost); + predict_mutex.unlock(); + + } else { + ROS_INFO("predict in progress, skipping"); + } } LearningInterface::~LearningInterface() { diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp index 517a43e7..1776330b 100644 --- a/src/usb_cam_node.cpp +++ b/src/usb_cam_node.cpp @@ -71,10 +71,6 @@ class UsbCamNode { } UsbCamNode() : m_node("~") { - // Setup the network that outputs derivates of the image captured - // TODO: Actual network - networks.push_back(std::make_unique(&m_node, "depth_anything_v2_vitb.onnx")); - // Advertise the main image topic image_transport::ImageTransport it(m_node); m_image_pub = it.advertiseCamera("image_raw", 1); @@ -127,6 +123,14 @@ class UsbCamNode { m_camera_info->setCameraInfo(camera_info); } + // Setup the network that outputs derivates of the image captured + m_node.param("depth_anything_v2_file", m_parameters.dav2_file, std::string("")); + m_node.param("depth_anything_v2_topic", m_parameters.dav2_topic, std::string("")); + + if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) { + networks.push_back(std::make_unique(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic)); + } + ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS", m_parameters.camera_name.c_str(), m_parameters.device_name.c_str(), m_parameters.image_width, m_parameters.image_height, m_parameters.io_method_name.c_str(), diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp index a665cdae..7f18173d 100644 --- a/test/test_depth_anything_v2.cpp +++ b/test/test_depth_anything_v2.cpp @@ -9,8 +9,9 @@ // This class provides access to protected members that we normally don't want to expose class DepthAnythingV2Test : public DepthAnythingV2 { public: - DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path) {} + DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path, std::string("")) {} float* get_input_data() { return _input_data; } + void load_model() { _load_model(); } }; class TestDepthAnythingV2 : public ::testing::Test {