From fc36a4a37fa3f5f6ec2708712bec9498fee3ba39 Mon Sep 17 00:00:00 2001
From: marcojob <44396071+marcojob@users.noreply.github.com>
Date: Mon, 11 Nov 2024 17:58:45 +0100
Subject: [PATCH] dav2: Try to run on jetson, hitting mem limit

---
 .devcontainer/devcontainer.json               |  3 +-
 .vscode/launch.json                           |  2 +-
 CMakeLists.txt                                |  7 +--
 .../usb_cam/learning/depth_anything_v2.hpp    |  5 +-
 include/usb_cam/learning/interface.hpp        |  7 ++-
 include/usb_cam/usb_cam.hpp                   |  2 +
 launch/two_mipi_rgb.launch                    |  6 +++
 src/interface.cpp                             | 51 +++++++++++++++----
 src/usb_cam_node.cpp                          | 12 +++--
 test/test_depth_anything_v2.cpp               |  3 +-
 10 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 74169662..f9d850bb 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -3,7 +3,8 @@
     "customizations": {
         "vscode": {
             "extensions": [
-                "ms-vscode.cpptools"
+                "ms-vscode.cpptools",
+                "ms-iot.vscode-ros"
             ],
             "settings": {
                 "files.hotExit": "off",
diff --git a/.vscode/launch.json b/.vscode/launch.json
index c4cbee9f..66c3ed29 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -23,4 +23,4 @@
             "miDebuggerPath": "/usr/bin/gdb"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2cd1cf54..91e55de3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,13 +97,14 @@ catkin_package(
 add_executable(${PROJECT_NAME}_node src/usb_cam_node.cpp)
 target_link_libraries(${PROJECT_NAME}_node
     ${PROJECT_NAME}
-    ${catkin_LIBRARIES}
     ${avcodec_LIBRARIES}
     ${avutil_LIBRARIES}
-    ${swscale_LIBRARIES}
-    ${NVINFER}
+    ${catkin_LIBRARIES}
+    ${CUDA_LIBRARIES}
     ${NVINFER_PLUGIN}
+    ${NVINFER}
     ${NVONNXPARSER}
+    ${swscale_LIBRARIES}
 )
 set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")
 
diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp
index 3e348fa4..978d7a20 100644
--- a/include/usb_cam/learning/depth_anything_v2.hpp
+++ b/include/usb_cam/learning/depth_anything_v2.hpp
@@ -9,12 +9,13 @@
 
 class DepthAnythingV2 : public LearningInterface {
 public:
-    DepthAnythingV2(ros::NodeHandle* nh, std::string model_path) {
+    DepthAnythingV2(ros::NodeHandle* nh, std::string model_path, std::string topic) {
         _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH);
         _model_path = model_path;
+        _load_model();
 
         if (nh != nullptr) {
-            _depth_publication = nh->advertise<sensor_msgs::Image>("depth_anything_v2", 1);
+            _depth_publication = nh->advertise<sensor_msgs::Image>(topic, 1);
         }
     }
 
diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp
index 8d1d1e0d..336670fc 100644
--- a/include/usb_cam/learning/interface.hpp
+++ b/include/usb_cam/learning/interface.hpp
@@ -4,6 +4,7 @@
 #include <algorithm>
 #include <fstream>
 #include <iostream>
+#include <mutex>
 #include <NvInfer.h>
 #include <opencv2/opencv.hpp>
 #include <sensor_msgs/Image.h>
@@ -18,7 +19,6 @@ class LearningInterface {
     virtual void set_input(sensor_msgs::Image& image) = 0;
     virtual void publish() = 0;
 
-    void load_model();
     void predict();
 
     nvinfer1::ICudaEngine* get_engine() { return _engine; }
@@ -37,9 +37,14 @@ class LearningInterface {
     nvinfer1::IRuntime* _runtime;
     std::string _model_path;
 
+    void _load_model();
+
 private:
     void* _buffers[2] = { nullptr, nullptr };
 
+    // Global mutex for prediction calls
+    static std::mutex predict_mutex;
+
     // TODO: static?
     class Logger : public nvinfer1::ILogger {
         void log(Severity severity, const char* msg) noexcept override {
diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp
index d249f6e6..fdce6523 100644
--- a/include/usb_cam/usb_cam.hpp
+++ b/include/usb_cam/usb_cam.hpp
@@ -119,6 +119,8 @@ typedef struct
   // or guvcview
   std::string pixel_format_name;
   std::string av_device_format;
+  std::string dav2_file;
+  std::string dav2_topic;
   int image_width;
   int image_height;
   int framerate;
diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch
index 8d3f840e..24fcfded 100644
--- a/launch/two_mipi_rgb.launch
+++ b/launch/two_mipi_rgb.launch
@@ -17,6 +17,9 @@
     <param name="wb_blue_gain" value="0.98"/>
     <param name="wb_green_gain" value="0.88"/>
     <param name="wb_red_gain" value="1.18"/>
+
+    <param name="depth_anything_v2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx" />
+    <param name="depth_anything_v2_topic" value="/depth0" />
   </node>
 
   <node name="cam1" pkg="usb_cam" type="usb_cam_node" output="screen" >
@@ -35,5 +38,8 @@
     <param name="wb_blue_gain" value="0.98"/>
     <param name="wb_green_gain" value="0.88"/>
     <param name="wb_red_gain" value="1.18"/>
+
+    <!-- <param name="depth_anything_v2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx" /> -->
+    <!-- <param name="depth_anything_v2_topic" value="/depth1" /> -->
   </node>
 </launch>
diff --git a/src/interface.cpp b/src/interface.cpp
index 2e64109f..221c8684 100644
--- a/src/interface.cpp
+++ b/src/interface.cpp
@@ -1,9 +1,19 @@
 #include "usb_cam/learning/interface.hpp"
 #include <NvOnnxParser.h>
+#include <ros/ros.h>
 
 using namespace nvinfer1;
 
-void LearningInterface::load_model() {
+std::mutex LearningInterface::predict_mutex;
+
+void LearningInterface::_load_model() {
+    std::ifstream file_check(_model_path);
+    if (!file_check.good()) {
+        std::cerr << "Error: " << _model_path << "' does not exist." << std::endl;
+        throw std::runtime_error("Model file not found");
+    }
+    file_check.close();
+
     if (_model_path.find(".onnx") == std::string::npos) {
         std::ifstream engine_stream(_model_path, std::ios::binary);
         engine_stream.seekg(0, std::ios::end);
@@ -27,11 +37,19 @@ void LearningInterface::load_model() {
     }
 
     // Define input dimensions
-    const auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0));
-    const int input_h = input_dims.d[2];
-    const int input_w = input_dims.d[3];
+    // TODO: Check dimensions, set as members
+#if NV_TENSORRT_MAJOR < 10
+    auto input_dims = _engine->getBindingDimensions(0);
+    auto output_dims = _engine->getBindingDimensions(1);
+#else
+    auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0));
+    auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1));
+#endif
+
+    const size_t input_h = input_dims.d[2];
+    const size_t input_w = input_dims.d[3];
+    ROS_INFO("%ld, %ld", input_h, input_w);
 
-    // Create CUDA stream
     cudaStreamCreate(&_stream);
 
     cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float));
@@ -88,10 +106,25 @@ bool LearningInterface::_save_engine(const std::string& onnx_path) {
 }
 
 void LearningInterface::predict() {
-    cudaMemcpyAsync(_buffers[0], _input_data, sizeof(_input_data) * sizeof(float), cudaMemcpyHostToDevice, _stream);
-    _context->executeV2(_buffers);
-    cudaStreamSynchronize(_stream);
-    cudaMemcpyAsync(_output_data, _buffers[1], sizeof(_input_data) * sizeof(float), cudaMemcpyDeviceToHost);
+    if (predict_mutex.try_lock()) {
+        ROS_INFO("INSIDE PREDICT");
+        // TODO: Use input, output size members to set memcpy size correctly
+        cudaMemcpyAsync(_buffers[0], _input_data, 0, cudaMemcpyHostToDevice, _stream);
+
+#if NV_TENSORRT_MAJOR < 10
+        _context->enqueueV2(_buffers, _stream, nullptr);
+#else
+        _context->executeV2(_buffers);
+#endif
+
+        cudaStreamSynchronize(_stream);
+        // TODO: Use input, output size members to set memcpy size correctly
+        cudaMemcpyAsync(_output_data, _buffers[1], 0, cudaMemcpyDeviceToHost);
+        predict_mutex.unlock();
+
+    } else {
+        ROS_INFO("predict in progress, skipping");
+    }
 }
 
 LearningInterface::~LearningInterface() {
diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp
index 517a43e7..1776330b 100644
--- a/src/usb_cam_node.cpp
+++ b/src/usb_cam_node.cpp
@@ -71,10 +71,6 @@ class UsbCamNode {
     }
 
     UsbCamNode() : m_node("~") {
-        // Setup the network that outputs derivates of the image captured
-        // TODO: Actual network
-        networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, "depth_anything_v2_vitb.onnx"));
-
         // Advertise the main image topic
         image_transport::ImageTransport it(m_node);
         m_image_pub = it.advertiseCamera("image_raw", 1);
@@ -127,6 +123,14 @@ class UsbCamNode {
           m_camera_info->setCameraInfo(camera_info);
         }
 
+        // Setup the network that outputs derivates of the image captured
+        m_node.param("depth_anything_v2_file", m_parameters.dav2_file, std::string(""));
+        m_node.param("depth_anything_v2_topic", m_parameters.dav2_topic, std::string(""));
+
+        if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) {
+            networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic));
+        }
+
         ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS",
             m_parameters.camera_name.c_str(), m_parameters.device_name.c_str(),
             m_parameters.image_width, m_parameters.image_height, m_parameters.io_method_name.c_str(),
diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp
index a665cdae..7f18173d 100644
--- a/test/test_depth_anything_v2.cpp
+++ b/test/test_depth_anything_v2.cpp
@@ -9,8 +9,9 @@
 // This class provides access to protected members that we normally don't want to expose
 class DepthAnythingV2Test : public DepthAnythingV2 {
     public:
-        DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path) {}
+        DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path, std::string("")) {}
         float* get_input_data() { return _input_data; }
+        void load_model() { _load_model(); }
 };
 
 class TestDepthAnythingV2 : public ::testing::Test {