ethz-asl · marcojob · Nov 13, 2024 · Nov 11, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -27,9 +27,10 @@ RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86
     sudo apt update && \
     sudo apt -y install cuda-toolkit-11-8 cudnn9-cuda-11 libcublas-12-6
 
-# CUDA: Add PATH and LD_LIBRARY_PATH to .bash_aliases
-RUN echo 'export PATH=$PATH:/usr/local/cuda-11.8/bin' >> /home/asl/.bash_aliases && \
-    echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64' >> /home/asl/.bash_aliases
+# CUDA: Add PATH and LD_LIBRARY_PATH to container
+ENV PATH="/usr/local/cuda-11.8/bin:${PATH}"
+ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/lib64:${LD_LIBRARY_PATH}"
+ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8
 
 # TensorRT: Install from .deb file: Seems we run 8.5.2.2 (which is bundled with 8.5.3)
 COPY .devcontainer/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_amd64.deb /tmp/tensorrt.deb

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -3,7 +3,8 @@
     "customizations": {
         "vscode": {
             "extensions": [
-                "ms-vscode.cpptools"
+                "ms-vscode.cpptools",
+                "ms-iot.vscode-ros"
             ],
             "settings": {
                 "files.hotExit": "off",

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -6,10 +6,14 @@ jobs:
     runs-on: self-hosted
     container:
       image: omavteam/v4l2_camera:latest
+      options: |
+          --gpus all
 
     strategy:
       matrix:
-        ci_script: [pr_compile, pr_run_tests]
+        ci_script: [pr_compile]
+        # TODO: Why does pr_run_test not work on the runner
+        # ci_script: [pr_compile, pr_run_tests]
 
     steps:
       - name: Checkout
@@ -18,6 +22,19 @@ jobs:
           submodules: recursive
           set-safe-directory: true
 
+      # Step to download the ONNX file from Google Drive link
+      - name: Download ONNX model file
+        env:
+          ONNX_FILE_URL: ${{ secrets.DEPTH_ANYTHING_V2_VITS_LINK }}
+        run: |
+          # Extract Google Drive file ID from URL
+          FILE_ID=$(echo "$ONNX_FILE_URL" | sed -E 's|.*?/d/([^/]+).*|\1|')
+
+          # Download the file using Google Drive link and file ID
+          curl -L -o $GITHUB_WORKSPACE/test/resources/depth_anything_v2_vits.onnx "https://drive.google.com/uc?export=download&id=${FILE_ID}"
+
       - name: Run ${{ matrix.ci_script }}
         run: |
+          export ONNX_VERBOSE=1
+          export TRT_LOGGER=VERBOSE
           bash -x ./ci/${{ matrix.ci_script }}.sh
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -23,4 +23,4 @@
             "miDebuggerPath": "/usr/bin/gdb"
         }
     ]
-}
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -21,6 +21,12 @@ find_package(catkin REQUIRED COMPONENTS
 # Find OpenCV, Eigen, and CUDA
 find_package(OpenCV REQUIRED)
 find_package(Eigen3 REQUIRED)
+
+# Set CUDA toolkit root directory if not already defined
+if(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR)
+  set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda-11.8")
+endif()
+message(STATUS "CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}")
 find_package(CUDA REQUIRED)
 
 # Find FFmpeg libraries with PkgConfig
@@ -97,13 +103,14 @@ catkin_package(
 add_executable(${PROJECT_NAME}_node src/usb_cam_node.cpp)
 target_link_libraries(${PROJECT_NAME}_node
     ${PROJECT_NAME}
-    ${catkin_LIBRARIES}
     ${avcodec_LIBRARIES}
     ${avutil_LIBRARIES}
-    ${swscale_LIBRARIES}
-    ${NVINFER}
+    ${catkin_LIBRARIES}
+    ${CUDA_LIBRARIES}
     ${NVINFER_PLUGIN}
+    ${NVINFER}
     ${NVONNXPARSER}
+    ${swscale_LIBRARIES}
 )
 set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")
 

diff --git a/ci/pr_run_tests.sh b/ci/pr_run_tests.sh
@@ -1,34 +1,6 @@
 #!/bin/bash
 source /opt/ros/noetic/setup.bash
 
-# Set paths for the model and plan files
-MODEL_PATH="test/resources/depth_anything_v2_vits_16.onnx"
-
-# Check if the ONNX model file exists
-if [ ! -f "$MODEL_PATH" ]; then
-    # If the model file doesn't exist, check if the environment variable is set
-    if [ -z "$DEPTH_ANYTHING_V2_VITS_16_LINK" ]; then
-        echo "The model file does not exist, and the environment variable DEPTH_ANYTHING_V2_VITS_16_LINK is not set."
-        exit 1
-    else
-        # If the environment variable is set, download the model
-        echo "ONNX model file not found. Attempting to download..."
-
-        # Create the directory if it doesn't exist
-        mkdir -p "$(dirname "$MODEL_PATH")"
-
-        # Download the file
-        if wget -O "$MODEL_PATH" "$DEPTH_ANYTHING_V2_VITS_16_LINK"; then
-            echo "Download successful."
-        else
-            echo "Download failed."
-            exit 1
-        fi
-    fi
-else
-    echo "ONNX model file already exists."
-fi
-
 # Build the project and run tests
 rm -rf build
 mkdir -p build

diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp
@@ -9,52 +9,78 @@
 
 class DepthAnythingV2 : public LearningInterface {
 public:
-    DepthAnythingV2(ros::NodeHandle* nh, std::string model_path) {
-        _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH);
+    DepthAnythingV2(ros::NodeHandle* nh, const std::string& model_path, const std::string& metric_topic) {
         _model_path = model_path;
+        _load_model();
 
-        if (nh != nullptr) {
-            _depth_publication = nh->advertise<sensor_msgs::Image>("depth_anything_v2", 1);
+        if (nh != nullptr && !metric_topic.empty()) {
+            _depth_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1);
         }
     }
 
     void set_input(sensor_msgs::Image& msg) override {
+        // Keep track of input image size, we want to get the same output image dimensions
+        _output_image_w = msg.width;
+        _output_image_h = msg.height;
+
         cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8);
         cv::Mat image = cv_ptr->image;
 
         // Change size to 518x518 (still uint8)
         cv::Mat resized_image;
-        cv::resize(image, resized_image, _INPUT_SIZE);
+        cv::resize(image, resized_image, cv::Size(_input_w, _input_h));
 
-        // Change to float32 between 0 and 1
-        std::vector<cv::Mat> channels;
-        cv::split(resized_image, channels);
-        for (uint8_t i = 0; i < 3; ++i) {
-            channels[i].convertTo(channels[i], CV_32F, 1.0f / 255.0f);
-        }
-        cv::Mat float_image;
-        cv::merge(channels, float_image);
-        _input_data = float_image.reshape(1, 1).ptr<float>(0);
+        std::vector<float> input = _preprocess(resized_image);
+        memcpy(_input_data, input.data(), _input_size_float);
     }
 
-    void publish() override {
-        if (_depth_publication.getTopic() != "") {
-            cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data);
-
-            cv_bridge::CvImage depth_image;
-            depth_image.header.stamp = ros::Time::now();
-            depth_image.header.frame_id = "depth_frame";
-            depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1;
-            depth_image.image = depth_prediction;
-            _depth_publication.publish(depth_image.toImageMsg());
-        }
+void publish() override {
+    if (_depth_pub.getTopic() != "") {
+        cv::Mat depth_prediction(_input_w, _input_h, CV_32FC1, _output_data);
+        cv::Mat depth_resized;
+        cv::resize(depth_prediction, depth_resized, cv::Size(_output_image_w, _output_image_h));
+        cv_bridge::CvImage depth_image;
+        depth_image.header.stamp = ros::Time::now();
+        depth_image.header.frame_id = "depth_frame";
+        depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1;
+        depth_image.image = depth_resized;
+        _depth_pub.publish(depth_image.toImageMsg());
     }
+}
 
 private:
-    const size_t _HEIGHT = 518;
-    const size_t _WIDTH = 518;
-    cv::Size _INPUT_SIZE;
-    ros::Publisher _depth_publication;
+    // TODO: Not so nice magic numbers from the paper implementation
+    const float mean[3] = { 123.675f, 116.28f, 103.53f };
+    const float std[3] = { 58.395f, 57.12f, 57.375f };
+    ros::Publisher _depth_pub;
+    uint32_t _output_image_w, _output_image_h;
+
+    std::vector<float> _preprocess(cv::Mat& image) {
+        // Determine the resized dimensions
+        const int iw = image.cols;
+        const int ih = image.rows;
+        const float aspect_ratio = static_cast<float>(iw) / ih;
+        const int nw = (aspect_ratio >= 1) ? _input_w : static_cast<int>(_input_w * aspect_ratio);
+        const int nh = (aspect_ratio >= 1) ? static_cast<int>(_input_h / aspect_ratio) : _input_h;
+
+        // Resize image and pad if necessary
+        cv::Mat resized_image;
+        cv::resize(image, resized_image, cv::Size(nw, nh), 0, 0, cv::INTER_LINEAR);
+        cv::Mat padded_image = cv::Mat::ones(cv::Size(_input_w, _input_h), CV_8UC3) * 128;
+        resized_image.copyTo(padded_image(cv::Rect((_input_w - nw) / 2, (_input_h - nh) / 2, nw, nh)));
+
+        // Normalize and flatten the image to a 1D tensor
+        std::vector<float> input_tensor;
+        for (int k = 0; k < 3; k++) {
+            for (int i = 0; i < padded_image.rows; i++) {
+                for (int j = 0; j < padded_image.cols; j++) {
+                    input_tensor.emplace_back((padded_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]);
+                }
+            }
+        }
+
+        return input_tensor;
+    }
 };
 
 #endif // DEPTH_ANYTHING_HPP_
diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp
@@ -4,6 +4,7 @@
 #include <algorithm>
 #include <fstream>
 #include <iostream>
+#include <mutex>
 #include <NvInfer.h>
 #include <opencv2/opencv.hpp>
 #include <sensor_msgs/Image.h>
@@ -18,7 +19,6 @@ class LearningInterface {
     virtual void set_input(sensor_msgs::Image& image) = 0;
     virtual void publish() = 0;
 
-    void load_model();
     void predict();
 
     nvinfer1::ICudaEngine* get_engine() { return _engine; }
@@ -35,11 +35,18 @@ class LearningInterface {
     nvinfer1::IExecutionContext* _context;
     nvinfer1::INetworkDefinition* _network;
     nvinfer1::IRuntime* _runtime;
+    size_t _input_c, _input_h, _input_w, _input_size_float;
+    size_t _output_c, _output_h, _output_w, _output_size_float;
     std::string _model_path;
 
+    void _load_model();
+
 private:
+    static constexpr size_t JETSON_MEM_LIMIT_B{3ULL * 1024 * 1024 * 1024};
+    std::mutex _predict_mutex;
     void* _buffers[2] = { nullptr, nullptr };
 
+
     // TODO: static?
     class Logger : public nvinfer1::ILogger {
         void log(Severity severity, const char* msg) noexcept override {

diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp
@@ -119,6 +119,8 @@ typedef struct
   // or guvcview
   std::string pixel_format_name;
   std::string av_device_format;
+  std::string dav2_file;
+  std::string dav2_topic;
   int image_width;
   int image_height;
   int framerate;

diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch
@@ -17,6 +17,9 @@
     <param name="wb_blue_gain" value="0.98"/>
     <param name="wb_green_gain" value="0.88"/>
     <param name="wb_red_gain" value="1.18"/>
+
+    <param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits.onnx" />
+    <param name="dav2_topic" value="/cam0/depth/relative" />
   </node>
 
   <node name="cam1" pkg="usb_cam" type="usb_cam_node" output="screen" >
@@ -35,5 +38,9 @@
     <param name="wb_blue_gain" value="0.98"/>
     <param name="wb_green_gain" value="0.88"/>
     <param name="wb_red_gain" value="1.18"/>
+
+    <param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits.onnx" />
+    <!-- Downfacing cam needs no depth prediction (yet) -->
+    <param name="dav2_topic" value="" /> 
   </node>
 </launch>
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,4 +23,4 @@ @@
                 "miDebuggerPath": "/usr/bin/gdb"
             }
         ]
-    }
+    }