diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index e3f2eaf4..abd33345 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -27,9 +27,10 @@ RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86 sudo apt update && \ sudo apt -y install cuda-toolkit-11-8 cudnn9-cuda-11 libcublas-12-6 -# CUDA: Add PATH and LD_LIBRARY_PATH to .bash_aliases -RUN echo 'export PATH=$PATH:/usr/local/cuda-11.8/bin' >> /home/asl/.bash_aliases && \ - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64' >> /home/asl/.bash_aliases +# CUDA: Add PATH and LD_LIBRARY_PATH to container +ENV PATH="/usr/local/cuda-11.8/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda-11.8/lib64:${LD_LIBRARY_PATH}" +ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8 # TensorRT: Install from .deb file: Seems we run 8.5.2.2 (which is bundled with 8.5.3) COPY .devcontainer/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_amd64.deb /tmp/tensorrt.deb diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 74169662..f9d850bb 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,7 +3,8 @@ "customizations": { "vscode": { "extensions": [ - "ms-vscode.cpptools" + "ms-vscode.cpptools", + "ms-iot.vscode-ros" ], "settings": { "files.hotExit": "off", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9e6ab43..9d282e34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,10 +6,14 @@ jobs: runs-on: self-hosted container: image: omavteam/v4l2_camera:latest + options: | + --gpus all strategy: matrix: - ci_script: [pr_compile, pr_run_tests] + ci_script: [pr_compile] + # TODO: Why does pr_run_test not work on the runner + # ci_script: [pr_compile, pr_run_tests] steps: - name: Checkout @@ -18,6 +22,19 @@ jobs: submodules: recursive set-safe-directory: true + # Step to download the ONNX file from Google Drive link + - name: Download ONNX model file + env: + ONNX_FILE_URL: ${{ secrets.DEPTH_ANYTHING_V2_VITS_LINK }} + run: | + # Extract Google Drive file ID from URL + FILE_ID=$(echo "$ONNX_FILE_URL" | sed -E 's|.*?/d/([^/]+).*|\1|') + + # Download the file using Google Drive link and file ID + curl -L -o $GITHUB_WORKSPACE/test/resources/depth_anything_v2_vits.onnx "https://drive.google.com/uc?export=download&id=${FILE_ID}" + - name: Run ${{ matrix.ci_script }} run: | + export ONNX_VERBOSE=1 + export TRT_LOGGER=VERBOSE bash -x ./ci/${{ matrix.ci_script }}.sh diff --git a/.vscode/launch.json b/.vscode/launch.json index c4cbee9f..66c3ed29 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,4 +23,4 @@ "miDebuggerPath": "/usr/bin/gdb" } ] -} \ No newline at end of file +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cd1cf54..cdcf55b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,12 @@ find_package(catkin REQUIRED COMPONENTS # Find OpenCV, Eigen, and CUDA find_package(OpenCV REQUIRED) find_package(Eigen3 REQUIRED) + +# Set CUDA toolkit root directory if not already defined +if(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR) + set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda-11.8") +endif() +message(STATUS "CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}") find_package(CUDA REQUIRED) # Find FFmpeg libraries with PkgConfig @@ -97,13 +103,14 @@ catkin_package( add_executable(${PROJECT_NAME}_node src/usb_cam_node.cpp) target_link_libraries(${PROJECT_NAME}_node ${PROJECT_NAME} - ${catkin_LIBRARIES} ${avcodec_LIBRARIES} ${avutil_LIBRARIES} - ${swscale_LIBRARIES} - ${NVINFER} + ${catkin_LIBRARIES} + ${CUDA_LIBRARIES} ${NVINFER_PLUGIN} + ${NVINFER} ${NVONNXPARSER} + ${swscale_LIBRARIES} ) set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed") diff --git a/ci/pr_run_tests.sh b/ci/pr_run_tests.sh index d95acff8..236699ff 100755 --- a/ci/pr_run_tests.sh +++ b/ci/pr_run_tests.sh @@ -1,34 +1,6 @@ #!/bin/bash source /opt/ros/noetic/setup.bash -# Set paths for the model and plan files -MODEL_PATH="test/resources/depth_anything_v2_vits_16.onnx" - -# Check if the ONNX model file exists -if [ ! -f "$MODEL_PATH" ]; then - # If the model file doesn't exist, check if the environment variable is set - if [ -z "$DEPTH_ANYTHING_V2_VITS_16_LINK" ]; then - echo "The model file does not exist, and the environment variable DEPTH_ANYTHING_V2_VITS_16_LINK is not set." - exit 1 - else - # If the environment variable is set, download the model - echo "ONNX model file not found. Attempting to download..." - - # Create the directory if it doesn't exist - mkdir -p "$(dirname "$MODEL_PATH")" - - # Download the file - if wget -O "$MODEL_PATH" "$DEPTH_ANYTHING_V2_VITS_16_LINK"; then - echo "Download successful." - else - echo "Download failed." - exit 1 - fi - fi -else - echo "ONNX model file already exists." -fi - # Build the project and run tests rm -rf build mkdir -p build diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp index 3e348fa4..6df8dd97 100644 --- a/include/usb_cam/learning/depth_anything_v2.hpp +++ b/include/usb_cam/learning/depth_anything_v2.hpp @@ -9,52 +9,78 @@ class DepthAnythingV2 : public LearningInterface { public: - DepthAnythingV2(ros::NodeHandle* nh, std::string model_path) { - _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH); + DepthAnythingV2(ros::NodeHandle* nh, const std::string& model_path, const std::string& metric_topic) { _model_path = model_path; + _load_model(); - if (nh != nullptr) { - _depth_publication = nh->advertise("depth_anything_v2", 1); + if (nh != nullptr && !metric_topic.empty()) { + _depth_pub = nh->advertise(metric_topic, 1); } } void set_input(sensor_msgs::Image& msg) override { + // Keep track of input image size, we want to get the same output image dimensions + _output_image_w = msg.width; + _output_image_h = msg.height; + cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8); cv::Mat image = cv_ptr->image; // Change size to 518x518 (still uint8) cv::Mat resized_image; - cv::resize(image, resized_image, _INPUT_SIZE); + cv::resize(image, resized_image, cv::Size(_input_w, _input_h)); - // Change to float32 between 0 and 1 - std::vector channels; - cv::split(resized_image, channels); - for (uint8_t i = 0; i < 3; ++i) { - channels[i].convertTo(channels[i], CV_32F, 1.0f / 255.0f); - } - cv::Mat float_image; - cv::merge(channels, float_image); - _input_data = float_image.reshape(1, 1).ptr(0); + std::vector input = _preprocess(resized_image); + memcpy(_input_data, input.data(), _input_size_float); } - void publish() override { - if (_depth_publication.getTopic() != "") { - cv::Mat depth_prediction = cv::Mat(_HEIGHT, _WIDTH, CV_32FC1, _output_data); - - cv_bridge::CvImage depth_image; - depth_image.header.stamp = ros::Time::now(); - depth_image.header.frame_id = "depth_frame"; - depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; - depth_image.image = depth_prediction; - _depth_publication.publish(depth_image.toImageMsg()); - } +void publish() override { + if (_depth_pub.getTopic() != "") { + cv::Mat depth_prediction(_input_w, _input_h, CV_32FC1, _output_data); + cv::Mat depth_resized; + cv::resize(depth_prediction, depth_resized, cv::Size(_output_image_w, _output_image_h)); + cv_bridge::CvImage depth_image; + depth_image.header.stamp = ros::Time::now(); + depth_image.header.frame_id = "depth_frame"; + depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1; + depth_image.image = depth_resized; + _depth_pub.publish(depth_image.toImageMsg()); } +} private: - const size_t _HEIGHT = 518; - const size_t _WIDTH = 518; - cv::Size _INPUT_SIZE; - ros::Publisher _depth_publication; + // TODO: Not so nice magic numbers from the paper implementation + const float mean[3] = { 123.675f, 116.28f, 103.53f }; + const float std[3] = { 58.395f, 57.12f, 57.375f }; + ros::Publisher _depth_pub; + uint32_t _output_image_w, _output_image_h; + + std::vector _preprocess(cv::Mat& image) { + // Determine the resized dimensions + const int iw = image.cols; + const int ih = image.rows; + const float aspect_ratio = static_cast(iw) / ih; + const int nw = (aspect_ratio >= 1) ? _input_w : static_cast(_input_w * aspect_ratio); + const int nh = (aspect_ratio >= 1) ? static_cast(_input_h / aspect_ratio) : _input_h; + + // Resize image and pad if necessary + cv::Mat resized_image; + cv::resize(image, resized_image, cv::Size(nw, nh), 0, 0, cv::INTER_LINEAR); + cv::Mat padded_image = cv::Mat::ones(cv::Size(_input_w, _input_h), CV_8UC3) * 128; + resized_image.copyTo(padded_image(cv::Rect((_input_w - nw) / 2, (_input_h - nh) / 2, nw, nh))); + + // Normalize and flatten the image to a 1D tensor + std::vector input_tensor; + for (int k = 0; k < 3; k++) { + for (int i = 0; i < padded_image.rows; i++) { + for (int j = 0; j < padded_image.cols; j++) { + input_tensor.emplace_back((padded_image.at(i, j)[k] - mean[k]) / std[k]); + } + } + } + + return input_tensor; + } }; #endif // DEPTH_ANYTHING_HPP_ diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp index 8d1d1e0d..0c177287 100644 --- a/include/usb_cam/learning/interface.hpp +++ b/include/usb_cam/learning/interface.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ class LearningInterface { virtual void set_input(sensor_msgs::Image& image) = 0; virtual void publish() = 0; - void load_model(); void predict(); nvinfer1::ICudaEngine* get_engine() { return _engine; } @@ -35,11 +35,18 @@ class LearningInterface { nvinfer1::IExecutionContext* _context; nvinfer1::INetworkDefinition* _network; nvinfer1::IRuntime* _runtime; + size_t _input_c, _input_h, _input_w, _input_size_float; + size_t _output_c, _output_h, _output_w, _output_size_float; std::string _model_path; + void _load_model(); + private: + static constexpr size_t JETSON_MEM_LIMIT_B{3ULL * 1024 * 1024 * 1024}; + std::mutex _predict_mutex; void* _buffers[2] = { nullptr, nullptr }; + // TODO: static? class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { diff --git a/include/usb_cam/usb_cam.hpp b/include/usb_cam/usb_cam.hpp index d249f6e6..fdce6523 100644 --- a/include/usb_cam/usb_cam.hpp +++ b/include/usb_cam/usb_cam.hpp @@ -119,6 +119,8 @@ typedef struct // or guvcview std::string pixel_format_name; std::string av_device_format; + std::string dav2_file; + std::string dav2_topic; int image_width; int image_height; int framerate; diff --git a/launch/two_mipi_rgb.launch b/launch/two_mipi_rgb.launch index 8d3f840e..75083243 100644 --- a/launch/two_mipi_rgb.launch +++ b/launch/two_mipi_rgb.launch @@ -17,6 +17,9 @@ + + + @@ -35,5 +38,9 @@ + + + + diff --git a/src/interface.cpp b/src/interface.cpp index 2e64109f..8890d479 100644 --- a/src/interface.cpp +++ b/src/interface.cpp @@ -1,75 +1,122 @@ #include "usb_cam/learning/interface.hpp" #include +#include using namespace nvinfer1; -void LearningInterface::load_model() { - if (_model_path.find(".onnx") == std::string::npos) { - std::ifstream engine_stream(_model_path, std::ios::binary); - engine_stream.seekg(0, std::ios::end); +void LearningInterface::predict() { + if (_predict_mutex.try_lock()) { + cudaMemcpyAsync(_buffers[0], _input_data, _input_size_float , cudaMemcpyHostToDevice, _stream); - const size_t model_size = engine_stream.tellg(); - engine_stream.seekg(0, std::ios::beg); +#if NV_TENSORRT_MAJOR < 10 + _context->enqueueV2(_buffers, _stream, nullptr); +#else + _context->executeV2(_buffers); +#endif - std::unique_ptr engine_data(new char[model_size]); - engine_stream.read(engine_data.get(), model_size); - engine_stream.close(); + cudaStreamSynchronize(_stream); + cudaMemcpyAsync(_output_data, _buffers[1], _output_size_float, cudaMemcpyDeviceToHost); + _predict_mutex.unlock(); - // Create tensorrt model - _runtime = nvinfer1::createInferRuntime(_logger); - _engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size); - _context = _engine->createExecutionContext(); + } +} + +void LearningInterface::_load_model() { + std::ifstream file_check(_model_path); + if (!file_check.good()) { + std::cerr << "Error: " << _model_path << " does not exist." << std::endl; + throw std::runtime_error("Model file not found"); + } + file_check.close(); + + // Define the expected .engine path based on the .onnx model path + std::string engine_path = _model_path.substr(0, _model_path.find_last_of('.')) + ".engine"; + + if (_model_path.find(".onnx") != std::string::npos) { + // Check if the engine file already exists + std::ifstream engine_check(engine_path, std::ios::binary); + + if (engine_check.good()) { + engine_check.seekg(0, std::ios::end); + const size_t model_size = engine_check.tellg(); + engine_check.seekg(0, std::ios::beg); + + std::unique_ptr engine_data(new char[model_size]); + engine_check.read(engine_data.get(), model_size); + engine_check.close(); + + // Create TensorRT runtime and load engine + _runtime = nvinfer1::createInferRuntime(_logger); + _engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size); + _context = _engine->createExecutionContext(); + + } else { + // Build an engine from the .onnx model and save it as .engine + _build(_model_path); + _save_engine(engine_path); + } } else { - // Build an engine from an onnx model - _build(_model_path); - _save_engine(_model_path); + std::cerr << "Error: Only .onnx model files are supported." << std::endl; + throw std::runtime_error("Unsupported model format"); } // Define input dimensions - const auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); - const int input_h = input_dims.d[2]; - const int input_w = input_dims.d[3]; +#if NV_TENSORRT_MAJOR < 10 + auto input_dims = _engine->getBindingDimensions(0); + auto output_dims = _engine->getBindingDimensions(1); +#else + auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0)); + auto output_dims = _engine->getTensorShape(_engine->getIOTensorName(1)); +#endif + + // TODO: THis does not generalize so well + _input_c = input_dims.d[1]; + _input_h = input_dims.d[2]; + _input_w = input_dims.d[3]; + _output_c = output_dims.d[1]; + _output_h = output_dims.d[2]; + _output_w = output_dims.d[3]; + _input_size_float = _input_c * _input_h * _input_w * sizeof(float); + _output_size_float = _output_c * _output_h * _output_w * sizeof(float); - // Create CUDA stream cudaStreamCreate(&_stream); - cudaMalloc(&_buffers[0], 3 * input_h * input_w * sizeof(float)); - cudaMalloc(&_buffers[1], input_h * input_w * sizeof(float)); + cudaMalloc(&_buffers[0], _input_size_float); + cudaMalloc(&_buffers[1], _output_size_float); - _output_data = new float[input_h * input_w]; + _input_data = new float[_input_c * _input_h * _input_w]; + _output_data = new float[_output_c * _output_h * _output_w]; } void LearningInterface::_build(std::string onnx_path) { auto builder = createInferBuilder(_logger); - const auto explicitBatch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - INetworkDefinition* network = builder->createNetworkV2(explicitBatch); + const auto explicit_batch = 1U << static_cast(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + INetworkDefinition* network = builder->createNetworkV2(explicit_batch); + + // TODO: What about different hardware? IBuilderConfig* config = builder->createBuilderConfig(); - config->setFlag(BuilderFlag::kFP16); + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B); + nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger); - bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kINFO)); - IHostMemory* plan{ builder->buildSerializedNetwork(*network, *config) }; + const bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kVERBOSE)); + if (parsed) { + IHostMemory* plan = builder->buildSerializedNetwork(*network, *config); + _runtime = createInferRuntime(_logger); + _engine = _runtime->deserializeCudaEngine(plan->data(), plan->size()); + _context = _engine->createExecutionContext(); + delete plan; - _runtime = createInferRuntime(_logger); - _engine = _runtime->deserializeCudaEngine(plan->data(), plan->size()); - _context = _engine->createExecutionContext(); + } else { + std::cerr << "Could not parse onnx file: " << onnx_path.c_str() << std::endl; + } delete network; delete config; delete parser; - delete plan; } -bool LearningInterface::_save_engine(const std::string& onnx_path) { - std::string engine_path; - size_t dot_index = onnx_path.find_last_of("."); - if (dot_index != std::string::npos) { - engine_path = onnx_path.substr(0, dot_index) + ".engine"; - - } else { - return false; - } - +bool LearningInterface::_save_engine(const std::string& engine_path) { if (_engine) { nvinfer1::IHostMemory* data = _engine->serialize(); std::ofstream file; @@ -87,15 +134,11 @@ bool LearningInterface::_save_engine(const std::string& onnx_path) { return true; } -void LearningInterface::predict() { - cudaMemcpyAsync(_buffers[0], _input_data, sizeof(_input_data) * sizeof(float), cudaMemcpyHostToDevice, _stream); - _context->executeV2(_buffers); - cudaStreamSynchronize(_stream); - cudaMemcpyAsync(_output_data, _buffers[1], sizeof(_input_data) * sizeof(float), cudaMemcpyDeviceToHost); -} - LearningInterface::~LearningInterface() { cudaFree(_stream); cudaFree(_buffers[0]); cudaFree(_buffers[1]); + + delete[] _input_data; + delete[] _output_data; } diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp index 517a43e7..9a3bb946 100644 --- a/src/usb_cam_node.cpp +++ b/src/usb_cam_node.cpp @@ -71,10 +71,6 @@ class UsbCamNode { } UsbCamNode() : m_node("~") { - // Setup the network that outputs derivates of the image captured - // TODO: Actual network - networks.push_back(std::make_unique(&m_node, "depth_anything_v2_vitb.onnx")); - // Advertise the main image topic image_transport::ImageTransport it(m_node); m_image_pub = it.advertiseCamera("image_raw", 1); @@ -127,6 +123,14 @@ class UsbCamNode { m_camera_info->setCameraInfo(camera_info); } + // Setup the network that outputs derivates of the image captured + m_node.param("dav2_file", m_parameters.dav2_file, std::string("")); + m_node.param("dav2_topic", m_parameters.dav2_topic, std::string("")); + + if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) { + networks.push_back(std::make_unique(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic)); + } + ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS", m_parameters.camera_name.c_str(), m_parameters.device_name.c_str(), m_parameters.image_width, m_parameters.image_height, m_parameters.io_method_name.c_str(), diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp index a665cdae..d2a0fdda 100644 --- a/test/test_depth_anything_v2.cpp +++ b/test/test_depth_anything_v2.cpp @@ -9,14 +9,15 @@ // This class provides access to protected members that we normally don't want to expose class DepthAnythingV2Test : public DepthAnythingV2 { public: - DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path) {} + DepthAnythingV2Test(const std::string& model_path) : DepthAnythingV2(nullptr, model_path, std::string("")) {} float* get_input_data() { return _input_data; } + void load_model() { _load_model(); } }; class TestDepthAnythingV2 : public ::testing::Test { protected: - const std::string model_path = "/workspaces/v4l2_camera/test/resources/depth_anything_v2_vits_16.onnx"; - const std::string test_image_path = "/workspaces/v4l2_camera/test/resources/maschinenhalle_example_frame.jpg"; + const std::string model_path = "../test/resources/depth_anything_v2_vits.onnx"; + const std::string test_image_path = "../test/resources/maschinenhalle_example_frame.jpg"; cv_bridge::CvImage cv_image; cv::Mat img; @@ -47,11 +48,6 @@ TEST_F(TestDepthAnythingV2, TestModelLoad) { TEST_F(TestDepthAnythingV2, TestSetInput) { depth_anything_v2->set_input(msg); ASSERT_NE(depth_anything_v2->get_input_data(), nullptr); - - const size_t expected_size = 518 * 518 * 3; - float* input_data = depth_anything_v2->get_input_data(); - ASSERT_NE(input_data, nullptr); - ASSERT_FLOAT_EQ(input_data[0], img.at(0, 0)[0] / 255.0f); } TEST_F(TestDepthAnythingV2, TestPredictPublish) {