diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11789ee0..b6ec0710 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,13 +46,15 @@ endif()
 set(TENSORRT_LIB_DIR /usr/lib/x86_64-linux-gnu CACHE PATH "Path to TensorRT libraries")
 find_library(NVINFER nvinfer PATHS ${TENSORRT_LIB_DIR})
 find_library(NVINFER_PLUGIN nvinfer_plugin PATHS ${TENSORRT_LIB_DIR})
+find_library(NVONNXPARSER nvonnxparser PATHS ${TENSORRT_LIB_DIR})
+
 message(STATUS "TensorRT NVINFER library found at: ${NVINFER}")
 message(STATUS "TensorRT NVINFER_PLUGIN library found at: ${NVINFER_PLUGIN}")
-
+message(STATUS "TensorRT NVONNXPARSER library found at: ${NVONNXPARSER}")
 
 # Check if TensorRT libraries are found
-if(NOT NVINFER OR NOT NVINFER_PLUGIN)
-    message(FATAL_ERROR "TensorRT libraries not found. Set TENSORRT_LIB_DIR correctly.")
+if(NOT NVINFER OR NOT NVINFER_PLUGIN OR NOT NVONNXPARSER)
+    message(FATAL_ERROR "TensorRT libraries not found. Ensure TENSORRT_LIB_DIR is set correctly.")
 endif()
 
 ## Build the USB camera library
@@ -82,6 +84,7 @@ target_link_libraries(${PROJECT_NAME}
     ${swscale_LIBRARIES}
     ${NVINFER}
     ${NVINFER_PLUGIN}
+    ${NVONNXPARSER}
 )
 
 # Define catkin package
@@ -100,6 +103,7 @@ target_link_libraries(${PROJECT_NAME}_node
     ${swscale_LIBRARIES}
     ${NVINFER}
     ${NVINFER_PLUGIN}
+    ${NVONNXPARSER}
 )
 
 set_target_properties(${PROJECT_NAME}_node PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")
@@ -113,16 +117,17 @@ if(BUILD_TESTING)
     find_package(GTest REQUIRED)
     include_directories(${GTEST_INCLUDE_DIRS})
 
-    catkin_add_gtest(test_learning_interface test/test_learning_interface.cpp)
-    target_link_libraries(test_learning_interface
+    catkin_add_gtest(test_depth_anything_v2 test/test_depth_anything_v2.cpp)
+    target_link_libraries(test_depth_anything_v2
         ${PROJECT_NAME}
         ${catkin_LIBRARIES}
         GTest::gtest_main
        ${NVINFER}
        ${NVINFER_PLUGIN}
+       ${NVONNXPARSER}
        ${CUDA_LIBRARIES}
     )
-    set_target_properties(test_learning_interface PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")
+    set_target_properties(test_depth_anything_v2 PROPERTIES LINK_FLAGS "-Wl,--no-as-needed")
 endif()
 
 # Installation rules
diff --git a/ci/pr_run_tests.sh b/ci/pr_run_tests.sh
index 5b2a2140..85d19b0b 100755
--- a/ci/pr_run_tests.sh
+++ b/ci/pr_run_tests.sh
@@ -1,45 +1,45 @@
 #!/bin/bash
 source /opt/ros/noetic/setup.bash
 
-# Check if the plan file exists before generating it
-echo $LD_LIBRARY_PATH
-if [ ! -f "test/resources/raft-small.plan" ]; then
-    echo "Plan file not found. Generating plan file..."
-    if /usr/src/tensorrt/bin/trtexec --buildOnly --onnx="test/resources/raft-small.onnx" --saveEngine="test/resources/raft-small.plan" --plugins="/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so"
-    then
-        echo "Plan file generation successful"
+# Path and download URL for the ONNX model
+MODEL_PATH="test/resources/depth_anything_v2_vitb.onnx"
+MODEL_URL="https://github.com/fabio-sim/Depth-Anything-ONNX/releases/download/v2.0.0/depth_anything_v2_vitb.onnx"
+
+# Check if the ONNX model file exists
+if [ ! -f "$MODEL_PATH" ]; then
+    echo "ONNX model file not found. Downloading..."
+    if wget -O "$MODEL_PATH" "$MODEL_URL"; then
+        echo "Model downloaded successfully."
     else
-        echo "Plan file generation failed"
+        echo "Model download failed."
         exit 1
     fi
 else
-    echo "Plan file already exists. Skipping generation."
+    echo "ONNX model file already exists. Skipping download."
 fi
 
+# Build the project and run tests
 rm -rf build
 mkdir -p build
 cd build
-if cmake .. -DBUILD_TESTING=ON
-then
-    echo "CMake successfull"
-    if make test_learning_interface
-    then
-        echo "Make successfull"
+
+if cmake .. -DBUILD_TESTING=ON; then
+    echo "CMake successful."
+    if make test_depth_anything_v2; then
+        echo "Make successful."
     else
-        echo "Make failed"
+        echo "Make failed."
         exit 1
     fi
 else
-    echo "CMake failed"
+    echo "CMake failed."
     exit 1
 fi
 
-if ./devel/lib/usb_cam/test_learning_interface
-then
-    echo "Tests successful"
+# Run the test executable
+if ./devel/lib/usb_cam/test_depth_anything_v2; then
+    echo "Tests successful."
 else
-    echo "Tests failed"
+    echo "Tests failed."
     exit 1
 fi
-
-
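Note that CI now only downloads the ONNX model; the old `trtexec` plan-generation step is gone because `load_model()` (in `src/interface.cpp` below) builds the engine in-process and caches it next to the model via `_save_engine`. A minimal sketch of that naming convention, with the helper name `engine_path_for` being hypothetical rather than part of this PR:

```cpp
#include <string>

// Hypothetical helper mirroring _save_engine: the cached engine lives next to
// the ONNX file, with the extension swapped to ".engine".
std::string engine_path_for(const std::string& onnx_path) {
    const size_t dot_index = onnx_path.find_last_of('.');
    if (dot_index == std::string::npos) {
        return "";  // _save_engine refuses paths without an extension
    }
    return onnx_path.substr(0, dot_index) + ".engine";
}
```

For `test/resources/depth_anything_v2_vitb.onnx`, the first test run therefore leaves `test/resources/depth_anything_v2_vitb.engine` behind, and later runs can deserialize it directly.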
diff --git a/include/usb_cam/learning/depth_anything_v2.hpp b/include/usb_cam/learning/depth_anything_v2.hpp
new file mode 100644
index 00000000..f2604f5e
--- /dev/null
+++ b/include/usb_cam/learning/depth_anything_v2.hpp
@@ -0,0 +1,23 @@
+#ifndef DEPTH_ANYTHING_V2_HPP_
+#define DEPTH_ANYTHING_V2_HPP_
+
+#include "interface.hpp"
+#include <cstdint>
+#include <string>
+
+class DepthAnythingV2 : public LearningInterface {
+public:
+    DepthAnythingV2(std::string model_path) {
+        _model_path = model_path;
+    }
+
+    void get_output(uint8_t* output_buffer) override {
+        // TODO
+    }
+
+    void publish() override {
+        // TODO
+    }
+};
+
+#endif // DEPTH_ANYTHING_V2_HPP_
\ No newline at end of file
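Both overrides are left as TODOs here. One possible `get_output`, sketched under the assumption that the base class keeps its host-side result in `_output_data` and its input resolution in `_input_h`/`_input_w` (as in the updated `interface.hpp` below), is a simple min-max normalization of the depth map to `uint8`:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>

// Sketch only, not part of this PR: assumes load_model() and predict() have run,
// so _output_data holds one float per pixel of the network resolution.
void DepthAnythingV2::get_output(uint8_t* output_buffer) {
    const size_t n = _input_h * _input_w;
    const auto mm = std::minmax_element(_output_data, _output_data + n);
    const float min_v = *mm.first;
    const float range = std::max(*mm.second - min_v, 1e-6f);
    for (size_t i = 0; i < n; ++i) {
        output_buffer[i] = static_cast<uint8_t>(255.0f * (_output_data[i] - min_v) / range);
    }
}
```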
diff --git a/include/usb_cam/learning/interface.hpp b/include/usb_cam/learning/interface.hpp
index d833443e..a690d10b 100644
--- a/include/usb_cam/learning/interface.hpp
+++ b/include/usb_cam/learning/interface.hpp
@@ -1,73 +1,58 @@
 #ifndef LEARNING_INTERFACE_HPP_
 #define LEARNING_INTERFACE_HPP_
 
-#include <fstream>
-#include <memory>
-#include <NvInferPlugin.h>
 #include <cuda_runtime.h>
 #include <iostream>
-#include <stdexcept>
-#include <vector>
+#include <fstream>
 #include <NvInfer.h>
+#include <NvOnnxParser.h>
+#include <opencv2/opencv.hpp>
 #include <string>
 
 class LearningInterface {
 public:
-    LearningInterface() : _model_path("") {
-        // Instantiate the logger and initialize plugins
-        if (!initLibNvInferPlugins(static_cast<void*>(&_logger), "")) {
-            std::cerr << "Error: Failed to initialize TensorRT plugins." << std::endl;
-            throw std::runtime_error("Failed to initialize TensorRT plugins.");
-        }
-    }
+    LearningInterface() : _model_path("") {}
+
+    void set_input(cv::Mat input_image);
 
-    virtual void set_input(const uint8_t* input_buffer, size_t height, size_t width) = 0;
     virtual void get_output(uint8_t* output_buffer) = 0;
     virtual void publish() = 0;
 
     void load_model();
-    bool run_inference(size_t batch_size);
-
-    virtual ~LearningInterface() {
-        // Release allocated CUDA memory
-        if (_buffers[0]) cudaFree(_buffers[0]);
-        if (_buffers[1]) cudaFree(_buffers[1]);
-
-        delete[] _input_buffer;
-        delete[] _output_buffer;
-    }
+    void predict();
 
-    float* get_input_buffer() { return _input_buffer; }
     nvinfer1::ICudaEngine* get_engine() { return _engine; }
     nvinfer1::IExecutionContext* get_context() { return _context; }
     nvinfer1::IRuntime* get_runtime() { return _runtime; }
 
+    ~LearningInterface();
+
 protected:
-    float* _input_buffer = nullptr;
-    float* _output_buffer = nullptr;
+    cudaStream_t _stream;
+    float* _input_data = nullptr;
+    float* _output_data = nullptr;
     nvinfer1::ICudaEngine* _engine = nullptr;
     nvinfer1::IExecutionContext* _context = nullptr;
     nvinfer1::IRuntime* _runtime = nullptr;
-    size_t input_height;
-    size_t input_width;
-    size_t output_height;
-    size_t output_width;
+    size_t _input_h = 0;
+    size_t _input_w = 0;
     std::string _model_path;
 
 private:
     void* _buffers[2] = { nullptr, nullptr };
 
+    // TODO: static?
-    class Logger : public nvinfer1::ILogger {
-    public:
+    class Logger : public nvinfer1::ILogger {
         void log(Severity severity, const char* msg) noexcept override {
-            if (severity <= Severity::kWARNING) { // Limit logging to warnings and errors
+            // Only log messages of warning severity or higher
+            if (severity <= Severity::kWARNING) {
                 std::cout << msg << std::endl;
             }
         }
-    };
-    Logger _logger;
+    } _logger;
+
+    bool _save_engine(const std::string& onnx_path);
+    void _build(std::string onnx_path);
 };
 
 #endif // LEARNING_INTERFACE_HPP_
diff --git a/include/usb_cam/learning/raft.hpp b/include/usb_cam/learning/raft.hpp
deleted file mode 100644
index 1cf9d439..00000000
--- a/include/usb_cam/learning/raft.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef RAFT_HPP_
-#define RAFT_HPP_
-
-#include "interface.hpp"
-#include <opencv2/opencv.hpp>
-#include <string>
-
-class Raft : public LearningInterface {
-public:
-    Raft(std::string model_path, size_t network_height, size_t network_width) : _network_height(network_height), _network_width(network_width) {
-        _model_path = model_path;
-        _network_size = cv::Size(_network_width, _network_height);
-    }
-
-    void set_input(const uint8_t* input_buffer, size_t height, size_t width) override {
-        // Resize frame to network size
-        cv::Mat input_frame(height, width, CV_8UC1, (void*)input_buffer);
-        cv::Mat resized_frame;
-        cv::resize(input_frame, resized_frame, _network_size);
-
-        cv::Mat float_frame;
-        resized_frame.convertTo(float_frame, CV_32FC1, _uint8_to_float);
-
-        cudaMemcpy(_input_buffer, float_frame.ptr<float>(), _network_width * _network_height * sizeof(float), cudaMemcpyHostToDevice);
-    }
-
-    void get_output(uint8_t* output_buffer) override {
-        // TODO
-    }
-
-    void publish() override {
-        // TODO
-    }
-
-
-private:
-    const size_t _network_height;
-    const size_t _network_width;
-    cv::Size _network_size;
-    static constexpr float _uint8_to_float = 1.0f / 255.0f;
-};
-
-#endif // RAFT_HPP_
\ No newline at end of file
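`set_input(cv::Mat)` is now a concrete method of the interface, but its body in `src/interface.cpp` below is still empty. A possible implementation, adapted from the deleted `Raft::set_input` (resize to the network resolution, scale `uint8` to `[0, 1]`), assuming a 3-channel BGR frame; the HWC-to-CHW repacking and the lazy host allocation are assumptions, not something this PR specifies:

```cpp
#include <cstring>
#include <opencv2/opencv.hpp>
#include <vector>

void LearningInterface::set_input(cv::Mat input_image) {
    // Resize to the input resolution read from the engine in load_model()
    cv::Mat resized;
    cv::resize(input_image, resized,
               cv::Size(static_cast<int>(_input_w), static_cast<int>(_input_h)));

    // Scale uint8 [0, 255] to float [0, 1], as the deleted Raft code did
    cv::Mat float_image;
    resized.convertTo(float_image, CV_32FC3, 1.0f / 255.0f);

    // Lazily allocate the host staging buffer (released in the destructor)
    if (_input_data == nullptr) {
        _input_data = new float[3 * _input_h * _input_w];
    }

    // Repack OpenCV's interleaved HWC layout into planar CHW for TensorRT
    std::vector<cv::Mat> channels(3);
    cv::split(float_image, channels);
    for (size_t c = 0; c < 3; ++c) {
        std::memcpy(_input_data + c * _input_h * _input_w,
                    channels[c].ptr<float>(),
                    _input_h * _input_w * sizeof(float));
    }
}
```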
diff --git a/src/interface.cpp b/src/interface.cpp
index 567b1c50..0249887a 100644
--- a/src/interface.cpp
+++ b/src/interface.cpp
@@ -1,74 +1,113 @@
 #include "usb_cam/learning/interface.hpp"
+#include <memory>
+
+using namespace nvinfer1;
 
 void LearningInterface::load_model() {
-    // Open and try to read the file
-    std::ifstream file(_model_path, std::ios::binary);
-    if (file.good()) {
-        file.seekg(0, std::ios::end);
-        const size_t model_size = file.tellg();
-        file.seekg(0, std::ios::beg);
-
-        // Read the model data
-        std::vector<char> model_data(model_size);
-        file.read(model_data.data(), model_size);
-        file.close();
-
-        _runtime = nvinfer1::createInferRuntime(_logger);
-        if (_runtime != nullptr) {
-            _engine = _runtime->deserializeCudaEngine(model_data.data(), model_size);
-            if (_engine != nullptr) {
-                _context = _engine->createExecutionContext();
-                if (_context != nullptr) {
-                    // Allocate buffers for input and output
-                    size_t input_size;
-                    size_t output_size;
-                    for (int io = 0; io < _engine->getNbBindings(); io++) {
-                        const char* name = _engine->getBindingName(io);
-                        std::cout << io << ": " << name;
-                        const nvinfer1::Dims dims = _engine->getBindingDimensions(io);
-
-                        size_t total_dims = 1;
-                        for (int d = 0; d < dims.nbDims; d++) {
-                            total_dims *= dims.d[d];
-                        }
-
-                        std::cout << " size: " << total_dims << std::endl;
-
-                        // Check if it's an input or output binding
-                        if (_engine->bindingIsInput(io)) {
-                            input_size = total_dims * sizeof(float);
-                        } else {
-                            output_size = total_dims * sizeof(float);
-                        }
-                    }
-
-                    // Allocate device buffers
-                    cudaMalloc(&_buffers[0], input_size);
-                    cudaMalloc(&_buffers[1], output_size);
-
-                    // Allocate CPU buffers
-                    _input_buffer = new float[input_size / sizeof(float)];
-                    _output_buffer = new float[output_size / sizeof(float)];
-
-                    std::cout << "TensorRT model loaded successfully from: " << _model_path << std::endl;
-                } else {
-                    std::cout << "Failed to create execution context." << std::endl;
-                }
-            } else {
-                std::cout << "Failed to create TensorRT engine." << std::endl;
-            }
-        } else {
-            std::cout << "Failed to create TensorRT runtime." << std::endl;
-        }
-    } else {
-        std::cout << "Failed to open model file." << std::endl;
-    }
+    if (_model_path.find(".onnx") == std::string::npos) {
+        // Load a serialized engine directly
+        std::ifstream engine_stream(_model_path, std::ios::binary);
+        engine_stream.seekg(0, std::ios::end);
+
+        const size_t model_size = engine_stream.tellg();
+        engine_stream.seekg(0, std::ios::beg);
+
+        std::unique_ptr<char[]> engine_data(new char[model_size]);
+        engine_stream.read(engine_data.get(), model_size);
+        engine_stream.close();
+
+        // Create the TensorRT runtime and deserialize the engine
+        _runtime = nvinfer1::createInferRuntime(_logger);
+        _engine = _runtime->deserializeCudaEngine(engine_data.get(), model_size);
+        _context = _engine->createExecutionContext();
+
+    } else {
+        // Build an engine from an onnx model and cache it next to the model
+        _build(_model_path);
+        _save_engine(_model_path);
+    }
+
+    // Read the input dimensions (NCHW) from the first IO tensor
+    const auto input_dims = _engine->getTensorShape(_engine->getIOTensorName(0));
+    _input_h = input_dims.d[2];
+    _input_w = input_dims.d[3];
+
+    // Create CUDA stream
+    cudaStreamCreate(&_stream);
+
+    // Device buffers: 3-channel float input, single-channel float output
+    cudaMalloc(&_buffers[0], 3 * _input_h * _input_w * sizeof(float));
+    cudaMalloc(&_buffers[1], _input_h * _input_w * sizeof(float));
+
+    _output_data = new float[_input_h * _input_w];
+}
+
+void LearningInterface::set_input(cv::Mat input_image) {
+    // TODO: preprocess input_image and stage it in _input_data
 }
 
-bool LearningInterface::run_inference(size_t batch_size) {
-    if (!_context->executeV2(_buffers)) {
-        std::cerr << "Failed to execute inference." << std::endl;
-        return false;
-    }
-    return true;
-}
+void LearningInterface::_build(std::string onnx_path) {
+    auto builder = createInferBuilder(_logger);
+    const auto explicit_batch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+    INetworkDefinition* network = builder->createNetworkV2(explicit_batch);
+    IBuilderConfig* config = builder->createBuilderConfig();
+    config->setFlag(BuilderFlag::kFP16);
+
+    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger);
+    const bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO));
+    if (!parsed) {
+        std::cout << "Failed to parse ONNX model: " << onnx_path << std::endl;
+    }
+
+    IHostMemory* plan{ builder->buildSerializedNetwork(*network, *config) };
+
+    _runtime = createInferRuntime(_logger);
+    _engine = _runtime->deserializeCudaEngine(plan->data(), plan->size());
+    _context = _engine->createExecutionContext();
+
+    delete network;
+    delete config;
+    delete parser;
+    delete plan;
+    delete builder;
+}
+
+bool LearningInterface::_save_engine(const std::string& onnx_path) {
+    // Derive the engine path from the onnx path
+    std::string engine_path;
+    size_t dot_index = onnx_path.find_last_of(".");
+    if (dot_index != std::string::npos) {
+        engine_path = onnx_path.substr(0, dot_index) + ".engine";
+    } else {
+        return false;
+    }
+
+    // Serialize the engine and write it to disk
+    if (_engine) {
+        nvinfer1::IHostMemory* data = _engine->serialize();
+        std::ofstream file;
+        file.open(engine_path, std::ios::binary | std::ios::out);
+        if (!file.is_open()) {
+            std::cout << "Failed to create engine file: " << engine_path << std::endl;
+            return false;
+        }
+
+        file.write((const char*)data->data(), data->size());
+        file.close();
+
+        delete data;
+    }
+    return true;
+}
+
+void LearningInterface::predict() {
+    // Copy the staged input to the device, run inference, then copy the result back
+    const size_t input_bytes = 3 * _input_h * _input_w * sizeof(float);
+    const size_t output_bytes = _input_h * _input_w * sizeof(float);
+
+    cudaMemcpyAsync(_buffers[0], _input_data, input_bytes, cudaMemcpyHostToDevice, _stream);
+    _context->executeV2(_buffers);
+    cudaStreamSynchronize(_stream);
+
+    cudaMemcpyAsync(_output_data, _buffers[1], output_bytes, cudaMemcpyDeviceToHost, _stream);
+    cudaStreamSynchronize(_stream);
+}
+
+LearningInterface::~LearningInterface() {
+    cudaStreamDestroy(_stream);
+    cudaFree(_buffers[0]);
+    cudaFree(_buffers[1]);
+
+    delete[] _input_data;
+    delete[] _output_data;
+}
\ No newline at end of file
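Taken together, the intended call sequence for the reworked interface looks roughly like this once the TODO bodies (`set_input`, `get_output`, `publish`) are filled in; the frame source is a placeholder, and the 518x518 resolution is an assumption about the exported model rather than something this PR pins down:

```cpp
#include "usb_cam/learning/depth_anything_v2.hpp"
#include <opencv2/opencv.hpp>
#include <vector>

int main() {
    // First run builds and caches a .engine next to the .onnx; later runs deserialize it.
    DepthAnythingV2 net("test/resources/depth_anything_v2_vitb.onnx");
    net.load_model();

    // Placeholder frame; the node passes camera images instead.
    cv::Mat frame(518, 518, CV_8UC3, cv::Scalar(0, 0, 0));
    net.set_input(frame);
    net.predict();

    std::vector<uint8_t> depth(518 * 518);
    net.get_output(depth.data());
    net.publish();
    return 0;
}
```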
diff --git a/src/usb_cam_node.cpp b/src/usb_cam_node.cpp
index de1d0761..1eef4ce0 100644
--- a/src/usb_cam_node.cpp
+++ b/src/usb_cam_node.cpp
@@ -38,7 +38,7 @@
 #include "usb_cam/utils.hpp"
 #include "usb_cam/learning/interface.hpp"
-#include "usb_cam/learning/raft.hpp"
+#include "usb_cam/learning/depth_anything_v2.hpp"
 
 namespace usb_cam {
 
 class UsbCamNode {
@@ -72,7 +72,8 @@ class UsbCamNode {
     UsbCamNode() : m_node("~") {
         // Setup the network that outputs derivates of the image captured
-        networks.push_back(std::make_unique<Raft>("resources/raft.onnx", 240, 320));
+        // TODO: Actual network
+        networks.push_back(std::make_unique<DepthAnythingV2>("depth_anything_v2_vitb.onnx"));
 
         // Advertise the main image topic
         image_transport::ImageTransport it(m_node);
@@ -178,9 +179,9 @@ class UsbCamNode {
 
         // Run all the networks
         for (const auto& net : networks) {
-            net->set_input(m_image.data.data(), 1920, 1200);
-            if (net->run_inference(1)) {
-                net->publish();
-            }
+            // NOTE: assumes a grayscale 1920x1200 frame, matching the previously hardcoded size
+            net->set_input(cv::Mat(1920, 1200, CV_8UC1, m_image.data.data()));
+            net->predict();
+            net->publish();
         }
diff --git a/test/test_depth_anything_v2.cpp b/test/test_depth_anything_v2.cpp
new file mode 100644
index 00000000..4f030992
--- /dev/null
+++ b/test/test_depth_anything_v2.cpp
@@ -0,0 +1,30 @@
+#include <gtest/gtest.h>
+#include "usb_cam/learning/depth_anything_v2.hpp"
+#include <cuda_runtime.h>
+#include <string>
+#include <vector>
+
+// Define a fixture for DepthAnythingV2 tests
+class DepthAnythingV2Test : public ::testing::Test {
+protected:
+    // Initialize variables for the test
+    std::string model_path = "/workspaces/v4l2_camera/test/resources/depth_anything_v2_vitb.onnx";
+
+    DepthAnythingV2* depth_anything_v2;
+
+    void SetUp() override {
+        // Instantiate the DepthAnythingV2 model with the test parameters
+        depth_anything_v2 = new DepthAnythingV2(model_path);
+        depth_anything_v2->load_model();
+    }
+
+    void TearDown() override {
+        delete depth_anything_v2;
+    }
+};
+
+TEST_F(DepthAnythingV2Test, TestModelLoad) {
+    ASSERT_NE(depth_anything_v2->get_engine(), nullptr);
+    ASSERT_NE(depth_anything_v2->get_context(), nullptr);
+    ASSERT_NE(depth_anything_v2->get_runtime(), nullptr);
+}
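The deleted suite below also exercised `set_input` and inference. Once the TODO bodies land, an analogous check against the new API could look like this; since `predict()` now returns `void`, the test can only assert that the pipeline runs end to end, and the 518x518 frame size is an assumption about the model:

```cpp
TEST_F(DepthAnythingV2Test, TestPredict) {
    // Feed a synthetic gray frame through preprocessing and inference; this
    // asserts nothing about the depth values, only that the pipeline completes.
    cv::Mat dummy(518, 518, CV_8UC3, cv::Scalar(128, 128, 128));
    depth_anything_v2->set_input(dummy);
    depth_anything_v2->predict();
    SUCCEED();
}
```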
diff --git a/test/test_learning_interface.cpp b/test/test_learning_interface.cpp
deleted file mode 100644
index 5ba0245a..00000000
--- a/test/test_learning_interface.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <gtest/gtest.h>
-#include "usb_cam/learning/raft.hpp"
-#include <cuda_runtime.h>
-#include <string>
-#include <vector>
-
-// Define a fixture for Raft tests
-class RaftTest : public ::testing::Test {
-protected:
-    // Initialize variables for the test
-    std::string model_path = "/workspaces/v4l2_camera/test/resources/raft-small.plan";
-    size_t input_height = 224;
-    size_t input_width = 224;
-
-    Raft* raft;
-
-    void SetUp() override {
-        // Instantiate the Raft model with the test parameters
-        raft = new Raft(model_path, input_height, input_width);
-        raft->load_model();
-    }
-
-    void TearDown() override {
-        delete raft;
-    }
-};
-
-TEST_F(RaftTest, TestModelLoad) {
-    // Test that the model loads successfully
-    ASSERT_NE(raft->get_engine(), nullptr);
-    ASSERT_NE(raft->get_context(), nullptr);
-    ASSERT_NE(raft->get_runtime(), nullptr);
-}
-
-TEST_F(RaftTest, TestSetInput) {
-    // Create a dummy input buffer
-    std::vector<uint8_t> input_data(input_height * input_width, 128);
-
-    // Set input and check if it is copied to the device
-    raft->set_input(input_data.data(), input_height, input_width);
-
-    // Allocate host memory to copy back the data from GPU for verification
-    std::vector<float> host_input(input_height * input_width);
-    cudaMemcpy(host_input.data(), raft->get_input_buffer(), input_height * input_width * sizeof(float), cudaMemcpyDeviceToHost);
-
-    // Verify the data (simple check to see if values are scaled correctly)
-    for (size_t i = 0; i < host_input.size(); ++i) {
-        ASSERT_NEAR(host_input[i], 128.0f / 255.0f, 1e-5);
-    }
-}
-
-TEST_F(RaftTest, TestRunInference) {
-    // Dummy batch size
-    size_t batch_size = 1;
-
-    // Run inference
-    bool success = raft->run_inference(batch_size);
-
-    // Check if inference ran successfully
-    ASSERT_TRUE(success);
-}