Commit

ci: Fix GPU usage
marcojob committed Nov 13, 2024
1 parent cb1fe4d commit 55663e6
Showing 2 changed files with 107 additions and 7 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
@@ -6,6 +6,8 @@ jobs:
runs-on: self-hosted
container:
image: omavteam/v4l2_camera:latest
options: |
--gpus all
strategy:
matrix:
@@ -31,4 +33,6 @@ jobs:
- name: Run ${{ matrix.ci_script }}
run: |
export ONNX_VERBOSE=1
export TRT_LOGGER=VERBOSE
bash -x ./ci/${{ matrix.ci_script }}.sh
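
A quick way to confirm that --gpus all actually exposes a device inside the CI container is a standalone CUDA runtime probe. This is an illustrative sketch, not part of the commit; it assumes the CUDA toolkit is available in the omavteam/v4l2_camera image.

    // gpu_check.cpp -- sanity check that the container sees a GPU.
    // Build: nvcc gpu_check.cpp -o gpu_check
    #include <cuda_runtime.h>
    #include <cstdio>

    int main() {
        int device_count = 0;
        const cudaError_t err = cudaGetDeviceCount(&device_count);
        if (err != cudaSuccess) {
            // Without --gpus, this typically fails (e.g. cudaErrorNoDevice).
            std::printf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
            return 1;
        }
        std::printf("Visible CUDA devices: %d\n", device_count);
        return device_count > 0 ? 0 : 1;
    }
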
110 changes: 103 additions & 7 deletions src/interface.cpp
@@ -35,8 +35,10 @@ void LearningInterface::_load_model() {
if (_model_path.find(".onnx") != std::string::npos) {
// Check if the engine file already exists
std::ifstream engine_check(engine_path, std::ios::binary);


std::cout << "FOUND ONNX" << std::endl;
if (engine_check.good()) {
std::cout << "GOT ENGINE" << std::endl;
engine_check.seekg(0, std::ios::end);
const size_t model_size = engine_check.tellg();
engine_check.seekg(0, std::ios::beg);
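
The collapsed lines between this hunk and the next read the cached engine into memory and deserialize it. That code is not shown in this diff; a typical shape, with names matching the surrounding context but otherwise assumed, would be:

    // Hypothetical sketch of the elided load path: read the cached .engine
    // bytes, then deserialize them into an engine (assumes <vector> is available).
    std::vector<char> engine_data(model_size);
    engine_check.read(engine_data.data(), model_size);

    _runtime = createInferRuntime(_logger);
    _engine = _runtime->deserializeCudaEngine(engine_data.data(), model_size);
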
@@ -51,6 +53,7 @@ void LearningInterface::_load_model() {
_context = _engine->createExecutionContext();

} else {
std::cout << "NO ENGINE" << std::endl;
// Build an engine from the .onnx model and save it as .engine
_build(_model_path);
_save_engine(engine_path);
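
engine_path itself is derived outside this hunk. A common convention, assumed here purely for illustration, swaps the .onnx suffix for .engine so the built engine is cached next to the model:

    // Hypothetical derivation of engine_path; the real one is outside this diff.
    std::string engine_path = _model_path;
    const size_t pos = engine_path.rfind(".onnx");
    if (pos != std::string::npos) {
        engine_path.replace(pos, 5, ".engine");
    }
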
@@ -90,25 +93,118 @@ void LearningInterface::_load_model() {
}

void LearningInterface::_build(std::string onnx_path) {
std::cout << "BUILDING ENGINE" << std::endl;

// Create the builder
auto builder = createInferBuilder(_logger);
if (!builder) {
throw std::runtime_error("Failed to create TensorRT builder.");
}

// Set up network with explicit batch flag
const auto explicit_batch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* network = builder->createNetworkV2(explicit_batch);
if (!network) {
builder->destroy();
throw std::runtime_error("Failed to create TensorRT network definition.");
}

// Create builder configuration
IBuilderConfig* config = builder->createBuilderConfig();
if (!config) {
network->destroy();
builder->destroy();
throw std::runtime_error("Failed to create TensorRT builder configuration.");
}

// TODO: What about different hardware?
// Set configuration memory pool limit
std::cout << "SETTING CONFIG MEMORY LIMIT" << std::endl;
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, JETSON_MEM_LIMIT_B);
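
One way to resolve the TODO above is to size the workspace from the device that is actually present instead of a fixed Jetson constant. A sketch, keeping JETSON_MEM_LIMIT_B from this diff as the fallback:

    // Sketch: query the active device and cap the workspace accordingly.
    size_t free_bytes = 0, total_bytes = 0;
    size_t workspace_limit = JETSON_MEM_LIMIT_B;  // fallback from this commit
    if (cudaMemGetInfo(&free_bytes, &total_bytes) == cudaSuccess) {
        workspace_limit = free_bytes / 2;  // leave headroom for activations
    }
    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_limit);
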

// Create parser
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, _logger);
if (!parser) {
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("Failed to create TensorRT ONNX parser.");
}

// Parse the ONNX model
std::cout << "PARSING ONNX MODEL" << std::endl;
bool parsed = parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO));
if (!parsed) {
std::cerr << "Failed to parse ONNX model from file: " << onnx_path << std::endl;
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("ONNX model parsing failed.");
}
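
nvonnxparser records per-error diagnostics that this error path could surface before throwing; getNbErrors() and getError() are part of the IParser interface. A sketch:

    // Sketch: print each recorded parser error for easier debugging.
    for (int i = 0; i < parser->getNbErrors(); ++i) {
        std::cerr << "ONNX parse error " << i << ": "
                  << parser->getError(i)->desc() << std::endl;
    }
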

// Build the serialized network (engine plan)
std::cout << "BUILDING SERIALIZED NETWORK" << std::endl;
IHostMemory* plan = builder->buildSerializedNetwork(*network, *config);
if (!plan) {
std::cerr << "Failed to build serialized TensorRT engine plan." << std::endl;
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("Serialized network creation failed.");
}
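
The serialized plan already contains the engine bytes, so it could be written to disk at this point without first deserializing; this commit instead saves through _save_engine after the engine is built. A sketch, assuming an engine_path as in _load_model:

    // Sketch: persist the plan directly to the engine cache file.
    std::ofstream engine_file(engine_path, std::ios::binary);
    engine_file.write(static_cast<const char*>(plan->data()),
                      static_cast<std::streamsize>(plan->size()));
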

// Create runtime
std::cout << "CREATING RUNTIME" << std::endl;
_runtime = createInferRuntime(_logger);
if (!_runtime) {
std::cerr << "Failed to create TensorRT runtime." << std::endl;
plan->destroy();
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("Runtime creation failed.");
}

// Deserialize the engine from the plan
std::cout << "DESERIALIZING ENGINE" << std::endl;
_engine = _runtime->deserializeCudaEngine(plan->data(), plan->size());
if (!_engine) {
std::cerr << "Failed to deserialize CUDA engine from serialized plan." << std::endl;
_runtime->destroy();
plan->destroy();
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("CUDA engine deserialization failed.");
}

// Create execution context
std::cout << "CREATING EXECUTION CONTEXT" << std::endl;
_context = _engine->createExecutionContext();
if (!_context) {
std::cerr << "Failed to create execution context from CUDA engine." << std::endl;
_engine->destroy();
_runtime->destroy();
plan->destroy();
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();
throw std::runtime_error("Execution context creation failed.");
}

// Clean up resources
std::cout << "CLEANING UP RESOURCES" << std::endl;
plan->destroy();
parser->destroy();
config->destroy();
network->destroy();
builder->destroy();

std::cout << "ENGINE BUILD SUCCESSFUL" << std::endl;
}
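
Every error path above repeats the same destroy() chain. An alternative (not what this commit does) is an RAII wrapper, so TensorRT objects release themselves on scope exit:

    // Sketch: unique_ptr with a destroy()-calling deleter (needs <memory>).
    struct TrtDestroyer {
        template <typename T>
        void operator()(T* p) const { if (p) p->destroy(); }
    };
    template <typename T>
    using TrtUnique = std::unique_ptr<T, TrtDestroyer>;

    // Usage: objects are destroyed in reverse order automatically, so the
    // manual cleanup and per-branch destroy() calls disappear.
    TrtUnique<nvinfer1::IBuilder> builder{createInferBuilder(_logger)};
    TrtUnique<nvinfer1::INetworkDefinition> network{
        builder->createNetworkV2(explicit_batch)};
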

bool LearningInterface::_save_engine(const std::string& engine_path) {
