
Commit

Add TensorRT10 support for YOLOv8
mpj1234 committed Jul 16, 2024
1 parent 040d053 commit f6fff4d
Showing 36 changed files with 7,684 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -30,3 +30,9 @@
*.exe
*.out
*.app

build/
models/
cmake-build-debug/
cmake-build-release/
.idea/
103 changes: 103 additions & 0 deletions CMakeLists-win.txt
@@ -0,0 +1,103 @@
cmake_minimum_required(VERSION 3.28)
project(yolov8_trtx_v10)

set(CMAKE_CXX_STANDARD 11)
# Compile .cu files with UTF-8 encoding under nvcc
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /utf-8")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")

enable_language(CUDA)

# Target multiple CUDA architectures
set(CMAKE_CUDA_ARCHITECTURES 75 86 89)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")

# OpenCV
set(OpenCV_DIR E:\\Opencv\\install\\opencv-4.8.0\\build)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${OpenCV_LIB_DIR})

# CUDA
set(CUDA_TOOLKIT_ROOT_DIR C:\\Program\ Files\\NVIDIA\ GPU\ Computing\ Toolkit\\CUDA\\v11.8)
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)

# TensorRT
#set(TENSORRT_ROOT E:\\TensorRT\\TensorRT-8.6.1.6)
set(TENSORRT_ROOT E:\\TensorRT\\TensorRT-10.2.0.19)
include_directories(${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

# Detect the TensorRT major version from the headers under TENSORRT_ROOT
# Locate the version header
file(GLOB TENSORRT_VERSION_FILES "${TENSORRT_ROOT}/include/NvInferVersion.h")
# Read the version header
file(STRINGS ${TENSORRT_VERSION_FILES} TENSORRT_VERSION_LINES
LIMIT_COUNT 1 # read only the first matching line
REGEX "#define NV_TENSORRT_MAJOR [0-9]+" # match the major version define
)
message(STATUS " TENSORRT_VERSION_LINES: ${TENSORRT_VERSION_LINES}")
# Parse the major version number
string(REGEX REPLACE "#define NV_TENSORRT_MAJOR ([0-9]+)" "\\1" TENSORRT_VERSION_MAJOR ${TENSORRT_VERSION_LINES})
message(STATUS " TENSORRT_VERSION_MAJOR: ${TENSORRT_VERSION_MAJOR}")
# Check whether the major version is >= 10
if (TENSORRT_VERSION_MAJOR GREATER_EQUAL 10)
message(STATUS " TensorRT version is greater than or equal to 10.")
link_libraries(
opencv_core
opencv_highgui
opencv_imgproc
opencv_imgcodecs
cudart
cublas
nvinfer_10
)
else ()
message(STATUS " TensorRT version is less than 10.")
link_libraries(
opencv_core
opencv_highgui
opencv_imgproc
opencv_imgcodecs
cudart
cublas
nvinfer
)
endif ()

include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/plugin)
include_directories(${CMAKE_SOURCE_DIR}/src)
link_directories(${CMAKE_SOURCE_DIR}/lib)

add_definitions(-DNOMINMAX)

add_definitions(-DAPI_EXPORTS)

file(GLOB_RECURSE SRCS ${CMAKE_SOURCE_DIR}/src/*.cpp ${CMAKE_SOURCE_DIR}/src/*.cu)
file(GLOB_RECURSE PLUGIN_SRCS ${PROJECT_SOURCE_DIR}/plugin/*.cu)

add_library(myplugins SHARED ${PLUGIN_SRCS})
target_link_libraries(myplugins nvinfer_10 nvinfer_plugin_10 cudart)

add_executable(yolov8_cls yolov8_cls.cpp ${SRCS})
target_link_libraries(yolov8_cls myplugins)

add_executable(yolov8_det yolov8_det.cpp ${SRCS})
target_link_libraries(yolov8_det nvinfer_10)
target_link_libraries(yolov8_det cudart)
target_link_libraries(yolov8_det myplugins)
target_link_libraries(yolov8_det ${OpenCV_LIBS})

add_executable(yolov8_seg yolov8_seg.cpp ${SRCS})
target_link_libraries(yolov8_seg nvinfer_10)
target_link_libraries(yolov8_seg cudart)
target_link_libraries(yolov8_seg myplugins)
target_link_libraries(yolov8_seg ${OpenCV_LIBS})

add_executable(yolov8_pose yolov8_pose.cpp ${SRCS})
target_link_libraries(yolov8_pose nvinfer_10)
target_link_libraries(yolov8_pose cudart)
target_link_libraries(yolov8_pose myplugins)
target_link_libraries(yolov8_pose ${OpenCV_LIBS})
57 changes: 57 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,57 @@
cmake_minimum_required(VERSION 3.10)

project(yolov8)

add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
enable_language(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/plugin)

# include and link dirs of CUDA and TensorRT; adapt them if yours differ
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
message("embed_platform on")
include_directories(/usr/local/cuda/targets/aarch64-linux/include)
link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
message("embed_platform off")

# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)

# tensorrt
include_directories(/workspace/shared/TensorRT-10.2.0.19/include/)
link_directories(/workspace/shared/TensorRT-10.2.0.19/lib/)

# include_directories(/home/lindsay/TensorRT-7.2.3.4/include)
# link_directories(/home/lindsay/TensorRT-7.2.3.4/lib)
endif()

add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
target_link_libraries(myplugins nvinfer cudart)

find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})

file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
add_executable(yolov8_det ${PROJECT_SOURCE_DIR}/yolov8_det.cpp ${SRCS})

target_link_libraries(yolov8_det nvinfer)
target_link_libraries(yolov8_det cudart)
target_link_libraries(yolov8_det myplugins)
target_link_libraries(yolov8_det ${OpenCV_LIBS})

add_executable(yolov8_seg ${PROJECT_SOURCE_DIR}/yolov8_seg.cpp ${SRCS})
target_link_libraries(yolov8_seg nvinfer cudart myplugins ${OpenCV_LIBS})

add_executable(yolov8_pose ${PROJECT_SOURCE_DIR}/yolov8_pose.cpp ${SRCS})
target_link_libraries(yolov8_pose nvinfer cudart myplugins ${OpenCV_LIBS})

add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
143 changes: 143 additions & 0 deletions README.md
@@ -0,0 +1,143 @@
## Introduction

This project adds TensorRT-10 support for the YOLOv8 model family.

## Environment

* CUDA: 11.8
* cuDNN: 8.9.1.23
* TensorRT: 10.2.0.19

## Support

* [x] YOLOv8-cls supports FP32/FP16/INT8 and the Python/C++ API
* [x] YOLOv8-det supports FP32/FP16/INT8 and the Python/C++ API
* [x] YOLOv8-seg supports FP32/FP16/INT8 and the Python/C++ API
* [x] YOLOv8-pose supports FP32/FP16/INT8 and the Python/C++ API

## Config

* Choose the YOLOv8 sub-model (n/s/m/l/x/n6/s6/m6/l6/x6) via command-line arguments.
* For other configuration options, see [src/config.h](src/config.h).

## Build and Run

1. Generate a .wts file from the PyTorch .pt weights, or download a .wts file from the model zoo

```shell
git clone https://gitclone.com/github.com/ultralytics/ultralytics.git
git clone https://github.com/mpj1234/YOLOv8-series-TensorRT10.git
cd YOLOv8-series-TensorRT10/
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt
cp [PATH-TO-YOLOv8-series-TensorRT10]/yolov8/gen_wts.py .
python gen_wts.py -w yolov8n-cls.pt -o yolov8n-cls.wts -t cls
python gen_wts.py -w yolov8n.pt -o yolov8n.wts
python gen_wts.py -w yolov8n-seg.pt -o yolov8n-seg.wts -t seg
python gen_wts.py -w yolov8n-pose.pt -o yolov8n-pose.wts -t pose
# The files yolov8n-cls.wts, yolov8n.wts, yolov8n-seg.wts and yolov8n-pose.wts will be generated.
```
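For reference, the `.wts` format written by `gen_wts.py` (included in this commit) is plain text: the first line holds the tensor count, and each following line holds a tensor name, its element count, and that many big-endian float32 values encoded as hex. The sketch below is not part of the repo; it is just one way to sanity-check a generated file before building an engine:

```python
import struct
import sys


def check_wts(path):
    """Quick sanity check of a .wts file written by gen_wts.py."""
    with open(path) as f:
        expected = int(f.readline())  # first line: number of tensors
        tensors = 0
        for line in f:
            parts = line.split()
            name, count, values = parts[0], int(parts[1]), parts[2:]
            assert len(values) == count, f'{name}: expected {count} values, got {len(values)}'
            if values:
                # each value is a float32 stored big-endian as 8 hex characters
                struct.unpack('>f', bytes.fromhex(values[0]))
            tensors += 1
        assert tensors == expected, f'expected {expected} tensors, found {tensors}'
    print(f'{path}: {tensors} tensors, format looks consistent')


if __name__ == '__main__':
    check_wts(sys.argv[1] if len(sys.argv) > 1 else 'yolov8n.wts')
```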

2. Build YOLOv8-series-TensorRT10 and run

#### Classification

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-cls.wts .
cmake ..
make

# Download ImageNet labels
wget https://github.com/joannzhang00/ImageNet-dataset-classes-labels/blob/main/imagenet_classes.txt

# Build and serialize TensorRT engine
./yolov8_cls -s yolov8n-cls.wts yolov8n-cls.engine [n/s/m/l/x]

# Run inference
./yolov8_cls -d yolov8n-cls.engine ../images
# The results are displayed in the console
```

3. Optional: load and run the TensorRT model in Python
```shell
# Install python-tensorrt, pycuda, etc.
# Make sure yolov8n-cls.engine has already been built
python yolov8_cls_trt.py ./build/yolov8n-cls.engine ../images
# FAQ: pycuda._driver.LogicError on Windows, or "Segmentation fault" on Linux,
# can be fixed by adding the following imports to the Python script:
# import pycuda.autoinit
# import pycuda.driver as cuda
```
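For reference, the minimal pattern below is a sketch (not the repo's `yolov8_cls_trt.py`) showing the `pycuda.autoinit` fix together with deserializing the engine through the TensorRT-10 Python API, which addresses I/O by tensor name instead of the old bindings interface:

```python
import tensorrt as trt
import pycuda.autoinit  # noqa: F401  initializes a CUDA context (fixes the errors above)
import pycuda.driver as cuda  # noqa: F401

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)

# Deserialize the engine built by ./yolov8_cls -s ...
with open('./build/yolov8n-cls.engine', 'rb') as f:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# TensorRT 10 enumerates named I/O tensors rather than binding indices
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    print(name, engine.get_tensor_mode(name), engine.get_tensor_shape(name))

# Inference would then set tensor addresses on the context and call
# context.execute_async_v3(stream_handle), as the repo's Python script does.
```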

#### Detection

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n.wts .
cmake ..
make

# Build and serialize TensorRT engine
./yolov8_det -s yolov8n.wts yolov8n.engine [n/s/m/l/x]

# Run inference
./yolov8_det -d yolov8n.engine ../images [c/g]
# The results are displayed in the console
```

#### Segmentation

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-seg.wts .
cmake ..
make

# Build and serialize TensorRT engine
./yolov8_seg -s yolov8n-seg.wts yolov8n-seg.engine [n/s/m/l/x]

# Download the labels file
wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt

# Run inference
./yolov8_seg -d yolov8n-seg.engine ../images [c/g] coco.txt
# The results are displayed in the console
```

#### Pose

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-pose.wts .
cmake ..
make

# Build and serialize TensorRT engine
./yolov8_pose -s yolov8n-pose.wts yolov8n-pose.engine [n/s/m/l/x]

# Run inference
./yolov8_pose -d yolov8n-pose.engine ../images c
# The results are displayed in the console
```

## INT8 Quantization
1. Prepare calibration images; you can randomly select about 1000 images from your training set (a sampling sketch follows this list). For COCO, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
2. Unzip it into yolov8_trt10/build
3. Set the macro `USE_INT8` in src/config.h and build again
4. Serialize the model and test
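As a convenience for step 1, here is a hedged Python sketch for sampling roughly 1000 calibration images into the build directory; the source path is a placeholder, and the destination folder should match the calibration image directory expected by src/config.h:

```python
import random
import shutil
from pathlib import Path

train_dir = Path('/path/to/your/train/images')  # placeholder: your training images
calib_dir = Path('build/coco_calib')            # assumption: match the calibration dir in src/config.h
calib_dir.mkdir(parents=True, exist_ok=True)

# Collect candidate images and copy a reproducible random sample
images = sorted(p for p in train_dir.iterdir()
                if p.suffix.lower() in {'.jpg', '.jpeg', '.png'})
random.seed(0)
sample = random.sample(images, min(1000, len(images)))
for p in sample:
    shutil.copy(p, calib_dir / p.name)
print(f'copied {len(sample)} calibration images into {calib_dir}')
```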
57 changes: 57 additions & 0 deletions gen_wts.py
@@ -0,0 +1,57 @@
import sys # noqa: F401
import argparse
import os
import struct
import torch


def parse_args():
    parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
    parser.add_argument('-w', '--weights', required=True,
                        help='Input weights (.pt) file path (required)')
    parser.add_argument(
        '-o', '--output', help='Output (.wts) file path (optional)')
    parser.add_argument(
        '-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg', 'pose'],
        help='determines whether the model is detect, cls, seg or pose')
    args = parser.parse_args()
    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid input file')
    if not args.output:
        args.output = os.path.splitext(args.weights)[0] + '.wts'
    elif os.path.isdir(args.output):
        args.output = os.path.join(
            args.output,
            os.path.splitext(os.path.basename(args.weights))[0] + '.wts')
    return args.weights, args.output, args.type


pt_file, wts_file, m_type = parse_args()

print(f'Generating .wts for {m_type} model')

# Load model
print(f'Loading {pt_file}')

# Initialize
device = 'cpu'

# Load model
model = torch.load(pt_file, map_location=device)['model'].float() # load to FP32

if m_type in ['detect', 'seg', 'pose']:
    anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]

    delattr(model.model[-1], 'anchors')

model.to(device).eval()

with open(wts_file, 'w') as f:
    f.write('{}\n'.format(len(model.state_dict().keys())))
    for k, v in model.state_dict().items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        for vv in vr:
            f.write(' ')
            f.write(struct.pack('>f', float(vv)).hex())
        f.write('\n')
Binary file added images/bus.jpg
Binary file added images/cat.jpg
Binary file added images/dog.jpg
Binary file added images/zidane.jpg