Showing 36 changed files with 7,684 additions and 0 deletions.
@@ -30,3 +30,9 @@
*.exe
*.out
*.app

build/
models/
cmake-build-debug/
cmake-build-release/
.idea/
@@ -0,0 +1,103 @@
cmake_minimum_required(VERSION 3.28)
project(yolov8_trtx_v10)

set(CMAKE_CXX_STANDARD 11)
# Make nvcc use UTF-8 encoding when compiling .cu files
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /utf-8")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")

enable_language(CUDA)

# Enable support for multiple CUDA architectures
set(CMAKE_CUDA_ARCHITECTURES 75 86 89)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")

# OpenCV
set(OpenCV_DIR E:\\Opencv\\install\\opencv-4.8.0\\build)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${OpenCV_LIB_DIR})

# CUDA
set(CUDA_TOOLKIT_ROOT_DIR C:\\Program\ Files\\NVIDIA\ GPU\ Computing\ Toolkit\\CUDA\\v11.8)
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)

# TensorRT
#set(TENSORRT_ROOT E:\\TensorRT\\TensorRT-8.6.1.6)
set(TENSORRT_ROOT E:\\TensorRT\\TensorRT-10.2.0.19)
include_directories(${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

# Detect the TensorRT major version under TENSORRT_ROOT (i.e. whether it is greater than 8)
# Locate the version header
file(GLOB TENSORRT_VERSION_FILES "${TENSORRT_ROOT}/include/NvInferVersion.h")
# Read the version header
file(STRINGS ${TENSORRT_VERSION_FILES} TENSORRT_VERSION_LINES
        LIMIT_COUNT 1                                # read only the first matching line
        REGEX "#define NV_TENSORRT_MAJOR [0-9]+"     # match the major-version define
)
message(STATUS " TENSORRT_VERSION_LINES: ${TENSORRT_VERSION_LINES}")
# Parse the major version number
string(REGEX REPLACE "#define NV_TENSORRT_MAJOR ([0-9]+)" "\\1" TENSORRT_VERSION_MAJOR ${TENSORRT_VERSION_LINES})
message(STATUS " TENSORRT_VERSION_MAJOR: ${TENSORRT_VERSION_MAJOR}")
# Check whether the major version is >= 10
if (TENSORRT_VERSION_MAJOR GREATER_EQUAL 10)
    message(STATUS " TensorRT version is greater than or equal to 10.")
    link_libraries(
            opencv_core
            opencv_highgui
            opencv_imgproc
            opencv_imgcodecs
            cudart
            cublas
            nvinfer_10
    )
else ()
    message(STATUS " TensorRT version is less than 10.")
    link_libraries(
            opencv_core
            opencv_highgui
            opencv_imgproc
            opencv_imgcodecs
            cudart
            cublas
            nvinfer
    )
endif ()

include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/plugin)
include_directories(${CMAKE_SOURCE_DIR}/src)
link_directories(${CMAKE_SOURCE_DIR}/lib)

add_definitions(-DNOMINMAX)

add_definitions(-DAPI_EXPORTS)

file(GLOB_RECURSE SRCS ${CMAKE_SOURCE_DIR}/src/*.cpp ${CMAKE_SOURCE_DIR}/src/*.cu)
file(GLOB_RECURSE PLUGIN_SRCS ${PROJECT_SOURCE_DIR}/plugin/*.cu)

add_library(myplugins SHARED ${PLUGIN_SRCS})
target_link_libraries(myplugins nvinfer_10 nvinfer_plugin_10 cudart)

add_executable(yolov8_cls yolov8_cls.cpp ${SRCS})
target_link_libraries(yolov8_cls myplugins)

add_executable(yolov8_det yolov8_det.cpp ${SRCS})
target_link_libraries(yolov8_det nvinfer_10)
target_link_libraries(yolov8_det cudart)
target_link_libraries(yolov8_det myplugins)
target_link_libraries(yolov8_det ${OpenCV_LIBS})

add_executable(yolov8_seg yolov8_seg.cpp ${SRCS})
target_link_libraries(yolov8_seg nvinfer_10)
target_link_libraries(yolov8_seg cudart)
target_link_libraries(yolov8_seg myplugins)
target_link_libraries(yolov8_seg ${OpenCV_LIBS})

add_executable(yolov8_pose yolov8_pose.cpp ${SRCS})
target_link_libraries(yolov8_pose nvinfer_10)
target_link_libraries(yolov8_pose cudart)
target_link_libraries(yolov8_pose myplugins)
target_link_libraries(yolov8_pose ${OpenCV_LIBS})
@@ -0,0 +1,57 @@
cmake_minimum_required(VERSION 3.10)

project(yolov8)

add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
enable_language(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/plugin)

# include and link dirs of cuda and tensorrt; you need to adapt them if yours are different
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message("embed_platform on")
    include_directories(/usr/local/cuda/targets/aarch64-linux/include)
    link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
    message("embed_platform off")

    # cuda
    include_directories(/usr/local/cuda/include)
    link_directories(/usr/local/cuda/lib64)

    # tensorrt
    include_directories(/workspace/shared/TensorRT-10.2.0.19/include/)
    link_directories(/workspace/shared/TensorRT-10.2.0.19/lib/)

    # include_directories(/home/lindsay/TensorRT-7.2.3.4/include)
    # link_directories(/home/lindsay/TensorRT-7.2.3.4/lib)
endif()

add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
target_link_libraries(myplugins nvinfer cudart)

find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})

file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
add_executable(yolov8_det ${PROJECT_SOURCE_DIR}/yolov8_det.cpp ${SRCS})

target_link_libraries(yolov8_det nvinfer)
target_link_libraries(yolov8_det cudart)
target_link_libraries(yolov8_det myplugins)
target_link_libraries(yolov8_det ${OpenCV_LIBS})

add_executable(yolov8_seg ${PROJECT_SOURCE_DIR}/yolov8_seg.cpp ${SRCS})
target_link_libraries(yolov8_seg nvinfer cudart myplugins ${OpenCV_LIBS})

add_executable(yolov8_pose ${PROJECT_SOURCE_DIR}/yolov8_pose.cpp ${SRCS})
target_link_libraries(yolov8_pose nvinfer cudart myplugins ${OpenCV_LIBS})

add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
@@ -0,0 +1,143 @@
## Introduction

The YOLOv8 models in this repo support TensorRT 10.

## Environment

CUDA: 11.8
CUDNN: 8.9.1.23
TensorRT: TensorRT-10.2.0.19

## Support

* [x] YOLOv8-cls supports FP32/FP16/INT8 and Python/C++ API
* [x] YOLOv8-det supports FP32/FP16/INT8 and Python/C++ API
* [x] YOLOv8-seg supports FP32/FP16/INT8 and Python/C++ API
* [x] YOLOv8-pose supports FP32/FP16/INT8 and Python/C++ API

## Config

* Choose the YOLOv8 sub-model n/s/m/l/x/n6/s6/m6/l6/x6 from the command line arguments.
* For other configs, please check [src/config.h](src/config.h)

## Build and Run

1. Generate .wts from the PyTorch .pt weights, or download .wts from the model zoo.

```shell
git clone https://gitclone.com/github.com/ultralytics/ultralytics.git
git clone https://github.com/mpj1234/YOLOv8-series-TensorRT10.git
cd YOLOv8-series-TensorRT10/
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt
cp [PATH-TO-YOLOv8-series-TensorRT10]/yolov8/gen_wts.py .
python gen_wts.py -w yolov8n-cls.pt -o yolov8n-cls.wts -t cls
python gen_wts.py -w yolov8n.pt -o yolov8n.wts
python gen_wts.py -w yolov8n-seg.pt -o yolov8n-seg.wts -t seg
python gen_wts.py -w yolov8n-pose.pt -o yolov8n-pose.wts -t pose
# Files such as 'yolov8n.wts' will be generated, one per command above.
```

2. Build YOLOv8-series-TensorRT10 and run.

#### Classification

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-cls.wts .
cmake ..
make

# Download ImageNet labels
wget https://github.com/joannzhang00/ImageNet-dataset-classes-labels/blob/main/imagenet_classes.txt

# Build and serialize the TensorRT engine
./yolov8_cls -s yolov8n-cls.wts yolov8n-cls.engine [n/s/m/l/x]

# Run inference
./yolov8_cls -d yolov8n-cls.engine ../images
# The results are displayed in the console
```

3. Optional: load and run the TensorRT engine in Python.

```shell
# Install python-tensorrt, pycuda, etc.
# Make sure yolov8n-cls.engine has been built first
python yolov8_cls_trt.py ./build/yolov8n-cls.engine ../images
# FAQ: on Windows you may hit pycuda._driver.LogicError,
# and on Linux a segmentation fault. If so, add the following imports to the Python file:
# import pycuda.autoinit
# import pycuda.driver as cuda
```
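
For reference, below is a minimal sketch of the engine-loading step that the Python script in step 3 performs. This is not the repo's `yolov8_cls_trt.py`; the function name `load_engine` and the omission of preprocessing/buffer handling are my own assumptions, and only the standard TensorRT Python API calls are used.

```python
# Minimal sketch (assumption): deserialize a serialized TensorRT engine in Python.
# Pre/post-processing and buffer management are model-specific and left to the repo's scripts.
import tensorrt as trt
import pycuda.autoinit  # noqa: F401  # creates a CUDA context; avoids the LogicError/segfault mentioned above
import pycuda.driver as cuda  # noqa: F401


def load_engine(engine_path: str) -> trt.ICudaEngine:
    logger = trt.Logger(trt.Logger.INFO)
    runtime = trt.Runtime(logger)
    with open(engine_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    if engine is None:
        raise RuntimeError(f"Failed to deserialize {engine_path}")
    return engine


if __name__ == "__main__":
    engine = load_engine("./build/yolov8n-cls.engine")
    context = engine.create_execution_context()
    print("Loaded engine with", engine.num_io_tensors, "I/O tensors")
```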

#### Detection

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n.wts .
cmake ..
make

# Build and serialize the TensorRT engine
./yolov8_det -s yolov8n.wts yolov8n.engine [n/s/m/l/x]

# Run inference
./yolov8_det -d yolov8n.engine ../images [c/g]
# The results are displayed in the console
```

#### Segmentation

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-seg.wts .
cmake ..
make

# Build and serialize the TensorRT engine
./yolov8_seg -s yolov8n-seg.wts yolov8n-seg.engine [n/s/m/l/x]

# Download the labels file
wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt

# Run inference
./yolov8_seg -d yolov8n-seg.engine ../images [c/g] coco.txt
# The results are displayed in the console
```

#### Pose

```shell
cd [PATH-TO-YOLOv8-series-TensorRT10]/YOLOv8-series-TensorRT10
# Update kNumClass in src/config.h if your model is trained on a custom dataset
mkdir build
cd build
cp [PATH-TO-ultralytics-yolov8]/yolov8n-pose.wts .
cmake ..
make

# Build and serialize the TensorRT engine
./yolov8_pose -s yolov8n-pose.wts yolov8n-pose.engine [n/s/m/l/x]

# Run inference
./yolov8_pose -d yolov8n-pose.engine ../images c
# The results are displayed in the console
```

## INT8 Quantization

1. Prepare calibration images; you can randomly select about 1000 images from your training set (a small helper sketch is shown after this list). For COCO, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
2. Unzip it in yolov8_trt10/build
3. Set the macro `USE_INT8` in src/config.h and run `make` again
4. Serialize the model and test
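
As referenced in step 1, here is a small hedged sketch for picking a random calibration subset. The dataset path is a placeholder you would adapt; only `coco_calib` and the `build` directory come from the steps above.

```python
# Sketch (assumption): copy ~1000 randomly chosen training images into build/coco_calib.
import random
import shutil
from pathlib import Path

train_dir = Path("datasets/coco/images/train2017")  # placeholder: your own training images
calib_dir = Path("build/coco_calib")
calib_dir.mkdir(parents=True, exist_ok=True)

images = sorted(train_dir.glob("*.jpg"))
random.seed(0)  # reproducible selection
picked = random.sample(images, k=min(1000, len(images)))
for img in picked:
    shutil.copy(img, calib_dir / img.name)

print(f"Copied {len(picked)} calibration images to {calib_dir}")
```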
@@ -0,0 +1,57 @@
import sys  # noqa: F401
import argparse
import os
import struct
import torch


def parse_args():
    parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
    parser.add_argument('-w', '--weights', required=True,
                        help='Input weights (.pt) file path (required)')
    parser.add_argument(
        '-o', '--output', help='Output (.wts) file path (optional)')
    parser.add_argument(
        '-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg', 'pose'],
        help='determines whether the model is detection/classification/segmentation/pose')
    args = parser.parse_args()
    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid input file')
    if not args.output:
        args.output = os.path.splitext(args.weights)[0] + '.wts'
    elif os.path.isdir(args.output):
        args.output = os.path.join(
            args.output,
            os.path.splitext(os.path.basename(args.weights))[0] + '.wts')
    return args.weights, args.output, args.type


pt_file, wts_file, m_type = parse_args()

print(f'Generating .wts for {m_type} model')

# Load model
print(f'Loading {pt_file}')

# Initialize
device = 'cpu'

# Load model
model = torch.load(pt_file, map_location=device)['model'].float()  # load to FP32

if m_type in ['detect', 'seg', 'pose']:
    anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]

    delattr(model.model[-1], 'anchors')

model.to(device).eval()

with open(wts_file, 'w') as f:
    # First line: the number of tensors in the state dict
    f.write('{}\n'.format(len(model.state_dict().keys())))
    for k, v in model.state_dict().items():
        vr = v.reshape(-1).cpu().numpy()
        # One line per tensor: name, element count, then each value as big-endian float32 hex
        f.write('{} {} '.format(k, len(vr)))
        for vv in vr:
            f.write(' ')
            f.write(struct.pack('>f', float(vv)).hex())
        f.write('\n')
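
As the writer above shows, the .wts format is plain text: a first line with the tensor count, then one line per tensor holding its name, element count, and space-separated big-endian IEEE-754 float32 values in hex. For illustration, a minimal reader sketch (the `load_wts` helper is my own, not part of the repo; it only mirrors the format written above):

```python
# Sketch (assumption): parse a .wts file produced by gen_wts.py back into numpy arrays.
import struct
import numpy as np


def load_wts(path):
    weights = {}
    with open(path, "r") as f:
        count = int(f.readline())
        for _ in range(count):
            parts = f.readline().split()
            name, n = parts[0], int(parts[1])
            # Each value is the hex of struct.pack('>f', v), i.e. a big-endian float32
            vals = [struct.unpack(">f", bytes.fromhex(h))[0] for h in parts[2:2 + n]]
            weights[name] = np.asarray(vals, dtype=np.float32)
    return weights


if __name__ == "__main__":
    w = load_wts("yolov8n.wts")
    print(len(w), "tensors loaded; first tensor:", next(iter(w)))
```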