forked from WongKinYiu/yolov7
-
Notifications
You must be signed in to change notification settings - Fork 1
/
convert_tflite.py
106 lines (95 loc) · 3.69 KB
/
convert_tflite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import onnx
import numpy as np
from numpy import random
import torch
from models.experimental import attempt_load
from utils.general import check_img_size, colorstr, check_dataset
import tensorflow as tf
from tqdm import tqdm
from onnx_tf.backend import prepare
from utils.datasets import create_dataloader
import argparse
import yaml
#Limit GPU memory usage for tensorflow
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu,True)
parser = argparse.ArgumentParser()
parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
parser.add_argument('--data', default='data/VisDrone.yaml', help='path yaml')
parser.add_argument('--load', default='runs/train/yolov7/weights/best.onnx', help='onnx model path')
opt = parser.parse_args()
print(opt)
data = opt.data
# Load PyTorch Model
torch_path = 'runs/train/yolov7/weights/yolov7_best_fp32.pt'
device = torch.device("cuda:1")
model = attempt_load(torch_path, map_location=device) # load FP32 model
stride = int(model.stride.max())
model.eval()
# Load ONNX Model
path = opt.load
onnx_model = onnx.load(path)
tf_path = "/tmp/yolov7_tf"
tf_rep = prepare(onnx_model)
tf_rep.export_graph(tf_path)
# Quantizer
def convert_to_tf(model, precision, quantize="dynamic"):
# Convert ONNX Model to Tensorflow Lite
converter = tf.lite.TFLiteConverter.from_saved_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if precision == 'fp32':
precision = tf.float32
elif precision == 'fp16':
precision = tf.float16
elif precision == 'int8':
precision = tf.int8
if quantize == "dynamic":
print("Dynamic Quantization")
else:
print("Post Static Quantization")
converter.representative_dataset = representative_data_gen
return converter.convert()
else:
raise ValueError("precision is wrong")
converter.target_spec.supported_types = [precision]
return converter.convert()
# Load Dataset for Post Static Quantization
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
imgsz = 640
batch_size =1
task = 'train'
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(imgsz, s=gs) # check img_size
with open(data) as f:
data = yaml.load(f, Loader=yaml.SafeLoader)
check_dataset(data) # check
dataloader = create_dataloader(data['train'], imgsz, batch_size, gs, opt, pad=0.5, rect=True,
prefix=colorstr(f'{task}: '))[0]
def representative_data_gen():
for i,(img, _, _, _) in tqdm(enumerate(dataloader)):
img = img.to("cpu", non_blocking=True)
img = img.float()
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if i == 0:
img_shape = img.shape
if img.shape != img_shape:
continue
yield [img]
# if i == 1000:
# break
# yield [img]
tflite_fp32 = convert_to_tf(model=tf_path, precision='fp32')
tflite_fp16 = convert_to_tf(model=tf_path, precision='fp16')
tflite_int8_psq = convert_to_tf(model=tf_path, precision='int8', quantize='psq')
tflite_int8_dq = convert_to_tf(model=tf_path, precision='int8', quantize='dynamic')
with open(path.replace("best.onnx", "best_fp32.tflite"), 'wb') as f:
f.write(tflite_fp32)
with open(path.replace("best.onnx", "best_fp16.tflite"), 'wb') as f:
f.write(tflite_fp16)
with open(path.replace("best.onnx", "best_int8_psq.tflite"), 'wb') as f:
f.write(tflite_int8_psq)
with open(path.replace("best.onnx", "best_int8_dq.tflite"), 'wb') as f:
f.write(tflite_int8_dq)