// affine_quantizer.h (from a fork of pytorch/pytorch)
#pragma once

#include <ATen/ATen.h>
#include <ATen/native/DispatchStub.h>

namespace at {
namespace native {
Tensor quantize_tensor_per_tensor_affine(
    Tensor rtensor,
    Tensor qtensor,
    double scale,
    int64_t zero_point);

Tensor quantize_tensor_per_channel_affine(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);

Tensor quantize_tensor_per_channel_float_qparams(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);

Tensor dequantize_tensor_per_tensor_affine(
    Tensor qtensor,
    Tensor rtensor,
    double scale,
    int64_t zero_point);

Tensor dequantize_tensor_per_channel_affine(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);

Tensor dequantize_tensor_per_channel_float_qparams(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);
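
// All of the routines above implement the standard affine mapping, stated
// here for reference (the exact rounding mode is defined by each kernel):
//
//   q = clamp(round(r / scale) + zero_point, q_min, q_max)   // quantize
//   r = (q - zero_point) * scale                             // dequantize
//
// For example, with scale = 0.1 and zero_point = 128 on quint8, r = 1.0f
// quantizes to clamp(10 + 128, 0, 255) = 138 and dequantizes back to 1.0f.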
using quantize_tensor_per_tensor_affine_fn =
    void (*)(Tensor rtensor, Tensor qtensor, double scale, int64_t zero_point);
using quantize_tensor_per_channel_affine_fn = void (*)(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);
using quantize_tensor_per_channel_float_qparams_fn = void (*)(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);

using dequantize_tensor_per_tensor_affine_fn =
    void (*)(Tensor qtensor, Tensor rtensor, double scale, int64_t zero_point);
using dequantize_tensor_per_channel_affine_fn = void (*)(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);
using dequantize_tensor_per_channel_float_qparams_fn = void (*)(
    Tensor qtensor,
    Tensor rtensor,
    Tensor scales,
    Tensor zero_points,
    int64_t axis);
DECLARE_DISPATCH(
    quantize_tensor_per_tensor_affine_fn,
    quantize_tensor_per_tensor_affine_stub);
DECLARE_DISPATCH(
    quantize_tensor_per_channel_affine_fn,
    quantize_tensor_per_channel_affine_stub);
DECLARE_DISPATCH(
    quantize_tensor_per_channel_float_qparams_fn,
    quantize_tensor_per_channel_float_qparams_stub);
DECLARE_DISPATCH(
    dequantize_tensor_per_tensor_affine_fn,
    dequantize_tensor_per_tensor_affine_stub);
DECLARE_DISPATCH(
    dequantize_tensor_per_channel_affine_fn,
    dequantize_tensor_per_channel_affine_stub);
DECLARE_DISPATCH(
    dequantize_tensor_per_channel_float_qparams_fn,
    dequantize_tensor_per_channel_float_qparams_stub);
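
// Usage sketch for the stubs above (illustrative only; the kernel name
// quantize_tensor_per_tensor_affine_cpu is an assumed example, not declared
// in this header). A backend registers its kernel in a .cpp file:
//
//   REGISTER_DISPATCH(
//       quantize_tensor_per_tensor_affine_stub,
//       &quantize_tensor_per_tensor_affine_cpu);
//
// and a caller invokes the stub with an explicit device type:
//
//   quantize_tensor_per_tensor_affine_stub(
//       rtensor.device().type(), rtensor, qtensor, scale, zero_point);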
// Quantize a float value into a uint value given scale and zero_point
template <typename T>
CAFFE2_API T quantize_val(double scale, int64_t zero_point, float value);
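// For instance (illustrative numerics; the exact rounding is kernel-defined):
//   quantize_val<c10::quint8>(/*scale=*/0.1, /*zero_point=*/128, 1.0f)
// returns the quantized value 138, since round(1.0 / 0.1) + 128 = 138.
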
// TODO combine this with quantize_val once the numerics for ARM are aligned
// with it
uint8_t quantize_val_arm(
    const float scale,
    const int32_t zero_point,
    const float value);
template <typename T, int precision = 8>
void quantize_vec(
    double scale,
    int64_t zero_point,
    const float* src,
    T* dst,
    size_t count = 8);
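
// Usage sketch (illustrative; the buffers are assumed example data):
//
//   float src[8] = {0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
//   c10::quint8 dst[8];
//   quantize_vec<c10::quint8>(/*scale=*/0.1, /*zero_point=*/0, src, dst, 8);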
template <typename T>
CAFFE2_API Tensor quantize_tensor(
    Tensor rtensor,
    Tensor qtensor,
    double scale,
    int64_t zero_point);
template <typename T>
CAFFE2_API float dequantize_val(double scale, int64_t zero_point, T value);
template <typename T>
CAFFE2_API float dequantize_vec(
    double scale,
    int64_t zero_point,
    const T* src,
    float* dst,
    size_t count = 8);
template <typename T>
CAFFE2_API Tensor dequantize_tensor(
    Tensor qtensor,
    Tensor rtensor,
    double scale,
    int64_t zero_point);
template <typename SRC_T, typename DST_T>
CAFFE2_API DST_T requantize_val(
    double src_scale,
    int64_t src_zero_point,
    double dst_scale,
    int64_t dst_zero_point,
    SRC_T src);
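
// Conceptually (a sketch; a kernel may fuse or approximate these steps),
// requantize_val dequantizes src with the source qparams and re-quantizes
// the result with the destination qparams:
//
//   dst = quantize_val<DST_T>(
//       dst_scale, dst_zero_point,
//       dequantize_val<SRC_T>(src_scale, src_zero_point, src));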
// Given a multiplier and a zero_point, requantize int32_t computed values back
// to quantized values. See comment above
// make_per_tensor_affine_quantizer function for the usage of int64_t
template <typename DST_T>
CAFFE2_API DST_T
requantize_from_int(double multiplier, int64_t zero_point, int64_t src);
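
// Numerically (a sketch; rounding and saturation details live in the
// kernels):
//
//   dst = clamp(round(multiplier * src) + zero_point, q_min, q_max)
//
// where multiplier folds the source and destination scales of the
// surrounding op (e.g. src_scale / dst_scale).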
template <typename T>
CAFFE2_API T quantize_val_float_qparams(float scale, float zero_point, float value);
} // namespace native
} // namespace at