-
Notifications
You must be signed in to change notification settings - Fork 103
/
Copy pathimresize.py
227 lines (169 loc) · 11.8 KB
/
imresize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import numpy as np
from scipy.ndimage import filters, measurements, interpolation
from math import pi
def imresize(im, scale_factor=None, output_shape=None, kernel=None, antialiasing=True, kernel_shift_flag=False):
# First standardize values and fill missing arguments (if needed) by deriving scale from output shape or vice versa
scale_factor, output_shape = fix_scale_and_size(im.shape, output_shape, scale_factor)
# For a given numeric kernel case, just do convolution and sub-sampling (downscaling only)
if type(kernel) == np.ndarray and scale_factor[0] <= 1:
return numeric_kernel(im, kernel, scale_factor, output_shape, kernel_shift_flag)
# Choose interpolation method, each method has the matching kernel size
method, kernel_width = {
"cubic": (cubic, 4.0),
"lanczos2": (lanczos2, 4.0),
"lanczos3": (lanczos3, 6.0),
"box": (box, 1.0),
"linear": (linear, 2.0),
None: (cubic, 4.0) # set default interpolation method as cubic
}.get(kernel)
# Antialiasing is only used when downscaling
antialiasing *= (scale_factor[0] < 1)
# Sort indices of dimensions according to scale of each dimension. since we are going dim by dim this is efficient
sorted_dims = np.argsort(np.array(scale_factor)).tolist()
# Iterate over dimensions to calculate local weights for resizing and resize each time in one direction
out_im = np.copy(im)
for dim in sorted_dims:
# No point doing calculations for scale-factor 1. nothing will happen anyway
if scale_factor[dim] == 1.0:
continue
# for each coordinate (along 1 dim), calculate which coordinates in the input image affect its result and the
# weights that multiply the values there to get its result.
weights, field_of_view = contributions(im.shape[dim], output_shape[dim], scale_factor[dim],
method, kernel_width, antialiasing)
# Use the affecting position values and the set of weights to calculate the result of resizing along this 1 dim
out_im = resize_along_dim(out_im, dim, weights, field_of_view)
return out_im
def fix_scale_and_size(input_shape, output_shape, scale_factor):
# First fixing the scale-factor (if given) to be standardized the function expects (a list of scale factors in the
# same size as the number of input dimensions)
if scale_factor is not None:
# By default, if scale-factor is a scalar we assume 2d resizing and duplicate it.
if np.isscalar(scale_factor):
scale_factor = [scale_factor, scale_factor]
# We extend the size of scale-factor list to the size of the input by assigning 1 to all the unspecified scales
scale_factor = list(scale_factor)
scale_factor.extend([1] * (len(input_shape) - len(scale_factor)))
# Fixing output-shape (if given): extending it to the size of the input-shape, by assigning the original input-size
# to all the unspecified dimensions
if output_shape is not None:
output_shape = list(np.uint(np.array(output_shape))) + list(input_shape[len(output_shape):])
# Dealing with the case of non-give scale-factor, calculating according to output-shape. note that this is
# sub-optimal, because there can be different scales to the same output-shape.
if scale_factor is None:
scale_factor = 1.0 * np.array(output_shape) / np.array(input_shape)
# Dealing with missing output-shape. calculating according to scale-factor
if output_shape is None:
output_shape = np.uint(np.ceil(np.array(input_shape) * np.array(scale_factor)))
return scale_factor, output_shape
def contributions(in_length, out_length, scale, kernel, kernel_width, antialiasing):
# This function calculates a set of 'filters' and a set of field_of_view that will later on be applied
# such that each position from the field_of_view will be multiplied with a matching filter from the
# 'weights' based on the interpolation method and the distance of the sub-pixel location from the pixel centers
# around it. This is only done for one dimension of the image.
# When anti-aliasing is activated (default and only for downscaling) the receptive field is stretched to size of
# 1/sf. this means filtering is more 'low-pass filter'.
fixed_kernel = (lambda arg: scale * kernel(scale * arg)) if antialiasing else kernel
kernel_width *= 1.0 / scale if antialiasing else 1.0
# These are the coordinates of the output image
out_coordinates = np.arange(1, out_length+1)
# These are the matching positions of the output-coordinates on the input image coordinates.
# Best explained by example: say we have 4 horizontal pixels for HR and we downscale by SF=2 and get 2 pixels:
# [1,2,3,4] -> [1,2]. Remember each pixel number is the middle of the pixel.
# The scaling is done between the distances and not pixel numbers (the right boundary of pixel 4 is transformed to
# the right boundary of pixel 2. pixel 1 in the small image matches the boundary between pixels 1 and 2 in the big
# one and not to pixel 2. This means the position is not just multiplication of the old pos by scale-factor).
# So if we measure distance from the left border, middle of pixel 1 is at distance d=0.5, border between 1 and 2 is
# at d=1, and so on (d = p - 0.5). we calculate (d_new = d_old / sf) which means:
# (p_new-0.5 = (p_old-0.5) / sf) -> p_new = p_old/sf + 0.5 * (1-1/sf)
match_coordinates = 1.0 * out_coordinates / scale + 0.5 * (1 - 1.0 / scale)
# This is the left boundary to start multiplying the filter from, it depends on the size of the filter
left_boundary = np.floor(match_coordinates - kernel_width / 2)
# Kernel width needs to be enlarged because when covering has sub-pixel borders, it must 'see' the pixel centers
# of the pixels it only covered a part from. So we add one pixel at each side to consider (weights can zeroize them)
expanded_kernel_width = np.ceil(kernel_width) + 2
# Determine a set of field_of_view for each each output position, these are the pixels in the input image
# that the pixel in the output image 'sees'. We get a matrix whos horizontal dim is the output pixels (big) and the
# vertical dim is the pixels it 'sees' (kernel_size + 2)
field_of_view = np.squeeze(np.uint(np.expand_dims(left_boundary, axis=1) + np.arange(expanded_kernel_width) - 1))
# Assign weight to each pixel in the field of view. A matrix whos horizontal dim is the output pixels and the
# vertical dim is a list of weights matching to the pixel in the field of view (that are specified in
# 'field_of_view')
weights = fixed_kernel(1.0 * np.expand_dims(match_coordinates, axis=1) - field_of_view - 1)
# Normalize weights to sum up to 1. be careful from dividing by 0
sum_weights = np.sum(weights, axis=1)
sum_weights[sum_weights == 0] = 1.0
weights = 1.0 * weights / np.expand_dims(sum_weights, axis=1)
# We use this mirror structure as a trick for reflection padding at the boundaries
mirror = np.uint(np.concatenate((np.arange(in_length), np.arange(in_length - 1, -1, step=-1))))
field_of_view = mirror[np.mod(field_of_view, mirror.shape[0])]
# Get rid of weights and pixel positions that are of zero weight
non_zero_out_pixels = np.nonzero(np.any(weights, axis=0))
weights = np.squeeze(weights[:, non_zero_out_pixels])
field_of_view = np.squeeze(field_of_view[:, non_zero_out_pixels])
# Final products are the relative positions and the matching weights, both are output_size X fixed_kernel_size
return weights, field_of_view
def resize_along_dim(im, dim, weights, field_of_view):
# To be able to act on each dim, we swap so that dim 0 is the wanted dim to resize
tmp_im = np.swapaxes(im, dim, 0)
# We add singleton dimensions to the weight matrix so we can multiply it with the big tensor we get for
# tmp_im[field_of_view.T], (bsxfun style)
weights = np.reshape(weights.T, list(weights.T.shape) + (np.ndim(im) - 1) * [1])
# This is a bit of a complicated multiplication: tmp_im[field_of_view.T] is a tensor of order image_dims+1.
# for each pixel in the output-image it matches the positions the influence it from the input image (along 1 dim
# only, this is why it only adds 1 dim to the shape). We then multiply, for each pixel, its set of positions with
# the matching set of weights. we do this by this big tensor element-wise multiplication (MATLAB bsxfun style:
# matching dims are multiplied element-wise while singletons mean that the matching dim is all multiplied by the
# same number
tmp_out_im = np.sum(tmp_im[field_of_view.T] * weights, axis=0)
# Finally we swap back the axes to the original order
return np.swapaxes(tmp_out_im, dim, 0)
def numeric_kernel(im, kernel, scale_factor, output_shape, kernel_shift_flag):
# See kernel_shift function to understand what this is
if kernel_shift_flag:
kernel = kernel_shift(kernel, scale_factor)
# First run a correlation (convolution with flipped kernel)
out_im = np.zeros_like(im)
for channel in range(np.ndim(im)):
out_im[:, :, channel] = filters.correlate(im[:, :, channel], kernel)
# Then subsample and return
return out_im[np.round(np.linspace(0, im.shape[0] - 1 / scale_factor[0], output_shape[0])).astype(int)[:, None],
np.round(np.linspace(0, im.shape[1] - 1 / scale_factor[1], output_shape[1])).astype(int), :]
def kernel_shift(kernel, sf):
# There are two reasons for shifting the kernel:
# 1. Center of mass is not in the center of the kernel which creates ambiguity. There is no possible way to know
# the degradation process included shifting so we always assume center of mass is center of the kernel.
# 2. We further shift kernel center so that top left result pixel corresponds to the middle of the sfXsf first
# pixels. Default is for odd size to be in the middle of the first pixel and for even sized kernel to be at the
# top left corner of the first pixel. that is why different shift size needed between od and even size.
# Given that these two conditions are fulfilled, we are happy and aligned, the way to test it is as follows:
# The input image, when interpolated (regular bicubic) is exactly aligned with ground truth.
# First calculate the current center of mass for the kernel
current_center_of_mass = measurements.center_of_mass(kernel)
# The second ("+ 0.5 * ....") is for applying condition 2 from the comments above
wanted_center_of_mass = np.array(kernel.shape) / 2 + 0.5 * (sf - (kernel.shape[0] % 2))
# Define the shift vector for the kernel shifting (x,y)
shift_vec = wanted_center_of_mass - current_center_of_mass
# Before applying the shift, we first pad the kernel so that nothing is lost due to the shift
# (biggest shift among dims + 1 for safety)
kernel = np.pad(kernel, np.int(np.ceil(np.max(shift_vec))) + 1, 'constant')
# Finally shift the kernel and return
return interpolation.shift(kernel, shift_vec)
# These next functions are all interpolation methods. x is the distance from the left pixel center
def cubic(x):
absx = np.abs(x)
absx2 = absx ** 2
absx3 = absx ** 3
return ((1.5*absx3 - 2.5*absx2 + 1) * (absx <= 1) +
(-0.5*absx3 + 2.5*absx2 - 4*absx + 2) * ((1 < absx) & (absx <= 2)))
def lanczos2(x):
return (((np.sin(pi*x) * np.sin(pi*x/2) + np.finfo(np.float32).eps) /
((pi**2 * x**2 / 2) + np.finfo(np.float32).eps))
* (abs(x) < 2))
def box(x):
return ((-0.5 <= x) & (x < 0.5)) * 1.0
def lanczos3(x):
return (((np.sin(pi*x) * np.sin(pi*x/3) + np.finfo(np.float32).eps) /
((pi**2 * x**2 / 3) + np.finfo(np.float32).eps))
* (abs(x) < 3))
def linear(x):
return (x + 1) * ((-1 <= x) & (x < 0)) + (1 - x) * ((0 <= x) & (x <= 1))