Support for explicit padding and dilations in 2D CNN layers #138
@@ -176,13 +176,15 @@ def _visit_node(self, node, next_nodes):

     def _consume_dense_nodes(self, node, next_nodes):
         """Starting from a MatMul node, consume nodes to form a dense Ax + b node."""
+        # This should only be called when we know we have a starting MatMul node. This
+        # error indicates a bug in the function calling this one.
         if node.op_type != "MatMul":
             raise ValueError(
-                f"{node.name} is a {node.op_type} node, only MatMul nodes can be used as starting points for consumption."
+                f"{node.name} is a {node.op_type} node, but the method for parsing MatMul nodes was invoked."
             )
         if len(node.input) != 2:
             raise ValueError(
-                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."
+                f"{node.name} input has {len(node.input)} dimensions, but the parser requires the starting node to have 2 input dimensions."
             )

         [in_0, in_1] = list(node.input)
@@ -200,7 +202,7 @@ def _consume_dense_nodes(self, node, next_nodes):
             raise TypeError(f"Expected a node next, got a {type_} instead.")
         if node.op_type != "Add":
             raise ValueError(
-                f"The first node to be consumed, {node.name}, is a {node.op_type} node. Only Add nodes are supported."
+                f"The next node to be parsed, {node.name}, is a {node.op_type} node. Only Add nodes are supported."
             )

         # extract biases
@@ -255,11 +257,11 @@ def _consume_gemm_dense_nodes(self, node, next_nodes):
         """Starting from a Gemm node, consume nodes to form a dense aAB + bC node."""
         if node.op_type != "Gemm":
             raise ValueError(
-                f"{node.name} is a {node.op_type} node, only Gemm nodes can be used as starting points for consumption."
+                f"{node.name} is a {node.op_type} node, but the method for parsing Gemm nodes was invoked."
             )
         if len(node.input) != 3:
             raise ValueError(
-                f"{node.name} input has {len(node.input)} dimensions, only nodes with 3 input dimensions can be used as starting points for consumption."
+                f"{node.name} input has {len(node.input)} dimensions, but the parser requires the starting node to have 3 input dimensions."
             )

         attr = _collect_attributes(node)
@@ -310,11 +312,11 @@ def _consume_conv_nodes(self, node, next_nodes):
         """
         if node.op_type != "Conv":
             raise ValueError(
-                f"{node.name} is a {node.op_type} node, only Conv nodes can be used as starting points for consumption."
+                f"{node.name} is a {node.op_type} node, but the method for parsing Conv nodes was invoked."
             )
         if len(node.input) not in [2, 3]:
             raise ValueError(
-                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 or 3 input dimensions can be used as starting points for consumption."
+                f"{node.name} input has {len(node.input)} dimensions, but the parser requires the starting node to have 2 or 3 input dimensions."
             )

         if len(node.input) == 2:
@@ -359,25 +361,32 @@ def _consume_conv_nodes(self, node, next_nodes):
                 f"Input/output size ({input_output_size}) first dimension must match input weights channels ({in_channels})."
             )

+        # TODO: need to check pads and dilations also have correct dimensions. Also should
+        # add support for autopad.
+        if "pads" in attr:
+            pads = attr["pads"]
+        else:
+            pads = 2 * (len(input_output_size) - 1) * [0]
+
+        if "dilations" in attr:
+            dilations = attr["dilations"]
+        else:
+            dilations = (len(input_output_size) - 1) * [1]
+
         # Other attributes are not supported
-        if "dilations" in attr and attr["dilations"] != [1, 1]:
-            raise ValueError(
-                f"{node} has non-identity dilations ({attr['dilations']}). This is not supported."
-            )
         if attr["group"] != 1:
             raise ValueError(
                 f"{node} has multiple groups ({attr['group']}). This is not supported."
             )
-        if "pads" in attr and np.any(attr["pads"]):
-            raise ValueError(
-                f"{node} has non-zero pads ({attr['pads']}). This is not supported."
-            )

         # generate new nodes for the node output
-        padding = 0
+        padding = [
+            pads[i] + pads[i + len(input_output_size) - 1]
+            for i in range(len(input_output_size) - 1)
+        ]
         output_size = [out_channels]
-        for w, k, s in zip(input_output_size[1:], kernel_shape, strides):
-            new_w = int((w - k + 2 * padding) / s) + 1
+        for w, k, s, p in zip(input_output_size[1:], kernel_shape, strides, padding):
+            new_w = int((w - k + p) / s) + 1
             output_size.append(new_w)

         activation = "linear"

Review thread (on the `dilations` default):
> This line needs test coverage.

> Coverage added to the non-dilation case.

> I think this PR already adds tests with dilations; the old tests already cover cases without dilations. We can check the coverage after the CI issue is fixed.
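As an aside for readers of this hunk, the padded output-size arithmetic can be checked in isolation. A minimal sketch with made-up sizes (not code from this PR):

```python
# Worked example of the output-size formula from the hunk above:
# new_w = int((w - k + p) / s) + 1, where p is the total padding on that axis.
input_output_size = [3, 13, 13]  # example [channels, rows, cols]
kernel_shape = [4, 4]
strides = [2, 2]
pads = [1, 1, 2, 2]  # ONNX-style order: [begin_h, begin_w, end_h, end_w]

n_spatial = len(input_output_size) - 1
padding = [pads[i] + pads[i + n_spatial] for i in range(n_spatial)]  # [3, 3]

output_size = []
for w, k, s, p in zip(input_output_size[1:], kernel_shape, strides, padding):
    output_size.append(int((w - k + p) / s) + 1)

print(output_size)  # [7, 7]
```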
@@ -401,6 +410,8 @@ def _consume_conv_nodes(self, node, next_nodes):
             output_size,
             strides,
             weights,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=transformer,
         )
@@ -413,11 +424,11 @@ def _consume_reshape_nodes(self, node, next_nodes):
         """Parse a Reshape node."""
         if node.op_type != "Reshape":
             raise ValueError(
-                f"{node.name} is a {node.op_type} node, only Reshape nodes can be used as starting points for consumption."
+                f"{node.name} is a {node.op_type} node, but the method for parsing Reshape nodes was invoked."
             )
         if len(node.input) != 2:
             raise ValueError(
-                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."
+                f"{node.name} input has {len(node.input)} dimensions, but the parser requires the starting node to have 2 input dimensions."
             )
         [in_0, in_1] = list(node.input)
         input_layer = self._node_map[in_0]
@@ -434,7 +445,7 @@ def _consume_pool_nodes(self, node, next_nodes):
         """
         if node.op_type not in _POOLING_OP_TYPES:
             raise ValueError(
-                f"{node.name} is a {node.op_type} node, only MaxPool nodes can be used as starting points for consumption."
+                f"{node.name} is a {node.op_type} node, but the method for parsing MaxPool nodes was invoked."
             )
         pool_func_name = "max"
@@ -445,7 +456,7 @@ def _consume_pool_nodes(self, node, next_nodes):
         )
         if len(node.input) != 1:
             raise ValueError(
-                f"{node.name} input has {len(node.input)} dimensions, only nodes with 1 input dimension can be used as starting points for consumption."
+                f"{node.name} input has {len(node.input)} dimensions, but the parser requires the starting node to have 1 input dimension."
             )

         input_layer, transformer = self._node_input_and_transformer(node.input[0])
@@ -464,20 +475,14 @@ def _consume_pool_nodes(self, node, next_nodes):
         in_channels = input_output_size[0]

         attr = _collect_attributes(node)
-        kernel_depth = attr["kernel_shape"][0]
+        kernel_depth = in_channels
         kernel_shape = attr["kernel_shape"][1:]
         strides = attr["strides"] if "strides" in attr else [1] * len(kernel_shape)
+        pads = attr["pads"] if "pads" in attr else None
+        dilations = attr["dilations"] if "dilations" in attr else None

         # check only kernel shape, stride, storage order are set
         # everything else is not supported
-        if "dilations" in attr and attr["dilations"] != [1, 1]:
-            raise ValueError(
-                f"{node.name} has non-identity dilations ({attr['dilations']}). This is not supported."
-            )
-        if "pads" in attr and np.any(attr["pads"]):
-            raise ValueError(
-                f"{node.name} has non-zero pads ({attr['pads']}). This is not supported."
-            )
         if ("auto_pad" in attr) and (attr["auto_pad"] != "NOTSET"):
             raise ValueError(
                 f"{node.name} has autopad set ({attr['auto_pad']}). This is not supported."

Review thread (on `kernel_depth` and `kernel_shape`):
> The kernel size for maxpool2d does not include the input/output channels. There are only two options for the kernel size: (i) one integer k, in which case the kernel size is [1, 1, k, k]; (ii) two integers [r, c], in which case the kernel size is [1, 1, r, c]. We need to change lines 478-479 to get the correct kernel_depth (which should be 1) and kernel_shape (which should be [k, k] or [r, c]). After fixing that, the check in lines 490-492 makes sense. Otherwise, we will get an error message like "Kernel shape [4] has 1 dimensions. Strides attribute has 2 dimensions. These must be equal."

> How do I read the correct kernel_depth and kernel_shape from the node attribute? Do I need to count the dimensions to determine whether it's been given as k, [r, c], or [1, 1, r, c]? Or do I just have the indices wrong here?

> We do not need to determine which case, since ONNX already standardizes the dimensions for the kernel. For maxpooling2d, the node attribute will give [r, c] (or [k, k] for case (i)), so we just need to define kernel_shape as attr["kernel_shape"]. Since the output channels equal the input channels, defining kernel_depth as in_channels will be fine.

> (This code predates this PR.) I've now set kernel_depth to in_channels. In the maxpool_2d.onnx file used in the test_onnx_parser/test_maxpool test, attr["kernel_shape"] for node1 is (3, 2, 3). I'm not sure what each of these dimensions represents, but if I take all 3 it fails.

> I think kernel_shape = attr["kernel_shape"], since the first dimension is no longer the depth in maxpooling. Can you put some tests on this part to see which one is correct?
@@ -519,6 +524,8 @@ def _consume_pool_nodes(self, node, next_nodes):
             pool_func_name,
             tuple(kernel_shape),
             kernel_depth,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=transformer,
         )
@@ -402,6 +402,10 @@ class Layer2D(Layer):
         the size of the output.
     strides : matrix-like
         stride of the kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     activation : str or None
         activation function name
     input_index_mapper : IndexMapper or None
@@ -414,6 +418,8 @@ def __init__(
         output_size,
         strides,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
@@ -424,12 +430,25 @@ def __init__(
             input_index_mapper=input_index_mapper,
         )
         self.__strides = strides
+        if pads is None:
+            self.__pads = [0, 0, 0, 0]
+        else:
+            self.__pads = pads
+        if dilations is None:
+            self.__dilations = [1, 1]
+        else:
+            self.__dilations = dilations

     @property
     def strides(self):
         """Return the stride of the layer"""
         return self.__strides

+    @property
+    def pads(self):
+        """Return the padding of the layer"""
+        return self.__pads
+
     @property
     def kernel_shape(self):
         """Return the shape of the kernel"""
@@ -440,6 +459,20 @@ def kernel_depth(self):
         """Return the depth of the kernel"""
         raise NotImplementedError()

+    @property
+    def dilations(self):
+        """Return the kernel dilation of the layer"""
+        return self.__dilations
+
+    @property
+    def dilated_kernel_shape(self):
+        """Return the shape of the kernel after dilation"""
+        dilated_dims = [
+            self.dilations[i] * (self.kernel_shape[i] - 1) + 1
+            for i in range(len(self.kernel_shape))
+        ]
+        return tuple(dilated_dims)
+
     def kernel_index_with_input_indexes(self, out_d, out_r, out_c):
         """
         Returns an iterator over the index within the kernel and input index
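A quick sanity check on the dilated-shape formula d * (k - 1) + 1, using the same example values a reviewer gives later in this PR (a standalone snippet, not library code):

```python
def dilated_shape(kernel_shape, dilations):
    # Each axis grows from k to d * (k - 1) + 1 when zeros are inserted
    # between the original kernel taps.
    return tuple(d * (k - 1) + 1 for k, d in zip(kernel_shape, dilations))

print(dilated_shape((4, 3), (2, 3)))  # (7, 7)
```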
@@ -455,14 +488,16 @@ def kernel_index_with_input_indexes(self, out_d, out_r, out_c):
             the output column.
         """
         kernel_d = self.kernel_depth
-        [kernel_r, kernel_c] = self.kernel_shape
+        [kernel_r, kernel_c] = self.dilated_kernel_shape
         [rows_stride, cols_stride] = self.__strides
+        [pads_row, pads_col] = self.__pads[:2]
         start_in_d = 0
-        start_in_r = out_r * rows_stride
-        start_in_c = out_c * cols_stride
-        mapper = lambda x: x
-        if self.input_index_mapper is not None:
-            mapper = self.input_index_mapper
+        start_in_r = out_r * rows_stride - pads_row
+        start_in_c = out_c * cols_stride - pads_col
+        # Defined but never used:
+        # mapper = lambda x: x
+        # if self.input_index_mapper is not None:
+        #     mapper = self.input_index_mapper

         for k_d in range(kernel_d):
             for k_r in range(kernel_r):
@@ -475,7 +510,7 @@ def kernel_index_with_input_indexes(self, out_d, out_r, out_c):
                 # as this could require using a partial kernel
                 # even though we loop over ALL kernel indexes.
                 if not all(
-                    input_index[i] < self.input_size[i]
+                    input_index[i] < self.input_size[i] and input_index[i] >= 0
                     for i in range(len(input_index))
                 ):
                     continue
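The new lower bound matters because explicit padding makes start indices negative, and a negative index would silently wrap around under NumPy-style indexing. A standalone illustration of the iteration pattern (simplified from the method above; all names here are invented):

```python
def kernel_positions(out_r, out_c, kernel_shape, strides, pads, input_hw):
    """Yield (kernel index, input index) pairs, skipping padded positions."""
    kernel_r, kernel_c = kernel_shape
    stride_r, stride_c = strides
    pad_r, pad_c = pads  # padding before the first row / first column
    start_r = out_r * stride_r - pad_r  # can be negative near the border
    start_c = out_c * stride_c - pad_c
    for k_r in range(kernel_r):
        for k_c in range(kernel_c):
            r, c = start_r + k_r, start_c + k_c
            if 0 <= r < input_hw[0] and 0 <= c < input_hw[1]:
                yield (k_r, k_c), (r, c)

# For output pixel (0, 0) with padding 1, only 4 of the 9 kernel cells
# fall inside a 5x5 input; the others overlap the zero padding.
print(len(list(kernel_positions(0, 0, (3, 3), (1, 1), (1, 1), (5, 5)))))  # 4
```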
@@ -522,6 +557,10 @@ class PoolingLayer2D(Layer2D):
         the size of the output.
     strides : matrix-like
         stride of the kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     pool_func : str
         name of function used to pool values in a kernel to a single value.
     transpose : bool
@@ -544,13 +583,17 @@ def __init__(
         kernel_shape,
         kernel_depth,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
         super().__init__(
             input_size,
             output_size,
             strides,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=input_index_mapper,
         )
@@ -598,6 +641,10 @@ class ConvLayer2D(Layer2D):
         stride of the cross-correlation kernel.
     kernel : matrix-like
         the cross-correlation kernel.
+    pads : matrix-like
+        Padding for the kernel. Given as [left, bottom, right, top]
+    dilations : matrix-like
+        Dilations of the kernel
     activation : str or None
         activation function name
     input_index_mapper : IndexMapper or None
@@ -611,17 +658,40 @@ def __init__(
         strides,
         kernel,
         *,
+        pads=None,
+        dilations=None,
         activation=None,
         input_index_mapper=None,
     ):
         super().__init__(
             input_size,
             output_size,
             strides,
+            pads=pads,
+            dilations=dilations,
             activation=activation,
             input_index_mapper=input_index_mapper,
         )
         self.__kernel = kernel
+        if self.dilations != [1, 1]:
+            dilated = np.zeros(
+                (
+                    kernel.shape[0],
+                    kernel.shape[1],
+                    (kernel.shape[2] - 1) * dilations[0] + 1,
+                    (kernel.shape[3] - 1) * dilations[1] + 1,
+                )
+            )
+            for i in range(kernel.shape[0]):
+                for j in range(kernel.shape[1]):
+                    for k in range(kernel.shape[2]):
+                        for l in range(kernel.shape[3]):
+                            dilated[i, j, k * dilations[0], l * dilations[1]] = kernel[
+                                i, j, k, l
+                            ]
+            self.__dilated_kernel = dilated
+        else:
+            self.__dilated_kernel = kernel

     def kernel_with_input_indexes(self, out_d, out_r, out_c):
         """

Review thread (on the dilation code, referring to an earlier revision of this block):
> If I understand correctly, when dilations != [1, 1], this part will extend the kernel from shape [out_channel, in_channel, r, c] to shape [out_channel, in_channel, dr*(r-1)+1, dc*(c-1)+1] by adding zeros, where [dr, dc] are the dilations. But this part of the code does not correctly do this extension. For example, assume the size of the kernel is [2, 1, 4, 3] and dilations = [2, 3]; then the size of dilate_kernel should be [2, 1, 7, 7]. But we will get an error here due to unmatched sizes. My suggestion is directly assigning values to dilate_kernel instead of using numpy.hstack and numpy.dstack.
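The quadruple loop above can also be written as a single strided slice assignment; a sketch assuming a NumPy kernel of shape [out_channels, in_channels, r, c]:

```python
import numpy as np

def dilate_kernel(kernel, dilations):
    """Insert zeros between kernel taps (equivalent to the loops above)."""
    dr, dc = dilations
    out_c, in_c, r, c = kernel.shape
    dilated = np.zeros((out_c, in_c, dr * (r - 1) + 1, dc * (c - 1) + 1))
    dilated[:, :, ::dr, ::dc] = kernel  # original taps land on a strided grid
    return dilated

kernel = np.random.rand(2, 1, 4, 3)
assert dilate_kernel(kernel, (2, 3)).shape == (2, 1, 7, 7)  # reviewer's example
```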
@@ -658,6 +728,11 @@ def kernel(self):
         """Return the cross-correlation kernel"""
         return self.__kernel

+    @property
+    def dilated_kernel(self):
+        """Return the dilated cross-correlation kernel"""
+        return self.__dilated_kernel
+
     def __str__(self):
         return f"ConvLayer(input_size={self.input_size}, output_size={self.output_size}, strides={self.strides}, kernel_shape={self.kernel_shape})"

Review thread (on lines +731 to +734):
> This function needs to be tested.

> Test added.
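Putting the new keyword arguments together, constructing a padded convolutional layer might look like the sketch below. The import path and all sizes are assumptions for illustration; the signature follows this diff:

```python
import numpy as np
from omlt.neuralnet.layer import ConvLayer2D  # import path assumed

kernel = np.random.rand(4, 3, 2, 2)  # [out_channels, in_channels, r, c]
layer = ConvLayer2D(
    [3, 9, 9],          # input_size: [channels, rows, cols]
    [4, 5, 5],          # output_size: int((9 - 2 + 2) / 2) + 1 = 5 per spatial axis
    [2, 2],             # strides
    kernel,
    pads=[1, 1, 1, 1],  # symmetric padding of 1 on every side
    dilations=[1, 1],   # identity dilation: the kernel is unchanged
)
print(layer.pads, layer.dilations)  # [1, 1, 1, 1] [1, 1]
```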
Review thread (on the reworded error messages):
> The phraseology "consumption" is not very intuitive. If I were a user and fed my CNN into OMLT and got this error, it would not be clear to me what to do to fix it.

> I was cribbing from the docstrings, but after a bit more time with the code I've reworded all of these errors slightly. The node type checks should never be hit unless there's a bug in the calling function, and `._visit_node()` doesn't have that problem. The dimension checks are hopefully a bit clearer.

> Agree. I think we can be more specific, like: "To define a convolution layer, the input/output channels and weight matrix are required. Bias is optional."