diff --git a/.readthedocs.yml b/.readthedocs.yml
index 29c410e7..6e41af22 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,9 +1,64 @@
-# Read the Docs configuration file
+# Read the Docs configuration file for Sphinx projects
+
 # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-conda:
-  file: docs/environment.yml
+
+# Required
+
+version: 2
+
+
+# Set the OS, Python version and other tools you might need
+
+build:
+
+  os: ubuntu-22.04
+
+  tools:
+
+    python: "3.8"
+
+  # You can also specify other tool versions:
+
+  # nodejs: "20"
+
+  # rust: "1.70"
+
+  # golang: "1.20"
+
+
+# Build documentation in the "docs/" directory with Sphinx
+
+sphinx:
+
+  configuration: docs/conf.py
+
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+
+  # builder: "dirhtml"
+
+  # Fail on all warnings to avoid broken references
+
+  # fail_on_warning: true
+
+
+# Optionally build your docs in additional formats such as PDF and ePub
+
+# formats:
+
+#   - pdf
+
+#   - epub
+
+
+# Optional but recommended, declare the Python requirements required
+
+# to build your documentation
+
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
 python:
-  version: 3.8
-  setup_py_install: true
+
+  install:
+
+    - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/README.rst b/README.rst
index 4ba4566f..cb90ed8e 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@
    :align: center
    :width: 200px
 
-.. image:: https://github.com/cog-imperial/OMLT/workflows/CI/badge.svg?branch=main
+.. image:: https://github.com/cog-imperial/OMLT/actions/workflows/main.yml/badge.svg
    :target: https://github.com/cog-imperial/OMLT/actions?workflow=CI
    :alt: CI Status
 
diff --git a/docs/environment.yml b/docs/environment.yml
deleted file mode 100644
index 4044f80e..00000000
--- a/docs/environment.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: omlt
-channels:
-  - conda-forge
-dependencies:
-  - python=3.8
-  - numpy
-  - pyomo
-  - networkx
-  - onnx
-  - tensorflow
-  - importlib-metadata
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..6305e50b
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,10 @@
+# Required dependencies for Sphinx documentation
+sphinx
+sphinx-rtd-theme
+numpy
+pyomo
+networkx
+onnx
+tensorflow
+linear-tree
+importlib-metadata
\ No newline at end of file
diff --git a/src/omlt/gbt/gbt_formulation.py b/src/omlt/gbt/gbt_formulation.py
index 13b03227..f2d01296 100644
--- a/src/omlt/gbt/gbt_formulation.py
+++ b/src/omlt/gbt/gbt_formulation.py
@@ -197,13 +197,23 @@ def _branching_y(tree_id, branch_node_id):
         node_mask = (nodes_tree_ids == tree_id) & (nodes_node_ids == branch_node_id)
         feature_id = nodes_feature_ids[node_mask]
         branch_value = nodes_values[node_mask]
-        assert len(feature_id) == 1 and len(branch_value) == 1
+        if len(branch_value) != 1:
+            raise ValueError(
+                "The given tree_id and branch_node_id do not uniquely identify a branch value."
+            )
+        if len(feature_id) != 1:
+            raise ValueError(
+                "The given tree_id and branch_node_id do not uniquely identify a feature."
+            )
         feature_id = feature_id[0]
         branch_value = branch_value[0]
         (branch_y_idx,) = np.where(
             branch_value_by_feature_id[feature_id] == branch_value
         )
-        assert len(branch_y_idx) == 1
+        if len(branch_y_idx) != 1:
+            raise ValueError(
+                "The given tree_id and branch_node_id do not uniquely identify a branch index."
+            )
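+        # at this point branch_y_idx holds exactly one matching entry, so indexing [0] is safe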
         return block.y[feature_id, branch_y_idx[0]]
 
     def _sum_of_z_l(tree_id, start_node_id):
diff --git a/src/omlt/gbt/model.py b/src/omlt/gbt/model.py
index bbd178aa..9bac2590 100644
--- a/src/omlt/gbt/model.py
+++ b/src/omlt/gbt/model.py
@@ -56,14 +56,20 @@ def scaling_object(self, scaling_object):
 
 def _model_num_inputs(model):
     """Returns the number of input variables"""
     graph = model.graph
-    assert len(graph.input) == 1
+    if len(graph.input) != 1:
+        raise ValueError(
+            f"Model graph input field is multi-valued {graph.input}. A single value is required."
+        )
     return _tensor_size(graph.input[0])
 
 
 def _model_num_outputs(model):
     """Returns the number of output variables"""
     graph = model.graph
-    assert len(graph.output) == 1
+    if len(graph.output) != 1:
+        raise ValueError(
+            f"Model graph output field is multi-valued {graph.output}. A single value is required."
+        )
     return _tensor_size(graph.output[0])
 
@@ -71,9 +77,15 @@ def _tensor_size(tensor):
     """Returns the size of an input tensor"""
     tensor_type = tensor.type.tensor_type
     size = None
-    for dim in tensor_type.shape.dim:
-        if dim.dim_value is not None and dim.dim_value > 0:
-            assert size is None
-            size = dim.dim_value
-    assert size is not None
+    dim_values = [
+        dim.dim_value
+        for dim in tensor_type.shape.dim
+        if dim.dim_value is not None and dim.dim_value > 0
+    ]
+    if len(dim_values) == 1:
+        size = dim_values[0]
+    elif dim_values == []:
+        raise ValueError(f"Tensor {tensor} has no positive dimensions.")
+    else:
+        raise ValueError(f"Tensor {tensor} has multiple positive dimensions.")
     return size
diff --git a/src/omlt/io/onnx_parser.py b/src/omlt/io/onnx_parser.py
index 091082a7..511261c0 100644
--- a/src/omlt/io/onnx_parser.py
+++ b/src/omlt/io/onnx_parser.py
@@ -73,13 +73,17 @@ def parse_network(self, graph, scaling_object, input_bounds):
                         dim_value = 1
                     size.append(dim.dim_value)
                     dim_value *= dim.dim_value
-            assert dim_value is not None
+            if dim_value is None:
+                raise ValueError(
+                    f'All dimensions in graph "{graph.name}" input tensor have 0 value.'
+                )
             assert network_input is None
             network_input = InputLayer(size)
             self._node_map[input.name] = network_input
             network.add_layer(network_input)
 
-        assert network_input is not None
+        if network_input is None:
+            raise ValueError(f'No valid input layer found in graph "{graph.name}".')
 
         self._nodes = nodes
         self._nodes_by_output = nodes_by_output
@@ -109,11 +113,14 @@ def parse_network(self, graph, scaling_object, input_bounds):
                 # Now connect inputs to the current node
                 for input in node_inputs:
                     self._nodes[input][2].append(node.name)
-            else:
-                assert node.op_type == "Constant"
+            elif node.op_type == "Constant":
                 for output in node.output:
                     value = _parse_constant_value(node)
                     self._constants[output] = value
+            else:
+                raise ValueError(
+                    f'Nodes must have inputs or have op_type "Constant". Node "{node.name}" has no inputs and op_type "{node.op_type}".'
+                )
 
         # traverse graph
         self._node_stack = list(inputs)
@@ -169,34 +176,54 @@ def _visit_node(self, node, next_nodes):
 
     def _consume_dense_nodes(self, node, next_nodes):
         """Starting from a MatMul node, consume nodes to form a dense Ax + b node."""
-        assert node.op_type == "MatMul"
-        assert len(node.input) == 2
+        if node.op_type != "MatMul":
+            raise ValueError(
+                f"{node.name} is a {node.op_type} node, only MatMul nodes can be used as starting points for consumption."
+            )
+        if len(node.input) != 2:
+            raise ValueError(
+                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."
+            )
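+        # in_0 names the layer feeding this MatMul; in_1 names its weights in the initializers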
         [in_0, in_1] = list(node.input)
         input_layer, transformer = self._node_input_and_transformer(in_0)
         node_weights = self._initializers[in_1]
 
-        assert len(next_nodes) == 1
+        if len(next_nodes) != 1:
+            raise ValueError(
+                f"Next nodes must have length 1, {next_nodes} has length {len(next_nodes)}."
+            )
 
         # expect 'Add' node ahead
         type_, node, maybe_next_nodes = self._nodes[next_nodes[0]]
-        assert type_ == "node"
-        assert node.op_type == "Add"
+        if type_ != "node":
+            raise TypeError(f"Expected a node next, got a {type_} instead.")
+        if node.op_type != "Add":
+            raise ValueError(
+                f"The first node to be consumed, {node.name}, is a {node.op_type} node. Only Add nodes are supported."
+            )
 
         # extract biases
         next_nodes = maybe_next_nodes
-        assert len(node.input) == 2
         [in_0, in_1] = list(node.input)
 
         if in_0 in self._initializers:
             node_biases = self._initializers[in_0]
-        else:
-            assert in_1 in self._initializers
+        elif in_1 in self._initializers:
             node_biases = self._initializers[in_1]
+        else:
+            raise ValueError("Node inputs were not found in graph initializers.")
 
-        assert len(node_weights.shape) == 2
-        assert node_weights.shape[1] == node_biases.shape[0]
-        assert len(node.output) == 1
+        if len(node_weights.shape) != 2:
+            raise ValueError("Node weights must be a 2-dimensional matrix.")
+        if node_weights.shape[1] != node_biases.shape[0]:
+            raise ValueError(
+                f"Node weights have {node_weights.shape[1]} columns; node biases have {node_biases.shape[0]} rows. These must be equal."
+            )
+        if len(node.output) != 1:
+            raise ValueError(
+                f"Node output is {node.output} but should be a single value."
+            )
 
         input_output_size = _get_input_output_size(input_layer, transformer)
@@ -226,8 +253,14 @@
 
     def _consume_gemm_dense_nodes(self, node, next_nodes):
         """Starting from a Gemm node, consume nodes to form a dense aAB + bC node."""
-        assert node.op_type == "Gemm"
-        assert len(node.input) == 3
+        if node.op_type != "Gemm":
+            raise ValueError(
+                f"{node.name} is a {node.op_type} node, only Gemm nodes can be used as starting points for consumption."
+            )
+        if len(node.input) != 3:
+            raise ValueError(
+                f"{node.name} input has {len(node.input)} dimensions, only nodes with 3 input dimensions can be used as starting points for consumption."
+            )
 
         attr = _collect_attributes(node)
         alpha = attr["alpha"]
@@ -275,8 +308,15 @@ def _consume_conv_nodes(self, node, next_nodes):
         Starting from a Conv node, consume nodes to form a convolution node with
         (optional) activation function.
         """
-        assert node.op_type == "Conv"
-        assert len(node.input) in [2, 3]
+        if node.op_type != "Conv":
+            raise ValueError(
+                f"{node.name} is a {node.op_type} node, only Conv nodes can be used as starting points for consumption."
+            )
+        if len(node.input) not in [2, 3]:
+            raise ValueError(
+                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 or 3 input dimensions can be used as starting points for consumption."
+            )
+
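+        # Conv inputs are (data, weights); an optional third input carries the biases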
         if len(node.input) == 2:
             [in_0, in_1] = list(node.input)
             in_2 = None
@@ -295,18 +335,43 @@
         attr = _collect_attributes(node)
         strides = attr["strides"]
 
-        # check only kernel shape and stride are set
-        # everything else is not supported
-        assert biases.shape == (out_channels,)
-        assert in_channels == input_output_size[0]
-        assert attr["kernel_shape"] == kernel_shape
-        assert attr["dilations"] == [1, 1]
-        assert attr["group"] == 1
-        if "pads" in attr:
-            assert not np.any(attr["pads"])  # pads all zero
-        assert len(kernel_shape) == len(strides)
-        assert len(input_output_size) == len(kernel_shape) + 1
+        if attr["kernel_shape"] != kernel_shape:
+            raise ValueError(
+                f"Kernel shape attribute {attr['kernel_shape']} does not match initialized kernel shape {kernel_shape}."
+            )
+        if len(kernel_shape) != len(strides):
+            raise ValueError(
+                f"Initialized kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal."
+            )
+        if len(input_output_size) != len(kernel_shape) + 1:
+            raise ValueError(
+                f"Input/output size ({input_output_size}) must have one more dimension than initialized kernel shape ({kernel_shape})."
+            )
+
+        # Check input, output have correct dimensions
+        if biases.shape != (out_channels,):
+            raise ValueError(
+                f"Biases shape {biases.shape} must match output weights channels {(out_channels,)}."
+            )
+        if in_channels != input_output_size[0]:
+            raise ValueError(
+                f"Input/output size ({input_output_size}) first dimension must match input weights channels ({in_channels})."
+            )
+
+        # Other attributes are not supported
+        if "dilations" in attr and attr["dilations"] != [1, 1]:
+            raise ValueError(
+                f"{node} has non-identity dilations ({attr['dilations']}). This is not supported."
+            )
+        if attr["group"] != 1:
+            raise ValueError(
+                f"{node} has multiple groups ({attr['group']}). This is not supported."
+            )
+        if "pads" in attr and np.any(attr["pads"]):
+            raise ValueError(
+                f"{node} has non-zero pads ({attr['pads']}). This is not supported."
+            )
 
         # generate new nodes for the node output
         padding = 0
@@ -326,7 +391,10 @@
 
         # convolute image one channel at the time
         # expect 2d image with channels
-        assert len(input_output_size) == 3
+        if len(input_output_size) != 3:
+            raise ValueError(
+                f"Expected a 2D image with channels, got {input_output_size}."
+            )
 
         conv_layer = ConvLayer2D(
             input_output_size,
@@ -343,8 +411,14 @@
 
     def _consume_reshape_nodes(self, node, next_nodes):
         """Parse a Reshape node."""
-        assert node.op_type == "Reshape"
-        assert len(node.input) == 2
+        if node.op_type != "Reshape":
+            raise ValueError(
+                f"{node.name} is a {node.op_type} node, only Reshape nodes can be used as starting points for consumption."
+            )
+        if len(node.input) != 2:
+            raise ValueError(
+                f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."
+            )
         [in_0, in_1] = list(node.input)
         input_layer = self._node_map[in_0]
         new_shape = self._constants[in_1]
@@ -358,13 +432,22 @@ def _consume_pool_nodes(self, node, next_nodes):
         Starting from a MaxPool node, consume nodes to form a pooling node with
         (optional) activation function.
""" - assert node.op_type in _POOLING_OP_TYPES + if node.op_type not in _POOLING_OP_TYPES: + raise ValueError( + f"{node.name} is a {node.op_type} node, only MaxPool nodes can be used as starting points for consumption." + ) pool_func_name = "max" # ONNX network should not contain indices output from MaxPool - not supported by OMLT - assert len(node.output) == 1 + if len(node.output) != 1: + raise ValueError( + f"The ONNX contains indices output from MaxPool. This is not supported by OMLT." + ) + if len(node.input) != 1: + raise ValueError( + f"{node.name} input has {len(node.input)} dimensions, only nodes with 1 input dimension can be used as starting points for consumption." + ) - assert len(node.input) == 1 input_layer, transformer = self._node_input_and_transformer(node.input[0]) input_output_size = _get_input_output_size(input_layer, transformer) @@ -372,9 +455,11 @@ def _consume_pool_nodes(self, node, next_nodes): if len(input_output_size) == 4: # this means there is an extra dimension for number of batches # batches not supported, so only accept if they're not there or there is only 1 batch - assert input_output_size[0] == 1 + if input_output_size[0] != 1: + raise ValueError( + f"{node.name} has {input_output_size[0]} batches, only a single batch is supported." + ) input_output_size = input_output_size[1:] - assert len(input_output_size) == 3 in_channels = input_output_size[0] @@ -385,11 +470,26 @@ def _consume_pool_nodes(self, node, next_nodes): # check only kernel shape, stride, storage order are set # everything else is not supported - assert ("dilations" not in attr) or (attr["dilations"] == [1, 1]) - assert ("pads" not in attr) or (not np.any(attr["pads"])) - assert ("auto_pad" not in attr) or (attr["auto_pad"] == "NOTSET") - assert len(kernel_shape) == len(strides) - assert len(input_output_size) == len(kernel_shape) + 1 + if "dilations" in attr and attr["dilations"] != [1, 1]: + raise ValueError( + f"{node.name} has non-identity dilations ({attr['dilations']}). This is not supported." + ) + if "pads" in attr and np.any(attr["pads"]): + raise ValueError( + f"{node.name} has non-zero pads ({attr['pads']}). This is not supported." + ) + if ("auto_pad" in attr) and (attr["auto_pad"] != "NOTSET"): + raise ValueError( + f"{node.name} has autopad set ({attr['auto_pad']}). This is not supported." + ) + if len(kernel_shape) != len(strides): + raise ValueError( + f"Kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal." + ) + if len(input_output_size) != len(kernel_shape) + 1: + raise ValueError( + f"Input/output size ({input_output_size}) must have one more dimension than kernel shape ({kernel_shape})." 
 
         output_shape_wrapper = math.floor
         if "ceil_mode" in attr and attr["ceil_mode"] == 1:
diff --git a/src/omlt/neuralnet/layer.py b/src/omlt/neuralnet/layer.py
index dbf3f2ac..fc0393b3 100644
--- a/src/omlt/neuralnet/layer.py
+++ b/src/omlt/neuralnet/layer.py
@@ -10,9 +10,9 @@ class Layer:
 
     Parameters
     ----------
-    input_size : tuple
+    input_size : list or tuple
        size of the layer input
-    output_size : tuple
+    output_size : list or tuple
        size of the layer output
    activation : str or None
        activation function name
@@ -23,10 +23,16 @@ class Layer:
     def __init__(
         self, input_size, output_size, *, activation=None, input_index_mapper=None
     ):
-        assert isinstance(input_size, list)
-        assert isinstance(output_size, list)
-        self.__input_size = input_size
-        self.__output_size = output_size
+        if not isinstance(input_size, (list, tuple)):
+            raise TypeError(
+                f"input_size must be a list or tuple, {type(input_size)} was provided."
+            )
+        if not isinstance(output_size, (list, tuple)):
+            raise TypeError(
+                f"output_size must be a list or tuple, {type(output_size)} was provided."
+            )
+        self.__input_size = list(input_size)
+        self.__output_size = list(output_size)
         self.activation = activation
         if input_index_mapper is None:
             input_index_mapper = IndexMapper(input_size, input_size)
@@ -99,7 +105,10 @@ def eval_single_layer(self, x):
             if self.__input_index_mapper is not None
             else x[:]
         )
-        assert x_reshaped.shape == tuple(self.input_size)
+        if x_reshaped.shape != tuple(self.input_size):
+            raise ValueError(
+                f"Layer requires an input size {self.input_size}, but the input tensor had size {x_reshaped.shape}."
+            )
         y = self._eval(x_reshaped)
         return self._apply_activation(y)
@@ -201,7 +210,6 @@ def __str__(self):
     def _eval(self, x):
         y = np.dot(x, self.__weights) + self.__biases
         y = np.reshape(y, tuple(self.output_size))
-        assert y.shape == tuple(self.output_size)
         return y
@@ -424,6 +432,7 @@ def kernel_index_with_input_indexes(self, out_d, out_r, out_c):
         for k_r in range(kernel_r):
             for k_c in range(kernel_c):
                 input_index = (start_in_d + k_d, start_in_r + k_r, start_in_c + k_c)
+                assert len(input_index) == len(self.input_size)
 
                 # don't yield an out-of-bounds input index;
                 # can happen if ceil mode is enabled for pooling layers
@@ -450,8 +459,11 @@ def get_input_index(self, out_index, kernel_index):
 
     def _eval(self, x):
         y = np.empty(shape=self.output_size)
-        assert len(self.output_size) == 3
-        [depth, rows, cols] = self.output_size
+        if len(self.output_size) != 3:
+            raise ValueError(
+                f"Output should have 3 dimensions but instead has {len(self.output_size)}."
+            )
+        [depth, rows, cols] = list(self.output_size)
         for out_d in range(depth):
             for out_r in range(rows):
                 for out_c in range(cols):
@@ -506,6 +518,10 @@ def __init__(
             activation=activation,
             input_index_mapper=input_index_mapper,
         )
+        if pool_func_name not in PoolingLayer2D._POOL_FUNCTIONS:
+            raise ValueError(
+                f"Allowable pool functions are {PoolingLayer2D._POOL_FUNCTIONS}, {pool_func_name} was provided."
+            )
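+        # validating the name once here lets _eval_at_index index _POOL_FUNCTIONS directly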
         self._pool_func_name = pool_func_name
         self._kernel_shape = kernel_shape
         self._kernel_depth = kernel_depth
@@ -528,7 +544,6 @@ def _eval_at_index(self, x, out_d, out_r, out_c):
             x[index]
             for (_, index) in self.kernel_index_with_input_indexes(out_d, out_r, out_c)
         ]
-        assert self._pool_func_name in PoolingLayer2D._POOL_FUNCTIONS
         pool_func = PoolingLayer2D._POOL_FUNCTIONS[self._pool_func_name]
         return pool_func(vals)
diff --git a/src/omlt/neuralnet/layers/full_space.py b/src/omlt/neuralnet/layers/full_space.py
index c8967924..c24dc98e 100644
--- a/src/omlt/neuralnet/layers/full_space.py
+++ b/src/omlt/neuralnet/layers/full_space.py
@@ -6,8 +6,6 @@
 from omlt.neuralnet.layer import ConvLayer2D, IndexMapper, PoolingLayer2D
 
 
-# TODO: Change asserts to exceptions with messages (or ensure they
-# TODO: are trapped higher up the call stack)
 def full_space_dense_layer(net_block, net, layer_block, layer):
     r"""
     Add full-space formulation of the dense layer to the block
@@ -183,7 +181,10 @@ def full_space_conv2d_layer(net_block, net, layer_block, layer):
         and layer.activation != "linear"
     ):
         # activation applied after convolution layer, so there shouldn't be an activation after max pooling too
-        assert succ_layer.activation == "linear"
+        if succ_layer.activation != "linear":
+            raise ValueError(
+                f"Activation is applied after convolution layer, but the successor max pooling layer {succ_layer} has an activation function also."
+            )
         succ_layer.activation = layer.activation
         layer.activation = "linear"
@@ -241,10 +242,13 @@ def full_space_maxpool2d_layer(net_block, net, layer_block, layer):
 
     """
     input_layer, input_layer_block = _input_layer_and_block(net_block, net, layer)
-    assert isinstance(input_layer, ConvLayer2D)
-    assert (
-        input_layer.activation == "linear"
-    )  # TODO - add support for non-increasing activation functions on preceding convolutional layer
+    if not isinstance(input_layer, ConvLayer2D):
+        raise TypeError("Input layer must be a ConvLayer2D.")
+    if input_layer.activation != "linear":
+        raise ValueError(
+            "Non-increasing activation functions on the preceding convolutional layer are not supported."
+        )
+    # TODO - add support for non-increasing activation functions on preceding convolutional layer
 
     # note kernel indexes are the same set of values for any output index, so wlog get kernel indexes for (0, 0, 0)
     layer_block._kernel_indexes = pyo.Set(
@@ -317,7 +321,8 @@ def _calculate_n_plus(out_index, l, k, layer, input_layer_block):
 
 def _input_layer_and_block(net_block, net, layer):
     input_layers = list(net.predecessors(layer))
-    assert len(input_layers) == 1
+    if len(input_layers) != 1:
+        raise ValueError("Multiple input layers are not currently supported.")
     input_layer = input_layers[0]
     input_layer_block = net_block.layer[id(input_layer)]
     return input_layer, input_layer_block
diff --git a/src/omlt/neuralnet/layers/partition_based.py b/src/omlt/neuralnet/layers/partition_based.py
index c7116d35..87e13f18 100644
--- a/src/omlt/neuralnet/layers/partition_based.py
+++ b/src/omlt/neuralnet/layers/partition_based.py
@@ -44,7 +44,12 @@ def partition_based_dense_relu_layer(net_block, net, layer_block, layer, split_f
     """
     # not an input layer, process the expressions
     prev_layers = list(net.predecessors(layer))
-    assert len(prev_layers) == 1
+    if len(prev_layers) == 0:
+        raise ValueError(
+            f"Layer {layer} is not an input layer, but has no predecessors."
+        )
+    elif len(prev_layers) > 1:
+        raise ValueError(f"Layer {layer} has multiple predecessors.")
     prev_layer = prev_layers[0]
     prev_layer_block = net_block.layer[id(prev_layer)]
@@ -86,7 +91,10 @@ def output_node_block(b, *output_index):
                 expr += prev_layer_block.z[input_index] * w
 
             lb, ub = compute_bounds_on_expr(expr)
-            assert lb is not None and ub is not None
+            if lb is None:
+                raise ValueError("Expression is unbounded below.")
+            if ub is None:
+                raise ValueError("Expression is unbounded above.")
 
             z2 = b.z2[split_index]
             z2.setlb(min(0, lb))
@@ -106,7 +114,10 @@ def output_node_block(b, *output_index):
         expr += bias
 
         lb, ub = compute_bounds_on_expr(expr)
-        assert lb is not None and ub is not None
+        if lb is None:
+            raise ValueError("Expression is unbounded below.")
+        if ub is None:
+            raise ValueError("Expression is unbounded above.")
 
         layer_block.z[output_index].setlb(0)
         layer_block.z[output_index].setub(max(0, ub))
diff --git a/src/omlt/neuralnet/layers/reduced_space.py b/src/omlt/neuralnet/layers/reduced_space.py
index 1584b775..95d1f97f 100644
--- a/src/omlt/neuralnet/layers/reduced_space.py
+++ b/src/omlt/neuralnet/layers/reduced_space.py
@@ -11,7 +11,12 @@ def reduced_space_dense_layer(net_block, net, layer_block, layer, activation):
     """
     # not an input layer, process the expressions
     prev_layers = list(net.predecessors(layer))
-    assert len(prev_layers) == 1
+    if len(prev_layers) == 0:
+        raise ValueError(
+            f"Layer {layer} is not an input layer, but has no predecessors."
+        )
+    elif len(prev_layers) > 1:
+        raise ValueError(f"Layer {layer} has multiple predecessors.")
     prev_layer = prev_layers[0]
     prev_layer_block = net_block.layer[id(prev_layer)]
diff --git a/src/omlt/neuralnet/network_definition.py b/src/omlt/neuralnet/network_definition.py
index ac930baa..aeef22eb 100644
--- a/src/omlt/neuralnet/network_definition.py
+++ b/src/omlt/neuralnet/network_definition.py
@@ -77,8 +77,10 @@ def add_edge(self, from_layer, to_layer):
         """
         id_to = id(to_layer)
         id_from = id(from_layer)
-        assert id_to in self.__layers_by_id
-        assert id_from in self.__layers_by_id
+        if id_to not in self.__layers_by_id:
+            raise ValueError(f"Inbound layer {to_layer} not found in network.")
+        if id_from not in self.__layers_by_id:
+            raise ValueError(f"Outbound layer {from_layer} not found in network.")
         self.__graph.add_edge(id_from, id_to)
 
     @property
diff --git a/src/omlt/neuralnet/nn_formulation.py b/src/omlt/neuralnet/nn_formulation.py
index 89bb3fc3..042b14fe 100644
--- a/src/omlt/neuralnet/nn_formulation.py
+++ b/src/omlt/neuralnet/nn_formulation.py
@@ -94,15 +94,12 @@ def __init__(
         if activation_constraints is not None:
             self._activation_constraints.update(activation_constraints)
 
-        # TODO: Change these to exceptions.
         network_inputs = list(self.__network_definition.input_nodes)
-        assert (
-            len(network_inputs) == 1
-        ), "Multiple input layers are not currently supported."
+        if len(network_inputs) != 1:
+            raise ValueError("Multiple input layers are not currently supported.")
         network_outputs = list(self.__network_definition.output_nodes)
-        assert (
-            len(network_outputs) == 1
-        ), "Multiple output layers are not currently supported."
+        if len(network_outputs) != 1:
+            raise ValueError("Multiple output layers are not currently supported.")
 
     def _supported_default_layer_constraints(self):
         return _DEFAULT_LAYER_CONSTRAINTS
@@ -126,18 +123,16 @@ def _build_formulation(self):
     def input_indexes(self):
         """The indexes of the formulation inputs."""
         network_inputs = list(self.__network_definition.input_nodes)
-        assert (
-            len(network_inputs) == 1
-        ), "Multiple input layers are not currently supported."
+        if len(network_inputs) != 1:
+            raise ValueError("Multiple input layers are not currently supported.")
         return network_inputs[0].input_indexes
 
     @property
     def output_indexes(self):
         """The indexes of the formulation output."""
         network_outputs = list(self.__network_definition.output_nodes)
-        assert (
-            len(network_outputs) == 1
-        ), "Multiple output layers are not currently supported."
+        if len(network_outputs) != 1:
+            raise ValueError("Multiple output layers are not currently supported.")
         return network_outputs[0].output_indexes
@@ -207,7 +202,8 @@ def layer(b, layer_id):
 
     # setup input variables constraints
     # currently only support a single input layer
     input_layers = list(net.input_layers)
-    assert len(input_layers) == 1
+    if len(input_layers) != 1:
+        raise ValueError("Multiple input layers are not currently supported.")
     input_layer = input_layers[0]
 
     @block.Constraint(input_layer.output_indexes)
     def input_assignment(b, *output_index):
@@ -217,7 +213,8 @@ def input_assignment(b, *output_index):
 
     # setup output variables constraints
     # currently only support a single output layer
     output_layers = list(net.output_layers)
-    assert len(output_layers) == 1
+    if len(output_layers) != 1:
+        raise ValueError("Multiple output layers are not currently supported.")
     output_layer = output_layers[0]
 
     @block.Constraint(output_layer.output_indexes)
@@ -320,6 +317,16 @@ def __init__(self, network_structure, activation_functions=None):
         if activation_functions is not None:
             self._activation_functions.update(activation_functions)
 
+        # If we want to do network input/output validation at initialize time instead
+        # of build time, as it is for FullSpaceNNFormulation:
+        #
+        # network_inputs = list(self.__network_definition.input_nodes)
+        # if len(network_inputs) != 1:
+        #     raise ValueError("Multiple input layers are not currently supported.")
+        # network_outputs = list(self.__network_definition.output_nodes)
+        # if len(network_outputs) != 1:
+        #     raise ValueError("Multiple output layers are not currently supported.")
+
     def _supported_default_activation_functions(self):
         return dict(_DEFAULT_ACTIVATION_FUNCTIONS)
@@ -405,16 +412,16 @@ def output_assignment(b, *output_index):
     def input_indexes(self):
         """The indexes of the formulation inputs."""
         network_inputs = list(self.__network_definition.input_nodes)
-        assert len(network_inputs) == 1, "Unsupported multiple network input variables"
+        if len(network_inputs) != 1:
+            raise ValueError("Multiple input layers are not currently supported.")
         return network_inputs[0].input_indexes
 
     @property
     def output_indexes(self):
         """The indexes of the formulation output."""
         network_outputs = list(self.__network_definition.output_nodes)
-        assert (
-            len(network_outputs) == 1
-        ), "Unsupported multiple network output variables"
+        if len(network_outputs) != 1:
+            raise ValueError("Multiple output layers are not currently supported.")
         return network_outputs[0].output_indexes
@@ -514,10 +521,16 @@ def layer(b, layer_id):
             else:
                 raise ValueError("ReluPartitionFormulation supports only Dense layers")
 
+        # This check is never hit. The formulation._build_formulation() function is
+        # only ever called by an OmltBlock.build_formulation(), and that runs the
+        # input_indexes and output_indexes first, which will catch any formulations
+        # with multiple input or output layers.
+
         # setup input variables constraints
         # currently only support a single input layer
         input_layers = list(net.input_layers)
-        assert len(input_layers) == 1
+        if len(input_layers) != 1:
+            raise ValueError("Multiple input layers are not currently supported.")
         input_layer = input_layers[0]
 
         @block.Constraint(input_layer.output_indexes)
         def input_assignment(b, *output_index):
@@ -530,7 +543,8 @@ def input_assignment(b, *output_index):
 
         # setup output variables constraints
         # currently only support a single output layer
         output_layers = list(net.output_layers)
-        assert len(output_layers) == 1
+        if len(output_layers) != 1:
+            raise ValueError("Multiple output layers are not currently supported.")
         output_layer = output_layers[0]
 
         @block.Constraint(output_layer.output_indexes)
@@ -544,14 +558,14 @@ def output_assignment(b, *output_index):
     def input_indexes(self):
         """The indexes of the formulation inputs."""
         network_inputs = list(self.__network_definition.input_nodes)
-        assert len(network_inputs) == 1, "Unsupported multiple network input variables"
+        if len(network_inputs) != 1:
+            raise ValueError("Multiple input layers are not currently supported.")
         return network_inputs[0].input_indexes
 
     @property
     def output_indexes(self):
         """The indexes of the formulation output."""
         network_outputs = list(self.__network_definition.output_nodes)
-        assert (
-            len(network_outputs) == 1
-        ), "Unsupported multiple network output variables"
+        if len(network_outputs) != 1:
+            raise ValueError("Multiple output layers are not currently supported.")
         return network_outputs[0].output_indexes
diff --git a/tests/io/test_onnx_parser.py b/tests/io/test_onnx_parser.py
index 6454938d..763b282c 100644
--- a/tests/io/test_onnx_parser.py
+++ b/tests/io/test_onnx_parser.py
@@ -4,6 +4,7 @@
 
 if onnx_available:
     from omlt.io.onnx import load_onnx_neural_network
+    from omlt.io.onnx_parser import NetworkParser
 
 
 @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
@@ -105,3 +106,156 @@ def test_maxpool(datadir):
     assert layers[3].output_size == [3, 2, 1]
     for layer in layers[1:]:
         assert layer.kernel_depth == 3
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_input_tensor_invalid_dims(datadir):
+    model = onnx.load(datadir.file("keras_linear_131.onnx"))
+    model.graph.input[0].type.tensor_type.shape.dim[1].dim_value = 0
+    parser = NetworkParser()
+    with pytest.raises(ValueError) as excinfo:
+        parser.parse_network(model.graph, None, None)
+    expected_msg = 'All dimensions in graph "tf2onnx" input tensor have 0 value.'
+    assert str(excinfo.value) == expected_msg
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_no_input_layers(datadir):
+    model = onnx.load(datadir.file("keras_linear_131.onnx"))
+    model.graph.input.remove(model.graph.input[0])
+    parser = NetworkParser()
+    with pytest.raises(ValueError) as excinfo:
+        parser.parse_network(model.graph, None, None)
+    expected_msg = 'No valid input layer found in graph "tf2onnx".'
+    assert str(excinfo.value) == expected_msg
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_node_no_inputs(datadir):
+    model = onnx.load(datadir.file("keras_linear_131.onnx"))
+    while len(model.graph.node[0].input) > 0:
+        model.graph.node[0].input.pop()
+    parser = NetworkParser()
+    with pytest.raises(ValueError) as excinfo:
+        parser.parse_network(model.graph, None, None)
+    expected_msg = """Nodes must have inputs or have op_type \"Constant\". Node \"StatefulPartitionedCall/keras_linear_131/dense/MatMul\" has no inputs and op_type \"MatMul\"."""
+    assert str(excinfo.value) == expected_msg
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_wrong_node_type(datadir):
+    model = onnx.load(datadir.file("keras_linear_131.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_dense_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2],
+        )
+    expected_msg_dense = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only MatMul nodes can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_dense
+
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_gemm_dense_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2],
+        )
+    expected_msg_gemm = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Gemm nodes can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_gemm
+
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_conv_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2],
+        )
+    expected_msg_conv = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Conv nodes can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_conv
+
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_reshape_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2],
+        )
+    expected_msg_reshape = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Reshape nodes can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_reshape
+
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_pool_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2],
+        )
+    expected_msg_pool = """StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only MaxPool nodes can be used as starting points for consumption."""
+    assert str(excinfo.value) == expected_msg_pool
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_dense_wrong_dims(datadir):
+    model = onnx.load(datadir.file("keras_linear_131.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+
+    parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][
+        1
+    ].input.append("abcd")
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_dense_nodes(
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][1],
+            parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][2],
+        )
+    expected_msg_dense = "StatefulPartitionedCall/keras_linear_131/dense/MatMul input has 3 dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_dense
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_gemm_wrong_dims(datadir):
+    model = onnx.load(datadir.file("gemm.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+    parser._nodes["Gemm_0"][1].input.append("abcd")
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_gemm_dense_nodes(
+            parser._nodes["Gemm_0"][1], parser._nodes["Gemm_0"][2]
+        )
+    expected_msg_gemm = "Gemm_0 input has 4 dimensions, only nodes with 3 input dimensions can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_gemm
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_conv_wrong_dims(datadir):
+    model = onnx.load(datadir.file("convx1_gemmx1.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+    parser._nodes["Conv_0"][1].input.append("abcd")
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_conv_nodes(
+            parser._nodes["Conv_0"][1], parser._nodes["Conv_0"][2]
+        )
+    expected_msg_conv = "Conv_0 input has 4 dimensions, only nodes with 2 or 3 input dimensions can be used as starting points for consumption."
+    assert str(excinfo.value) == expected_msg_conv
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_reshape_wrong_dims(datadir):
+    model = onnx.load(datadir.file("convx1_gemmx1.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+    parser._nodes["Reshape_2"][1].input.append("abcd")
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_reshape_nodes(
+            parser._nodes["Reshape_2"][1], parser._nodes["Reshape_2"][2]
+        )
+    expected_msg_reshape = """Reshape_2 input has 3 dimensions, only nodes with 2 input dimensions can be used as starting points for consumption."""
+    assert str(excinfo.value) == expected_msg_reshape
+
+
+@pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test")
+def test_consume_maxpool_wrong_dims(datadir):
+    model = onnx.load(datadir.file("maxpool_2d.onnx"))
+    parser = NetworkParser()
+    parser.parse_network(model.graph, None, None)
+    parser._nodes["node1"][1].input.append("abcd")
+    with pytest.raises(ValueError) as excinfo:
+        parser._consume_pool_nodes(parser._nodes["node1"][1], parser._nodes["node1"][2])
+    expected_msg_maxpool = """node1 input has 2 dimensions, only nodes with 1 input dimension can be used as starting points for consumption."""
+    assert str(excinfo.value) == expected_msg_maxpool
diff --git a/tests/neuralnet/test_network_definition.py b/tests/neuralnet/test_network_definition.py
index 1897e88b..f3cadcb7 100644
--- a/tests/neuralnet/test_network_definition.py
+++ b/tests/neuralnet/test_network_definition.py
@@ -132,3 +132,47 @@ def test_input_bound_scaling_multiD():
         scaler, scaled_input_bounds=None, unscaled_input_bounds=unscaled_input_bounds
     )
     assert net.scaled_input_bounds == scaled_input_bounds
+
+
+def _test_add_invalid_edge(direction):
+    """
+    direction can be "in" or "out"
+    """
+    net = NetworkDefinition(scaled_input_bounds=[(-10.0, 10.0)])
+
+    input_layer = InputLayer([1])
+    net.add_layer(input_layer)
+
+    dense_layer_0 = DenseLayer(
+        input_layer.output_size,
+        [1, 2],
+        activation="relu",
+        weights=np.array([[1.0, -1.0]]),
+        biases=np.array([0.0, 0.0]),
+    )
+    net.add_layer(dense_layer_0)
+    net.add_edge(input_layer, dense_layer_0)
+
+    dense_layer_1 = DenseLayer(
+        input_layer.output_size,
+        dense_layer_0.input_size,
+        activation="linear",
+        weights=np.array([[1.0, 0.0], [5.0, 1.0]]),
+        biases=np.array([0.0, 0.0]),
+    )
+
+    if direction == "in":
+        with pytest.raises(ValueError) as excinfo:
+            net.add_edge(input_layer, dense_layer_1)
+        expected_msg = f"Inbound layer {dense_layer_1} not found in network."
+        assert str(excinfo.value) == expected_msg
+    elif direction == "out":
+        with pytest.raises(ValueError) as excinfo:
+            net.add_edge(dense_layer_1, dense_layer_0)
+        expected_msg = f"Outbound layer {dense_layer_1} not found in network."
+        assert str(excinfo.value) == expected_msg
+
+
+def test_add_invalid_edge():
+    _test_add_invalid_edge("in")
+    _test_add_invalid_edge("out")
diff --git a/tests/neuralnet/test_nn_formulation.py b/tests/neuralnet/test_nn_formulation.py
index 95d47bfb..ad3b2b0f 100644
--- a/tests/neuralnet/test_nn_formulation.py
+++ b/tests/neuralnet/test_nn_formulation.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pyomo.environ as pyo
 import pytest
+from pyomo.contrib.fbbt import interval
 
 from omlt import OmltBlock
 from omlt.neuralnet import (
@@ -9,6 +10,7 @@
     NetworkDefinition,
     ReducedSpaceNNFormulation,
     ReducedSpaceSmoothNNFormulation,
+    ReluPartitionFormulation,
 )
 from omlt.neuralnet.layer import (
     ConvLayer2D,
@@ -18,6 +20,15 @@
     PoolingLayer2D,
     GNNLayer,
 )
+from omlt.neuralnet.layers.full_space import (
+    full_space_maxpool2d_layer,
+    _input_layer_and_block,
+)
+from omlt.neuralnet.layers.partition_based import (
+    partition_based_dense_relu_layer,
+    default_partition_split_func,
+)
+from omlt.neuralnet.layers.reduced_space import reduced_space_dense_layer
 
 
 def two_node_network(activation, input_value):
@@ -249,7 +260,8 @@ def _maxpool_conv_network(inputs):
     net.add_layer(conv_layer_2)
     net.add_edge(conv_layer_1, conv_layer_2)
 
-    # test normal ConvLayer -> MaxPoolLayer structure, with monotonic increasing activation part of ConvLayer
+    # test normal ConvLayer -> MaxPoolLayer structure, with monotonic increasing
+    # activation part of ConvLayer
     maxpool_layer_1 = PoolingLayer2D(
         conv_layer_2.output_size, [1, 1, 2], [2, 2], "max", [3, 2], 1
     )
@@ -263,7 +275,8 @@ def _maxpool_conv_network(inputs):
     net.add_layer(conv_layer_3)
     net.add_edge(maxpool_layer_1, conv_layer_3)
 
-    # test ConvLayer -> MaxPoolLayer when nonlinear activation function is already part of max pooling layer
+    # test ConvLayer -> MaxPoolLayer when nonlinear activation function is
+    # already part of max pooling layer
     # also test index mapping logic in max pooling layers
     maxpool_layer_2_input_size = [1, 2, 1]
     maxpool_layer_2_index_mapper = IndexMapper(
@@ -328,6 +341,470 @@ def test_maxpool_FullSpaceNNFormulation():
     assert abs(pyo.value(m.neural_net_block.outputs[0, 0, 0]) - y[0, 0, 0]) < 1e-6
+
+
+def _test_formulation_initialize_extra_input(network_formulation):
+    """
+    network_formulation can be:
+    'FullSpace',
+    'ReducedSpace'
+    """
+    net, y = two_node_network("linear", -2.0)
+    extra_input = InputLayer([1])
+    net.add_layer(extra_input)
+    with pytest.raises(ValueError) as excinfo:
+        if network_formulation == "FullSpace":
+            FullSpaceNNFormulation(net)
+        elif network_formulation == "ReducedSpace":
+            ReducedSpaceNNFormulation(net)
+    expected_msg = "Multiple input layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def _test_formulation_added_extra_input(network_formulation):
+    """
+    network_formulation can be:
+    'FullSpace',
+    'ReducedSpace'
+    'relu'
+    """
+    net, y = two_node_network("linear", -2.0)
+    extra_input = InputLayer([1])
+    if network_formulation == "FullSpace":
+        formulation = FullSpaceNNFormulation(net)
+    elif network_formulation == "ReducedSpace":
+        formulation = ReducedSpaceNNFormulation(net)
+    elif network_formulation == "relu":
+        formulation = ReluPartitionFormulation(net)
+    net.add_layer(extra_input)
+    with pytest.raises(ValueError) as excinfo:
+        formulation.input_indexes
+    expected_msg = "Multiple input layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def _test_formulation_build_extra_input(network_formulation):
+    """
+    network_formulation can be:
+    'FullSpace',
+    'ReducedSpace'
+    'relu'
+    """
+    net, y = two_node_network("linear", -2.0)
+    extra_input = InputLayer([1])
+    if network_formulation == "FullSpace":
+        formulation = FullSpaceNNFormulation(net)
+    elif network_formulation == "ReducedSpace":
+        formulation = ReducedSpaceNNFormulation(net)
+    elif network_formulation == "relu":
+        formulation = ReluPartitionFormulation(net)
+    net.add_layer(extra_input)
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    with pytest.raises(ValueError) as excinfo:
+        m.neural_net_block.build_formulation(formulation)
+    expected_msg = "Multiple input layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def _test_formulation_added_extra_output(network_formulation):
+    """
+    network_formulation can be:
+    'FullSpace',
+    'ReducedSpace'
+    'relu'
+    """
+    net, y = two_node_network("linear", -2.0)
+    extra_output = DenseLayer(
+        [1, 2],
+        [1, 2],
+        activation="linear",
+        weights=np.array([[1.0, 0.0], [5.0, 1.0]]),
+        biases=np.array([3.0, 4.0]),
+    )
+    if network_formulation == "FullSpace":
+        formulation = FullSpaceNNFormulation(net)
+    elif network_formulation == "ReducedSpace":
+        formulation = ReducedSpaceNNFormulation(net)
+    elif network_formulation == "relu":
+        formulation = ReluPartitionFormulation(net)
+    net.add_layer(extra_output)
+    net.add_edge(list(net.layers)[-2], extra_output)
+    with pytest.raises(ValueError) as excinfo:
+        formulation.output_indexes
+    expected_msg = "Multiple output layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def _test_formulation_initialize_extra_output(network_formulation):
+    """
+    network_formulation can be:
+    'FullSpace',
+    'ReducedSpace'
+    """
+    net, y = two_node_network("linear", -2.0)
+    extra_output = DenseLayer(
+        [1, 2],
+        [1, 2],
+        activation="linear",
+        weights=np.array([[1.0, 0.0], [5.0, 1.0]]),
+        biases=np.array([3.0, 4.0]),
+    )
+    net.add_layer(extra_output)
+    net.add_edge(list(net.layers)[-2], extra_output)
+    with pytest.raises(ValueError) as excinfo:
+        if network_formulation == "FullSpace":
+            FullSpaceNNFormulation(net)
+        elif network_formulation == "ReducedSpace":
+            ReducedSpaceNNFormulation(net)
+    expected_msg = "Multiple output layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_FullSpaceNNFormulation_invalid_network():
+    _test_formulation_initialize_extra_input("FullSpace")
+    _test_formulation_added_extra_input("FullSpace")
+    _test_formulation_build_extra_input("FullSpace")
+    _test_formulation_initialize_extra_output("FullSpace")
+    _test_formulation_added_extra_output("FullSpace")
+
+
+def test_ReducedSpaceNNFormulation_invalid_network():
+    # _test_formulation_initialize_extra_input("ReducedSpace")
+    _test_formulation_added_extra_input("ReducedSpace")
+    _test_formulation_build_extra_input("ReducedSpace")
+    # _test_formulation_initialize_extra_output("ReducedSpace")
+    _test_formulation_added_extra_output("ReducedSpace")
+
+
+def test_ReluPartitionFormulation_invalid_network():
+    _test_formulation_added_extra_input("relu")
+    _test_formulation_build_extra_input("relu")
+    _test_formulation_added_extra_output("relu")
+
+
+def _test_dense_layer_multiple_predecessors(layer_type):
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network(None, -2.0)
+    extra_input = InputLayer([1])
+    test_layer = list(net.layers)[2]
+    net.add_layer(extra_input)
+    net.add_edge(extra_input, test_layer)
+    with pytest.raises(ValueError) as excinfo:
+        if layer_type == "PartitionBased":
+            partition_based_dense_relu_layer(m, net, m, test_layer, None)
+        elif layer_type == "ReducedSpace":
+            reduced_space_dense_layer(m, net, m, test_layer, None)
+    expected_msg = f"Layer {test_layer} has multiple predecessors."
+    assert str(excinfo.value) == expected_msg
+
+
+def _test_dense_layer_no_predecessors(layer_type):
+    """
+    Layer type can be "ReducedSpace", or "PartitionBased".
+    """
+    m = pyo.ConcreteModel()
+    net = NetworkDefinition(scaled_input_bounds=[(-10.0, 10.0)])
+
+    test_layer = DenseLayer(
+        [1],
+        [1, 2],
+        activation=None,
+        weights=np.array([[1.0, -1.0]]),
+        biases=np.array([1.0, 2.0]),
+    )
+    net.add_layer(test_layer)
+    with pytest.raises(ValueError) as excinfo:
+        if layer_type == "PartitionBased":
+            partition_based_dense_relu_layer(m, net, m, test_layer, None)
+        elif layer_type == "ReducedSpace":
+            reduced_space_dense_layer(m, net, m, test_layer, None)
+    expected_msg = f"Layer {test_layer} is not an input layer, but has no predecessors."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_partition_based_dense_layer_predecessors():
+    _test_dense_layer_multiple_predecessors("PartitionBased")
+    _test_dense_layer_no_predecessors("PartitionBased")
+
+
+def test_reduced_space_dense_layer_predecessors():
+    _test_dense_layer_multiple_predecessors("ReducedSpace")
+    _test_dense_layer_no_predecessors("ReducedSpace")
+
+
+def test_partition_based_unbounded_below():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network(None, -2.0)
+    test_layer = list(net.layers)[2]
+    prev_layer_id = id(list(net.layers)[1])
+    formulation = ReluPartitionFormulation(net)
+
+    m.neural_net_block.build_formulation(formulation)
+    prev_layer_block = m.neural_net_block.layer[prev_layer_id]
+    prev_layer_block.z.setlb(-interval.inf)
+
+    split_func = lambda w: default_partition_split_func(w, 2)
+
+    with pytest.raises(ValueError) as excinfo:
+        partition_based_dense_relu_layer(
+            m.neural_net_block, net, m.neural_net_block, test_layer, split_func
+        )
+    expected_msg = "Expression is unbounded below."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_partition_based_unbounded_above():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network(None, -2.0)
+    test_layer = list(net.layers)[2]
+    prev_layer_id = id(list(net.layers)[1])
+    formulation = ReluPartitionFormulation(net)
+
+    m.neural_net_block.build_formulation(formulation)
+    prev_layer_block = m.neural_net_block.layer[prev_layer_id]
+    prev_layer_block.z.setub(interval.inf)
+
+    split_func = lambda w: default_partition_split_func(w, 2)
+
+    with pytest.raises(ValueError) as excinfo:
+        partition_based_dense_relu_layer(
+            m.neural_net_block, net, m.neural_net_block, test_layer, split_func
+        )
+    expected_msg = "Expression is unbounded above."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_partition_based_bias_unbounded_below():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network(None, -2.0)
+    test_layer = list(net.layers)[2]
+    formulation = ReluPartitionFormulation(net)
+
+    m.neural_net_block.build_formulation(formulation)
+
+    test_layer.biases[0] = -interval.inf
+    split_func = lambda w: default_partition_split_func(w, 2)
+
+    with pytest.raises(ValueError) as excinfo:
+        partition_based_dense_relu_layer(
+            m.neural_net_block, net, m.neural_net_block, test_layer, split_func
+        )
+    expected_msg = "Expression is unbounded below."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_partition_based_bias_unbounded_above():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network(None, -2.0)
+    test_layer = list(net.layers)[2]
+    formulation = ReluPartitionFormulation(net)
+
+    m.neural_net_block.build_formulation(formulation)
+
+    test_layer.biases[0] = interval.inf
+    split_func = lambda w: default_partition_split_func(w, 2)
+
+    with pytest.raises(ValueError) as excinfo:
+        partition_based_dense_relu_layer(
+            m.neural_net_block, net, m.neural_net_block, test_layer, split_func
+        )
+    expected_msg = "Expression is unbounded above."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_fullspace_internal_extra_input():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+    net, y = two_node_network("linear", -2.0)
+    extra_input = InputLayer([1])
+    test_layer = list(net.layers)[1]
+    formulation = FullSpaceNNFormulation(net)
+    m.neural_net_block.build_formulation(formulation)
+    net.add_layer(extra_input)
+    net.add_edge(extra_input, test_layer)
+    with pytest.raises(ValueError) as excinfo:
+        _input_layer_and_block(m.neural_net_block, net, test_layer)
+    expected_msg = "Multiple input layers are not currently supported."
+    assert str(excinfo.value) == expected_msg
+
+
+def test_conv2d_extra_activation():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+
+    input_size = [1, 8, 6]
+    input_bounds = {}
+    for i in range(input_size[1]):
+        for j in range(input_size[2]):
+            input_bounds[(0, i, j)] = (-10.0, 10.0)
+    net = NetworkDefinition(scaled_input_bounds=input_bounds)
+
+    input_layer = InputLayer(input_size)
+    net.add_layer(input_layer)
+
+    conv_layer_1_kernel = np.array([[[[-3, 0], [1, 5]]]])
+    conv_layer_1 = ConvLayer2D(
+        input_layer.output_size, [1, 4, 5], [2, 1], conv_layer_1_kernel
+    )
+    net.add_layer(conv_layer_1)
+    net.add_edge(input_layer, conv_layer_1)
+
+    # have two consecutive conv layers,
+    # to check that conv layer behaves normally when a non-max pool layer succeeds it
+    conv_layer_2_kernel = np.array([[[[-2, -2], [-2, -2]]]])
+    conv_layer_2 = ConvLayer2D(
+        conv_layer_1.output_size,
+        [1, 3, 4],
+        [1, 1],
+        conv_layer_2_kernel,
+        activation="relu",
+    )
+    net.add_layer(conv_layer_2)
+    net.add_edge(conv_layer_1, conv_layer_2)
+
+    # test normal ConvLayer -> MaxPoolLayer structure, with monotonic
+    # increasing activation part of ConvLayer
+    maxpool_layer_1 = PoolingLayer2D(
+        conv_layer_2.output_size, [1, 1, 2], [2, 2], "max", [3, 2], 1, activation="relu"
+    )
+    net.add_layer(maxpool_layer_1)
+    net.add_edge(conv_layer_2, maxpool_layer_1)
+    with pytest.raises(ValueError) as excinfo:
+        m.neural_net_block.build_formulation(FullSpaceNNFormulation(net))
+    expected_msg = """Activation is applied after convolution layer, but the successor max pooling layer PoolingLayer(input_size=[1, 3, 4], output_size=[1, 1, 2], strides=[2, 2], kernel_shape=[3, 2]), pool_func_name=max has an activation function also."""
+    assert str(excinfo.value) == expected_msg
+
+
+def test_maxpool2d_bad_input_activation():
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+
+    input_size = [1, 8, 6]
+    input_bounds = {}
+    for i in range(input_size[1]):
+        for j in range(input_size[2]):
+            input_bounds[(0, i, j)] = (-10.0, 10.0)
+    net = NetworkDefinition(scaled_input_bounds=input_bounds)
+
+    input_layer = InputLayer(input_size)
+    net.add_layer(input_layer)
+
+    conv_layer_1_kernel = np.array([[[[-3, 0], [1, 5]]]])
+    conv_layer_1 = ConvLayer2D(
+        input_layer.output_size, [1, 4, 5], [2, 1], conv_layer_1_kernel
+    )
+    net.add_layer(conv_layer_1)
+    net.add_edge(input_layer, conv_layer_1)
+
+    # have two consecutive conv layers,
+    # to check that conv layer behaves normally when a non-max pool layer succeeds it
+    conv_layer_2_kernel = np.array([[[[-2, -2], [-2, -2]]]])
+    conv_layer_2 = ConvLayer2D(
+        conv_layer_1.output_size,
+        [1, 3, 4],
+        [1, 1],
+        conv_layer_2_kernel,
+        activation="relu",
+    )
+    net.add_layer(conv_layer_2)
+    net.add_edge(conv_layer_1, conv_layer_2)
+
+    # test normal ConvLayer -> MaxPoolLayer structure, with monotonic increasing
+    # activation part of ConvLayer
+    maxpool_layer_1 = PoolingLayer2D(
+        conv_layer_2.output_size,
+        [1, 1, 2],
+        [2, 2],
+        "max",
+        [3, 2],
+        1,
+        activation="linear",
+    )
+    net.add_layer(maxpool_layer_1)
+    net.add_edge(conv_layer_2, maxpool_layer_1)
+
+    m.neural_net_block.build_formulation(FullSpaceNNFormulation(net))
+
+    conv_layer_2.activation = "relu"
+
+    with pytest.raises(ValueError) as excinfo:
+        full_space_maxpool2d_layer(
+            m.neural_net_block, net, m.neural_net_block, maxpool_layer_1
+        )
+    expected_msg = """Non-increasing activation functions on the preceding convolutional layer are not supported."""
+    assert str(excinfo.value) == expected_msg
+
+
+def test_maxpool2d_bad_input_layer():
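+    # stacks a second max-pool directly on the first, so the max-pool input is
+    # not a ConvLayer2D and building the formulation raises a TypeError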
+    m = pyo.ConcreteModel()
+    m.neural_net_block = OmltBlock()
+
+    input_size = [1, 8, 6]
+    input_bounds = {}
+    for i in range(input_size[1]):
+        for j in range(input_size[2]):
+            input_bounds[(0, i, j)] = (-10.0, 10.0)
+    net = NetworkDefinition(scaled_input_bounds=input_bounds)
+
+    input_layer = InputLayer(input_size)
+    net.add_layer(input_layer)
+
+    conv_layer_1_kernel = np.array([[[[-3, 0], [1, 5]]]])
+    conv_layer_1 = ConvLayer2D(
+        input_layer.output_size, [1, 4, 5], [2, 1], conv_layer_1_kernel
+    )
+    net.add_layer(conv_layer_1)
+    net.add_edge(input_layer, conv_layer_1)
+
+    # have two consecutive conv layers,
+    # to check that conv layer behaves normally when a non-max pool layer succeeds it
+    conv_layer_2_kernel = np.array([[[[-2, -2], [-2, -2]]]])
+    conv_layer_2 = ConvLayer2D(
+        conv_layer_1.output_size,
+        [1, 3, 4],
+        [1, 1],
+        conv_layer_2_kernel,
+        activation="relu",
+    )
+    net.add_layer(conv_layer_2)
+    net.add_edge(conv_layer_1, conv_layer_2)
+
+    # test normal ConvLayer -> MaxPoolLayer structure, with monotonic increasing
+    # activation part of ConvLayer
+    maxpool_layer_1 = PoolingLayer2D(
+        conv_layer_2.output_size,
+        [1, 1, 2],
+        [2, 2],
+        "max",
+        [3, 2],
+        1,
+        activation="linear",
+    )
+    net.add_layer(maxpool_layer_1)
+    net.add_edge(conv_layer_2, maxpool_layer_1)
+
+    maxpool_layer_2 = PoolingLayer2D(
+        maxpool_layer_1.output_size,
+        [1, 1, 2],
+        [2, 2],
+        "max",
+        [3, 2],
+        1,
+        activation="linear",
+    )
+    net.add_layer(maxpool_layer_2)
+    net.add_edge(maxpool_layer_1, maxpool_layer_2)
+
+    with pytest.raises(TypeError) as excinfo:
+        m.neural_net_block.build_formulation(FullSpaceNNFormulation(net))
+    expected_msg = "Input layer must be a ConvLayer2D."
+    assert str(excinfo.value) == expected_msg
+
+
 def three_node_graph_neural_network(activation):
     input_size = [6]
     input_bounds = {}
diff --git a/tests/notebooks/test_run_notebooks.py b/tests/notebooks/test_run_notebooks.py
index 85c3d304..8fa78f77 100644
--- a/tests/notebooks/test_run_notebooks.py
+++ b/tests/notebooks/test_run_notebooks.py
@@ -14,10 +14,10 @@
 
 # TODO: These will be replaced with stronger tests using testbook soon
 def _test_run_notebook(folder, notebook_fname, n_cells):
-    # change to notebook directory to allow testing
+    # Change to notebook directory to allow for testing
     cwd = os.getcwd()
     os.chdir(os.path.join(this_file_dir(), "..", "..", "docs", "notebooks", folder))
-    with testbook(notebook_fname, timeout=300, execute=True) as tb:
+    with testbook(notebook_fname, timeout=500, execute=True) as tb:
         assert tb.code_cells_executed == n_cells
     os.chdir(cwd)