diff --git a/docs/api_doc/omlt.neuralnet.layers_activations.rst b/docs/api_doc/omlt.neuralnet.layers_activations.rst
index b73ad642..e9c7149b 100644
--- a/docs/api_doc/omlt.neuralnet.layers_activations.rst
+++ b/docs/api_doc/omlt.neuralnet.layers_activations.rst
@@ -6,6 +6,8 @@ Layer and Activation Functions
Layer Functions
----------------
+.. automodule:: omlt.neuralnet.layers.__init__
+
.. automodule:: omlt.neuralnet.layers.full_space
:members:
:undoc-members:
@@ -26,6 +28,8 @@ Layer Functions
Activation Functions
---------------------
+.. automodule:: omlt.neuralnet.activations.__init__
+
.. automodule:: omlt.neuralnet.activations.linear
:members:
:undoc-members:
diff --git a/docs/notebooks.rst b/docs/notebooks.rst
index 4bf3dfd9..7c5e05b6 100644
--- a/docs/notebooks.rst
+++ b/docs/notebooks.rst
@@ -2,7 +2,9 @@ Jupyter Notebooks
===================
OMLT provides Jupyter notebooks to demonstrate its core capabilities. All notebooks can be found on the OMLT
-github `page `_. The notebooks are summarized as follows:
+github `page `_.
+
+The first set of notebooks demonstrates the basic mechanics of OMLT and shows how to use it:
* `build_network.ipynb `_ shows how to manually create a `NetworkDefinition` object. This notebook is helpful for understanding the details of the internal layer structure that OMLT uses to represent neural networks.
@@ -10,6 +12,14 @@ github `page `_.
* `neural_network_formulations.ipynb `_ showcases the different neural network formulations available in OMLT.
+* `index_handling.ipynb `_ shows how to use `IndexMapper` to handle the mappings between indexes.
+
+* `bo_with_trees.ipynb `_ incorporates gradient-boosted trees into a Bayesian optimization loop to optimize the Rosenbrock function.
+
+* `linear_tree_formulations.ipynb `_ showcases the different linear model decision tree formulations available in OMLT.
+
+The second set of notebooks gives application-specific examples:
+
* `mnist_example_dense.ipynb `_ trains a fully dense neural network on MNIST and uses OMLT to find adversarial examples.
* `mnist_example_convolutional.ipynb `_ trains a convolutional neural network on MNIST and uses OMLT to find adversarial examples.
@@ -17,7 +27,4 @@ github `page `_.
* `auto-thermal-reformer.ipynb `_ develops a neural network surrogate (using sigmoid activations) with data from a process model built using `IDAES-PSE `_.
* `auto-thermal-reformer-relu.ipynb `_ develops a neural network surrogate (using ReLU activations) with data from a process model built using `IDAES-PSE `_.
-
-* `bo_with_trees.ipynb `_ incorporates gradient-boosted-trees into a Bayesian optimization loop to optimize the Rosenbrock function.
-
-* `linear_tree_formulations.ipynb `_ showcases the different linear model decision tree formulations available in OMLT.
\ No newline at end of file
diff --git a/docs/notebooks/neuralnet/index_handling.ipynb b/docs/notebooks/neuralnet/index_handling.ipynb
new file mode 100644
index 00000000..36ed4338
--- /dev/null
+++ b/docs/notebooks/neuralnet/index_handling.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Index handling\n",
+ "\n",
+ "Sometimes moving from layer to layer in a neural network involves rearranging the size from the output of the previous layer to the input of the next layer. This notebook demonstrates how to use this functionality in OMLT.\n",
+ "\n",
+ "## Library Setup\n",
+ "\n",
+ "Start by importing the libraries used in this project:\n",
+ "\n",
+ " - `numpy`: a general-purpose numerical library\n",
+ " - `omlt`: the package this notebook demonstates.\n",
+ " \n",
+ "We import the following classes from `omlt`:\n",
+ "\n",
+ " - `NetworkDefinition`: class that contains the nodes in a Neural Network\n",
+ " - `InputLayer`, `DenseLayer`, `PoolingLayer2D`: the three types of layers used in this example\n",
+ " - `IndexMapper`: used to reshape the data between layers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from omlt.neuralnet import NetworkDefinition\n",
+ "from omlt.neuralnet.layer import IndexMapper, InputLayer, DenseLayer, PoolingLayer2D"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Then we define a simple network that consists of a max pooling layer and a dense layer:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0\tInputLayer(input_size=[9], output_size=[9])\n",
+ "1\tPoolingLayer(input_size=[1, 3, 3], output_size=[1, 2, 2], strides=[2, 2], kernel_shape=[2, 2]), pool_func_name=max\n",
+ "2\tDenseLayer(input_size=[4], output_size=[1])\n"
+ ]
+ }
+ ],
+ "source": [
+ "# define bounds for inputs\n",
+ "input_size = [9]\n",
+ "input_bounds = {}\n",
+ "for i in range(input_size[0]):\n",
+ " input_bounds[(i)] = (-10.0, 10.0)\n",
+ "\n",
+ "net = NetworkDefinition(scaled_input_bounds=input_bounds)\n",
+ "\n",
+ "# define the input layer\n",
+ "input_layer = InputLayer(input_size)\n",
+ "\n",
+ "net.add_layer(input_layer)\n",
+ "\n",
+ "# define the pooling layer\n",
+ "input_index_mapper_1 = IndexMapper([9], [1, 3, 3])\n",
+ "maxpooling_layer = PoolingLayer2D(\n",
+ " [1, 3, 3],\n",
+ " [1, 2, 2],\n",
+ " [2, 2],\n",
+ " \"max\",\n",
+ " [2, 2],\n",
+ " 1,\n",
+ " input_index_mapper=input_index_mapper_1,\n",
+ ")\n",
+ "\n",
+ "net.add_layer(maxpooling_layer)\n",
+ "net.add_edge(input_layer, maxpooling_layer)\n",
+ "\n",
+ "# define the dense layer\n",
+ "input_index_mapper_2 = IndexMapper([1, 2, 2], [4])\n",
+ "dense_layer = DenseLayer(\n",
+ " [4],\n",
+ " [1],\n",
+ " activation=\"linear\",\n",
+ " weights=np.ones([4, 1]),\n",
+ " biases=np.zeros(1),\n",
+ " input_index_mapper=input_index_mapper_2,\n",
+ ")\n",
+ "\n",
+ "net.add_layer(dense_layer)\n",
+ "net.add_edge(maxpooling_layer, dense_layer)\n",
+ "\n",
+ "for layer_id, layer in enumerate(net.layers):\n",
+ " print(f\"{layer_id}\\t{layer}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, `input_index_mapper_1` maps outputs of `input_layer` (with size [9]) to the inputs of `maxpooling_layer` (with size [1, 3, 3]), `input_index_mapper_2` maps the outputs of `maxpooling_layer` (with size [1, 2, 2]) to the inputs of `dense_layer` (with size [4]). Given an input, we can evaluate each layer to see how `IndexMapper` works: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "outputs of maxpooling_layer:\n",
+ " [[[5. 6.]\n",
+ " [8. 9.]]]\n",
+ "outputs of dense_layer:\n",
+ " [28.]\n"
+ ]
+ }
+ ],
+ "source": [
+ "x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
+ "y1 = maxpooling_layer.eval_single_layer(x)\n",
+ "print(\"outputs of maxpooling_layer:\\n\", y1)\n",
+ "y2 = dense_layer.eval_single_layer(y1)\n",
+ "print(\"outputs of dense_layer:\\n\", y2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Without `IndexMapper`, the output of `maxpooling_layer` is identical to the input of `dense_layer`. When using `IndexMapper`, using `input_indexes_with_input_layer_indexes` can provide the mapping between indexes. Therefore, there is no need to define variables for the inputs of each layer (except for `input_layer`). As shown in the following, we print both input indexes and output indexes for each layer. Also, we give the mapping between indexes of two adjacent layers, where `local_index` corresponds to the input indexes of the current layer and `input_index` corresponds to the output indexes of previous layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "input indexes of input_layer:\n",
+ "[(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,)]\n",
+ "output indexes of input_layer:\n",
+ "[(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,)]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"input indexes of input_layer:\")\n",
+ "print(input_layer.input_indexes)\n",
+ "print(\"output indexes of input_layer:\")\n",
+ "print(input_layer.output_indexes)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "input indexes of maxpooling_layer:\n",
+ "[(0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 2, 0), (0, 2, 1), (0, 2, 2)]\n",
+ "output indexes of maxpooling_layer:\n",
+ "[(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]\n",
+ "input_index_mapping_1:\n",
+ "local_index: (0, 0, 0) input_index: (0,)\n",
+ "local_index: (0, 0, 1) input_index: (1,)\n",
+ "local_index: (0, 0, 2) input_index: (2,)\n",
+ "local_index: (0, 1, 0) input_index: (3,)\n",
+ "local_index: (0, 1, 1) input_index: (4,)\n",
+ "local_index: (0, 1, 2) input_index: (5,)\n",
+ "local_index: (0, 2, 0) input_index: (6,)\n",
+ "local_index: (0, 2, 1) input_index: (7,)\n",
+ "local_index: (0, 2, 2) input_index: (8,)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"input indexes of maxpooling_layer:\")\n",
+ "print(maxpooling_layer.input_indexes)\n",
+ "print(\"output indexes of maxpooling_layer:\")\n",
+ "print(maxpooling_layer.output_indexes)\n",
+ "print(\"input_index_mapping_1:\")\n",
+ "for local_index, input_index in maxpooling_layer.input_indexes_with_input_layer_indexes:\n",
+ " print(\"local_index:\", local_index, \"input_index:\", input_index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "input indexes of dense_layer:\n",
+ "[(0,), (1,), (2,), (3,)]\n",
+ "output indexes of dense_layer:\n",
+ "[(0,)]\n",
+ "input_index_mapping_2:\n",
+ "local_index: (0,) input_index: (0, 0, 0)\n",
+ "local_index: (1,) input_index: (0, 0, 1)\n",
+ "local_index: (2,) input_index: (0, 1, 0)\n",
+ "local_index: (3,) input_index: (0, 1, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"input indexes of dense_layer:\")\n",
+ "print(dense_layer.input_indexes)\n",
+ "print(\"output indexes of dense_layer:\")\n",
+ "print(dense_layer.output_indexes)\n",
+ "print(\"input_index_mapping_2:\")\n",
+ "for local_index, input_index in dense_layer.input_indexes_with_input_layer_indexes:\n",
+ " print(\"local_index:\", local_index, \"input_index:\", input_index)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "OMLT",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/omlt/neuralnet/__init__.py b/src/omlt/neuralnet/__init__.py
index 9f9c7006..9d8e8cf2 100644
--- a/src/omlt/neuralnet/__init__.py
+++ b/src/omlt/neuralnet/__init__.py
@@ -1,16 +1,19 @@
r"""
-We use the following notation to describe layer and activation functions:
+The basic pipeline of a neural network formulation in OMLT is:
.. math::
\begin{align*}
- N &:= \text{Set of nodes (i.e. neurons in the neural network)}\\
- M_i &:= \text{Number of inputs to node $i$}\\
- \hat z_i &:= \text{pre-activation value on node $i$}\\
- z_i &:= \text{post-activation value on node $i$}\\
- w_{ij} &:= \text{weight from input $j$ to node $i$}\\
- b_i &:= \text{bias value for node $i$}
+ \mathbf z^{(0)}
+ \xrightarrow[\text{Constraints}]{\text{Layer 1}} \hat{\mathbf z}^{(1)}
+ \xrightarrow[\text{Activations}]{\text{Layer 1}} \mathbf z^{(1)}
+ \xrightarrow[\text{Constraints}]{\text{Layer 2}} \hat{\mathbf z}^{(2)}
+ \xrightarrow[\text{Activations}]{\text{Layer 2}} \mathbf z^{(2)}
+ \xrightarrow[\text{Constraints}]{\text{Layer 3}}\cdots
\end{align*}
+
+where :math:`\mathbf z^{(0)}` is the output of the `InputLayer`, :math:`\hat{\mathbf z}^{(l)}` is the pre-activation output of the :math:`l`-th layer, and :math:`\mathbf z^{(l)}` is the post-activation output of the :math:`l`-th layer.
+
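+For example, given a ``NetworkDefinition`` ``net`` (such as the ones built in the notebooks), this pipeline is realized by building a formulation on an ``OmltBlock``. A minimal sketch, with illustrative model and variable names:
+
+.. code-block:: python
+
+    import pyomo.environ as pyo
+
+    from omlt import OmltBlock
+    from omlt.neuralnet import FullSpaceNNFormulation
+
+    # `net` is a NetworkDefinition describing the layers and activations
+    m = pyo.ConcreteModel()
+    m.nn = OmltBlock()
+    # each layer adds constraints linking z^{(l-1)} to zhat^{(l)};
+    # each activation links zhat^{(l)} to z^{(l)}
+    m.nn.build_formulation(FullSpaceNNFormulation(net))
+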
"""
from omlt.neuralnet.network_definition import NetworkDefinition
from omlt.neuralnet.nn_formulation import (
diff --git a/src/omlt/neuralnet/activations/__init__.py b/src/omlt/neuralnet/activations/__init__.py
index b2aecb1b..fcae4cc8 100644
--- a/src/omlt/neuralnet/activations/__init__.py
+++ b/src/omlt/neuralnet/activations/__init__.py
@@ -1,3 +1,7 @@
+r"""
+Since all activation functions are applied element-wise, we only consider how to formulate an activation function for a single neuron, where :math:`x` denotes the pre-activation variable and :math:`y` denotes the post-activation variable.
+
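+In the constraint generators below, :math:`x` corresponds to the pre-activation variables ``layer_block.zhat`` and :math:`y` to the post-activation variables ``layer_block.z`` of a layer block.
+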
+"""
from .linear import linear_activation_constraint, linear_activation_function
from .relu import ComplementarityReLUActivation, bigm_relu_activation_constraint
from .smooth import (
diff --git a/src/omlt/neuralnet/activations/linear.py b/src/omlt/neuralnet/activations/linear.py
index 073c8898..712049c1 100644
--- a/src/omlt/neuralnet/activations/linear.py
+++ b/src/omlt/neuralnet/activations/linear.py
@@ -8,12 +8,12 @@ def linear_activation_constraint(
r"""
Linear activation constraint generator
- Generates the constraints for the linear activation function.
+ Generates the constraints for the linear activation function:
.. math::
\begin{align*}
- z_i &= \hat{z_i} && \forall i \in N
+ y=x
\end{align*}
"""
diff --git a/src/omlt/neuralnet/activations/relu.py b/src/omlt/neuralnet/activations/relu.py
index aeead444..427be19a 100644
--- a/src/omlt/neuralnet/activations/relu.py
+++ b/src/omlt/neuralnet/activations/relu.py
@@ -6,26 +6,37 @@ def bigm_relu_activation_constraint(net_block, net, layer_block, layer):
r"""
Big-M ReLU activation formulation.
- Generates the constraints for the ReLU activation function.
+ Generates the constraints for the ReLU activation function:
.. math::
\begin{align*}
- z_i &= \text{max}(0, \hat{z_i}) && \forall i \in N
+ y=\max(0,x)
\end{align*}
- The Big-M formulation for the i-th node is given by:
+    We additionally introduce the following notation to describe this formulation:
.. math::
\begin{align*}
- z_i &\geq \hat{z_i} \\
- z_i &\leq \hat{z_i} - l(1-\sigma) \\
- z_i &\leq u(\sigma) \\
- \sigma &\in \{0, 1\}
+        \sigma &:= \text{binary variable indicating whether $y=x$, with $\sigma\in\{0,1\}$}\\
+        l &:= \text{the lower bound of $x$}\\
+        u &:= \text{the upper bound of $x$}
\end{align*}
- where :math:`l` and :math:`u` are, respectively, lower and upper bounds of :math:`\hat{z_i}`.
+ The big-M formulation is given by:
+
+ .. math::
+
+ \begin{align*}
+ y&\ge 0\\
+ y&\ge x\\
+ y&\le x-(1-\sigma)l\\
+ y&\le \sigma u
+ \end{align*}
+
+ The lower bound of :math:`y` is :math:`\max(0,l)`, and the upper bound of :math:`y` is :math:`\max(0,u)`.
+
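+    For example, if :math:`x\in[-2,3]` (so :math:`l=-2` and :math:`u=3`), the constraints become :math:`y\ge 0`, :math:`y\ge x`, :math:`y\le x+2(1-\sigma)`, and :math:`y\le 3\sigma`: choosing :math:`\sigma=0` forces :math:`y=0` (feasible only when :math:`x\le 0`), while :math:`\sigma=1` forces :math:`y=x` (feasible only when :math:`x\ge 0`).
+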
"""
layer_block.q_relu = pyo.Var(layer.output_indexes, within=pyo.Binary)
@@ -45,7 +56,7 @@ def bigm_relu_activation_constraint(net_block, net, layer_block, layer):
for output_index in layer.output_indexes:
lb, ub = layer_block.zhat[output_index].bounds
layer_block._big_m_lb_relu[output_index] = lb
- layer_block.z[output_index].setlb(0)
+ layer_block.z[output_index].setlb(max(0, lb))
layer_block._big_m_ub_relu[output_index] = ub
layer_block.z[output_index].setub(max(0, ub))
@@ -73,22 +84,32 @@ def bigm_relu_activation_constraint(net_block, net, layer_block, layer):
class ComplementarityReLUActivation:
r"""
- Complementarity-based ReLU activation forumlation.
+ Complementarity-based ReLU activation formulation.
+
+ Generates the constraints for the ReLU activation function:
+
+ .. math::
+
+ \begin{align*}
+ y=\max(0,x)
+ \end{align*}
- Generates the constraints for the ReLU activation function.
+ The complementarity-based formulation is given by:
.. math::
\begin{align*}
- z_i &= \text{max}(0, \hat{z_i}) && \forall i \in N
+ 0\le y \perp (y-x)\ge 0
\end{align*}
- The complementarity-based formulation for the i-th node is given by:
+    which is equivalent to:
.. math::
\begin{align*}
- 0 &\leq z_i \perp (z-\hat{z_i}) \geq 0
+        y&\ge 0\\
+        y(y-x)&=0\\
+        y-x&\ge 0
\end{align*}
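+
+    Hence either :math:`y=0` (in which case :math:`y-x\ge 0` gives :math:`x\le 0`) or :math:`y=x` (in which case :math:`y\ge 0` gives :math:`x\ge 0`), recovering :math:`y=\max(0,x)`.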
"""
diff --git a/src/omlt/neuralnet/activations/smooth.py b/src/omlt/neuralnet/activations/smooth.py
index 4caf2404..b37ac6c7 100644
--- a/src/omlt/neuralnet/activations/smooth.py
+++ b/src/omlt/neuralnet/activations/smooth.py
@@ -3,12 +3,12 @@
def softplus_activation_function(x):
r"""
- Applies the softplus function.
+ Applies the softplus function:
.. math::
\begin{align*}
- y &= log(exp(\hat x) + 1)
+ y=\log(\exp(x)+1)
\end{align*}
"""
@@ -17,12 +17,12 @@ def softplus_activation_function(x):
def sigmoid_activation_function(x):
r"""
- Applies the sigmoid function.
+ Applies the sigmoid function:
.. math::
\begin{align*}
- y &= \frac{1}{1 + exp(-\hat x)}
+ y=\frac{1}{1+\exp(-x)}
\end{align*}
"""
@@ -31,12 +31,12 @@ def sigmoid_activation_function(x):
def tanh_activation_function(x):
r"""
- Applies the tanh function.
+ Applies the tanh function:
.. math::
\begin{align*}
- y &= tanh(x)
+ y=\tanh(x)
\end{align*}
"""
@@ -45,15 +45,7 @@ def tanh_activation_function(x):
def softplus_activation_constraint(net_block, net, layer_block, layer):
r"""
- Softplus activation constraint generator
-
- Generates the constraints for the softplus activation function.
-
- .. math::
-
- \begin{align*}
- z_i &= log(exp(\hat z_i) + 1) && \forall i \in N
- \end{align*}
+ Softplus activation constraint generator.
"""
return smooth_monotonic_activation_constraint(
@@ -63,15 +55,7 @@ def softplus_activation_constraint(net_block, net, layer_block, layer):
def sigmoid_activation_constraint(net_block, net, layer_block, layer):
r"""
- Sigmoid activation constraint generator
-
- Generates the constraints for the sigmoid activation function.
-
- .. math::
-
- \begin{align*}
- z_i &= \frac{1}{1 + exp(-\hat z_i)} && \forall i \in N
- \end{align*}
+ Sigmoid activation constraint generator.
"""
return smooth_monotonic_activation_constraint(
@@ -81,15 +65,7 @@ def sigmoid_activation_constraint(net_block, net, layer_block, layer):
def tanh_activation_constraint(net_block, net, layer_block, layer):
r"""
- tanh activation constraint generator
-
- Generates the constraints for the tanh activation function.
-
- .. math::
-
- \begin{align*}
- z_i &= tanh(\hat z_i) && \forall i \in N
- \end{align*}
+ tanh activation constraint generator.
"""
return smooth_monotonic_activation_constraint(
@@ -99,15 +75,14 @@ def tanh_activation_constraint(net_block, net, layer_block, layer):
def smooth_monotonic_activation_constraint(net_block, net, layer_block, layer, fcn):
r"""
- Activation constraint generator for a smooth monotonic function
+ Activation constraint generator for a smooth monotonic function.
- Generates the constraints for the activation function fcn if it
- is smooth and monotonic
+ Generates the constraints for the activation function :math:`f` if it is smooth and monotonic:
.. math::
\begin{align*}
- z_i &= fcn(\hat z_i) && \forall i \in N
+ y=f(x)
\end{align*}
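+
+    For example, :func:`softplus_activation_constraint`, :func:`sigmoid_activation_constraint`, and :func:`tanh_activation_constraint` above all delegate to this generator, with :math:`f` set to the corresponding activation function.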
"""
diff --git a/src/omlt/neuralnet/layer.py b/src/omlt/neuralnet/layer.py
index d7f7fa89..f5df49b6 100644
--- a/src/omlt/neuralnet/layer.py
+++ b/src/omlt/neuralnet/layer.py
@@ -1,4 +1,21 @@
-"""Neural network layer classes."""
+r"""
+Neural network layer classes.
+
+We use the following notation to define a layer:
+
+.. math::
+
+ \begin{align*}
+ F_{in} &:= \text{number of input features}\\
+ F_{out} &:= \text{number of output features}\\
+ x_i &:= \text{the $i$-th input, $0\le i