diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 55870dbc..00000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- -name: CI - -on: - push: - branches: ["main","github-actions"] - pull_request: - branches: ["main"] - workflow_dispatch: - -jobs: - tests: - name: "Python ${{ matrix.python-version }}" - runs-on: "ubuntu-latest" - - strategy: - matrix: - # python-version: ["3.7", "3.8", "3.9"] - python-version: ["3.8", "3.9", "3.10"] - - steps: - - uses: "actions/checkout@v2" - - uses: "actions/setup-python@v2" - - uses: "s-weigand/setup-conda@v1" - with: - python-version: "${{ matrix.python-version }}" - - - name: Install solvers - run: sudo apt-get install -y glpk-utils coinor-cbc - - - name: "Install dependencies" - run: | - set -xe - python -VV - python -m site - python -m pip install --upgrade pip setuptools wheel - python -m pip install --upgrade coverage[toml] virtualenv tox tox-gh-actions - conda install -c conda-forge ipopt - conda install -c conda-forge pyscipopt - - - name: "Run tox targets with lean testing environment for ${{ matrix.python-version }}" - run: "tox -re leanenv" - - - name: "Run tox targets for ${{ matrix.python-version }}" - run: "tox" - - # - name: "Run tox notebooks targets for ${{ matrix.python-version }}" - # run: | - # shopt -s globstar - # tox -e notebooks docs/**/*.ipynb - - - name: "Convert coverage" - run: "python -m coverage xml" - - - name: "Upload coverage to Codecov" - uses: "codecov/codecov-action@v2" - with: - fail_ci_if_error: true diff --git a/README.rst b/README.rst index cb90ed8e..0303ed01 100644 --- a/README.rst +++ b/README.rst @@ -55,6 +55,17 @@ When utilizing linear model decision trees, please cite the following paper in a doi = {https://doi.org/10.1016/j.compchemeng.2023.108347} } +When utilizing graph neural networks, please cite the following paper in addition: + +:: + + @article{zhang2023, + title={Augmenting optimization-based molecular design with graph neural networks}, + author= {Shiqiang Zhang and Juan S. Campos and Christian Feldmann and Frederik Sandfort and Miriam Mathea and Ruth Misener}, + journal={arXiv preprint arXiv:2312.03613}, + year = {2023}, + } + Documentation ============== The latest OMLT documentation can be found at the `readthedocs page `_. Additionally, much of the current functionality is demonstrated using Jupyter notebooks available in the `notebooks folder `_. @@ -149,11 +160,11 @@ Contributors * - |jalving|_ - Jordan Jalving - - This work was funded by Sandia National Laboratories, Laboratory Directed Research and Development program + - This work was funded by Sandia National Laboratories, Laboratory Directed Research and Development program. * - |fracek|_ - Francesco Ceccon - - This work was funded by an Engineering & Physical Sciences Research Council Research Fellowship [GrantNumber EP/P016871/1] + - This work was funded by an Engineering & Physical Sciences Research Council Research Fellowship [GrantNumber EP/P016871/1]. * - |carldlaird|_ - Carl D. Laird @@ -171,6 +182,13 @@ Contributors - Bashar L. Ammari - This work was funded by Sandia National Laboratories, Laboratory Directed Research and Development program. + * - |juan-campos|_ + - Juan S. Campos + - This work was funded by an Engineering & Physical Sciences Research Council Research Fellowship [GrantNumber EP/W003317/1]. + + * - |zshiqiang|_ + - Shiqiang Zhang + - This work was funded by an Imperial College Hans Rausing PhD Scholarship. .. 
_jalving: https://github.com/jalving .. |jalving| image:: https://avatars1.githubusercontent.com/u/16785413?s=120&v=4 @@ -195,3 +213,11 @@ Contributors .. _bammari: https://github.com/bammari .. |bammari| image:: https://avatars.githubusercontent.com/u/96192809?v=4 :width: 80px + +.. _juan-campos: https://github.com/juan-campos +.. |juan-campos| image:: https://avatars.githubusercontent.com/u/65016230?v=4 + :width: 80px + +.. _zshiqiang: https://github.com/zshiqiang +.. |zshiqiang| image:: https://avatars.githubusercontent.com/u/91337036?v=4 + :width: 80px
diff --git a/docs/notebooks.rst b/docs/notebooks.rst index 7c5e05b6..f7da92f4 100644 --- a/docs/notebooks.rst +++ b/docs/notebooks.rst @@ -24,6 +24,8 @@ The second set of notebooks gives application-specific examples: * `mnist_example_convolutional.ipynb `_ trains a convolutional neural network on MNIST and uses OMLT to find adversarial examples. +* `graph_neural_network_formulation.ipynb `_ loads trained graph neural networks into OMLT and builds formulations to solve optimization problems. + * `auto-thermal-reformer.ipynb `_ develops a neural network surrogate (using sigmoid activations) with data from a process model built using `IDAES-PSE `_. * `auto-thermal-reformer-relu.ipynb `_ develops a neural network surrogate (using ReLU activations) with data from a process model built using `IDAES-PSE `_.
diff --git a/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb b/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb new file mode 100644 index 00000000..dd1e74dd --- /dev/null +++ b/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb @@ -0,0 +1,666 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Optimizing over trained graph neural networks\n", + "\n", + "This notebook explains how OMLT is used to optimize over trained graph neural networks (GNNs).\n", + "\n", + "**NOTE:** For simplicity, we skip the training process and just use random parameters for GNNs.\n", + "\n", + "\n", + "## Library Setup\n", + "\n", + "This notebook assumes you have working PyTorch and PyTorch Geometric environments to define a GNN. This GNN is then formulated in Pyomo using OMLT, which therefore requires working Pyomo and OMLT installations.\n", + "\n", + "The required Python libraries used in this notebook are as follows:\n", + "\n", + "- `numpy`: used for transformation of parameters\n", + "\n", + "- `torch`: the machine learning framework used for neural networks\n", + "\n", + "- `torch_geometric`: the PyTorch-based library used for graph neural networks\n", + "\n", + "- `pyomo`: the algebraic modeling language for Python; it is used to define the optimization model passed to the solver\n", + "\n", + "- `onnx`: used to express trained neural network models\n", + "\n", + "- `omlt`: the package this notebook demonstrates. OMLT can formulate machine learning (such as neural networks) within Pyomo\n", + "\n", + "**NOTE:** This notebook also assumes you have a working MIP solver executable to solve optimization problems in Pyomo. The open-source solver CBC is called by default. 
\n", + "\n", + "\n", + "## Example 1: Optimizing a GNN with Fixed Graph\n", + "\n", + "Define a GCN in `torch_geometric` as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from torch.nn import Linear, ReLU, Sigmoid\n", + "from torch_geometric.nn import Sequential, GCNConv\n", + "from torch_geometric.nn import global_mean_pool\n", + "from omlt.io.torch_geometric import gnn_with_fixed_graph\n", + "import pyomo.environ as pyo\n", + "from omlt import OmltBlock\n", + "\n", + "\n", + "def GCN_Sequential(activation, pooling):\n", + " torch.manual_seed(123)\n", + " return Sequential(\n", + " \"x, edge_index\",\n", + " [\n", + " (GCNConv(2, 4), \"x, edge_index -> x\"),\n", + " activation(),\n", + " (GCNConv(4, 4), \"x, edge_index -> x\"),\n", + " activation(),\n", + " Linear(4, 4),\n", + " (pooling, \"x, None -> x\"),\n", + " Linear(4, 2),\n", + " activation(),\n", + " Linear(2, 1),\n", + " ],\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This model has two types of `Linear` layers: the first linear layer maps in-features to out-features for each node, the last two linear layers map features after pooling. For illustration purposes, we use `load_torch_geometric_sequential` to show the transformed model in OMLT (this step is not needed for later formulation):" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\tInputLayer(input_size=[6], output_size=[6])\tlinear\n", + "1\tGNNLayer(input_size=[6], output_size=[12])\trelu\n", + "2\tGNNLayer(input_size=[12], output_size=[12])\trelu\n", + "3\tDenseLayer(input_size=[12], output_size=[12])\tlinear\n", + "4\tDenseLayer(input_size=[12], output_size=[4])\tlinear\n", + "5\tDenseLayer(input_size=[4], output_size=[2])\trelu\n", + "6\tDenseLayer(input_size=[2], output_size=[1])\tlinear\n" + ] + } + ], + "source": [ + "from omlt.io.torch_geometric import load_torch_geometric_sequential\n", + "\n", + "# define a GCN sequential model\n", + "nn = GCN_Sequential(ReLU, global_mean_pool)\n", + "# number of nodes\n", + "N = 3\n", + "# adjacency matrix\n", + "A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])\n", + "\n", + "# load the model into OMLT\n", + "net = load_torch_geometric_sequential(nn, N, A)\n", + "\n", + "for layer_id, layer in enumerate(net.layers):\n", + " print(f\"{layer_id}\\t{layer}\\t{layer.activation}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Two GCN layers are rewritten into two `GNNLayer` in OMLT given $N$ and $A$. The first linear layer is expanded since it maps features of each node. The pooling layer is equivalently transformed into a `DenseLayer`. 
The last two linear layers are the same as before since the node features have already been pooled.\n", + "\n", + "Besides providing $N$ and $A$, one needs to define bounds for the inputs:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ + "# define a GCN sequential model\n", + "nn1 = GCN_Sequential(ReLU, global_mean_pool)\n", + "# number of nodes\n", + "N = 3\n", + "# adjacency matrix\n", + "A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])\n", + "\n", + "# size of inputs = number of nodes x number of input features\n", + "input_size = [6]\n", + "# define lower and upper bounds for each input\n", + "input_bounds = {}\n", + "for i in range(input_size[0]):\n", + " input_bounds[(i)] = (-1.0, 1.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "With this information, the last step is to create an `OmltBlock` and build the formulation in this block using `gnn_with_fixed_graph`:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Welcome to the CBC MILP Solver \n", + "Version: 2.9.9 \n", + "Build Date: Oct 13 2018 \n", + "\n", + "command line - /rds/general/user/sz421/home/anaconda3/envs/OMLT_test/bin/cbc -printingOptions all -import /var/tmp/pbs.8259409.pbs/tmpp27h4a9g.pyomo.lp -stat=1 -solve -solu /var/tmp/pbs.8259409.pbs/tmpp27h4a9g.pyomo.soln (default strategy 1)\n", + "Option for printingOptions changed from normal to all\n", + "Presolve 172 (-222) rows, 111 (-75) columns and 608 (-267) elements\n", + "Statistics for presolved model\n", + "Original problem has 26 integers (26 of which binary)\n", + "Presolved problem has 25 integers (25 of which binary)\n", + "==== 110 zero objective 2 different\n", + "1 variables have objective of -0.0421598\n", + "110 variables have objective of 0\n", + "==== absolute objective values 2 different\n", + "110 variables have objective of 0\n", + "1 variables have objective of 0.0421598\n", + "==== for integers 25 zero objective 1 different\n", + "25 variables have objective of 0\n", + "==== for integers absolute objective values 1 different\n", + "25 variables have objective of 0\n", + "===== end objective counts\n", + "\n", + "\n", + "Problem has 172 rows, 111 columns (1 with objective) and 608 elements\n", + "Column breakdown:\n", + "0 of type 0.0->inf, 49 of type 0.0->up, 0 of type lo->inf, \n", + "37 of type lo->up, 0 of type free, 0 of type fixed, \n", + "0 of type -inf->0.0, 0 of type -inf->up, 25 of type 0.0->1.0 \n", + "Row breakdown:\n", + "8 of type E 0.0, 0 of type E 1.0, 0 of type E -1.0, \n", + "5 of type E other, 0 of type G 0.0, 0 of type G 1.0, \n", + "0 of type G other, 134 of type L 0.0, 0 of type L 1.0, \n", + "25 of type L other, 0 of type Range 0.0->1.0, 0 of type Range other, \n", + "0 of type Free \n", + "Continuous objective value is 0.315152 - 0.00 seconds\n", + "Cgl0003I 0 fixed, 0 tightened bounds, 2 strengthened rows, 0 substitutions\n", + "Cgl0004I processed model has 166 rows, 105 columns (25 integer (25 of which binary)) and 670 elements\n", + "Cbc0038I Initial state - 5 integers unsatisfied sum - 0.191951\n", + "Cbc0038I Pass 1: suminf. 0.00000 (0) obj. 
0.317969 iterations 17\n", + "Cbc0038I Solution found of 0.317969\n", + "Cbc0038I Relaxing continuous gives 0.317969\n", + "Cbc0038I Before mini branch and bound, 19 integers at bound fixed and 48 continuous\n", + "Cbc0038I Full problem 166 rows 105 columns, reduced to 49 rows 27 columns\n", + "Cbc0038I Mini branch and bound did not improve solution (0.01 seconds)\n", + "Cbc0038I Round again with cutoff of 0.317791\n", + "Cbc0038I Pass 2: suminf. 0.00876 (1) obj. 0.317791 iterations 1\n", + "Cbc0038I Pass 3: suminf. 0.18897 (1) obj. 0.317791 iterations 25\n", + "Cbc0038I Pass 4: suminf. 0.00876 (1) obj. 0.317791 iterations 45\n", + "Cbc0038I Pass 5: suminf. 0.18897 (1) obj. 0.317791 iterations 10\n", + "Cbc0038I Pass 6: suminf. 0.00876 (1) obj. 0.317791 iterations 9\n", + "Cbc0038I Pass 7: suminf. 0.00876 (1) obj. 0.317791 iterations 20\n", + "Cbc0038I Pass 8: suminf. 0.18897 (1) obj. 0.317791 iterations 11\n", + "Cbc0038I Pass 9: suminf. 0.00876 (1) obj. 0.317791 iterations 11\n", + "Cbc0038I Pass 10: suminf. 0.00876 (1) obj. 0.317791 iterations 4\n", + "Cbc0038I Pass 11: suminf. 0.18897 (1) obj. 0.317791 iterations 10\n", + "Cbc0038I Pass 12: suminf. 0.00876 (1) obj. 0.317791 iterations 8\n", + "Cbc0038I Pass 13: suminf. 0.00876 (1) obj. 0.317791 iterations 6\n", + "Cbc0038I Pass 14: suminf. 0.18897 (1) obj. 0.317791 iterations 9\n", + "Cbc0038I Pass 15: suminf. 0.00876 (1) obj. 0.317791 iterations 9\n", + "Cbc0038I Pass 16: suminf. 0.00876 (1) obj. 0.317791 iterations 6\n", + "Cbc0038I Pass 17: suminf. 0.18897 (1) obj. 0.317791 iterations 17\n", + "Cbc0038I Pass 18: suminf. 0.00876 (1) obj. 0.317791 iterations 18\n", + "Cbc0038I Pass 19: suminf. 0.00876 (1) obj. 0.317791 iterations 8\n", + "Cbc0038I Pass 20: suminf. 0.18897 (1) obj. 0.317791 iterations 15\n", + "Cbc0038I Pass 21: suminf. 0.00876 (1) obj. 0.317791 iterations 19\n", + "Cbc0038I Pass 22: suminf. 0.00876 (1) obj. 0.317791 iterations 25\n", + "Cbc0038I Pass 23: suminf. 0.18897 (1) obj. 0.317791 iterations 6\n", + "Cbc0038I Pass 24: suminf. 0.00876 (1) obj. 0.317791 iterations 5\n", + "Cbc0038I Pass 25: suminf. 0.00876 (1) obj. 0.317791 iterations 12\n", + "Cbc0038I Pass 26: suminf. 0.18897 (1) obj. 0.317791 iterations 6\n", + "Cbc0038I Pass 27: suminf. 0.00876 (1) obj. 0.317791 iterations 5\n", + "Cbc0038I Pass 28: suminf. 0.00876 (1) obj. 0.317791 iterations 13\n", + "Cbc0038I Pass 29: suminf. 0.18897 (1) obj. 0.317791 iterations 6\n", + "Cbc0038I Pass 30: suminf. 0.00876 (1) obj. 0.317791 iterations 15\n", + "Cbc0038I Pass 31: suminf. 0.00876 (1) obj. 
0.317791 iterations 6\n", + "Cbc0038I No solution found this major pass\n", + "Cbc0038I Before mini branch and bound, 1 integers at bound fixed and 46 continuous\n", + "Cbc0038I Full problem 166 rows 105 columns, reduced to 48 rows 27 columns\n", + "Cbc0038I Mini branch and bound did not improve solution (0.02 seconds)\n", + "Cbc0038I After 0.02 seconds - Feasibility pump exiting with objective of 0.317969 - took 0.02 seconds\n", + "Cbc0012I Integer solution of 0.31796885 found by feasibility pump after 0 iterations and 0 nodes (0.02 seconds)\n", + "Cbc0038I Full problem 166 rows 105 columns, reduced to 49 rows 27 columns\n", + "Cbc0031I 6 added rows had average density of 5.5\n", + "Cbc0013I At root node, 25 cuts changed objective from 0.31628066 to 0.31796885 in 1 passes\n", + "Cbc0014I Cut generator 0 (Probing) - 11 row cuts average 3.0 elements, 1 column cuts (1 active) in 0.000 seconds - new frequency is 1\n", + "Cbc0014I Cut generator 1 (Gomory) - 2 row cuts average 13.0 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is 1\n", + "Cbc0014I Cut generator 2 (Knapsack) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 3 (Clique) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 4 (MixedIntegerRounding2) - 4 row cuts average 3.2 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is 1\n", + "Cbc0014I Cut generator 5 (FlowCover) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 6 (TwoMirCuts) - 8 row cuts average 6.8 elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is 1\n", + "Cbc0001I Search completed - best objective 0.3179688539269278, took 31 iterations and 0 nodes (0.02 seconds)\n", + "Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost\n", + "Cuts at root node changed objective from 0.316281 to 0.317969\n", + "Probing was tried 1 times and created 12 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Gomory was tried 1 times and created 2 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Knapsack was tried 1 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Clique was tried 1 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 1 times and created 4 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "FlowCover was tried 1 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "TwoMirCuts was tried 1 times and created 8 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal solution found\n", + "\n", + "Objective value: 0.31796885\n", + "Enumerated nodes: 0\n", + "Total iterations: 31\n", + "Time (CPU seconds): 0.03\n", + "Time (Wallclock seconds): 0.03\n", + "\n", + "Total time (CPU seconds): 0.03 (Wallclock seconds): 0.03\n", + "\n" + ] + } + ], + "source": [ + "# create pyomo model\n", + "m1 = pyo.ConcreteModel()\n", + "\n", + "# create an OMLT block for the neural network and build its formulation\n", + "m1.nn = OmltBlock()\n", + "\n", + "# build formulation in block m.nn\n", + "gnn_with_fixed_graph(m1.nn, nn1, N, A, scaled_input_bounds=input_bounds)\n", + "\n", + "# set the 
objective as the single output of the model\n", + "m1.obj = pyo.Objective(expr=m1.nn.outputs[0])\n", + "\n", + "# solve the optimization problem\n", + "status = pyo.SolverFactory(\"cbc\").solve(m1, tee=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "We can evaluate the solution in the original model to verify it:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "[[0.31796885]]\n" ] } ], "source": [ + "X = []\n", + "edges = []\n", + "for u in range(N):\n", + " for v in range(N):\n", + " if u != v and pyo.value(m1.nn.A[u, v]):\n", + " edges.append((u, v))\n", + "for i in range(6):\n", + " X.append(pyo.value(m1.nn.inputs[i]))\n", + "X = np.array(X).reshape(3, 2)\n", + "edges = np.transpose(np.array(edges)).reshape(2, -1)\n", + "nn1.eval()\n", + "print(nn1(torch.tensor(X).float(), torch.tensor(edges)).detach().numpy())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "For smooth activation functions like Sigmoid, a smooth optimization solver (such as Ipopt) is needed:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Ipopt 3.14.12: \n", + "\n", + "******************************************************************************\n", + "This program contains Ipopt, a library for large-scale nonlinear optimization.\n", + " Ipopt is released as open source code under the Eclipse Public License (EPL).\n", + " For more information visit https://github.com/coin-or/Ipopt\n", + "******************************************************************************\n", + "\n", + "This is Ipopt version 3.14.12, running with linear solver MUMPS 5.2.1.\n", + "\n", + "Number of nonzeros in equality constraint Jacobian...: 395\n", + "Number of nonzeros in inequality constraint Jacobian.: 276\n", + "Number of nonzeros in Lagrangian Hessian.............: 26\n", + "\n", + "Total number of variables............................: 148\n", + " variables with only lower bounds: 0\n", + " variables with lower and upper bounds: 146\n", + " variables with only upper bounds: 0\n", + "Total number of equality constraints.................: 100\n", + "Total number of inequality constraints...............: 216\n", + " inequality constraints with only lower bounds: 0\n", + " inequality constraints with lower and upper bounds: 0\n", + " inequality constraints with only upper bounds: 216\n", + "\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 0 0.0000000e+00 4.73e-01 1.32e-04 -1.0 0.00e+00 - 0.00e+00 0.00e+00 0\n", + " 1 7.5562415e-02 3.99e-01 6.07e+01 -1.0 6.41e-01 - 4.88e-02 1.57e-01f 1\n", + " 2 1.6790548e-01 3.08e-01 4.25e+01 -1.0 4.05e-01 - 1.81e-01 2.28e-01f 1\n", + " 3 2.5794319e-01 2.19e-01 3.18e+01 -1.0 3.13e-01 - 5.10e-01 2.88e-01f 1\n", + " 4 3.9100053e-01 8.85e-02 1.45e+01 -1.0 2.23e-01 - 9.76e-01 5.97e-01h 1\n", + " 5 4.4307883e-01 3.72e-02 2.75e+01 -1.0 1.41e-01 - 1.00e+00 5.79e-01h 1\n", + " 6 4.6540208e-01 1.52e-02 6.33e+01 -1.0 8.16e-02 - 1.00e+00 5.91e-01h 1\n", + " 7 4.7445376e-01 6.31e-03 1.55e+02 -1.0 3.67e-02 - 1.00e+00 5.85e-01h 1\n", + " 8 4.7820844e-01 2.61e-03 3.74e+02 -1.0 1.47e-02 - 1.00e+00 5.86e-01h 1\n", + " 9 4.7976341e-01 1.08e-03 9.04e+02 -1.0 6.36e-03 - 1.00e+00 5.86e-01h 1\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 10 4.8040743e-01 4.48e-04 
2.18e+03 -1.0 2.53e-03 - 1.00e+00 5.86e-01h 1\n", + " 11 4.8067424e-01 1.86e-04 5.26e+03 -1.0 1.09e-03 - 1.00e+00 5.86e-01h 1\n", + " 12 4.8078473e-01 7.67e-05 1.27e+04 -1.0 4.34e-04 - 1.00e+00 5.87e-01h 1\n", + " 13 4.8083051e-01 3.16e-05 3.04e+04 -1.0 1.87e-04 - 1.00e+00 5.88e-01h 1\n", + " 14 4.8084947e-01 1.29e-05 7.21e+04 -1.0 7.40e-05 - 1.00e+00 5.91e-01h 1\n", + " 15 4.8085732e-01 5.18e-06 1.67e+05 -1.0 3.15e-05 - 1.00e+00 5.99e-01h 1\n", + " 16 4.8086057e-01 1.98e-06 3.65e+05 -1.0 1.21e-05 - 1.00e+00 6.18e-01h 1\n", + " 17 4.8086191e-01 6.64e-07 6.79e+05 -1.0 4.84e-06 - 1.00e+00 6.65e-01h 1\n", + " 18 4.8086192e-01 6.47e-07 3.49e+06 -1.0 1.54e-06 - 1.00e+00 2.47e-02f 6\n", + " 19 4.8086258e-01 1.04e-10 1.00e-06 -1.0 1.52e-06 - 1.00e+00 1.00e+00h 1\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 20 4.8086253e-01 1.78e-10 4.52e+02 -8.6 7.22e-05 - 1.00e+00 1.00e+00h 1\n", + " 21 4.8001913e-01 2.86e-02 2.80e+02 -8.6 1.17e+00 - 5.24e-01 1.00e+00f 1\n", + " 22 4.8001744e-01 1.08e-02 2.14e+01 -8.6 2.16e-01 - 9.00e-01 6.57e-01h 1\n", + " 23 4.8001271e-01 1.79e-03 2.28e+01 -8.6 3.03e-01 - 8.31e-01 1.00e+00h 1\n", + " 24 4.8000768e-01 1.81e-04 4.39e+01 -8.6 9.74e-02 - 7.32e-01 1.00e+00h 1\n", + " 25 4.8000768e-01 1.80e-04 5.02e+01 -8.6 1.67e-02 - 5.11e-01 4.80e-03h 1\n", + " 26 4.8000768e-01 1.80e-04 7.68e+01 -8.6 1.72e-02 - 2.92e-01 2.94e-04f 2\n", + " 27 4.8000768e-01 1.80e-04 1.18e+02 -8.6 1.73e-02 - 6.38e-01 2.96e-04h 1\n", + " 28 4.8000768e-01 1.80e-04 1.25e+02 -8.6 1.76e-02 - 3.09e-01 6.58e-05h 2\n", + " 29 4.8000768e-01 1.80e-04 1.41e+02 -8.6 1.76e-02 - 1.00e+00 2.94e-04h 1\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 30 4.8000669e-01 5.04e-06 2.97e+00 -8.6 1.77e-02 - 2.99e-01 1.00e+00f 1\n", + " 31 4.8000669e-01 5.04e-06 8.32e+01 -8.6 4.66e-05 - 5.89e-01 2.35e-04h 1\n", + " 32 4.8000669e-01 5.04e-06 1.14e+02 -8.6 5.90e-05 - 5.27e-01 3.92e-05h 1\n", + " 33 4.8000669e-01 5.04e-06 1.25e+02 -8.6 6.02e-05 - 3.88e-01 6.73e-06f 2\n", + " 34 4.8000669e-01 5.04e-06 1.25e+02 -8.6 6.06e-05 - 5.49e-02 2.21e-05h 1\n", + " 35 4.8000669e-01 5.04e-06 1.27e+02 -8.6 4.61e-04 - 8.33e-02 7.28e-08f 2\n", + " 36 4.8000669e-01 5.04e-06 1.34e+02 -8.6 6.09e-05 - 4.80e-01 7.71e-05f 2\n", + " 37 4.8000669e-01 5.04e-06 1.35e+02 -8.6 6.11e-05 - 1.75e-01 1.36e-05h 1\n", + " 38 4.8000669e-01 5.04e-06 1.36e+02 -8.6 1.27e-04 - 9.83e-02 1.38e-07f 2\n", + " 39 4.8000669e-01 5.04e-06 1.37e+02 -8.6 6.12e-05 - 2.54e-01 9.45e-04h 1\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 40 4.8000669e-01 4.94e-06 1.35e+02 -8.6 6.12e-05 - 1.83e-01 1.89e-02f 1\n", + " 41 4.8000669e-01 4.94e-06 1.36e+02 -8.6 2.85e-04 - 9.97e-02 1.10e-07f 2\n", + " 42 4.8000669e-01 4.94e-06 1.37e+02 -8.6 6.00e-05 - 1.72e-01 4.40e-05h 1\n", + " 43 4.8000669e-01 4.94e-06 1.37e+02 -8.6 1.41e-04 - 9.04e-02 1.49e-06f 2\n", + " 44 4.8000669e-01 4.94e-06 1.38e+02 -8.6 6.01e-05 - 1.71e-01 5.97e-06f 2\n", + " 45 4.8000670e-01 2.65e-11 6.17e+01 -8.6 6.01e-05 - 1.56e-01 1.00e+00h 1\n", + " 46 4.8000670e-01 2.64e-11 5.65e+01 -8.6 3.45e-06 - 5.27e-02 4.42e-04h 1\n", + " 47 4.8000670e-01 2.26e-11 7.42e+01 -8.6 1.45e-07 - 6.61e-01 1.47e-01f 2\n", + " 48 4.8000670e-01 2.25e-11 8.82e+01 -8.6 7.27e-06 - 2.11e-01 1.18e-03h 1\n", + " 49 4.8000670e-01 2.25e-11 1.30e+02 -8.6 8.86e-06 - 7.86e-01 1.57e-04f 2\n", + "iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls\n", + " 50 4.8000670e-01 2.25e-11 1.33e+02 -8.6 4.24e-05 - 2.54e-01 1.00e-04h 
1\n", + " 51 4.8000670e-01 2.25e-11 1.41e+02 -8.6 6.81e-05 - 1.00e+00 2.59e-05f 2\n", + " 52 4.8000670e-01 3.10e-11 2.08e+00 -8.6 1.27e-07 - 2.54e-01 1.00e+00h 1\n", + " 53 4.8000670e-01 3.19e-09 1.41e+02 -8.6 3.37e-05 - 1.00e+00 4.73e-04h 1\n", + " 54 4.8000670e-01 9.74e-11 7.50e-11 -8.6 1.65e-08 - 1.00e+00 1.00e+00f 1\n", + "\n", + "Number of Iterations....: 54\n", + "\n", + " (scaled) (unscaled)\n", + "Objective...............: 4.8000669509937166e-01 4.8000669509937166e-01\n", + "Dual infeasibility......: 7.5043113584813605e-11 7.5043113584813605e-11\n", + "Constraint violation....: 9.7397756526618195e-11 9.7397756526618195e-11\n", + "Variable bound violation: 0.0000000000000000e+00 0.0000000000000000e+00\n", + "Complementarity.........: 2.5636037643218892e-09 2.5636037643218892e-09\n", + "Overall NLP error.......: 9.7397756526618195e-11 2.5636037643218892e-09\n", + "\n", + "\n", + "Number of objective function evaluations = 72\n", + "Number of objective gradient evaluations = 55\n", + "Number of equality constraint evaluations = 72\n", + "Number of inequality constraint evaluations = 72\n", + "Number of equality constraint Jacobian evaluations = 55\n", + "Number of inequality constraint Jacobian evaluations = 55\n", + "Number of Lagrangian Hessian evaluations = 54\n", + "Total seconds in IPOPT = 0.125\n", + "\n", + "EXIT: Optimal Solution Found.\n", + "\b" + ] + } + ], + "source": [ + "# define a GCN sequential model\n", + "nn2 = GCN_Sequential(Sigmoid, global_mean_pool)\n", + "\n", + "# create pyomo model\n", + "m2 = pyo.ConcreteModel()\n", + "\n", + "# create an OMLT block for the neural network and build its formulation\n", + "m2.nn = OmltBlock()\n", + "\n", + "# build formulation in block m.nn\n", + "gnn_with_fixed_graph(m2.nn, nn2, N, A, scaled_input_bounds=input_bounds)\n", + "\n", + "# set the objective as the single output of the model\n", + "m2.obj = pyo.Objective(expr=m2.nn.outputs[0])\n", + "\n", + "# solve the optimization problem\n", + "status = pyo.SolverFactory(\"ipopt\").solve(m2, tee=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 2: Optimizing a GNN with Non-Fixed Graph\n", + "\n", + "Since GCN is not supported when the input graph is not fixed, we define a SAGE in `torch_geometric` as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from torch.nn import Linear, ReLU\n", + "from torch_geometric.nn import Sequential, SAGEConv\n", + "from torch_geometric.nn import global_add_pool\n", + "from omlt.io.torch_geometric import gnn_with_non_fixed_graph\n", + "\n", + "import pyomo.environ as pyo\n", + "from omlt import OmltBlock\n", + "\n", + "\n", + "def SAGE_Sequential(activation, pooling):\n", + " torch.manual_seed(123)\n", + " return Sequential(\n", + " \"x, edge_index\",\n", + " [\n", + " (SAGEConv(2, 4, aggr=\"sum\"), \"x, edge_index -> x\"),\n", + " activation(),\n", + " (SAGEConv(4, 4, aggr=\"sum\"), \"x, edge_index -> x\"),\n", + " activation(),\n", + " Linear(4, 4),\n", + " (pooling, \"x, None -> x\"),\n", + " Linear(4, 2),\n", + " activation(),\n", + " Linear(2, 1),\n", + " ],\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We follow the same procedure as in Example 1 except that $A$ is no longer needed for `gnn_with_non_fixed_graph`:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Welcome to the CBC MILP Solver \n", + "Version: 2.9.9 \n", + "Build Date: Oct 13 2018 \n", + "\n", + "command line - /rds/general/user/sz421/home/anaconda3/envs/OMLT_test/bin/cbc -printingOptions all -import /var/tmp/pbs.8259409.pbs/tmp1n22ks_r.pyomo.lp -stat=1 -solve -solu /var/tmp/pbs.8259409.pbs/tmp1n22ks_r.pyomo.soln (default strategy 1)\n", + "Option for printingOptions changed from normal to all\n", + "Presolve 260 (-137) rows, 141 (-51) columns and 852 (-173) elements\n", + "Statistics for presolved model\n", + "Original problem has 32 integers (32 of which binary)\n", + "Presolved problem has 29 integers (29 of which binary)\n", + "==== 139 zero objective 3 different\n", + "139 variables have objective of 0\n", + "1 variables have objective of 0.203177\n", + "1 variables have objective of 0.686721\n", + "==== absolute objective values 3 different\n", + "139 variables have objective of 0\n", + "1 variables have objective of 0.203177\n", + "1 variables have objective of 0.686721\n", + "==== for integers 29 zero objective 1 different\n", + "29 variables have objective of 0\n", + "==== for integers absolute objective values 1 different\n", + "29 variables have objective of 0\n", + "===== end objective counts\n", + "\n", + "\n", + "Problem has 260 rows, 141 columns (2 with objective) and 852 elements\n", + "Column breakdown:\n", + "0 of type 0.0->inf, 62 of type 0.0->up, 0 of type lo->inf, \n", + "50 of type lo->up, 0 of type free, 0 of type fixed, \n", + "0 of type -inf->0.0, 0 of type -inf->up, 29 of type 0.0->1.0 \n", + "Row breakdown:\n", + "0 of type E 0.0, 0 of type E 1.0, 0 of type E -1.0, \n", + "26 of type E other, 0 of type G 0.0, 0 of type G 1.0, \n", + "0 of type G other, 154 of type L 0.0, 24 of type L 1.0, \n", + "56 of type L other, 0 of type Range 0.0->1.0, 0 of type Range other, \n", + "0 of type Free \n", + "Continuous objective value is 0.107106 - 0.00 seconds\n", + "Cgl0003I 0 fixed, 0 tightened bounds, 1 strengthened rows, 0 substitutions\n", + "Cgl0004I processed model has 237 rows, 118 columns (29 integer (29 of which binary)) and 969 elements\n", + "Cbc0038I Initial state - 17 integers unsatisfied sum - 1.66726\n", + "Cbc0038I Pass 1: suminf. 1.01765 (9) obj. 
0.107106 iterations 47\n", + "Cbc0038I Solution found of 0.107106\n", + "Cbc0038I Relaxing continuous gives 0.107106\n", + "Cbc0038I Before mini branch and bound, 12 integers at bound fixed and 40 continuous\n", + "Cbc0038I Mini branch and bound did not improve solution (0.01 seconds)\n", + "Cbc0038I After 0.01 seconds - Feasibility pump exiting with objective of 0.107106 - took 0.00 seconds\n", + "Cbc0012I Integer solution of 0.10710584 found by feasibility pump after 0 iterations and 0 nodes (0.01 seconds)\n", + "Cbc0001I Search completed - best objective 0.1071058437228203, took 0 iterations and 0 nodes (0.01 seconds)\n", + "Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost\n", + "Cuts at root node changed objective from 0.107106 to 0.107106\n", + "Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal solution found\n", + "\n", + "Objective value: 0.10710584\n", + "Enumerated nodes: 0\n", + "Total iterations: 0\n", + "Time (CPU seconds): 0.01\n", + "Time (Wallclock seconds): 0.02\n", + "\n", + "Total time (CPU seconds): 0.01 (Wallclock seconds): 0.02\n", + "\n" ] } ], "source": [ + "# define a SAGE sequential model\n", + "nn3 = SAGE_Sequential(ReLU, global_add_pool)\n", + "# number of nodes\n", + "N = 3\n", + "\n", + "# size of inputs = number of nodes x number of input features\n", + "input_size = [6]\n", + "# define lower and upper bounds for each input\n", + "input_bounds = {}\n", + "for i in range(input_size[0]):\n", + " input_bounds[(i)] = (-1.0, 1.0)\n", + "\n", + "# create pyomo model\n", + "m3 = pyo.ConcreteModel()\n", + "\n", + "# create an OMLT block for the neural network and build its formulation\n", + "m3.nn = OmltBlock()\n", + "\n", + "# build formulation in block m.nn\n", + "gnn_with_non_fixed_graph(m3.nn, nn3, N, scaled_input_bounds=input_bounds)\n", + "\n", + "# set the objective as the single output of the model\n", + "m3.obj = pyo.Objective(expr=m3.nn.outputs[0])\n", + "\n", + "# solve the optimization problem\n", + "status = pyo.SolverFactory(\"cbc\").solve(m3, tee=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:OMLT_test]", "language": "python", "name": "conda-env-OMLT_test-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" } }, "nbformat": 4, "nbformat_minor": 2 +}
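Because `m3.nn.A[u, v]` are binary Pyomo variables when the graph is not fixed, ordinary Pyomo constraints can restrict the graph being designed. A minimal sketch under stated assumptions (it reuses `m3` and `N` from the notebook's last example; `max_edges` and `edge_budget` are illustrative names, not part of the notebook):

    # hypothetical budget on the number of undirected edges in the designed graph
    max_edges = 2
    m3.edge_budget = pyo.Constraint(
        expr=sum(m3.nn.A[u, v] for u in range(N) for v in range(u + 1, N)) <= max_edges
    )
    status = pyo.SolverFactory("cbc").solve(m3, tee=False)

Only the upper triangle is summed because `gnn_with_non_fixed_graph` already constrains `A` to be symmetric with a fixed diagonal.

diff --git a/setup.cfg b/setup.cfg index 5e008d57..7b4233e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -88,6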
+88,7 @@ testing = torchvision tqdm protobuf==3.20.3 + torch_geometric testing_lean = setuptools
diff --git a/src/omlt/dependencies.py b/src/omlt/dependencies.py index a5fc41ba..6330c38f 100644 --- a/src/omlt/dependencies.py +++ b/src/omlt/dependencies.py @@ -4,4 +4,7 @@ onnx, onnx_available = attempt_import("onnx") keras, keras_available = attempt_import("tensorflow.keras") +torch, torch_available = attempt_import("torch") + +torch_geometric, torch_geometric_available = attempt_import("torch_geometric") lineartree, lineartree_available = attempt_import("lineartree")
diff --git a/src/omlt/io/__init__.py b/src/omlt/io/__init__.py index c290b1bf..13b40c8c 100644 --- a/src/omlt/io/__init__.py +++ b/src/omlt/io/__init__.py @@ -1,4 +1,9 @@ -from omlt.dependencies import keras_available, onnx_available +from omlt.dependencies import ( + onnx_available, + keras_available, + torch_available, + torch_geometric_available, +) if onnx_available: from omlt.io.onnx import (
diff --git a/src/omlt/io/torch_geometric/__init__.py b/src/omlt/io/torch_geometric/__init__.py new file mode 100644 index 00000000..50dc3555 --- /dev/null +++ b/src/omlt/io/torch_geometric/__init__.py @@ -0,0 +1,8 @@ +from omlt.io.torch_geometric.torch_geometric_reader import ( + load_torch_geometric_sequential, +) + +from omlt.io.torch_geometric.build_gnn_formulation import ( + gnn_with_fixed_graph, + gnn_with_non_fixed_graph, +)
diff --git a/src/omlt/io/torch_geometric/build_gnn_formulation.py b/src/omlt/io/torch_geometric/build_gnn_formulation.py new file mode 100644 index 00000000..f63af267 --- /dev/null +++ b/src/omlt/io/torch_geometric/build_gnn_formulation.py @@ -0,0 +1,148 @@ +import numpy as np +import pyomo.environ as pyo +from omlt.neuralnet import FullSpaceNNFormulation +from omlt.io.torch_geometric import load_torch_geometric_sequential + + +def gnn_with_non_fixed_graph( + block, + nn, + N, + scaling_object=None, + scaled_input_bounds=None, + unscaled_input_bounds=None, +): + """ + Build formulation for a torch_geometric graph neural network model (built with Sequential). + Since the input graph is not fixed, the elements of the adjacency matrix are decision variables. + + Parameters + ---------- + block : Block + the Pyomo block + nn : torch_geometric.model + A torch_geometric model that was built with Sequential + N : int + The number of nodes of input graph + scaling_object : instance of ScalingInterface or None + Provide an instance of a scaling object to use to scale inputs --> scaled_inputs + and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + scaled_input_bounds : dict or None + A dict that contains the bounds on the scaled variables (the + direct inputs to the neural network). If None, then no bounds + are specified or they are generated using unscaled bounds. + unscaled_input_bounds : dict or None + A dict that contains the bounds on the unscaled variables (the + direct inputs to the neural network). If specified the scaled_input_bounds + dictionary will be generated using the provided scaling object. + If None, then no bounds are specified. 
+ + Returns + ------- + OmltBlock (formulated) + """ + + # build NetworkDefinition for nn + net = load_torch_geometric_sequential( + nn=nn, + N=N, + A=None, + scaling_object=scaling_object, + scaled_input_bounds=scaled_input_bounds, + unscaled_input_bounds=unscaled_input_bounds, + ) + + # define binary variables for adjacency matrix + block.A = pyo.Var( + pyo.Set(initialize=range(N)), + pyo.Set(initialize=range(N)), + within=pyo.Binary, + ) + # assume that the self contribution always exists + for u in range(N): + block.A[u, u].fix(1) + # assume the adjacency matrix is always symmetric + block.symmetric_adjacency = pyo.ConstraintList() + for u in range(N): + for v in range(u + 1, N): + block.symmetric_adjacency.add((block.A[u, v] == block.A[v, u])) + + # build formulation for GNN + block.build_formulation(FullSpaceNNFormulation(net)) + + return block + + +def gnn_with_fixed_graph( + block, + nn, + N, + A, + scaling_object=None, + scaled_input_bounds=None, + unscaled_input_bounds=None, +): + """ + Build formulation for a torch_geometric graph neural network model (built with Sequential). + Given the adjacency matrix, the input graph structure is fixed. + + Parameters + ---------- + block : Block + the Pyomo block + nn : torch_geometric.model + A torch_geometric model that was built with Sequential + N : int + The number of nodes of input graph + A : matrix-like + The adjacency matrix of input graph + scaling_object : instance of ScalingInterface or None + Provide an instance of a scaling object to use to scale inputs --> scaled_inputs + and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + scaled_input_bounds : dict or None + A dict that contains the bounds on the scaled variables (the + direct inputs to the neural network). If None, then no bounds + are specified or they are generated using unscaled bounds. + unscaled_input_bounds : dict or None + A dict that contains the bounds on the unscaled variables (the + direct inputs to the neural network). If specified the scaled_input_bounds + dictionary will be generated using the provided scaling object. + If None, then no bounds are specified. 
+ + Returns + ------- + OmltBlock (formulated) + """ + + # assume the adjacency matrix is always symmetric + assert np.array_equal(A, np.transpose(A)) + + # build NetworkDefinition for nn + net = load_torch_geometric_sequential( + nn=nn, + N=N, + A=A, + scaling_object=scaling_object, + scaled_input_bounds=scaled_input_bounds, + unscaled_input_bounds=unscaled_input_bounds, + ) + + # define binary variables for adjacency matrix + block.A = pyo.Var( + pyo.Set(initialize=range(N)), + pyo.Set(initialize=range(N)), + within=pyo.Binary, + ) + # fix A using given values + for u in range(N): + for v in range(N): + block.A[u, v].fix(A[u, v]) + + # assume that the self contribution always exists + for u in range(N): + block.A[u, u].fix(1) + + # build formulation for GNN + block.build_formulation(FullSpaceNNFormulation(net)) + + return block
diff --git a/src/omlt/io/torch_geometric/torch_geometric_reader.py b/src/omlt/io/torch_geometric/torch_geometric_reader.py new file mode 100644 index 00000000..beee1516 --- /dev/null +++ b/src/omlt/io/torch_geometric/torch_geometric_reader.py @@ -0,0 +1,289 @@ +import numpy as np + +from omlt.neuralnet.layer import DenseLayer, InputLayer, GNNLayer +from omlt.neuralnet.network_definition import NetworkDefinition +import warnings + + +def _compute_gcn_norm(A): + """ + Calculate the norm for a GCN layer + + Parameters + ---------- + A : matrix-like + the adjacency matrix. + """ + N = A.shape[0] + Ahat = A + np.eye(N) + degrees = np.sum(Ahat, axis=0) + gcn_norm = np.zeros(A.shape) + for u in range(N): + for v in range(N): + gcn_norm[u, v] = Ahat[u, v] / np.sqrt(degrees[u] * degrees[v]) + return gcn_norm + + +def _compute_sage_norm(A, aggr): + """ + Calculate the norm for a SAGE layer + + Parameters + ---------- + A : matrix-like + the adjacency matrix. + aggr : str + the aggregation function, "sum" (default) or "mean" + """ + N = A.shape[0] + # sum aggregation + sage_norm = A + np.eye(N) + # mean aggregation + if aggr == "mean": + degrees = np.sum(A, axis=0) + for u in range(N): + for v in range(N): + if u != v and degrees[u] > 0: + sage_norm[u, v] = sage_norm[u, v] / degrees[u] + return sage_norm
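In equation form, the two normalizations above can be stated compactly (a summary of the code for reference): with $\hat A = A + I_N$ and $\hat d_u = \sum_v \hat A_{u,v}$, `_compute_gcn_norm` returns the symmetric normalization used by GCNConv,

    .. math:: \text{gcn\_norm}_{u,v} = \frac{\hat A_{u,v}}{\sqrt{\hat d_u \hat d_v}},

i.e. $\hat D^{-1/2} \hat A \hat D^{-1/2}$, while `_compute_sage_norm` returns $A + I_N$ for sum aggregation and, for mean aggregation, additionally divides each off-diagonal entry of row $u$ by the degree $d_u = \sum_v A_{u,v}$.

+ + +def _process_gnn_parameters(gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm): + """ + Construct the weights and biases for the GNNLayer class + + Parameters + ---------- + gnn_weights_uv : matrix-like + the weights between two different nodes, shape: (out_channels, in_channels). + gnn_weights_vv : matrix-like + the weights from a node to itself, shape: (out_channels, in_channels).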
+ gnn_biases : array-like + the biases, shape: (out_channels, ) + gnn_norm : matrix-like + the norm for the GNN layer, shape: (N, N) + + Returns + ------- + weights : matrix-like + the weights for the GNNLayer class, shape: (N * in_channels, N * out_channels) + biases: array-like + the biases for the GNNLayer class, shape: (N * out_channels, ) + """ + out_channels, in_channels = gnn_weights_uv.shape + N = gnn_norm.shape[0] + weights = np.zeros((N * in_channels, N * out_channels), dtype=gnn_weights_uv.dtype) + biases = np.zeros(N * out_channels, dtype=gnn_biases.dtype) + for output_index in range(N * out_channels): + biases[output_index] = gnn_biases[output_index % out_channels] + for input_index in range(N * in_channels): + input_node_index = input_index // in_channels + output_node_index = output_index // out_channels + if input_node_index != output_node_index: + weights[input_index, output_index] = ( + gnn_norm[output_node_index, input_node_index] + * gnn_weights_uv[ + output_index % out_channels, input_index % in_channels + ] + ) + else: + weights[input_index, output_index] = ( + gnn_norm[output_node_index, input_node_index] + * gnn_weights_vv[ + output_index % out_channels, input_index % in_channels + ] + ) + return weights, biases + + +_LAYER_OP_TYPES_FIXED_GRAPH = ["Linear", "GCNConv", "SAGEConv"] +_LAYER_OP_TYPES_NON_FIXED_GRAPH = ["Linear", "SAGEConv"] +_ACTIVATION_OP_TYPES = ["ReLU", "Sigmoid", "LogSoftmax", "Softplus", "Tanh"] +_POOLING_OP_TYPES = ["global_mean_pool", "global_add_pool"] +_AGGREGATION_OP_TYPES = ["sum", "mean"] +_OP_TYPES = _LAYER_OP_TYPES_FIXED_GRAPH + _ACTIVATION_OP_TYPES + _POOLING_OP_TYPES + + +def load_torch_geometric_sequential( + nn, + N, + A=None, + scaling_object=None, + scaled_input_bounds=None, + unscaled_input_bounds=None, +): + """ + Load a torch_geometric graph neural network model (built with Sequential) into + an OMLT network definition object. This network definition object + can be used in different formulations. + + Parameters + ---------- + nn : torch_geometric.model + A torch_geometric model that was built with Sequential + N : int + The number of nodes of input graph + A : matrix-like + The adjacency matrix of input graph + scaling_object : instance of ScalingInterface or None + Provide an instance of a scaling object to use to scale inputs --> scaled_inputs + and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + scaled_input_bounds : dict or None + A dict that contains the bounds on the scaled variables (the + direct inputs to the neural network). If None, then no bounds + are specified or they are generated using unscaled bounds. + unscaled_input_bounds : dict or None + A dict that contains the bounds on the unscaled variables (the + direct inputs to the neural network). If specified the scaled_input_bounds + dictionary will be generated using the provided scaling object. + If None, then no bounds are specified. 
+ + Returns + ------- + NetworkDefinition + """ + n_inputs = N * nn[0].in_channels + + net = NetworkDefinition( + scaling_object=scaling_object, + scaled_input_bounds=scaled_input_bounds, + unscaled_input_bounds=unscaled_input_bounds, + ) + + prev_layer = InputLayer([n_inputs]) + net.add_layer(prev_layer) + + operations = [] + for l in nn: + op_name = None + if l.__class__.__name__ == "function": + op_name = l.__name__ + else: + op_name = l.__class__.__name__ + + if op_name not in _OP_TYPES: + raise ValueError("this operation is not supported") + operations.append(op_name) + + if A is None: + # If A is None, then the graph is not fixed. + # Only layers in _LAYER_OP_TYPES_NON_FIXED_GRAPH are supported. + # Only "sum" aggregation is supported. + # Since all weights and biases are possibly needed, A is set to correspond to a complete graph. + for index, l in enumerate(nn): + if ( + operations[index] + in ["Linear"] + _ACTIVATION_OP_TYPES + _POOLING_OP_TYPES + ): + # nonlinear activation results in a MINLP + if operations[index] in ["Sigmoid", "LogSoftmax", "Softplus", "Tanh"]: + warnings.warn( + "nonlinear activation results in a MINLP", stacklevel=2 + ) + # Linear layers, all activation functions, and all pooling functions are still supported. + continue + if operations[index] not in _LAYER_OP_TYPES_NON_FIXED_GRAPH: + raise ValueError( + "this layer is not supported when the graph is not fixed" + ) + elif l.aggr != "sum": + raise ValueError( + "this aggregation is not supported when the graph is not fixed" + ) + + A = np.ones((N, N)) - np.eye(N) + + for index, l in enumerate(nn): + if operations[index] in _ACTIVATION_OP_TYPES: + # Skip activation layers since they are already handled in last layer + continue + + activation = None + if index + 1 < len(nn) and operations[index + 1] in _ACTIVATION_OP_TYPES: + # Check if this layer has an activation function + activation = operations[index + 1].lower() + + if operations[index] == "Linear": + gnn_weights = l.weight.detach().numpy() + gnn_biases = l.bias.detach().numpy() + # A linear layer is either applied on each node's features (i.e., prev_layer.output_size[-1] = N * gnn_weights.shape[1]) + # or the features after pooling (i.e., prev_layer.output_size[-1] = gnn_weights.shape[1]) + gnn_norm = np.eye(prev_layer.output_size[-1] // gnn_weights.shape[1]) + weights, biases = _process_gnn_parameters( + gnn_weights, gnn_weights, gnn_biases, gnn_norm + ) + n_layer_inputs, n_layer_outputs = weights.shape + curr_layer = DenseLayer( + [n_layer_inputs], + [n_layer_outputs], + activation=activation, + weights=weights, + biases=biases, + ) + elif operations[index] == "GCNConv": + assert l.improved == False + assert l.cached == False + assert l.add_self_loops == True + assert l.normalize == True + gnn_weights = l.lin.weight.detach().numpy() + gnn_biases = l.bias.detach().numpy() + gnn_norm = _compute_gcn_norm(A) + weights, biases = _process_gnn_parameters( + gnn_weights, gnn_weights, gnn_biases, gnn_norm + ) + n_layer_inputs, n_layer_outputs = weights.shape + curr_layer = GNNLayer( + [n_layer_inputs], + [n_layer_outputs], + activation=activation, + weights=weights, + biases=biases, + N=N, + ) + elif operations[index] == "SAGEConv": + assert l.normalize == False + assert l.project == False + assert l.aggr in _AGGREGATION_OP_TYPES + gnn_weights_uv = l.lin_l.weight.detach().numpy() + gnn_biases = l.lin_l.bias.detach().numpy() + gnn_weights_vv = np.zeros(shape=gnn_weights_uv.shape) + if l.root_weight: + gnn_weights_vv = l.lin_r.weight.detach().numpy() + gnn_norm = 
_compute_sage_norm(A, l.aggr) + weights, biases = _process_gnn_parameters( + gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm + ) + n_layer_inputs, n_layer_outputs = weights.shape + curr_layer = GNNLayer( + [n_layer_inputs], + [n_layer_outputs], + activation=activation, + weights=weights, + biases=biases, + N=N, + ) + elif operations[index] in _POOLING_OP_TYPES: + # Both mean and add pooling layers can be transformed into a DenseLayer + n_layer_inputs = prev_layer.output_size[-1] + n_layer_outputs = prev_layer.output_size[-1] // N + weights = np.zeros((n_layer_inputs, n_layer_outputs)) + biases = np.zeros(n_layer_outputs) + for input_index in range(n_layer_inputs): + for output_index in range(n_layer_outputs): + if input_index % n_layer_outputs == output_index: + # mean pooling + if operations[index] == "global_mean_pool": + weights[input_index, output_index] = 1.0 / N + # add pooling + else: + weights[input_index, output_index] = 1.0 + curr_layer = DenseLayer( + [n_layer_inputs], + [n_layer_outputs], + weights=weights, + biases=biases, + ) + net.add_layer(curr_layer) + net.add_edge(prev_layer, curr_layer) + prev_layer = curr_layer + return net
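As a worked form of the pooling transformation above (a restatement of the loop for reference): with $N$ nodes and $F$ output node features, the equivalent `DenseLayer` for `global_mean_pool` has the weight matrix

    .. math:: W_{i,k} = \begin{cases} 1/N, & i \bmod F = k,\\ 0, & \text{otherwise,} \end{cases}

so each pooled feature is $y_k = \frac{1}{N}\sum_{v=0}^{N-1} x_{vF+k}$; `global_add_pool` uses $1$ in place of $1/N$.

diff --git a/src/omlt/neuralnet/layer.py b/src/omlt/neuralnet/layer.py index f5df49b6..fd1b3234 100644 --- a/src/omlt/neuralnet/layer.py +++ b/src/omlt/neuralnet/layer.py @@ -236,6 +236,159 @@ def _eval(self, x): return y + +class GNNLayer(DenseLayer): + r""" + We additionally introduce the following notations to describe the GNN layer: + + .. math:: + + \begin{align*} + N &:= \text{the number of nodes in the graph}\\ + u &:= \text{the node index of $x_i$, $u=\lfloor iN/F_{in}\rfloor$}\\ + v &:= \text{the node index of $y_j$, $v=\lfloor jN/F_{out}\rfloor$}\\ + A_{u,v} &:= \text{the edge between node $u$ and $v$}\\ + \end{align*} + + The GNN layer is defined by: + + .. 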
math:: + + \begin{align*} + y_j = \sigma \left(\sum\limits_{i=0}^{F_{in}-1}A_{u,v}w_{ij}x_i+b_j\right), && \forall 0\le j<F_{out}\\ + \end{align*} + + Parameters + ---------- + input_size : tuple + the size of the input. + output_size : tuple + the size of the output. + weights : matrix-like + the weight matrix. + biases : array-like + the biases. + N : int + the number of nodes in the graph + activation : str or None + activation function name + input_index_mapper : IndexMapper or None + map indexes from this layer index to the input layer index size + """ + + def __init__( + self, + input_size, + output_size, + weights, + biases, + N, + *, + activation=None, + input_index_mapper=None, + ): + if input_size[-1] % N != 0: + raise ValueError( + "Input size must equal to the number of nodes multiplied by the number of input node features" + ) + if output_size[-1] % N != 0: + raise ValueError( + "Output size must equal to the number of nodes multiplied by the number of output node features" + ) + super().__init__( + input_size, + output_size, + weights, + biases, + activation=activation, + input_index_mapper=input_index_mapper, + ) + self.__N = N + self.__gnn_input_size = input_size[-1] // N + self.__gnn_output_size = output_size[-1] // N + + @property + def N(self): + """Return the number of nodes in the graph""" + return self.__N + + @property + def gnn_input_size(self): + """Return the number of input features of each node""" + return self.__gnn_input_size + + @property + def gnn_output_size(self): + """Return the number of output features of each node""" + return self.__gnn_output_size + + def __str__(self): + return f"GNNLayer(input_size={self.input_size}, output_size={self.output_size})" + + def _eval_with_adjacency(self, x, A): + # evaluate the layer for a given adjacency matrix A; with A equal to the + # all-ones matrix this reduces to the inherited dense evaluation + x_reshaped = ( + np.reshape(x, self.input_index_mapper.output_size) + if self.input_index_mapper is not None + else x[:] + ) + y = np.zeros(shape=self.output_size) + for output_index in self.output_indexes: + for input_index in self.input_indexes: + if input_index[:-1] == output_index[:-1]: + u = input_index[-1] // self.gnn_input_size + v = output_index[-1] // self.gnn_output_size + y[output_index] += ( + A[u, v] + * x_reshaped[input_index] + * self.weights[input_index[-1], output_index[-1]] + ) + y[output_index] += self.biases[output_index[-1]] + return y
diff --git a/src/omlt/neuralnet/layers/full_space.py b/src/omlt/neuralnet/layers/full_space.py --- a/src/omlt/neuralnet/layers/full_space.py +++ b/src/omlt/neuralnet/layers/full_space.py +def full_space_gnn_layer(net_block, net, layer_block, layer): + input_layers = list(net.predecessors(layer)) + assert len(input_layers) == 1 + input_layer = input_layers[0] + input_layer_block = net_block.layer[id(input_layer)] + + # zbar[i, v] represents the product A[u, v] * z[i], where u is the node index of input i + input_layer_block.zbar = pyo.Var( + pyo.Set(initialize=layer.input_indexes), + pyo.Set(initialize=range(layer.N)), + initialize=0, + ) + input_layer_block._zbar_lower_bound_z_big_m = pyo.Constraint( + pyo.Set(initialize=layer.input_indexes), + pyo.Set(initialize=range(layer.N)), + ) + input_layer_block._zbar_upper_bound_z_big_m = pyo.Constraint( + pyo.Set(initialize=layer.input_indexes), + pyo.Set(initialize=range(layer.N)), + ) + input_layer_block._zbar_lower_bound_big_m = pyo.Constraint( + pyo.Set(initialize=layer.input_indexes), + pyo.Set(initialize=range(layer.N)), + ) + input_layer_block._zbar_upper_bound_big_m = pyo.Constraint( + pyo.Set(initialize=layer.input_indexes), + pyo.Set(initialize=range(layer.N)), + ) + + for local_index, input_index in layer.input_indexes_with_input_layer_indexes: + lb, ub = input_layer_block.z[input_index].bounds + input_node_index = local_index[-1] // layer.gnn_input_size + + for output_node_index in range(layer.N): + input_layer_block._zbar_lower_bound_z_big_m[ + local_index, output_node_index + ] = input_layer_block.zbar[ + local_index, output_node_index + ] >= input_layer_block.z[ + input_index + ] - ub * ( + 1.0 - net_block.A[input_node_index, output_node_index] + ) + + input_layer_block._zbar_upper_bound_z_big_m[ + local_index, output_node_index + ] = input_layer_block.zbar[ + local_index, output_node_index + ] <= input_layer_block.z[ + input_index + ] - lb * ( + 1.0 - net_block.A[input_node_index, output_node_index] + ) + + input_layer_block._zbar_lower_bound_big_m[ + local_index, output_node_index + ] = ( + input_layer_block.zbar[local_index, output_node_index] + >= lb * net_block.A[input_node_index, output_node_index] + ) + + input_layer_block._zbar_upper_bound_big_m[ + local_index, output_node_index + ] = ( + input_layer_block.zbar[local_index, output_node_index] + <= ub * net_block.A[input_node_index, output_node_index] + ) + + @layer_block.Constraint(layer.output_indexes) + def gnn_layer(b, *output_index): + # gnn layers multiply only the last dimension of + # their inputs + expr = 0.0 + for local_index in layer.input_indexes: + w = layer.weights[local_index[-1], output_index[-1]] + output_node_index = output_index[-1] // layer.gnn_output_size + expr += input_layer_block.zbar[local_index, output_node_index] * w + # move this at the end to avoid numpy/pyomo var bug + output_node_index = output_index[-1] // layer.gnn_output_size + expr += layer.biases[output_index[-1]] + + lb, ub = compute_bounds_on_expr(expr) + layer_block.zhat[output_index].setlb(lb) + layer_block.zhat[output_index].setub(ub) + + return layer_block.zhat[output_index] == expr
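The four constraint families above are the standard big-M linearization of the bilinear products $\bar z_{i,v} = A_{u,v} z_i$ for binary $A_{u,v}$ and $z_i \in [lb, ub]$, and they are exact in both cases (a brief worked check):

    .. math::

        \begin{align*}
        A_{u,v} = 0 &\implies lb \cdot 0 \le \bar z_{i,v} \le ub \cdot 0 &&\implies \bar z_{i,v} = 0,\\
        A_{u,v} = 1 &\implies z_i - ub \cdot 0 \le \bar z_{i,v} \le z_i - lb \cdot 0 &&\implies \bar z_{i,v} = z_i.
        \end{align*}

+ + def full_space_conv2d_layer(net_block, net, layer_block, layer): r""" Add full-space formulation of the 2-D convolutional layer to the block
diff --git a/src/omlt/neuralnet/nn_formulation.py b/src/omlt/neuralnet/nn_formulation.py index 464498bb..042b14fe 100644 --- a/src/omlt/neuralnet/nn_formulation.py +++ b/src/omlt/neuralnet/nn_formulation.py @@ -17,11 +17,18 @@ tanh_activation_constraint, tanh_activation_function, ) -from omlt.neuralnet.layer import ConvLayer2D, DenseLayer, InputLayer, PoolingLayer2D +from omlt.neuralnet.layer import ( + ConvLayer2D, + DenseLayer, + InputLayer, + PoolingLayer2D, + GNNLayer, +) from omlt.neuralnet.layers.full_space import ( full_space_conv2d_layer, full_space_dense_layer, full_space_maxpool2d_layer, + full_space_gnn_layer, ) from omlt.neuralnet.layers.partition_based import ( default_partition_split_func, @@ -39,6 +46,7 @@ def _ignore_input_layer(): DenseLayer: full_space_dense_layer, ConvLayer2D: full_space_conv2d_layer, PoolingLayer2D: full_space_maxpool2d_layer, + GNNLayer: full_space_gnn_layer, } _DEFAULT_ACTIVATION_CONSTRAINTS = {
diff --git a/tests/io/test_keras_reader.py b/tests/io/test_keras_reader.py index f5c9668f..d47b0920 100644 --- a/tests/io/test_keras_reader.py +++ b/tests/io/test_keras_reader.py @@ -7,7 +7,7 @@ @pytest.mark.skipif( - not keras_available, reason="Test only valid when keras not available" + not keras_available, reason="Test only valid when keras is available" ) def test_keras_reader(datadir): nn = keras.models.load_model(datadir.file("keras_linear_131"), compile=False)
diff --git a/tests/io/test_torch_geometric.py b/tests/io/test_torch_geometric.py new file mode 100644 index 00000000..d80d176b --- /dev/null +++ b/tests/io/test_torch_geometric.py @@ -0,0 +1,225 @@ +import pytest +import numpy as np +import pyomo.environ as pyo +from omlt import OmltBlock + +from omlt.dependencies import ( + torch, 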
torch_available, + torch_geometric, + torch_geometric_available, +) + +if torch_available and torch_geometric_available: + from torch.nn import Linear, ReLU, Sigmoid, Softplus, Tanh + from torch_geometric.nn import Sequential, GCNConv, SAGEConv + from torch_geometric.nn import global_mean_pool, global_add_pool, global_max_pool + from omlt.io.torch_geometric import ( + load_torch_geometric_sequential, + gnn_with_fixed_graph, + gnn_with_non_fixed_graph, + ) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def GCN_Sequential(activation, pooling): + return Sequential( + "x, edge_index", + [ + (GCNConv(2, 4), "x, edge_index -> x"), + activation(), + (GCNConv(4, 4), "x, edge_index -> x"), + activation(), + Linear(4, 4), + (pooling, "x, None -> x"), + Linear(4, 2), + activation(), + Linear(2, 1), + ], + ) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def SAGE_Sequential(activation, pooling, aggr, root_weight): + return Sequential( + "x, edge_index", + [ + (SAGEConv(2, 4, aggr=aggr, root_weight=root_weight), "x, edge_index -> x"), + activation(), + (SAGEConv(4, 4, aggr=aggr, root_weight=root_weight), "x, edge_index -> x"), + activation(), + Linear(4, 4), + (pooling, "x, None -> x"), + Linear(4, 2), + activation(), + Linear(2, 1), + ], + ) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def _test_torch_geometric_reader(nn, activation, pooling): + N = 4 + A = np.ones((N, N), dtype=int) + net = load_torch_geometric_sequential(nn, N, A) + layers = list(net.layers) + assert len(layers) == 7 + assert layers[1].weights.shape == (8, 16) + assert layers[2].weights.shape == (16, 16) + assert layers[3].weights.shape == (16, 16) + assert layers[4].weights.shape == (16, 4) + assert layers[5].weights.shape == (4, 2) + assert layers[6].weights.shape == (2, 1) + for layer_id in [1, 2, 5]: + if activation == ReLU: + assert layers[layer_id].activation == "relu" + elif activation == Sigmoid: + assert layers[layer_id].activation == "sigmoid" + elif activation == Tanh: + assert layers[layer_id].activation == "tanh" + + if pooling == global_mean_pool: + assert sum(sum(layers[4].weights)) == N + elif pooling == global_add_pool: + assert sum(sum(layers[4].weights)) == N**2 + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def _test_gnn_with_fixed_graph(nn): + N = 4 + F = 2 + + input_size = [N * F] + input_bounds = {} + for i in range(input_size[0]): + input_bounds[(i)] = (-1.0, 1.0) + m = pyo.ConcreteModel() + m.nn = OmltBlock() + A = np.eye(N, dtype=int) + gnn_with_fixed_graph(m.nn, nn, N, A, scaled_input_bounds=input_bounds) + assert m.nvariables() == 282 + assert m.nconstraints() == 614 + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def _test_gnn_with_non_fixed_graph(nn): + N = 4 + F = 2 + + input_size = [N * F] + input_bounds = {} + for i in range(input_size[0]): + input_bounds[(i)] = (-1.0, 1.0) + m = pyo.ConcreteModel() + m.nn = OmltBlock() + gnn_with_non_fixed_graph(m.nn, nn, N, scaled_input_bounds=input_bounds) + assert m.nvariables() == 282 + assert m.nconstraints() == 620 
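A quick consistency note on the expected counts in the two helpers above (an editorial check, not part of the test suite):

    # With N = 4 nodes, gnn_with_non_fixed_graph adds one symmetry constraint
    # A[u, v] == A[v, u] per unordered node pair, i.e. N * (N - 1) / 2 = 6,
    # which is exactly the gap between the two expected totals: 620 - 614 = 6.
    # The variable counts match (282) because both formulations declare the
    # same N * N binary adjacency variables; only the fixed case fixes them all.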
+ + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def test_torch_geometric_reader(): + for activation in [ReLU, Sigmoid, Tanh]: + for pooling in [global_mean_pool, global_add_pool]: + nn = GCN_Sequential(activation, pooling) + _test_torch_geometric_reader(nn, activation, pooling) + for aggr in ["sum", "mean"]: + for root_weight in [False, True]: + nn = SAGE_Sequential(activation, pooling, aggr, root_weight) + _test_torch_geometric_reader(nn, activation, pooling) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def test_gnn_with_fixed_graph(): + for pooling in [global_mean_pool, global_add_pool]: + nn = GCN_Sequential(ReLU, pooling) + _test_gnn_with_fixed_graph(nn) + for aggr in ["sum", "mean"]: + for root_weight in [False, True]: + nn = SAGE_Sequential(ReLU, pooling, aggr, root_weight) + _test_gnn_with_fixed_graph(nn) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def test_gnn_with_non_fixed_graph(): + for pooling in [global_mean_pool, global_add_pool]: + for aggr in ["sum"]: + for root_weight in [False, True]: + nn = SAGE_Sequential(ReLU, pooling, aggr, root_weight) + _test_gnn_with_non_fixed_graph(nn) + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def _test_gnn_value_error(nn, error_info, error_type="ValueError"): + N = 4 + F = 2 + + input_size = [N * F] + input_bounds = {} + for i in range(input_size[0]): + input_bounds[(i)] = (-1.0, 1.0) + if error_type == "ValueError": + with pytest.raises(ValueError) as excinfo: + load_torch_geometric_sequential( + nn=nn, + N=N, + A=None, + scaled_input_bounds=input_bounds, + ) + assert str(excinfo.value) == error_info + elif error_type == "warns": + with pytest.warns() as record: + load_torch_geometric_sequential( + nn=nn, + N=N, + A=None, + scaled_input_bounds=input_bounds, + ) + assert str(record[0].message) == error_info + + +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="Test only valid when torch and torch_geometric are available", +) +def test_gnn_value_error(): + nn = SAGE_Sequential(ReLU, global_max_pool, "mean", True) + _test_gnn_value_error(nn, "this operation is not supported") + + nn = SAGE_Sequential(Sigmoid, global_mean_pool, "sum", True) + _test_gnn_value_error(nn, "nonlinear activation results in a MINLP", "warns") + + nn = SAGE_Sequential(ReLU, global_mean_pool, "mean", True) + _test_gnn_value_error( + nn, "this aggregation is not supported when the graph is not fixed" + ) + + nn = GCN_Sequential(ReLU, global_mean_pool) + _test_gnn_value_error(nn, "this layer is not supported when the graph is not fixed") diff --git a/tests/neuralnet/test_layer.py b/tests/neuralnet/test_layer.py index 7cb966c0..7b865faf 100644 --- a/tests/neuralnet/test_layer.py +++ b/tests/neuralnet/test_layer.py @@ -7,6 +7,7 @@ IndexMapper, InputLayer, PoolingLayer2D, + GNNLayer, ) @@ -94,3 +95,54 @@ def test_maxpool_layer(): layer = PoolingLayer2D([1, 4, 4], [1, 2, 2], [2, 2], "max", [3, 3], 1) y = layer.eval_single_layer(x) assert np.array_equal(y, [[[11, 12], [15, 16]]]) + + +def test_gnn_layer_with_input_index_mapper(): + weights = np.array( + [ + [1, 0, 1, 1, -1, 1, 1, -1, 1], + [0, 1, 1, 
-1, 1, 1, -1, 1, 1], + [1, -1, 1, 1, 0, 1, 1, -1, 1], + [-1, 1, 1, 0, 1, 1, -1, 1, 1], + [1, -1, 1, 1, -1, 1, 1, 0, 1], + [-1, 1, 1, -1, 1, 1, 0, 1, 1], + ] + ) + + biases = np.array([-1, 0, 1, -1, 0, 1, -1, 0, 1]) + + # input has size [6], but the previous node output is [2, 3] + # use mapper to map between the two + t = IndexMapper([1, 2, 2, 3], [1, 2, 6]) + layer = GNNLayer([1, 2, 6], [1, 2, 9], weights, biases, N=3, input_index_mapper=t) + + inputs = np.array([[[[-3, 2, -1], [1, -2, 3]], [[0, 0, 0], [0, 0, 0]]]]) + + A1 = np.ones([3, 3], dtype=int) + y1 = np.array( + [[[-11, 9, 1, -12, 11, 1, -10, 10, 1], [-1, 0, 1, -1, 0, 1, -1, 0, 1]]] + ) + assert np.array_equal(layer._eval_with_adjacency(inputs, A1), y1) + assert np.array_equal(layer.eval_single_layer(inputs), y1) + + A2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y2 = np.array([[[-4, 2, 0, -2, 1, 1, -3, 3, 2], [-1, 0, 1, -1, 0, 1, -1, 0, 1]]]) + assert np.array_equal(layer._eval_with_adjacency(inputs, A2), y2) + + A3 = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]]) + y3 = np.array([[[-6, 4, 0, -12, 11, 1, -5, 5, 2], [-1, 0, 1, -1, 0, 1, -1, 0, 1]]]) + assert np.array_equal(layer._eval_with_adjacency(inputs, A3), y3) + + with pytest.raises(ValueError) as excinfo: + layer = GNNLayer([5], [9], weights, biases, N=3) + assert ( + str(excinfo.value) + == "Input size must equal to the number of nodes multiplied by the number of input node features" + ) + + with pytest.raises(ValueError) as excinfo: + layer = GNNLayer([6], [8], weights, biases, N=3) + assert ( + str(excinfo.value) + == "Output size must equal to the number of nodes multiplied by the number of output node features" + ) diff --git a/tests/neuralnet/test_nn_formulation.py b/tests/neuralnet/test_nn_formulation.py index b2f7e811..ad3b2b0f 100644 --- a/tests/neuralnet/test_nn_formulation.py +++ b/tests/neuralnet/test_nn_formulation.py @@ -18,6 +18,7 @@ IndexMapper, InputLayer, PoolingLayer2D, + GNNLayer, ) from omlt.neuralnet.layers.full_space import ( full_space_maxpool2d_layer, @@ -802,3 +803,101 @@ def test_maxpool2d_bad_input_layer(): m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) expected_msg = "Input layer must be a ConvLayer2D." 
assert str(excinfo.value) == expected_msg + + +def three_node_graph_neural_network(activation): + input_size = [6] + input_bounds = {} + for i in range(input_size[0]): + input_bounds[(i)] = (-10.0, 10.0) + net = NetworkDefinition(scaled_input_bounds=input_bounds) + + input_layer = InputLayer(input_size) + net.add_layer(input_layer) + + gnn_layer = GNNLayer( + input_layer.output_size, + [9], + activation=activation, + weights=np.array( + [ + [1, 0, 1, 1, -1, 1, 1, -1, 1], + [0, 1, 1, -1, 1, 1, -1, 1, 1], + [1, -1, 1, 1, 0, 1, 1, -1, 1], + [-1, 1, 1, 0, 1, 1, -1, 1, 1], + [1, -1, 1, 1, -1, 1, 1, 0, 1], + [-1, 1, 1, -1, 1, 1, 0, 1, 1], + ] + ), + biases=np.array([-1, 0, 1, -1, 0, 1, -1, 0, 1]), + N=3, + ) + net.add_layer(gnn_layer) + net.add_edge(input_layer, gnn_layer) + + return net + + +def examples_of_graphs(graph_type): + # complete graph + if graph_type == "complete": + A = np.ones([3, 3], dtype=int) + y = np.array([-11, 9, 1, -12, 11, 1, -10, 10, 1]) + # edgeless graph + elif graph_type == "edgeless": + A = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + y = np.array([-4, 2, 0, -2, 1, 1, -3, 3, 2]) + # line graph, i.e., 0-1-2 + elif graph_type == "line": + A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]]) + y = np.array([-6, 4, 0, -12, 11, 1, -5, 5, 2]) + return A, y + + +def _test_three_node_graph_neural_network(graph_type): + m = pyo.ConcreteModel() + m.nn = OmltBlock() + inputs = np.array([-3, 2, -1, 1, -2, 3]) + net = three_node_graph_neural_network("linear") + + m.nn.N = 3 + m.nn.A = pyo.Var( + pyo.Set(initialize=range(m.nn.N)), + pyo.Set(initialize=range(m.nn.N)), + within=pyo.Binary, + ) + + m.nn.build_formulation(FullSpaceNNFormulation(net)) + + A, y = examples_of_graphs(graph_type) + for i in range(m.nn.N): + for j in range(m.nn.N): + m.nn.A[i, j].fix(A[i, j]) + for i in range(6): + m.nn.inputs[i].fix(inputs[i]) + + assert m.nvariables() == 81 + assert m.nconstraints() == 120 + + m.obj = pyo.Objective(expr=0) + + status = pyo.SolverFactory("cbc").solve(m, tee=False) + + for i in range(9): + assert abs(pyo.value(m.nn.outputs[i]) - y[i]) < 1e-6 + + for i in range(6): + for j in range(3): + assert ( + abs( + pyo.value(m.nn.layer[m.nn.layers.at(1)].zbar[i, j]) + - pyo.value(m.nn.A[i // 2, j]) * inputs[i] + ) + < 1e-6 + ) + + +def test_three_node_graph_neural_network(): + graph_types = ["complete", "edgeless", "line"] + for graph_type in graph_types: + _test_three_node_graph_neural_network(graph_type) diff --git a/tests/notebooks/test_run_notebooks.py b/tests/notebooks/test_run_notebooks.py index af792fc3..9b1361c9 100644 --- a/tests/notebooks/test_run_notebooks.py +++ b/tests/notebooks/test_run_notebooks.py @@ -4,7 +4,12 @@ from pyomo.common.fileutils import this_file_dir from testbook import testbook -from omlt.dependencies import keras_available, onnx_available +from omlt.dependencies import ( + keras_available, + onnx_available, + torch_available, + torch_geometric_available, +) # TODO: These will be replaced with stronger tests using testbook soon @@ -54,5 +59,13 @@ def test_neural_network_formulations(): _test_run_notebook("neuralnet", "neural_network_formulations.ipynb", 21) +@pytest.mark.skipif( + not (torch_available and torch_geometric_available), + reason="torch and torch_geometric needed for this notebook", +) +def test_graph_neural_network_formulation(): + _test_run_notebook("neuralnet", "graph_neural_network_formulation.ipynb", 8) + + def test_index_handling(): _test_run_notebook("neuralnet", "index_handling.ipynb", 6)