diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index e21d4ef1..00000000 --- a/.coveragerc +++ /dev/null @@ -1,28 +0,0 @@ -# .coveragerc to control coverage.py -[run] -branch = True -source = optml -# omit = bad_file.py - -[paths] -source = - src/ - */site-packages/ - -[report] -# Regexes for lines to exclude from consideration -exclude_lines = - # Have to re-enable the standard pragma - pragma: no cover - - # Don't complain about missing debug-only code: - def __repr__ - if self\.debug - - # Don't complain if tests don't hit defensive assertion code: - raise AssertionError - raise NotImplementedError - - # Don't complain if non-runnable code isn't run: - if 0: - if __name__ == .__main__.: diff --git a/docs/pull_request_template.md b/.github/pull_request_template.md similarity index 100% rename from docs/pull_request_template.md rename to .github/pull_request_template.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 2c92b089..00000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,53 +0,0 @@ ---- -name: CI - -on: - push: - branches: ["main","github-actions"] - pull_request: - branches: ["main"] - workflow_dispatch: - -jobs: - tests: - name: "Python ${{ matrix.python-version }}" - runs-on: "ubuntu-latest" - - strategy: - matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] - - steps: - - uses: "actions/checkout@v2" - - uses: "actions/setup-python@v2" - - uses: "s-weigand/setup-conda@v1" - with: - python-version: "${{ matrix.python-version }}" - - - name: Install solvers - run: sudo apt-get install -y glpk-utils coinor-cbc - - - name: "Install dependencies" - run: | - set -xe - python -VV - python -m site - python -m pip install --upgrade pip setuptools wheel - python -m pip install --upgrade coverage[toml] virtualenv tox tox-gh-actions - conda install -c conda-forge ipopt - conda install -c conda-forge pyscipopt - - - name: "Run tox targets with lean testing environment for ${{ matrix.python-version }}" - run: "tox -re leanenv" - - - name: "Run tox targets for ${{ matrix.python-version }}" - run: "tox" - - - name: "Convert coverage" - run: "python -m coverage xml" - - - name: "Upload coverage to Codecov" - uses: "codecov/codecov-action@v4" - with: - token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: true diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml new file mode 100644 index 00000000..20a9e104 --- /dev/null +++ b/.github/workflows/publish_release.yml @@ -0,0 +1,23 @@ +name: Publish release +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' +jobs: + publish-release: + runs-on: ubuntu-22.04 + env: + VERSION: ${{ github.ref_name }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" + - run: pip install -e '.[dev]' + - run: python -m build + - run: + twine upload + -u __token__ + -p ${{ secrets.PYPI_API_TOKEN }} + dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..507f64f5 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,82 @@ +name: Tests +on: + push: + branches: + - main + pull_request: + workflow_dispatch: +jobs: + ruff: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" + - run: pip install '.[dev]' + - run: ruff check src/ tests/ docs/ + mypy: + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + runs-on: 
ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - run: pip install '.[dev]' + - run: mypy src/ tests/ docs/ + ruff-format: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" + - run: pip install '.[dev]' + - run: ruff format --check src/ tests/ docs/ + pytest: + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - uses: actions/cache@v3 + with: + path: ~/conda_pkgs_dir + key: ${{ runner.os }}-conda-${{ hashFiles('environment.yml') }} + - uses: conda-incubator/setup-miniconda@v3 + with: + channel-priority: strict + environment-file: environment.yml + use-only-tar-bz2: true + - run: pip install '.[dev]' + - shell: bash -el {0} + run: pytest + - run: python -m coverage xml + - uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true + doctest: + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - run: pip install '.[dev]' + - run: make -C docs doctest diff --git a/.gitignore b/.gitignore index 1b77d315..243e16e1 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +src/omlt/_version.py # PyInstaller # Usually these files are written by a python script from a template @@ -50,6 +51,8 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +docs/notebooks/data/MNIST +docs/notebooks/neuralnet/*.keras # Translations *.mo @@ -70,6 +73,7 @@ instance/ # Sphinx documentation docs/_build/ +docs/_autosummary # PyBuilder target/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 6e41af22..2cb5498c 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -16,7 +16,7 @@ build: tools: - python: "3.8" + python: "3.12" # You can also specify other tool versions: @@ -58,7 +58,8 @@ sphinx: # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: - install: - - - requirements: docs/requirements.txt \ No newline at end of file + - method: pip + path: . 
+ extra_requirements: + - docs diff --git a/Makefile b/Makefile deleted file mode 100644 index cba83db0..00000000 --- a/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -.PHONY: develop docs test - -develop: - python -m pip install -e .[testing] - -docs: - python -m tox -e docs - -test: - python -m tox \ No newline at end of file diff --git a/README.rst b/README.rst index e99fe927..058c0fc6 100644 --- a/README.rst +++ b/README.rst @@ -142,14 +142,14 @@ Example Development =========== -OMLT uses `tox` to manage development tasks: +OMLT uses `just `_ to manage development tasks: -* `tox -av` to list available tasks -* `tox` to run tests -* `tox -e lint` to check formatting and code styles -* `tox -e format` to automatically format files -* `tox -e docs` to build the documentation -* `tox -e publish` to publish the package to PyPi +* ``just`` to list available tasks +* ``just check`` to run all checks +* ``just fix`` to apply any auto-fixes +* ``just dev`` to install development dependencies in your current Python environment +* ``just dev-gpu`` same as ``dev`` but with GPU support +* ``just docs`` to build the documentation Contributors ============ @@ -224,4 +224,4 @@ Contributors .. _zshiqiang: https://github.com/zshiqiang .. |zshiqiang| image:: https://avatars.githubusercontent.com/u/91337036?v=4 - :width: 80px + :width: 80px diff --git a/docs/Makefile b/docs/Makefile index 95d96808..5117fbf5 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,27 +1,18 @@ -# Makefile for Sphinx documentation +# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -W --keep-going SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build -#AUTODOCDIR = api - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $?), 1) -$(error "The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/") -endif - -.PHONY: help clean Makefile # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -clean: - rm -rf $(BUILDDIR)/* #$(AUTODOCDIR) +.PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). diff --git a/docs/conf.py b/docs/conf.py index a85d176d..575f2b79 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,70 +1,7 @@ -# This file is execfile()d with the current directory set to its containing dir. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import inspect -import os -import shutil -import sys - -# -- Path setup -------------------------------------------------------------- - -__location__ = os.path.join( - os.getcwd(), os.path.dirname(inspect.getfile(inspect.currentframe())) -) - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.join(__location__, "../src")) - -# -- Run sphinx-apidoc ------------------------------------------------------- -# This hack is necessary since RTD does not issue `sphinx-apidoc` before running -# `sphinx-build -b html . _build/html`. See Issue: -# https://github.com/rtfd/readthedocs.org/issues/1139 -# DON'T FORGET: Check the box "Install your project inside a virtualenv using -# setup.py install" in the RTD Advanced Settings. -# Additionally it helps us to avoid running apidoc manually - -try: # for Sphinx >= 1.7 - from sphinx.ext import apidoc -except ImportError: - from sphinx import apidoc - -# output_dir = os.path.join(__location__, "api") -# module_dir = os.path.join(__location__, "../src/omlt") -# try: -# shutil.rmtree(output_dir) -# except FileNotFoundError: -# pass - -# try: -# import sphinx - -# cmd_line_template = ( -# "sphinx-apidoc --implicit-namespaces -f -o {outputdir} {moduledir}" -# ) -# cmd_line = cmd_line_template.format(outputdir=output_dir, moduledir=module_dir) - -# args = cmd_line.split(" ") -# if tuple(sphinx.__version__.split(".")) >= ("1", "7"): -# # This is a rudimentary parse_version to avoid external dependencies -# args = args[1:] - -# apidoc.main(args) -# except Exception as e: -# print("Running `sphinx-apidoc` failed!\n{}".format(e)) +import omlt # -- General configuration --------------------------------------------------- -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ @@ -86,200 +23,58 @@ # The suffix of source filenames. source_suffix = ".rst" -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. project = "omlt" -copyright = "2022, Carnegie Mellon University, Imperial College London, Sandia National Laboratories" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = "" # Is set by calling `setup.py docs` -# The full version, including alpha/beta/rc tags. -release = "" # Is set by calling `setup.py docs` - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None +project_copyright = ( + "2022, Carnegie Mellon University, " + "Imperial College London, Sandia National Laboratories" +) -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".venv", "notebooks"] +exclude_patterns = ["_build"] -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). 
-# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -# html_theme = "furo" html_theme = "sphinx_rtd_theme" -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - "light_css_variables": { - "color-brand-primary": "#003E74", - "color-brand-content": "#002147", - }, - "dark_css_variables": { - "color-brand-primary": "#0091D4", - "color-brand-content": "#D4EFFC", - }, -} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -try: - from omlt import __version__ as version -except ImportError: - pass -else: - release = version - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None +version = omlt.__version__ +release = omlt.__version__ # The name of an image file (relative to this directory) to place at the top # of the sidebar. html_logo = "_static/omlt_logo.png" -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). 
-# html_file_suffix = None - # Output file base name for HTML help builder. htmlhelp_basename = "omlt-doc" # -- Options for LaTeX output ------------------------------------------------ -latex_elements = { - # The paper size ("letterpaper" or "a4paper"). - # "papersize": "letterpaper", - # The font size ("10pt", "11pt" or "12pt"). - # "pointsize": "10pt", - # Additional stuff for the LaTeX preamble. - # "preamble": "", -} - # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ("index", "user_guide.tex", "OMLT Documentation", "The OMLT Developers", "manual") ] -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = "" - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - # -- External mapping -------------------------------------------------------- -python_version = ".".join(map(str, sys.version_info[0:2])) intersphinx_mapping = { "sphinx": ("http://www.sphinx-doc.org/en/stable", None), - "python": ("https://docs.python.org/" + python_version, None), + "python": ("https://docs.python.org/3", None), "matplotlib": ("https://matplotlib.org", None), "numpy": ("https://docs.scipy.org/doc/numpy", None), "sklearn": ("https://scikit-learn.org/stable", None), diff --git a/docs/installation.rst b/docs/installation.rst index 15b116ae..2c586bb0 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,5 +1,5 @@ Installation -============== +============ OMLT requires Python >= 3.6. The most stable OMLT version can be installed using the PyPI package index. This will also install the required depencies. 
Simply run: :: @@ -13,7 +13,7 @@ If using the latest un-released version, install from the github repository and Optional Requirements -------------- +--------------------- OMLT can import sequential Keras models which requires a working installation of tensorflow: :: diff --git a/docs/notebooks/data/build_sin_quadratic_csv.py b/docs/notebooks/data/build_sin_quadratic_csv.py index 6506022a..261525eb 100644 --- a/docs/notebooks/data/build_sin_quadratic_csv.py +++ b/docs/notebooks/data/build_sin_quadratic_csv.py @@ -1,5 +1,4 @@ -from random import random - +import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -7,14 +6,13 @@ w = 5 x = np.linspace(-2, 2, n_samples) -df = pd.DataFrame(x, columns=["x"]) -df["y"] = ( - np.sin(w * x) - + x**2 - + np.array([np.random.uniform() * 0.1 for _ in range(n_samples)]) +rng = np.random.default_rng() +sin_quads = pd.DataFrame(x, columns=["x"]) +sin_quads["y"] = ( + np.sin(w * x) + x**2 + np.array([rng.uniform() * 0.1 for _ in range(n_samples)]) ) -plt.plot(df["x"], df["y"]) +plt.plot(sin_quads["x"], sin_quads["y"]) plt.show() -df.to_csv("sin_quadratic.csv") +sin_quads.to_csv("sin_quadratic.csv") diff --git a/docs/notebooks/neuralnet/auto-thermal-reformer-relu.ipynb b/docs/notebooks/neuralnet/auto-thermal-reformer-relu.ipynb index 78c4e1a9..f49a771e 100644 --- a/docs/notebooks/neuralnet/auto-thermal-reformer-relu.ipynb +++ b/docs/notebooks/neuralnet/auto-thermal-reformer-relu.ipynb @@ -78,19 +78,20 @@ ], "source": [ "import os\n", - "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress CUDA warnings from tensorflow\n", + "\n", + "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\" # suppress CUDA warnings from tensorflow\n", "\n", "# import the necessary packages\n", - "from omlt import OmltBlock, OffsetScaling\n", - "from omlt.io.keras import load_keras_sequential\n", - "from omlt.neuralnet import ReluBigMFormulation\n", - "import pyomo.environ as pyo\n", "import pandas as pd\n", - "import tensorflow.keras as keras\n", - "from tensorflow.keras.models import Sequential\n", + "import pyomo.environ as pyo\n", + "from tensorflow import keras\n", "from tensorflow.keras.layers import Dense\n", + "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.optimizers import Adam\n", - "from tensorflow.keras.callbacks import ModelCheckpoint" + "\n", + "from omlt import OffsetScaling, OmltBlock\n", + "from omlt.io.keras import load_keras_sequential\n", + "from omlt.neuralnet import ReluBigMFormulation" ] }, { @@ -151,10 +152,23 @@ ], "source": [ "# read in our csv data\n", - "columns = ['Bypass Fraction', 'NG Steam Ratio', 'Steam Flow',\n", - " 'Reformer Duty','AR', 'C2H6', 'C3H8', 'C4H10',\n", - " 'CH4', 'CO', 'CO2', 'H2', 'H2O', 'N2']\n", - "df = pd.read_csv('../data/reformer.csv', usecols=columns)\n", + "columns = [\n", + " \"Bypass Fraction\",\n", + " \"NG Steam Ratio\",\n", + " \"Steam Flow\",\n", + " \"Reformer Duty\",\n", + " \"AR\",\n", + " \"C2H6\",\n", + " \"C3H8\",\n", + " \"C4H10\",\n", + " \"CH4\",\n", + " \"CO\",\n", + " \"CO2\",\n", + " \"H2\",\n", + " \"H2O\",\n", + " \"N2\",\n", + "]\n", + "df = pd.read_csv(\"../data/reformer.csv\", usecols=columns)\n", "print(df)" ] }, @@ -169,9 +183,21 @@ "outputs": [], "source": [ "# separate the data into inputs and outputs\n", - "inputs = ['Bypass Fraction', 'NG Steam Ratio']\n", - "outputs = [ 'Steam Flow', 'Reformer Duty','AR', 'C2H6', 'C3H8', 'C4H10',\n", - " 'CH4', 'CO', 'CO2', 'H2', 'H2O', 'N2']\n", + "inputs = [\"Bypass Fraction\", \"NG Steam Ratio\"]\n", + "outputs = [\n", + " 
\"Steam Flow\",\n", + " \"Reformer Duty\",\n", + " \"AR\",\n", + " \"C2H6\",\n", + " \"C3H8\",\n", + " \"C4H10\",\n", + " \"CH4\",\n", + " \"CO\",\n", + " \"CO2\",\n", + " \"H2\",\n", + " \"H2O\",\n", + " \"N2\",\n", + "]\n", "dfin = df[inputs]\n", "dfout = df[outputs]" ] @@ -198,8 +224,8 @@ "\n", "# capture the minimum and maximum values of the scaled inputs\n", "# so we don't use the model outside the valid range\n", - "scaled_lb = dfin.min()[inputs].values\n", - "scaled_ub = dfin.max()[inputs].values" + "scaled_lb = dfin.min()[inputs].to_numpy()\n", + "scaled_ub = dfin.max()[inputs].to_numpy()" ] }, { @@ -222,13 +248,13 @@ ], "source": [ "# create our Keras Sequential model\n", - "nn = Sequential(name='reformer_relu_4_20')\n", - "nn.add(Dense(units=10, input_dim=len(inputs), activation='relu'))\n", - "nn.add(Dense(units=10, activation='relu'))\n", - "nn.add(Dense(units=10, activation='relu'))\n", - "nn.add(Dense(units=10, activation='relu'))\n", + "nn = Sequential(name=\"reformer_relu_4_20\")\n", + "nn.add(Dense(units=10, input_dim=len(inputs), activation=\"relu\"))\n", + "nn.add(Dense(units=10, activation=\"relu\"))\n", + "nn.add(Dense(units=10, activation=\"relu\"))\n", + "nn.add(Dense(units=10, activation=\"relu\"))\n", "nn.add(Dense(units=len(outputs)))\n", - "nn.compile(optimizer=Adam(), loss='mse')" + "nn.compile(optimizer=Adam(), loss=\"mse\")" ] }, { @@ -449,8 +475,8 @@ ], "source": [ "# train our model\n", - "x = dfin.values\n", - "y = dfout.values\n", + "x = dfin.to_numpy()\n", + "y = dfout.to_numpy()\n", "\n", "history = nn.fit(x, y, epochs=100)" ] @@ -468,7 +494,7 @@ "# save the model to disk\n", "# While not technically necessary, this shows how we can load a previously saved model into\n", "# our optimization formulation)\n", - "nn.save('reformer_nn_relu.keras')" + "nn.save(\"reformer_nn_relu.keras\")" ] }, { @@ -522,22 +548,24 @@ "outputs": [], "source": [ "# load the Keras model\n", - "nn_reformer = keras.models.load_model('reformer_nn_relu.keras', compile=False)\n", + "nn_reformer = keras.models.load_model(\"reformer_nn_relu.keras\", compile=False)\n", "\n", "# Note: The neural network is in the scaled space. We want access to the\n", "# variables in the unscaled space. 
Therefore, we need to tell OMLT about the\n", "# scaling factors\n", "scaler = OffsetScaling(\n", - " offset_inputs={i: x_offset[inputs[i]] for i in range(len(inputs))},\n", - " factor_inputs={i: x_factor[inputs[i]] for i in range(len(inputs))},\n", - " offset_outputs={i: y_offset[outputs[i]] for i in range(len(outputs))},\n", - " factor_outputs={i: y_factor[outputs[i]] for i in range(len(outputs))}\n", - " )\n", + " offset_inputs={i: x_offset[inputs[i]] for i in range(len(inputs))},\n", + " factor_inputs={i: x_factor[inputs[i]] for i in range(len(inputs))},\n", + " offset_outputs={i: y_offset[outputs[i]] for i in range(len(outputs))},\n", + " factor_outputs={i: y_factor[outputs[i]] for i in range(len(outputs))},\n", + ")\n", "\n", "scaled_input_bounds = {i: (scaled_lb[i], scaled_ub[i]) for i in range(len(inputs))}\n", "\n", "# create a network definition from the Keras model\n", - "net = load_keras_sequential(nn_reformer, scaling_object=scaler, scaled_input_bounds=scaled_input_bounds)\n", + "net = load_keras_sequential(\n", + " nn_reformer, scaling_object=scaler, scaled_input_bounds=scaled_input_bounds\n", + ")\n", "\n", "# create the variables and constraints for the neural network in Pyomo\n", "m.reformer.build_formulation(ReluBigMFormulation(net))" @@ -554,8 +582,8 @@ "outputs": [], "source": [ "# now add the objective and the constraints\n", - "h2_idx = outputs.index('H2')\n", - "n2_idx = outputs.index('N2')\n", + "h2_idx = outputs.index(\"H2\")\n", + "n2_idx = outputs.index(\"N2\")\n", "m.obj = pyo.Objective(expr=m.reformer.outputs[h2_idx], sense=pyo.maximize)\n", "m.con = pyo.Constraint(expr=m.reformer.outputs[n2_idx] <= 0.34)" ] @@ -571,7 +599,7 @@ "outputs": [], "source": [ "# now solve the optimization problem (this may take some time)\n", - "solver = pyo.SolverFactory('cbc')\n", + "solver = pyo.SolverFactory(\"cbc\")\n", "status = solver.solve(m, tee=False)" ] }, @@ -596,10 +624,10 @@ } ], "source": [ - "print('Bypass Fraction:', pyo.value(m.reformer.inputs[0]))\n", - "print('NG Steam Ratio:', pyo.value(m.reformer.inputs[1]))\n", - "print('H2 Concentration:', pyo.value(m.reformer.outputs[h2_idx]))\n", - "print('N2 Concentration:', pyo.value(m.reformer.outputs[n2_idx]))" + "print(\"Bypass Fraction:\", pyo.value(m.reformer.inputs[0]))\n", + "print(\"NG Steam Ratio:\", pyo.value(m.reformer.inputs[1]))\n", + "print(\"H2 Concentration:\", pyo.value(m.reformer.outputs[h2_idx]))\n", + "print(\"N2 Concentration:\", pyo.value(m.reformer.outputs[n2_idx]))" ] } ], diff --git a/docs/notebooks/neuralnet/auto-thermal-reformer.ipynb b/docs/notebooks/neuralnet/auto-thermal-reformer.ipynb index 650f5700..6f7d4320 100644 --- a/docs/notebooks/neuralnet/auto-thermal-reformer.ipynb +++ b/docs/notebooks/neuralnet/auto-thermal-reformer.ipynb @@ -67,19 +67,20 @@ "outputs": [], "source": [ "import os\n", - "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress CUDA warnings from tensorflow\n", + "\n", + "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\" # suppress CUDA warnings from tensorflow\n", "\n", "# import the necessary packages\n", - "from omlt import OmltBlock, OffsetScaling\n", - "from omlt.io.keras import load_keras_sequential\n", - "from omlt.neuralnet import FullSpaceSmoothNNFormulation\n", - "import pyomo.environ as pyo\n", "import pandas as pd\n", - "import tensorflow.keras as keras\n", - "from tensorflow.keras.models import Sequential\n", + "import pyomo.environ as pyo\n", + "from tensorflow import keras\n", "from tensorflow.keras.layers import Dense\n", + "from tensorflow.keras.models import 
Sequential\n", "from tensorflow.keras.optimizers import Adam\n", - "from tensorflow.keras.callbacks import ModelCheckpoint" + "\n", + "from omlt import OffsetScaling, OmltBlock\n", + "from omlt.io.keras import load_keras_sequential\n", + "from omlt.neuralnet import FullSpaceSmoothNNFormulation" ] }, { @@ -140,10 +141,23 @@ ], "source": [ "# read in our csv data\n", - "columns = ['Bypass Fraction', 'NG Steam Ratio', 'Steam Flow',\n", - " 'Reformer Duty','AR', 'C2H6', 'C3H8', 'C4H10',\n", - " 'CH4', 'CO', 'CO2', 'H2', 'H2O', 'N2']\n", - "df = pd.read_csv('../data/reformer.csv', usecols=columns)\n", + "columns = [\n", + " \"Bypass Fraction\",\n", + " \"NG Steam Ratio\",\n", + " \"Steam Flow\",\n", + " \"Reformer Duty\",\n", + " \"AR\",\n", + " \"C2H6\",\n", + " \"C3H8\",\n", + " \"C4H10\",\n", + " \"CH4\",\n", + " \"CO\",\n", + " \"CO2\",\n", + " \"H2\",\n", + " \"H2O\",\n", + " \"N2\",\n", + "]\n", + "df = pd.read_csv(\"../data/reformer.csv\", usecols=columns)\n", "print(df)" ] }, @@ -158,9 +172,21 @@ "outputs": [], "source": [ "# separate the data into inputs and outputs\n", - "inputs = ['Bypass Fraction', 'NG Steam Ratio']\n", - "outputs = [ 'Steam Flow', 'Reformer Duty','AR', 'C2H6', 'C3H8', 'C4H10',\n", - " 'CH4', 'CO', 'CO2', 'H2', 'H2O', 'N2']\n", + "inputs = [\"Bypass Fraction\", \"NG Steam Ratio\"]\n", + "outputs = [\n", + " \"Steam Flow\",\n", + " \"Reformer Duty\",\n", + " \"AR\",\n", + " \"C2H6\",\n", + " \"C3H8\",\n", + " \"C4H10\",\n", + " \"CH4\",\n", + " \"CO\",\n", + " \"CO2\",\n", + " \"H2\",\n", + " \"H2O\",\n", + " \"N2\",\n", + "]\n", "dfin = df[inputs]\n", "dfout = df[outputs]" ] @@ -187,8 +213,8 @@ "\n", "# capture the minimum and maximum values of the scaled inputs\n", "# so we don't use the model outside the valid range\n", - "scaled_lb = dfin.min()[inputs].values\n", - "scaled_ub = dfin.max()[inputs].values" + "scaled_lb = dfin.min()[inputs].to_numpy()\n", + "scaled_ub = dfin.max()[inputs].to_numpy()" ] }, { @@ -211,13 +237,13 @@ ], "source": [ "# create our Keras Sequential model\n", - "nn = Sequential(name='reformer_sigmoid_4_20')\n", - "nn.add(Dense(units=20, input_dim=len(inputs), activation='sigmoid'))\n", - "nn.add(Dense(units=20, activation='sigmoid'))\n", - "nn.add(Dense(units=20, activation='sigmoid'))\n", - "nn.add(Dense(units=20, activation='sigmoid'))\n", + "nn = Sequential(name=\"reformer_sigmoid_4_20\")\n", + "nn.add(Dense(units=20, input_dim=len(inputs), activation=\"sigmoid\"))\n", + "nn.add(Dense(units=20, activation=\"sigmoid\"))\n", + "nn.add(Dense(units=20, activation=\"sigmoid\"))\n", + "nn.add(Dense(units=20, activation=\"sigmoid\"))\n", "nn.add(Dense(units=len(outputs)))\n", - "nn.compile(optimizer=Adam(), loss='mse')" + "nn.compile(optimizer=Adam(), loss=\"mse\")" ] }, { @@ -438,8 +464,8 @@ ], "source": [ "# train our model\n", - "x = dfin.values\n", - "y = dfout.values\n", + "x = dfin.to_numpy()\n", + "y = dfout.to_numpy()\n", "\n", "history = nn.fit(x, y, epochs=100)" ] @@ -457,7 +483,7 @@ "# save the model to disk\n", "# While not technically necessary, this shows how we can load a previously saved model into\n", "# our optimization formulation)\n", - "nn.save('reformer_nn.keras')" + "nn.save(\"reformer_nn.keras\")" ] }, { @@ -511,22 +537,24 @@ "outputs": [], "source": [ "# load the Keras model\n", - "nn_reformer = keras.models.load_model('reformer_nn.keras', compile=False)\n", + "nn_reformer = keras.models.load_model(\"reformer_nn.keras\", compile=False)\n", "\n", "# Note: The neural network is in the scaled space. 
We want access to the\n", "# variables in the unscaled space. Therefore, we need to tell OMLT about the\n", "# scaling factors\n", "scaler = OffsetScaling(\n", - " offset_inputs={i: x_offset[inputs[i]] for i in range(len(inputs))},\n", - " factor_inputs={i: x_factor[inputs[i]] for i in range(len(inputs))},\n", - " offset_outputs={i: y_offset[outputs[i]] for i in range(len(outputs))},\n", - " factor_outputs={i: y_factor[outputs[i]] for i in range(len(outputs))}\n", - " )\n", + " offset_inputs={i: x_offset[inputs[i]] for i in range(len(inputs))},\n", + " factor_inputs={i: x_factor[inputs[i]] for i in range(len(inputs))},\n", + " offset_outputs={i: y_offset[outputs[i]] for i in range(len(outputs))},\n", + " factor_outputs={i: y_factor[outputs[i]] for i in range(len(outputs))},\n", + ")\n", "\n", "scaled_input_bounds = {i: (scaled_lb[i], scaled_ub[i]) for i in range(len(inputs))}\n", "\n", "# create a network definition from the Keras model\n", - "net = load_keras_sequential(nn_reformer, scaling_object=scaler, scaled_input_bounds=scaled_input_bounds)\n", + "net = load_keras_sequential(\n", + " nn_reformer, scaling_object=scaler, scaled_input_bounds=scaled_input_bounds\n", + ")\n", "\n", "# create the variables and constraints for the neural network in Pyomo\n", "m.reformer.build_formulation(FullSpaceSmoothNNFormulation(net))" @@ -543,8 +571,8 @@ "outputs": [], "source": [ "# now add the objective and the constraints\n", - "h2_idx = outputs.index('H2')\n", - "n2_idx = outputs.index('N2')\n", + "h2_idx = outputs.index(\"H2\")\n", + "n2_idx = outputs.index(\"N2\")\n", "m.obj = pyo.Objective(expr=m.reformer.outputs[h2_idx], sense=pyo.maximize)\n", "m.con = pyo.Constraint(expr=m.reformer.outputs[n2_idx] <= 0.34)" ] @@ -687,7 +715,7 @@ ], "source": [ "# now solve the optimization problem\n", - "solver = pyo.SolverFactory('ipopt')\n", + "solver = pyo.SolverFactory(\"ipopt\")\n", "status = solver.solve(m, tee=True)" ] }, @@ -712,10 +740,10 @@ } ], "source": [ - "print('Bypass Fraction:', pyo.value(m.reformer.inputs[0]))\n", - "print('NG Steam Ratio:', pyo.value(m.reformer.inputs[1]))\n", - "print('H2 Concentration:', pyo.value(m.reformer.outputs[h2_idx]))\n", - "print('N2 Concentration:', pyo.value(m.reformer.outputs[n2_idx]))" + "print(\"Bypass Fraction:\", pyo.value(m.reformer.inputs[0]))\n", + "print(\"NG Steam Ratio:\", pyo.value(m.reformer.inputs[1]))\n", + "print(\"H2 Concentration:\", pyo.value(m.reformer.outputs[h2_idx]))\n", + "print(\"N2 Concentration:\", pyo.value(m.reformer.outputs[n2_idx]))" ] } ], diff --git a/docs/notebooks/neuralnet/build_network.ipynb b/docs/notebooks/neuralnet/build_network.ipynb index 4b0bd499..008042c1 100644 --- a/docs/notebooks/neuralnet/build_network.ipynb +++ b/docs/notebooks/neuralnet/build_network.ipynb @@ -37,11 +37,11 @@ "metadata": {}, "outputs": [], "source": [ - "import pyomo.environ as pyo\n", "import numpy as np\n", + "import pyomo.environ as pyo\n", "\n", "from omlt.neuralnet import NetworkDefinition\n", - "from omlt.neuralnet.layer import InputLayer, DenseLayer, IndexMapper" + "from omlt.neuralnet.layer import DenseLayer, IndexMapper, InputLayer" ] }, { @@ -75,10 +75,14 @@ "metadata": {}, "outputs": [], "source": [ - "net = NetworkDefinition(scaled_input_bounds={(0,0):(-1.0, 1.0), \n", - " (0,1):(-1.0, 1.0),\n", - " (1,0):(-1.0, 1.0),\n", - " (1,1):(-1.0, 1.0)})" + "net = NetworkDefinition(\n", + " scaled_input_bounds={\n", + " (0, 0): (-1.0, 1.0),\n", + " (0, 1): (-1.0, 1.0),\n", + " (1, 0): (-1.0, 1.0),\n", + " (1, 1): (-1.0, 1.0),\n", + " }\n", + 
")" ] }, { @@ -156,7 +160,7 @@ } ], "source": [ - "Image(filename='../images/input-layer.png', height=300)" + "Image(filename=\"../images/input-layer.png\", height=300)" ] }, { @@ -224,7 +228,7 @@ } ], "source": [ - "Image(filename='../images/dense-layer-0.png', height=250)" + "Image(filename=\"../images/dense-layer-0.png\", height=250)" ] }, { @@ -238,7 +242,7 @@ " output_size=[2, 1],\n", " activation=\"linear\",\n", " weights=np.array([[1.0], [-0.5]]),\n", - " biases=np.array([[0.1], [0.25]])\n", + " biases=np.array([[0.1], [0.25]]),\n", ")" ] }, @@ -288,7 +292,7 @@ } ], "source": [ - "Image(filename='../images/network-structure-0.png', height=200)" + "Image(filename=\"../images/network-structure-0.png\", height=200)" ] }, { @@ -368,10 +372,10 @@ "y = x\n", "z = np.maximum(0, y)\n", "\n", - "plt.plot(x, y, label='Before Activation')\n", - "plt.plot(x, z, label='After Activation')\n", - "plt.xlabel('x')\n", - "plt.ylabel('y')\n", + "plt.plot(x, y, label=\"Before Activation\")\n", + "plt.plot(x, z, label=\"After Activation\")\n", + "plt.xlabel(\"x\")\n", + "plt.ylabel(\"y\")\n", "plt.legend()" ] }, @@ -427,7 +431,7 @@ } ], "source": [ - "Image(filename='../images/dense-layer-1.png', height=250)" + "Image(filename=\"../images/dense-layer-1.png\", height=250)" ] }, { @@ -476,7 +480,7 @@ } ], "source": [ - "Image(filename='../images/network-structure-1.png', height=200)" + "Image(filename=\"../images/network-structure-1.png\", height=200)" ] }, { @@ -672,8 +676,10 @@ } ], "source": [ - "m.neural_net.layer[m.neural_net.layers.at(1)].z.pprint() #Note, the input layer does not have zhat\n", - "m.neural_net.layer[m.neural_net.layers.at(2)].zhat.pprint() " + "m.neural_net.layer[\n", + " m.neural_net.layers.at(1)\n", + "].z.pprint() # Note, the input layer does not have zhat\n", + "m.neural_net.layer[m.neural_net.layers.at(2)].zhat.pprint()" ] }, { diff --git a/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb b/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb index 69cb9675..2699d171 100644 --- a/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb +++ b/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb @@ -54,13 +54,13 @@ ], "source": [ "import numpy as np\n", + "import pyomo.environ as pyo\n", "import torch\n", "from torch.nn import Linear, ReLU, Sigmoid\n", - "from torch_geometric.nn import Sequential, GCNConv\n", - "from torch_geometric.nn import global_mean_pool\n", - "from omlt.io.torch_geometric import gnn_with_fixed_graph\n", - "import pyomo.environ as pyo\n", + "from torch_geometric.nn import GCNConv, Sequential, global_mean_pool\n", + "\n", "from omlt import OmltBlock\n", + "from omlt.io.torch_geometric import gnn_with_fixed_graph\n", "\n", "\n", "def GCN_Sequential(activation, pooling):\n", @@ -78,7 +78,7 @@ " activation(),\n", " Linear(2, 1),\n", " ],\n", - " )\n" + " )" ] }, { @@ -478,14 +478,13 @@ "outputs": [], "source": [ "import numpy as np\n", + "import pyomo.environ as pyo\n", "import torch\n", - "from torch.nn import Linear, ReLU\n", - "from torch_geometric.nn import Sequential, SAGEConv\n", - "from torch_geometric.nn import global_add_pool\n", - "from omlt.io.torch_geometric import gnn_with_non_fixed_graph\n", + "from torch.nn import ReLU\n", + "from torch_geometric.nn import SAGEConv, global_add_pool\n", "\n", - "import pyomo.environ as pyo\n", "from omlt import OmltBlock\n", + "from omlt.io.torch_geometric import gnn_with_non_fixed_graph\n", "\n", "\n", "def SAGE_Sequential(activation, pooling):\n", diff --git 
a/docs/notebooks/neuralnet/import_network.ipynb b/docs/notebooks/neuralnet/import_network.ipynb index 3f056572..673fa974 100644 --- a/docs/notebooks/neuralnet/import_network.ipynb +++ b/docs/notebooks/neuralnet/import_network.ipynb @@ -170,7 +170,7 @@ "source": [ "import pandas as pd\n", "\n", - "df = pd.read_csv('../data/diabetes.csv')\n", + "df = pd.read_csv(\"../data/diabetes.csv\")\n", "\n", "df.head()" ] @@ -215,8 +215,8 @@ "metadata": {}, "outputs": [], "source": [ - "X = df.iloc[:, :8].values\n", - "Y = df.iloc[:, 8].values" + "X = df.iloc[:, :8].to_numpy()\n", + "Y = df.iloc[:, 8].to_numpy()" ] }, { @@ -265,7 +265,7 @@ "\n", "lb = np.min(X, axis=0)\n", "ub = np.max(X, axis=0)\n", - "input_bounds = [(l, u) for l, u in zip(lb, ub)]\n", + "input_bounds = list(zip(lb, ub))\n", "input_bounds" ] }, @@ -292,7 +292,7 @@ } ], "source": [ - "from omlt.io import write_onnx_model_with_bounds, load_onnx_neural_network_with_bounds" + "from omlt.io import load_onnx_neural_network_with_bounds, write_onnx_model_with_bounds" ] }, { @@ -350,17 +350,17 @@ ], "source": [ "import os\n", - "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'}\n", - "import keras\n", - "from keras.models import Sequential\n", + "\n", + "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\" # or any {'0', '1', '2'}\n", "from keras.layers import Dense\n", + "from keras.models import Sequential\n", "\n", "model = Sequential()\n", - "model.add(Dense(12, input_dim=8, activation='relu'))\n", - "model.add(Dense(8, activation='relu'))\n", - "model.add(Dense(1, activation='linear'))\n", + "model.add(Dense(12, input_dim=8, activation=\"relu\"))\n", + "model.add(Dense(8, activation=\"relu\"))\n", + "model.add(Dense(1, activation=\"linear\"))\n", "\n", - "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])" + "model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])" ] }, { @@ -730,13 +730,13 @@ "# Add output_names for compatibility:\n", "model.output_names = [output.name for output in model.outputs]\n", "\n", - "from tensorflow import TensorSpec\n", "import tf2onnx\n", + "from tensorflow import TensorSpec\n", "\n", "spec = [TensorSpec(input.shape, input.dtype, input.name) for input in model.inputs]\n", "onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec)\n", "\n", - "with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:\n", + "with tempfile.NamedTemporaryFile(suffix=\".onnx\", delete=False) as f:\n", " write_onnx_model_with_bounds(f.name, onnx_model, input_bounds)\n", " print(f\"Wrote ONNX model with bounds at {f.name}\")" ] @@ -770,7 +770,7 @@ } ], "source": [ - "Image(filename='../images/simple-neural-network.png', height=600)" + "Image(filename=\"../images/simple-neural-network.png\", height=600)" ] }, { @@ -816,10 +816,11 @@ ], "source": [ "import torch\n", - "import torch.nn as nn\n", "import torch.nn.functional as F\n", + "from torch import nn\n", "from torch.utils.data import DataLoader, TensorDataset\n", "\n", + "\n", "class PyTorchModel(nn.Module):\n", " def __init__(self):\n", " super().__init__()\n", @@ -830,18 +831,20 @@ " def forward(self, x):\n", " x = F.relu(self.dense_0(x))\n", " x = F.relu(self.dense_1(x))\n", - " x = self.out(x)\n", - " return x\n", + " return self.out(x)\n", + "\n", "\n", "model = PyTorchModel()\n", "loss_function = nn.L1Loss()\n", - "optimizer = torch.optim.Adam(model.parameters(),lr=0.01)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)\n", "\n", - "dataset = 
TensorDataset(torch.as_tensor(X, dtype=torch.float32), torch.as_tensor(Y, dtype=torch.float32))\n", + "dataset = TensorDataset(\n", + " torch.as_tensor(X, dtype=torch.float32), torch.as_tensor(Y, dtype=torch.float32)\n", + ")\n", "dataloader = DataLoader(dataset, batch_size=10)\n", "\n", "for epoch in range(150):\n", - " for id_batch, (x_batch, y_batch) in enumerate(dataloader):\n", + " for x_batch, y_batch in dataloader:\n", " y_batch_pred = model(x_batch)\n", " loss = loss_function(y_batch_pred, y_batch.view(*y_batch_pred.shape))\n", " optimizer.zero_grad()\n", @@ -849,7 +852,7 @@ " optimizer.step()\n", "\n", " if epoch % 10 == 0:\n", - " print(f\"Epoch number: {epoch} loss : {loss.item()}\")\n" + " print(f\"Epoch number: {epoch} loss : {loss.item()}\")" ] }, { @@ -878,17 +881,14 @@ "# model input used for exporting\n", "x = torch.randn(10, 8, requires_grad=True)\n", "pytorch_model = None\n", - "with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:\n", + "with tempfile.NamedTemporaryFile(suffix=\".onnx\", delete=False) as f:\n", " torch.onnx.export(\n", " model,\n", " x,\n", " f,\n", - " input_names=['input'],\n", - " output_names=['output'],\n", - " dynamic_axes={\n", - " 'input': {0: 'batch_size'},\n", - " 'output': {0: 'batch_size'}\n", - " }\n", + " input_names=[\"input\"],\n", + " output_names=[\"output\"],\n", + " dynamic_axes={\"input\": {0: \"batch_size\"}, \"output\": {0: \"batch_size\"}},\n", " )\n", " write_onnx_model_with_bounds(f.name, None, input_bounds)\n", " print(f\"Wrote PyTorch model to {f.name}\")\n", @@ -917,7 +917,7 @@ } ], "source": [ - "Image(filename='../images/torch-neural-network.png', height=500)" + "Image(filename=\"../images/torch-neural-network.png\", height=500)" ] }, { diff --git a/docs/notebooks/neuralnet/index_handling.ipynb b/docs/notebooks/neuralnet/index_handling.ipynb index 36ed4338..7b3a9b6a 100644 --- a/docs/notebooks/neuralnet/index_handling.ipynb +++ b/docs/notebooks/neuralnet/index_handling.ipynb @@ -29,8 +29,9 @@ "outputs": [], "source": [ "import numpy as np\n", + "\n", "from omlt.neuralnet import NetworkDefinition\n", - "from omlt.neuralnet.layer import IndexMapper, InputLayer, DenseLayer, PoolingLayer2D" + "from omlt.neuralnet.layer import DenseLayer, IndexMapper, InputLayer, PoolingLayer2D" ] }, { diff --git a/docs/notebooks/neuralnet/mnist_example_convolutional.ipynb b/docs/notebooks/neuralnet/mnist_example_convolutional.ipynb index 1de8f770..cf44882c 100644 --- a/docs/notebooks/neuralnet/mnist_example_convolutional.ipynb +++ b/docs/notebooks/neuralnet/mnist_example_convolutional.ipynb @@ -47,26 +47,29 @@ } ], "source": [ - "#Import requisite packages\n", - "#data manipulation\n", - "import numpy as np\n", + "# Import requisite packages\n", + "# data manipulation\n", "import tempfile\n", "\n", - "#pytorch for training neural network\n", - "import torch, torch.onnx\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from torchvision import datasets, transforms\n", - "from torch.optim.lr_scheduler import StepLR\n", + "import numpy as np\n", "\n", - "#pyomo for optimization\n", + "# pyomo for optimization\n", "import pyomo.environ as pyo\n", "\n", - "#omlt for interfacing our neural network with pyomo\n", + "# pytorch for training neural network\n", + "import torch\n", + "import torch.onnx\n", + "from torch import nn, optim\n", + "from torch.optim.lr_scheduler import StepLR\n", + "from torchvision import datasets, transforms\n", + "\n", + "# omlt for interfacing our 
neural network with pyomo\n", "from omlt import OmltBlock\n", - "from omlt.neuralnet import FullSpaceNNFormulation\n", - "from omlt.io.onnx import write_onnx_model_with_bounds, load_onnx_neural_network_with_bounds" + "from omlt.io.onnx import (\n", + " load_onnx_neural_network_with_bounds,\n", + " write_onnx_model_with_bounds,\n", + ")\n", + "from omlt.neuralnet import FullSpaceNNFormulation" ] }, { @@ -84,14 +87,16 @@ "metadata": {}, "outputs": [], "source": [ - "#set training and test batch sizes\n", - "train_kwargs = {'batch_size': 64}\n", - "test_kwargs = {'batch_size': 1000}\n", + "# set training and test batch sizes\n", + "train_kwargs = {\"batch_size\": 64}\n", + "test_kwargs = {\"batch_size\": 1000}\n", "\n", - "#build DataLoaders for training and test sets\n", - "dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())\n", - "dataset2 = datasets.MNIST('../data', train=False, transform=transforms.ToTensor())\n", - "train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs, shuffle=True)\n", + "# build DataLoaders for training and test sets\n", + "dataset1 = datasets.MNIST(\n", + " \"../data\", train=True, download=True, transform=transforms.ToTensor()\n", + ")\n", + "dataset2 = datasets.MNIST(\"../data\", train=False, transform=transforms.ToTensor())\n", + "train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs, shuffle=True)\n", "test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)" ] }, @@ -110,28 +115,28 @@ "source": [ "hidden_size = 10\n", "\n", + "\n", "class Net(nn.Module):\n", - " #define layers of neural network\n", + " # define layers of neural network\n", " def __init__(self):\n", " super().__init__()\n", - " self.conv1 = nn.Conv2d(1, 2, (4,4), (2,2), 0)\n", - " self.conv2 = nn.Conv2d(2, 2, (4,4), (2,2), 0)\n", - " self.hidden1 = nn.Linear(5*5*2, hidden_size)\n", - " self.output = nn.Linear(hidden_size, 10)\n", + " self.conv1 = nn.Conv2d(1, 2, (4, 4), (2, 2), 0)\n", + " self.conv2 = nn.Conv2d(2, 2, (4, 4), (2, 2), 0)\n", + " self.hidden1 = nn.Linear(5 * 5 * 2, hidden_size)\n", + " self.output = nn.Linear(hidden_size, 10)\n", " self.relu = nn.ReLU()\n", " self.softmax = nn.LogSoftmax(dim=1)\n", "\n", - " #define forward pass of neural network\n", + " # define forward pass of neural network\n", " def forward(self, x):\n", " self.x1 = self.conv1(x)\n", " self.x2 = self.relu(self.x1)\n", " self.x3 = self.conv2(self.x2)\n", " self.x4 = self.relu(self.x3)\n", - " self.x5 = self.hidden1(self.x4.view((-1,5*5*2)))\n", + " self.x5 = self.hidden1(self.x4.view((-1, 5 * 5 * 2)))\n", " self.x6 = self.relu(self.x5)\n", " self.x7 = self.output(self.x6)\n", - " x = self.softmax(self.x7) \n", - " return x" + " return self.softmax(self.x7)" ] }, { @@ -147,33 +152,38 @@ "metadata": {}, "outputs": [], "source": [ - "#training function computes loss and its gradient on batch, and prints status after every 200 batches\n", + "# training function computes loss and its gradient on batch, and prints status after every 200 batches\n", "def train(model, train_loader, optimizer, epoch):\n", - " model.train(); criterion = nn.NLLLoss()\n", + " model.train()\n", + " criterion = nn.NLLLoss()\n", " for batch_idx, (data, target) in enumerate(train_loader):\n", " optimizer.zero_grad()\n", " output = model(data)\n", " loss = criterion(output, target)\n", " loss.backward()\n", " optimizer.step()\n", - " if batch_idx % 200 == 0:\n", - " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", - " epoch, batch_idx * 
len(data), len(train_loader.dataset),\n", - " 100. * batch_idx / len(train_loader), loss.item()))\n", + " if batch_idx % 200 == 0:\n", + " print(\n", + " f\"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx / len(train_loader):.0f}%)]\\tLoss: {loss.item():.6f}\"\n", + " )\n", "\n", - "#testing function computes loss and prints overall model accuracy on test set\n", + "\n", + "# testing function computes loss and prints overall model accuracy on test set\n", "def test(model, test_loader):\n", - " model.eval(); criterion = nn.NLLLoss(reduction='sum')\n", - " test_loss = 0; correct = 0\n", + " model.eval()\n", + " criterion = nn.NLLLoss(reduction=\"sum\")\n", + " test_loss = 0\n", + " correct = 0\n", " with torch.no_grad():\n", " for data, target in test_loader:\n", " output = model(data)\n", - " test_loss += criterion(output, target).item() \n", - " pred = output.argmax(dim=1, keepdim=True) \n", + " test_loss += criterion(output, target).item()\n", + " pred = output.argmax(dim=1, keepdim=True)\n", " correct += pred.eq(target.view_as(pred)).sum().item()\n", " test_loss /= len(test_loader.dataset)\n", - " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", - " test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset))) " + " print(\n", + " f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100.0 * correct / len(test_loader.dataset):.0f}%)\\n\"\n", + " )" ] }, { @@ -237,12 +247,12 @@ } ], "source": [ - "#define model and optimizer\n", + "# define model and optimizer\n", "model = Net()\n", "optimizer = optim.Adadelta(model.parameters(), lr=1)\n", "scheduler = StepLR(optimizer, step_size=1, gamma=0.7)\n", "\n", - "#train CNN model for five epochs\n", + "# train CNN model for five epochs\n", "for epoch in range(5):\n", " train(model, train_loader, optimizer, epoch)\n", " test(model, test_loader)\n", @@ -283,27 +293,27 @@ ], "source": [ "class NoSoftmaxNet(nn.Module):\n", - " #define layers of neural network\n", + " # define layers of neural network\n", " def __init__(self):\n", " super().__init__()\n", - " self.conv1 = nn.Conv2d(1, 2, (4,4), (2,2), 0)\n", - " self.conv2 = nn.Conv2d(2, 2, (4,4), (2,2), 0)\n", + " self.conv1 = nn.Conv2d(1, 2, (4, 4), (2, 2), 0)\n", + " self.conv2 = nn.Conv2d(2, 2, (4, 4), (2, 2), 0)\n", " self.hidden1 = nn.Linear(5 * 5 * 2, hidden_size)\n", - " self.output = nn.Linear(hidden_size, 10)\n", + " self.output = nn.Linear(hidden_size, 10)\n", " self.relu = nn.ReLU()\n", "\n", - " #define forward pass of neural network\n", + " # define forward pass of neural network\n", " def forward(self, x):\n", " self.x1 = self.conv1(x)\n", " self.x2 = self.relu(self.x1)\n", " self.x3 = self.conv2(self.x2)\n", " self.x4 = self.relu(self.x3)\n", - " self.x5 = self.hidden1(self.x4.view((-1,5*5*2)))\n", + " self.x5 = self.hidden1(self.x4.view((-1, 5 * 5 * 2)))\n", " self.x6 = self.relu(self.x5)\n", - " x = self.output(self.x6) \n", - " return x\n", + " return self.output(self.x6)\n", + "\n", "\n", - "#create neural network without LogSoftmax and load parameters from existing model\n", + "# create neural network without LogSoftmax and load parameters from existing model\n", "model2 = NoSoftmaxNet()\n", "model2.load_state_dict(model.state_dict())" ] @@ -331,24 +341,24 @@ "metadata": {}, "outputs": [], "source": [ - "#load image and true label from test set with index 'problem_index'\n", + "# load image and true label from test set with index 
'problem_index'\n", "problem_index = 0\n", "image = dataset2[problem_index][0].detach().numpy()\n", "label = dataset2[problem_index][1]\n", "\n", - "#define input region defined by infinity norm\n", + "# define input region defined by infinity norm\n", "epsilon_infty = 1e-3\n", "lb = np.maximum(0, image - epsilon_infty)\n", "ub = np.minimum(1, image + epsilon_infty)\n", "\n", - "#save input bounds as dictionary, note that the first index 0 corresponds to the single-channel input\n", + "# save input bounds as dictionary, note that the first index 0 corresponds to the single-channel input\n", "input_bounds = {}\n", "for i in range(28):\n", " for j in range(28):\n", - " input_bounds[(0,i,j)] = (float(lb[0][i,j]), float(ub[0][i,j])) \n", - " \n", - "#define dummy input tensor \n", - "x = dataset2[problem_index][0].view(-1,1,28,28)" + " input_bounds[(0, i, j)] = (float(lb[0][i, j]), float(ub[0][i, j]))\n", + "\n", + "# define dummy input tensor\n", + "x = dataset2[problem_index][0].view(-1, 1, 28, 28)" ] }, { @@ -364,22 +374,19 @@ "metadata": {}, "outputs": [], "source": [ - "with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:\n", - " #export neural network to ONNX\n", + "with tempfile.NamedTemporaryFile(suffix=\".onnx\", delete=False) as f:\n", + " # export neural network to ONNX\n", " torch.onnx.export(\n", " model2,\n", " x,\n", " f,\n", - " input_names=['input'],\n", - " output_names=['output'],\n", - " dynamic_axes={\n", - " 'input': {0: 'batch_size'},\n", - " 'output': {0: 'batch_size'}\n", - " }\n", + " input_names=[\"input\"],\n", + " output_names=[\"output\"],\n", + " dynamic_axes={\"input\": {0: \"batch_size\"}, \"output\": {0: \"batch_size\"}},\n", " )\n", - " #write ONNX model and its bounds using OMLT\n", + " # write ONNX model and its bounds using OMLT\n", " write_onnx_model_with_bounds(f.name, None, input_bounds)\n", - " #load the network definition from the ONNX model\n", + " # load the network definition from the ONNX model\n", " network_definition = load_onnx_neural_network_with_bounds(f.name)" ] }, @@ -798,12 +805,12 @@ } ], "source": [ - "#create pyomo model\n", + "# create pyomo model\n", "m = pyo.ConcreteModel()\n", "\n", - "#create an OMLT block for the neural network and build its formulation\n", + "# create an OMLT block for the neural network and build its formulation\n", "m.nn = OmltBlock()\n", - "m.nn.build_formulation(formulation) " + "m.nn.build_formulation(formulation)" ] }, { @@ -820,7 +827,7 @@ "outputs": [], "source": [ "adversary = (label + 1) % 10\n", - "m.obj = pyo.Objective(expr=(-(m.nn.outputs[0,adversary]-m.nn.outputs[0,label])))" + "m.obj = pyo.Objective(expr=(-(m.nn.outputs[0, adversary] - m.nn.outputs[0, label])))" ] }, { @@ -1003,7 +1010,7 @@ } ], "source": [ - "solver = pyo.SolverFactory('cbc')\n", + "solver = pyo.SolverFactory(\"cbc\")\n", "solver.solve(m, tee=True)" ] }, diff --git a/docs/notebooks/neuralnet/mnist_example_dense.ipynb b/docs/notebooks/neuralnet/mnist_example_dense.ipynb index e7af1f06..fecd4467 100644 --- a/docs/notebooks/neuralnet/mnist_example_dense.ipynb +++ b/docs/notebooks/neuralnet/mnist_example_dense.ipynb @@ -46,26 +46,29 @@ } ], "source": [ - "#Import requisite packages\n", - "#data manipulation\n", - "import numpy as np\n", + "# Import requisite packages\n", + "# data manipulation\n", "import tempfile\n", "\n", - "#pytorch for training neural network\n", - "import torch, torch.onnx\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from torchvision 
import datasets, transforms\n", - "from torch.optim.lr_scheduler import StepLR\n", + "import numpy as np\n", "\n", - "#pyomo for optimization\n", + "# pyomo for optimization\n", "import pyomo.environ as pyo\n", "\n", - "#omlt for interfacing our neural network with pyomo\n", + "# pytorch for training neural network\n", + "import torch\n", + "import torch.onnx\n", + "from torch import nn, optim\n", + "from torch.optim.lr_scheduler import StepLR\n", + "from torchvision import datasets, transforms\n", + "\n", + "# omlt for interfacing our neural network with pyomo\n", "from omlt import OmltBlock\n", - "from omlt.neuralnet import FullSpaceNNFormulation\n", - "from omlt.io.onnx import write_onnx_model_with_bounds, load_onnx_neural_network_with_bounds" + "from omlt.io.onnx import (\n", + " load_onnx_neural_network_with_bounds,\n", + " write_onnx_model_with_bounds,\n", + ")\n", + "from omlt.neuralnet import FullSpaceNNFormulation" ] }, { @@ -83,14 +86,16 @@ "metadata": {}, "outputs": [], "source": [ - "#set training and test batch sizes\n", - "train_kwargs = {'batch_size': 64}\n", - "test_kwargs = {'batch_size': 1000}\n", + "# set training and test batch sizes\n", + "train_kwargs = {\"batch_size\": 64}\n", + "test_kwargs = {\"batch_size\": 1000}\n", "\n", - "#build DataLoaders for training and test sets\n", - "dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())\n", - "dataset2 = datasets.MNIST('../data', train=False, transform=transforms.ToTensor())\n", - "train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs, shuffle=True)\n", + "# build DataLoaders for training and test sets\n", + "dataset1 = datasets.MNIST(\n", + " \"../data\", train=True, download=True, transform=transforms.ToTensor()\n", + ")\n", + "dataset2 = datasets.MNIST(\"../data\", train=False, transform=transforms.ToTensor())\n", + "train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs, shuffle=True)\n", "test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)" ] }, @@ -109,25 +114,25 @@ "source": [ "hidden_size = 50\n", "\n", + "\n", "class Net(nn.Module):\n", - " #define layers of neural network\n", + " # define layers of neural network\n", " def __init__(self):\n", " super().__init__()\n", - " self.hidden1 = nn.Linear(784, hidden_size)\n", - " self.hidden2 = nn.Linear(hidden_size, hidden_size)\n", - " self.output = nn.Linear(hidden_size, 10)\n", + " self.hidden1 = nn.Linear(784, hidden_size)\n", + " self.hidden2 = nn.Linear(hidden_size, hidden_size)\n", + " self.output = nn.Linear(hidden_size, 10)\n", " self.relu = nn.ReLU()\n", " self.softmax = nn.LogSoftmax(dim=1)\n", "\n", - " #define forward pass of neural network\n", + " # define forward pass of neural network\n", " def forward(self, x):\n", " x = self.hidden1(x)\n", " x = self.relu(x)\n", " x = self.hidden2(x)\n", " x = self.relu(x)\n", " x = self.output(x)\n", - " x = self.softmax(x) \n", - " return x" + " return self.softmax(x)" ] }, { @@ -143,33 +148,38 @@ "metadata": {}, "outputs": [], "source": [ - "#training function computes loss and its gradient on batch, and prints status after every 200 batches\n", + "# training function computes loss and its gradient on batch, and prints status after every 200 batches\n", "def train(model, train_loader, optimizer, epoch):\n", - " model.train(); criterion = nn.NLLLoss()\n", + " model.train()\n", + " criterion = nn.NLLLoss()\n", " for batch_idx, (data, target) in enumerate(train_loader):\n", " optimizer.zero_grad()\n", - " output = model(data.view(-1, 
28*28))\n", + " output = model(data.view(-1, 28 * 28))\n", " loss = criterion(output, target)\n", " loss.backward()\n", " optimizer.step()\n", - " if batch_idx % 200 == 0:\n", - " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", - " epoch, batch_idx * len(data), len(train_loader.dataset),\n", - " 100. * batch_idx / len(train_loader), loss.item()))\n", + " if batch_idx % 200 == 0:\n", + " print(\n", + " f\"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx / len(train_loader):.0f}%)]\\tLoss: {loss.item():.6f}\"\n", + " )\n", + "\n", "\n", - "#testing function computes loss and prints overall model accuracy on test set\n", + "# testing function computes loss and prints overall model accuracy on test set\n", "def test(model, test_loader):\n", - " model.eval(); criterion = nn.NLLLoss( reduction='sum')\n", - " test_loss = 0; correct = 0\n", + " model.eval()\n", + " criterion = nn.NLLLoss(reduction=\"sum\")\n", + " test_loss = 0\n", + " correct = 0\n", " with torch.no_grad():\n", " for data, target in test_loader:\n", - " output = model(data.view(-1, 28*28))\n", - " test_loss += criterion(output, target).item() \n", - " pred = output.argmax(dim=1, keepdim=True) \n", + " output = model(data.view(-1, 28 * 28))\n", + " test_loss += criterion(output, target).item()\n", + " pred = output.argmax(dim=1, keepdim=True)\n", " correct += pred.eq(target.view_as(pred)).sum().item()\n", " test_loss /= len(test_loader.dataset)\n", - " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", - " test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset))) " + " print(\n", + " f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100.0 * correct / len(test_loader.dataset):.0f}%)\\n\"\n", + " )" ] }, { @@ -233,12 +243,12 @@ } ], "source": [ - "#define model and optimizer\n", + "# define model and optimizer\n", "model = Net()\n", "optimizer = optim.Adadelta(model.parameters(), lr=1)\n", "scheduler = StepLR(optimizer, step_size=1, gamma=0.7)\n", "\n", - "#train neural network for five epochs\n", + "# train neural network for five epochs\n", "for epoch in range(5):\n", " train(model, train_loader, optimizer, epoch)\n", " test(model, test_loader)\n", @@ -279,24 +289,24 @@ ], "source": [ "class NoSoftmaxNet(nn.Module):\n", - " #define layers of neural network\n", + " # define layers of neural network\n", " def __init__(self):\n", " super().__init__()\n", - " self.hidden1 = nn.Linear(784, hidden_size)\n", - " self.hidden2 = nn.Linear(hidden_size, hidden_size)\n", - " self.output = nn.Linear(hidden_size, 10)\n", + " self.hidden1 = nn.Linear(784, hidden_size)\n", + " self.hidden2 = nn.Linear(hidden_size, hidden_size)\n", + " self.output = nn.Linear(hidden_size, 10)\n", " self.relu = nn.ReLU()\n", "\n", - " #define forward pass of neural network\n", + " # define forward pass of neural network\n", " def forward(self, x):\n", " x = self.hidden1(x)\n", " x = self.relu(x)\n", " x = self.hidden2(x)\n", " x = self.relu(x)\n", - " x = self.output(x)\n", - " return x\n", + " return self.output(x)\n", + "\n", "\n", - "#create neural network without LogSoftmax and load parameters from existing model\n", + "# create neural network without LogSoftmax and load parameters from existing model\n", "model2 = NoSoftmaxNet()\n", "model2.load_state_dict(model.state_dict())" ] @@ -324,23 +334,23 @@ "metadata": {}, "outputs": [], "source": [ - "#load image and true label from test set 
with index 'problem_index'\n", + "# load image and true label from test set with index 'problem_index'\n", "problem_index = 0\n", - "image = dataset2[problem_index][0].view(-1,28*28).detach().numpy()\n", + "image = dataset2[problem_index][0].view(-1, 28 * 28).detach().numpy()\n", "label = dataset2[problem_index][1]\n", "\n", - "#define input region defined by infinity norm\n", + "# define input region defined by infinity norm\n", "epsilon_infty = 5e-2\n", "lb = np.maximum(0, image - epsilon_infty)\n", "ub = np.minimum(1, image + epsilon_infty)\n", "\n", - "#save input bounds as dictionary\n", + "# save input bounds as dictionary\n", "input_bounds = {}\n", - "for i in range(28*28):\n", - " input_bounds[i] = (float(lb[0][i]), float(ub[0][i])) \n", - " \n", - "#define dummy input tensor \n", - "x_temp = dataset2[problem_index][0].view(-1,28*28)" + "for i in range(28 * 28):\n", + " input_bounds[i] = (float(lb[0][i]), float(ub[0][i]))\n", + "\n", + "# define dummy input tensor\n", + "x_temp = dataset2[problem_index][0].view(-1, 28 * 28)" ] }, { @@ -356,22 +366,19 @@ "metadata": {}, "outputs": [], "source": [ - "with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:\n", - " #export neural network to ONNX\n", + "with tempfile.NamedTemporaryFile(suffix=\".onnx\", delete=False) as f:\n", + " # export neural network to ONNX\n", " torch.onnx.export(\n", " model2,\n", " x_temp,\n", " f,\n", - " input_names=['input'],\n", - " output_names=['output'],\n", - " dynamic_axes={\n", - " 'input': {0: 'batch_size'},\n", - " 'output': {0: 'batch_size'}\n", - " }\n", + " input_names=[\"input\"],\n", + " output_names=[\"output\"],\n", + " dynamic_axes={\"input\": {0: \"batch_size\"}, \"output\": {0: \"batch_size\"}},\n", " )\n", - " #write ONNX model and its bounds using OMLT\n", + " # write ONNX model and its bounds using OMLT\n", " write_onnx_model_with_bounds(f.name, None, input_bounds)\n", - " #load the network definition from the ONNX model\n", + " # load the network definition from the ONNX model\n", " network_definition = load_onnx_neural_network_with_bounds(f.name)" ] }, @@ -777,12 +784,12 @@ } ], "source": [ - "#create pyomo model\n", + "# create pyomo model\n", "m = pyo.ConcreteModel()\n", "\n", - "#create an OMLT block for the neural network and build its formulation\n", + "# create an OMLT block for the neural network and build its formulation\n", "m.nn = OmltBlock()\n", - "m.nn.build_formulation(formulation) " + "m.nn.build_formulation(formulation)" ] }, { @@ -799,7 +806,7 @@ "outputs": [], "source": [ "adversary = (label + 1) % 10\n", - "m.obj = pyo.Objective(expr=(-(m.nn.outputs[adversary]-m.nn.outputs[label])))" + "m.obj = pyo.Objective(expr=(-(m.nn.outputs[adversary] - m.nn.outputs[label])))" ] }, { @@ -961,7 +968,7 @@ } ], "source": [ - "pyo.SolverFactory('cbc').solve(m, tee=True)" + "pyo.SolverFactory(\"cbc\").solve(m, tee=True)" ] } ], diff --git a/docs/notebooks/neuralnet/neural_network_formulations.ipynb b/docs/notebooks/neuralnet/neural_network_formulations.ipynb index 3317acd9..07613122 100644 --- a/docs/notebooks/neuralnet/neural_network_formulations.ipynb +++ b/docs/notebooks/neuralnet/neural_network_formulations.ipynb @@ -45,6 +45,7 @@ { "cell_type": "code", "execution_count": 1, + "id": "7fb27b941602401d91542211134fc71a", "metadata": { "pycharm": { "name": "#%%\n" @@ -62,31 +63,37 @@ } ], "source": [ - "#Start by importing the following libraries\n", - "#data manipulation and plotting\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as 
plt\n", + "# Start by importing the following libraries\n", + "# data manipulation and plotting\n", "import matplotlib\n", - "matplotlib.rc('font', size=24)\n", - "plt.rc('axes', titlesize=24)\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", "\n", - "#tensorflow objects\n", - "from tensorflow.keras.models import Sequential, Model\n", + "matplotlib.rc(\"font\", size=24)\n", + "plt.rc(\"axes\", titlesize=24)\n", + "\n", + "# tensorflow objects\n", + "# pyomo for optimization\n", + "import pyomo.environ as pyo\n", "from tensorflow.keras.layers import Dense, Input\n", + "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.optimizers import Adam\n", "\n", - "#pyomo for optimization\n", - "import pyomo.environ as pyo\n", + "import omlt\n", "\n", - "#omlt for interfacing our neural network with pyomo\n", + "# omlt for interfacing our neural network with pyomo\n", "from omlt import OmltBlock\n", - "from omlt.neuralnet import NetworkDefinition, FullSpaceNNFormulation, \\\n", - "FullSpaceSmoothNNFormulation, ReducedSpaceSmoothNNFormulation, ReluBigMFormulation,\\\n", - "ReluComplementarityFormulation, ReluPartitionFormulation\n", - "from omlt.neuralnet.activations import ComplementarityReLUActivation\n", "from omlt.io.keras import keras_reader\n", - "import omlt" + "from omlt.neuralnet import (\n", + " FullSpaceNNFormulation,\n", + " FullSpaceSmoothNNFormulation,\n", + " ReducedSpaceSmoothNNFormulation,\n", + " ReluBigMFormulation,\n", + " ReluComplementarityFormulation,\n", + " ReluPartitionFormulation,\n", + ")\n", + "from omlt.neuralnet.activations import ComplementarityReLUActivation" ] }, { @@ -116,6 +123,7 @@ { "cell_type": "code", "execution_count": 2, + "id": "acae54e37e7d407bbb7b55eff062a284", "metadata": { "pycharm": { "name": "#%%\n" @@ -123,7 +131,7 @@ }, "outputs": [], "source": [ - "df = pd.read_csv(\"../data/sin_quadratic.csv\",index_col=[0]);" + "df = pd.read_csv(\"../data/sin_quadratic.csv\", index_col=[0]);" ] }, { @@ -141,6 +149,7 @@ { "cell_type": "code", "execution_count": 3, + "id": "9a63283cbaf04dbcab1f6479b197f3a8", "metadata": { "pycharm": { "name": "#%%\n" @@ -159,27 +168,27 @@ } ], "source": [ - "#retrieve input 'x' and output 'y' from the dataframe\n", + "# retrieve input 'x' and output 'y' from the dataframe\n", "x = df[\"x\"]\n", "y = df[\"y\"]\n", "\n", - "#calculate mean and standard deviation, add scaled 'x' and scaled 'y' to the dataframe\n", + "# calculate mean and standard deviation, add scaled 'x' and scaled 'y' to the dataframe\n", "mean_data = df.mean(axis=0)\n", "std_data = df.std(axis=0)\n", - "df[\"x_scaled\"] = (df['x'] - mean_data['x']) / std_data['x']\n", - "df[\"y_scaled\"] = (df['y'] - mean_data['y']) / std_data['y']\n", + "df[\"x_scaled\"] = (df[\"x\"] - mean_data[\"x\"]) / std_data[\"x\"]\n", + "df[\"y_scaled\"] = (df[\"y\"] - mean_data[\"y\"]) / std_data[\"y\"]\n", "\n", - "#create plots for unscaled and scaled data\n", - "f, (ax1, ax2) = plt.subplots(1, 2,figsize = (16,8))\n", + "# create plots for unscaled and scaled data\n", + "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))\n", "\n", "ax1.plot(x, y)\n", "ax1.set_xlabel(\"x\")\n", - "ax1.set_ylabel(\"y\");\n", + "ax1.set_ylabel(\"y\")\n", "ax1.set_title(\"Training Data\")\n", "\n", "ax2.plot(df[\"x_scaled\"], df[\"y_scaled\"])\n", "ax2.set_xlabel(\"x_scaled\")\n", - "ax2.set_ylabel(\"y_scaled\");\n", + "ax2.set_ylabel(\"y_scaled\")\n", "ax2.set_title(\"Scaled Training Data\")\n", "\n", "plt.tight_layout()" @@ -205,6 +214,7 @@ 
{ "cell_type": "code", "execution_count": 4, + "id": "8dd0d8092fe74a7c96281538738b07e2", "metadata": { "pycharm": { "name": "#%%\n" @@ -212,34 +222,35 @@ }, "outputs": [], "source": [ - "#sigmoid neural network\n", - "nn1 = Sequential(name='sin_wave_sigmoid')\n", + "# sigmoid neural network\n", + "nn1 = Sequential(name=\"sin_wave_sigmoid\")\n", "nn1.add(Input(np.array((1,))))\n", - "nn1.add(Dense(50, activation='sigmoid'))\n", - "nn1.add(Dense(50, activation='sigmoid'))\n", + "nn1.add(Dense(50, activation=\"sigmoid\"))\n", + "nn1.add(Dense(50, activation=\"sigmoid\"))\n", "nn1.add(Dense(1))\n", - "nn1.compile(optimizer=Adam(), loss='mse')\n", + "nn1.compile(optimizer=Adam(), loss=\"mse\")\n", "\n", - "#relu neural network\n", - "nn2 = Sequential(name='sin_wave_relu')\n", + "# relu neural network\n", + "nn2 = Sequential(name=\"sin_wave_relu\")\n", "nn2.add(Input(np.array((1,))))\n", - "nn2.add(Dense(30, activation='relu'))\n", - "nn2.add(Dense(30, activation='relu'))\n", + "nn2.add(Dense(30, activation=\"relu\"))\n", + "nn2.add(Dense(30, activation=\"relu\"))\n", "nn2.add(Dense(1))\n", - "nn2.compile(optimizer=Adam(), loss='mse')\n", + "nn2.compile(optimizer=Adam(), loss=\"mse\")\n", "\n", - "#mixed neural network\n", - "nn3 = Sequential(name='sin_wave_mixed')\n", + "# mixed neural network\n", + "nn3 = Sequential(name=\"sin_wave_mixed\")\n", "nn3.add(Input(np.array((1,))))\n", - "nn3.add(Dense(50, activation='sigmoid'))\n", - "nn3.add(Dense(50, activation='relu'))\n", + "nn3.add(Dense(50, activation=\"sigmoid\"))\n", + "nn3.add(Dense(50, activation=\"relu\"))\n", "nn3.add(Dense(1))\n", - "nn3.compile(optimizer=Adam(), loss='mse')" + "nn3.compile(optimizer=Adam(), loss=\"mse\")" ] }, { "cell_type": "code", "execution_count": 5, + "id": "72eea5119410473aa328ad9291626812", "metadata": { "pycharm": { "name": "#%%\n" @@ -850,15 +861,14 @@ "\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 923us/step - loss: 0.0090\n", "Epoch 150/150\n", "\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 929us/step - loss: 0.0087\n" - ] } ], "source": [ - "#train all three neural networks\n", - "history1 = nn1.fit(x=df['x_scaled'], y=df['y_scaled'],verbose=1, epochs=75)\n", - "history2 = nn2.fit(x=df['x_scaled'], y=df['y_scaled'],verbose=1, epochs=75)\n", - "history3 = nn3.fit(x=df['x_scaled'], y=df['y_scaled'],verbose=1, epochs=150)" + "# train all three neural networks\n", + "history1 = nn1.fit(x=df[\"x_scaled\"], y=df[\"y_scaled\"], verbose=1, epochs=75)\n", + "history2 = nn2.fit(x=df[\"x_scaled\"], y=df[\"y_scaled\"], verbose=1, epochs=75)\n", + "history3 = nn3.fit(x=df[\"x_scaled\"], y=df[\"y_scaled\"], verbose=1, epochs=150)" ] }, { @@ -877,6 +887,7 @@ { "cell_type": "code", "execution_count": 6, + "id": "8edb47106e1a46a883d545849b8ab81b", "metadata": { "pycharm": { "name": "#%%\n" @@ -894,23 +905,24 @@ } ], "source": [ - "#note: we calculate the unscaled output for each neural network to check the predictions\n", - "#nn1\n", - "y_predict_scaled_sigmoid = nn1.predict(x=df['x_scaled'])\n", - "y_predict_sigmoid = y_predict_scaled_sigmoid*(std_data['y']) + mean_data['y']\n", - "\n", - "#nn2\n", - "y_predict_scaled_relu = nn2.predict(x=df['x_scaled'])\n", - "y_predict_relu = y_predict_scaled_relu*(std_data['y']) + mean_data['y']\n", - "\n", - "#nn3\n", - "y_predict_scaled_mixed = nn3.predict(x=df['x_scaled'])\n", - "y_predict_mixed = y_predict_scaled_mixed*(std_data['y']) + mean_data['y']" + "# note: 
we calculate the unscaled output for each neural network to check the predictions\n", + "# nn1\n", + "y_predict_scaled_sigmoid = nn1.predict(x=df[\"x_scaled\"])\n", + "y_predict_sigmoid = y_predict_scaled_sigmoid * (std_data[\"y\"]) + mean_data[\"y\"]\n", + "\n", + "# nn2\n", + "y_predict_scaled_relu = nn2.predict(x=df[\"x_scaled\"])\n", + "y_predict_relu = y_predict_scaled_relu * (std_data[\"y\"]) + mean_data[\"y\"]\n", + "\n", + "# nn3\n", + "y_predict_scaled_mixed = nn3.predict(x=df[\"x_scaled\"])\n", + "y_predict_mixed = y_predict_scaled_mixed * (std_data[\"y\"]) + mean_data[\"y\"]" ] }, { "cell_type": "code", "execution_count": 7, + "id": "10185d26023b46108eb7d9f57d49d2b3", "metadata": { "pycharm": { "name": "#%%\n" @@ -929,12 +941,12 @@ } ], "source": [ - "#create a single plot with the original data and each neural network's predictions\n", - "fig,ax = plt.subplots(1,figsize = (8,8))\n", - "ax.plot(x,y,linewidth = 3.0,label = \"data\", alpha = 0.5)\n", - "ax.plot(x,y_predict_relu,linewidth = 3.0,linestyle=\"dotted\",label = \"relu\")\n", - "ax.plot(x,y_predict_sigmoid,linewidth = 3.0,linestyle=\"dotted\",label = \"sigmoid\")\n", - "ax.plot(x,y_predict_mixed,linewidth = 3.0,linestyle=\"dotted\",label = \"mixed\")\n", + "# create a single plot with the original data and each neural network's predictions\n", + "fig, ax = plt.subplots(1, figsize=(8, 8))\n", + "ax.plot(x, y, linewidth=3.0, label=\"data\", alpha=0.5)\n", + "ax.plot(x, y_predict_relu, linewidth=3.0, linestyle=\"dotted\", label=\"relu\")\n", + "ax.plot(x, y_predict_sigmoid, linewidth=3.0, linestyle=\"dotted\", label=\"sigmoid\")\n", + "ax.plot(x, y_predict_mixed, linewidth=3.0, linestyle=\"dotted\", label=\"mixed\")\n", "plt.xlabel(\"x\")\n", "plt.ylabel(\"y\")\n", "plt.legend();" @@ -1051,6 +1063,7 @@ { "cell_type": "code", "execution_count": 8, + "id": "8763a12b2bbd4a93a75aff182afb95dc", "metadata": { "pycharm": { "name": "#%%\n" @@ -1067,17 +1080,23 @@ } ], "source": [ - "#create an omlt scaling object\n", - "scaler = omlt.scaling.OffsetScaling(offset_inputs=[mean_data['x']],\n", - " factor_inputs=[std_data['x']],\n", - " offset_outputs=[mean_data['y']],\n", - " factor_outputs=[std_data['y']])\n", - "\n", - "#create the input bounds. note that the key `0` corresponds to input `0` and that we also scale the input bounds\n", - "input_bounds={0:((min(df['x']) - mean_data['x'])/std_data['x'],\n", - " (max(df['x']) - mean_data['x'])/std_data['x'])};\n", + "# create an omlt scaling object\n", + "scaler = omlt.scaling.OffsetScaling(\n", + " offset_inputs=[mean_data[\"x\"]],\n", + " factor_inputs=[std_data[\"x\"]],\n", + " offset_outputs=[mean_data[\"y\"]],\n", + " factor_outputs=[std_data[\"y\"]],\n", + ")\n", + "\n", + "# create the input bounds. 
note that the key `0` corresponds to input `0` and that we also scale the input bounds\n", + "input_bounds = {\n", + " 0: (\n", + " (min(df[\"x\"]) - mean_data[\"x\"]) / std_data[\"x\"],\n", + " (max(df[\"x\"]) - mean_data[\"x\"]) / std_data[\"x\"],\n", + " )\n", + "}\n", "print(scaler)\n", - "print(\"Scaled input bounds: \",input_bounds)" + "print(\"Scaled input bounds: \", input_bounds)" ] }, { @@ -1099,6 +1118,7 @@ { "cell_type": "code", "execution_count": 9, + "id": "7623eae2785240b9bd12b16a66d81610", "metadata": { "pycharm": { "name": "#%%\n" @@ -1181,39 +1201,43 @@ } ], "source": [ - "#create a network definition\n", - "net_sigmoid = keras_reader.load_keras_sequential(nn1,scaler,input_bounds)\n", + "# create a network definition\n", + "net_sigmoid = keras_reader.load_keras_sequential(nn1, scaler, input_bounds)\n", "\n", - "#create a pyomo model with variables x and y\n", + "# create a pyomo model with variables x and y\n", "model1_reduced = pyo.ConcreteModel()\n", - "model1_reduced.x = pyo.Var(initialize = 0)\n", - "model1_reduced.y = pyo.Var(initialize = 0)\n", + "model1_reduced.x = pyo.Var(initialize=0)\n", + "model1_reduced.y = pyo.Var(initialize=0)\n", "model1_reduced.obj = pyo.Objective(expr=(model1_reduced.y))\n", "\n", - "#create an OmltBlock\n", + "# create an OmltBlock\n", "model1_reduced.nn = OmltBlock()\n", "\n", - "#use the reduced-space formulation\n", + "# use the reduced-space formulation\n", "formulation1_reduced = ReducedSpaceSmoothNNFormulation(net_sigmoid)\n", "model1_reduced.nn.build_formulation(formulation1_reduced)\n", "\n", - "#connect pyomo variables to the neural network\n", + "\n", + "# connect pyomo variables to the neural network\n", "@model1_reduced.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.nn.inputs[0]\n", "\n", + "\n", "@model1_reduced.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "#solve the model and query the solution\n", - "status_1_reduced = pyo.SolverFactory('ipopt').solve(model1_reduced, tee=True)\n", - "solution_1_reduced = (pyo.value(model1_reduced.x),pyo.value(model1_reduced.y))" + "\n", + "# solve the model and query the solution\n", + "status_1_reduced = pyo.SolverFactory(\"ipopt\").solve(model1_reduced, tee=True)\n", + "solution_1_reduced = (pyo.value(model1_reduced.x), pyo.value(model1_reduced.y))" ] }, { "cell_type": "code", "execution_count": 10, + "id": "7cdc8c89c7104fffa095e18ddfef8986", "metadata": { "pycharm": { "name": "#%%\n" @@ -1234,13 +1258,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Reduced Space Solution:\")\n", - "print(\"# of variables: \",model1_reduced.nvariables())\n", - "print(\"# of constraints: \",model1_reduced.nconstraints())\n", + "print(\"# of variables: \", model1_reduced.nvariables())\n", + "print(\"# of constraints: \", model1_reduced.nconstraints())\n", "print(\"x = \", solution_1_reduced[0])\n", "print(\"y = \", solution_1_reduced[1])\n", - "print(\"Solve Time: \", status_1_reduced['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_1_reduced[\"Solver\"][0][\"Time\"])" ] }, { @@ -1261,6 +1285,7 @@ { "cell_type": "code", "execution_count": 11, + "id": "b118ea5561624da68c537baed56e602f", "metadata": { "pycharm": { "name": "#%%\n" @@ -1447,32 +1472,36 @@ } ], "source": [ - "net_sigmoid = keras_reader.load_keras_sequential(nn1,scaler,input_bounds)\n", + "net_sigmoid = keras_reader.load_keras_sequential(nn1, scaler, input_bounds)\n", "\n", 
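Aside for readers skimming this long notebook diff: every formulation cell touched here follows the same OMLT pattern, so a minimal sketch of that pattern may help when reviewing the reformatted cells. This is illustrative only and assumes a trained Keras model nn1, the OffsetScaling object scaler, and the input_bounds dictionary defined earlier in the notebook.

# Minimal sketch of the OMLT pattern used throughout this notebook
# (assumes nn1, scaler, and input_bounds are defined as in the cells above).
import pyomo.environ as pyo

from omlt import OmltBlock
from omlt.io.keras import keras_reader
from omlt.neuralnet import FullSpaceSmoothNNFormulation

# wrap the trained Keras network in an OMLT network definition
net = keras_reader.load_keras_sequential(nn1, scaler, input_bounds)

# build a Pyomo model that minimizes the network output over the input bounds
m = pyo.ConcreteModel()
m.x = pyo.Var(initialize=0)
m.y = pyo.Var(initialize=0)
m.obj = pyo.Objective(expr=m.y)
m.nn = OmltBlock()
m.nn.build_formulation(FullSpaceSmoothNNFormulation(net))


@m.Constraint()
def connect_inputs(mdl):
    return mdl.x == mdl.nn.inputs[0]


@m.Constraint()
def connect_outputs(mdl):
    return mdl.y == mdl.nn.outputs[0]


status = pyo.SolverFactory("ipopt").solve(m, tee=False)
print(pyo.value(m.x), pyo.value(m.y))

From cell to cell only the formulation class and solver change (for example, ReluBigMFormulation or ReluPartitionFormulation solved with cbc for the ReLU network).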
"model1_full = pyo.ConcreteModel()\n", - "model1_full.x = pyo.Var(initialize = 0)\n", - "model1_full.y = pyo.Var(initialize = 0)\n", + "model1_full.x = pyo.Var(initialize=0)\n", + "model1_full.y = pyo.Var(initialize=0)\n", "model1_full.obj = pyo.Objective(expr=(model1_full.y))\n", "model1_full.nn = OmltBlock()\n", "\n", "formulation2_full = FullSpaceSmoothNNFormulation(net_sigmoid)\n", "model1_full.nn.build_formulation(formulation2_full)\n", "\n", + "\n", "@model1_full.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.nn.inputs[0]\n", "\n", + "\n", "@model1_full.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "status_1_full = pyo.SolverFactory('ipopt').solve(model1_full, tee=True)\n", - "solution_1_full = (pyo.value(model1_full.x),pyo.value(model1_full.y))" + "\n", + "status_1_full = pyo.SolverFactory(\"ipopt\").solve(model1_full, tee=True)\n", + "solution_1_full = (pyo.value(model1_full.x), pyo.value(model1_full.y))" ] }, { "cell_type": "code", "execution_count": 12, + "id": "938c804e27f84196a10c8828c723f798", "metadata": { "pycharm": { "name": "#%%\n" @@ -1493,13 +1522,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Full Space Solution:\")\n", - "print(\"# of variables: \",model1_full.nvariables())\n", - "print(\"# of constraints: \",model1_full.nconstraints())\n", + "print(\"# of variables: \", model1_full.nvariables())\n", + "print(\"# of constraints: \", model1_full.nconstraints())\n", "print(\"x = \", solution_1_full[0])\n", "print(\"y = \", solution_1_full[1])\n", - "print(\"Solve Time: \", status_1_full['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_1_full[\"Solver\"][0][\"Time\"])" ] }, { @@ -1521,6 +1550,7 @@ { "cell_type": "code", "execution_count": 13, + "id": "504fb2a444614c0babb325280ed9130a", "metadata": { "pycharm": { "name": "#%%\n" @@ -1668,32 +1698,36 @@ } ], "source": [ - "net_relu = keras_reader.load_keras_sequential(nn2,scaler,input_bounds)\n", + "net_relu = keras_reader.load_keras_sequential(nn2, scaler, input_bounds)\n", "\n", "model2_comp = pyo.ConcreteModel()\n", - "model2_comp.x = pyo.Var(initialize = 0)\n", - "model2_comp.y = pyo.Var(initialize = 0)\n", + "model2_comp.x = pyo.Var(initialize=0)\n", + "model2_comp.y = pyo.Var(initialize=0)\n", "model2_comp.obj = pyo.Objective(expr=(model2_comp.y))\n", "model2_comp.nn = OmltBlock()\n", "\n", "formulation2_comp = ReluComplementarityFormulation(net_relu)\n", "model2_comp.nn.build_formulation(formulation2_comp)\n", "\n", + "\n", "@model2_comp.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.nn.inputs[0]\n", "\n", + "\n", "@model2_comp.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "status_2_comp = pyo.SolverFactory('ipopt').solve(model2_comp, tee=True)\n", - "solution_2_comp = (pyo.value(model2_comp.x),pyo.value(model2_comp.y))" + "\n", + "status_2_comp = pyo.SolverFactory(\"ipopt\").solve(model2_comp, tee=True)\n", + "solution_2_comp = (pyo.value(model2_comp.x), pyo.value(model2_comp.y))" ] }, { "cell_type": "code", "execution_count": 14, + "id": "59bbdb311c014d738909a11f9e486628", "metadata": { "pycharm": { "name": "#%%\n" @@ -1714,13 +1748,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"ReLU Complementarity Solution:\")\n", - "print(\"# of variables: \",model2_comp.nvariables())\n", - "print(\"# of 
constraints: \",model2_comp.nconstraints())\n", + "print(\"# of variables: \", model2_comp.nvariables())\n", + "print(\"# of constraints: \", model2_comp.nconstraints())\n", "print(\"x = \", solution_2_comp[0])\n", "print(\"y = \", solution_2_comp[1])\n", - "print(\"Solve Time: \", status_2_comp['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_2_comp[\"Solver\"][0][\"Time\"])" ] }, { @@ -1741,6 +1775,7 @@ { "cell_type": "code", "execution_count": 15, + "id": "b43b363d81ae4b689946ece5c682cd59", "metadata": { "pycharm": { "name": "#%%\n" @@ -1748,32 +1783,36 @@ }, "outputs": [], "source": [ - "net_relu = keras_reader.load_keras_sequential(nn2,scaler,input_bounds)\n", + "net_relu = keras_reader.load_keras_sequential(nn2, scaler, input_bounds)\n", "\n", "model2_bigm = pyo.ConcreteModel()\n", - "model2_bigm.x = pyo.Var(initialize = 0)\n", - "model2_bigm.y = pyo.Var(initialize = 0)\n", + "model2_bigm.x = pyo.Var(initialize=0)\n", + "model2_bigm.y = pyo.Var(initialize=0)\n", "model2_bigm.obj = pyo.Objective(expr=(model2_bigm.y))\n", "model2_bigm.nn = OmltBlock()\n", "\n", "formulation2_bigm = ReluBigMFormulation(net_relu)\n", "model2_bigm.nn.build_formulation(formulation2_bigm)\n", "\n", + "\n", "@model2_bigm.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.nn.inputs[0]\n", "\n", + "\n", "@model2_bigm.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "status_2_bigm = pyo.SolverFactory('cbc').solve(model2_bigm, tee=False)\n", - "solution_2_bigm = (pyo.value(model2_bigm.x),pyo.value(model2_bigm.y))" + "\n", + "status_2_bigm = pyo.SolverFactory(\"cbc\").solve(model2_bigm, tee=False)\n", + "solution_2_bigm = (pyo.value(model2_bigm.x), pyo.value(model2_bigm.y))" ] }, { "cell_type": "code", "execution_count": 16, + "id": "8a65eabff63a45729fe45fb5ade58bdc", "metadata": { "pycharm": { "name": "#%%\n" @@ -1794,13 +1833,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"ReLU BigM Solution:\")\n", - "print(\"# of variables: \",model2_bigm.nvariables())\n", - "print(\"# of constraints: \",model2_bigm.nconstraints())\n", + "print(\"# of variables: \", model2_bigm.nvariables())\n", + "print(\"# of constraints: \", model2_bigm.nconstraints())\n", "print(\"x = \", solution_2_bigm[0])\n", "print(\"y = \", solution_2_bigm[1])\n", - "print(\"Solve Time: \", status_2_bigm['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_2_bigm[\"Solver\"][0][\"Time\"])" ] }, { @@ -1822,6 +1861,7 @@ { "cell_type": "code", "execution_count": 17, + "id": "c3933fab20d04ec698c2621248eb3be0", "metadata": { "pycharm": { "name": "#%%\n" @@ -1956,46 +1996,54 @@ } ], "source": [ - "net_relu_partition = keras_reader.load_keras_sequential(nn2,scaler,input_bounds)\n", + "net_relu_partition = keras_reader.load_keras_sequential(nn2, scaler, input_bounds)\n", + "\n", "\n", - "#create a function that partitions a vector of weights w` into `n` partitions\n", - "#by default, the `ReluPartitionFormulation` will use this function with n=2\n", + "# create a function that partitions a vector of weights w` into `n` partitions\n", + "# by default, the `ReluPartitionFormulation` will use this function with n=2\n", "def partition_split_func(w, n):\n", " sorted_indexes = np.argsort(w)\n", " n = min(n, len(sorted_indexes))\n", " return np.array_split(sorted_indexes, n)\n", "\n", - "#change the number of partitions and create a function we can pass to the formulation\n", + "\n", + "# change the number 
of partitions and create a function we can pass to the formulation\n", "#'N = 1' corresponds to BigM, 'N = n_inputs' corresponds to a convex hull formulation\n", "N = 1\n", "split_func = lambda w: partition_split_func(w, N)\n", "\n", "model2_partition = pyo.ConcreteModel()\n", - "model2_partition.x = pyo.Var(initialize = 0)\n", - "model2_partition.y = pyo.Var(initialize = 0)\n", + "model2_partition.x = pyo.Var(initialize=0)\n", + "model2_partition.y = pyo.Var(initialize=0)\n", "model2_partition.obj = pyo.Objective(expr=(model2_partition.y))\n", "model2_partition.nn = OmltBlock()\n", "\n", - "formulation2_partition = ReluPartitionFormulation(net_relu_partition, split_func=split_func)\n", + "formulation2_partition = ReluPartitionFormulation(\n", + " net_relu_partition, split_func=split_func\n", + ")\n", "model2_partition.nn.build_formulation(formulation2_partition)\n", "\n", + "\n", "@model2_partition.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.nn.inputs[0]\n", "\n", + "\n", "@model2_partition.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "solver = pyo.SolverFactory('cbc')\n", + "\n", + "solver = pyo.SolverFactory(\"cbc\")\n", "solver.options[\"printingOptions\"] = \"normal\"\n", - "status_2_partition=solver.solve(model2_partition, tee=True)\n", - "solution_2_partition = (pyo.value(model2_partition.x),pyo.value(model2_partition.y))" + "status_2_partition = solver.solve(model2_partition, tee=True)\n", + "solution_2_partition = (pyo.value(model2_partition.x), pyo.value(model2_partition.y))" ] }, { "cell_type": "code", "execution_count": 18, + "id": "4dd4641cc4064e0191573fe9c69df29b", "metadata": { "pycharm": { "name": "#%%\n" @@ -2016,13 +2064,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"ReLU Partition Solution:\")\n", - "print(\"# of variables: \",model2_partition.nvariables())\n", - "print(\"# of constraints: \",model2_partition.nconstraints())\n", + "print(\"# of variables: \", model2_partition.nvariables())\n", + "print(\"# of constraints: \", model2_partition.nconstraints())\n", "print(\"x = \", solution_2_partition[0])\n", "print(\"y = \", solution_2_partition[1])\n", - "print(\"Solve Time: \", status_2_partition['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_2_partition[\"Solver\"][0][\"Time\"])" ] }, { @@ -2041,6 +2089,7 @@ { "cell_type": "code", "execution_count": 19, + "id": "8309879909854d7188b41380fd92a7c3", "metadata": { "pycharm": { "name": "#%%\n" @@ -2173,34 +2222,39 @@ } ], "source": [ - "net_mixed = keras_reader.load_keras_sequential(nn3,scaler,input_bounds)\n", + "net_mixed = keras_reader.load_keras_sequential(nn3, scaler, input_bounds)\n", "\n", "model3_mixed = pyo.ConcreteModel()\n", - "model3_mixed.x = pyo.Var(initialize = 0)\n", - "model3_mixed.y = pyo.Var(initialize = 0)\n", + "model3_mixed.x = pyo.Var(initialize=0)\n", + "model3_mixed.y = pyo.Var(initialize=0)\n", "model3_mixed.obj = pyo.Objective(expr=(model3_mixed.y))\n", "model3_mixed.nn = OmltBlock()\n", "\n", - "formulation3_mixed = FullSpaceNNFormulation(net_mixed,activation_constraints={\n", - " \"relu\": ComplementarityReLUActivation()})\n", + "formulation3_mixed = FullSpaceNNFormulation(\n", + " net_mixed, activation_constraints={\"relu\": ComplementarityReLUActivation()}\n", + ")\n", "model3_mixed.nn.build_formulation(formulation3_mixed)\n", "\n", + "\n", "@model3_mixed.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == 
mdl.nn.inputs[0]\n", "\n", + "\n", "@model3_mixed.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.nn.outputs[0]\n", "\n", - "solver = pyo.SolverFactory('ipopt')\n", - "status_3_mixed = solver.solve(model3_mixed, tee='true')\n", - "solution_3_mixed = (pyo.value(model3_mixed.x),pyo.value(model3_mixed.y))" + "\n", + "solver = pyo.SolverFactory(\"ipopt\")\n", + "status_3_mixed = solver.solve(model3_mixed, tee=\"true\")\n", + "solution_3_mixed = (pyo.value(model3_mixed.x), pyo.value(model3_mixed.y))" ] }, { "cell_type": "code", "execution_count": 20, + "id": "3ed186c9a28b402fb0bc4494df01f08d", "metadata": { "pycharm": { "name": "#%%\n" @@ -2221,13 +2275,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Mixed NN Solution:\")\n", - "print(\"# of variables: \",model3_mixed.nvariables())\n", - "print(\"# of constraints: \",model3_mixed.nconstraints())\n", + "print(\"# of variables: \", model3_mixed.nvariables())\n", + "print(\"# of constraints: \", model3_mixed.nconstraints())\n", "print(\"x = \", solution_3_mixed[0])\n", "print(\"y = \", solution_3_mixed[1])\n", - "print(\"Solve Time: \", status_3_mixed['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_3_mixed[\"Solver\"][0][\"Time\"])" ] }, { @@ -2253,6 +2307,7 @@ { "cell_type": "code", "execution_count": 21, + "id": "cb1e1581032b452c9409d6c6813c49d1", "metadata": { "pycharm": { "name": "#%%\n" @@ -2271,28 +2326,50 @@ } ], "source": [ - "#create a plot with 3 subplots\n", - "fig,axs = plt.subplots(1,3,figsize = (24,8))\n", + "# create a plot with 3 subplots\n", + "fig, axs = plt.subplots(1, 3, figsize=(24, 8))\n", "\n", - "#nn1 - sigmoid\n", - "axs[0].plot(x,y_predict_sigmoid,linewidth = 3.0,linestyle=\"dotted\",color = \"orange\")\n", + "# nn1 - sigmoid\n", + "axs[0].plot(x, y_predict_sigmoid, linewidth=3.0, linestyle=\"dotted\", color=\"orange\")\n", "axs[0].set_title(\"sigmoid\")\n", - "axs[0].scatter([solution_1_reduced[0]],[solution_1_reduced[1]],color = \"black\",s = 300, label=\"reduced space\")\n", - "axs[0].scatter([solution_1_full[0]],[solution_1_full[1]],color = \"blue\",s = 300, label=\"full space\")\n", + "axs[0].scatter(\n", + " [solution_1_reduced[0]],\n", + " [solution_1_reduced[1]],\n", + " color=\"black\",\n", + " s=300,\n", + " label=\"reduced space\",\n", + ")\n", + "axs[0].scatter(\n", + " [solution_1_full[0]], [solution_1_full[1]], color=\"blue\", s=300, label=\"full space\"\n", + ")\n", "axs[0].legend()\n", "\n", - "#nn2 - relu\n", - "axs[1].plot(x,y_predict_relu,linewidth = 3.0,linestyle=\"dotted\",color = \"green\")\n", + "# nn2 - relu\n", + "axs[1].plot(x, y_predict_relu, linewidth=3.0, linestyle=\"dotted\", color=\"green\")\n", "axs[1].set_title(\"relu\")\n", - "axs[1].scatter([solution_2_comp[0]],[solution_2_comp[1]],color = \"black\",s = 300, label=\"complementarity\")\n", - "axs[1].scatter([solution_2_bigm[0]],[solution_2_bigm[1]],color = \"blue\",s = 300, label=\"bigm\")\n", - "axs[1].scatter([solution_2_partition[0]],[solution_2_partition[1]],color = \"purple\",s = 300, label=\"partition\")\n", + "axs[1].scatter(\n", + " [solution_2_comp[0]],\n", + " [solution_2_comp[1]],\n", + " color=\"black\",\n", + " s=300,\n", + " label=\"complementarity\",\n", + ")\n", + "axs[1].scatter(\n", + " [solution_2_bigm[0]], [solution_2_bigm[1]], color=\"blue\", s=300, label=\"bigm\"\n", + ")\n", + "axs[1].scatter(\n", + " [solution_2_partition[0]],\n", + " [solution_2_partition[1]],\n", + " color=\"purple\",\n", + " 
s=300,\n", + " label=\"partition\",\n", + ")\n", "axs[1].legend()\n", "\n", - "#nn3 - mixed\n", - "axs[2].plot(x,y_predict_mixed,linewidth = 3.0,linestyle=\"dotted\", color = \"red\")\n", + "# nn3 - mixed\n", + "axs[2].plot(x, y_predict_mixed, linewidth=3.0, linestyle=\"dotted\", color=\"red\")\n", "axs[2].set_title(\"mixed\")\n", - "axs[2].scatter([solution_3_mixed[0]],[solution_3_mixed[1]],color = \"black\",s = 300);" + "axs[2].scatter([solution_3_mixed[0]], [solution_3_mixed[1]], color=\"black\", s=300);" ] }, { diff --git a/docs/notebooks/trees/bo_with_trees.ipynb b/docs/notebooks/trees/bo_with_trees.ipynb index 11801d96..0f52e595 100644 --- a/docs/notebooks/trees/bo_with_trees.ipynb +++ b/docs/notebooks/trees/bo_with_trees.ipynb @@ -47,29 +47,33 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", "import random\n", "\n", + "import numpy as np\n", + "\n", + "\n", "def f(x, y):\n", - " return (1-x)**2 + 100* ((y-x**2))**2\n", + " return (1 - x) ** 2 + 100 * (y - x**2) ** 2\n", + "\n", + "\n", + "f_bnds = [(-2.048, 2.048) for _ in range(2)]\n", "\n", - "f_bnds = [(-2.048,2.048) for _ in range(2)]\n", "\n", "def generate_samples(num_samples, bb_bnds):\n", - " data = {'X': [], 'y': []}\n", + " data = {\"X\": [], \"y\": []}\n", "\n", " for _ in range(num_samples):\n", " sample = []\n", "\n", " # iterate through all dimension bounds\n", - " for idx, var_bnds in enumerate(bb_bnds):\n", + " for _, var_bnds in enumerate(bb_bnds):\n", " val = random.uniform(var_bnds[0], var_bnds[1])\n", "\n", " # populate the sample\n", " sample.append(val)\n", "\n", - " data['X'].append(sample)\n", - " data['y'].append(f(sample[0], sample[1]))\n", + " data[\"X\"].append(sample)\n", + " data[\"y\"].append(f(sample[0], sample[1]))\n", " return data" ] }, @@ -89,29 +93,28 @@ "metadata": {}, "outputs": [], "source": [ - "import lightgbm as lgb\n", "import warnings\n", "\n", + "import lightgbm as lgb\n", + "\n", + "\n", "def train_tree(data):\n", " with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " PARAMS = {'objective': 'regression',\n", - " 'metric': 'rmse',\n", - " 'boosting': 'gbdt',\n", - " 'num_trees': 50,\n", - " 'max_depth': 3,\n", - " 'min_data_in_leaf': 2,\n", - " 'random_state': 100,\n", - " 'verbose': -1}\n", - " train_x = np.asarray(data['X'])\n", - " train_data = lgb.Dataset(train_x, \n", - " label=data['y'],\n", - " params={'verbose': -1})\n", - "\n", - " model = lgb.train(PARAMS, \n", - " train_data,\n", - " verbose_eval=False)\n", - " return model" + " PARAMS = {\n", + " \"objective\": \"regression\",\n", + " \"metric\": \"rmse\",\n", + " \"boosting\": \"gbdt\",\n", + " \"num_trees\": 50,\n", + " \"max_depth\": 3,\n", + " \"min_data_in_leaf\": 2,\n", + " \"random_state\": 100,\n", + " \"verbose\": -1,\n", + " }\n", + " train_x = np.asarray(data[\"X\"])\n", + " train_data = lgb.Dataset(train_x, label=data[\"y\"], params={\"verbose\": -1})\n", + "\n", + " return lgb.train(PARAMS, train_data, verbose_eval=False)" ] }, { @@ -133,14 +136,12 @@ "from onnxmltools.convert.lightgbm.convert import convert\n", "from skl2onnx.common.data_types import FloatTensorType\n", "\n", + "\n", "def get_onnx_model(lgb_model):\n", " # export onnx model\n", " float_tensor_type = FloatTensorType([None, lgb_model.num_feature()])\n", - " initial_types = [('float_input', float_tensor_type)]\n", - " onnx_model = convert(lgb_model, \n", - " initial_types=initial_types, \n", - " target_opset=8)\n", - " return onnx_model" + " initial_types = [(\"float_input\", float_tensor_type)]\n", 
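The helper functions in this Bayesian-optimization notebook chain together into a single surrogate-minimization step. A rough sketch of that step, assuming generate_samples, train_tree, get_onnx_model, and add_tree_model are defined as in the cells of this diff:

# Rough sketch of one surrogate-minimization step using the notebook's helpers
# (generate_samples, train_tree, get_onnx_model, add_tree_model assumed from
# the surrounding cells; f and f_bnds are the Rosenbrock function and bounds).
import pyomo.environ as pe

data = generate_samples(5, f_bnds)       # sample the black-box function
lgb_model = train_tree(data)             # fit the LightGBM tree ensemble
onnx_model = get_onnx_model(lgb_model)   # convert the ensemble to ONNX

opt_model = pe.ConcreteModel()
add_tree_model(opt_model, onnx_model, f_bnds)  # builds a GBT big-M formulation on opt_model.gbt
opt_model.obj = pe.Objective(expr=opt_model.gbt.outputs[0])
pe.SolverFactory("cbc").solve(opt_model, tee=False)

next_x = [opt_model.gbt.inputs[i].value for i in range(len(opt_model.gbt.inputs))]
next_y = f(next_x[0], next_x[1])         # evaluate the true function at the proposal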
+ " return convert(lgb_model, initial_types=initial_types, target_opset=8)" ] }, { @@ -160,9 +161,10 @@ "source": [ "def write_onnx_to_file(onnx_model, path, file_name=\"output.onnx\"):\n", " from pathlib import Path\n", + "\n", " with open(Path(path) / file_name, \"wb\") as onnx_file:\n", " onnx_file.write(onnx_model.SerializeToString())\n", - " print(f'Onnx model written to {onnx_file.name}')" + " print(f\"Onnx model written to {onnx_file.name}\")" ] }, { @@ -182,15 +184,16 @@ "outputs": [], "source": [ "import pyomo.environ as pe\n", + "\n", "from omlt.block import OmltBlock\n", "from omlt.gbt import GBTBigMFormulation, GradientBoostedTreeModel\n", "\n", + "\n", "def add_tree_model(opt_model, onnx_model, input_bounds):\n", " # init omlt block and gbt model based on the onnx format\n", " opt_model.gbt = OmltBlock()\n", - " gbt_model = GradientBoostedTreeModel(onnx_model, \n", - " scaled_input_bounds=input_bounds)\n", - " \n", + " gbt_model = GradientBoostedTreeModel(onnx_model, scaled_input_bounds=input_bounds)\n", + "\n", " # omlt uses a big-m formulation to encode the tree models\n", " formulation = GBTBigMFormulation(gbt_model)\n", " opt_model.gbt.build_formulation(formulation)" @@ -211,27 +214,26 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", "def add_unc_metric(opt_model, data):\n", - " \n", " # compute mean and std for standardization\n", - " data_x = np.asarray(data['X'])\n", + " data_x = np.asarray(data[\"X\"])\n", " std = np.std(data_x, axis=0)\n", " mean = np.mean(data_x, axis=0)\n", - " \n", + "\n", " # alpha capture the uncertainty value\n", - " alpha_bound = abs(0.5*np.var(data['y']))\n", - " opt_model.alpha = pe.Var(within=pe.NonNegativeReals, bounds=(0,alpha_bound))\n", + " alpha_bound = abs(0.5 * np.var(data[\"y\"]))\n", + " opt_model.alpha = pe.Var(within=pe.NonNegativeReals, bounds=(0, alpha_bound))\n", " opt_model.unc_constr = pe.ConstraintList()\n", - " \n", + "\n", " for x in data_x:\n", " x_var = opt_model.gbt.inputs\n", " opt_model.unc_constr.add(\n", - " opt_model.alpha <= \\\n", - " sum((x[idx]-(x_var[idx]-mean[idx])/std[idx])*\\\n", - " (x[idx]-(x_var[idx]-mean[idx])/std[idx]) \n", - " for idx in range(len(x_var)) )\n", + " opt_model.alpha\n", + " <= sum(\n", + " (x[idx] - (x_var[idx] - mean[idx]) / std[idx])\n", + " * (x[idx] - (x_var[idx] - mean[idx]) / std[idx])\n", + " for idx in range(len(x_var))\n", + " )\n", " )" ] }, @@ -270,11 +272,13 @@ "random.seed(10)\n", "data = generate_samples(5, f_bnds)\n", "\n", + "\n", "def plot_progress(data, input_bounds):\n", " # plot contour line and data points\n", " import matplotlib.pyplot as plt\n", + "\n", " fig = plt.figure()\n", - " ax = fig.add_axes([0,0,2,2])\n", + " ax = fig.add_axes([0, 0, 2, 2])\n", "\n", " # create mesh\n", " s = 0.01\n", @@ -283,19 +287,20 @@ " X, Y = np.meshgrid(X, Y)\n", "\n", " # rosenbrock function\n", - " Z = f(X,Y)\n", + " Z = f(X, Y)\n", "\n", " # plot contour line\n", - " clevf = np.arange(Z.min(),Z.max(), 10)\n", + " clevf = np.arange(Z.min(), Z.max(), 10)\n", " CS = plt.contourf(X, Y, Z, clevf)\n", " fig.colorbar(CS)\n", "\n", " # plot initial data set\n", - " ax.scatter([x[0] for x in data['X']], [x[1] for x in data['X']], c='r', s=100)\n", + " ax.scatter([x[0] for x in data[\"X\"]], [x[1] for x in data[\"X\"]], c=\"r\", s=100)\n", "\n", - " plt.rcParams.update({'font.size': 15})\n", + " plt.rcParams.update({\"font.size\": 15})\n", " plt.show()\n", - " \n", + "\n", + "\n", "plot_progress(data, f_bnds)" ] }, @@ -324,29 +329,28 @@ " # building the 
optimization model\n", " onnx_model = get_onnx_model(lgb_model)\n", " opt_model = pe.ConcreteModel()\n", - " \n", + "\n", " add_tree_model(opt_model, onnx_model, f_bnds)\n", - " \n", + "\n", " if has_unc:\n", " add_unc_metric(opt_model, data)\n", - " opt_model.obj = pe.Objective(expr=opt_model.gbt.outputs[0] - 1.96*opt_model.alpha)\n", + " opt_model.obj = pe.Objective(\n", + " expr=opt_model.gbt.outputs[0] - 1.96 * opt_model.alpha\n", + " )\n", "\n", " # add uncertainty leads to non-convex MIQP, i.e. solvers like Gurobi can solve this\n", - " solver = pe.SolverFactory('gurobi')\n", - " solver.options['NonConvex'] = 2\n", + " solver = pe.SolverFactory(\"gurobi\")\n", + " solver.options[\"NonConvex\"] = 2\n", " solution = solver.solve(opt_model, tee=False)\n", " else:\n", " opt_model.obj = pe.Objective(expr=opt_model.gbt.outputs[0])\n", "\n", " # without uncerainty we can use cbc to solve the model\n", - " solver = pe.SolverFactory('cbc')\n", - " solution = solver.solve(opt_model, tee=False) \n", - " \n", + " solver = pe.SolverFactory(\"cbc\")\n", + " solution = solver.solve(opt_model, tee=False)\n", + "\n", " # extract solution from solved model\n", - " next_x = [opt_model.gbt.inputs[idx].value \n", - " for idx in range(len(opt_model.gbt.inputs))]\n", - " \n", - " return next_x" + " return [opt_model.gbt.inputs[idx].value for idx in range(len(opt_model.gbt.inputs))]" ] }, { @@ -400,40 +404,43 @@ "source": [ "from tqdm.notebook import tqdm\n", "\n", - "for itr in tqdm(range(80)):\n", + "for _ in tqdm(range(80)):\n", " # training the tree ensemble\n", " lgb_model = train_tree(data)\n", - " \n", + "\n", " # minimize the trained model\n", " next_x = minimize_model(f_bnds, lgb_model, has_unc=False)\n", - " \n", + "\n", " # evaluating the following input\n", " next_y = f(next_x[0], next_x[1])\n", "\n", - " data['X'].append(next_x)\n", - " data['y'].append(next_y)\n", - " \n", - "def plot_progress(data): \n", + " data[\"X\"].append(next_x)\n", + " data[\"y\"].append(next_y)\n", + "\n", + "\n", + "def plot_progress(data):\n", " # set up plot\n", " import matplotlib.pyplot as plt\n", + "\n", " fig = plt.figure()\n", - " ax = fig.add_axes([0,0,2,2])\n", + " ax = fig.add_axes([0, 0, 2, 2])\n", " plt.ylabel(\"Black-Box Function Objective\")\n", " plt.xlabel(\"# Iterations\")\n", "\n", " # extract best_y\n", " min_y = []\n", - " curr_min = data['y'][0]\n", - " for y in data['y']:\n", - " curr_min = min(y,curr_min)\n", + " curr_min = data[\"y\"][0]\n", + " for y in data[\"y\"]:\n", + " curr_min = min(y, curr_min)\n", " min_y.append(curr_min)\n", - " \n", + "\n", " # plot steps to show progress\n", - " ax.step(np.arange(len(data['y'])), min_y, linewidth=2, color=\"b\")\n", - " plt.axhline(y=0.0, color='r', linewidth=3, linestyle='--')\n", - " \n", + " ax.step(np.arange(len(data[\"y\"])), min_y, linewidth=2, color=\"b\")\n", + " plt.axhline(y=0.0, color=\"r\", linewidth=3, linestyle=\"--\")\n", + "\n", " plt.show()\n", "\n", + "\n", "plot_progress(data)" ] }, @@ -469,7 +476,8 @@ ], "source": [ "from IPython.display import Image\n", - "Image(filename='images/bo-with-trees.png', height=300)" + "\n", + "Image(filename=\"images/bo-with-trees.png\", height=300)" ] }, { diff --git a/docs/notebooks/trees/linear_tree_formulations.ipynb b/docs/notebooks/trees/linear_tree_formulations.ipynb index f98373e1..e7f263ae 100644 --- a/docs/notebooks/trees/linear_tree_formulations.ipynb +++ b/docs/notebooks/trees/linear_tree_formulations.ipynb @@ -55,26 +55,30 @@ }, "outputs": [], "source": [ - "#Start by importing the 
following libraries\n", - "#data manipulation and plotting\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", + "# Start by importing the following libraries\n", + "# data manipulation and plotting\n", "import matplotlib\n", - "matplotlib.rc('font', size=24)\n", - "plt.rc('axes', titlesize=24)\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "matplotlib.rc(\"font\", size=24)\n", + "plt.rc(\"axes\", titlesize=24)\n", "\n", - "#linear-tree objects\n", + "# linear-tree objects\n", + "# pyomo for optimization\n", + "import pyomo.environ as pyo\n", "from lineartree import LinearTreeRegressor\n", "from sklearn.linear_model import LinearRegression\n", "\n", - "#pyomo for optimization\n", - "import pyomo.environ as pyo\n", + "import omlt\n", "\n", - "#omlt for interfacing our linear tree with pyomo\n", + "# omlt for interfacing our linear tree with pyomo\n", "from omlt import OmltBlock\n", - "from omlt.linear_tree import LinearTreeGDPFormulation, LinearTreeHybridBigMFormulation, LinearTreeDefinition\n", - "import omlt" + "from omlt.linear_tree import (\n", + " LinearTreeDefinition,\n", + " LinearTreeGDPFormulation,\n", + " LinearTreeHybridBigMFormulation,\n", + ")" ] }, { @@ -114,7 +118,7 @@ }, "outputs": [], "source": [ - "df = pd.read_csv(\"../data/sin_quadratic.csv\",index_col=[0])" + "df = pd.read_csv(\"../data/sin_quadratic.csv\", index_col=[0])" ] }, { @@ -152,18 +156,18 @@ } ], "source": [ - "#retrieve input 'x' and output 'y' from the dataframe\n", + "# retrieve input 'x' and output 'y' from the dataframe\n", "x = df[\"x\"]\n", "y = df[\"y\"]\n", "\n", - "#calculate mean and standard deviation, add scaled 'x' and scaled 'y' to the dataframe\n", + "# calculate mean and standard deviation, add scaled 'x' and scaled 'y' to the dataframe\n", "mean_data = df.mean(axis=0)\n", "std_data = df.std(axis=0)\n", - "df[\"x_scaled\"] = (df['x'] - mean_data['x']) / std_data['x']\n", - "df[\"y_scaled\"] = (df['y'] - mean_data['y']) / std_data['y']\n", + "df[\"x_scaled\"] = (df[\"x\"] - mean_data[\"x\"]) / std_data[\"x\"]\n", + "df[\"y_scaled\"] = (df[\"y\"] - mean_data[\"y\"]) / std_data[\"y\"]\n", "\n", - "#create plots for unscaled and scaled data\n", - "f, (ax1, ax2) = plt.subplots(1, 2,figsize = (16,8))\n", + "# create plots for unscaled and scaled data\n", + "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))\n", "\n", "ax1.plot(x, y)\n", "ax1.set_xlabel(\"x\")\n", @@ -202,12 +206,10 @@ }, "outputs": [], "source": [ - "#Build the linear-tree model\n", - "regr = LinearTreeRegressor(LinearRegression(), \n", - " criterion='mse', \n", - " max_bins=120, \n", - " min_samples_leaf=30, \n", - " max_depth=8)" + "# Build the linear-tree model\n", + "regr = LinearTreeRegressor(\n", + " LinearRegression(), criterion=\"mse\", max_bins=120, min_samples_leaf=30, max_depth=8\n", + ")" ] }, { @@ -221,12 +223,12 @@ }, "outputs": [], "source": [ - "#Data needs to be in array and reshaped\n", - "x_scaled = df[\"x_scaled\"].to_numpy().reshape(-1,1)\n", - "y_scaled = df[\"y_scaled\"].to_numpy().reshape(-1,1)\n", + "# Data needs to be in array and reshaped\n", + "x_scaled = df[\"x_scaled\"].to_numpy().reshape(-1, 1)\n", + "y_scaled = df[\"y_scaled\"].to_numpy().reshape(-1, 1)\n", "\n", - "#train the linear tree on the scaled data\n", - "history1 = regr.fit(x_scaled,y_scaled)" + "# train the linear tree on the scaled data\n", + "history1 = regr.fit(x_scaled, y_scaled)" ] }, { @@ -278,9 +280,9 @@ }, "outputs": [], "source": [ - "#note: we calculate 
the unscaled output for each neural network to check the predictions\n", + "# note: we calculate the unscaled output for each neural network to check the predictions\n", "y_predict_scaled_lt = regr.predict(x_scaled)\n", - "y_predict_lt = y_predict_scaled_lt*(std_data['y']) + mean_data['y']" + "y_predict_lt = y_predict_scaled_lt * (std_data[\"y\"]) + mean_data[\"y\"]" ] }, { @@ -315,10 +317,10 @@ } ], "source": [ - "#create a single plot with the original data and each neural network's predictions\n", - "fig,ax = plt.subplots(1,figsize = (8,8))\n", - "ax.plot(x,y,linewidth = 3.0,label = \"data\", alpha = 0.5)\n", - "ax.plot(x,y_predict_lt,linewidth = 3.0,linestyle=\"dotted\",label = \"linear-tree\")\n", + "# create a single plot with the original data and each neural network's predictions\n", + "fig, ax = plt.subplots(1, figsize=(8, 8))\n", + "ax.plot(x, y, linewidth=3.0, label=\"data\", alpha=0.5)\n", + "ax.plot(x, y_predict_lt, linewidth=3.0, linestyle=\"dotted\", label=\"linear-tree\")\n", "plt.xlabel(\"x\")\n", "plt.ylabel(\"y\")\n", "plt.legend()" @@ -392,17 +394,23 @@ } ], "source": [ - "#create an omlt scaling object\n", - "scaler = omlt.scaling.OffsetScaling(offset_inputs=[mean_data['x']],\n", - " factor_inputs=[std_data['x']],\n", - " offset_outputs=[mean_data['y']],\n", - " factor_outputs=[std_data['y']])\n", - "\n", - "#create the input bounds. note that the key `0` corresponds to input `0` and that we also scale the input bounds\n", - "input_bounds={0:((min(df['x']) - mean_data['x'])/std_data['x'],\n", - " (max(df['x']) - mean_data['x'])/std_data['x'])};\n", + "# create an omlt scaling object\n", + "scaler = omlt.scaling.OffsetScaling(\n", + " offset_inputs=[mean_data[\"x\"]],\n", + " factor_inputs=[std_data[\"x\"]],\n", + " offset_outputs=[mean_data[\"y\"]],\n", + " factor_outputs=[std_data[\"y\"]],\n", + ")\n", + "\n", + "# create the input bounds. 
note that the key `0` corresponds to input `0` and that we also scale the input bounds\n", + "input_bounds = {\n", + " 0: (\n", + " (min(df[\"x\"]) - mean_data[\"x\"]) / std_data[\"x\"],\n", + " (max(df[\"x\"]) - mean_data[\"x\"]) / std_data[\"x\"],\n", + " )\n", + "}\n", "print(scaler)\n", - "print(\"Scaled input bounds: \",input_bounds)" + "print(\"Scaled input bounds: \", input_bounds)" ] }, { @@ -569,34 +577,37 @@ } ], "source": [ - "#create a LinearTreeDefinition Object\n", - "ltmodel = LinearTreeDefinition(regr,scaler,input_bounds)\n", + "# create a LinearTreeDefinition Object\n", + "ltmodel = LinearTreeDefinition(regr, scaler, input_bounds)\n", "\n", - "#create a pyomo model with variables x and y\n", + "# create a pyomo model with variables x and y\n", "model1 = pyo.ConcreteModel()\n", - "model1.x = pyo.Var(initialize = 0)\n", - "model1.y = pyo.Var(initialize = 0)\n", + "model1.x = pyo.Var(initialize=0)\n", + "model1.y = pyo.Var(initialize=0)\n", "model1.obj = pyo.Objective(expr=(model1.y))\n", "\n", - "#create an OmltBlock\n", + "# create an OmltBlock\n", "model1.lt = OmltBlock()\n", "\n", - "#use the GDP formulation with a big-M, transformation\n", - "formulation1_lt = LinearTreeGDPFormulation(ltmodel, transformation='bigm')\n", + "# use the GDP formulation with a big-M, transformation\n", + "formulation1_lt = LinearTreeGDPFormulation(ltmodel, transformation=\"bigm\")\n", "model1.lt.build_formulation(formulation1_lt)\n", "\n", - "#connect pyomo variables to the neural network\n", + "\n", + "# connect pyomo variables to the neural network\n", "@model1.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.lt.inputs[0]\n", "\n", + "\n", "@model1.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.lt.outputs[0]\n", "\n", - "#solve the model and query the solution\n", - "status_1_bigm = pyo.SolverFactory('cbc').solve(model1, tee=True)\n", - "solution_1_bigm = (pyo.value(model1.x),pyo.value(model1.y))" + "\n", + "# solve the model and query the solution\n", + "status_1_bigm = pyo.SolverFactory(\"cbc\").solve(model1, tee=True)\n", + "solution_1_bigm = (pyo.value(model1.x), pyo.value(model1.y))" ] }, { @@ -623,13 +634,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Big-M Transformation Solution:\")\n", - "print(\"# of variables: \",model1.nvariables())\n", - "print(\"# of constraints: \",model1.nconstraints())\n", + "print(\"# of variables: \", model1.nvariables())\n", + "print(\"# of constraints: \", model1.nconstraints())\n", "print(\"x = \", solution_1_bigm[0])\n", "print(\"y = \", solution_1_bigm[1])\n", - "print(\"Solve Time: \", status_1_bigm['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_1_bigm[\"Solver\"][0][\"Time\"])" ] }, { @@ -724,31 +735,34 @@ } ], "source": [ - "#create a pyomo model with variables x and y\n", + "# create a pyomo model with variables x and y\n", "model2 = pyo.ConcreteModel()\n", - "model2.x = pyo.Var(initialize = 0)\n", - "model2.y = pyo.Var(initialize = 0)\n", + "model2.x = pyo.Var(initialize=0)\n", + "model2.y = pyo.Var(initialize=0)\n", "model2.obj = pyo.Objective(expr=(model2.y))\n", "\n", - "#create an OmltBlock\n", + "# create an OmltBlock\n", "model2.lt = OmltBlock()\n", "\n", - "#use the GDP formulation with a hull transformation\n", - "formulation2_lt = LinearTreeGDPFormulation(ltmodel, transformation='hull')\n", + "# use the GDP formulation with a hull transformation\n", + "formulation2_lt = 
LinearTreeGDPFormulation(ltmodel, transformation=\"hull\")\n", "model2.lt.build_formulation(formulation2_lt)\n", "\n", - "#connect pyomo variables to the neural network\n", + "\n", + "# connect pyomo variables to the neural network\n", "@model2.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.lt.inputs[0]\n", "\n", + "\n", "@model2.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.lt.outputs[0]\n", "\n", - "#solve the model and query the solution\n", - "status_2_hull = pyo.SolverFactory('cbc').solve(model2, tee=True)\n", - "solution_2_hull = (pyo.value(model2.x),pyo.value(model2.y))" + "\n", + "# solve the model and query the solution\n", + "status_2_hull = pyo.SolverFactory(\"cbc\").solve(model2, tee=True)\n", + "solution_2_hull = (pyo.value(model2.x), pyo.value(model2.y))" ] }, { @@ -771,13 +785,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Hull Transformation Solution:\")\n", - "print(\"# of variables: \",model2.nvariables())\n", - "print(\"# of constraints: \",model2.nconstraints())\n", + "print(\"# of variables: \", model2.nvariables())\n", + "print(\"# of constraints: \", model2.nconstraints())\n", "print(\"x = \", solution_2_hull[0])\n", "print(\"y = \", solution_2_hull[1])\n", - "print(\"Solve Time: \", status_2_hull['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_2_hull[\"Solver\"][0][\"Time\"])" ] }, { @@ -939,35 +953,38 @@ } ], "source": [ - "#create a pyomo model with variables x and y\n", + "# create a pyomo model with variables x and y\n", "model_c = pyo.ConcreteModel()\n", - "model_c.x = pyo.Var(initialize = 0)\n", - "model_c.y = pyo.Var(initialize = 0)\n", + "model_c.x = pyo.Var(initialize=0)\n", + "model_c.y = pyo.Var(initialize=0)\n", "model_c.obj = pyo.Objective(expr=(model_c.y))\n", "\n", - "#create an OmltBlock\n", + "# create an OmltBlock\n", "model_c.lt = OmltBlock()\n", "\n", - "#use the GDP formulation with a custom transformation\n", - "formulation_c_lt = LinearTreeGDPFormulation(ltmodel, transformation='custom')\n", + "# use the GDP formulation with a custom transformation\n", + "formulation_c_lt = LinearTreeGDPFormulation(ltmodel, transformation=\"custom\")\n", "model_c.lt.build_formulation(formulation_c_lt)\n", "\n", - "#connect pyomo variables to the neural network\n", + "\n", + "# connect pyomo variables to the neural network\n", "@model_c.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.lt.inputs[0]\n", "\n", + "\n", "@model_c.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.lt.outputs[0]\n", "\n", + "\n", "# NOTE: Since we passed the 'custom' transformation option, the user must\n", "# transform the model or the omlt block before passing the model to the solver\n", - "pyo.TransformationFactory('gdp.bigm').apply_to(model_c)\n", + "pyo.TransformationFactory(\"gdp.bigm\").apply_to(model_c)\n", "\n", - "#solve the model and query the solution\n", - "status_c_bigm = pyo.SolverFactory('cbc').solve(model_c, tee=True)\n", - "solution_c_bigm = (pyo.value(model_c.x),pyo.value(model_c.y))" + "# solve the model and query the solution\n", + "status_c_bigm = pyo.SolverFactory(\"cbc\").solve(model_c, tee=True)\n", + "solution_c_bigm = (pyo.value(model_c.x), pyo.value(model_c.y))" ] }, { @@ -990,13 +1007,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"BigM Transformation Solution:\")\n", - "print(\"# of variables: 
\",model_c.nvariables())\n", - "print(\"# of constraints: \",model_c.nconstraints())\n", + "print(\"# of variables: \", model_c.nvariables())\n", + "print(\"# of constraints: \", model_c.nconstraints())\n", "print(\"x = \", solution_c_bigm[0])\n", "print(\"y = \", solution_c_bigm[1])\n", - "print(\"Solve Time: \", status_c_bigm['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_c_bigm[\"Solver\"][0][\"Time\"])" ] }, { @@ -1292,31 +1309,34 @@ } ], "source": [ - "#create a pyomo model with variables x and y\n", + "# create a pyomo model with variables x and y\n", "model3 = pyo.ConcreteModel()\n", - "model3.x = pyo.Var(initialize = 0)\n", - "model3.y = pyo.Var(initialize = 0)\n", + "model3.x = pyo.Var(initialize=0)\n", + "model3.y = pyo.Var(initialize=0)\n", "model3.obj = pyo.Objective(expr=(model3.y))\n", "\n", - "#create an OmltBlock\n", + "# create an OmltBlock\n", "model3.lt = OmltBlock()\n", "\n", - "#use the Hybrid Big-M formulation\n", + "# use the Hybrid Big-M formulation\n", "formulation3_lt = LinearTreeHybridBigMFormulation(ltmodel)\n", "model3.lt.build_formulation(formulation3_lt)\n", "\n", - "#connect pyomo variables to the neural network\n", + "\n", + "# connect pyomo variables to the neural network\n", "@model3.Constraint()\n", "def connect_inputs(mdl):\n", " return mdl.x == mdl.lt.inputs[0]\n", "\n", + "\n", "@model3.Constraint()\n", "def connect_outputs(mdl):\n", " return mdl.y == mdl.lt.outputs[0]\n", "\n", - "#solve the model and query the solution\n", - "status_3_hyb = pyo.SolverFactory('scip').solve(model3, tee=True)\n", - "solution_3_hyb = (pyo.value(model3.x),pyo.value(model3.y))" + "\n", + "# solve the model and query the solution\n", + "status_3_hyb = pyo.SolverFactory(\"scip\").solve(model3, tee=True)\n", + "solution_3_hyb = (pyo.value(model3.x), pyo.value(model3.y))" ] }, { @@ -1339,13 +1359,13 @@ } ], "source": [ - "#print out model size and solution values\n", + "# print out model size and solution values\n", "print(\"Hull Transformation Solution:\")\n", - "print(\"# of variables: \",model3.nvariables())\n", - "print(\"# of constraints: \",model3.nconstraints())\n", + "print(\"# of variables: \", model3.nvariables())\n", + "print(\"# of constraints: \", model3.nconstraints())\n", "print(\"x = \", solution_3_hyb[0])\n", "print(\"y = \", solution_3_hyb[1])\n", - "print(\"Solve Time: \", status_3_hyb['Solver'][0]['Time'])" + "print(\"Solve Time: \", status_3_hyb[\"Solver\"][0][\"Time\"])" ] }, { @@ -1394,26 +1414,53 @@ } ], "source": [ - "#create a plot with 3 subplots\n", - "fig,axs = plt.subplots(1,3,figsize = (24,8))\n", + "# create a plot with 3 subplots\n", + "fig, axs = plt.subplots(1, 3, figsize=(24, 8))\n", "\n", - "#GDP Representation - Big-M Transformation\n", - "axs[0].plot(x,y_predict_lt,linewidth = 3.0,linestyle=\"dotted\",color = \"orange\", label='Fitted Model')\n", + "# GDP Representation - Big-M Transformation\n", + "axs[0].plot(\n", + " x,\n", + " y_predict_lt,\n", + " linewidth=3.0,\n", + " linestyle=\"dotted\",\n", + " color=\"orange\",\n", + " label=\"Fitted Model\",\n", + ")\n", "axs[0].set_title(\"Big-M\")\n", - "axs[0].scatter([solution_1_bigm[0]],[solution_1_bigm[1]],color = \"black\",s = 300, label='Optimum')\n", + "axs[0].scatter(\n", + " [solution_1_bigm[0]], [solution_1_bigm[1]], color=\"black\", s=300, label=\"Optimum\"\n", + ")\n", "axs[0].legend()\n", "\n", - "#GDP Representation - Hull Transformation\n", - "axs[1].plot(x,y_predict_lt,linewidth = 3.0,linestyle=\"dotted\",color = \"orange\", label='Fitted Model')\n", + "# GDP 
Representation - Hull Transformation\n", + "axs[1].plot(\n", + " x,\n", + " y_predict_lt,\n", + " linewidth=3.0,\n", + " linestyle=\"dotted\",\n", + " color=\"orange\",\n", + " label=\"Fitted Model\",\n", + ")\n", "axs[1].set_title(\"Convex Hull\")\n", - "axs[1].scatter([solution_2_hull[0]],[solution_2_hull[1]],color = \"black\",s = 300, label='Optimum')\n", + "axs[1].scatter(\n", + " [solution_2_hull[0]], [solution_2_hull[1]], color=\"black\", s=300, label=\"Optimum\"\n", + ")\n", "axs[1].legend()\n", "\n", "\n", - "#Hybrid Big-M Representation\n", - "axs[2].plot(x,y_predict_lt,linewidth = 3.0,linestyle=\"dotted\",color = \"orange\", label='Fitted Model')\n", + "# Hybrid Big-M Representation\n", + "axs[2].plot(\n", + " x,\n", + " y_predict_lt,\n", + " linewidth=3.0,\n", + " linestyle=\"dotted\",\n", + " color=\"orange\",\n", + " label=\"Fitted Model\",\n", + ")\n", "axs[2].set_title(\"Hybrid Big-M\")\n", - "axs[2].scatter([solution_3_hyb[0]],[solution_3_hyb[1]],color = \"black\",s = 300, label='Optimum')\n", + "axs[2].scatter(\n", + " [solution_3_hyb[0]], [solution_3_hyb[1]], color=\"black\", s=300, label=\"Optimum\"\n", + ")\n", "axs[2].legend()" ] }, diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 6305e50b..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Required dependencies for Sphinx documentation -sphinx -sphinx-rtd-theme -numpy -pyomo -networkx -onnx -tensorflow -linear-tree -importlib-metadata \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..69566368 --- /dev/null +++ b/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge +dependencies: + - ipopt + - pyscipopt + - coin-or-cbc diff --git a/justfile b/justfile new file mode 100644 index 00000000..30eddd46 --- /dev/null +++ b/justfile @@ -0,0 +1,52 @@ +# List all commands. +default: + @just --list + +# Build docs. +docs: + rm -rf docs/_build docs/_autosummary + make -C docs html + echo Docs are in $PWD/docs/_build/html/index.html + +# Do a dev install. +dev: + pip install -e '.[dev]' + conda env update --file environment.yml + +# Do a dev install with GPU support. +dev-gpu: + pip install -e '.[dev-gpu]' + conda env update --file environment.yml + +# Run code checks. +check: + #!/usr/bin/env bash + + error=0 + trap error=1 ERR + + echo + (set -x; ruff check src/ tests/ docs/ ) + + echo + ( set -x; ruff format --check src/ tests/ docs/ ) + + echo + ( set -x; mypy src/ tests/ docs/ ) + + echo + ( set -x; pytest ) + + echo + ( set -x; make -C docs doctest ) + + test $error = 0 + +# Auto-fix code issues. +fix: + ruff format src/ tests/ docs/ + ruff check --fix src/ tests/ docs/ + +# Build a release. +build: + python -m build diff --git a/pyproject.toml b/pyproject.toml index 2c63dbb2..fab992f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,200 @@ [build-system] -# AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD! 
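The notebook cells above repeat one pattern for every linear-tree formulation: create the Pyomo variables, attach an OmltBlock, build the chosen formulation on it, connect the block inputs and outputs, and solve (the 'custom' option additionally requires applying a GDP transformation before the solve). A condensed sketch of that shared pattern, assuming the formulation objects built in the earlier cells and a local CBC installation, is:

    import pyomo.environ as pyo
    from omlt import OmltBlock

    def solve_with_formulation(formulation, solver="cbc"):
        # Hypothetical helper capturing the pattern repeated in the cells above.
        m = pyo.ConcreteModel()
        m.x = pyo.Var(initialize=0)
        m.y = pyo.Var(initialize=0)
        m.obj = pyo.Objective(expr=m.y)

        m.lt = OmltBlock()
        m.lt.build_formulation(formulation)

        @m.Constraint()
        def connect_inputs(mdl):
            return mdl.x == mdl.lt.inputs[0]

        @m.Constraint()
        def connect_outputs(mdl):
            return mdl.y == mdl.lt.outputs[0]

        pyo.SolverFactory(solver).solve(m)
        return pyo.value(m.x), pyo.value(m.y)

    # e.g. solve_with_formulation(formulation1_lt)
    # or   solve_with_formulation(formulation3_lt, solver="scip")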
-requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "wheel"] +requires = ["setuptools", "setuptools_scm"] build-backend = "setuptools.build_meta" +[project] +name = "omlt" +authors = [ + { name = "The OMLT Developers", email = "omlt@googlegroups.com" }, +] + +dependencies = [ + "networkx", + "numpy", + # TODO: Remove constraint when fix to https://github.com/Pyomo/pyomo/issues/3262 is released + "pyomo==6.6.2", + "onnx", + "onnxruntime", +] +requires-python = ">=3.9" +dynamic = ["version"] +readme = "README.rst" +license = { file = "LICENSE.rst" } +description = "OMLT is a Python package for representing machine learning models (such as neural networks) within the Pyomo optimization environment." + +[project.optional-dependencies] +linear-tree = ["linear-tree"] +keras = ["tensorflow", "keras"] +keras-gpu = ["tensorflow[and-cuda]", "keras"] +torch = ["torch", "torch-geometric"] +dev-tools = [ + "ruff", + "mypy", + "pytest", + "pytest-cov", + "sphinx", + "sphinx-copybutton", + "build", + "twine", + "furo", + "testbook", + "notebook", + "pandas", + "matplotlib", + "gurobipy", + "torchvision", + "tf2onnx", +] +docs = [ + "sphinx", + "sphinx-rtd-theme", + "tensorflow", + "linear-tree", +] +dev = [ + "omlt[dev-tools,keras,torch,linear-tree,docs]", +] +dev-gpu = [ + "omlt[dev-tools,keras-gpu,torch,linear-tree,docs]", +] + + +[project.urls] +github = "https://github.com/cog-imperial/OMLT" +x = "https://x.com/cogimperial" +documentation = "https://omlt.readthedocs.io" + [tool.setuptools_scm] -# See configuration details in https://github.com/pypa/setuptools_scm -version_scheme = "no-guess-dev" +write_to = "src/omlt/_version.py" + +[tool.ruff] +line-length = 88 +extend-exclude = ["src/omlt/_version.py"] + +[tool.ruff.lint] +select = ["ALL"] +ignore = [ + "ANN101", + "ANN401", + "COM812", + "ISC001", + "SLF001", + "ARG001", + "N803", + "N806", + # Remove these after issue https://github.com/cog-imperial/OMLT/issues/153 is fixed. 
+ "D100", + "D101", + "D102", + "D103", + "D104", + "D105", + "D106", + "D107", + # TODO: Remove these eventually + "ANN001", + "ANN002", + "ANN201", + "ANN202", + "ANN204", +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/*" = [ + "D100", + "D101", + "D102", + "D103", + "D104", + "D105", + "D106", + "D107", + "S101", + "INP001", +] +"examples/*" = [ + "INP001", +] +"docs/conf.py" = ["D100", "INP001"] +"src/omlt/neuralnet/layer.py" = ["N802"] +"docs/notebooks/data/build_sin_quadratic_csv.py" = ["INP001"] +"docs/notebooks/*" = [ + "T201", + "F811", + "E402", + "ICN001", + "E501", + "PD901", + "E731", + "F841", + "FBT002", + "PTH123", + "S311", + "N812", + "A001", + "E741", + "N802", + "PERF401", + "PLR2004", +] + +[tool.mypy] +show_error_codes = true +implicit_optional = false +warn_no_return = true +strict_optional = true +# TODO: Enable eventually +# disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +warn_unreachable = true +disallow_any_generics = true + +[[tool.mypy.overrides]] +module = [ + "pandas.*", + "networkx.*", + "tf2onnx.*", + "onnxruntime.*", + "lineartree.*", + "sklearn.*", + "testbook.*", + "pyomo.*", + "keras.*", + "tensorflow.*", + "torch_geometric.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = "--cov omlt --cov-report term-missing --cov-config pyproject.toml --verbose" + +[tool.coverage.run] +branch = true + +[tool.coverage.paths] +source = [ + "src/", + "*/site-packages/", +] + +[tool.coverage.report] +# Regexes for lines to exclude from consideration +exclude_lines = [ + # Have to re-enable the standard pragma + "pragma: no cover", + + # Don't complain about missing debug-only code: + "def __repr__", + + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't complain if non-runnable code isn't run: + "if __name__ == .__main__.:", +] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index cb8d5e2e..00000000 --- a/setup.cfg +++ /dev/null @@ -1,171 +0,0 @@ -# This file is used to configure your project. -# Read more about the various options under: -# http://setuptools.readthedocs.io/en/latest/setuptools.html#configuring-setup-using-setup-cfg-files - -[metadata] -name = omlt -description = OMLT is a Python package for representing machine learning models (such as neural networks) within the Pyomo optimization environment. 
-author = The OMLT Developers -author_email = omlt@googlegroups.com -license = BSD 3-Clause -long_description = file: README.rst -long_description_content_type = text/x-rst; charset=UTF-8 -url = https://github.com/cog-imperial/OMLT/ -# Add here related links, for example: -project_urls = - Source = https://github.com/cog-imperial/OMLT/ - Twitter = https://twitter.com/cogimperial -# Changelog = https://pyscaffold.org/en/latest/changelog.html -# Tracker = https://github.com/pyscaffold/pyscaffold/issues -# Conda-Forge = https://anaconda.org/conda-forge/pyscaffold -# Download = https://pypi.org/project/PyScaffold/#files - -# Change if running only on Windows, Mac or Linux (comma-separated) -platforms = any - -# Add here all kinds of additional classifiers as defined under -# https://pypi.python.org/pypi?%3Aaction=list_classifiers -classifiers = - Development Status :: 4 - Beta - Programming Language :: Python - - -[options] -zip_safe = False -packages = find_namespace: -include_package_data = True -package_dir = - =src - -# Require a min/specific Python version (comma-separated conditions) -# OMLT currently supports Python 3.7 and above -python_requires = >=3.7 - -# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. -# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in -# new major versions. This works if the required packages follow Semantic Versioning. -# For more information, check out https://semver.org/. -install_requires = - importlib-metadata; python_version<"3.8" - networkx - pyomo==6.6.2 - numpy - protobuf==3.20.3 - - -[options.packages.find] -where = src -exclude = - tests - -[options.extras_require] -# Add here additional requirements for extra features, to install with: -# `pip install omlt[PDF]` like: -# PDF = ReportLab; RXP - -# Add here test requirements (semicolon/line-separated) -testing = - setuptools - pytest - pytest-cov - testbook - nbmake - tox - flake8 - tensorflow-cpu - ipywidgets - jupyter - lightgbm - linear-tree - matplotlib - pandas - keras>=3.0 - onnx - onnxruntime - onnxmltools - tf2onnx>=1.12 - torch - torchvision - tqdm - protobuf==3.20.3 - torch_geometric - -testing_lean = - setuptools - pytest - pytest-cov - testbook - nbmake - tox - flake8 - ipywidgets - jupyter - lightgbm - matplotlib - pandas - torch - torchvision - tqdm - -[options.entry_points] -# Add here console scripts like: -# console_scripts = -# script_name = omlt.module:function -# For example: -# console_scripts = -# fibonacci = omlt.skeleton:run -# And any other entry points, for example: -# pyscaffold.cli = -# awesome = pyscaffoldext.awesome.extension:AwesomeExtension - -[tool:pytest] -# Specify command line options as you would do when invoking pytest directly. -# e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml -# in order to write a coverage file that can be read by Jenkins. -# CAUTION: --cov flags may prohibit setting breakpoints while debugging. -# Comment those flags to avoid this py.test issue. 
-addopts = - --cov omlt --cov-report term-missing - --verbose -norecursedirs = - dist - build - .tox -testpaths = tests -# Use pytest markers to select/deselect specific tests -# markers = -# slow: mark tests as slow (deselect with '-m "not slow"') -# system: mark end-to-end system tests - -[bdist_wheel] -# Use this option if your package is pure-python -universal = 1 - -[devpi:upload] -# Options for the devpi: PyPI server and packaging tool -# VCS export must be deactivated since we are using setuptools-scm -no_vcs = 1 -formats = bdist_wheel - -[flake8] -# Some sane defaults for the code style checker flake8 -max_line_length = 88 -extend_ignore = E203, W503 -# ^ Black-compatible -# E203 and W503 have edge cases handled by black -exclude = - .tox - build - dist - .eggs - docs/conf.py -per_file_ignores = - # ignore docstrings in tests - tests/*:D100,D101,D102,D103,D104,D105,D106,D107 - -[pyscaffold] -# PyScaffold's parameters when the project was created. -# This will be used when updating. Do not change! -version = 4.0.2 -package = omlt -extensions = diff --git a/setup.py b/setup.py deleted file mode 100644 index 57314fee..00000000 --- a/setup.py +++ /dev/null @@ -1,21 +0,0 @@ -""" - Setup file for omlt. - Use setup.cfg to configure your project. - - This file was generated with PyScaffold 4.0.2. - PyScaffold helps you to put up the scaffold of your new Python project. - Learn more under: https://pyscaffold.org/ -""" -from setuptools import setup - -if __name__ == "__main__": - try: - setup(use_scm_version={"version_scheme": "no-guess-dev"}) - except: # noqa - print( - "\n\nAn error occurred while building the project, " - "please ensure you have the most updated version of setuptools, " - "setuptools_scm and wheel with:\n" - " pip install -U setuptools setuptools_scm wheel\n\n" - ) - raise diff --git a/src/omlt/__init__.py b/src/omlt/__init__.py index 12aafdd5..30fe7b9c 100644 --- a/src/omlt/__init__.py +++ b/src/omlt/__init__.py @@ -1,33 +1,19 @@ -""" -OMLT -==== +"""OMLT. -OMLT is a Python package for representing machine learning models (neural networks and gradient-boosted trees) within the Pyomo optimization environment. +OMLT is a Python package for representing machine learning models (neural networks +and gradient-boosted trees) within the Pyomo optimization environment. The package provides various optimization formulations for machine learning models (such as full-space, reduced-space, and MILP) as well as an interface to import sequential Keras and general ONNX models. 
""" -import sys - -if sys.version_info[:2] >= (3, 8): - # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8` - from importlib.metadata import PackageNotFoundError # pragma: no cover - from importlib.metadata import version -else: - from importlib_metadata import PackageNotFoundError # pragma: no cover - from importlib_metadata import version - -try: - # Change here if project is renamed and does not equal the package name - dist_name = __name__ - __version__ = version(dist_name) -except PackageNotFoundError: # pragma: no cover - __version__ = "unknown" -finally: - del version, PackageNotFoundError - -# Top level exports -from omlt.block import OmltBlock +from omlt._version import __version__ +from omlt.block import OmltBlock # type: ignore[attr-defined] from omlt.scaling import OffsetScaling + +__all__ = [ + "OmltBlock", + "OffsetScaling", + "__version__", +] diff --git a/src/omlt/block.py b/src/omlt/block.py index a6c7bbf2..e18b7138 100644 --- a/src/omlt/block.py +++ b/src/omlt/block.py @@ -1,4 +1,5 @@ -""" +"""OmltBlock. + The omlt.block module contains the implementation of the OmltBlock class. This class is used in combination with a formulation object to construct the necessary constraints and variables to represent ML models. @@ -23,8 +24,6 @@ class is used in combination with a formulation object to construct the pyo.assert_optimal_termination(status) """ -import warnings - import pyomo.environ as pyo from pyomo.core.base.block import _BlockData, declare_custom_block @@ -32,13 +31,14 @@ class is used in combination with a formulation object to construct the @declare_custom_block(name="OmltBlock") class OmltBlockData(_BlockData): def __init__(self, component): - super(OmltBlockData, self).__init__(component) + super().__init__(component) self.__formulation = None self.__input_indexes = None self.__output_indexes = None def _setup_inputs_outputs(self, *, input_indexes, output_indexes): - """ + """Setup inputs and outputs. + This function should be called by the derived class to create the inputs and outputs on the block @@ -52,11 +52,6 @@ def _setup_inputs_outputs(self, *, input_indexes, output_indexes): """ self.__input_indexes = input_indexes self.__output_indexes = output_indexes - if not input_indexes or not output_indexes: - # TODO: implement this check higher up in the class hierarchy to provide more contextual error msg - raise ValueError( - "OmltBlock must have at least one input and at least one output." - ) self.inputs_set = pyo.Set(initialize=input_indexes) self.inputs = pyo.Var(self.inputs_set, initialize=0) @@ -64,7 +59,8 @@ def _setup_inputs_outputs(self, *, input_indexes, output_indexes): self.outputs = pyo.Var(self.outputs_set, initialize=0) def build_formulation(self, formulation): - """ + """Build formulation. + Call this method to construct the constraints (and possibly intermediate variables) necessary for the particular neural network formulation. The formulation object can be accessed later through the @@ -75,6 +71,20 @@ def build_formulation(self, formulation): formulation : instance of _PyomoFormulation see, for example, FullSpaceNNFormulation """ + if not formulation.input_indexes: + msg = ( + "OmltBlock must have at least one input to build a formulation. " + f"{formulation} has no inputs." + ) + raise ValueError(msg) + + if not formulation.output_indexes: + msg = ( + "OmltBlock must have at least one output to build a formulation. " + f"{formulation} has no outputs." 
+ ) + raise ValueError(msg) + self._setup_inputs_outputs( input_indexes=list(formulation.input_indexes), output_indexes=list(formulation.output_indexes), diff --git a/src/omlt/formulation.py b/src/omlt/formulation.py index fd83ae86..442e44bf 100644 --- a/src/omlt/formulation.py +++ b/src/omlt/formulation.py @@ -5,7 +5,8 @@ class _PyomoFormulationInterface(abc.ABC): - """ + """Pyomo Formulation Interface. + Base class interface for a Pyomo formulation object. This class is largely internal, and developers of new formulations should derive from _PyomoFormulation. @@ -23,42 +24,45 @@ def _set_block(self, block): @abc.abstractmethod def block(self): """Return the block associated with this formulation.""" - pass @property @abc.abstractmethod def input_indexes(self): - """Return the indices corresponding to the inputs of the + """Input indexes. + + Return the indices corresponding to the inputs of the ML model. This is a list of entries (which may be tuples for higher dimensional inputs). """ - pass @property @abc.abstractmethod def output_indexes(self): - """Return the indices corresponding to the outputs of the + """Output indexes. + + Return the indices corresponding to the outputs of the ML model. This is a list of entries (which may be tuples for higher dimensional outputs). """ - pass @abc.abstractmethod def _build_formulation(self): - """This method is called by the OmltBlock object to build the + """Build formulation. + + This method is called by the OmltBlock object to build the corresponding mathematical formulation of the model. """ - pass class _PyomoFormulation(_PyomoFormulationInterface): - """ + """Pyomo Formulation. + This is a base class for different Pyomo formulations. To create a new - formulation, inherit from this class and implement the abstract methods and properties. + formulation, inherit from this class and implement the abstract methods + and properties. """ def __init__(self): - super(_PyomoFormulation, self).__init__() self.__block = None def _set_block(self, block): @@ -66,8 +70,14 @@ def _set_block(self, block): @property def block(self): - """The underlying block containing the constraints / variables for this formulation.""" - return self.__block() + """Block. + + The underlying block containing the constraints / variables for this + formulation. + """ + if self.__block is not None: + return self.__block() + return None def scalar_or_tuple(x): diff --git a/src/omlt/gbt/__init__.py b/src/omlt/gbt/__init__.py index f62ed421..ebf2bb1c 100644 --- a/src/omlt/gbt/__init__.py +++ b/src/omlt/gbt/__init__.py @@ -1,4 +1,5 @@ -r""" +r"""Gradient-Boosted Trees formulation. + We use the following notation to describe the gradient-boosted trees formulation: .. math:: @@ -25,3 +26,5 @@ from omlt.gbt.gbt_formulation import GBTBigMFormulation from omlt.gbt.model import GradientBoostedTreeModel + +__all__ = ["GBTBigMFormulation", "GradientBoostedTreeModel"] diff --git a/src/omlt/gbt/gbt_formulation.py b/src/omlt/gbt/gbt_formulation.py index f2d01296..d81e26fb 100644 --- a/src/omlt/gbt/gbt_formulation.py +++ b/src/omlt/gbt/gbt_formulation.py @@ -1,4 +1,5 @@ import collections +from typing import Any import numpy as np import pyomo.environ as pe @@ -8,26 +9,25 @@ class GBTBigMFormulation(_PyomoFormulation): - """ - This class is the entry-point to build gradient-boosted trees formulations. + """This class is the entry-point to build gradient-boosted trees formulations. 
This class iterates over all trees in the ensemble and generates constraints to enforce splitting rules according to: - References - ---------- - * Misic, V. "Optimization of tree ensembles." - Operations Research 68.5 (2020): 1605-1624. - * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with gradient-boosted trees embedded." - INFORMS Journal on Computing (2020). - - Parameters - ---------- - tree_ensemble_structure : GradientBoostedTreeModel - the tree ensemble definition + References: + * Misic, V. "Optimization of tree ensembles." + Operations Research 68.5 (2020): 1605-1624. + * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with + gradient-boosted trees embedded." + INFORMS Journal on Computing (2020). + + Parameters: + tree_ensemble_structure (GradientBoostedTreeModel): + the tree ensemble definition """ def __init__(self, gbt_model): + """Constructor.""" super().__init__() self.model_definition = gbt_model @@ -42,7 +42,9 @@ def output_indexes(self): return list(range(self.model_definition.n_outputs)) def _build_formulation(self): - """This method is called by the OmltBlock to build the corresponding + """Build formulation. + + This method is called by the OmltBlock to build the corresponding mathematical formulation on the Pyomo block. """ _setup_scaled_inputs_outputs( @@ -59,9 +61,8 @@ def _build_formulation(self): ) -def add_formulation_to_block(block, model_definition, input_vars, output_vars): - r""" - Adds the gradient-boosted trees formulation to the given Pyomo block. +def add_formulation_to_block(block, model_definition, input_vars, output_vars): # noqa: C901, PLR0915 + r"""Adds the gradient-boosted trees formulation to the given Pyomo block. .. math:: \begin{align*} @@ -73,7 +74,8 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): \sum\limits_{l \in \text{Right}_{t,s}} z_{t,l} &\leq 1 - y_{i(s),j(s)}, && \forall t \in T, \forall s \in V_t, \\ y_{i,j} &\leq y_{i,j+1}, - && \forall i \in \left [ n \right ], \forall j \in \left [ m_i - 1 \right ], \\ + && \forall i \in \left [ n \right ], \\ + \forall j \in \left [ m_i - 1 \right ], \\ x_{i} &\geq v_{i,0} + \sum\limits_{j=1}^{m_i} \left (v_{i,j} - v_{i,j-1} \right ) \left ( 1 - y_{i,j} \right ), @@ -84,23 +86,22 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): \end{align*} - References - ---------- - * Misic, V. "Optimization of tree ensembles." - Operations Research 68.5 (2020): 1605-1624. - * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with gradient-boosted trees embedded." - INFORMS Journal on Computing (2020). - - Parameters - ---------- - block : Block - the Pyomo block - tree_ensemble_structure : GradientBoostedTreeModel - the tree ensemble definition - input_vars : Var - the input variables of the Pyomo block - output_vars : Var - the output variables of the Pyomo block + References: + * Misic, V. "Optimization of tree ensembles." + Operations Research 68.5 (2020): 1605-1624. + * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with + gradient-boosted trees embedded." + INFORMS Journal on Computing (2020). 
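In practice the two names exported from omlt.gbt are used together: a tree-ensemble ONNX model is wrapped in GradientBoostedTreeModel, and the resulting object is handed to GBTBigMFormulation on an OmltBlock. A minimal sketch, assuming a tree-ensemble ONNX file with a single input and output tensor (the file path is a placeholder), is:

    import onnx
    import pyomo.environ as pyo

    from omlt import OmltBlock
    from omlt.gbt import GBTBigMFormulation, GradientBoostedTreeModel

    # "gbt_model.onnx" is a placeholder for a tree ensemble exported to ONNX.
    ensemble = GradientBoostedTreeModel(onnx.load("gbt_model.onnx"))

    m = pyo.ConcreteModel()
    m.gbt = OmltBlock()
    m.gbt.build_formulation(GBTBigMFormulation(ensemble))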
+ + Parameters: + block (Block): + the Pyomo block + tree_ensemble_structure (GradientBoostedTreeModel): + the tree ensemble definition + input_vars (Var): + the input variables of the Pyomo block + output_vars (Var): + the output variables of the Pyomo block """ if isinstance(model_definition, GradientBoostedTreeModel): @@ -124,13 +125,7 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): nodes_node_ids = np.array(attr["nodes_nodeids"].ints) nodes_false_node_ids = np.array(attr["nodes_falsenodeids"].ints) nodes_true_node_ids = np.array(attr["nodes_truenodeids"].ints) - nodes_hitrates = np.array(attr["nodes_hitrates"].floats) - nodes_missing_value_tracks_true = np.array( - attr["nodes_missing_value_tracks_true"].ints - ) - n_targets = attr["n_targets"].i - target_ids = np.array(attr["target_ids"].ints) target_node_ids = np.array(attr["target_nodeids"].ints) target_tree_ids = np.array(attr["target_treeids"].ints) target_weights = np.array(attr["target_weights"].floats) @@ -142,7 +137,7 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): tree_ids = set(nodes_tree_ids) feature_ids = set(nodes_feature_ids) - continuous_vars = dict() + continuous_vars = {} for var_idx in input_vars: var = input_vars[var_idx] @@ -154,7 +149,7 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): domain=pe.Reals, ) - branch_value_by_feature_id = dict() + branch_value_by_feature_id: dict[int, Any] = {} branch_value_by_feature_id = collections.defaultdict(list) for f in feature_ids: @@ -164,15 +159,17 @@ def add_formulation_to_block(block, model_definition, input_vars, output_vars): y_index = [ (f, bi) - for f in continuous_vars.keys() + for f in continuous_vars for bi, _ in enumerate(branch_value_by_feature_id[f]) ] block.y = pe.Var(y_index, domain=pe.Binary) @block.Constraint(tree_ids) def single_leaf(b, tree_id): - r""" - Add constraint to ensure that only one leaf per tree is active, Mistry et al. Equ. (3b). + r"""Single leaf constraint. + + Add constraint to ensure that only one leaf per tree is active, + Mistry et al. Equ. (3b). .. math:: \begin{align*} \sum\limits_{l \in L_t} z_{t,l} &= 1, && \forall t \in T @@ -198,22 +195,28 @@ def _branching_y(tree_id, branch_node_id): feature_id = nodes_feature_ids[node_mask] branch_value = nodes_values[node_mask] if len(branch_value) != 1: - raise ValueError( - f"The given tree_id and branch_node_id do not uniquely identify a branch value." + msg = ( + "The given tree_id and branch_node_id do not uniquely identify a" + " branch value." ) + raise ValueError(msg) if len(feature_id) != 1: - raise ValueError( - f"The given tree_id and branch_node_id do not uniquely identify a feature." + msg = ( + "The given tree_id and branch_node_id do not uniquely identify a" + " feature." ) + raise ValueError(msg) feature_id = feature_id[0] branch_value = branch_value[0] (branch_y_idx,) = np.where( branch_value_by_feature_id[feature_id] == branch_value ) if len(branch_y_idx) != 1: - raise ValueError( - f"The given tree_id and branch_node_id do not uniquely identify a branch index." + msg = ( + "The given tree_id and branch_node_id do not uniquely identify a branch" + " index." ) + raise ValueError(msg) return block.y[feature_id, branch_y_idx[0]] def _sum_of_z_l(tree_id, start_node_id): @@ -235,7 +238,8 @@ def _sum_of_z_l(tree_id, start_node_id): @block.Constraint(nodes_tree_branch_ids) def left_split(b, tree_id, branch_node_id): - r""" + r"""Left split. 
+ Add constraint to activate all left splits leading to an active leaf, Mistry et al. Equ. (3c). .. math:: @@ -252,7 +256,8 @@ def left_split(b, tree_id, branch_node_id): @block.Constraint(nodes_tree_branch_ids) def right_split(b, tree_id, branch_node_id): - r""" + r"""Right split. + Add constraint to activate all right splits leading to an active leaf, Mistry et al. Equ. (3d). .. math:: @@ -269,8 +274,8 @@ def right_split(b, tree_id, branch_node_id): @block.Constraint(y_index) def order_y(b, feature_id, branch_y_idx): - r""" - Add constraint to activate splits in the correct order. + r"""Add constraint to activate splits in the correct order. + Mistry et al. Equ. (3e). .. math:: \begin{align*} @@ -285,8 +290,11 @@ def order_y(b, feature_id, branch_y_idx): @block.Constraint(y_index) def var_lower(b, feature_id, branch_y_idx): - r""" - Add constraint to link discrete tree splits to lower bound of continuous variables. + r"""Lower bound constraint. + + Add constraint to link discrete tree splits to lower bound of continuous + variables. + Mistry et al. Equ. (4a). .. math:: \begin{align*} @@ -304,8 +312,10 @@ def var_lower(b, feature_id, branch_y_idx): @block.Constraint(y_index) def var_upper(b, feature_id, branch_y_idx): - r""" - Add constraint to link discrete tree splits to upper bound of continuous variables. + r"""Upper bound constraint. + + Add constraint to link discrete tree splits to upper bound of continuous + variables. Mistry et al. Equ. (4b). .. math:: \begin{align*} @@ -322,8 +332,8 @@ def var_upper(b, feature_id, branch_y_idx): @block.Constraint() def tree_mean_value(b): - r""" - Add constraint to link block output tree model mean. + r"""Add constraint to link block output tree model mean. + Mistry et al. Equ. (3a). .. math:: \begin{align*} @@ -344,7 +354,7 @@ def tree_mean_value(b): def _node_attributes(node): - attr = dict() + attr = {} for at in node.attribute: attr[at.name] = at return attr diff --git a/src/omlt/gbt/model.py b/src/omlt/gbt/model.py index 9bac2590..693c24f7 100644 --- a/src/omlt/gbt/model.py +++ b/src/omlt/gbt/model.py @@ -1,6 +1,7 @@ class GradientBoostedTreeModel: def __init__(self, onnx_model, scaling_object=None, scaled_input_bounds=None): - """ + """Constructor. 
+ Create a network definition object used to create the gradient-boosted trees formulation in Pyomo @@ -25,56 +26,56 @@ def __init__(self, onnx_model, scaling_object=None, scaled_input_bounds=None): @property def onnx_model(self): - """Returns underlying onnx model of the tree model being used""" + """Returns underlying onnx model of the tree model being used.""" return self.__model @property def n_inputs(self): - """Returns the number of input variables""" + """Returns the number of input variables.""" return self.__n_inputs @property def n_outputs(self): - """Returns the number of output variables""" + """Returns the number of output variables.""" return self.__n_outputs @property def scaling_object(self): - """Return an instance of the scaling object that supports the ScalingInterface""" + """Return an instance of the scaling object supporting the ScalingInterface.""" return self.__scaling_object - @property - def scaled_input_bounds(self): - """Return a list of tuples containing lower and upper bounds of tree ensemble inputs""" - return self.__scaled_input_bounds - @scaling_object.setter def scaling_object(self, scaling_object): self.__scaling_object = scaling_object + @property + def scaled_input_bounds(self): + """Return a list of tuples of lower and upper bounds of tree ensemble inputs.""" + return self.__scaled_input_bounds + def _model_num_inputs(model): - """Returns the number of input variables""" + """Returns the number of input variables.""" graph = model.graph if len(graph.input) != 1: - raise ValueError( - f"Model graph input field is multi-valued {graph.input}. A single value is required." - ) + msg = f"Model graph input field is multi-valued {graph.input}. A single value" + " is required." + raise ValueError(msg) return _tensor_size(graph.input[0]) def _model_num_outputs(model): - """Returns the number of output variables""" + """Returns the number of output variables.""" graph = model.graph if len(graph.output) != 1: - raise ValueError( - f"Model graph output field is multi-valued {graph.output}. A single value is required." - ) + msg = f"Model graph output field is multi-valued {graph.output}. A single value" + " is required." + raise ValueError(msg) return _tensor_size(graph.output[0]) def _tensor_size(tensor): - """Returns the size of an input tensor""" + """Returns the size of an input tensor.""" tensor_type = tensor.type.tensor_type size = None dim_values = [ @@ -85,7 +86,9 @@ def _tensor_size(tensor): if len(dim_values) == 1: size = dim_values[0] elif dim_values == []: - raise ValueError(f"Tensor {tensor} has no positive dimensions.") + msg = f"Tensor {tensor} has no positive dimensions." + raise ValueError(msg) else: - raise ValueError(f"Tensor {tensor} has multiple positive dimensions.") + msg = f"Tensor {tensor} has multiple positive dimensions." 
+ raise ValueError(msg) return size diff --git a/src/omlt/io/__init__.py b/src/omlt/io/__init__.py index 6933e312..64fa72e1 100644 --- a/src/omlt/io/__init__.py +++ b/src/omlt/io/__init__.py @@ -14,3 +14,14 @@ if keras_available: from omlt.io.keras import load_keras_sequential + +__all__ = [ + "keras_available", + "onnx_available", + "torch_available", + "torch_geometric_available", + "load_onnx_neural_network", + "load_onnx_neural_network_with_bounds", + "write_onnx_model_with_bounds", + "load_keras_sequential", +] diff --git a/src/omlt/io/input_bounds.py b/src/omlt/io/input_bounds.py index 7bdb8ea8..f01eb3ca 100644 --- a/src/omlt/io/input_bounds.py +++ b/src/omlt/io/input_bounds.py @@ -1,20 +1,17 @@ import json +from pathlib import Path def write_input_bounds(input_bounds_filename, input_bounds): - """ - Write the specified input bounds to the given file. - """ + """Write the specified input bounds to the given file.""" input_bounds = _prepare_input_bounds(input_bounds) - with open(input_bounds_filename, "w") as f: + with Path(input_bounds_filename).open("w") as f: json.dump(input_bounds, f) def load_input_bounds(input_bounds_filename): - """ - Read the input bounds from the given file. - """ - with open(input_bounds_filename, "r") as f: + """Read the input bounds from the given file.""" + with Path(input_bounds_filename).open() as f: raw_input_bounds = json.load(f) return dict(_parse_raw_input_bounds(d) for d in raw_input_bounds) @@ -26,12 +23,11 @@ def _prepare_input_bounds(input_bounds): {"key": i, "lower_bound": lb, "upper_bound": ub} for i, (lb, ub) in enumerate(input_bounds) ] - else: - # users should have passed a dict-like - return [ - {"key": key, "lower_bound": lb, "upper_bound": ub} - for key, (lb, ub) in input_bounds.items() - ] + # users should have passed a dict-like + return [ + {"key": key, "lower_bound": lb, "upper_bound": ub} + for key, (lb, ub) in input_bounds.items() + ] def _parse_raw_input_bounds(raw): diff --git a/src/omlt/io/keras/__init__.py b/src/omlt/io/keras/__init__.py index 72f6931a..bd9bbc3e 100644 --- a/src/omlt/io/keras/__init__.py +++ b/src/omlt/io/keras/__init__.py @@ -1 +1,3 @@ from omlt.io.keras.keras_reader import load_keras_sequential + +__all__ = ["load_keras_sequential"] diff --git a/src/omlt/io/keras/keras_reader.py b/src/omlt/io/keras/keras_reader.py index daccf68b..2462bed0 100644 --- a/src/omlt/io/keras/keras_reader.py +++ b/src/omlt/io/keras/keras_reader.py @@ -1,13 +1,14 @@ -import tensorflow.keras as keras +from tensorflow import keras -from omlt.neuralnet.layer import DenseLayer, InputLayer +from omlt.neuralnet.layer import DenseLayer, InputLayer, Layer from omlt.neuralnet.network_definition import NetworkDefinition def load_keras_sequential( nn, scaling_object=None, scaled_input_bounds=None, unscaled_input_bounds=None ): - """ + """Load Keras sequential network. + Load a keras neural network model (built with Sequential) into an OMLT network definition object. This network definition object can be used in different formulations. @@ -17,8 +18,9 @@ def load_keras_sequential( nn : keras.model A keras model that was built with Sequential scaling_object : instance of ScalingInterface or None - Provide an instance of a scaling object to use to scale iputs --> scaled_inputs - and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + Provide an instance of a scaling object to use to scale inputs --> scaled_inputs + and scaled_outputs --> outputs. If None, no scaling is performed. + See scaling.py. 
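The loader is normally paired with a neural-network formulation from omlt.neuralnet. A minimal sketch, assuming a small dense Keras model and illustrative unscaled input bounds (the layer sizes and bounds are placeholders), is:

    import pyomo.environ as pyo
    from tensorflow import keras

    from omlt import OmltBlock
    from omlt.io import load_keras_sequential
    from omlt.neuralnet import FullSpaceNNFormulation

    # Illustrative two-input network; layer sizes and bounds are placeholders.
    nn = keras.Sequential(
        [
            keras.Input(shape=(2,)),
            keras.layers.Dense(4, activation="relu"),
            keras.layers.Dense(1),
        ]
    )
    net = load_keras_sequential(
        nn, unscaled_input_bounds={0: (-2.0, 2.0), 1: (-2.0, 2.0)}
    )

    m = pyo.ConcreteModel()
    m.nn = OmltBlock()
    m.nn.build_formulation(FullSpaceNNFormulation(net))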
scaled_input_bounds : dict or None A dict that contains the bounds on the scaled variables (the direct inputs to the neural network). If None, then no bounds @@ -29,11 +31,10 @@ def load_keras_sequential( dictionary will be generated using the provided scaling object. If None, then no bounds are specified. - Returns + Returns: ------- NetworkDefinition """ - # TODO: Add exceptions for unsupported layer types n_inputs = len(nn.layers[0].get_weights()[0]) net = NetworkDefinition( @@ -42,18 +43,19 @@ def load_keras_sequential( unscaled_input_bounds=unscaled_input_bounds, ) - prev_layer = InputLayer([n_inputs]) + prev_layer: Layer = InputLayer([n_inputs]) net.add_layer(prev_layer) - for l in nn.layers: - cfg = l.get_config() - if not isinstance(l, keras.layers.Dense): - raise ValueError( - "Layer type {} encountered. The function load_keras_sequential " - "only supports dense layers at this time. Consider using " - "ONNX and the ONNX parser".format(type(l)) + for layer in nn.layers: + cfg = layer.get_config() + if not isinstance(layer, keras.layers.Dense): + msg = ( + f"Layer type {type(layer)} encountered. The load_keras_sequential " + "function only supports dense layers at this time. Consider using " + "ONNX and the ONNX parser." ) - weights, biases = l.get_weights() + raise TypeError(msg) + weights, biases = layer.get_weights() n_layer_inputs, n_layer_nodes = weights.shape dense_layer = DenseLayer( diff --git a/src/omlt/io/onnx.py b/src/omlt/io/onnx.py index d41983c2..6c5b3cb3 100644 --- a/src/omlt/io/onnx.py +++ b/src/omlt/io/onnx.py @@ -1,4 +1,3 @@ -import json from pathlib import Path import onnx @@ -8,8 +7,7 @@ def write_onnx_model_with_bounds(filename, onnx_model=None, input_bounds=None): - """ - Write the ONNX model to the given file. + """Write the ONNX model to the given file. If `input_bounds` is not None, write it alongside the ONNX model. @@ -23,7 +21,7 @@ def write_onnx_model_with_bounds(filename, onnx_model=None, input_bounds=None): bounds on the input variables """ if onnx_model is not None: - with open(filename, "wb") as f: + with Path(filename).open("wb") as f: f.write(onnx_model.SerializeToString()) if input_bounds is not None: @@ -31,30 +29,28 @@ def write_onnx_model_with_bounds(filename, onnx_model=None, input_bounds=None): def load_onnx_neural_network_with_bounds(filename): - """ - Load a NetworkDefinition with input bounds from an onnx object. + """Load a NetworkDefinition with input bounds from an onnx object. Parameters ---------- filename : str the path where the ONNX model and input bounds file are written - Returns + Returns: ------- NetworkDefinition """ onnx_model = onnx.load(filename) input_bounds_filename = Path(f"{filename}.bounds.json") input_bounds = None - if input_bounds_filename.exists: + if input_bounds_filename.exists(): input_bounds = load_input_bounds(input_bounds_filename) return load_onnx_neural_network(onnx_model, input_bounds=input_bounds) def load_onnx_neural_network(onnx, scaling_object=None, input_bounds=None): - """ - Load a NetworkDefinition from an onnx object. + """Load a NetworkDefinition from an onnx object. 
Parameters ---------- @@ -63,7 +59,7 @@ def load_onnx_neural_network(onnx, scaling_object=None, input_bounds=None): scaling_object : instance of object supporting ScalingInterface input_bounds : list of tuples - Returns + Returns: ------- NetworkDefinition """ diff --git a/src/omlt/io/onnx_parser.py b/src/omlt/io/onnx_parser.py index 511261c0..85b37526 100644 --- a/src/omlt/io/onnx_parser.py +++ b/src/omlt/io/onnx_parser.py @@ -1,4 +1,5 @@ import math +from typing import TYPE_CHECKING, Any import numpy as np from onnx import numpy_helper @@ -12,15 +13,30 @@ ) from omlt.neuralnet.network_definition import NetworkDefinition +if TYPE_CHECKING: + from collections.abc import Callable + _ACTIVATION_OP_TYPES = ["Relu", "Sigmoid", "LogSoftmax", "Tanh", "Softplus"] _POOLING_OP_TYPES = ["MaxPool"] +DENSE_INPUT_DIMENSIONS = 2 +GEMM_INPUT_DIMENSIONS = 3 +CONV_INPUT_DIMENSIONS = [2, 3] +TWO_D_IMAGE_W_CHANNELS = 3 +RESHAPE_INPUT_DIMENSIONS = 2 +MAXPOOL_INPUT_DIMENSIONS = 1 +MAXPOOL_INPUT_OUTPUT_W_BATCHES = 4 +# Attribute types enum: +ATTR_FLOAT = 1 +ATTR_INT = 2 +ATTR_TENSOR = 4 +ATTR_INTS = 7 class NetworkParser: - """ - References - ---------- - * https://github.com/onnx/onnx/blob/master/docs/Operators.md + """Network Parser. + + References: + * https://github.com/onnx/onnx/blob/master/docs/Operators.md """ def __init__(self): @@ -28,62 +44,63 @@ def __init__(self): def _reset_state(self): self._graph = None - self._initializers = None - self._constants = None - self._nodes = None + self._initializers = {} + self._constants = {} + self._nodes = {} self._nodes_by_output = None self._inputs = None self._outputs = None - self._node_stack = None - self._node_map = None + self._node_stack = [] + self._node_map = {} - def parse_network(self, graph, scaling_object, input_bounds): + def parse_network(self, graph, scaling_object, input_bounds): # noqa: C901, PLR0912, PLR0915 self._reset_state() self._graph = graph # initializers contain constant data - initializers = dict() + initializers: dict[str, Any] = {} for initializer in self._graph.initializer: initializers[initializer.name] = numpy_helper.to_array(initializer) self._initializers = initializers # Build graph - nodes = dict() - nodes_by_output = dict() + nodes: dict[str, tuple[str, Any, list[Any]]] = {} + nodes_by_output = {} inputs = set() - outputs = set() - self._node_map = dict() + outputs: set[Any] = set() + self._node_map = {} network = NetworkDefinition( scaling_object=scaling_object, scaled_input_bounds=input_bounds ) network_input = None - for input in self._graph.input: - nodes[input.name] = ("input", input.type, []) - nodes_by_output[input.name] = input.name - inputs.add(input.name) + for input_node in self._graph.input: + nodes[input_node.name] = ("input", input_node.type, []) + nodes_by_output[input_node.name] = input_node.name + inputs.add(input_node.name) # onnx inputs are tensors. Flatten tensors to a vector. dim_value = None size = [] - for dim in input.type.tensor_type.shape.dim: + for dim in input_node.type.tensor_type.shape.dim: if dim.dim_value > 0: if dim_value is None: dim_value = 1 size.append(dim.dim_value) dim_value *= dim.dim_value if dim_value is None: - raise ValueError( + msg = ( f'All dimensions in graph "{graph.name}" input tensor have 0 value.' 
) - assert network_input is None + raise ValueError(msg) network_input = InputLayer(size) - self._node_map[input.name] = network_input + self._node_map[input_node.name] = network_input network.add_layer(network_input) if network_input is None: - raise ValueError(f'No valid input layer found in graph "{graph.name}".') + msg = f'No valid input layer found in graph "{graph.name}".' + raise ValueError(msg) self._nodes = nodes self._nodes_by_output = nodes_by_output @@ -97,37 +114,39 @@ def parse_network(self, graph, scaling_object, input_bounds): for output in node.output: nodes_by_output[output] = node.name - self._constants = dict() + self._constants = {} for node in self._graph.node: # add node not connected to anything self._nodes[node.name] = ("node", node, []) # Map inputs by their output name node_inputs = [ - nodes_by_output[input] - for input in node.input - if input not in initializers + nodes_by_output[input_node] + for input_node in node.input + if input_node not in initializers ] if node_inputs: # Now connect inputs to the current node - for input in node_inputs: - self._nodes[input][2].append(node.name) + for input_node in node_inputs: + self._nodes[input_node][2].append(node.name) elif node.op_type == "Constant": for output in node.output: value = _parse_constant_value(node) self._constants[output] = value else: - raise ValueError( - f'Nodes must have inputs or have op_type "Constant". Node "{node.name}" has no inputs and op_type "{node.op_type}".' + msg = ( + 'Nodes must have inputs or have op_type "Constant". Node' + f' "{node.name}" has no inputs and op_type "{node.op_type}".' ) + raise ValueError(msg) # traverse graph self._node_stack = list(inputs) - self._weights = dict() - self._biases = dict() - self._activations = dict() + self._weights = {} + self._biases = {} + self._activations = {} while self._node_stack: node_name = self._node_stack.pop() @@ -141,8 +160,8 @@ def parse_network(self, graph, scaling_object, input_bounds): for layer_input in new_layer_inputs: network.add_edge(layer_input, new_layer) else: - for next in next_nodes: - self._node_stack.append(next) + for next_node in next_nodes: + self._node_stack.append(next_node) return network @@ -167,41 +186,55 @@ def _visit_node(self, node, next_nodes): node, next_nodes ) else: - raise Exception(f"Unhandled node type {node.op_type}") + msg = f"Unhandled node type {node.op_type}" + raise ValueError(msg) - for next in next_nodes: - self._node_stack.append(next) + for next_node in next_nodes: + self._node_stack.append(next_node) return new_layer, new_layer_inputs - def _consume_dense_nodes(self, node, next_nodes): + def _consume_dense_nodes( # noqa: C901, PLR0912 + self, node: Any, next_nodes: Any + ) -> tuple[Any, Any, list[Any]]: """Starting from a MatMul node, consume nodes to form a dense Ax + b node.""" if node.op_type != "MatMul": - raise ValueError( - f"{node.name} is a {node.op_type} node, only MatMul nodes can be used as starting points for consumption." + msg = ( + f"{node.name} is a {node.op_type} node, but the parsing method for" + " MatMul nodes was called. This could indicate changes in the" + " network being parsed." ) - if len(node.input) != 2: - raise ValueError( - f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption." 
+ raise ValueError(msg) + + if len(node.input) != DENSE_INPUT_DIMENSIONS: + msg = ( + f"{node.name} input has {len(node.input)} dimensions, only nodes with 2" + " input dimensions can be used as starting points for parsing." ) + raise ValueError(msg) [in_0, in_1] = list(node.input) input_layer, transformer = self._node_input_and_transformer(in_0) node_weights = self._initializers[in_1] if len(next_nodes) != 1: - raise ValueError( - f"Next nodes must have length 1, {next_nodes} has length {len(next_nodes)}" + msg = ( + f"Next nodes must have length 1, {next_nodes} has length" + f" {len(next_nodes)}" ) + raise ValueError(msg) # expect 'Add' node ahead type_, node, maybe_next_nodes = self._nodes[next_nodes[0]] if type_ != "node": - raise TypeError(f"Expected a node next, got a {type_} instead.") + msg = f"Expected a node next, got a {type_} instead." + raise TypeError(msg) if node.op_type != "Add": - raise ValueError( - f"The first node to be consumed, {node.name}, is a {node.op_type} node. Only Add nodes are supported." + msg = ( + f"The first node to be consumed, {node.name}, is a {node.op_type} node." + " Only Add nodes are supported." ) + raise ValueError(msg) # extract biases next_nodes = maybe_next_nodes @@ -212,18 +245,20 @@ def _consume_dense_nodes(self, node, next_nodes): elif in_1 in self._initializers: node_biases = self._initializers[in_1] else: - raise ValueError(f"Node inputs were not found in graph initializers.") - - if len(node_weights.shape) != 2: - raise ValueError(f"Node weights must be a 2-dimensional matrix.") + msg = "Node inputs were not found in graph initializers." + raise ValueError(msg) + if len(node_weights.shape) != DENSE_INPUT_DIMENSIONS: + msg = "Node weights must be a 2-dimensional matrix." + raise ValueError(msg) if node_weights.shape[1] != node_biases.shape[0]: - raise ValueError( - f"Node weights has {node_weights.shape[1]} columns; node biases has {node_biases.shape[0]} rows. These must be equal." + msg = ( + f"Node weights has {node_weights.shape[1]} columns; node biases has " + f"{node_biases.shape[0]} rows. These must be equal." ) + raise ValueError(msg) if len(node.output) != 1: - raise ValueError( - f"Node output is {node.output} but should be a single value." - ) + msg = f"Node output is {node.output} but should be a single value." + raise ValueError(msg) input_output_size = _get_input_output_size(input_layer, transformer) @@ -254,13 +289,18 @@ def _consume_dense_nodes(self, node, next_nodes): def _consume_gemm_dense_nodes(self, node, next_nodes): """Starting from a Gemm node, consume nodes to form a dense aAB + bC node.""" if node.op_type != "Gemm": - raise ValueError( - f"{node.name} is a {node.op_type} node, only Gemm nodes can be used as starting points for consumption." + msg = ( + f"{node.name} is a {node.op_type} node, but the parsing method for" + " Gemm nodes was called. This could indicate changes in the" + " network being parsed." ) - if len(node.input) != 3: - raise ValueError( - f"{node.name} input has {len(node.input)} dimensions, only nodes with 3 input dimensions can be used as starting points for consumption." + raise ValueError(msg) + if len(node.input) != GEMM_INPUT_DIMENSIONS: + msg = ( + f"{node.name} input has {len(node.input)} dimensions, only nodes with" + " 3 input dimensions can be used as starting points for parsing." 
) + raise ValueError(msg) attr = _collect_attributes(node) alpha = attr["alpha"] @@ -303,21 +343,27 @@ def _consume_gemm_dense_nodes(self, node, next_nodes): return next_nodes, dense_layer, [input_layer] - def _consume_conv_nodes(self, node, next_nodes): - """ + def _consume_conv_nodes(self, node, next_nodes): # noqa: PLR0912, C901, PLR0915 + """Consume Conv nodes. + Starting from a Conv node, consume nodes to form a convolution node with (optional) activation function. """ if node.op_type != "Conv": - raise ValueError( - f"{node.name} is a {node.op_type} node, only Conv nodes can be used as starting points for consumption." + msg = ( + f"{node.name} is a {node.op_type} node, but the parsing method for" + " Conv nodes was called. This could indicate changes in the" + " network being parsed." ) - if len(node.input) not in [2, 3]: - raise ValueError( - f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 or 3 input dimensions can be used as starting points for consumption." + raise ValueError(msg) + if len(node.input) not in CONV_INPUT_DIMENSIONS: + msg = ( + f"{node.name} input has {len(node.input)} dimensions, only nodes with" + " 2 or 3 input dimensions can be used as starting points for parsing." ) + raise ValueError(msg) - if len(node.input) == 2: + if len(node.input) == CONV_INPUT_DIMENSIONS[0]: [in_0, in_1] = list(node.input) in_2 = None else: @@ -327,51 +373,59 @@ def _consume_conv_nodes(self, node, next_nodes): weights = self._initializers[in_1] [out_channels, in_channels, *kernel_shape] = weights.shape - if in_2 is None: - biases = np.zeros(out_channels) - else: - biases = self._initializers[in_2] + biases = np.zeros(out_channels) if in_2 is None else self._initializers[in_2] attr = _collect_attributes(node) strides = attr["strides"] # check only kernel shape and stride are set if attr["kernel_shape"] != kernel_shape: - raise ValueError( - f"Kernel shape attribute {attr['kernel_shape']} does not match initialized kernel shape {kernel_shape}." + msg = ( + f"Kernel shape attribute {attr['kernel_shape']} does not match" + f" initialized kernel shape {kernel_shape}." ) + raise ValueError(msg) if len(kernel_shape) != len(strides): - raise ValueError( - f"Initialized kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal." + msg = ( + f"Initialized kernel shape {kernel_shape} has {len(kernel_shape)} " + f"dimensions. Strides attribute has {len(strides)} dimensions. " + "These must be equal." ) + raise ValueError(msg) if len(input_output_size) != len(kernel_shape) + 1: - raise ValueError( - f"Input/output size ({input_output_size}) must have one more dimension than initialized kernel shape ({kernel_shape})." + msg = ( + f"Input/output size ({input_output_size}) must have one more dimension " + f"than initialized kernel shape ({kernel_shape})." ) + raise ValueError(msg) # Check input, output have correct dimensions if biases.shape != (out_channels,): - raise ValueError( - f"Biases shape {biases.shape} must match output weights channels {(out_channels,)}." + msg = ( + f"Biases shape {biases.shape} must match output weights channels" + f" {(out_channels,)}." ) + raise ValueError(msg) if in_channels != input_output_size[0]: - raise ValueError( - f"Input/output size ({input_output_size}) first dimension must match input weights channels ({in_channels})." + msg = ( + f"Input/output size ({input_output_size}) first dimension must match " + f"input weights channels ({in_channels})." 
) + raise ValueError(msg) # Other attributes are not supported if "dilations" in attr and attr["dilations"] != [1, 1]: - raise ValueError( - f"{node} has non-identity dilations ({attr['dilations']}). This is not supported." + msg = ( + f"{node} has non-identity dilations ({attr['dilations']}). This is not" + " supported." ) + raise ValueError(msg) if attr["group"] != 1: - raise ValueError( - f"{node} has multiple groups ({attr['group']}). This is not supported." - ) + msg = f"{node} has multiple groups ({attr['group']}). This is unsupported." + raise ValueError(msg) if "pads" in attr and np.any(attr["pads"]): - raise ValueError( - f"{node} has non-zero pads ({attr['pads']}). This is not supported." - ) + msg = f"{node} has non-zero pads ({attr['pads']}). This is not supported." + raise ValueError(msg) # generate new nodes for the node output padding = 0 @@ -391,10 +445,9 @@ def _consume_conv_nodes(self, node, next_nodes): # convolute image one channel at the time # expect 2d image with channels - if len(input_output_size) != 3: - raise ValueError( - f"Expected a 2D image with channels, got {input_output_size}." - ) + if len(input_output_size) != TWO_D_IMAGE_W_CHANNELS: + msg = f"Expected a 2D image with channels, got {input_output_size}." + raise ValueError(msg) conv_layer = ConvLayer2D( input_output_size, @@ -412,13 +465,18 @@ def _consume_conv_nodes(self, node, next_nodes): def _consume_reshape_nodes(self, node, next_nodes): """Parse a Reshape node.""" if node.op_type != "Reshape": - raise ValueError( - f"{node.name} is a {node.op_type} node, only Reshape nodes can be used as starting points for consumption." - ) - if len(node.input) != 2: - raise ValueError( - f"{node.name} input has {len(node.input)} dimensions, only nodes with 2 input dimensions can be used as starting points for consumption." - ) + msg = ( + f"{node.name} is a {node.op_type} node, but the parsing method for" + " Reshape nodes was called. This could indicate changes in the" + " network being parsed." + ) + raise ValueError(msg) + if len(node.input) != RESHAPE_INPUT_DIMENSIONS: + msg = ( + f"{node.name} input has {len(node.input)} dimensions, only nodes with" + " 2 input dimensions can be used as starting points for parsing." + ) + raise ValueError(msg) [in_0, in_1] = list(node.input) input_layer = self._node_map[in_0] new_shape = self._constants[in_1] @@ -427,38 +485,49 @@ def _consume_reshape_nodes(self, node, next_nodes): self._node_map[node.output[0]] = (transformer, input_layer) return next_nodes - def _consume_pool_nodes(self, node, next_nodes): - """ + def _consume_pool_nodes(self, node, next_nodes): # noqa: PLR0912, C901, PLR0915 + """Consume MaxPool nodes. + Starting from a MaxPool node, consume nodes to form a pooling node with (optional) activation function. """ if node.op_type not in _POOLING_OP_TYPES: - raise ValueError( - f"{node.name} is a {node.op_type} node, only MaxPool nodes can be used as starting points for consumption." + msg = ( + f"{node.name} is a {node.op_type} node, but the parsing method for" + " MaxPool nodes was called. This could indicate changes in the" + " network being parsed." ) + raise ValueError(msg) pool_func_name = "max" - # ONNX network should not contain indices output from MaxPool - not supported by OMLT + # ONNX network should not contain indices output from MaxPool - + # not supported by OMLT if len(node.output) != 1: - raise ValueError( - f"The ONNX contains indices output from MaxPool. This is not supported by OMLT." 
+ msg = ( + "The ONNX network contains indices output from MaxPool. This is not" + " supported by OMLT." ) - if len(node.input) != 1: - raise ValueError( - f"{node.name} input has {len(node.input)} dimensions, only nodes with 1 input dimension can be used as starting points for consumption." + raise ValueError(msg) + if len(node.input) != MAXPOOL_INPUT_DIMENSIONS: + msg = ( + f"{node.name} input has {len(node.input)} dimensions, only nodes with " + "1 input dimension can be used as starting points for parsing." ) - + raise ValueError(msg) input_layer, transformer = self._node_input_and_transformer(node.input[0]) input_output_size = _get_input_output_size(input_layer, transformer) # currently only support 2D image with channels. - if len(input_output_size) == 4: + if len(input_output_size) == MAXPOOL_INPUT_OUTPUT_W_BATCHES: # this means there is an extra dimension for number of batches - # batches not supported, so only accept if they're not there or there is only 1 batch + # batches not supported, so only accept if they're not there or there is + # only 1 batch if input_output_size[0] != 1: - raise ValueError( - f"{node.name} has {input_output_size[0]} batches, only a single batch is supported." + msg = ( + f"{node.name} has {input_output_size[0]} batches, only single batch" + " is supported." ) + raise ValueError(msg) input_output_size = input_output_size[1:] in_channels = input_output_size[0] @@ -471,37 +540,46 @@ def _consume_pool_nodes(self, node, next_nodes): # check only kernel shape, stride, storage order are set # everything else is not supported if "dilations" in attr and attr["dilations"] != [1, 1]: - raise ValueError( - f"{node.name} has non-identity dilations ({attr['dilations']}). This is not supported." + msg = ( + f"{node.name} has non-identity dilations ({attr['dilations']})." + " This is not supported." ) + raise ValueError(msg) if "pads" in attr and np.any(attr["pads"]): - raise ValueError( - f"{node.name} has non-zero pads ({attr['pads']}). This is not supported." + msg = ( + f"{node.name} has non-zero pads ({attr['pads']})." + " This is not supported." ) + raise ValueError(msg) if ("auto_pad" in attr) and (attr["auto_pad"] != "NOTSET"): - raise ValueError( - f"{node.name} has autopad set ({attr['auto_pad']}). This is not supported." + msg = ( + f"{node.name} has autopad set ({attr['auto_pad']})." + " This is not supported." ) + raise ValueError(msg) if len(kernel_shape) != len(strides): - raise ValueError( - f"Kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. Strides attribute has {len(strides)} dimensions. These must be equal." + msg = ( + f"Kernel shape {kernel_shape} has {len(kernel_shape)} dimensions. " + f"Strides attribute has {len(strides)} dimensions. These must be equal." ) + raise ValueError(msg) if len(input_output_size) != len(kernel_shape) + 1: - raise ValueError( - f"Input/output size ({input_output_size}) must have one more dimension than kernel shape ({kernel_shape})." + msg = ( + f"Input/output size ({input_output_size}) must have one more dimension" + f" than kernel shape ({kernel_shape})." 
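A small worked sketch of the pooled output-size rule applied in the next hunk (floor by default, ceil when the ONNX attribute ceil_mode is 1); the sizes here are illustrative, not taken from the patch:

import math

input_output_size = [1, 28, 28]  # (channels, height, width); illustrative values
kernel_shape = [2, 2]
strides = [2, 2]
output_size = [input_output_size[0]] + [
    math.floor((input_output_size[i] - kernel_shape[i - 1]) / strides[i - 1] + 1)
    for i in range(1, len(input_output_size))
]
# output_size == [1, 14, 14]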
) + raise ValueError(msg) - output_shape_wrapper = math.floor + output_shape_wrapper: Callable[[float], int] = math.floor if "ceil_mode" in attr and attr["ceil_mode"] == 1: output_shape_wrapper = math.ceil - output_size = [in_channels] - for i in range(1, len(input_output_size)): - output_size.append( - output_shape_wrapper( - (input_output_size[i] - kernel_shape[i - 1]) / strides[i - 1] + 1 - ) + output_size = [in_channels] + [ + output_shape_wrapper( + (input_output_size[i] - kernel_shape[i - 1]) / strides[i - 1] + 1 ) + for i in range(1, len(input_output_size)) + ] activation = "linear" if len(next_nodes) == 1: @@ -532,31 +610,29 @@ def _node_input_and_transformer(self, node_name): if isinstance(maybe_layer, tuple): transformer, input_layer = maybe_layer return input_layer, transformer - else: - return maybe_layer, None + return maybe_layer, None def _collect_attributes(node): - r = dict() + r = {} for attr in node.attribute: - if attr.type == 1: # FLOAT + if attr.type == ATTR_FLOAT: # FLOAT r[attr.name] = attr.f - elif attr.type == 2: # INT + elif attr.type == ATTR_INT: # INT r[attr.name] = int(attr.i) - elif attr.type == 4: # TENSOR + elif attr.type == ATTR_TENSOR: # TENSOR r[attr.name] = numpy_helper.to_array(attr.t) - pass - elif attr.type == 7: # INTS + elif attr.type == ATTR_INTS: # INTS r[attr.name] = list(attr.ints) else: - raise RuntimeError(f"unhandled attribute type {attr.type}") + msg = f"unhandled attribute type {attr.type}" + raise RuntimeError(msg) return r def _parse_constant_value(node): attr = _collect_attributes(node) - value = attr["value"] - return value + return attr["value"] def _get_input_output_size(input_layer, transformer): diff --git a/src/omlt/io/torch_geometric/__init__.py b/src/omlt/io/torch_geometric/__init__.py index ae94d147..4b908c7a 100644 --- a/src/omlt/io/torch_geometric/__init__.py +++ b/src/omlt/io/torch_geometric/__init__.py @@ -5,3 +5,9 @@ from omlt.io.torch_geometric.torch_geometric_reader import ( load_torch_geometric_sequential, ) + +__all__ = [ + "gnn_with_fixed_graph", + "gnn_with_non_fixed_graph", + "load_torch_geometric_sequential", +] diff --git a/src/omlt/io/torch_geometric/build_gnn_formulation.py b/src/omlt/io/torch_geometric/build_gnn_formulation.py index 6e2e04ee..545925de 100644 --- a/src/omlt/io/torch_geometric/build_gnn_formulation.py +++ b/src/omlt/io/torch_geometric/build_gnn_formulation.py @@ -7,7 +7,7 @@ from omlt.neuralnet import FullSpaceNNFormulation -def gnn_with_non_fixed_graph( +def gnn_with_non_fixed_graph( # noqa: PLR0913 block, nn, N, @@ -15,9 +15,11 @@ def gnn_with_non_fixed_graph( scaled_input_bounds=None, unscaled_input_bounds=None, ): - """ - Build formulation for a torch_geometric graph neural network model (built with Sequential). - Since the input graph is not fixed, the elements in adjacency matrix are decision variables. + """Graph neural network with non-fixed graph. + + Build formulation for a torch_geometric graph neural network model (built with + Sequential). Since the input graph is not fixed, the elements in adjacency matrix + are decision variables. Parameters ---------- @@ -29,7 +31,8 @@ def gnn_with_non_fixed_graph( The number of nodes of input graph scaling_object : instance of ScalingInterface or None Provide an instance of a scaling object to use to scale iputs --> scaled_inputs - and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + and scaled_outputs --> outputs. If None, no scaling is performed. See + scaling.py. 
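The named constants referenced throughout the ONNX parser hunk above are defined elsewhere in the patch; a sketch of their presumed values, inferred from the literal comparisons they replace, is:

# Presumed module-level constants (inferred from the replaced literals; the
# actual definitions live outside this excerpt):
CONV_INPUT_DIMENSIONS = [2, 3]        # Conv nodes take 2 or 3 inputs
RESHAPE_INPUT_DIMENSIONS = 2
MAXPOOL_INPUT_DIMENSIONS = 1
MAXPOOL_INPUT_OUTPUT_W_BATCHES = 4
TWO_D_IMAGE_W_CHANNELS = 3
ATTR_FLOAT, ATTR_INT, ATTR_TENSOR, ATTR_INTS = 1, 2, 4, 7  # ONNX AttributeProto types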
scaled_input_bounds : dict or None A dict that contains the bounds on the scaled variables (the direct inputs to the neural network). If None, then no bounds @@ -40,11 +43,10 @@ def gnn_with_non_fixed_graph( dictionary will be generated using the provided scaling object. If None, then no bounds are specified. - Returns + Returns: ------- OmltBlock (formulated) """ - # build NetworkDefinition for nn net = load_torch_geometric_sequential( nn=nn, @@ -68,7 +70,7 @@ def gnn_with_non_fixed_graph( block.symmetric_adjacency = pyo.ConstraintList() for u in range(N): for v in range(u + 1, N): - block.symmetric_adjacency.add((block.A[u, v] == block.A[v, u])) + block.symmetric_adjacency.add(block.A[u, v] == block.A[v, u]) # build formulation for GNN block.build_formulation(FullSpaceNNFormulation(net)) @@ -76,7 +78,7 @@ def gnn_with_non_fixed_graph( return block -def gnn_with_fixed_graph( +def gnn_with_fixed_graph( # noqa: PLR0913 block, nn, N, @@ -85,9 +87,10 @@ def gnn_with_fixed_graph( scaled_input_bounds=None, unscaled_input_bounds=None, ): - """ - Build formulation for a torch_geometric graph neural network model (built with Sequential). - Given the adjacency matrix, the input graph structure is fixed. + """Graph neural network with non-fixed graph. + + Build formulation for a torch_geometric graph neural network model (built with + Sequential). Given the adjacency matrix, the input graph structure is fixed. Parameters ---------- @@ -101,7 +104,8 @@ def gnn_with_fixed_graph( The adjacency matrix of input graph scaling_object : instance of ScalingInterface or None Provide an instance of a scaling object to use to scale iputs --> scaled_inputs - and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + and scaled_outputs --> outputs. If None, no scaling is performed. See + scaling.py. scaled_input_bounds : dict or None A dict that contains the bounds on the scaled variables (the direct inputs to the neural network). If None, then no bounds @@ -112,13 +116,17 @@ def gnn_with_fixed_graph( dictionary will be generated using the provided scaling object. If None, then no bounds are specified. - Returns + Returns: ------- OmltBlock (formulated) """ - # assume the adjacency matrix is always symmetric - assert np.array_equal(A, np.transpose(A)) + if not np.array_equal(A, np.transpose(A)): + msg = ( + f"Adjacency matrix A of the input graph must be symmetrical. {A} was" + " provided." + ) + raise ValueError(msg) # build NetworkDefinition for nn net = load_torch_geometric_sequential( diff --git a/src/omlt/io/torch_geometric/torch_geometric_reader.py b/src/omlt/io/torch_geometric/torch_geometric_reader.py index 72d594cc..5ce9b315 100644 --- a/src/omlt/io/torch_geometric/torch_geometric_reader.py +++ b/src/omlt/io/torch_geometric/torch_geometric_reader.py @@ -2,13 +2,12 @@ import numpy as np -from omlt.neuralnet.layer import DenseLayer, GNNLayer, InputLayer +from omlt.neuralnet.layer import DenseLayer, GNNLayer, InputLayer, Layer from omlt.neuralnet.network_definition import NetworkDefinition def _compute_gcn_norm(A): - """ - Calculate the norm for a GCN layer + """Calculate the norm for a GCN layer. Parameters ---------- @@ -26,8 +25,7 @@ def _compute_gcn_norm(A): def _compute_sage_norm(A, aggr): - """ - Calculate the norm for a SAGE layer + """Calculate the norm for a SAGE layer. 
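A rough usage sketch of the fixed-graph entry point shown above; `torch_nn` and the bounds are placeholders assumed to be built elsewhere, not part of this patch:

import numpy as np
import pyomo.environ as pyo
from omlt import OmltBlock
from omlt.io.torch_geometric import gnn_with_fixed_graph

N = 4                            # number of graph nodes (illustrative)
A = np.ones((N, N)) - np.eye(N)  # symmetric adjacency, as required above
model = pyo.ConcreteModel()
model.gnn = OmltBlock()
gnn_with_fixed_graph(
    model.gnn,
    torch_nn,  # placeholder: a torch_geometric Sequential model defined elsewhere
    N,
    A,
    unscaled_input_bounds={i: (0.0, 1.0) for i in range(N * 2)},  # assumes 2 features per node
)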
Parameters ---------- @@ -50,8 +48,7 @@ def _compute_sage_norm(A, aggr): def _process_gnn_parameters(gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm): - """ - Construct the weights and biases for the GNNLayer class + """Construct the weights and biases for the GNNLayer class. Parameters ---------- @@ -64,7 +61,7 @@ def _process_gnn_parameters(gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm gnn_norm : matrix-like the norm for the GNN layer, shape: (N, N) - Returns + Returns: ------- weights : matrix-like the weights for the GNNLayer class, shape: (N * in_channels, N * out_channels) @@ -105,7 +102,7 @@ def _process_gnn_parameters(gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm _OP_TYPES = _LAYER_OP_TYPES_FIXED_GRAPH + _ACTIVATION_OP_TYPES + _POOLING_OP_TYPES -def load_torch_geometric_sequential( +def load_torch_geometric_sequential( # noqa: C901, PLR0913, PLR0912, PLR0915 nn, N, A=None, @@ -113,8 +110,9 @@ def load_torch_geometric_sequential( scaled_input_bounds=None, unscaled_input_bounds=None, ): - """ - Load a torch_geometric graph neural network model (built with Sequential) into + """Load a torch_geometric graph neural network model. + + Load a torch_geometric graph neural network model (built with Sequential) into an OMLT network definition object. This network definition object can be used in different formulations. @@ -128,7 +126,8 @@ def load_torch_geometric_sequential( The adjacency matrix of input graph scaling_object : instance of ScalingInterface or None Provide an instance of a scaling object to use to scale iputs --> scaled_inputs - and scaled_outputs --> outputs. If None, no scaling is performed. See scaling.py. + and scaled_outputs --> outputs. If None, no scaling is performed. See + scaling.py. scaled_input_bounds : dict or None A dict that contains the bounds on the scaled variables (the direct inputs to the neural network). If None, then no bounds @@ -139,7 +138,7 @@ def load_torch_geometric_sequential( dictionary will be generated using the provided scaling object. If None, then no bounds are specified. - Returns + Returns: ------- NetworkDefinition """ @@ -151,50 +150,53 @@ def load_torch_geometric_sequential( unscaled_input_bounds=unscaled_input_bounds, ) - prev_layer = InputLayer([n_inputs]) + prev_layer: Layer = InputLayer([n_inputs]) net.add_layer(prev_layer) operations = [] - for l in nn: + for layer in nn: op_name = None - if l.__class__.__name__ == "function": - op_name = l.__name__ + if layer.__class__.__name__ == "function": + op_name = layer.__name__ else: - op_name = l.__class__.__name__ + op_name = layer.__class__.__name__ if op_name not in _OP_TYPES: - raise ValueError("this operation is not supported") + msg = f"Operation {op_name} is not supported." + raise ValueError(msg) operations.append(op_name) if A is None: + supported_layers = { + "Linear", + *_ACTIVATION_OP_TYPES, + *_POOLING_OP_TYPES, + } # If A is None, then the graph is not fixed. # Only layers in _LAYER_OP_TYPES_NON_FIXED_GRAPH are supported. # Only "sum" aggregation is supported. - # Since all weights and biases are possibly needed, A is set to correspond to a complete graph. - for index, l in enumerate(nn): - if ( - operations[index] - in ["Linear"] + _ACTIVATION_OP_TYPES + _POOLING_OP_TYPES - ): + # Since all weights and biases are possibly needed, A is set to correspond to a + # complete graph. 
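In the non-fixed case, the adjacency constructed below is that of a complete graph without self-loops; for example, with N = 3:

import numpy as np

N = 3
A = np.ones((N, N)) - np.eye(N)
# array([[0., 1., 1.],
#        [1., 0., 1.],
#        [1., 1., 0.]])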
+ for index, layer in enumerate(nn): + if operations[index] in supported_layers: # nonlinear activation results in a MINLP - if operations[index] in ["Sigmoid", "LogSoftmax", "Softplus", "Tanh"]: + if operations[index] in {"Sigmoid", "LogSoftmax", "Softplus", "Tanh"}: warnings.warn( "nonlinear activation results in a MINLP", stacklevel=2 ) - # Linear layers, all activation functions, and all pooling functions are still supported. + # Linear layers, all activation functions, and all pooling functions are + # still supported. continue if operations[index] not in _LAYER_OP_TYPES_NON_FIXED_GRAPH: - raise ValueError( - "this layer is not supported when the graph is not fixed" - ) - elif l.aggr != "sum": - raise ValueError( - "this aggregation is not supported when the graph is not fixed" - ) + msg = "this layer is not supported when the graph is not fixed." + raise ValueError(msg) + if layer.aggr != "sum": + msg = "this aggregation is not supported when the graph is not fixed" + raise ValueError(msg) A = np.ones((N, N)) - np.eye(N) - for index, l in enumerate(nn): + for index, layer in enumerate(nn): if operations[index] in _ACTIVATION_OP_TYPES: # Skip activation layers since they are already handled in last layer continue @@ -205,10 +207,12 @@ def load_torch_geometric_sequential( activation = operations[index + 1].lower() if operations[index] == "Linear": - gnn_weights = l.weight.detach().numpy() - gnn_biases = l.bias.detach().numpy() - # A linear layer is either applied on each node's features (i.e., prev_layer.output_size[-1] = N * gnn_weights.shape[1]) - # or the features after pooling (i.e., prev_layer.output_size[-1] = gnn_weights.shape[1]) + gnn_weights = layer.weight.detach().numpy() + gnn_biases = layer.bias.detach().numpy() + # A linear layer is either applied on each node's features (i.e., + # prev_layer.output_size[-1] = N * gnn_weights.shape[1]) + # or the features after pooling (i.e., + # prev_layer.output_size[-1] = gnn_weights.shape[1]) gnn_norm = np.eye(prev_layer.output_size[-1] // gnn_weights.shape[1]) weights, biases = _process_gnn_parameters( gnn_weights, gnn_weights, gnn_biases, gnn_norm @@ -222,12 +226,8 @@ def load_torch_geometric_sequential( biases=biases, ) elif operations[index] == "GCNConv": - assert l.improved == False - assert l.cached == False - assert l.add_self_loops == True - assert l.normalize == True - gnn_weights = l.lin.weight.detach().numpy() - gnn_biases = l.bias.detach().numpy() + gnn_weights = layer.lin.weight.detach().numpy() + gnn_biases = layer.bias.detach().numpy() gnn_norm = _compute_gcn_norm(A) weights, biases = _process_gnn_parameters( gnn_weights, gnn_weights, gnn_biases, gnn_norm @@ -242,15 +242,12 @@ def load_torch_geometric_sequential( N=N, ) elif operations[index] == "SAGEConv": - assert l.normalize == False - assert l.project == False - assert l.aggr in _AGGREGATION_OP_TYPES - gnn_weights_uv = l.lin_l.weight.detach().numpy() - gnn_biases = l.lin_l.bias.detach().numpy() + gnn_weights_uv = layer.lin_l.weight.detach().numpy() + gnn_biases = layer.lin_l.bias.detach().numpy() gnn_weights_vv = np.zeros(shape=gnn_weights_uv.shape) - if l.root_weight: - gnn_weights_vv = l.lin_r.weight.detach().numpy() - gnn_norm = _compute_sage_norm(A, l.aggr) + if layer.root_weight: + gnn_weights_vv = layer.lin_r.weight.detach().numpy() + gnn_norm = _compute_sage_norm(A, layer.aggr) weights, biases = _process_gnn_parameters( gnn_weights_uv, gnn_weights_vv, gnn_biases, gnn_norm ) diff --git a/src/omlt/linear_tree/__init__.py b/src/omlt/linear_tree/__init__.py index 
2f89a669..2099e44e 100644 --- a/src/omlt/linear_tree/__init__.py +++ b/src/omlt/linear_tree/__init__.py @@ -1,5 +1,4 @@ -r""" -There are multiple formulations for representing linear model decision trees. +r"""There are multiple formulations for representing linear model decision trees. Please see the following reference: * Ammari et al. (2023) Linear Model Decision Trees as Surrogates in Optimization @@ -23,3 +22,9 @@ LinearTreeGDPFormulation, LinearTreeHybridBigMFormulation, ) + +__all__ = [ + "LinearTreeDefinition", + "LinearTreeGDPFormulation", + "LinearTreeHybridBigMFormulation", +] diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index 6bd26c8f..09adbb78 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -1,10 +1,11 @@ +from typing import Any + import lineartree import numpy as np class LinearTreeDefinition: - """ - Class to represent a linear tree model trained in the linear-tree package + """Class to represent a linear tree model trained in the linear-tree package. Attributes: __model (linear-tree model) : Linear Tree Model trained in linear-tree @@ -27,22 +28,24 @@ def __init__( scaled_input_bounds=None, unscaled_input_bounds=None, ): - """Create a LinearTreeDefinition object and define attributes based on the + """Initialize LinearTreeDefinition. + + Create a LinearTreeDefinition object and define attributes based on the trained linear model decision tree. Arguments: - lt_regressor -- A LinearTreeRegressor model that is trained by the + lt_regressor: A LinearTreeRegressor model that is trained by the linear-tree package Keyword Arguments: - scaling_object -- A scaling object to specify the scaling parameters + scaling_object: A scaling object to specify the scaling parameters for the linear model tree inputs and outputs. If None, then no scaling is performed. (default: {None}) - scaled_input_bounds -- A dict that contains the bounds on the scaled + scaled_input_bounds: A dict that contains the bounds on the scaled variables (the direct inputs to the tree). If None, then the user must specify the bounds via the input_bounds argument. (default: {None}) - unscaled_input_bounds -- A dict that contains the bounds on the + unscaled_input_bounds: A dict that contains the bounds on the variables (the direct inputs to the tree). If None, then the user must specify the scaled bounds via the scaled_input_bounds argument. 
(default: {None}) @@ -65,7 +68,7 @@ def __init__( ) scaled_input_bounds = { - k: (lbs[k], ubs[k]) for k in unscaled_input_bounds.keys() + k: (lbs[k], ubs[k]) for k in unscaled_input_bounds } # If unscaled input bounds provided and no scaler provided, scaled @@ -73,9 +76,8 @@ def __init__( elif unscaled_input_bounds is not None and scaling_object is None: scaled_input_bounds = unscaled_input_bounds elif unscaled_input_bounds is None: - raise ValueError( - "Input Bounds needed to represent linear trees as MIPs" - ) + msg = "Input Bounds needed to represent linear trees as MIPs" + raise ValueError(msg) self.__unscaled_input_bounds = unscaled_input_bounds self.__scaled_input_bounds = scaled_input_bounds @@ -89,48 +91,49 @@ def __init__( @property def scaling_object(self): - """Returns scaling object""" + """Returns scaling object.""" return self.__scaling_object @property def scaled_input_bounds(self): - """Returns dict containing scaled input bounds""" + """Returns dict containing scaled input bounds.""" return self.__scaled_input_bounds @property def splits(self): - """Returns dict containing split information""" + """Returns dict containing split information.""" return self.__splits @property def leaves(self): - """Returns dict containing leaf information""" + """Returns dict containing leaf information.""" return self.__leaves @property def thresholds(self): - """Returns dict containing threshold information""" + """Returns dict containing threshold information.""" return self.__thresholds @property def n_inputs(self): - """Returns number of inputs to the linear tree""" + """Returns number of inputs to the linear tree.""" return self.__n_inputs @property def n_outputs(self): - """Returns number of outputs to the linear tree""" + """Returns number of outputs to the linear tree.""" return self.__n_outputs def _find_all_children_splits(split, splits_dict): - """ + """Find all children splits. + This helper function finds all multigeneration children splits for an argument split. Arguments: - split --The split for which you are trying to find children splits - splits_dict -- A dictionary of all the splits in the tree + split: The split for which you are trying to find children splits + splits_dict: A dictionary of all the splits in the tree Returns: A list containing the Node IDs of all children splits @@ -154,20 +157,19 @@ def _find_all_children_splits(split, splits_dict): def _find_all_children_leaves(split, splits_dict, leaves_dict): - """ + """Find all children leaves. + This helper function finds all multigeneration children leaves for an argument split. Arguments: - split -- The split for which you are trying to find children leaves - splits_dict -- A dictionary of all the split info in the tree - leaves_dict -- A dictionary of all the leaf info in the tree + split: The split for which you are trying to find children leaves + splits_dict: A dictionary of all the split info in the tree + leaves_dict: A dictionary of all the leaf info in the tree Returns: A list containing all the Node IDs of all children leaves """ - all_leaves = [] - # Find all the splits that are children of the relevant split all_splits = _find_all_children_splits(split, splits_dict) @@ -177,20 +179,18 @@ def _find_all_children_leaves(split, splits_dict, leaves_dict): # For each leaf, check if the parents appear in the list of children # splits (all_splits). 
If so, it must be a leaf of the argument split - for leaf in leaves_dict: - if leaves_dict[leaf]["parent"] in all_splits: - all_leaves.append(leaf) - return all_leaves + return [leaf for leaf in leaves_dict if leaves_dict[leaf]["parent"] in all_splits] def _find_n_inputs(leaves): - """ + """Find n inputs. + Finds the number of inputs using the length of the slope vector in the first leaf Arguments: - leaves -- Dictionary of leaf information + leaves: Dictionary of leaf information Returns: Number of inputs @@ -199,19 +199,19 @@ def _find_n_inputs(leaves): leaf_indices = np.array(list(leaves[tree_indices[0]].keys())) tree_one = tree_indices[0] leaf_one = leaf_indices[0] - n_inputs = len(np.arange(0, len(leaves[tree_one][leaf_one]["slope"]))) - return n_inputs + return len(np.arange(0, len(leaves[tree_one][leaf_one]["slope"]))) def _reassign_none_bounds(leaves, input_bounds): - """ + """Reassign None bounds. + This helper function reassigns bounds that are None to the bounds input by the user Arguments: - leaves -- The dictionary of leaf information. Attribute of the + leaves: The dictionary of leaf information. Attribute of the LinearTreeDefinition object - input_bounds -- The nested dictionary + input_bounds: The nested dictionary Returns: The modified leaves dict without any bounds that are listed as None @@ -230,16 +230,18 @@ def _reassign_none_bounds(leaves, input_bounds): return leaves -def _parse_tree_data(model, input_bounds): - """ +def _parse_tree_data(model, input_bounds): # noqa: C901, PLR0915, PLR0912 + """Parse tree data. + This function creates the data structures with the information required for creation of the variables, sets, and constraints in the pyomo reformulation of the linear model decision trees. Note that these data structures are attributes of the LinearTreeDefinition Class. Arguments: - model -- Trained linear-tree model or dic containing linear-tree model + model: Trained linear-tree model or dic containing linear-tree model summary (e.g. dict = model.summary()) + input_bounds: The input bounds Returns: leaves - Dict containing the following information for each leaf: @@ -277,21 +279,23 @@ def _parse_tree_data(model, input_bounds): # Checks to ensure that the input nested dictionary contains the # correct information for entry in model: - if "children" not in model[entry].keys(): + if "children" not in model[entry]: leaves[entry] = model[entry] else: left_child = model[entry]["children"][0] right_child = model[entry]["children"][1] num_splits_in_model += 1 - if left_child not in model.keys() or right_child not in model.keys(): + if left_child not in model or right_child not in model: count += 1 if count > 0 or num_splits_in_model == 0: - raise ValueError( + msg = ( "Input dict must be the summary of the linear-tree model" - + " e.g. dict = model.summary()" + " e.g. 
dict = model.summary()" ) + raise ValueError(msg) else: - raise TypeError("Model entry must be dict or linear-tree instance") + msg = "Model entry must be dict or linear-tree instance" + raise TypeError(msg) # This loop adds keys for the slopes and intercept and removes the leaf # keys in the splits dictionary @@ -337,7 +341,7 @@ def _parse_tree_data(model, input_bounds): # For each variable that appears in the tree, go through all the splits # and record its splitting threshold - splitting_thresholds = {} + splitting_thresholds: dict[int, Any] = {} for split in splits: var = splits[split]["col"] splitting_thresholds[var] = {} diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py index 4f83e7f3..f70e4873 100644 --- a/src/omlt/linear_tree/lt_formulation.py +++ b/src/omlt/linear_tree/lt_formulation.py @@ -6,7 +6,8 @@ class LinearTreeGDPFormulation(_PyomoFormulation): - r""" + r"""Linear Tree GDP Formulation. + Class to add a Linear Tree GDP formulation to OmltBlock. We use Pyomo.GDP to create the disjuncts and disjunctions and then apply a transformation to convert to a mixed-integer programming representation. @@ -45,18 +46,17 @@ class LinearTreeGDPFormulation(_PyomoFormulation): * Ammari et al. (2023) Linear Model Decision Trees as Surrogates in Optimization of Engineering Applications. Computers & Chemical Engineering * Chen et al. (2022) Pyomo.GDP: An ecosystem for logic based modeling and - optimization development. Optimization and Engineering, 23:607–642 + optimization development. Optimization and Engineering, 23:607-642 """ def __init__(self, lt_definition, transformation="bigm"): - """ - Create a LinearTreeGDPFormulation object + """Create a LinearTreeGDPFormulation object. Arguments: - lt_definition -- LinearTreeDefintion Object + lt_definition: LinearTreeDefintion Object Keyword Arguments: - transformation -- choose which Pyomo.GDP formulation to apply. + transformation: choose which Pyomo.GDP formulation to apply. Supported transformations are bigm, hull, mbigm, and custom (default: {'bigm'}) @@ -70,9 +70,8 @@ def __init__(self, lt_definition, transformation="bigm"): # Ensure that the GDP transformation given is supported supported_transformations = ["bigm", "hull", "mbigm", "custom"] if transformation not in supported_transformations: - raise NotImplementedError( - "Supported transformations are: bigm, mbigm, hull, and custom" - ) + msg = "Supported transformations are: bigm, mbigm, hull, and custom" + raise NotImplementedError(msg) @property def input_indexes(self): @@ -85,7 +84,9 @@ def output_indexes(self): return list(range(self.model_definition.n_outputs)) def _build_formulation(self): - """This method is called by the OmltBlock to build the corresponding + """Build formulation. + + This method is called by the OmltBlock to build the corresponding mathematical formulation on the Pyomo block. """ _setup_scaled_inputs_outputs( @@ -104,8 +105,7 @@ def _build_formulation(self): class LinearTreeHybridBigMFormulation(_PyomoFormulation): - r""" - Class to add a Linear Tree Hybrid Big-M formulation to OmltBlock. + r"""Class to add a Linear Tree Hybrid Big-M formulation to OmltBlock. .. math:: \begin{align*} @@ -134,11 +134,10 @@ class LinearTreeHybridBigMFormulation(_PyomoFormulation): """ def __init__(self, lt_definition): - """ - Create a LinearTreeHybridBigMFormulation object + """Create a LinearTreeHybridBigMFormulation object. 
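A rough usage sketch of the GDP formulation earlier in this hunk; the trained linear-tree regressor `ltr` and the bound values are placeholders, not part of this patch:

import pyomo.environ as pyo
from omlt import OmltBlock
from omlt.linear_tree import LinearTreeDefinition, LinearTreeGDPFormulation

definition = LinearTreeDefinition(ltr, unscaled_input_bounds={0: (-2.0, 2.0)})
formulation = LinearTreeGDPFormulation(definition, transformation="hull")

model = pyo.ConcreteModel()
model.lt = OmltBlock()
model.lt.build_formulation(formulation)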
Arguments: - lt_definition -- LinearTreeDefinition Object + lt_definition: LinearTreeDefinition Object """ super().__init__() self.model_definition = lt_definition @@ -154,7 +153,9 @@ def output_indexes(self): return list(range(self.model_definition.n_outputs)) def _build_formulation(self): - """This method is called by the OmltBlock to build the corresponding + """Build formulation. + + This method is called by the OmltBlock to build the corresponding mathematical formulation on the Pyomo block. """ _setup_scaled_inputs_outputs( @@ -172,13 +173,14 @@ def _build_formulation(self): def _build_output_bounds(model_def, input_bounds): - """ + """Build output bounds. + This helper function develops bounds of the output variable based on the values of the input_bounds and the signs of the slope Arguments: - model_def -- Model definition - input_bounds -- Dict of input bounds + model_def: Model definition + input_bounds: Dict of input bounds Returns: List that contains the conservative lower and upper bounds of the @@ -204,10 +206,8 @@ def _build_output_bounds(model_def, input_bounds): else: upper_bound += slopes[k] * input_bounds[k][1] + intercept lower_bound += slopes[k] * input_bounds[k][0] + intercept - if upper_bound >= bounds[1]: - bounds[1] = upper_bound - if lower_bound <= bounds[0]: - bounds[0] = lower_bound + bounds[1] = max(bounds[1], upper_bound) + bounds[0] = min(bounds[0], lower_bound) upper_bound = 0 lower_bound = 0 @@ -217,15 +217,14 @@ def _build_output_bounds(model_def, input_bounds): def _add_gdp_formulation_to_block( block, model_definition, input_vars, output_vars, transformation ): - """ - This function adds the GDP representation to the OmltBlock using Pyomo.GDP + """This function adds the GDP representation to the OmltBlock using Pyomo.GDP. Arguments: - block -- OmltBlock - model_definition -- LinearTreeDefinition Object - input_vars -- input variables to the linear tree model - output_vars -- output variable of the linear tree model - transformation -- Transformation to apply + block: OmltBlock + model_definition: LinearTreeDefinition Object + input_vars: input variables to the linear tree model + output_vars: output variable of the linear tree model + transformation: Transformation to apply """ leaves = model_definition.leaves @@ -234,10 +233,7 @@ def _add_gdp_formulation_to_block( # The set of leaves and the set of features tree_ids = list(leaves.keys()) - t_l = [] - for tree in tree_ids: - for leaf in leaves[tree].keys(): - t_l.append((tree, leaf)) + t_l = [(tree, leaf) for tree in tree_ids for leaf in leaves[tree]] features = np.arange(0, n_inputs) # Use the input_bounds and the linear models in the leaves to calculate @@ -292,14 +288,13 @@ def disjunction_rule(b, tree): def _add_hybrid_formulation_to_block(block, model_definition, input_vars, output_vars): - """ - This function adds the Hybrid BigM representation to the OmltBlock + """This function adds the Hybrid BigM representation to the OmltBlock. 
Arguments: - block -- OmltBlock - model_definition -- LinearTreeDefinition Object - input_vars -- input variables to the linear tree model - output_vars -- output variable of the linear tree model + block: OmltBlock + model_definition: LinearTreeDefinition Object + input_vars: input variables to the linear tree model + output_vars: output variable of the linear tree model """ leaves = model_definition.leaves input_bounds = model_definition.scaled_input_bounds @@ -309,10 +304,7 @@ def _add_hybrid_formulation_to_block(block, model_definition, input_vars, output tree_ids = list(leaves.keys()) # Create a list of tuples that contains the tree and leaf indices. Note that # the leaf indices depend on the tree in the ensemble. - t_l = [] - for tree in tree_ids: - for leaf in leaves[tree].keys(): - t_l.append((tree, leaf)) + t_l = [(tree, leaf) for tree in tree_ids for leaf in leaves[tree]] features = np.arange(0, n_inputs) diff --git a/src/omlt/neuralnet/__init__.py b/src/omlt/neuralnet/__init__.py index 2b66fc97..014de739 100644 --- a/src/omlt/neuralnet/__init__.py +++ b/src/omlt/neuralnet/__init__.py @@ -1,4 +1,5 @@ -r""" +r"""omlt.neuralnet. + The basic pipeline in source code of OMLT is: .. math:: @@ -12,7 +13,10 @@ \xrightarrow[\text{Constraints}]{\text{Layer 3}}\cdots \end{align*} -where :math:`\mathbf z^{(0)}` is the output of `InputLayer`, :math:`\hat{\mathbf z}^{(l)}` is the pre-activation output of :math:`l`-th layer, :math:`\mathbf z^{(l)}` is the post-activation output of :math:`l`-th layer. +where +:math:`\mathbf z^{(0)}` is the output of `InputLayer`, +:math:`\hat{\mathbf z}^{(l)}` is the pre-activation output of :math:`l`-th layer, +:math:`\mathbf z^{(l)}` is the post-activation output of :math:`l`-th layer. """ @@ -26,3 +30,14 @@ ReluComplementarityFormulation, ReluPartitionFormulation, ) + +__all__ = [ + "NetworkDefinition", + "FullSpaceNNFormulation", + "FullSpaceSmoothNNFormulation", + "ReducedSpaceNNFormulation", + "ReducedSpaceSmoothNNFormulation", + "ReluBigMFormulation", + "ReluComplementarityFormulation", + "ReluPartitionFormulation", +] diff --git a/src/omlt/neuralnet/activations/__init__.py b/src/omlt/neuralnet/activations/__init__.py index 7918d9f1..d60d00c4 100644 --- a/src/omlt/neuralnet/activations/__init__.py +++ b/src/omlt/neuralnet/activations/__init__.py @@ -1,8 +1,13 @@ -r""" -Since all activation functions are element-wised, we only consider how to formulate activation functions for a single neuron, where :math:`x` denotes pre-activation variable, and :math:`y` denotes post-activation variable. +r"""Activation functions. + +Since all activation functions are element-wised, we only consider how to formulate +activation functions for a single neuron, where :math:`x` denotes pre-activation +variable, and :math:`y` denotes post-activation variable. 
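A hedged end-to-end sketch using the formulation classes exported from omlt.neuralnet above; the ONNX loader call and model file are assumptions, not shown in this excerpt:

import pyomo.environ as pyo
from omlt import OmltBlock
from omlt.io import load_onnx_neural_network_with_bounds  # assumed available
from omlt.neuralnet import ReluBigMFormulation

net = load_onnx_neural_network_with_bounds("network.onnx")  # placeholder path
model = pyo.ConcreteModel()
model.nn = OmltBlock()
model.nn.build_formulation(ReluBigMFormulation(net))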
""" +from typing import Any + from .linear import linear_activation_constraint, linear_activation_function from .relu import ComplementarityReLUActivation, bigm_relu_activation_constraint from .smooth import ( @@ -22,4 +27,19 @@ "tanh": tanh_activation_function, } -NON_INCREASING_ACTIVATIONS = [] +NON_INCREASING_ACTIVATIONS: list[Any] = [] + +__all__ = [ + "linear_activation_constraint", + "linear_activation_function", + "ComplementarityReLUActivation", + "bigm_relu_activation_constraint", + "sigmoid_activation_constraint", + "sigmoid_activation_function", + "softplus_activation_constraint", + "softplus_activation_function", + "tanh_activation_constraint", + "tanh_activation_function", + "ACTIVATION_FUNCTION_MAP", + "NON_INCREASING_ACTIVATIONS", +] diff --git a/src/omlt/neuralnet/activations/linear.py b/src/omlt/neuralnet/activations/linear.py index 712049c1..4538401a 100644 --- a/src/omlt/neuralnet/activations/linear.py +++ b/src/omlt/neuralnet/activations/linear.py @@ -3,10 +3,9 @@ def linear_activation_function(zhat): def linear_activation_constraint( - net_block, net, layer_block, layer, add_constraint=True + net_block, net, layer_block, layer, *, add_constraint=True ): - r""" - Linear activation constraint generator + r"""Linear activation constraint generator. Generates the constraints for the linear activation function: diff --git a/src/omlt/neuralnet/activations/relu.py b/src/omlt/neuralnet/activations/relu.py index 427be19a..b02b0591 100644 --- a/src/omlt/neuralnet/activations/relu.py +++ b/src/omlt/neuralnet/activations/relu.py @@ -1,10 +1,9 @@ import pyomo.environ as pyo -import pyomo.mpec as mpec +from pyomo import mpec def bigm_relu_activation_constraint(net_block, net, layer_block, layer): - r""" - Big-M ReLU activation formulation. + r"""Big-M ReLU activation formulation. Generates the constraints for the ReLU activation function: @@ -35,7 +34,8 @@ def bigm_relu_activation_constraint(net_block, net, layer_block, layer): y&\le \sigma u \end{align*} - The lower bound of :math:`y` is :math:`\max(0,l)`, and the upper bound of :math:`y` is :math:`\max(0,u)`. + The lower bound of :math:`y` is :math:`\max(0,l)`, and the upper bound of :math:`y` + is :math:`\max(0,u)`. """ layer_block.q_relu = pyo.Var(layer.output_indexes, within=pyo.Binary) @@ -77,14 +77,11 @@ def bigm_relu_activation_constraint(net_block, net, layer_block, layer): output_index ] <= layer_block.zhat[output_index] - layer_block._big_m_lb_relu[ output_index - ] * ( - 1.0 - layer_block.q_relu[output_index] - ) + ] * (1.0 - layer_block.q_relu[output_index]) class ComplementarityReLUActivation: - r""" - Complementarity-based ReLU activation formulation. + r"""Complementarity-based ReLU activation formulation. Generates the constraints for the ReLU activation function: @@ -119,7 +116,7 @@ def __init__(self, transform=None): transform = "mpec.simple_nonlinear" self.transform = transform - def __call__(self, net_block, net, layer_block, layer): + def __call__(self, net_block, net, layer_block, layer): # noqa: ARG002 layer_block._complementarity = mpec.Complementarity( layer.output_indexes, rule=_relu_complementarity ) diff --git a/src/omlt/neuralnet/activations/smooth.py b/src/omlt/neuralnet/activations/smooth.py index b37ac6c7..7f5bd10d 100644 --- a/src/omlt/neuralnet/activations/smooth.py +++ b/src/omlt/neuralnet/activations/smooth.py @@ -2,8 +2,7 @@ def softplus_activation_function(x): - r""" - Applies the softplus function: + r"""Applies the softplus function. .. 
math:: @@ -16,8 +15,7 @@ def softplus_activation_function(x): def sigmoid_activation_function(x): - r""" - Applies the sigmoid function: + r"""Applies the sigmoid function. .. math:: @@ -30,8 +28,7 @@ def sigmoid_activation_function(x): def tanh_activation_function(x): - r""" - Applies the tanh function: + r"""Applies the tanh function. .. math:: @@ -44,40 +41,31 @@ def tanh_activation_function(x): def softplus_activation_constraint(net_block, net, layer_block, layer): - r""" - Softplus activation constraint generator. - - """ + r"""Softplus activation constraint generator.""" return smooth_monotonic_activation_constraint( net_block, net, layer_block, layer, softplus_activation_function ) def sigmoid_activation_constraint(net_block, net, layer_block, layer): - r""" - Sigmoid activation constraint generator. - - """ + r"""Sigmoid activation constraint generator.""" return smooth_monotonic_activation_constraint( net_block, net, layer_block, layer, sigmoid_activation_function ) def tanh_activation_constraint(net_block, net, layer_block, layer): - r""" - tanh activation constraint generator. - - """ + r"""Tanh activation constraint generator.""" return smooth_monotonic_activation_constraint( net_block, net, layer_block, layer, tanh_activation_function ) def smooth_monotonic_activation_constraint(net_block, net, layer_block, layer, fcn): - r""" - Activation constraint generator for a smooth monotonic function. + r"""Activation constraint generator for a smooth monotonic function. - Generates the constraints for the activation function :math:`f` if it is smooth and monotonic: + Generates the constraints for the activation function :math:`f` if it is smooth and + monotonic: .. math:: diff --git a/src/omlt/neuralnet/layer.py b/src/omlt/neuralnet/layer.py index 16e068a3..40e64f3e 100644 --- a/src/omlt/neuralnet/layer.py +++ b/src/omlt/neuralnet/layer.py @@ -1,5 +1,4 @@ -r""" -Neural network layer classes. +r"""Neural network layer classes. We use the following notations to define a layer: @@ -18,13 +17,15 @@ """ import itertools +from typing import ClassVar import numpy as np +OUTPUT_DIMENSIONS = 3 + class Layer: - """ - Base layer class. + """Base layer class. Parameters ---------- @@ -42,13 +43,11 @@ def __init__( self, input_size, output_size, *, activation=None, input_index_mapper=None ): if not isinstance(input_size, (list, tuple)): - raise TypeError( - f"input_size must be a list or tuple, {type(input_size)} was provided." - ) + msg = f"input_size must be a list or tuple, {type(input_size)} provided." + raise TypeError(msg) if not isinstance(output_size, (list, tuple)): - raise TypeError( - f"output_size must be a list or tuple, {type(output_size)} was provided." - ) + msg = f"output_size must be a list or tuple, {type(output_size)} provided." 
+ raise TypeError(msg) self.__input_size = list(input_size) self.__output_size = list(output_size) self.activation = activation @@ -58,35 +57,34 @@ def __init__( @property def input_size(self): - """Return the size of the input tensor""" + """Return the size of the input tensor.""" return self.__input_size @property def output_size(self): - """Return the size of the output tensor""" + """Return the size of the output tensor.""" return self.__output_size @property def activation(self): - """Return the activation function""" + """Return the activation function.""" return self.__activation @activation.setter def activation(self, new_activation): - """Change the activation function""" + """Change the activation function.""" if new_activation is None: new_activation = "linear" self.__activation = new_activation @property def input_index_mapper(self): - """Return the index mapper""" + """Return the index mapper.""" return self.__input_index_mapper @property def input_indexes_with_input_layer_indexes(self): - """ - Return an iterator generating a tuple of local and input indexes. + """Return an iterator generating a tuple of local and input indexes. Local indexes are indexes over the elements of the current layer. Input indexes are indexes over the elements of the previous layer. @@ -101,17 +99,16 @@ def input_indexes_with_input_layer_indexes(self): @property def input_indexes(self): - """Return a list of the input indexes""" + """Return a list of the input indexes.""" return list(itertools.product(*[range(v) for v in self.__input_size])) @property def output_indexes(self): - """Return a list of the output indexes""" + """Return a list of the output indexes.""" return list(itertools.product(*[range(v) for v in self.__output_size])) def eval_single_layer(self, x): - """ - Evaluate the layer at x. + """Evaluate the layer at x. Parameters ---------- @@ -124,34 +121,35 @@ def eval_single_layer(self, x): else x[:] ) if x_reshaped.shape != tuple(self.input_size): - raise ValueError( - f"Layer requires an input size {self.input_size}, but the input tensor had size {x_reshaped.shape}." + msg = ( + f"Layer requires an input size {self.input_size}, but the input tensor" + f" has size {x_reshaped.shape}." ) + raise ValueError(msg) y = self._eval(x_reshaped) return self._apply_activation(y) def __repr__(self): - return f"<{str(self)} at {hex(id(self))}>" + return f"<{self!s} at {hex(id(self))}>" def _eval(self, x): - raise NotImplementedError() + raise NotImplementedError def _apply_activation(self, x): if self.__activation == "linear" or self.__activation is None: return x - elif self.__activation == "relu": + if self.__activation == "relu": return np.maximum(x, 0) - elif self.__activation == "sigmoid": + if self.__activation == "sigmoid": return 1.0 / (1.0 + np.exp(-x)) - elif self.__activation == "tanh": + if self.__activation == "tanh": return np.tanh(x) - else: - raise ValueError(f"Unknown activation function {self.__activation}") + msg = f"Unknown activation function {self.__activation}" + raise ValueError(msg) class InputLayer(Layer): - """ - The first layer in any network. + """The first layer in any network. Parameters ---------- @@ -172,13 +170,15 @@ def _eval(self, x): class DenseLayer(Layer): - r""" + r"""Dense layer. + The dense layer is defined by: .. 
math:: \begin{align*} - y_j = \sigma\left(\sum\limits_{i=0}^{F_{in}-1}w_{ij}x_i+b_j\right), && \forall 0\le j= input_layer_block.z[ - input_index - ] - ub * ( + ] >= input_layer_block.z[input_index] - ub * ( 1.0 - net_block.A[input_node_index, output_node_index] ) @@ -141,9 +136,7 @@ def full_space_gnn_layer(net_block, net, layer_block, layer): local_index, output_node_index ] = input_layer_block.zbar[ local_index, output_node_index - ] <= input_layer_block.z[ - input_index - ] - lb * ( + ] <= input_layer_block.z[input_index] - lb * ( 1.0 - net_block.A[input_node_index, output_node_index] ) @@ -182,16 +175,16 @@ def gnn_layer(b, *output_index): def full_space_conv2d_layer(net_block, net, layer_block, layer): - r""" - Add full-space formulation of the 2-D convolutional layer to the block + r"""Add full-space formulation of the 2-D convolutional layer to the block. A 2-D convolutional layer applies cross-correlation kernels to a 2-D input. - Specifically, the input is convolved by sliding the kernels along the input vertically and horizontally. - At each location, the preactivation is computed as the dot product of the kernel weights and the input plus a bias term. + Specifically, the input is convolved by sliding the kernels along the input + vertically and horizontally. At each location, the preactivation is computed + as the dot product of the kernel weights and the input plus a bias term. """ - # If activation is an increasing function, - # move it onto successor max pooling layer (if it exists) for tighter max pooling formulation + # If activation is an increasing function, move it onto successor max pooling layer + # (if it exists) for tighter max pooling formulation succ_layers = list(net.successors(layer)) succ_layer = succ_layers[0] if len(succ_layers) == 1 else None if ( @@ -199,18 +192,19 @@ def full_space_conv2d_layer(net_block, net, layer_block, layer): and layer.activation not in NON_INCREASING_ACTIVATIONS and layer.activation != "linear" ): - # activation applied after convolution layer, so there shouldn't be an activation after max pooling too + # activation applied after convolution layer, so there shouldn't be an + # activation after max pooling too if succ_layer.activation != "linear": - raise ValueError( - f"Activation is applied after convolution layer, but the successor max pooling layer {succ_layer} has an activation function also." + msg = ( + "Activation is applied after convolution layer, but the successor max" + f"pooling layer {succ_layer} has an activation function also." ) + raise ValueError(msg) succ_layer.activation = layer.activation layer.activation = "linear" input_layer, input_layer_block = _input_layer_and_block(net_block, net, layer) - # for out_d, out_r, out_c in layer.output_indexes: - # output_index = (out_d, out_r, out_c) @layer_block.Constraint(layer.output_indexes) def convolutional_layer(b, *output_index): out_d, out_r, out_c = output_index @@ -221,55 +215,63 @@ def convolutional_layer(b, *output_index): lb, ub = compute_bounds_on_expr(expr) layer_block.zhat[output_index].setlb(lb) layer_block.zhat[output_index].setub(ub) - # layer_block.constraints.add(layer_block.zhat[output_index] == expr) return layer_block.zhat[output_index] == expr def full_space_maxpool2d_layer(net_block, net, layer_block, layer): - r""" - Add Big-M max pooling formulation. + r"""Add Big-M max pooling formulation. .. 
math:: \begin{align*} \hat{z_i} \leq w\cdot x_{i}^{l} + \sum_{k{=}1}^{d} M_{i}^{l,k} q_{i}^{k} && \forall i \in N,\ \forall l \in \{ 1,...,d \} \\ - \hat{z_i} \geq w\cdot x_{i}^{l} && \forall i \in N,\ \forall l \in \{ 1,...,d \} \\ - (x_{i},\hat{z_i},q_{i}) \in [L_{i},U_{i}] \times \mathbb{R} \times \Delta^{d} && \forall i \in N \\ + \hat{z_i} \geq w\cdot x_{i}^{l} && \forall i \in N,\ \\ + \forall l \in \{ 1,...,d \} \\ + (x_{i},\hat{z_i},q_{i}) \in [L_{i},U_{i}] \times \mathbb{R} \\ + \times \Delta^{d} && \forall i \in N \\ q_{i} \in \{ 0,1 \}^{d} && \forall i \in N \\ M_{i}^{l,k} = w\cdot max\{ L_{i}^{l} - L_{i}^{k}, \\ L_{i}^{l} - U_{i}^{k}, U_{i}^{l} - L_{i}^{k}, U_{i}^{l} - U_{i}^{k} \} - && \forall i \in N,\ \forall l \in \{ 1,...,d \},\ \forall k \in \{ 1,...,d \} + && \forall i \in N,\ \forall l \in \{ 1,...,d \},\ \\ + \forall k \in \{ 1,...,d \} \end{align*} - where :math:`w` is the convolution kernel on the preceding convolutional layer; :math:`d` is the number of features - in each of the :math:`N` max pooling windows; - :math:`x_{i}` is the set of :math:`d` features in the :math:`i`-th max pooling window; - :math:`\Delta^{d}` is the :math:`d`-dimensional simplex; and [L_{i},U_{i}] are the bounds on x_{i}. + where: - NOTE This formulation is adapted from the Anderson et al. (2020) formulation, section 5.1, with the following changes: + :math:`w` is the convolution kernel on the preceding convolutional layer; + :math:`d` is the number of features in each of the :math:`N` max pooling windows; + :math:`x_{i}` is the set of :math:`d` features in the :math:`i`-th max pooling + window; + :math:`\Delta^{d}` is the :math:`d`-dimensional simplex; and [L_{i},U_{i}] are the + bounds on x_{i}. - - OMLT presently does not support biases on convolutional layers. Bias terms from the original formulation - are removed. + NOTE This formulation is adapted from the Anderson et al. (2020) formulation, + section 5.1, with the following changes: - - The original formulation formulates the max of :math:`w^{l}\cdot x + b^{l}`, varying the weights :math:`w` - and biases :math:`b` and keeping the input :math:`x` constant. Since convolutional layers have constant weights - and biases convolved with varying portions of the feature map, this formulation formulates the max of + - OMLT presently does not support biases on convolutional layers. Bias terms from + the original formulation are removed. + - The original formulation formulates the max of :math:`w^{l}\cdot x + b^{l}`, + varying the weights :math:`w` and biases :math:`b` and keeping the input :math:`x` + constant. Since convolutional layers have constant weights and biases convolved + with varying portions of the feature map, this formulation formulates the max of :math:`w\cdot x^{l} + b`. - - Due to the above 2 changes, the calculation of :math:`N^{l,k}` is changed. """ input_layer, input_layer_block = _input_layer_and_block(net_block, net, layer) if not isinstance(input_layer, ConvLayer2D): - raise TypeError("Input layer must be a ConvLayer2D.") + msg = "Input layer must be a ConvLayer2D." + raise TypeError(msg) if input_layer.activation != "linear": - raise ValueError( - "Non-increasing activation functions on the preceding convolutional layer are not supported." + msg = ( + "Non-increasing activation functions on the preceding convolutional layer" + " are not supported." 
) - # TODO - add support for non-increasing activation functions on preceding convolutional layer + raise ValueError(msg) - # note kernel indexes are the same set of values for any output index, so wlog get kernel indexes for (0, 0, 0) + # note kernel indexes are the same set of values for any output index, so wlog get + # kernel indexes for (0, 0, 0) layer_block._kernel_indexes = pyo.Set( initialize=( kernel_index @@ -291,7 +293,8 @@ def full_space_maxpool2d_layer(net_block, net, layer_block, layer): out_d, out_r, out_c = output_index # cannot compute an expr for the max, - # as pyomo expressions cannot contain functions whose output depends on a comparison (except piecewise linear functions) + # as pyomo expressions cannot contain functions whose output depends on a + # comparison (except piecewise linear functions) # so compute lb and ub directly bounds = ( input_layer_block.z[layer.input_index_mapper(input_index)].bounds @@ -303,33 +306,40 @@ def full_space_maxpool2d_layer(net_block, net, layer_block, layer): layer_block.zhat[output_index].setlb(max(lbs)) layer_block.zhat[output_index].setub(max(ubs)) - layer_block._q_sum_maxpool[output_index] = 1 == sum( - layer_block.q_maxpool[output_index, k] for k in layer_block._kernel_indexes + layer_block._q_sum_maxpool[output_index] = ( + sum( + layer_block.q_maxpool[output_index, k] + for k in layer_block._kernel_indexes + ) + == 1 ) - for l, input_index in layer.kernel_index_with_input_indexes( + for layer_index, input_index in layer.kernel_index_with_input_indexes( out_d, out_r, out_c ): - input_index = layer.input_index_mapper(input_index) + mapped_input_index = layer.input_index_mapper(input_index) # Since biases are zero, # input_layer_block.z[input_index] is equal to w dot x in the formulation. - layer_block._zhat_upper_bound[output_index, l] = layer_block.zhat[ + layer_block._zhat_upper_bound[output_index, layer_index] = layer_block.zhat[ output_index - ] <= input_layer_block.z[input_index] + sum( + ] <= input_layer_block.z[mapped_input_index] + sum( layer_block.q_maxpool[output_index, k] - * _calculate_n_plus(output_index, l, k, layer, input_layer_block) + * _calculate_n_plus( + output_index, layer_index, k, layer, input_layer_block + ) for k in layer_block._kernel_indexes ) - layer_block._zhat_lower_bound[output_index, l] = ( - layer_block.zhat[output_index] >= input_layer_block.z[input_index] + layer_block._zhat_lower_bound[output_index, layer_index] = ( + layer_block.zhat[output_index] + >= input_layer_block.z[mapped_input_index] ) -def _calculate_n_plus(out_index, l, k, layer, input_layer_block): - if l == k: +def _calculate_n_plus(out_index, kernel_index, k, layer, input_layer_block): + if kernel_index == k: return 0 - x_l_index = layer.input_index_mapper(layer.get_input_index(out_index, l)) + x_l_index = layer.input_index_mapper(layer.get_input_index(out_index, kernel_index)) x_k_index = layer.input_index_mapper(layer.get_input_index(out_index, k)) return max( x_k_bound - x_l_bound @@ -341,7 +351,8 @@ def _calculate_n_plus(out_index, l, k, layer, input_layer_block): def _input_layer_and_block(net_block, net, layer): input_layers = list(net.predecessors(layer)) if len(input_layers) != 1: - raise ValueError("Multiple input layers are not currently supported.") + msg = "Multiple input layers are not currently supported." 
+ raise ValueError(msg) input_layer = input_layers[0] input_layer_block = net_block.layer[id(input_layer)] return input_layer, input_layer_block diff --git a/src/omlt/neuralnet/layers/partition_based.py b/src/omlt/neuralnet/layers/partition_based.py index f29cadd2..f5d7d06c 100644 --- a/src/omlt/neuralnet/layers/partition_based.py +++ b/src/omlt/neuralnet/layers/partition_based.py @@ -4,8 +4,7 @@ def default_partition_split_func(w, n): - r""" - Default function for partitioning weights in :math:`w` into :math:`n` partitions. + r"""Default function to partition weights in :math:`w` into :math:`n` partitions. Weights in :math:`w` are sorted and partitioned evenly. @@ -15,16 +14,16 @@ def default_partition_split_func(w, n): return np.array_split(sorted_indexes, n) -def partition_based_dense_relu_layer(net_block, net, layer_block, layer, split_func): - r""" - Partition-based ReLU activation formulation. +def partition_based_dense_relu_layer(net_block, net, layer_block, layer, split_func): # noqa: C901, PLR0915 + r"""Partition-based ReLU activation formulation. Generates the constraints for the ReLU activation function: .. math:: \begin{align*} - y_j = \max\left(0,\sum\limits_{i=0}^{F_{in}-1}w_{ij}x_i+b_j\right), && \forall 0\le j 1: - raise ValueError(f"Layer {layer} has multiple predecessors.") + msg = f"Layer {layer} is not an input layer, but has no predecessors." + raise ValueError(msg) + if len(prev_layers) > 1: + msg = f"Layer {layer} has multiple predecessors." + raise ValueError(msg) prev_layer = prev_layers[0] prev_layer_block = net_block.layer[id(prev_layer)] @layer_block.Block(layer.output_indexes) - def output_node_block(b, *output_index): + def output_node_block(b, *output_index): # noqa: PLR0915 # dense layers multiply only the last dimension of # their inputs weights = layer.weights[:, output_index[-1]] @@ -100,19 +103,18 @@ def output_node_block(b, *output_index): for split_local_index in splits[split_index]: _, local_index = input_layer_indexes[split_local_index] - if mapper: - input_index = mapper(local_index) - else: - input_index = local_index + input_index = mapper(local_index) if mapper else local_index w = weights[local_index[-1]] expr += prev_layer_block.z[input_index] * w lb, ub = compute_bounds_on_expr(expr) if lb is None: - raise ValueError("Expression is unbounded below.") + msg = "Expression is unbounded below." + raise ValueError(msg) if ub is None: - raise ValueError("Expression is unbounded above.") + msg = "Expression is unbounded above." + raise ValueError(msg) z2 = b.z2[split_index] z2.setlb(min(0, lb)) @@ -133,9 +135,11 @@ def output_node_block(b, *output_index): lb, ub = compute_bounds_on_expr(expr) if lb is None: - raise ValueError("Expression is unbounded below.") + msg = "Expression is unbounded below." + raise ValueError(msg) if ub is None: - raise ValueError("Expression is unbounded above.") + msg = "Expression is unbounded above." 
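A small check of default_partition_split_func from the top of this hunk, using an illustrative weight vector and assuming the sort is an argsort by value:

import numpy as np

w = np.array([0.5, -1.2, 0.1, 2.0])
sorted_indexes = np.argsort(w)             # array([1, 2, 0, 3])
parts = np.array_split(sorted_indexes, 2)
# [array([1, 2]), array([0, 3])] -> two even partitions of weight indexes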
+ raise ValueError(msg) layer_block.z[output_index].setlb(0) layer_block.z[output_index].setub(max(0, ub)) @@ -144,10 +148,7 @@ def output_node_block(b, *output_index): for split_index in range(num_splits): for split_local_index in splits[split_index]: _, local_index = input_layer_indexes[split_local_index] - if mapper: - input_index = mapper(local_index) - else: - input_index = local_index + input_index = mapper(local_index) if mapper else local_index w = weights[local_index[-1]] eq_13_expr += prev_layer_block.z[input_index] * w diff --git a/src/omlt/neuralnet/layers/reduced_space.py b/src/omlt/neuralnet/layers/reduced_space.py index 95d1f97f..9160f87e 100644 --- a/src/omlt/neuralnet/layers/reduced_space.py +++ b/src/omlt/neuralnet/layers/reduced_space.py @@ -1,6 +1,5 @@ def reduced_space_dense_layer(net_block, net, layer_block, layer, activation): - r""" - Add reduced-space formulation of the dense layer to the block + r"""Add reduced-space formulation of the dense layer to the block. .. math:: @@ -12,11 +11,11 @@ def reduced_space_dense_layer(net_block, net, layer_block, layer, activation): # not an input layer, process the expressions prev_layers = list(net.predecessors(layer)) if len(prev_layers) == 0: - raise ValueError( - f"Layer {layer} is not an input layer, but has no predecessors." - ) - elif len(prev_layers) > 1: - raise ValueError(f"Layer {layer} has multiple predecessors.") + msg = f"Layer {layer} is not an input layer, but has no predecessors." + raise ValueError(msg) + if len(prev_layers) > 1: + msg = f"Layer {layer} has multiple predecessors." + raise ValueError(msg) prev_layer = prev_layers[0] prev_layer_block = net_block.layer[id(prev_layer)] diff --git a/src/omlt/neuralnet/network_definition.py b/src/omlt/neuralnet/network_definition.py index aeef22eb..783f0c76 100644 --- a/src/omlt/neuralnet/network_definition.py +++ b/src/omlt/neuralnet/network_definition.py @@ -7,7 +7,8 @@ class NetworkDefinition: def __init__( self, scaling_object=None, scaled_input_bounds=None, unscaled_input_bounds=None ): - """ + """Network Definition. + Create a network definition object used to create the neural network formulation in Pyomo @@ -26,7 +27,7 @@ def __init__( parameter will be generated using the scaling object. If None, then no bounds are specified. """ - self.__layers_by_id = dict() + self.__layers_by_id = {} self.__graph = nx.DiGraph() self.__scaling_object = scaling_object @@ -41,10 +42,11 @@ def __init__( ) scaled_input_bounds = { - k: (lbs[k], ubs[k]) for k in unscaled_input_bounds.keys() + k: (lbs[k], ubs[k]) for k in unscaled_input_bounds } - # If unscaled input bounds provided and no scaler provided, scaled input bounds = unscaled input bounds + # If unscaled input bounds provided and no scaler provided, + # scaled input bounds = unscaled input bounds elif unscaled_input_bounds is not None and scaling_object is None: scaled_input_bounds = unscaled_input_bounds @@ -52,8 +54,7 @@ def __init__( self.__scaled_input_bounds = scaled_input_bounds def add_layer(self, layer): - """ - Add a layer to the network. + """Add a layer to the network. Parameters ---------- @@ -65,8 +66,7 @@ def add_layer(self, layer): self.__graph.add_node(layer_id) def add_edge(self, from_layer, to_layer): - """ - Add an edge between two layers. + """Add an edge between two layers. 
Parameters ---------- @@ -78,69 +78,85 @@ def add_edge(self, from_layer, to_layer): id_to = id(to_layer) id_from = id(from_layer) if id_to not in self.__layers_by_id: - raise ValueError(f"Inbound layer {to_layer} not found in network.") + msg = f"Inbound layer {to_layer} not found in network." + raise ValueError(msg) if id_from not in self.__layers_by_id: - raise ValueError(f"Outbound layer {from_layer} not found in network.") + msg = f"Outbound layer {from_layer} not found in network." + raise ValueError(msg) self.__graph.add_edge(id_from, id_to) @property def scaling_object(self): - """Return an instance of the scaling object that supports the ScalingInterface""" + """Return an instance of the scaling object supporting the ScalingInterface.""" return self.__scaling_object @property def scaled_input_bounds(self): - """Return a dict of tuples containing lower and upper bounds of neural network inputs""" + """Scaled Input Bounds. + + Return a dict of tuples containing lower and upper bounds of neural network + inputs. + """ return self.__scaled_input_bounds @property def unscaled_input_bounds(self): - """Return a dict of tuples containing lower and upper bounds of unscaled neural network inputs""" + """Unscaled Input Bounds. + + Return a dict of tuples containing lower and upper bounds of unscaled neural + network inputs. + """ return self.__unscaled_input_bounds @property def input_layers(self): - """Return an iterator over the input layers""" + """Return an iterator over the input layers.""" for layer_id, in_degree in self.__graph.in_degree(): if in_degree == 0: yield self.__layers_by_id[layer_id] @property def input_nodes(self): - """An alias for input_layers""" + """An alias for input_layers.""" return self.input_layers @property def output_layers(self): - """Return an iterator over the output layer""" + """Return an iterator over the output layer.""" for layer_id, out_degree in self.__graph.out_degree(): if out_degree == 0: yield self.__layers_by_id[layer_id] @property def output_nodes(self): - """An alias for output_layers""" + """An alias for output_layers.""" return self.output_layers def layer(self, layer_id): - """Return the layer with the given id""" + """Return the layer with the given id.""" return self.__layers_by_id[layer_id] @property def layers(self): - """Return an iterator over all the layers""" + """Return an iterator over all the layers.""" for layer_id in nx.topological_sort(self.__graph): yield self.__layers_by_id[layer_id] def predecessors(self, layer): - """Return an iterator over the layers with outbound connections into the layer""" + """Predecessors. + + Return an iterator over the layers with outbound connections into the layer. + """ if isinstance(layer, Layer): layer = id(layer) for node_id in self.__graph.predecessors(layer): yield self.__layers_by_id[node_id] def successors(self, layer): - """Return an iterator over the layers with an inbound connection from the layer""" + """Successors. + + Return an iterator over the layers with an inbound connection from the layer. 
+ """ if isinstance(layer, Layer): layer = id(layer) for node_id in self.__graph.successors(layer): diff --git a/src/omlt/neuralnet/nn_formulation.py b/src/omlt/neuralnet/nn_formulation.py index b0461aa2..b9ba0adf 100644 --- a/src/omlt/neuralnet/nn_formulation.py +++ b/src/omlt/neuralnet/nn_formulation.py @@ -1,4 +1,5 @@ -import numpy as np +from functools import partial + import pyomo.environ as pyo from omlt.formulation import _PyomoFormulation, _setup_scaled_inputs_outputs @@ -57,10 +58,12 @@ def _ignore_input_layer(): "tanh": tanh_activation_constraint, } +MULTI_INPUTS_UNSUPPORTED = "Multiple input layers are not currently supported." +MULTI_OUTPUTS_UNSUPPORTED = "Multiple output layers are not currently supported." + class FullSpaceNNFormulation(_PyomoFormulation): - """ - This class is the entry-point to build neural network formulations. + """This class is the entry-point to build neural network formulations. This class iterates over all nodes in the neural network and for each one them, generates the constraints to represent the layer @@ -96,10 +99,10 @@ def __init__( network_inputs = list(self.__network_definition.input_nodes) if len(network_inputs) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) network_outputs = list(self.__network_definition.output_nodes) if len(network_outputs) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) def _supported_default_layer_constraints(self): return _DEFAULT_LAYER_CONSTRAINTS @@ -124,7 +127,7 @@ def input_indexes(self): """The indexes of the formulation inputs.""" network_inputs = list(self.__network_definition.input_nodes) if len(network_inputs) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) return network_inputs[0].input_indexes @property @@ -132,15 +135,14 @@ def output_indexes(self): """The indexes of the formulation output.""" network_outputs = list(self.__network_definition.output_nodes) if len(network_outputs) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) return network_outputs[0].output_indexes -def _build_neural_network_formulation( +def _build_neural_network_formulation( # noqa: C901 block, network_structure, layer_constraints, activation_constraints ): - """ - Adds the neural network formulation to the given Pyomo block. + """Adds the neural network formulation to the given Pyomo block. Parameters ---------- @@ -183,27 +185,21 @@ def layer(b, layer_id): layer_constraints_func = layer_constraints.get(type(layer), None) if layer_constraints_func is None: - raise ValueError( - "Layer type {} is not supported by this formulation.".format( - type(layer) - ) - ) + msg = f"Layer type {type(layer)} is not supported by this formulation." + raise ValueError(msg) layer_constraints_func(block, net, layer_block, layer) activation_constraints_func = activation_constraints.get(layer.activation, None) if activation_constraints_func is None: - raise ValueError( - "Activation {} is not supported by this formulation.".format( - layer.activation - ) - ) + msg = f"Activation {layer.activation} is not supported by this formulation." 
+ raise ValueError(msg) activation_constraints_func(block, net, layer_block, layer) # setup input variables constraints # currently only support a single input layer input_layers = list(net.input_layers) if len(input_layers) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) input_layer = input_layers[0] @block.Constraint(input_layer.output_indexes) @@ -214,7 +210,7 @@ def input_assignment(b, *output_index): # currently only support a single output layer output_layers = list(net.output_layers) if len(output_layers) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) output_layer = output_layers[0] @block.Constraint(output_layer.output_indexes) @@ -226,7 +222,8 @@ def output_assignment(b, *output_index): class FullSpaceSmoothNNFormulation(FullSpaceNNFormulation): def __init__(self, network_structure): - """ + """Full Space Smooth Neural Network Formulation. + This class is used for building "full-space" formulations of neural network models composed of smooth activations (e.g., tanh, sigmoid, etc.) @@ -249,7 +246,8 @@ def _supported_default_activation_constraints(self): class ReluBigMFormulation(FullSpaceNNFormulation): def __init__(self, network_structure): - """ + """Relu Big-M Formulation. + This class is used for building "full-space" formulations of neural network models composed of relu activations using a big-M formulation @@ -270,7 +268,8 @@ def _supported_default_activation_constraints(self): class ReluComplementarityFormulation(FullSpaceNNFormulation): def __init__(self, network_structure): - """ + """Relu Complementarity Formulation. + This class is used for building "full-space" formulations of neural network models composed of relu activations using a complementarity formulation (smooth represenation) @@ -290,7 +289,8 @@ def _supported_default_activation_constraints(self): class ReducedSpaceNNFormulation(_PyomoFormulation): - """ + """Reduced Space Neural Network Formulation. + This class is used to build reduced-space formulations of neural networks. 
@@ -309,23 +309,18 @@ def __init__(self, network_structure, activation_functions=None): self.__scaling_object = network_structure.scaling_object self.__scaled_input_bounds = network_structure.scaled_input_bounds - # TODO: look into increasing support for other layers / activations - # self._layer_constraints = {**_DEFAULT_LAYER_CONSTRAINTS, **layer_constraints} self._activation_functions = dict( self._supported_default_activation_functions() ) if activation_functions is not None: self._activation_functions.update(activation_functions) - # If we want to do network input/output validation at initialize time instead - # of build time, as it is for FullSpaceNNFormulation: - # - # network_inputs = list(self.__network_definition.input_nodes) - # if len(network_inputs) != 1: - # raise ValueError("Multiple input layers are not currently supported.") - # network_outputs = list(self.__network_definition.output_nodes) - # if len(network_outputs) != 1: - # raise ValueError("Multiple output layers are not currently supported.") + network_inputs = list(self.__network_definition.input_nodes) + if len(network_inputs) != 1: + raise ValueError(MULTI_INPUTS_UNSUPPORTED) + network_outputs = list(self.__network_definition.output_nodes) + if len(network_outputs) != 1: + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) def _supported_default_activation_functions(self): return dict(_DEFAULT_ACTIVATION_FUNCTIONS) @@ -346,10 +341,11 @@ def _build_formulation(self): # currently only support a single input layer input_layers = list(net.input_layers) if len(input_layers) != 1: - raise ValueError( + msg = ( "build_formulation called with a network that has more than" " one input layer. Only single input layers are supported." ) + raise ValueError(msg) input_layer = input_layers[0] input_layer_id = id(input_layer) input_layer_block = block.layer[input_layer_id] @@ -367,18 +363,24 @@ def z(b, *output_index): # skip the InputLayer continue - # TODO: Add error checking on layer type + if not isinstance(layer, DenseLayer): + msg = ( + f"ReducedSpaceNNFormulation only supports Dense layers. {net}" + f" contains {layer} which is a {type(layer)}." + ) + raise TypeError(msg) + # build the linear expressions and the activation function layer_id = id(layer) layer_block = block.layer[layer_id] layer_func = reduced_space_dense_layer # layer_constraints[type(layer)] activation_func = self._activation_functions.get(layer.activation, None) if activation_func is None: - raise ValueError( - "Activation {} is not supported by this formulation.".format( - layer.activation - ) + msg = ( + f"Activation {layer.activation} is not supported by this" + " formulation." ) + raise ValueError(msg) layer_func(block, net, layer_block, layer, activation_func) @@ -386,34 +388,27 @@ def z(b, *output_index): # currently only support a single output layer output_layers = list(net.output_layers) if len(output_layers) != 1: - raise ValueError( + msg = ( "build_formulation called with a network that has more than" " one output layer. Only single output layers are supported." 
) + raise ValueError(msg) output_layer = output_layers[0] @block.Constraint(output_layer.output_indexes) def output_assignment(b, *output_index): - pb = b.parent_block() + b.parent_block() return ( b.scaled_outputs[output_index] == b.layer[id(output_layer)].z[output_index] ) - # @property - # def layer_constraints(self): - # return self._layer_constraints - - # @property - # def activation_constraints(self): - # return self._activation_constraints - @property def input_indexes(self): """The indexes of the formulation inputs.""" network_inputs = list(self.__network_definition.input_nodes) if len(network_inputs) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) return network_inputs[0].input_indexes @property @@ -421,12 +416,13 @@ def output_indexes(self): """The indexes of the formulation output.""" network_outputs = list(self.__network_definition.output_nodes) if len(network_outputs) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) return network_outputs[0].output_indexes class ReducedSpaceSmoothNNFormulation(ReducedSpaceNNFormulation): - """ + """Reduced Space Smooth Neural Network Formulation. + This class is used to build reduced-space formulations of neural networks with smooth activation functions. @@ -449,7 +445,8 @@ def _supported_default_activation_functions(self): class ReluPartitionFormulation(_PyomoFormulation): - """ + """ReLU Partition Formulation. + This class is used to build partition-based formulations of neural networks. @@ -469,11 +466,11 @@ def __init__(self, network_structure, split_func=None): self.__scaled_input_bounds = network_structure.scaled_input_bounds if split_func is None: - split_func = lambda w: default_partition_split_func(w, 2) + split_func = partial(default_partition_split_func, n=2) self.__split_func = split_func - def _build_formulation(self): + def _build_formulation(self): # noqa: C901 _setup_scaled_inputs_outputs( self.block, self.__scaling_object, self.__scaled_input_bounds ) @@ -515,11 +512,14 @@ def layer(b, layer_id): full_space_dense_layer(block, net, layer_block, layer) linear_activation_constraint(block, net, layer_block, layer) else: - raise ValueError( - "ReluPartitionFormulation supports Dense layers with relu or linear activation" + msg = ( + "ReluPartitionFormulation supports Dense layers with relu or" + " linear activation" ) + raise ValueError(msg) else: - raise ValueError("ReluPartitionFormulation supports only Dense layers") + msg = "ReluPartitionFormulation supports only Dense layers" + raise TypeError(msg) # This check is never hit. 
The formulation._build_formulation() function is # only ever called by an OmltBlock.build_formulation(), and that runs the @@ -530,7 +530,7 @@ def layer(b, layer_id): # currently only support a single input layer input_layers = list(net.input_layers) if len(input_layers) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) input_layer = input_layers[0] @block.Constraint(input_layer.output_indexes) @@ -544,7 +544,7 @@ def input_assignment(b, *output_index): # currently only support a single output layer output_layers = list(net.output_layers) if len(output_layers) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) output_layer = output_layers[0] @block.Constraint(output_layer.output_indexes) @@ -559,7 +559,7 @@ def input_indexes(self): """The indexes of the formulation inputs.""" network_inputs = list(self.__network_definition.input_nodes) if len(network_inputs) != 1: - raise ValueError("Multiple input layers are not currently supported.") + raise ValueError(MULTI_INPUTS_UNSUPPORTED) return network_inputs[0].input_indexes @property @@ -567,5 +567,5 @@ def output_indexes(self): """The indexes of the formulation output.""" network_outputs = list(self.__network_definition.output_nodes) if len(network_outputs) != 1: - raise ValueError("Multiple output layers are not currently supported.") + raise ValueError(MULTI_OUTPUTS_UNSUPPORTED) return network_outputs[0].output_indexes diff --git a/src/omlt/py.typed b/src/omlt/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/omlt/scaling.py b/src/omlt/scaling.py index ea7416ba..39163142 100644 --- a/src/omlt/scaling.py +++ b/src/omlt/scaling.py @@ -1,34 +1,43 @@ -""" +"""Scaling. + The omlt.scaling module describes the interface for providing different scaling -expressions to the Pyomo model for the inputs and outputs of an ML model. An implementation of a common scaling approach is -included with `OffsetScaling`. +expressions to the Pyomo model for the inputs and outputs of an ML model. An +implementation of a common scaling approach is included with `OffsetScaling`. """ import abc +from typing import Any class ScalingInterface(abc.ABC): @abc.abstractmethod def get_scaled_input_expressions(self, input_vars): - """This method returns a list of expressions for the scaled inputs from - the unscaled inputs""" - pass # pragma: no cover + """Get scaled inputs. + + This method returns a list of expressions for the scaled inputs from + the unscaled inputs + """ + # pragma: no cover @abc.abstractmethod def get_unscaled_output_expressions(self, scaled_output_vars): - """This method returns a list of expressions for the unscaled outputs from - the scaled outputs""" - pass # pragma: no cover + """Get unscaled outputs. + + This method returns a list of expressions for the unscaled outputs from + the scaled outputs + """ + # pragma: no cover -def convert_to_dict(x): - if type(x) is dict: +def convert_to_dict(x: Any) -> dict[Any, Any]: + if isinstance(x, dict): return dict(x) - return {i: v for i, v in enumerate(x)} + return dict(enumerate(x)) class OffsetScaling(ScalingInterface): - r""" + r"""OffsetScaling interface. 
+ This scaling object represents the following scaling equations for inputs (x) and outputs (y) @@ -51,7 +60,7 @@ class OffsetScaling(ScalingInterface): """ def __init__(self, offset_inputs, factor_inputs, offset_outputs, factor_outputs): - super(OffsetScaling, self).__init__() + super().__init__() self.__x_offset = convert_to_dict(offset_inputs) self.__x_factor = convert_to_dict(factor_inputs) self.__y_offset = convert_to_dict(offset_outputs) @@ -59,112 +68,100 @@ def __init__(self, offset_inputs, factor_inputs, offset_outputs, factor_outputs) for k, v in self.__x_factor.items(): if v <= 0: - raise ValueError( + msg = ( "OffsetScaling only accepts positive values" " for factor_inputs. Negative value found at" - " index {}.".format(k) + f" index {k}." ) + raise ValueError(msg) for k, v in self.__y_factor.items(): if v <= 0: - raise ValueError( + msg = ( "OffsetScaling only accepts positive values" " for factor_outputs. Negative value found at" - " index {}.".format(k) + f" index {k}." ) + raise ValueError(msg) def get_scaled_input_expressions(self, input_vars): - """ - Get the scaled input expressions of the input variables. - """ + """Get the scaled input expressions of the input variables.""" sorted_keys = sorted(input_vars.keys()) if ( sorted(self.__x_offset) != sorted_keys or sorted(self.__x_factor) != sorted_keys ): - raise ValueError( + msg = ( "get_scaled_input_expressions called with input_vars" " that do not have the same indices as offset_inputs" " or factor_inputs.\n" - "Keys in input_vars: {}.\n" - "Keys in offset_inputs: {}.\n" - "Keys in offset_factor: {}.".format( - sorted_keys, sorted(self.__x_offset), sorted(self.__x_factor) - ) + f"Keys in input_vars: {sorted_keys}.\n" + f"Keys in offset_inputs: {sorted(self.__x_offset)}.\n" + f"Keys in offset_factor: {sorted(self.__x_factor)}." ) + raise ValueError(msg) x = input_vars - return {k: (x[k] - self.__x_offset[k]) / self.__x_factor[k] for k in x.keys()} + return {k: (x[k] - self.__x_offset[k]) / self.__x_factor[k] for k in x} def get_unscaled_input_expressions(self, scaled_input_vars): - """ - Get the unscaled input expressions of the scaled input variables. - """ + """Get the unscaled input expressions of the scaled input variables.""" sorted_keys = sorted(scaled_input_vars.keys()) if ( sorted(self.__x_offset) != sorted_keys or sorted(self.__x_factor) != sorted_keys ): - raise ValueError( + msg = ( "get_scaled_input_expressions called with input_vars" " that do not have the same indices as offset_inputs" " or factor_inputs.\n" - "Keys in input_vars: {}\n" - "Keys in offset_inputs: {}\n" - "Keys in offset_factor: {}".format( - sorted_keys, sorted(self.__x_offset), sorted(self.__x_factor) - ) + f"Keys in input_vars: {sorted_keys}\n" + f"Keys in offset_inputs: {sorted(self.__x_offset)}\n" + f"Keys in offset_factor: {sorted(self.__x_factor)}" ) + raise ValueError(msg) scaled_x = scaled_input_vars return { - k: scaled_x[k] * self.__x_factor[k] + self.__x_offset[k] - for k in scaled_x.keys() + k: scaled_x[k] * self.__x_factor[k] + self.__x_offset[k] for k in scaled_x } def get_scaled_output_expressions(self, output_vars): - """ - Get the scaled output expressions of the output variables. 
- """ + """Get the scaled output expressions of the output variables.""" sorted_keys = sorted(output_vars.keys()) if ( sorted(self.__y_offset) != sorted_keys or sorted(self.__y_factor) != sorted_keys ): - raise ValueError( + msg = ( "get_scaled_output_expressions called with output_vars" " that do not have the same indices as offset_outputs" " or factor_outputs.\n" - "Keys in output_vars: {}\n" - "Keys in offset_outputs: {}\n" - "Keys in offset_factor: {}".format( - sorted_keys, sorted(self.__y_offset), sorted(self.__y_factor) - ) + f"Keys in output_vars: {sorted_keys}\n" + f"Keys in offset_outputs: {sorted(self.__y_offset)}\n" + f"Keys in offset_factor: {sorted(self.__y_factor)}" ) + raise ValueError(msg) y = output_vars - return {k: (y[k] - self.__y_offset[k]) / self.__y_factor[k] for k in y.keys()} + return {k: (y[k] - self.__y_offset[k]) / self.__y_factor[k] for k in y} def get_unscaled_output_expressions(self, scaled_output_vars): - """ - Get the unscaled output expressions of the scaled output variables. - """ + """Get the unscaled output expressions of the scaled output variables.""" sorted_keys = sorted(scaled_output_vars.keys()) if ( sorted(self.__y_offset) != sorted_keys or sorted(self.__y_factor) != sorted_keys ): - raise ValueError( + msg = ( "get_scaled_output_expressions called with output_vars" " that do not have the same indices as offset_outputs" " or factor_outputs.\n" - "Keys in output_vars: {}\n" - "Keys in offset_outputs: {}\n" - "Keys in offset_factor: {}".format( - sorted_keys, sorted(self.__y_offset), sorted(self.__y_factor) - ) + f"Keys in output_vars: {sorted_keys}\n" + f"Keys in offset_outputs: {sorted(self.__y_offset)}\n" + f"Keys in offset_factor: {sorted(self.__y_factor)}" ) + raise ValueError(msg) scaled_y = scaled_output_vars return { - k: scaled_y[k] * self.__y_factor[k] + self.__y_offset[k] - for k in scaled_y.keys() + k: scaled_y[k] * self.__y_factor[k] + self.__y_offset[k] for k in scaled_y } diff --git a/tests/conftest.py b/tests/conftest.py index b9c4daf7..6460461f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,39 +9,37 @@ def get_neural_network_data(desc): - """ - Return input and test data for a neural network. + """Return input and test data for a neural network. Parameters ---------- desc : string model name. One of 131 or 2353. """ + rng = np.random.default_rng(42) + if desc == "131": # build data with 1 input and 1 output and 500 data points - x = np.random.uniform(-1, 1, 500) + x = rng.uniform(-1, 1, 500) y = np.sin(x) - x_test = np.random.uniform(-1, 1, 5) + x_test = rng.uniform(-1, 1, 5) return x, y, x_test - elif desc == "2353": + if desc == "2353": # build data with 2 inputs, 3 outputs, and 500 data points - np.random.seed(42) - x = np.random.uniform([-1, 2], [1, 3], (500, 2)) + x = rng.uniform([-1, 2], [1, 3], (500, 2)) y1 = np.sin(x[:, 0] * x[:, 1]) y2 = x[:, 0] + x[:, 1] y3 = np.cos(x[:, 0] / x[:, 1]) y = np.column_stack((y1, y2, y3)) - x_test = np.random.uniform([-1, 2], [1, 3], (5, 2)) + x_test = rng.uniform([-1, 2], [1, 3], (5, 2)) return x, y, x_test return None class _Datadir: - """ - Give access to files in the `models` directory. - """ + """Give access to files in the `models` directory.""" def __init__(self, basedir): self._basedir = basedir @@ -58,8 +56,9 @@ def datadir(): @pytest.fixture def two_node_network_relu(): - """ - 1 1 + """Two node network with ReLU activation. 
+ + 1 1 x0 -------- (1) --------- (3) | / | / diff --git a/tests/gbt/test_gbt_formulation.py b/tests/gbt/test_gbt_formulation.py index 57d93427..1d4ba621 100644 --- a/tests/gbt/test_gbt_formulation.py +++ b/tests/gbt/test_gbt_formulation.py @@ -8,6 +8,12 @@ from omlt.gbt.gbt_formulation import GBTBigMFormulation from omlt.gbt.model import GradientBoostedTreeModel +TOTAL_CONSTRAINTS = 423 +Y_VARS = 42 +Z_L_VARS = 160 +SINGLE_LEAVES = 20 +SPLITS = 140 + @pytest.mark.skip("Francesco and Alex need to check this test") def test_formulation_with_continuous_variables(): @@ -27,50 +33,18 @@ def test_formulation_with_continuous_variables(): assert ( len(list(m.gbt.component_data_objects(pe.Var))) == 202 + 10 ) # our auto-created variables - assert len(list(m.gbt.component_data_objects(pe.Constraint))) == 423 # TODO: fix? - - assert len(m.gbt.z_l) == 160 - assert len(m.gbt.y) == 42 - - assert len(m.gbt.single_leaf) == 20 - assert len(m.gbt.left_split) == 140 - assert len(m.gbt.right_split) == 140 - assert len(m.gbt.categorical) == 0 - assert len(m.gbt.var_lower) == 42 - assert len(m.gbt.var_upper) == 42 - - -# TODO: did we remove categorical variables intentionally? -# def test_formulation_with_categorical_variables(): -# model = onnx.load(Path(__file__).parent / "categorical_model.onnx") -# m = pe.ConcreteModel() + assert len(list(m.gbt.component_data_objects(pe.Constraint))) == TOTAL_CONSTRAINTS -# m.x = pe.Var(range(3), bounds=(-2.0, 2.0)) -# # categorical variable -# m.y = pe.Var(bounds=(0, 1), domain=pe.Integers) + assert len(m.gbt.z_l) == Z_L_VARS + assert len(m.gbt.y) == Y_VARS -# m.z = pe.Var() - -# m.gbt = pe.Block() -# add_formulation_to_block( -# m.gbt, model, input_vars=[m.x[0], m.x[1], m.x[2], m.y], output_vars=[m.z] -# ) - -# assert len(list(m.gbt.component_data_objects(pe.Var))) == 193 -# # there are 28 * 2 constraints missing -# # related to categorical variables -# assert len(list(m.gbt.component_data_objects(pe.Constraint))) == 391 - -# assert len(m.gbt.z_l) == 160 -# assert len(m.gbt.y) == 31 - -# assert len(m.gbt.single_leaf) == 20 -# assert len(m.gbt.left_split) == 140 -# assert len(m.gbt.right_split) == 140 -# assert len(m.gbt.categorical) == 1 -# assert len(m.gbt.var_lower) == 31 -# assert len(m.gbt.var_upper) == 31 + assert len(m.gbt.single_leaf) == SINGLE_LEAVES + assert len(m.gbt.left_split) == SPLITS + assert len(m.gbt.right_split) == SPLITS + assert len(m.gbt.categorical) == 0 + assert len(m.gbt.var_lower) == Y_VARS + assert len(m.gbt.var_upper) == Y_VARS @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") diff --git a/tests/io/test_input_bounds.py b/tests/io/test_input_bounds.py index c8f1f439..ea9238fc 100644 --- a/tests/io/test_input_bounds.py +++ b/tests/io/test_input_bounds.py @@ -15,7 +15,7 @@ def test_input_bounds_reader_writer_with_list(): def test_input_bounds_reader_writer_with_dictionary(): - input_bounds = dict(((i, i), (i * 10.0, i * 10.0 + 1.0)) for i in range(10)) + input_bounds = {(i, i): (i * 10.0, i * 10.0 + 1.0) for i in range(10)} with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f: write_input_bounds(f.name, input_bounds) diff --git a/tests/io/test_keras_reader.py b/tests/io/test_keras_reader.py index 21629c66..5f738aca 100644 --- a/tests/io/test_keras_reader.py +++ b/tests/io/test_keras_reader.py @@ -2,6 +2,9 @@ from omlt.dependencies import keras, keras_available +NUM_LAYERS_131 = 3 +NUM_LAYERS_BIG = 5 + if keras_available: from omlt.io import load_keras_sequential @@ -14,7 +17,7 @@ def 
test_keras_reader(datadir): net = load_keras_sequential(nn) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 for layer in layers: assert layer.activation == "linear" assert layers[1].weights.shape == (1, 3) @@ -25,7 +28,7 @@ def test_keras_reader(datadir): ) net = load_keras_sequential(nn) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 assert layers[1].activation == "sigmoid" assert layers[2].activation == "linear" assert layers[1].weights.shape == (1, 3) @@ -36,7 +39,7 @@ def test_keras_reader(datadir): ) net = load_keras_sequential(nn) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 assert layers[1].activation == "sigmoid" assert layers[2].activation == "sigmoid" assert layers[1].weights.shape == (1, 3) @@ -45,7 +48,7 @@ def test_keras_reader(datadir): nn = keras.models.load_model(datadir.file("big.keras"), compile=False) net = load_keras_sequential(nn) layers = list(net.layers) - assert len(layers) == 5 + assert len(layers) == NUM_LAYERS_BIG assert layers[1].activation == "sigmoid" assert layers[2].activation == "sigmoid" assert layers[3].activation == "sigmoid" diff --git a/tests/io/test_onnx_parser.py b/tests/io/test_onnx_parser.py index 763b282c..4db71e03 100644 --- a/tests/io/test_onnx_parser.py +++ b/tests/io/test_onnx_parser.py @@ -2,6 +2,15 @@ from omlt.dependencies import onnx, onnx_available +NUM_LAYERS_131 = 3 +NUM_LAYERS_GEMM = 4 +NUM_LAYERS_MAXPOOL = 4 +NUM_LAYERS_BIG = 5 + +MAXPOOL_KERNEL_DEPTH = 3 + +NEAR_EQUAL = 1e-05 + if onnx_available: from omlt.io.onnx import load_onnx_neural_network from omlt.io.onnx_parser import NetworkParser @@ -12,7 +21,7 @@ def test_linear_131(datadir): model = onnx.load(datadir.file("keras_linear_131.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 for layer in layers: assert layer.activation == "linear" assert layers[1].weights.shape == (1, 3) @@ -24,7 +33,7 @@ def test_linear_131_relu(datadir): model = onnx.load(datadir.file("keras_linear_131_relu.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 assert layers[1].activation == "relu" assert layers[2].activation == "linear" assert layers[1].weights.shape == (1, 3) @@ -36,7 +45,7 @@ def test_linear_131_sigmoid(datadir): model = onnx.load(datadir.file("keras_linear_131_sigmoid.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 3 + assert len(layers) == NUM_LAYERS_131 assert layers[1].activation == "sigmoid" assert layers[2].activation == "linear" assert layers[1].weights.shape == (1, 3) @@ -48,7 +57,7 @@ def test_gemm(datadir): model = onnx.load(datadir.file("gemm.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 4 + assert len(layers) == NUM_LAYERS_GEMM assert layers[1].weights.shape == (784, 75) assert layers[2].weights.shape == (75, 75) assert layers[3].weights.shape == (75, 10) @@ -58,7 +67,7 @@ def test_gemm(datadir): @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") -def test_gemm_transB(datadir): +def test_gemm_trans_b(datadir): model = onnx.load(datadir.file("gemm_not_transB.onnx")) model_transB = onnx.load(datadir.file("gemm_transB.onnx")) net = load_onnx_neural_network(model) @@ -67,10 +76,10 @@ def test_gemm_transB(datadir): layers_transB = list(net_transB.layers) assert 
len(layers) == len(layers_transB) assert layers[1].weights.shape == layers_transB[1].weights.shape - assert abs(layers[1].weights[0][0] - layers_transB[1].weights[0][0]) < 1e-05 - assert abs(layers[1].weights[0][1] - layers_transB[1].weights[1][0]) < 1e-05 - assert abs(layers[1].weights[1][0] - layers_transB[1].weights[0][1]) < 1e-05 - assert abs(layers[1].weights[1][1] - layers_transB[1].weights[1][1]) < 1e-05 + assert abs(layers[1].weights[0][0] - layers_transB[1].weights[0][0]) < NEAR_EQUAL + assert abs(layers[1].weights[0][1] - layers_transB[1].weights[1][0]) < NEAR_EQUAL + assert abs(layers[1].weights[1][0] - layers_transB[1].weights[0][1]) < NEAR_EQUAL + assert abs(layers[1].weights[1][1] - layers_transB[1].weights[1][1]) < NEAR_EQUAL @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -78,7 +87,7 @@ def test_conv(datadir): model = onnx.load(datadir.file("convx1_gemmx1.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 4 + assert len(layers) == NUM_LAYERS_GEMM assert layers[1].activation == "linear" assert layers[2].activation == "linear" assert layers[3].activation == "relu" @@ -91,7 +100,7 @@ def test_maxpool(datadir): model = onnx.load(datadir.file("maxpool_2d.onnx")) net = load_onnx_neural_network(model) layers = list(net.layers) - assert len(layers) == 4 + assert len(layers) == NUM_LAYERS_MAXPOOL assert layers[1].activation == "relu" assert layers[2].activation == "linear" assert layers[3].activation == "linear" @@ -105,7 +114,7 @@ def test_maxpool(datadir): assert layers[2].output_size == [3, 5, 2] assert layers[3].output_size == [3, 2, 1] for layer in layers[1:]: - assert layer.kernel_depth == 3 + assert layer.kernel_depth == MAXPOOL_KERNEL_DEPTH @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -113,10 +122,10 @@ def test_input_tensor_invalid_dims(datadir): model = onnx.load(datadir.file("keras_linear_131.onnx")) model.graph.input[0].type.tensor_type.shape.dim[1].dim_value = 0 parser = NetworkParser() - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match='All dimensions in graph "tf2onnx" input tensor have 0 value.' + ): parser.parse_network(model.graph, None, None) - expected_msg = 'All dimensions in graph "tf2onnx" input tensor have 0 value.' - assert str(excinfo.value) == expected_msg @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -124,10 +133,10 @@ def test_no_input_layers(datadir): model = onnx.load(datadir.file("keras_linear_131.onnx")) model.graph.input.remove(model.graph.input[0]) parser = NetworkParser() - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match='No valid input layer found in graph "tf2onnx".' + ): parser.parse_network(model.graph, None, None) - expected_msg = 'No valid input layer found in graph "tf2onnx".' - assert str(excinfo.value) == expected_msg @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -136,10 +145,13 @@ def test_node_no_inputs(datadir): while len(model.graph.node[0].input) > 0: model.graph.node[0].input.pop() parser = NetworkParser() - with pytest.raises(ValueError) as excinfo: + expected_msg = ( + 'Nodes must have inputs or have op_type "Constant". Node ' + '"StatefulPartitionedCall/keras_linear_131/dense/MatMul" has' + ' no inputs and op_type "MatMul".' 
+ ) + with pytest.raises(ValueError, match=expected_msg): parser.parse_network(model.graph, None, None) - expected_msg = """Nodes must have inputs or have op_type \"Constant\". Node \"StatefulPartitionedCall/keras_linear_131/dense/MatMul\" has no inputs and op_type \"MatMul\".""" - assert str(excinfo.value) == expected_msg @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -148,45 +160,56 @@ def test_consume_wrong_node_type(datadir): parser = NetworkParser() parser.parse_network(model.graph, None, None) - with pytest.raises(ValueError) as excinfo: + expected_msg_dense = ( + "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, " + "but the parsing method for MatMul nodes was called. This could indicate " + "changes in the network being parsed." + ) + with pytest.raises(ValueError, match=expected_msg_dense): parser._consume_dense_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2], ) - expected_msg_dense = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only MatMul nodes can be used as starting points for consumption." - assert str(excinfo.value) == expected_msg_dense - - with pytest.raises(ValueError) as excinfo: + expected_msg_gemm = ( + "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, " + "but the parsing method for Gemm nodes was called. This could indicate " + "changes in the network being parsed." + ) + with pytest.raises(ValueError, match=expected_msg_gemm): parser._consume_gemm_dense_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2], ) - expected_msg_gemm = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Gemm nodes can be used as starting points for consumption." - assert str(excinfo.value) == expected_msg_gemm - - with pytest.raises(ValueError) as excinfo: + expected_msg_conv = ( + "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, " + "but the parsing method for Conv nodes was called. This could indicate " + "changes in the network being parsed." + ) + with pytest.raises(ValueError, match=expected_msg_conv): parser._consume_conv_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2], ) - expected_msg_conv = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Conv nodes can be used as starting points for consumption." - assert str(excinfo.value) == expected_msg_conv - - with pytest.raises(ValueError) as excinfo: + expected_msg_reshape = ( + "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, " + "but the parsing method for Reshape nodes was called. This could indicate " + "changes in the network being parsed." + ) + with pytest.raises(ValueError, match=expected_msg_reshape): parser._consume_reshape_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2], ) - expected_msg_reshape = "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only Reshape nodes can be used as starting points for consumption." 
- assert str(excinfo.value) == expected_msg_reshape - - with pytest.raises(ValueError) as excinfo: + expected_msg_pool = ( + "StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, " + "but the parsing method for MaxPool nodes was called. This could indicate " + "changes in the network being parsed." + ) + with pytest.raises(ValueError, match=expected_msg_pool): parser._consume_pool_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/BiasAdd"][2], ) - expected_msg_pool = """StatefulPartitionedCall/keras_linear_131/dense/BiasAdd is a Add node, only MaxPool nodes can be used as starting points for consumption.""" - assert str(excinfo.value) == expected_msg_pool @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -198,13 +221,15 @@ def test_consume_dense_wrong_dims(datadir): parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][ 1 ].input.append("abcd") - with pytest.raises(ValueError) as excinfo: + expected_msg_dense = ( + "StatefulPartitionedCall/keras_linear_131/dense/MatMul input has 3 dimensions, " + "only nodes with 2 input dimensions can be used as starting points for parsing." + ) + with pytest.raises(ValueError, match=expected_msg_dense): parser._consume_dense_nodes( parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][1], parser._nodes["StatefulPartitionedCall/keras_linear_131/dense/MatMul"][2], ) - expected_msg_dense = "StatefulPartitionedCall/keras_linear_131/dense/MatMul input has 3 dimensions, only nodes with 2 input dimensions can be used as starting points for consumption." - assert str(excinfo.value) == expected_msg_dense @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -213,12 +238,14 @@ def test_consume_gemm_wrong_dims(datadir): parser = NetworkParser() parser.parse_network(model.graph, None, None) parser._nodes["Gemm_0"][1].input.append("abcd") - with pytest.raises(ValueError) as excinfo: + expected_msg_gemm = ( + "Gemm_0 input has 4 dimensions, only nodes with 3 input dimensions " + "can be used as starting points for parsing." + ) + with pytest.raises(ValueError, match=expected_msg_gemm): parser._consume_gemm_dense_nodes( parser._nodes["Gemm_0"][1], parser._nodes["Gemm_0"][2] ) - expected_msg_gemm = "Gemm_0 input has 4 dimensions, only nodes with 3 input dimensions can be used as starting points for consumption." - assert str(excinfo.value) == expected_msg_gemm @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -227,12 +254,14 @@ def test_consume_conv_wrong_dims(datadir): parser = NetworkParser() parser.parse_network(model.graph, None, None) parser._nodes["Conv_0"][1].input.append("abcd") - with pytest.raises(ValueError) as excinfo: + expected_msg_conv = ( + "Conv_0 input has 4 dimensions, only nodes with 2 or 3 input" + " dimensions can be used as starting points for parsing." + ) + with pytest.raises(ValueError, match=expected_msg_conv): parser._consume_conv_nodes( parser._nodes["Conv_0"][1], parser._nodes["Conv_0"][2] ) - expected_msg_conv = "Conv_0 input has 4 dimensions, only nodes with 2 or 3 input dimensions can be used as starting points for consumption." 
- assert str(excinfo.value) == expected_msg_conv @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -241,12 +270,14 @@ def test_consume_reshape_wrong_dims(datadir): parser = NetworkParser() parser.parse_network(model.graph, None, None) parser._nodes["Reshape_2"][1].input.append("abcd") - with pytest.raises(ValueError) as excinfo: + expected_msg_reshape = ( + "Reshape_2 input has 3 dimensions, only nodes with 2 input" + " dimensions can be used as starting points for parsing." + ) + with pytest.raises(ValueError, match=expected_msg_reshape): parser._consume_reshape_nodes( parser._nodes["Reshape_2"][1], parser._nodes["Reshape_2"][2] ) - expected_msg_reshape = """Reshape_2 input has 3 dimensions, only nodes with 2 input dimensions can be used as starting points for consumption.""" - assert str(excinfo.value) == expected_msg_reshape @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -255,7 +286,9 @@ def test_consume_maxpool_wrong_dims(datadir): parser = NetworkParser() parser.parse_network(model.graph, None, None) parser._nodes["node1"][1].input.append("abcd") - with pytest.raises(ValueError) as excinfo: + expected_msg_maxpool = ( + "node1 input has 2 dimensions, only nodes with 1 input " + "dimension can be used as starting points for parsing." + ) + with pytest.raises(ValueError, match=expected_msg_maxpool): parser._consume_pool_nodes(parser._nodes["node1"][1], parser._nodes["node1"][2]) - expected_msg_maxpool = """node1 input has 2 dimensions, only nodes with 1 input dimension can be used as starting points for consumption.""" - assert str(excinfo.value) == expected_msg_maxpool diff --git a/tests/io/test_torch_geometric.py b/tests/io/test_torch_geometric.py index 9cf6905f..43703d97 100644 --- a/tests/io/test_torch_geometric.py +++ b/tests/io/test_torch_geometric.py @@ -4,14 +4,12 @@ from omlt import OmltBlock from omlt.dependencies import ( - torch, torch_available, - torch_geometric, torch_geometric_available, ) if torch_available and torch_geometric_available: - from torch.nn import Linear, ReLU, Sigmoid, Softplus, Tanh + from torch.nn import Linear, ReLU, Sigmoid, Tanh from torch_geometric.nn import ( GCNConv, SAGEConv, @@ -32,7 +30,7 @@ not (torch_available and torch_geometric_available), reason="Test only valid when torch and torch_geometric are available", ) -def GCN_Sequential(activation, pooling): +def gcn_sequential(activation, pooling): return Sequential( "x, edge_index", [ @@ -53,7 +51,7 @@ def GCN_Sequential(activation, pooling): not (torch_available and torch_geometric_available), reason="Test only valid when torch and torch_geometric are available", ) -def SAGE_Sequential(activation, pooling, aggr, root_weight): +def sage_sequential(activation, pooling, aggr, root_weight): return Sequential( "x, edge_index", [ @@ -79,7 +77,7 @@ def _test_torch_geometric_reader(nn, activation, pooling): A = np.ones((N, N), dtype=int) net = load_torch_geometric_sequential(nn, N, A) layers = list(net.layers) - assert len(layers) == 7 + assert len(layers) == 7 # noqa: PLR2004 assert layers[1].weights.shape == (8, 16) assert layers[2].weights.shape == (16, 16) assert layers[3].weights.shape == (16, 16) @@ -116,8 +114,8 @@ def _test_gnn_with_fixed_graph(nn): m.nn = OmltBlock() A = np.eye(N, dtype=int) gnn_with_fixed_graph(m.nn, nn, N, A, scaled_input_bounds=input_bounds) - assert m.nvariables() == 282 - assert m.nconstraints() == 614 + assert m.nvariables() == 282 # noqa: PLR2004 + assert m.nconstraints() == 614 # noqa: PLR2004 @pytest.mark.skipif( 
@@ -135,8 +133,8 @@ def _test_gnn_with_non_fixed_graph(nn): m = pyo.ConcreteModel() m.nn = OmltBlock() gnn_with_non_fixed_graph(m.nn, nn, N, scaled_input_bounds=input_bounds) - assert m.nvariables() == 282 - assert m.nconstraints() == 620 + assert m.nvariables() == 282 # noqa: PLR2004 + assert m.nconstraints() == 620 # noqa: PLR2004 @pytest.mark.skipif( @@ -146,11 +144,11 @@ def _test_gnn_with_non_fixed_graph(nn): def test_torch_geometric_reader(): for activation in [ReLU, Sigmoid, Tanh]: for pooling in [global_mean_pool, global_add_pool]: - nn = GCN_Sequential(activation, pooling) + nn = gcn_sequential(activation, pooling) _test_torch_geometric_reader(nn, activation, pooling) for aggr in ["sum", "mean"]: for root_weight in [False, True]: - nn = SAGE_Sequential(activation, pooling, aggr, root_weight) + nn = sage_sequential(activation, pooling, aggr, root_weight) _test_torch_geometric_reader(nn, activation, pooling) @@ -160,11 +158,11 @@ def test_torch_geometric_reader(): ) def test_gnn_with_fixed_graph(): for pooling in [global_mean_pool, global_add_pool]: - nn = GCN_Sequential(ReLU, pooling) + nn = gcn_sequential(ReLU, pooling) _test_gnn_with_fixed_graph(nn) for aggr in ["sum", "mean"]: for root_weight in [False, True]: - nn = SAGE_Sequential(ReLU, pooling, aggr, root_weight) + nn = sage_sequential(ReLU, pooling, aggr, root_weight) _test_gnn_with_fixed_graph(nn) @@ -176,7 +174,7 @@ def test_gnn_with_non_fixed_graph(): for pooling in [global_mean_pool, global_add_pool]: for aggr in ["sum"]: for root_weight in [False, True]: - nn = SAGE_Sequential(ReLU, pooling, aggr, root_weight) + nn = sage_sequential(ReLU, pooling, aggr, root_weight) _test_gnn_with_non_fixed_graph(nn) @@ -193,7 +191,7 @@ def _test_gnn_value_error(nn, error_info, error_type="ValueError"): for i in range(input_size[0]): input_bounds[(i)] = (-1.0, 1.0) if error_type == "ValueError": - with pytest.raises(ValueError) as excinfo: + with pytest.raises(ValueError) as excinfo: # noqa: PT011 load_torch_geometric_sequential( nn=nn, N=N, @@ -217,16 +215,18 @@ def _test_gnn_value_error(nn, error_info, error_type="ValueError"): reason="Test only valid when torch and torch_geometric are available", ) def test_gnn_value_error(): - nn = SAGE_Sequential(ReLU, global_max_pool, "mean", True) - _test_gnn_value_error(nn, "this operation is not supported") + nn = sage_sequential(ReLU, global_max_pool, "mean", root_weight=True) + _test_gnn_value_error(nn, "Operation global_max_pool is not supported.") - nn = SAGE_Sequential(Sigmoid, global_mean_pool, "sum", True) + nn = sage_sequential(Sigmoid, global_mean_pool, "sum", root_weight=True) _test_gnn_value_error(nn, "nonlinear activation results in a MINLP", "warns") - nn = SAGE_Sequential(ReLU, global_mean_pool, "mean", True) + nn = sage_sequential(ReLU, global_mean_pool, "mean", root_weight=True) _test_gnn_value_error( nn, "this aggregation is not supported when the graph is not fixed" ) - nn = GCN_Sequential(ReLU, global_mean_pool) - _test_gnn_value_error(nn, "this layer is not supported when the graph is not fixed") + nn = gcn_sequential(ReLU, global_mean_pool) + _test_gnn_value_error( + nn, "this layer is not supported when the graph is not fixed." 
+ ) diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py index 28f6f873..e3d2c2b1 100644 --- a/tests/linear_tree/test_lt_formulation.py +++ b/tests/linear_tree/test_lt_formulation.py @@ -1,22 +1,26 @@ import numpy as np import pyomo.environ as pe import pytest -from pytest import approx from omlt.dependencies import lineartree_available if lineartree_available: from lineartree import LinearTreeRegressor from sklearn.linear_model import LinearRegression + from omlt.linear_tree import ( + LinearTreeDefinition, LinearTreeGDPFormulation, LinearTreeHybridBigMFormulation, - LinearTreeDefinition, ) import omlt from omlt import OmltBlock +NUM_INPUTS = 2 +NUM_SPLITS = 5 +NUM_LEAVES = 6 + scip_available = pe.SolverFactory("scip").available() cbc_available = pe.SolverFactory("cbc").available() gurobi_available = pe.SolverFactory("gurobi").available() @@ -82,7 +86,7 @@ def linear_model_tree(X, y): @pytest.mark.skipif(not lineartree_available, reason="Need Linear-Tree Package") -def test_linear_tree_model_single_var(): +def test_linear_tree_model_single_var(): # noqa: C901 # construct a LinearTreeDefinition regr_small = linear_model_tree(X=X_small, y=y_small) input_bounds = {0: (min(X_small)[0], max(X_small)[0])} @@ -100,7 +104,7 @@ def test_linear_tree_model_single_var(): assert n_outputs == 1 # test for splits # assert the number of splits - assert len(splits[0].keys()) == 5 + assert len(splits[0].keys()) == NUM_SPLITS splits_key_list = [ "col", "th", @@ -114,12 +118,12 @@ def test_linear_tree_model_single_var(): "y_index", ] # assert whether all the dicts have such keys - for i in splits[0].keys(): - for key in splits[0][i].keys(): + for i in splits[0]: + for key in splits[0][i]: assert key in splits_key_list # test for leaves # assert the number of leaves - assert len(leaves[0].keys()) == 6 + assert len(leaves[0].keys()) == NUM_LEAVES # assert whether all the dicts have such keys leaves_key_list = [ "loss", @@ -130,8 +134,8 @@ def test_linear_tree_model_single_var(): "parent", "bounds", ] - for j in leaves[0].keys(): - for key in leaves[0][j].keys(): + for j in leaves[0]: + for key in leaves[0][j]: assert key in leaves_key_list # if the key is slope, ensure slope dimension match n_inputs if key == "slope": @@ -187,7 +191,7 @@ def connect_outputs(mdl): pe.assert_optimal_termination(status_1_bigm) solution_1_bigm = (pe.value(model1.x), pe.value(model1.y)) y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) - assert y_pred[0] == approx(solution_1_bigm[1]) + assert y_pred[0] == pytest.approx(solution_1_bigm[1]) @pytest.mark.skipif( @@ -221,7 +225,7 @@ def connect_outputs(mdl): pe.assert_optimal_termination(status_1_bigm) solution_1_bigm = (pe.value(model1.x), pe.value(model1.y)) y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) - assert y_pred[0] == approx(solution_1_bigm[1]) + assert y_pred[0] == pytest.approx(solution_1_bigm[1]) @pytest.mark.skipif( @@ -255,7 +259,7 @@ def connect_outputs(mdl): pe.assert_optimal_termination(status_1_bigm) solution_1_bigm = (pe.value(model1.x), pe.value(model1.y)) y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) - assert y_pred[0] == approx(solution_1_bigm[1]) + assert y_pred[0] == pytest.approx(solution_1_bigm[1]) @pytest.mark.skipif( @@ -289,7 +293,7 @@ def connect_outputs(mdl): pe.assert_optimal_termination(status_1_bigm) solution_1_bigm = (pe.value(model1.x), pe.value(model1.y)) y_pred = regr_small.predict(np.array(solution_1_bigm[0]).reshape(1, -1)) - 
assert y_pred[0] == approx(solution_1_bigm[1]) + assert y_pred[0] == pytest.approx(solution_1_bigm[1]) @pytest.mark.skipif(not lineartree_available, reason="Need Linear-Tree Package") @@ -317,12 +321,12 @@ def test_scaling(): lt_def2 = LinearTreeDefinition( regr, unscaled_input_bounds=unscaled_input_bounds, scaling_object=scaler ) - assert lt_def2.scaled_input_bounds[0][0] == approx(scaled_input_bounds[0][0]) - assert lt_def2.scaled_input_bounds[0][1] == approx(scaled_input_bounds[0][1]) + assert lt_def2.scaled_input_bounds[0][0] == pytest.approx(scaled_input_bounds[0][0]) + assert lt_def2.scaled_input_bounds[0][1] == pytest.approx(scaled_input_bounds[0][1]) with pytest.raises( Exception, match="Input Bounds needed to represent linear trees as MIPs" ): - ltmodel_scaled = LinearTreeDefinition(regr) + LinearTreeDefinition(regr) #### MULTIVARIATE INPUT TESTING #### @@ -379,7 +383,7 @@ def test_scaling(): @pytest.mark.skipif(not lineartree_available, reason="Need Linear-Tree Package") -def test_linear_tree_model_multi_var(): +def test_linear_tree_model_multi_var(): # noqa: C901 # construct a LinearTreeDefinition regr = linear_model_tree(X=X, y=Y) input_bounds = {0: (min(X[:, 0]), max(X[:, 0])), 1: (min(X[:, 1]), max(X[:, 1]))} @@ -394,12 +398,12 @@ def test_linear_tree_model_multi_var(): # assert attributes in LinearTreeDefinition assert scaled_input_bounds is not None - assert n_inputs == 2 + assert n_inputs == NUM_INPUTS assert n_outputs == 1 # test for splits # assert the number of splits - assert len(splits[0].keys()) == 5 + assert len(splits[0].keys()) == NUM_SPLITS splits_key_list = [ "col", "th", @@ -413,12 +417,12 @@ def test_linear_tree_model_multi_var(): "y_index", ] # assert whether all the dicts have such keys - for i in splits[0].keys(): - for key in splits[0][i].keys(): + for i in splits[0]: + for key in splits[0][i]: assert key in splits_key_list # test for leaves # assert the number of leaves - assert len(leaves[0].keys()) == 6 + assert len(leaves[0].keys()) == NUM_LEAVES # assert whether all the dicts have such keys leaves_key_list = [ "loss", @@ -429,8 +433,8 @@ def test_linear_tree_model_multi_var(): "parent", "bounds", ] - for j in leaves[0].keys(): - for key in leaves[0][j].keys(): + for j in leaves[0]: + for key in leaves[0][j]: assert key in leaves_key_list # if the key is slope, test the shape of it if key == "slope": @@ -494,7 +498,7 @@ def connect_outputs(mdl): y_pred = regr.predict( np.array([pe.value(model1.x0), pe.value(model1.x1)]).reshape(1, -1) ) - assert y_pred[0] == approx(solution_1_bigm) + assert y_pred[0] == pytest.approx(solution_1_bigm) @pytest.mark.skipif( @@ -536,7 +540,7 @@ def connect_outputs(mdl): y_pred = regr.predict( np.array([pe.value(model1.x0), pe.value(model1.x1)]).reshape(1, -1) ) - assert y_pred[0] == approx(solution_1_bigm) + assert y_pred[0] == pytest.approx(solution_1_bigm) @pytest.mark.skipif( @@ -578,7 +582,7 @@ def connect_outputs(mdl): y_pred = regr.predict( np.array([pe.value(model1.x0), pe.value(model1.x1)]).reshape(1, -1) ) - assert y_pred[0] == approx(solution_1_bigm) + assert y_pred[0] == pytest.approx(solution_1_bigm) @pytest.mark.skipif( @@ -620,11 +624,11 @@ def connect_outputs(mdl): y_pred = regr.predict( np.array([pe.value(model1.x0), pe.value(model1.x1)]).reshape(1, -1) ) - assert y_pred[0] == approx(solution_1_bigm) + assert y_pred[0] == pytest.approx(solution_1_bigm) @pytest.mark.skipif(not lineartree_available, reason="Need Linear-Tree Package") -def test_summary_dict_as_argument(): +def test_summary_dict_as_argument(): 
# noqa: C901 # construct a LinearTreeDefinition regr = linear_model_tree(X=X, y=Y) input_bounds = {0: (min(X[:, 0]), max(X[:, 0])), 1: (min(X[:, 1]), max(X[:, 1]))} @@ -641,11 +645,11 @@ def test_summary_dict_as_argument(): # assert attributes in LinearTreeDefinition assert scaled_input_bounds is not None - assert n_inputs == 2 + assert n_inputs == NUM_INPUTS assert n_outputs == 1 # test for splits # assert the number of splits - assert len(splits[0].keys()) == 5 + assert len(splits[0].keys()) == NUM_SPLITS splits_key_list = [ "col", "th", @@ -659,12 +663,12 @@ def test_summary_dict_as_argument(): "y_index", ] # assert whether all the dicts have such keys - for i in splits[0].keys(): - for key in splits[0][i].keys(): + for i in splits[0]: + for key in splits[0][i]: assert key in splits_key_list # test for leaves # assert the number of leaves - assert len(leaves[0].keys()) == 6 + assert len(leaves[0].keys()) == NUM_LEAVES # assert whether all the dicts have such keys leaves_key_list = [ "loss", @@ -675,8 +679,8 @@ def test_summary_dict_as_argument(): "parent", "bounds", ] - for j in leaves[0].keys(): - for key in leaves[0][j].keys(): + for j in leaves[0]: + for key in leaves[0][j]: assert key in leaves_key_list # if the key is slope, test the shape of it if key == "slope": @@ -709,24 +713,26 @@ def test_raise_exception_if_wrong_model_instance(): input_bounds = {0: (min(X[:, 0]), max(X[:, 0])), 1: (min(X[:, 1]), max(X[:, 1]))} with pytest.raises( Exception, - match="Input dict must be the summary of the linear-tree model" - + " e.g. dict = model.summary()", + match=( + "Input dict must be the summary of the linear-tree model" + " e.g. dict = model.summary()" + ), ): - ltmodel_small = LinearTreeDefinition( + LinearTreeDefinition( regr.summary(only_leaves=True), scaled_input_bounds=input_bounds ) with pytest.raises( Exception, match="Model entry must be dict or linear-tree instance" ): - ltmodel_small = LinearTreeDefinition((0, 0), scaled_input_bounds=input_bounds) + LinearTreeDefinition((0, 0), scaled_input_bounds=input_bounds) with pytest.raises( Exception, - match="Input dict must be the summary of the linear-tree model" - + " e.g. dict = model.summary()", + match=( + "Input dict must be the summary of the linear-tree model" + " e.g. 
dict = model.summary()" + ), ): - ltmodel_small = LinearTreeDefinition( - wrong_summary_dict, scaled_input_bounds=input_bounds - ) + LinearTreeDefinition(wrong_summary_dict, scaled_input_bounds=input_bounds) @pytest.mark.skipif(not lineartree_available, reason="Need Linear-Tree Package") @@ -762,4 +768,4 @@ def test_raise_exception_for_wrong_transformation(): Exception, match="Supported transformations are: bigm, mbigm, hull, and custom", ): - formulation = LinearTreeGDPFormulation(model_def, transformation="hello") + LinearTreeGDPFormulation(model_def, transformation="hello") diff --git a/tests/neuralnet/test_keras.py b/tests/neuralnet/test_keras.py index 02da81aa..9eda9772 100644 --- a/tests/neuralnet/test_keras.py +++ b/tests/neuralnet/test_keras.py @@ -10,19 +10,23 @@ from conftest import get_neural_network_data -from omlt.block import OmltBlock +from omlt import OmltBlock from omlt.neuralnet import FullSpaceNNFormulation, ReducedSpaceNNFormulation from omlt.neuralnet.activations import ComplementarityReLUActivation from omlt.scaling import OffsetScaling +LESS_NEAR_EQUAL = 1e-3 +NEAR_EQUAL = 1e-4 +VERY_NEAR_EQUAL = 1e-5 + @pytest.mark.skipif(keras_available, reason="Test only valid when keras not available") def test_keras_not_available_exception(datadir): with pytest.raises(DeferredImportError): - NN = keras.models.load_model(datadir.file("keras_linear_131_relu")) + keras.models.load_model(datadir.file("keras_linear_131_relu")) -def _test_keras_linear_131(keras_fname, reduced_space=False): +def _test_keras_linear_131(keras_fname, *, reduced_space=False): x, y, x_test = get_neural_network_data("131") nn = keras.models.load_model(keras_fname, compile=False) @@ -30,17 +34,19 @@ def _test_keras_linear_131(keras_fname, reduced_space=False): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() if reduced_space: - formulation = ReducedSpaceNNFormulation(net) + m.neural_net_block.build_formulation(ReducedSpaceNNFormulation(net)) else: - formulation = FullSpaceNNFormulation(net) - m.neural_net_block.build_formulation(formulation) + m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) nn_outputs = nn.predict(x=x_test) for d in range(len(x_test)): m.neural_net_block.inputs[0].fix(x_test[d]) status = pyo.SolverFactory("ipopt").solve(m, tee=False) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) < 1e-5 + assert ( + abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) + < VERY_NEAR_EQUAL + ) def _test_keras_mip_relu_131(keras_fname): @@ -60,7 +66,10 @@ def _test_keras_mip_relu_131(keras_fname): m.neural_net_block.inputs[0].fix(x_test[d]) status = pyo.SolverFactory("cbc").solve(m, tee=False) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) < 1e-5 + assert ( + abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) + < VERY_NEAR_EQUAL + ) def _test_keras_complementarity_relu_131(keras_fname): @@ -81,10 +90,13 @@ def _test_keras_complementarity_relu_131(keras_fname): m.neural_net_block.inputs[0].fix(x_test[d]) status = pyo.SolverFactory("ipopt").solve(m, tee=False) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) < 1e-4 + assert ( + abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) + < NEAR_EQUAL + ) -def _test_keras_linear_big(keras_fname, reduced_space=False): +def _test_keras_linear_big(keras_fname, *, reduced_space=False): x, y, x_test = 
get_neural_network_data("131") nn = keras.models.load_model(keras_fname, compile=False) @@ -93,17 +105,19 @@ def _test_keras_linear_big(keras_fname, reduced_space=False): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() if reduced_space: - formulation = ReducedSpaceNNFormulation(net) + m.neural_net_block.build_formulation(ReducedSpaceNNFormulation(net)) else: - formulation = FullSpaceNNFormulation(net) - m.neural_net_block.build_formulation(formulation) + m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) nn_outputs = nn.predict(x=x_test) for d in range(len(x_test)): m.neural_net_block.inputs[0].fix(x_test[d]) status = pyo.SolverFactory("ipopt").solve(m, tee=False) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) < 1e-5 + assert ( + abs(pyo.value(m.neural_net_block.outputs[0]) - nn_outputs[d][0]) + < VERY_NEAR_EQUAL + ) @pytest.mark.skipif(not keras_available, reason="Need keras for this test") @@ -156,7 +170,7 @@ def test_keras_linear_big_reduced_space(datadir): @pytest.mark.skipif(not keras_available, reason="Need keras for this test") -def test_scaling_NN_block(datadir): +def test_scaling_nn_block(datadir): NN = keras.models.load_model(datadir.file("keras_linear_131_relu.keras")) model = pyo.ConcreteModel() @@ -183,12 +197,14 @@ def test_scaling_NN_block(datadir): def obj(mdl): return 1 - for x in np.random.normal(1, 0.5, 10): + rng = np.random.default_rng() + + for x in rng.normal(1, 0.5, 10): model.nn.inputs[0].fix(x) - result = pyo.SolverFactory("cbc").solve(model, tee=False) + pyo.SolverFactory("cbc").solve(model, tee=False) x_s = (x - scale_x[0]) / scale_x[1] y_s = NN.predict([np.array((x_s,))]) y = y_s * scale_y[1] + scale_y[0] - assert y - pyo.value(model.nn.outputs[0]) <= 1e-3 + assert y - pyo.value(model.nn.outputs[0]) <= LESS_NEAR_EQUAL diff --git a/tests/neuralnet/test_layer.py b/tests/neuralnet/test_layer.py index 4a8944ac..2f42fc59 100644 --- a/tests/neuralnet/test_layer.py +++ b/tests/neuralnet/test_layer.py @@ -133,16 +133,16 @@ def test_gnn_layer_with_input_index_mapper(): y3 = np.array([[[-6, 4, 0, -12, 11, 1, -5, 5, 2], [-1, 0, 1, -1, 0, 1, -1, 0, 1]]]) assert np.array_equal(layer._eval_with_adjacency(inputs, A3), y3) - with pytest.raises(ValueError) as excinfo: - layer = GNNLayer([5], [9], weights, biases, N=3) - assert ( - str(excinfo.value) - == "Input size must equal to the number of nodes multiplied by the number of input node features" + expected_msg = ( + "Input size must equal to the number of nodes multiplied by the number of" + " input node features" ) + with pytest.raises(ValueError, match=expected_msg): + layer = GNNLayer([5], [9], weights, biases, N=3) - with pytest.raises(ValueError) as excinfo: - layer = GNNLayer([6], [8], weights, biases, N=3) - assert ( - str(excinfo.value) - == "Output size must equal to the number of nodes multiplied by the number of output node features" + expected_msg = ( + "Output size must equal to the number of nodes multiplied by the number of" + " output node features" ) + with pytest.raises(ValueError, match=expected_msg): + layer = GNNLayer([6], [8], weights, biases, N=3) diff --git a/tests/neuralnet/test_network_definition.py b/tests/neuralnet/test_network_definition.py index f3cadcb7..609e12ec 100644 --- a/tests/neuralnet/test_network_definition.py +++ b/tests/neuralnet/test_network_definition.py @@ -1,18 +1,22 @@ +import re + import numpy as np import pyomo.environ as pyo import pytest -from omlt.block import OmltBlock +from omlt import 
OmltBlock from omlt.neuralnet.layer import DenseLayer, InputLayer from omlt.neuralnet.network_definition import NetworkDefinition from omlt.neuralnet.nn_formulation import FullSpaceNNFormulation from omlt.scaling import OffsetScaling +ALMOST_EXACTLY_EQUAL = 1e-8 + -# TODO: Build more tests with different activations and edge cases def test_two_node_full_space(): - """ - 1 1 + """Two node full space network. + + 1 1 x0 -------- (1) --------- (3) | / | / @@ -57,14 +61,16 @@ def test_two_node_full_space(): m.obj1 = pyo.Objective(expr=0) status = pyo.SolverFactory("cbc").solve(m, tee=True) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10.0) < 1e-8 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2.0) < 1e-8 + assert ( + abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10.0) < ALMOST_EXACTLY_EQUAL + ) + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2.0) < ALMOST_EXACTLY_EQUAL m.neural_net_block.inputs[0].fix(1) status = pyo.SolverFactory("cbc").solve(m, tee=False) pyo.assert_optimal_termination(status) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1.0) < 1e-8 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0.0) < 1e-8 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1.0) < ALMOST_EXACTLY_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0.0) < ALMOST_EXACTLY_EQUAL def test_input_bounds_no_scaler(): @@ -75,7 +81,7 @@ def test_input_bounds_no_scaler(): assert net.scaled_input_bounds == scaled_input_bounds -def test_input_bound_scaling_1D(): +def test_input_bound_scaling_1d(): xoffset = {i: float(i) for i in range(3)} xfactor = {i: 0.5 * (i + 1) for i in range(3)} yoffset = {i: -0.25 * i for i in range(2)} @@ -91,7 +97,7 @@ def test_input_bound_scaling_1D(): scaled_input_bounds = {0: (0, 5), 1: (-2, 2), 2: (0, 1)} unscaled_input_bounds = {} - for k in scaled_input_bounds.keys(): + for k in scaled_input_bounds: lb, ub = scaled_input_bounds[k] unscaled_input_bounds[k] = ( (lb * xfactor[k]) + xoffset[k], @@ -104,7 +110,7 @@ def test_input_bound_scaling_1D(): assert net.scaled_input_bounds == scaled_input_bounds -def test_input_bound_scaling_multiD(): +def test_input_bound_scaling_multi_d(): # Multidimensional test xoffset = {(0, i): float(i) for i in range(3)} xfactor = {(0, i): 0.5 * (i + 1) for i in range(3)} @@ -121,7 +127,7 @@ def test_input_bound_scaling_multiD(): scaled_input_bounds = {(0, 0): (0, 5), (0, 1): (-2, 2), (0, 2): (0, 1)} unscaled_input_bounds = {} - for k in scaled_input_bounds.keys(): + for k in scaled_input_bounds: lb, ub = scaled_input_bounds[k] unscaled_input_bounds[k] = ( (lb * xfactor[k]) + xoffset[k], @@ -135,9 +141,7 @@ def test_input_bound_scaling_multiD(): def _test_add_invalid_edge(direction): - """ - direction can be "in" or "out" - """ + """Direction can be "in" or "out".""" net = NetworkDefinition(scaled_input_bounds=[(-10.0, 10.0)]) input_layer = InputLayer([1]) @@ -162,15 +166,19 @@ def _test_add_invalid_edge(direction): ) if direction == "in": - with pytest.raises(ValueError) as excinfo: + expected_msg = re.escape( + "Inbound layer DenseLayer(input_size=[1], output_size=[1]) not" + " found in network." + ) + with pytest.raises(ValueError, match=expected_msg): net.add_edge(input_layer, dense_layer_1) - expected_msg = f"Inbound layer {dense_layer_1} not found in network." 
- assert str(excinfo.value) == expected_msg elif direction == "out": - with pytest.raises(ValueError) as excinfo: + expected_msg = re.escape( + "Outbound layer DenseLayer(input_size=[1], output_size=[1]) not" + " found in network." + ) + with pytest.raises(ValueError, match=expected_msg): net.add_edge(dense_layer_1, dense_layer_0) - expected_msg = f"Outbound layer {dense_layer_1} not found in network." - assert str(excinfo.value) == expected_msg def test_add_invalid_edge(): diff --git a/tests/neuralnet/test_nn_formulation.py b/tests/neuralnet/test_nn_formulation.py index 577a5f45..9d0ed37c 100644 --- a/tests/neuralnet/test_nn_formulation.py +++ b/tests/neuralnet/test_nn_formulation.py @@ -1,3 +1,7 @@ +import re +from functools import partial +from typing import TYPE_CHECKING + import numpy as np import pyomo.environ as pyo import pytest @@ -30,10 +34,30 @@ ) from omlt.neuralnet.layers.reduced_space import reduced_space_dense_layer +if TYPE_CHECKING: + from omlt.formulation import _PyomoFormulation + +formulations = { + "FullSpace": FullSpaceNNFormulation, + "ReducedSpace": ReducedSpaceNNFormulation, + "relu": ReluPartitionFormulation, +} + +NEAR_EQUAL = 1e-6 +FULLSPACE_SMOOTH_VARS = 15 +FULLSPACE_SMOOTH_CONSTRAINTS = 14 +FULLSPACE_RELU_VARS = 19 +FULLSPACE_RELU_CONSTRAINTS = 26 +REDUCED_VARS = 6 +REDUCED_CONSTRAINTS = 5 +THREE_NODE_VARS = 81 +THREE_NODE_CONSTRAINTS = 120 + def two_node_network(activation, input_value): - """ - 1 1 + """Two node network. + + 1 1 x0 -------- (1) --------- (3) | / | / @@ -75,158 +99,157 @@ def two_node_network(activation, input_value): return net, y -def _test_two_node_FullSpaceNNFormulation_smooth(activation): +def _test_two_node_full_space_nn_formulation_smooth(activation): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() net, y = two_node_network(activation, -2.0) m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) - assert m.nvariables() == 15 - assert m.nconstraints() == 14 + assert m.nvariables() == FULLSPACE_SMOOTH_VARS + assert m.nconstraints() == FULLSPACE_SMOOTH_CONSTRAINTS m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) + pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL net, y = two_node_network(activation, 1.0) m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL -def _test_two_node_FullSpaceNNFormulation_relu(): +def _test_two_node_full_space_nn_formulation_relu(): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() net, y = two_node_network("relu", -2.0) m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) - assert m.nvariables() == 19 - assert m.nconstraints() == 26 + assert m.nvariables() == FULLSPACE_RELU_VARS + assert m.nconstraints() == FULLSPACE_RELU_CONSTRAINTS m.neural_net_block.inputs[0].fix(-2) m.obj1 = 
pyo.Objective(expr=0) - status = pyo.SolverFactory("cbc").solve(m, tee=False) + pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL net, y = two_node_network("relu", 1.0) m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL -def _test_two_node_FullSpaceSmoothNNFormulation(activation): +def _test_two_node_full_space_smooth_nn_formulation(activation): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() net, y = two_node_network(activation, -2.0) m.neural_net_block.build_formulation(FullSpaceSmoothNNFormulation(net)) - assert m.nvariables() == 15 - assert m.nconstraints() == 14 + assert m.nvariables() == FULLSPACE_SMOOTH_VARS + assert m.nconstraints() == FULLSPACE_SMOOTH_CONSTRAINTS m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) + pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL net, y = two_node_network(activation, 1.0) m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL -def _test_two_node_ReducedSpaceNNFormulation(activation): +def _test_two_node_reduced_space_nn_formulation(activation): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() net, y = two_node_network(activation, -2.0) m.neural_net_block.build_formulation(ReducedSpaceNNFormulation(net)) - assert m.nvariables() == 6 - assert m.nconstraints() == 5 + assert m.nvariables() == REDUCED_VARS + assert m.nconstraints() == REDUCED_CONSTRAINTS m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) + pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL net, y = two_node_network(activation, 1.0) m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert 
abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL -def _test_two_node_ReducedSpaceSmoothNNFormulation(activation): +def _test_two_node_reduced_space_smooth_nn_formulation(activation): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() net, y = two_node_network(activation, -2.0) m.neural_net_block.build_formulation(ReducedSpaceSmoothNNFormulation(net)) - assert m.nvariables() == 6 - assert m.nconstraints() == 5 + assert m.nvariables() == REDUCED_VARS + assert m.nconstraints() == REDUCED_CONSTRAINTS m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) + pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL net, y = two_node_network(activation, 1.0) m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < 1e-6 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < 1e-6 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - y[0, 0]) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - y[0, 1]) < NEAR_EQUAL -def test_two_node_ReducedSpaceNNFormulation(): - _test_two_node_ReducedSpaceNNFormulation("linear") - _test_two_node_ReducedSpaceNNFormulation("sigmoid") - _test_two_node_ReducedSpaceNNFormulation("tanh") +def test_two_node_reduced_space_nn_formulation(): + _test_two_node_reduced_space_nn_formulation("linear") + _test_two_node_reduced_space_nn_formulation("sigmoid") + _test_two_node_reduced_space_nn_formulation("tanh") -def test_two_node_ReducedSpaceSmoothNNFormulation(): - _test_two_node_ReducedSpaceSmoothNNFormulation("linear") - _test_two_node_ReducedSpaceSmoothNNFormulation("sigmoid") - _test_two_node_ReducedSpaceSmoothNNFormulation("tanh") +def test_two_node_reduced_space_smooth_nn_formulation(): + _test_two_node_reduced_space_smooth_nn_formulation("linear") + _test_two_node_reduced_space_smooth_nn_formulation("sigmoid") + _test_two_node_reduced_space_smooth_nn_formulation("tanh") -def test_two_node_ReducedSpaceSmoothNNFormulation_invalid_activation(): - with pytest.raises(ValueError) as excinfo: - _test_two_node_ReducedSpaceSmoothNNFormulation("relu") +def test_two_node_reduced_space_smooth_nn_formulation_invalid_activation(): expected_msg = "Activation relu is not supported by this formulation." 
- assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + _test_two_node_reduced_space_smooth_nn_formulation("relu") -def test_two_node_FullSpaceNNFormulation(): - _test_two_node_FullSpaceNNFormulation_smooth("linear") - _test_two_node_FullSpaceNNFormulation_smooth("sigmoid") - _test_two_node_FullSpaceNNFormulation_smooth("tanh") - _test_two_node_FullSpaceNNFormulation_relu() +def test_two_node_full_space_nn_formulation(): + _test_two_node_full_space_nn_formulation_smooth("linear") + _test_two_node_full_space_nn_formulation_smooth("sigmoid") + _test_two_node_full_space_nn_formulation_smooth("tanh") + _test_two_node_full_space_nn_formulation_relu() -def test_two_node_FullSpaceSmoothNNFormulation(): - _test_two_node_FullSpaceSmoothNNFormulation("linear") - _test_two_node_FullSpaceSmoothNNFormulation("sigmoid") - _test_two_node_FullSpaceSmoothNNFormulation("tanh") +def test_two_node_full_space_smooth_nn_formulation(): + _test_two_node_full_space_smooth_nn_formulation("linear") + _test_two_node_full_space_smooth_nn_formulation("sigmoid") + _test_two_node_full_space_smooth_nn_formulation("tanh") -def test_two_node_FullSpaceSmoothNNFormulation_invalid_activation(): - with pytest.raises(ValueError) as excinfo: - _test_two_node_FullSpaceSmoothNNFormulation("relu") +def test_two_node_full_space_smooth_nn_formulation_invalid_activation(): expected_msg = "Activation relu is not supported by this formulation." - assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + _test_two_node_full_space_smooth_nn_formulation("relu") @pytest.mark.skip(reason="Need to add checks on layer types") def test_invalid_layer_type(): - raise AssertionError("Layer type test not yet implemented") + msg = "Layer type test not yet implemented" + raise AssertionError(msg) def _maxpool_conv_network(inputs): @@ -305,7 +328,7 @@ def _maxpool_conv_network(inputs): return net, y -def test_maxpool_FullSpaceNNFormulation(): +def test_maxpool_full_space_nn_formulation(): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() @@ -326,8 +349,6 @@ def test_maxpool_FullSpaceNNFormulation(): net, y = _maxpool_conv_network(inputs) m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) - # assert m.nvariables() == 15 - # assert m.nconstraints() == 14 for inputs_d in range(inputs.shape[0]): for inputs_r in range(inputs.shape[1]): @@ -337,81 +358,51 @@ def test_maxpool_FullSpaceNNFormulation(): inputs_d, inputs_r, inputs_c ] m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0, 0]) - y[0, 0, 0]) < 1e-6 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0, 0]) - y[0, 0, 0]) < NEAR_EQUAL def _test_formulation_initialize_extra_input(network_formulation): - """ - network_formulation can be: - 'FullSpace', - 'ReducedSpace' - """ + """network_formulation can be:'FullSpace', 'ReducedSpace'.""" net, y = two_node_network("linear", -2.0) extra_input = InputLayer([1]) net.add_layer(extra_input) - with pytest.raises(ValueError) as excinfo: - if network_formulation == "FullSpace": + + expected_msg = "Multiple input layers are not currently supported." 
+ if network_formulation == "FullSpace": + with pytest.raises(ValueError, match=expected_msg): FullSpaceNNFormulation(net) - elif network_formulation == "ReducedSpace": + elif network_formulation == "ReducedSpace": + with pytest.raises(ValueError, match=expected_msg): ReducedSpaceNNFormulation(net) - expected_msg = "Multiple input layers are not currently supported." - assert str(excinfo.value) == expected_msg def _test_formulation_added_extra_input(network_formulation): - """ - network_formulation can be: - 'FullSpace', - 'ReducedSpace' - 'relu' - """ + """network_formulation can be:'FullSpace', 'ReducedSpace', 'relu'.""" net, y = two_node_network("linear", -2.0) extra_input = InputLayer([1]) - if network_formulation == "FullSpace": - formulation = FullSpaceNNFormulation(net) - elif network_formulation == "ReducedSpace": - formulation = ReducedSpaceNNFormulation(net) - elif network_formulation == "relu": - formulation = ReluPartitionFormulation(net) + formulation: _PyomoFormulation = formulations[network_formulation](net) net.add_layer(extra_input) - with pytest.raises(ValueError) as excinfo: - formulation.input_indexes expected_msg = "Multiple input layers are not currently supported." - assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + _ = formulation.input_indexes def _test_formulation_build_extra_input(network_formulation): - """ - network_formulation can be: - 'FullSpace', - 'ReducedSpace' - 'relu' - """ + """network_formulation can be:'FullSpace', 'ReducedSpace', 'relu'.""" net, y = two_node_network("linear", -2.0) extra_input = InputLayer([1]) - if network_formulation == "FullSpace": - formulation = FullSpaceNNFormulation(net) - elif network_formulation == "ReducedSpace": - formulation = ReducedSpaceNNFormulation(net) - elif network_formulation == "relu": - formulation = ReluPartitionFormulation(net) + formulation: _PyomoFormulation = formulations[network_formulation](net) net.add_layer(extra_input) m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() - with pytest.raises(ValueError) as excinfo: - m.neural_net_block.build_formulation(formulation) expected_msg = "Multiple input layers are not currently supported." - assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + m.neural_net_block.build_formulation(formulation) def _test_formulation_added_extra_output(network_formulation): - """ - network_formulation can be: - 'FullSpace', - 'ReducedSpace' - 'relu' - """ + """network_formulation can be: 'FullSpace', 'ReducedSpace' 'relu'.""" net, y = two_node_network("linear", -2.0) extra_output = DenseLayer( [1, 2], @@ -420,26 +411,16 @@ def _test_formulation_added_extra_output(network_formulation): weights=np.array([[1.0, 0.0], [5.0, 1.0]]), biases=np.array([3.0, 4.0]), ) - if network_formulation == "FullSpace": - formulation = FullSpaceNNFormulation(net) - elif network_formulation == "ReducedSpace": - formulation = ReducedSpaceNNFormulation(net) - elif network_formulation == "relu": - formulation = ReluPartitionFormulation(net) + formulation: _PyomoFormulation = formulations[network_formulation](net) net.add_layer(extra_output) net.add_edge(list(net.layers)[-2], extra_output) - with pytest.raises(ValueError) as excinfo: - formulation.output_indexes expected_msg = "Multiple output layers are not currently supported." 
- assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + _ = formulation.output_indexes def _test_formulation_initialize_extra_output(network_formulation): - """ - network_formulation can be: - 'FullSpace', - 'ReducedSpace' - """ + """network_formulation can be: 'FullSpace', 'ReducedSpace'.""" net, y = two_node_network("linear", -2.0) extra_output = DenseLayer( [1, 2], @@ -450,16 +431,17 @@ def _test_formulation_initialize_extra_output(network_formulation): ) net.add_layer(extra_output) net.add_edge(list(net.layers)[-2], extra_output) - with pytest.raises(ValueError) as excinfo: - if network_formulation == "FullSpace": + + expected_msg = "Multiple output layers are not currently supported." + if network_formulation == "FullSpace": + with pytest.raises(ValueError, match=expected_msg): FullSpaceNNFormulation(net) - elif network_formulation == "ReducedSpace": + elif network_formulation == "ReducedSpace": + with pytest.raises(ValueError, match=expected_msg): ReducedSpaceNNFormulation(net) - expected_msg = "Multiple output layers are not currently supported." - assert str(excinfo.value) == expected_msg -def test_FullSpaceNNFormulation_invalid_network(): +def test_full_space_nn_formulation_invalid_network(): _test_formulation_initialize_extra_input("FullSpace") _test_formulation_added_extra_input("FullSpace") _test_formulation_build_extra_input("FullSpace") @@ -467,15 +449,13 @@ def test_FullSpaceNNFormulation_invalid_network(): _test_formulation_added_extra_output("FullSpace") -def test_ReducedSpaceNNFormulation_invalid_network(): - # _test_formulation_initialize_extra_input("ReducedSpace") +def test_reduced_space_nn_formulation_invalid_network(): _test_formulation_added_extra_input("ReducedSpace") _test_formulation_build_extra_input("ReducedSpace") - # _test_formulation_initialize_extra_output("ReducedSpace") _test_formulation_added_extra_output("ReducedSpace") -def test_ReluPartitionFormulation_invalid_network(): +def test_relu_partition_formulation_invalid_network(): _test_formulation_added_extra_input("relu") _test_formulation_build_extra_input("relu") _test_formulation_added_extra_output("relu") @@ -489,19 +469,18 @@ def _test_dense_layer_multiple_predecessors(layer_type): test_layer = list(net.layers)[2] net.add_layer(extra_input) net.add_edge(extra_input, test_layer) - with pytest.raises(ValueError) as excinfo: - if layer_type == "PartitionBased": + + expected_msg = re.escape(f"Layer {test_layer} has multiple predecessors.") + if layer_type == "PartitionBased": + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer(m, net, m, test_layer, None) - elif layer_type == "ReducedSpace": + elif layer_type == "ReducedSpace": + with pytest.raises(ValueError, match=expected_msg): reduced_space_dense_layer(m, net, m, test_layer, None) - expected_msg = f"Layer {test_layer} has multiple predecessors." - assert str(excinfo.value) == expected_msg def _test_dense_layer_no_predecessors(layer_type): - """ - Layer type can be "ReducedSpace", or "PartitionBased". - """ + """Layer type can be "ReducedSpace", or "PartitionBased".""" m = pyo.ConcreteModel() net = NetworkDefinition(scaled_input_bounds=[(-10.0, 10.0)]) @@ -513,13 +492,16 @@ def _test_dense_layer_no_predecessors(layer_type): biases=np.array([1.0, 2.0]), ) net.add_layer(test_layer) - with pytest.raises(ValueError) as excinfo: - if layer_type == "PartitionBased": + + expected_msg = re.escape( + f"Layer {test_layer} is not an input layer, but has no predecessors." 
+ ) + if layer_type == "PartitionBased": + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer(m, net, m, test_layer, None) - elif layer_type == "ReducedSpace": + elif layer_type == "ReducedSpace": + with pytest.raises(ValueError, match=expected_msg): reduced_space_dense_layer(m, net, m, test_layer, None) - expected_msg = f"Layer {test_layer} is not an input layer, but has no predecessors." - assert str(excinfo.value) == expected_msg def test_partition_based_dense_layer_predecessors(): @@ -544,14 +526,13 @@ def test_partition_based_unbounded_below(): prev_layer_block = m.neural_net_block.layer[prev_layer_id] prev_layer_block.z.setlb(-interval.inf) - split_func = lambda w: default_partition_split_func(w, 2) + split_func = partial(default_partition_split_func, n=2) - with pytest.raises(ValueError) as excinfo: + expected_msg = "Expression is unbounded below." + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer( m.neural_net_block, net, m.neural_net_block, test_layer, split_func ) - expected_msg = "Expression is unbounded below." - assert str(excinfo.value) == expected_msg def test_partition_based_unbounded_above(): @@ -566,14 +547,13 @@ def test_partition_based_unbounded_above(): prev_layer_block = m.neural_net_block.layer[prev_layer_id] prev_layer_block.z.setub(interval.inf) - split_func = lambda w: default_partition_split_func(w, 2) + split_func = partial(default_partition_split_func, n=2) - with pytest.raises(ValueError) as excinfo: + expected_msg = "Expression is unbounded above." + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer( m.neural_net_block, net, m.neural_net_block, test_layer, split_func ) - expected_msg = "Expression is unbounded above." - assert str(excinfo.value) == expected_msg def test_partition_based_bias_unbounded_below(): @@ -586,14 +566,13 @@ def test_partition_based_bias_unbounded_below(): m.neural_net_block.build_formulation(formulation) test_layer.biases[0] = -interval.inf - split_func = lambda w: default_partition_split_func(w, 2) + split_func = partial(default_partition_split_func, n=2) - with pytest.raises(ValueError) as excinfo: + expected_msg = "Expression is unbounded below." + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer( m.neural_net_block, net, m.neural_net_block, test_layer, split_func ) - expected_msg = "Expression is unbounded below." - assert str(excinfo.value) == expected_msg def test_partition_based_bias_unbounded_above(): @@ -606,14 +585,12 @@ def test_partition_based_bias_unbounded_above(): m.neural_net_block.build_formulation(formulation) test_layer.biases[0] = interval.inf - split_func = lambda w: default_partition_split_func(w, 2) - - with pytest.raises(ValueError) as excinfo: + split_func = partial(default_partition_split_func, n=2) + expected_msg = "Expression is unbounded above." + with pytest.raises(ValueError, match=expected_msg): partition_based_dense_relu_layer( m.neural_net_block, net, m.neural_net_block, test_layer, split_func ) - expected_msg = "Expression is unbounded above." 
- assert str(excinfo.value) == expected_msg def test_fullspace_internal_extra_input(): @@ -626,10 +603,9 @@ def test_fullspace_internal_extra_input(): m.neural_net_block.build_formulation(formulation) net.add_layer(extra_input) net.add_edge(extra_input, test_layer) - with pytest.raises(ValueError) as excinfo: - _input_layer_and_block(m.neural_net_block, net, test_layer) expected_msg = "Multiple input layers are not currently supported." - assert str(excinfo.value) == expected_msg + with pytest.raises(ValueError, match=expected_msg): + _input_layer_and_block(m.neural_net_block, net, test_layer) def test_conv2d_extra_activation(): @@ -673,10 +649,14 @@ def test_conv2d_extra_activation(): ) net.add_layer(maxpool_layer_1) net.add_edge(conv_layer_2, maxpool_layer_1) - with pytest.raises(ValueError) as excinfo: + expected_msg = re.escape( + "Activation is applied after convolution layer, but the successor maxpooling" + " layer PoolingLayer(input_size=[1, 3, 4], output_size=[1, 1, 2]," + " strides=[2, 2], kernel_shape=[3, 2]), pool_func_name=max has an activation" + " function also." + ) + with pytest.raises(ValueError, match=expected_msg): m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) - expected_msg = """Activation is applied after convolution layer, but the successor max pooling layer PoolingLayer(input_size=[1, 3, 4], output_size=[1, 1, 2], strides=[2, 2], kernel_shape=[3, 2]), pool_func_name=max has an activation function also.""" - assert str(excinfo.value) == expected_msg def test_maxpool2d_bad_input_activation(): @@ -730,13 +710,14 @@ def test_maxpool2d_bad_input_activation(): m.neural_net_block.build_formulation(FullSpaceNNFormulation(net)) conv_layer_2.activation = "relu" - - with pytest.raises(ValueError) as excinfo: + expected_msg = ( + "Non-increasing activation functions on the preceding convolutional" + " layer are not supported." 
+ ) + with pytest.raises(ValueError, match=expected_msg): full_space_maxpool2d_layer( m.neural_net_block, net, m.neural_net_block, maxpool_layer_1 ) - expected_msg = """Non-increasing activation functions on the preceding convolutional layer are not supported.""" - assert str(excinfo.value) == expected_msg def test_maxpool2d_bad_input_layer(): @@ -876,15 +857,15 @@ def _test_three_node_graph_neural_network(graph_type): for i in range(6): m.nn.inputs[i].fix(inputs[i]) - assert m.nvariables() == 81 - assert m.nconstraints() == 120 + assert m.nvariables() == THREE_NODE_VARS + assert m.nconstraints() == THREE_NODE_CONSTRAINTS m.obj = pyo.Objective(expr=0) - status = pyo.SolverFactory("cbc").solve(m, tee=False) + pyo.SolverFactory("cbc").solve(m, tee=False) for i in range(9): - assert abs(pyo.value(m.nn.outputs[i]) - y[i]) < 1e-6 + assert abs(pyo.value(m.nn.outputs[i]) - y[i]) < NEAR_EQUAL for i in range(6): for j in range(3): @@ -893,7 +874,7 @@ def _test_three_node_graph_neural_network(graph_type): pyo.value(m.nn.layer[m.nn.layers.at(1)].zbar[i, j]) - pyo.value(m.nn.A[i // 2, j]) * inputs[i] ) - < 1e-6 + < NEAR_EQUAL ) diff --git a/tests/neuralnet/test_onnx.py b/tests/neuralnet/test_onnx.py index bb9b9dfd..7c8014e3 100644 --- a/tests/neuralnet/test_onnx.py +++ b/tests/neuralnet/test_onnx.py @@ -8,13 +8,14 @@ if onnx_available: import onnxruntime as ort + from omlt.io.onnx import ( load_onnx_neural_network, load_onnx_neural_network_with_bounds, write_onnx_model_with_bounds, ) -from pyomo.environ import * +from pyomo.environ import ConcreteModel, SolverFactory, value from omlt import OffsetScaling, OmltBlock from omlt.neuralnet import FullSpaceNNFormulation @@ -23,7 +24,7 @@ @pytest.mark.skipif(onnx_available, reason="Test only valid when onnx not available") def test_onnx_not_available_exception(datadir): with pytest.raises(DeferredImportError): - neural_net = onnx.load(datadir.file("keras_linear_131_relu.onnx")) + onnx.load(datadir.file("keras_linear_131_relu.onnx")) @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") @@ -58,11 +59,11 @@ def obj(mdl): for x in [-0.25, 0.0, 0.25, 1.5]: model.nn.inputs.fix(x) - result = SolverFactory("cbc").solve(model, tee=False) + SolverFactory("cbc").solve(model, tee=False) x_s = (x - scale_x[0]) / scale_x[1] - x_s = np.array([[x_s]], dtype=np.float32) - outputs = net_regression.run(None, {"dense_input:0": x_s}) + x_s_arr = np.array([[x_s]], dtype=np.float32) + outputs = net_regression.run(None, {"dense_input:0": x_s_arr}) y_s = outputs[0][0, 0] y = y_s * scale_y[1] + scale_y[0] @@ -101,11 +102,11 @@ def obj(mdl): for x in [-0.25, 0.0, 0.25, 1.5]: model.nn.inputs.fix(x) - result = SolverFactory("cbc").solve(model, tee=False) + SolverFactory("cbc").solve(model, tee=False) x_s = (x - scale_x[0]) / scale_x[1] - x_s = np.array([[x_s]], dtype=np.float32) - outputs = net_regression.run(None, {"dense_input:0": x_s}) + x_s_arr = np.array([[x_s]], dtype=np.float32) + outputs = net_regression.run(None, {"dense_input:0": x_s_arr}) y_s = outputs[0][0, 0] y = y_s * scale_y[1] + scale_y[0] @@ -145,11 +146,11 @@ def obj(mdl): for x in [-0.25, 0.0, 0.25, 1.5]: model.nn.inputs.fix(x) - result = SolverFactory("ipopt").solve(model, tee=False) + SolverFactory("ipopt").solve(model, tee=False) x_s = (x - scale_x[0]) / scale_x[1] - x_s = np.array([[x_s]], dtype=np.float32) - outputs = net_regression.run(None, {"dense_2_input:0": x_s}) + x_s_arr = np.array([[x_s]], dtype=np.float32) + outputs = net_regression.run(None, {"dense_2_input:0": x_s_arr}) y_s = 
outputs[0][0, 0] y = y_s * scale_y[1] + scale_y[0] @@ -159,12 +160,12 @@ def obj(mdl): @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") def test_onnx_bounds_loader_writer(datadir): onnx_model = onnx.load(datadir.file("keras_conv_7x7_relu.onnx")) - scaled_input_bounds = dict() + scaled_input_bounds = {} for i in range(7): for j in range(7): scaled_input_bounds[0, i, j] = (0.0, 1.0) with tempfile.NamedTemporaryFile(suffix=".onnx") as f: write_onnx_model_with_bounds(f.name, onnx_model, scaled_input_bounds) net = load_onnx_neural_network_with_bounds(f.name) - for key, value in net.scaled_input_bounds.items(): - assert scaled_input_bounds[key] == value + for key, val in net.scaled_input_bounds.items(): + assert scaled_input_bounds[key] == val diff --git a/tests/neuralnet/test_relu.py b/tests/neuralnet/test_relu.py index 15ba97d1..8a842f81 100644 --- a/tests/neuralnet/test_relu.py +++ b/tests/neuralnet/test_relu.py @@ -2,7 +2,7 @@ import pyomo.environ as pyo import pytest -from omlt.block import OmltBlock +from omlt import OmltBlock from omlt.dependencies import onnx_available from omlt.neuralnet import ( FullSpaceNNFormulation, @@ -12,7 +12,7 @@ ) from omlt.neuralnet.activations import ComplementarityReLUActivation -# TODO: Add tests for single dimensional outputs as well +NEAR_EQUAL = 1e-3 def test_two_node_bigm(two_node_network_relu): @@ -24,17 +24,17 @@ def test_two_node_bigm(two_node_network_relu): m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < NEAR_EQUAL m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < NEAR_EQUAL -def test_two_node_ReluBigMFormulation(two_node_network_relu): +def test_two_node_relu_big_m_formulation(two_node_network_relu): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() formulation = ReluBigMFormulation(two_node_network_relu) @@ -43,14 +43,14 @@ def test_two_node_ReluBigMFormulation(two_node_network_relu): m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < NEAR_EQUAL m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < NEAR_EQUAL def 
test_two_node_complementarity(two_node_network_relu): @@ -64,17 +64,17 @@ def test_two_node_complementarity(two_node_network_relu): m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < 1e-3 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < NEAR_EQUAL m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < 1e-3 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < NEAR_EQUAL -def test_two_node_ReluComplementarityFormulation(two_node_network_relu): +def test_two_node_relu_complementarity_formulation(two_node_network_relu): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() formulation = ReluComplementarityFormulation(two_node_network_relu) @@ -82,17 +82,17 @@ def test_two_node_ReluComplementarityFormulation(two_node_network_relu): m.neural_net_block.inputs[0].fix(-2) m.obj1 = pyo.Objective(expr=0) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < 1e-3 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < NEAR_EQUAL m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("ipopt").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < 1e-3 + pyo.SolverFactory("ipopt").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < NEAR_EQUAL -def test_two_node_ReluPartitionFormulation(two_node_network_relu): +def test_two_node_relu_partition_formulation(two_node_network_relu): m = pyo.ConcreteModel() m.neural_net_block = OmltBlock() @@ -101,18 +101,18 @@ def test_two_node_ReluPartitionFormulation(two_node_network_relu): m.obj1 = pyo.Objective(expr=0) m.neural_net_block.inputs[0].fix(-2) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 10) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 2) < NEAR_EQUAL m.neural_net_block.inputs[0].fix(1) - status = pyo.SolverFactory("cbc").solve(m, tee=False) - assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < 1e-3 - assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < 1e-3 + pyo.SolverFactory("cbc").solve(m, tee=False) + assert abs(pyo.value(m.neural_net_block.outputs[0, 0]) - 1) < NEAR_EQUAL + assert abs(pyo.value(m.neural_net_block.outputs[0, 1]) - 0) < NEAR_EQUAL @pytest.mark.skipif(not onnx_available, reason="Need ONNX for this test") -def 
test_conv_ReluBigMFormulation(datadir): +def test_conv_relu_big_m_formulation(datadir): from omlt.io.onnx import load_onnx_neural_network_with_bounds net = load_onnx_neural_network_with_bounds(datadir.file("keras_conv_7x7_relu.onnx")) @@ -124,16 +124,16 @@ def test_conv_ReluBigMFormulation(datadir): m.obj1 = pyo.Objective(expr=0) # compute expected output for this input - input = np.eye(7, 7).reshape(1, 7, 7) - x = input + x_start = np.eye(7, 7).reshape(1, 7, 7) + x = x_start for layer in net.layers: x = layer.eval_single_layer(x) output = x for i in range(7): for j in range(7): - m.neural_net_block.inputs[0, i, j].fix(input[0, i, j]) - status = pyo.SolverFactory("cbc").solve(m, tee=False) + m.neural_net_block.inputs[0, i, j].fix(x_start[0, i, j]) + pyo.SolverFactory("cbc").solve(m, tee=False) d, r, c = output.shape for i in range(d): @@ -141,4 +141,4 @@ def test_conv_ReluBigMFormulation(datadir): for k in range(c): expected = output[i, j, k] actual = pyo.value(m.neural_net_block.outputs[i, j, k]) - assert abs(actual - expected) < 1e-3 + assert abs(actual - expected) < NEAR_EQUAL diff --git a/tests/neuralnet/train_keras_models.py b/tests/neuralnet/train_keras_models.py index c2de9dbc..e68e1b1a 100644 --- a/tests/neuralnet/train_keras_models.py +++ b/tests/neuralnet/train_keras_models.py @@ -1,16 +1,14 @@ -import pytest import keras - -# from conftest import get_neural_network_data +from conftest import get_neural_network_data from keras.layers import Conv2D, Dense -from keras.models import Model, Sequential -from pyomo.common.fileutils import this_file_dir +from keras.models import Sequential from keras.optimizers import Adamax +from pyomo.common.fileutils import this_file_dir from omlt.io import write_onnx_model_with_bounds -def train_models(): +def train_models(): # noqa: PLR0915 x, y, x_test = get_neural_network_data("131") nn = Sequential(name="keras_linear_131") nn.add( @@ -37,9 +35,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/keras_linear_131.keras") x, y, x_test = get_neural_network_data("131") @@ -69,9 +65,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/keras_linear_131_sigmoid.keras") x, y, x_test = get_neural_network_data("131") @@ -102,9 +96,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save( this_file_dir() + "/models/keras_linear_131_sigmoid_output_activation.keras" ) @@ -136,9 +128,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/keras_linear_131_relu.keras") x, y, x_test = get_neural_network_data("131") @@ -169,9 +159,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( 
- x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/keras_linear_131_relu_output_activation.keras") x, y, x_test = get_neural_network_data("131") @@ -202,9 +190,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save( this_file_dir() + "/models/keras_linear_131_sigmoid_softplus_output_activation.keras" @@ -263,9 +249,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/big.keras") x, y, x_test = get_neural_network_data("2353") @@ -305,9 +289,7 @@ def train_models(): ) ) nn.compile(optimizer=Adamax(learning_rate=0.01), loss="mae") - history = nn.fit( - x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15 - ) + nn.fit(x=x, y=y, validation_split=0.2, batch_size=16, verbose=1, epochs=15) nn.save(this_file_dir() + "/models/keras_linear_2353.keras") @@ -333,13 +315,13 @@ def train_conv(): onnx_model, _ = tf2onnx.convert.from_keras(nn) - input_bounds = dict() + input_bounds = {} for i in range(7): for j in range(7): input_bounds[0, i, j] = (0.0, 1.0) with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f: write_onnx_model_with_bounds(f.name, onnx_model, input_bounds) - print(f"Wrote ONNX model with bounds at {f.name}") + print(f"Wrote ONNX model with bounds at {f.name}") # noqa: T201 if __name__ == "__main__": diff --git a/tests/notebooks/test_run_notebooks.py b/tests/notebooks/test_run_notebooks.py index 9b1361c9..db45f01d 100644 --- a/tests/notebooks/test_run_notebooks.py +++ b/tests/notebooks/test_run_notebooks.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pytest from pyomo.common.fileutils import this_file_dir @@ -12,11 +13,10 @@ ) -# TODO: These will be replaced with stronger tests using testbook soon def _test_run_notebook(folder, notebook_fname, n_cells): # Change to notebook directory to allow for testing - cwd = os.getcwd() - os.chdir(os.path.join(this_file_dir(), "..", "..", "docs", "notebooks", folder)) + cwd = Path.cwd() + os.chdir(Path(this_file_dir()) / ".." / ".." 
/ "docs" / "notebooks" / folder) with testbook(notebook_fname, timeout=500, execute=True) as tb: assert tb.code_cells_executed == n_cells os.chdir(cwd) diff --git a/tests/test_block.py b/tests/test_block.py index 6c6311f5..8867032e 100644 --- a/tests/test_block.py +++ b/tests/test_block.py @@ -3,8 +3,11 @@ from omlt import OmltBlock +INPUTS_LENGTH = 3 +OUTPUTS_LENGTH = 2 -class dummy_formulation(object): + +class DummyFormulation: def __init__(self): self.input_indexes = ["A", "C", "D"] self.output_indexes = [(0, 0), (0, 1), (1, 0), (1, 1)] @@ -15,6 +18,12 @@ def _set_block(self, blk): def _build_formulation(self): pass + def _clear_inputs(self): + self.input_indexes = [] + + def _clear_outputs(self): + self.output_indexes = [] + def test_block(): m = pyo.ConcreteModel() @@ -26,27 +35,29 @@ def test_block(): output_indexes=[(0, 0), (0, 1), (1, 0), (1, 1)], ) - assert [k for k in m.b.inputs] == ["A", "B", "C"] - assert [k for k in m.b.outputs] == [1, 4] - assert [k for k in m.b2.inputs] == [(1, 3), (42, 1975), (13, 2)] - assert [k for k in m.b2.outputs] == [(0, 0), (0, 1), (1, 0), (1, 1)] + assert list(m.b.inputs) == ["A", "B", "C"] + assert list(m.b.outputs) == [1, 4] + assert list(m.b2.inputs) == [(1, 3), (42, 1975), (13, 2)] + assert list(m.b2.outputs) == [(0, 0), (0, 1), (1, 0), (1, 1)] m = pyo.ConcreteModel() m.b = OmltBlock() - formulation = dummy_formulation() + formulation = DummyFormulation() m.b.build_formulation(formulation) - print(dir(m.b)) + assert m.b._OmltBlockData__formulation is formulation - assert [k for k in m.b.inputs] == ["A", "C", "D"] - assert [k for k in m.b.outputs] == [(0, 0), (0, 1), (1, 0), (1, 1)] + assert list(m.b.inputs) == ["A", "C", "D"] + assert list(m.b.outputs) == [(0, 0), (0, 1), (1, 0), (1, 1)] def test_input_output_auto_creation(): m = pyo.ConcreteModel() m.b = OmltBlock() - m.b._setup_inputs_outputs(input_indexes=range(3), output_indexes=range(2)) - assert len(m.b.inputs) == 3 - assert len(m.b.outputs) == 2 + m.b._setup_inputs_outputs( + input_indexes=range(INPUTS_LENGTH), output_indexes=range(OUTPUTS_LENGTH) + ) + assert len(m.b.inputs) == INPUTS_LENGTH + assert len(m.b.outputs) == OUTPUTS_LENGTH m.b2 = OmltBlock() m.b2._setup_inputs_outputs( @@ -57,8 +68,20 @@ def test_input_output_auto_creation(): assert len(m.b2.outputs) == 1 m.b3 = OmltBlock() - with pytest.raises(ValueError): - m.b3._setup_inputs_outputs( - input_indexes=[], - output_indexes=[], - ) + formulation1 = DummyFormulation() + formulation1._clear_inputs() + expected_msg = ( + "OmltBlock must have at least one input to build a formulation. " + f"{formulation1} has no inputs." + ) + with pytest.raises(ValueError, match=expected_msg): + m.b3.build_formulation(formulation1) + + formulation2 = DummyFormulation() + formulation2._clear_outputs() + expected_msg = ( + "OmltBlock must have at least one output to build a formulation. " + f"{formulation2} has no outputs." 
+    )
+    with pytest.raises(ValueError, match=expected_msg):
+        m.b3.build_formulation(formulation2)
diff --git a/tests/test_formulation.py b/tests/test_formulation.py
index 4e047845..9196e5e7 100644
--- a/tests/test_formulation.py
+++ b/tests/test_formulation.py
@@ -1,23 +1,23 @@
 import pytest
-from pyomo.environ import ConcreteModel, Objective, SolverFactory, Var, value
+from pyomo.environ import ConcreteModel, Objective, SolverFactory, value
 
-from omlt.block import OmltBlock
+from omlt import OmltBlock
 from omlt.formulation import _setup_scaled_inputs_outputs
 from omlt.scaling import OffsetScaling
 
 
 def test_scaled_inputs_outputs():
     m = ConcreteModel()
-    xoffset = {(0, i): float(i) for i in range(3)}
-    xfactor = {(0, i): 0.5 * (i + 1) for i in range(3)}
-    yoffset = {(1, i): -0.25 * i for i in range(2)}
-    yfactor = {(1, i): 0.125 * (i + 1) for i in range(2)}
+    x1offset: dict[tuple[int, int], float] = {(0, i): float(i) for i in range(3)}
+    x1factor: dict[tuple[int, int], float] = {(0, i): 0.5 * (i + 1) for i in range(3)}
+    y1offset: dict[tuple[int, int], float] = {(1, i): -0.25 * i for i in range(2)}
+    y1factor: dict[tuple[int, int], float] = {(1, i): 0.125 * (i + 1) for i in range(2)}
 
     scaler = OffsetScaling(
-        offset_inputs=xoffset,
-        factor_inputs=xfactor,
-        offset_outputs=yoffset,
-        factor_outputs=yfactor,
+        offset_inputs=x1offset,
+        factor_inputs=x1factor,
+        offset_outputs=y1offset,
+        factor_outputs=y1factor,
     )
 
     scaled_input_bounds = {(0, 0): (0, 5), (0, 1): (-2, 2), (0, 2): (0, 1)}
@@ -32,7 +32,7 @@ def test_scaled_inputs_outputs():
     m.obj = Objective(expr=1)
     m.b1.inputs.fix(2)
     m.b1.outputs.fix(1)
-    status = SolverFactory("ipopt").solve(m)
+    SolverFactory("ipopt").solve(m)
 
     assert value(m.b1.scaled_inputs[(0, 0)]) == pytest.approx(4.0)
     assert value(m.b1.scaled_inputs[(0, 1)]) == pytest.approx(1.0)
@@ -48,16 +48,16 @@ def test_scaled_inputs_outputs():
     assert m.b1.inputs[(0, 2)].ub == pytest.approx(3.5)
 
     m = ConcreteModel()
-    xoffset = {i: float(i) for i in range(3)}
-    xfactor = {i: 0.5 * (i + 1) for i in range(3)}
-    yoffset = {i: -0.25 * i for i in range(2)}
-    yfactor = {i: 0.125 * (i + 1) for i in range(2)}
+    x2offset: dict[int, float] = {i: float(i) for i in range(3)}
+    x2factor: dict[int, float] = {i: 0.5 * (i + 1) for i in range(3)}
+    y2offset: dict[int, float] = {i: -0.25 * i for i in range(2)}
+    y2factor: dict[int, float] = {i: 0.125 * (i + 1) for i in range(2)}
 
     scaler = OffsetScaling(
-        offset_inputs=xoffset,
-        factor_inputs=xfactor,
-        offset_outputs=yoffset,
-        factor_outputs=yfactor,
+        offset_inputs=x2offset,
+        factor_inputs=x2factor,
+        offset_outputs=y2offset,
+        factor_outputs=y2factor,
     )
 
     input_bounds = {0: (0, 5), 1: (-2, 2), 2: (0, 1)}
@@ -68,7 +68,7 @@ def test_scaled_inputs_outputs():
     m.obj = Objective(expr=1)
     m.b1.inputs.fix(2)
     m.b1.outputs.fix(1)
-    status = SolverFactory("ipopt").solve(m)
+    SolverFactory("ipopt").solve(m)
     assert value(m.b1.scaled_inputs[0]) == pytest.approx(4.0)
     assert value(m.b1.scaled_inputs[1]) == pytest.approx(1.0)
     assert value(m.b1.scaled_inputs[2]) == pytest.approx(0.0)
diff --git a/tests/test_scaling.py b/tests/test_scaling.py
index 05b0e013..1161dd1f 100644
--- a/tests/test_scaling.py
+++ b/tests/test_scaling.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 
@@ -7,16 +9,16 @@
 
 def test_convert_to_dict():
     x = ["a", "b"]
-    x = convert_to_dict(x)
-    assert sorted(x.keys()) == [0, 1]
-    assert x[0] == "a"
-    assert x[1] == "b"
+    xd = convert_to_dict(x)
+    assert sorted(xd.keys()) == [0, 1]
+    assert xd[0] == "a"
+    assert xd[1] == "b"
 
-    x = {2: "a", 1: "b"}
-    x = convert_to_dict(x)
-    assert sorted(x.keys()) == [1, 2]
-    assert x[2] == "a"
-    assert x[1] == "b"
+    y = {2: "a", 1: "b"}
+    yd = convert_to_dict(y)
+    assert sorted(yd.keys()) == [1, 2]
+    assert yd[2] == "a"
+    assert yd[1] == "b"
 
 
 def test_offset_scaling():
@@ -71,48 +73,44 @@ def test_incorrect_keys():
     np.testing.assert_almost_equal(list(test_y_unscal.values()), list(y.values()))
 
     x = {1: 42, 2: 65}
-    with pytest.raises(ValueError) as excinfo:
-        test_x_scal = scaling.get_scaled_input_expressions(x)
-    expected_msg = (
+    expected_msg = re.escape(
        "get_scaled_input_expressions called with input_vars that "
        "do not have the same indices as offset_inputs or factor_inputs.\nKeys "
        "in input_vars: [1, 2].\nKeys in offset_inputs: [1, 42].\nKeys in "
        "offset_factor: [1, 42]."
     )
-    assert str(excinfo.value) == expected_msg
+    with pytest.raises(ValueError, match=expected_msg):
+        test_x_scal = scaling.get_scaled_input_expressions(x)
 
     y = {7: -1, 19: 2, 11: 3}
-    with pytest.raises(ValueError) as excinfo:
-        test_y_scal = scaling.get_scaled_output_expressions(y)
-    expected_msg = (
+    expected_msg = re.escape(
        "get_scaled_output_expressions called with output_vars that "
        "do not have the same indices as offset_outputs or factor_outputs.\nKeys "
        "in output_vars: [7, 11, 19]\nKeys in offset_outputs: [7, 9, 11]\nKeys in "
        "offset_factor: [7, 9, 11]"
     )
-    assert str(excinfo.value) == expected_msg
+    with pytest.raises(ValueError, match=expected_msg):
+        scaling.get_scaled_output_expressions(y)
 
     x_scal = {1: 42, 2: 65}
-    with pytest.raises(ValueError) as excinfo:
-        test_x_unscal = scaling.get_unscaled_input_expressions(x_scal)
-    expected_msg = (
+    expected_msg = re.escape(
        "get_scaled_input_expressions called with input_vars that "
        "do not have the same indices as offset_inputs or factor_inputs.\nKeys "
        "in input_vars: [1, 2]\nKeys in offset_inputs: [1, 42]\nKeys in "
        "offset_factor: [1, 42]"
     )
-    assert str(excinfo.value) == expected_msg
+    with pytest.raises(ValueError, match=expected_msg):
+        scaling.get_unscaled_input_expressions(x_scal)
 
     y_scal = {7: -1, 8: 2, 11: 3}
-    with pytest.raises(ValueError) as excinfo:
-        test_y_unscal = scaling.get_unscaled_output_expressions(y_scal)
-    expected_msg = (
+    expected_msg = re.escape(
        "get_scaled_output_expressions called with output_vars that do "
        "not have the same indices as offset_outputs or factor_outputs.\nKeys in "
        "output_vars: [7, 8, 11]\nKeys in offset_outputs: [7, 9, 11]\nKeys in "
        "offset_factor: [7, 9, 11]"
     )
-    assert str(excinfo.value) == expected_msg
+    with pytest.raises(ValueError, match=expected_msg):
+        test_y_unscal = scaling.get_unscaled_output_expressions(y_scal)
 
 
 def test_negative_offsets():
@@ -121,36 +119,38 @@ def test_negative_offsets():
     y_offset = [-4, 2, 1.784]
     y_factor = [2, 1.5, 1.3]
-    with pytest.raises(ValueError) as excinfo:
-        scaling = OffsetScaling(
+    expected_msg = (
+        "OffsetScaling only accepts positive values"
+        " for factor_inputs. Negative value found at"
+        " index 0."
+    )
+
+    with pytest.raises(ValueError, match=expected_msg):
+        OffsetScaling(
             offset_inputs=x_offset,
             factor_inputs=x_factor,
             offset_outputs=y_offset,
             factor_outputs=y_factor,
         )
-    assert (
-        str(excinfo.value) == "OffsetScaling only accepts positive values"
-        " for factor_inputs. Negative value found at"
-        " index 0."
-    )
 
     x_offset = [42, 65]
     x_factor = [1975, 1964]
     y_offset = [-4, 2, 1.784]
     y_factor = [2, -1.5, 1.3]
-    with pytest.raises(ValueError) as excinfo:
-        scaling = OffsetScaling(
+    expected_msg = (
+        "OffsetScaling only accepts positive values"
+        " for factor_outputs. Negative value found at"
+        " index 1."
+    )
+
+    with pytest.raises(ValueError, match=expected_msg):
+        OffsetScaling(
            offset_inputs=x_offset,
            factor_inputs=x_factor,
            offset_outputs=y_offset,
            factor_outputs=y_factor,
        )
-    assert (
-        str(excinfo.value) == "OffsetScaling only accepts positive values"
-        " for factor_outputs. Negative value found at"
-        " index 1."
-    )
 
 
 if __name__ == "__main__":
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index e64ab1d8..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,117 +0,0 @@
-# Tox configuration file
-# Read more under https://tox.readthedocs.org/
-# THIS SCRIPT IS SUPPOSED TO BE AN EXAMPLE. MODIFY IT ACCORDING TO YOUR NEEDS!
-
-[tox]
-minversion = 3.15
-envlist = py36, py37, py38, py39, py310, py311, py312, lint
-
-[gh-actions]
-python =
-    3.6: py36
-    3.7: py37
-    3.8: py38
-    3.9: lint, py39
-    3.10: py310
-    3.11: py311
-    3.12: py312
-
-[testenv]
-deps = pytest
-extras = testing
-commands = pytest {posargs}
-
-#[testenv:fullenv]
-#description = Testing with full dependencies
-#deps = pytest
-#extras = testing
-#commands = pytest {posargs}
-
-[testenv:leanenv]
-description = Testing with fewer dependencies
-deps = pytest
-extras = testing_lean
-commands = pytest {posargs}
-
-[testenv:notebooks]
-deps = pytest
-extras = testing
-commands = pytest --nbmake --cov-append {posargs}
-
-[testenv:{clean,build}]
-description =
-    Build (or clean) the package in isolation according to instructions in:
-    https://setuptools.readthedocs.io/en/latest/build_meta.html#how-to-use-it
-    https://github.com/pypa/pep517/issues/91
-    https://github.com/pypa/build
-# NOTE: build is still experimental, please refer to the links for updates/issues
-skip_install = True
-changedir = {toxinidir}
-deps =
-    build: build[virtualenv]
-commands =
-    clean: python -c 'from shutil import rmtree; rmtree("build", True); rmtree("dist", True)'
-    build: python -m build .
-# By default `build` produces wheels, you can also explicitly use the flags `--sdist` and `--wheel`
-
-
-[testenv:{docs,doctests}]
-description = invoke sphinx-build to build the docs/run doctests
-setenv =
-    DOCSDIR = {toxinidir}/docs
-    BUILDDIR = {toxinidir}/docs/_build
-    docs: BUILD = html
-    doctests: BUILD = doctest
-deps =
-    -r {toxinidir}/docs/requirements.txt
-    # ^ requirements.txt shared with Read The Docs
-commands =
-    sphinx-build -b {env:BUILD} -d "{env:BUILDDIR}/doctrees" "{env:DOCSDIR}" "{env:BUILDDIR}/{env:BUILD}" {posargs}
-
-
-[testenv:publish]
-description =
-    Publish the package you have been developing to a package index server.
-    By default, it uses testpypi. If you really want to publish your package
-    to be publicly accessible in PyPI, use the `-- --repository pypi` option.
-skip_install = True
-changedir = {toxinidir}
-passenv =
-    TWINE_USERNAME
-    TWINE_PASSWORD
-    TWINE_REPOSITORY
-deps = twine
-commands =
-    python -m twine check dist/*
-    python -m twine upload {posargs:--repository testpypi} dist/*
-
-[flake8]
-extend-ignore = D, E, F, N
-per-file-ignores = __init__.py:F401
-
-[testenv:lint]
-description = Lint files using isort, black, and flake8
-skip_install = True
-changedir = {toxinidir}
-deps =
-    black
-    flake8
-    flake8-bugbear
-    flake8-docstrings
-    isort
-    pep8-naming
-commands =
-    flake8 --config=tox.ini src/omlt tests/
-    black --check --diff src/omlt tests/
-
-[testenv:format]
-description = Format Python files using isort and black
-skip_install = true
-changedir = {toxinidir}
-deps =
-    black
-    typing-extensions
-    isort
-commands =
-    isort src/omlt tests
-    black src/omlt tests
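A note on the `pytest.raises(..., match=...)` pattern adopted in the updated `tests/test_scaling.py` and `tests/test_block.py` above: the `match` argument is treated as a regular expression (pytest applies `re.search` to the text of the raised exception), so expected messages that contain `[`, `]`, `.`, or embedded newlines are wrapped in `re.escape`, which is why `test_scaling.py` now imports `re`. Below is a minimal, self-contained sketch of the same idiom; the `scale_inputs` helper and its error message are illustrative stand-ins, not part of OMLT.

```python
import re

import pytest


def scale_inputs(values: dict[int, float]) -> dict[int, float]:
    """Hypothetical helper that rejects unexpected keys, OMLT-style."""
    expected_keys = [0, 1]
    if sorted(values) != expected_keys:
        # The bracketed key list contains regex metacharacters ([ and ]).
        msg = f"Unexpected keys in input_vars: {sorted(values)}."
        raise ValueError(msg)
    return {k: 2.0 * v for k, v in values.items()}


def test_scale_inputs_rejects_bad_keys():
    # re.escape keeps the literal brackets from being read as a character class.
    expected_msg = re.escape("Unexpected keys in input_vars: [1, 2].")
    with pytest.raises(ValueError, match=expected_msg):
        scale_inputs({1: 42.0, 2: 65.0})
```

The older capture-and-compare style with `excinfo` still works; the `match=` form simply keeps the expected message next to the call that is supposed to raise, which is the convention the rewritten tests follow.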