diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 91fd70f..f27575b 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -15,7 +15,6 @@ jobs: - uses: mamba-org/setup-micromamba@v1 with: environment-file: environment_torch.yml - activate-environment: test - shell: bash -l {0} run: | conda info @@ -28,12 +27,14 @@ jobs: run: | micromamba install ruff # stop the build if there are Python syntax errors or undefined names - ruff . --count --select=E9,F63,F7,F82 --show-source --statistics + ruff check --select=E9,F63,F7,F82 # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - ruff . --count --exit-zero --max-complexity=100 --max-line-length=127 --statistics + ruff check --output-format concise # Checking documentation errors - ruff . --count --select=D --exit-zero --max-complexity=100 --max-line-length=127 --statistics + ruff check --select=D + ruff check --select=D --statistics - name: Test with pytest shell: bash -l {0} run: | + export KERAS_BACKEND="torch" pytest diff --git a/applications/evaluate_ptype.py b/applications/evaluate_ptype.py index 77a7786..7cc1a73 100644 --- a/applications/evaluate_ptype.py +++ b/applications/evaluate_ptype.py @@ -10,12 +10,12 @@ import pandas as pd from sklearn.metrics import precision_recall_fscore_support -from ptype.reliability import ( +from mlguess.reliability import ( compute_calibration, reliability_diagram, reliability_diagrams, ) -from ptype.plotting import ( +from mlguess.plotting import ( plot_confusion_matrix, coverage_figures, ) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0630cbc..35c6d9c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,6 +17,7 @@ extensions = ['myst_parser', 'sphinx.ext.napoleon', 'sphinx.ext.intersphinx', + 'sphinx.ext.mathjax', 'nbsphinx'] templates_path = ['_templates'] diff --git a/docs/source/index.rst b/docs/source/index.rst index 023ce9b..160ef07 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,6 +11,8 @@ Welcome to miles-guess's documentation! :caption: Contents: install.rst + keras.md + torch.md mlguess.rst notebooks/classifier_example.ipynb notebooks/regression_example.ipynb diff --git a/docs/source/keras.md b/docs/source/keras.md index d1d8a14..7d3e722 100644 --- a/docs/source/keras.md +++ b/docs/source/keras.md @@ -1,7 +1,10 @@ +# Keras in MILES-GUESS + Welcome to the Keras users page. The instructions below outline how to compute various UQ quantities like aleatoric and epistemic using different modeling approaches. +## Regression Models -### 1a. Train/evaluate a deterministic multi-layer perceptrion (MLP) on the SL dataset: +### 1a. Train/evaluate a deterministic multi-layer perceptron (MLP) on the SL dataset: ```bash python3 applications/train_mlp_SL.py -c config/model_mlp_SL.yml ``` @@ -16,6 +19,8 @@ python applications/train_gaussian_SL.py -c config/model_gaussian_SL.yml python applications/train_evidential_SL.py -c config/model_evidential_SL.yml ``` +## Classification Models + ### 2a. Train a categorical MLP classifier on the p-type dataset: ```bash python applications/train_classifier_ptype.py -c config/model_classifier_ptype.yml @@ -213,4 +218,4 @@ Depending on the problem, a data field is customized and also present in the con ## ECHO hyperparameter optimization -Configuration files are also supplied for use with the Earth Computing Hyperparameter Optimization (ECHO) package. 
See the echo package https://github.com/NCAR/echo-opt/tree/main/echo for more details on the configuration fields. \ No newline at end of file +Configuration files are also supplied for use with the Earth Computing Hyperparameter Optimization (ECHO) package. See the echo package https://github.com/NCAR/echo-opt/tree/main/echo for more details on the configuration fields. diff --git a/docs/source/mlguess.keras.rst b/docs/source/mlguess.keras.rst index 80f59f9..19587e5 100644 --- a/docs/source/mlguess.keras.rst +++ b/docs/source/mlguess.keras.rst @@ -36,10 +36,10 @@ mlguess.keras.models module :undoc-members: :show-inheritance: -mlguess.keras.models\_deprecated module +mlguess.keras.deprecated module --------------------------------------- -.. automodule:: mlguess.keras.models_deprecated +.. automodule:: mlguess.keras.deprecated :members: :undoc-members: :show-inheritance: diff --git a/docs/source/torch.md b/docs/source/torch.md index fca43aa..03d82a0 100644 --- a/docs/source/torch.md +++ b/docs/source/torch.md @@ -1,3 +1,5 @@ +# PyTorch in MILES-GUESS + Welcome to the pyTorch users page. The instructions below outline how to compute various UQ quantities like aleatoric and epistemic using different modeling approaches. Email schreck@ucar.edu for questions/concerns/fixes/etc ## Regression usage @@ -158,4 +160,4 @@ scheduler: ### Classifier Models -* `output_activation`: Set to `softmax` for standard classification. If not set, the model will use evidential classification. \ No newline at end of file +* `output_activation`: Set to `softmax` for standard classification. If not set, the model will use evidential classification. diff --git a/environment_gpu.yml b/environment_gpu_tf.yml similarity index 91% rename from environment_gpu.yml rename to environment_gpu_tf.yml index f733088..7fb6c67 100644 --- a/environment_gpu.yml +++ b/environment_gpu_tf.yml @@ -1,10 +1,10 @@ -name: guess +name: guesstf channels: - conda-forge - nvidia dependencies: - - python=3.10 - - numpy<1.24 + - python=3.11 + - numpy<2 - scipy - matplotlib - xarray diff --git a/environment_gpu_torch.yml b/environment_gpu_torch.yml new file mode 100644 index 0000000..f6d9f74 --- /dev/null +++ b/environment_gpu_torch.yml @@ -0,0 +1,36 @@ +name: guesstorch +channels: + - conda-forge + - pytorch +dependencies: + - python=3.11 + - numpy<2 + - scipy + - matplotlib + - xarray + - metpy + - netcdf4 + - pandas + - pyyaml + - pytest + - pip + - pyproj + - jupyter + - jupyterlab + - cartopy + - tqdm + - seaborn + - sphinx + - numba + - properscoring + - pyarrow + - imbalanced-learn + - pytorch + - pytorch-cuda=12.1 + - pip: + - keras + - echo-opt + - hagelslag + - bridgescaler + - safetensors + - -e . diff --git a/environment_tensorflow.yml b/environment_tensorflow.yml index 48b56fb..a4f9460 100644 --- a/environment_tensorflow.yml +++ b/environment_tensorflow.yml @@ -1,9 +1,9 @@ -name: guess +name: guesstf channels: - conda-forge dependencies: - python=3.10 - - numpy<1.24 + - numpy<2 - scipy - matplotlib - xarray @@ -31,5 +31,4 @@ dependencies: - echo-opt - hagelslag - bridgescaler - - -e git+https://github.com/ai2es/ptype-physical.git#egg=ptype - -e . diff --git a/environment_torch.yml b/environment_torch.yml index 6e7c538..5b0785d 100644 --- a/environment_torch.yml +++ b/environment_torch.yml @@ -31,5 +31,5 @@ dependencies: - echo-opt - hagelslag - bridgescaler - - -e git+https://github.com/ai2es/ptype-physical.git#egg=ptype + - safetensors - -e .
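For readers wiring up the split TensorFlow/PyTorch environments above: Keras 3 chooses its compute backend from the `KERAS_BACKEND` environment variable at import time, which is why the CI workflow earlier in this diff exports `KERAS_BACKEND="torch"` before running pytest. A minimal sketch (backend names follow the Keras 3 convention):

```python
# Select the Keras 3 backend before keras is imported anywhere.
import os

os.environ["KERAS_BACKEND"] = "torch"  # or "tensorflow" / "jax"

import keras  # noqa: E402 -- must come after the env var is set

print(keras.backend.backend())  # -> "torch"
```

Setting the variable after `keras` has already been imported has no effect, so in scripts it belongs at the very top (or in the shell, as the workflow does).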
diff --git a/mlguess/VERSION b/mlguess/VERSION index 0136388..7e08acf 100644 --- a/mlguess/VERSION +++ b/mlguess/VERSION @@ -1 +1 @@ -2024.1.0b2 +2024.2.0 diff --git a/mlguess/keras/deprecated/models.py b/mlguess/keras/deprecated/models.py index 0540405..1d00667 100644 --- a/mlguess/keras/deprecated/models.py +++ b/mlguess/keras/deprecated/models.py @@ -97,7 +97,8 @@ def __init__( self.history = None def build_neural_network(self, inputs, outputs, last_layer="Dense"): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables. @@ -108,7 +109,6 @@ def build_neural_network(self, inputs, outputs, last_layer="Dense"): if self.activation == "leaky": self.activation = LeakyReLU() - if self.kernel_reg == "l1": self.kernel_reg = L1(self.l1_weight) elif self.kernel_reg == "l2": @@ -522,7 +522,8 @@ def __init__( class GaussianRegressorDNN(BaseRegressor): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers and provides evidential uncertainty estimation. Inherits from BaseRegressor. @@ -542,6 +543,7 @@ class GaussianRegressorDNN(BaseRegressor): model: Keras Model object. evidential_coef: Evidential regularization coefficient. metrics: Optional list of metrics to monitor during training. + """ def __init__( @@ -571,11 +573,13 @@ def __init__( metrics=None, eps=1e-7 ): - """Initialize the EvidentialRegressorDNN. + """ + Initialize the GaussianRegressorDNN. Args: coupling_coef: Coupling coeffient for loss fix evidential_coef: Evidential regularization coefficient. + """ super().__init__( # Call the constructor of the base class hidden_layers, @@ -606,7 +610,8 @@ def __init__( self.loss = gaussian_nll def build_neural_network(self, inputs, outputs, last_layer="DenseNormal"): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables. @@ -702,7 +707,8 @@ def predict_monte_carlo(self, x_test, forward_passes, scaler=None, batch_size=No class EvidentialRegressorDNN(BaseRegressor): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers and provides evidential uncertainty estimation. Inherits from BaseRegressor. @@ -722,6 +728,7 @@ class EvidentialRegressorDNN(BaseRegressor): model: Keras Model object. evidential_coef: Evidential regularization coefficient. metrics: Optional list of metrics to monitor during training. + """ def __init__( self, @@ -803,7 +810,8 @@ def __init__( logging.info(f"Using loss: {loss}") def build_neural_network(self, inputs, outputs): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables.
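For context on the `gaussian_nll` loss assigned in `GaussianRegressorDNN.__init__` above, a standalone sketch of the standard Gaussian negative log-likelihood follows; the exact mlguess implementation may differ in reduction and parameterization:

```python
import numpy as np

def gaussian_nll_sketch(y, mu, sigma, eps=1e-7):
    """Per-sample Gaussian negative log-likelihood (standard form)."""
    sigma = np.maximum(sigma, eps)  # guard against zero spread, cf. the eps=1e-7 arg above
    return 0.5 * np.log(2.0 * np.pi * sigma**2) + (y - mu) ** 2 / (2.0 * sigma**2)

# A perfect mean prediction is still penalized for an overstated spread:
print(gaussian_nll_sketch(y=1.0, mu=1.0, sigma=2.0))  # ~1.612
```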
@@ -905,19 +913,17 @@ def predict_dist_params(self, x, y_scaler=None, batch_size=None): return mu, v, alpha, beta - def predict_ensemble( - self, x_test, scaler=None, batch_size=None - ): + def predict_ensemble(self, x_test, scaler=None, batch_size=None): return super().predict_ensemble(x_test, scaler=scaler, batch_size=batch_size, num_outputs=3) - def predict_monte_carlo( - self, x_test, forward_passes, scaler=None, batch_size=None - ): - return super().predict_monte_carlo(x_test, forward_passes, scaler=scaler, batch_size=batch_size, num_outputs=3) + def predict_monte_carlo(self, x_test, forward_passes, scaler=None, batch_size=None): + return super().predict_monte_carlo(x_test, forward_passes, + scaler=scaler, batch_size=batch_size, num_outputs=3) class CategoricalDNN(object): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers. + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers. Attributes: hidden_layers: Number of hidden layers @@ -941,6 +947,7 @@ class CategoricalDNN(object): decay: Level of decay to apply to learning rate verbose: Level of detail to provide during training (0 = None, 1 = Minimal, 2 = All) classifier: (boolean) If training on classes + """ def __init__( self, @@ -969,7 +976,7 @@ def __init__( decay=0, verbose=0, random_state=1000, - callbacks=[], + callbacks=None, balanced_classes=0, steps_per_epoch=0, ): @@ -997,6 +1004,8 @@ def __init__( self.use_dropout = use_dropout self.dropout_alpha = dropout_alpha self.epochs = epochs + if callbacks is None: + callbacks = [] self.callbacks = callbacks self.decay = decay self.verbose = verbose @@ -1007,7 +1016,8 @@ def __init__( self.steps_per_epoch = steps_per_epoch def build_neural_network(self, inputs, outputs): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables @@ -1066,13 +1076,15 @@ def build_neural_network(self, inputs, outputs): self.model.compile(optimizer=self.optimizer_obj, loss=self.loss) def build_from_sequential(self, model, optimizer="adam", loss="mse", metrics=None): - """Build the neural network model using a Keras Sequential model. + """ + Build the neural network model using a Keras Sequential model. Args: model (tf.keras.Sequential): Keras Sequential model to use. optimizer (str or tf.keras.optimizers.Optimizer): Optimizer for the model. loss (str or tf.keras.losses.Loss): Loss function for the model. metrics (list of str or tf.keras.metrics.Metric): Metrics for the model.
+ """ self.model = model diff --git a/mlguess/keras/layers.py b/mlguess/keras/layers.py index 0315359..4899fcc 100644 --- a/mlguess/keras/layers.py +++ b/mlguess/keras/layers.py @@ -1,8 +1,6 @@ import keras import keras.layers as layers import keras.ops as ops -from keras.src import activations -from keras.src.layers.layer import Layer @keras.saving.register_keras_serializable() diff --git a/mlguess/keras/models.py b/mlguess/keras/models.py index e97475f..43b6c40 100644 --- a/mlguess/keras/models.py +++ b/mlguess/keras/models.py @@ -1,13 +1,14 @@ import sys import keras import keras.ops as ops +import keras.layers as layers +import keras.optimizers as optimizers import numpy as np -from keras.regularizers import L1, L2, L1L2 -from keras.layers import Dense, GaussianNoise, Dropout +# from keras.layers import Dense, GaussianNoise, Dropout from mlguess.keras.layers import DenseNormalGamma, DenseNormal from mlguess.keras.losses import evidential_cat_loss, evidential_reg_loss, gaussian_nll from mlguess.keras.callbacks import ReportEpoch -from keras.optimizers import Adam, SGD +# from keras.optimizers import Adam, SGD @@ -42,9 +43,9 @@ class CategoricalDNN(keras.models.Model): classifier: (boolean) If training on classes Example: - When evidential==True, the output activation and the loss function will be overridden under the hood. When - evidential==False, it will use the parameters specified and ignore the annealing_coeff. - Note: Model compilation happens under the hood when .fit() is called. + When evidential==True, the output activation and the loss function will be overridden under the hood. When + evidential==False, it will use the parameters specified and ignore the annealing_coeff. + Note: Model compilation happens under the hood when .fit() is called.:: n_samples = 1000 n_features = 23 @@ -76,6 +77,7 @@ class CategoricalDNN(keras.models.Model): lr=0.0001) hist = model.fit(x_train, y_train) p = model.predict(x_train, return_uncertainties=False, batch_size=5000) + """ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activation="relu", @@ -83,8 +85,14 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat annealing_coeff=1.0, use_noise=False, noise_sd=0.0, lr=0.001, use_dropout=False, dropout_alpha=0.2, batch_size=128, epochs=2, kernel_reg=None, l1_weight=0.0, l2_weight=0.0, sgd_momentum=0.9, adam_beta_1=0.9, adam_beta_2=0.999, epsilon=1e-7, decay=0, verbose=0, random_state=1000, n_classes=2, - n_inputs=42, callbacks=[], **kwargs): + n_inputs=42, callbacks=None, **kwargs): + """ + Create Keras neural network model and compile it. 
+ Args: + inputs (int): Number of input predictor variables + outputs (int): Number of output predictor variables + """ super().__init__(**kwargs) self.hidden_layers = hidden_layers self.hidden_neurons = hidden_neurons @@ -115,63 +123,62 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat self.random_state = random_state self.n_classes = n_classes self.n_inputs = n_inputs - self.callbacks = callbacks + if callbacks is None: + self.callbacks = [] + else: + self.callbacks = callbacks + self.hyperparameters = ["hidden_layers", "hidden_neurons", "evidential", "activation", "output_activation", "optimizer", "sgd_momentum", "adam_beta_1", "adam_beta_2", "epsilon", "loss", "loss_weights", "annealing_coeff", "lr", "kernel_reg", "l1_weight", "l2_weight", "batch_size", "use_noise", "noise_sd", "use_dropout", "dropout_alpha", "epochs", "callbacks", "decay", "verbose", "random_state", "n_classes", "n_inputs"] - """ - Create Keras neural network model and compile it. - Args: - inputs (int): Number of input predictor variables - outputs (int): Number of output predictor variables - """ + if self.kernel_reg == "l1": - self.kernel_reg = L1(self.l1_weight) + self.kernel_reg = keras.regularizers.L1(self.l1_weight) elif self.kernel_reg == "l2": - self.kernel_reg = L2(self.l2_weight) + self.kernel_reg = keras.regularizers.L2(self.l2_weight) elif self.kernel_reg == "l1_l2": - self.kernel_reg = L1L2(self.l1_weight, self.l2_weight) + self.kernel_reg = keras.regularizers.L1L2(self.l1_weight, self.l2_weight) else: self.kernel_reg = None if self.optimizer == "adam": - self.optimizer_obj = Adam(learning_rate=self.lr, + self.optimizer_obj = optimizers.Adam(learning_rate=self.lr, beta_1=self.adam_beta_1, beta_2=self.adam_beta_2, epsilon=self.epsilon) elif self.optimizer == "sgd": - self.optimizer_obj = SGD(learning_rate=self.lr, momentum=self.sgd_momentum) + self.optimizer_obj = optimizers.SGD(learning_rate=self.lr, momentum=self.sgd_momentum) if self.evidential: self.output_activation = "linear" self.model_layers = [] - self.model_layers.append(Dense(self.n_inputs, + self.model_layers.append(layers.Dense(self.n_inputs, activation=self.activation, kernel_regularizer=self.kernel_reg, name="input_dense")) for h in range(self.hidden_layers): - self.model_layers.append(Dense(self.hidden_neurons, + self.model_layers.append(layers.Dense(self.hidden_neurons, activation=self.activation, kernel_regularizer=self.kernel_reg, name=f"dense_{h:02d}")) if self.use_dropout: - self.model_layers.append(Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) + self.model_layers.append(layers.Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) if self.use_noise: - self.model_layers.append(GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) + self.model_layers.append(layers.GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) - self.model_layers.append(Dense(self.n_classes, + self.model_layers.append(layers.Dense(self.n_classes, activation=self.output_activation, name="dense_output")) def call(self, inputs): mod = self.model_layers[0](inputs) - for l in range(1, len(self.model_layers)): - mod = self.model_layers[l](mod) + for layer in range(1, len(self.model_layers)): + mod = self.model_layers[layer](mod) return mod @@ -194,7 +201,10 @@ def fit(self, x=None, y=None, **kwargs): return hist def predict(self, x, return_uncertainties=True, **kwargs): - """Args: + """ + Make a prediction with the trained model. 
+ + Args: x: Input data batch_size: Size of batch to predict return_uncertainties: Returns derived uncertainties from evidential distribution parameters. @@ -213,7 +223,8 @@ def predict(self, x, return_uncertainties=True, **kwargs): else: return output - def calc_uncertainty(self, y_pred): + @staticmethod + def calc_uncertainty(y_pred): num_classes = y_pred.shape[-1] evidence = ops.relu(y_pred) alpha = evidence + 1 @@ -283,11 +294,11 @@ class RegressorDNN(keras.models.Model): metrics: Optional list of metrics to monitor during training. Example: - When evidential==True or uncertainty==True, the output activation and the loss function will be overridden - under the hood. If both are True, the evidential model will override. When both are set to False, - it will train a generic DNN with a linear output activation and the specified loss function. - 'evi_coeff' is only used when evidential==True and is otherwise ignored. - Note: Model compilation happens under the hood when .fit() is called. + When evidential==True or uncertainty==True, the output activation and the loss function will be overridden + under the hood. If both are True, the evidential model will override. When both are set to False, + it will train a generic DNN with a linear output activation and the specified loss function. + 'evi_coeff' is only used when evidential==True and is otherwise ignored. + Note: Model compilation happens under the hood when .fit() is called.:: n_samples = 1000 n_features = 23 @@ -325,6 +336,7 @@ class RegressorDNN(keras.models.Model): epochs=10) model.fit(x_train, y_train) p = model.predict(x_train, return_uncertainties=False) + """ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activation="relu", optimizer="adam", loss_weights=None, use_noise=False, noise_sd=0.01, lr=0.00001, use_dropout=False, dropout_alpha=0.1, @@ -371,48 +383,49 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat "verbose", "n_inputs", "n_output_tasks", "epsilon", "evi_coeff", "uncertainty"] if self.kernel_reg == "l1": - self.kernel_reg = L1(self.l1_weight) + self.kernel_reg = keras.regularizers.L1(self.l1_weight) elif self.kernel_reg == "l2": - self.kernel_reg = L2(self.l2_weight) + self.kernel_reg = keras.regularizers.L2(self.l2_weight) elif self.kernel_reg == "l1_l2": - self.kernel_reg = L1L2(self.l1_weight, self.l2_weight) + self.kernel_reg = keras.regularizers.L1L2(self.l1_weight, self.l2_weight) else: self.kernel_reg = None if self.optimizer == "adam": - self.optimizer_obj = Adam(learning_rate=self.lr, + self.optimizer_obj = optimizers.Adam(learning_rate=self.lr, beta_1=self.adam_beta_1, beta_2=self.adam_beta_2, epsilon=self.epsilon) elif self.optimizer == "sgd": - self.optimizer_obj = SGD(learning_rate=self.lr, momentum=self.sgd_momentum) + self.optimizer_obj = optimizers.SGD(learning_rate=self.lr, momentum=self.sgd_momentum) self.model_layers = [] - self.model_layers.append(Dense(self.n_inputs, + self.model_layers.append(layers.Dense(self.n_inputs, activation=self.activation, kernel_regularizer=self.kernel_reg, name="input_dense")) for h in range(self.hidden_layers): - self.model_layers.append(Dense(self.hidden_neurons, + self.model_layers.append(layers.Dense(self.hidden_neurons, activation=self.activation, kernel_regularizer=self.kernel_reg, name=f"dense_{h:02d}")) if self.use_dropout: - self.model_layers.append(Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) + self.model_layers.append(layers.Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) if 
self.use_noise: - self.model_layers.append(GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) + self.model_layers.append(layers.GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) if self.evidential: self.model_layers.append(DenseNormalGamma(self.n_output_tasks, name="dense_output")) elif self.uncertainty: self.model_layers.append(DenseNormal(self.n_output_tasks, name="dense_output")) else: - self.model_layers.append(Dense(self.n_output_tasks, name="dense_output")) + self.model_layers.append(layers.Dense(self.n_output_tasks, name="dense_output")) + def call(self, inputs): layer_output = self.model_layers[0](inputs) - for l in range(1, len(self.model_layers)): - layer_output = self.model_layers[l](layer_output) + for layer in range(1, len(self.model_layers)): + layer_output = self.model_layers[layer](layer_output) return layer_output @@ -439,14 +452,14 @@ def predict(self, x, return_uncertainties=True, batch_size=1000, **kwargs): If return_uncertainties is True: np.array(mu, aleatoric uncertainty, epistemic uncertainty) Else If return_uncertainties is False: np.array(mu, gamma, alpha, beta) """ - if type(return_uncertainties) != bool: + if not isinstance(return_uncertainties, bool): raise ValueError("return_uncertainties must be a boolean") if (not self.evidential) and (not self.uncertainty) and return_uncertainties: raise NotImplementedError("You can only return uncertainty estimates when 'evidential' or 'uncertainty' is" " True. Otherwise you can set 'return_uncertainties' to False to return predictions.") - elif (self.evidential) and return_uncertainties: + elif self.evidential and return_uncertainties: return self.calc_uncertainties(super().predict(x, batch_size=batch_size)) else: diff --git a/mlguess/plotting.py b/mlguess/plotting.py index 8560f30..69b799a 100644 --- a/mlguess/plotting.py +++ b/mlguess/plotting.py @@ -41,12 +41,80 @@ def plot_confusion_matrix(y_true, y_pred, classes, model_name, normalize=False, color="white" if cm[i, j] > thresh else "black", fontsize=10) - if filename: - path = f'/glade/u/home/jwillson/winter-ptype/images/{model_name}/' - plt.savefig(path + filename, dpi=300, bbox_inches="tight") + if filename is not None: + plt.savefig(filename, dpi=300, bbox_inches="tight") return ax +def compute_cov(df, col="pred_conf", quan="uncertainty", ascending=False): + df = df.copy() + df = df.sort_values(col, ascending=ascending) + df["dummy"] = 1 + df[f"cu_{quan}"] = df[quan].cumsum() / df["dummy"].cumsum() + df[f"cu_{col}"] = df[col].cumsum() / df["dummy"].cumsum() + df[f"{col}_cov"] = df["dummy"].cumsum() / len(df) + return df + + +def coverage_figures( + test_data, output_cols, colors=None, title=None, save_location=None +): + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3.5), sharey="col") + + test_data["accuracy"] = ( + test_data["pred_label"] == test_data["true_label"] + ).values.astype(int) + + _test_data_sorted = compute_cov(test_data, col="pred_conf", quan="accuracy") + ax1.plot(_test_data_sorted["pred_conf_cov"], _test_data_sorted["cu_accuracy"]) + + num_classes = test_data["true_label"].nunique() + for label in range(num_classes): + cond = test_data["true_label"] == label + _test_data_sorted = compute_cov( + test_data[cond], col="pred_conf", quan="accuracy" + ) + ax2.plot( + _test_data_sorted["pred_conf_cov"], + _test_data_sorted["cu_accuracy"], + c=colors[label], + ) + + if "evidential" in test_data: + _test_data_sorted = compute_cov( + test_data, col="evidential", quan="accuracy", ascending=True + ) + ax1.plot( + _test_data_sorted["evidential_cov"], 
+ _test_data_sorted["cu_accuracy"], + ls="--", + ) + for label in range(num_classes): + c = test_data["true_label"] == label + _test_data_sorted = compute_cov( + test_data[c], col="evidential", quan="accuracy", ascending=True + ) + ax2.plot( + _test_data_sorted["evidential_cov"], + _test_data_sorted["cu_accuracy"], + c=colors[label], + ls="--", + ) + + if title is not None: + ax1.set_title(title) + + ax1.set_ylabel("Cumulative accuracy") + ax1.set_xlabel("Coverage (sorted by confidence/uncertainty)") + ax2.set_xlabel("Coverage (sorted by confidence/uncertainty)") + ax1.legend(["Confidence", "Uncertainty"], loc="best") + ax2.legend(output_cols, loc="best") + plt.tight_layout() + + if save_location: + plt.savefig(save_location, dpi=300, bbox_inches="tight") + def conus_plot(df, dataset = "mping", diff --git a/mlguess/reliability.py b/mlguess/reliability.py new file mode 100644 index 0000000..e3189a3 --- /dev/null +++ b/mlguess/reliability.py @@ -0,0 +1,296 @@ +import numpy as np +import matplotlib.pyplot as plt + + +# https://github.com/hollance/reliability-diagrams/blob/master/reliability_diagrams.py + +# This file combines some analysis and plotting functions + +def compute_calibration(true_labels, pred_labels, confidences, num_bins=10): + """Collects predictions into bins used to draw a reliability diagram. + The true_labels, pred_labels, confidences arguments must be NumPy arrays; + pred_labels and true_labels may contain numeric or string labels. + For a multi-class model, the predicted label and confidence should be those + of the highest scoring class. + + Args: + true_labels: the true labels for the test examples + pred_labels: the predicted labels for the test examples + confidences: the predicted confidences for the test examples + num_bins: number of bins + + Returns: + a dictionary containing the following NumPy arrays: + accuracies: the average accuracy for each bin + confidences: the average confidence for each bin + counts: the number of examples in each bin + bins: the confidence thresholds for each bin + avg_accuracy: the accuracy over the entire test set + avg_confidence: the average confidence over the entire test set + expected_calibration_error: a weighted average of all calibration gaps + max_calibration_error: the largest calibration gap across all bins + """ + if len(confidences) != len(pred_labels) or len(confidences) != len(true_labels): + raise TypeError("Lengths of confidences, pred_labels, and true_labels should be equal.") + + if num_bins <= 0: + raise TypeError("Number of bins should be greater than zero.") + + bins = np.linspace(0.0, 1.0, num_bins + 1) + indices = np.digitize(confidences, bins, right=True) + + bin_accuracies = np.zeros(num_bins, dtype=np.float64) + bin_confidences = np.zeros(num_bins, dtype=np.float64) + bin_counts = np.zeros(num_bins, dtype=np.int64) + + for b in range(num_bins): + selected = np.where(indices == b + 1)[0] + if len(selected) > 0: + bin_accuracies[b] = np.mean(true_labels[selected] == pred_labels[selected]) + bin_confidences[b] = np.mean(confidences[selected]) + bin_counts[b] = len(selected) + + avg_acc = np.sum(bin_accuracies * bin_counts) / np.sum(bin_counts) + avg_conf = np.sum(bin_confidences * bin_counts) / np.sum(bin_counts) + + gaps = np.abs(bin_accuracies - bin_confidences) + ece = np.sum(gaps * bin_counts) / np.sum(bin_counts) + mce = np.max(gaps) + + return {"accuracies": bin_accuracies, + "confidences": bin_confidences, + "counts": bin_counts, + "bins": bins, + "avg_accuracy": avg_acc, + "avg_confidence": avg_conf, + 
"expected_calibration_error": ece, + "max_calibration_error": mce} + + +def _reliability_diagram_subplot(ax, bin_data, + draw_ece=True, + draw_bin_importance=False, + title="Reliability Diagram", + xlabel="Confidence", + ylabel="Expected Accuracy", + ): + """ + Draws a reliability diagram into the specified subplot. + + Args: + ax (matplotlib.axes.Axes): The subplot to draw the reliability diagram on. + bin_data (dict): A dictionary containing the following keys: + - 'accuracies' (numpy.ndarray): Array of accuracy values for each bin. + - 'confidences' (numpy.ndarray): Array of confidence values for each bin. + - 'counts' (numpy.ndarray): Array of counts for each bin. + - 'bins' (numpy.ndarray): Array of bin edges. + draw_ece (bool, optional): Flag to draw the Expected Calibration Error (ECE) value on the diagram. Default is True. + draw_bin_importance (bool or str, optional): Flag to adjust the appearance of the bins based on their importance. + If False, all bins have equal appearance. If True, bins are adjusted based on their counts. + If 'alpha', bins are adjusted based on counts using varying alpha values. If 'width', bins are adjusted + based on counts using varying bar widths. Default is False. + title (str, optional): The title of the reliability diagram. Default is "Reliability Diagram". + xlabel (str, optional): The label for the x-axis. Default is "Confidence". + ylabel (str, optional): The label for the y-axis. Default is "Expected Accuracy". + + Returns: + plot + """ + + accuracies = bin_data["accuracies"] + confidences = bin_data["confidences"] + counts = bin_data["counts"] + bins = bin_data["bins"] + + bin_size = 1.0 / len(counts) + positions = bins[:-1] + bin_size / 2.0 + + widths = bin_size + alphas = 0.3 + min_count = np.min(counts) + max_count = np.max(counts) + normalized_counts = (counts - min_count) / (max_count - min_count) + + if draw_bin_importance == "alpha": + alphas = 0.2 + 0.8 * normalized_counts + elif draw_bin_importance == "width": + widths = 0.1 * bin_size + 0.9 * bin_size * normalized_counts + + colors = np.zeros((len(counts), 4)) + colors[:, 0] = 240 / 255. + colors[:, 1] = 60 / 255. + colors[:, 2] = 60 / 255. 
+ colors[:, 3] = alphas + + gap_plt = ax.bar(positions, np.abs(accuracies - confidences), + bottom=np.minimum(accuracies, confidences), width=widths, + edgecolor=colors, color=colors, linewidth=1, label="Gap") + + acc_plt = ax.bar(positions, 0, bottom=accuracies, width=widths, + edgecolor="black", color="black", alpha=1.0, linewidth=3, + label="Accuracy") + + ax.set_aspect("equal") + ax.plot([0, 1], [0, 1], linestyle="--", color="gray") + + if draw_ece: + ece = (bin_data["expected_calibration_error"] * 100) + ax.text(0.98, 0.02, "ECE=%.2f" % ece, color="black", + ha="right", va="bottom", transform=ax.transAxes) + + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + ax.legend(handles=[gap_plt, acc_plt]) + + +def _confidence_histogram_subplot(ax, bin_data, + draw_averages=True, + title="Examples per bin", + xlabel="Confidence", + ylabel="Count"): + """Draws a confidence histogram into a subplot.""" + counts = bin_data["counts"] + bins = bin_data["bins"] + + bin_size = 1.0 / len(counts) + positions = bins[:-1] + bin_size / 2.0 + + ax.bar(positions, counts, width=bin_size * 0.9) + + ax.set_xlim(0, 1) + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + if draw_averages: + acc_plt = ax.axvline(x=bin_data["avg_accuracy"], ls="solid", lw=3, + c="black", label="Accuracy") + conf_plt = ax.axvline(x=bin_data["avg_confidence"], ls="dotted", lw=3, + c="#444", label="Avg. confidence") + ax.legend(handles=[acc_plt, conf_plt]) + + +def _reliability_diagram_combined(bin_data, + draw_ece, draw_bin_importance, draw_averages, + title, figsize, dpi, return_fig): + """Draws a reliability diagram and confidence histogram using the output + from compute_calibration().""" + figsize = (figsize[0], figsize[0] * 1.4) + + fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=figsize, dpi=dpi, + gridspec_kw={"height_ratios": [4, 1]}) + + plt.tight_layout() + plt.subplots_adjust(hspace=-0.1) + + _reliability_diagram_subplot(ax[0], bin_data, draw_ece, draw_bin_importance, + title=title, xlabel="") + + # Draw the confidence histogram upside down. + orig_counts = bin_data["counts"] + bin_data["counts"] = -bin_data["counts"] + _confidence_histogram_subplot(ax[1], bin_data, draw_averages, title="") + bin_data["counts"] = orig_counts + + # Also negate the ticks for the upside-down histogram. + new_ticks = np.abs(ax[1].get_yticks()).astype(int) + ax[1].set_yticklabels(new_ticks) + + if return_fig: return fig + + +def reliability_diagram(true_labels, pred_labels, confidences, num_bins=10, + draw_ece=True, draw_bin_importance=False, + draw_averages=True, title="Reliability Diagram", + figsize=(6, 6), dpi=72, return_fig=False): + """Draws a reliability diagram and confidence histogram in a single plot. + + First, the model's predictions are divided up into bins based on their + confidence scores. + The reliability diagram shows the gap between average accuracy and average + confidence in each bin. These are the red bars. + The black line is the accuracy, the other end of the bar is the confidence. + Ideally, there is no gap and the black line is on the dotted diagonal. + In that case, the model is properly calibrated and we can interpret the + confidence scores as probabilities. + The confidence histogram visualizes how many examples are in each bin. + This is useful for judging how much each bin contributes to the calibration + error. + The confidence histogram also shows the overall accuracy and confidence.
+ The closer these two lines are together, the better the calibration. + + The ECE or Expected Calibration Error is a summary statistic that gives the + difference in expectation between confidence and accuracy. In other words, + it's a weighted average of the gaps across all bins. A lower ECE is better. + + Args: + true_labels: the true labels for the test examples + pred_labels: the predicted labels for the test examples + confidences: the predicted confidences for the test examples + num_bins: number of bins + draw_ece: whether to include the Expected Calibration Error + draw_bin_importance: whether to represent how much each bin contributes + to the total accuracy: False, "alpha", "widths" + draw_averages: whether to draw the overall accuracy and confidence in + the confidence histogram + title: optional title for the plot + figsize: setting for matplotlib; height is ignored + dpi: setting for matplotlib + return_fig: if True, returns the matplotlib Figure object + """ + bin_data = compute_calibration(true_labels, pred_labels, confidences, num_bins) + return _reliability_diagram_combined(bin_data, draw_ece, draw_bin_importance, + draw_averages, title, figsize=figsize, + dpi=dpi, return_fig=return_fig) + + +def reliability_diagrams(results, num_bins=10, + draw_ece=True, draw_bin_importance=False, + num_cols=4, dpi=72, return_fig=False): + """Draws reliability diagrams for one or more models. + + Args: + results: dictionary where the key is the model name and the value is + a dictionary containing the true labels, predicated labels, and + confidences for this model + num_bins: number of bins + draw_ece: whether to include the Expected Calibration Error + draw_bin_importance: whether to represent how much each bin contributes + to the total accuracy: False, "alpha", "widths" + num_cols: how wide to make the plot + dpi: setting for matplotlib + return_fig: if True, returns the matplotlib Figure object + """ + ncols = num_cols + nrows = (len(results) + ncols - 1) // ncols + figsize = (ncols * 4, nrows * 4) + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True, + figsize=figsize, dpi=dpi, constrained_layout=True) + + for i, (plot_name, data) in enumerate(results.items()): + y_true = data["true_labels"] + y_pred = data["pred_labels"] + y_conf = data["confidences"] + + bin_data = compute_calibration(y_true, y_pred, y_conf, num_bins) + + row = i // ncols + col = i % ncols + _reliability_diagram_subplot(ax[row, col], bin_data, draw_ece, + draw_bin_importance, + title=plot_name, # "\n".join(plot_name.split()), + xlabel="Confidence" if row == nrows - 1 else "", + ylabel="Expected Accuracy" if col == 0 else "") + + for i in range(i + 1, nrows * ncols): + row = i // ncols + col = i % ncols + ax[row, col].axis("off") + + if return_fig: return fig \ No newline at end of file diff --git a/mlguess/torch/class_losses.py b/mlguess/torch/class_losses.py index dd4856b..b3d5f7f 100644 --- a/mlguess/torch/class_losses.py +++ b/mlguess/torch/class_losses.py @@ -3,7 +3,6 @@ # Adapted from https://github.com/dougbrion/pytorch-classification-uncertainty/blob/master/losses.py - def get_device(): """Get the device for PyTorch operations. 
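Since `evaluate_ptype.py` now imports these helpers from `mlguess.reliability` (see the import change at the top of this diff), a usage sketch with synthetic labels:

```python
import numpy as np
from mlguess.reliability import compute_calibration, reliability_diagram

rng = np.random.default_rng(0)
true_labels = rng.integers(0, 3, size=500)       # 3-class toy problem
pred_labels = rng.integers(0, 3, size=500)
confidences = rng.uniform(1 / 3, 1.0, size=500)  # top-class confidence

stats = compute_calibration(true_labels, pred_labels, confidences, num_bins=10)
print(f"ECE: {stats['expected_calibration_error']:.3f}")

fig = reliability_diagram(true_labels, pred_labels, confidences,
                          num_bins=10, return_fig=True)
fig.savefig("reliability.png", dpi=300, bbox_inches="tight")
```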
diff --git a/mlguess/torch/distributed.py b/mlguess/torch/distributed.py index a1208b9..d5e1075 100644 --- a/mlguess/torch/distributed.py +++ b/mlguess/torch/distributed.py @@ -6,20 +6,18 @@ MixedPrecision, CPUOffload ) -from torch.distributed.fsdp.wrap import ( - transformer_auto_wrap_policy, - size_based_auto_wrap_policy, -) +from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy + from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( checkpoint_wrapper, CheckpointImpl, apply_activation_checkpointing, ) -from credit.models.checkpoint import ( +from mlguess.torch.checkpoint import ( TorchFSDPModel ) from torch.nn.parallel import DistributedDataParallel as DDP -from credit.mixed_precision import parse_dtype +from mlguess.torch.mixed_precision import parse_dtype import functools import logging diff --git a/mlguess/torch/layers.py b/mlguess/torch/layers.py index 053f4c2..7d0c413 100644 --- a/mlguess/torch/layers.py +++ b/mlguess/torch/layers.py @@ -1,4 +1,3 @@ - import torch from torch import nn from torch.nn.utils import spectral_norm as SpectralNorm @@ -29,9 +28,9 @@ def __init__(self, in_channels, out_channels, spectral_norm=True): """ super().__init__() if spectral_norm: - self.linear = SpectralNorm(nn.Linear(in_channels, out_channels*4)) + self.linear = SpectralNorm(nn.Linear(in_channels, out_channels * 4)) else: - self.linear = nn.Linear(in_channels, out_channels*4) + self.linear = nn.Linear(in_channels, out_channels * 4) def evidence(self, x): """Applies a log transformation to the input with a shift. diff --git a/mlguess/torch/mc_dropout.py b/mlguess/torch/mc_dropout.py index ffbc6d4..b0b924e 100644 --- a/mlguess/torch/mc_dropout.py +++ b/mlguess/torch/mc_dropout.py @@ -11,11 +11,12 @@ def enable_dropout(model): if m.__class__.__name__.startswith('Dropout'): m.train() + def monte_carlo_dropout(data_loader, forward_passes, model, n_classes, - n_samples, + n_samples, batch_size=1024, uncertainty=False): """Function to get the monte-carlo samples and uncertainty estimates @@ -43,7 +44,7 @@ def monte_carlo_dropout(data_loader, if uncertainty: evidence = relu_evidence(output) alpha = evidence + 1 - #u = num_classes / torch.sum(alpha, dim=1, keepdim=True) + # u = num_classes / torch.sum(alpha, dim=1, keepdim=True) output = alpha / torch.sum(alpha, dim=1, keepdim=True) else: output = F.softmax(output, dim=1) # shape (n_samples, n_classes) @@ -64,10 +65,10 @@ def monte_carlo_dropout(data_loader, if n_classes > 1: epsilon = sys.float_info.min # Calculating entropy across multiple MCD forward passes - entropy = -np.sum(mean*np.log(mean + epsilon), axis=-1) # shape (n_samples,) + entropy = -np.sum(mean * np.log(mean + epsilon), axis=-1) # shape (n_samples,) # Calculating mutual information across multiple MCD forward passes - mutual_info = entropy - np.mean(np.sum(-dropout_predictions*np.log(dropout_predictions + epsilon), + mutual_info = entropy - np.mean(np.sum(-dropout_predictions * np.log(dropout_predictions + epsilon), axis=-1), axis=0) # shape (n_samples,) results["entropy"] = entropy diff --git a/mlguess/torch/metrics.py b/mlguess/torch/metrics.py index bd84442..c97e675 100644 --- a/mlguess/torch/metrics.py +++ b/mlguess/torch/metrics.py @@ -2,6 +2,7 @@ from sklearn.metrics import precision_recall_fscore_support, roc_auc_score from hagelslag.evaluation.ProbabilityMetrics import DistributedROC + class MetricsCalculator: """A class to calculate various metrics for model evaluation, including CSI, average accuracy, precision, recall, F1 score, AUC, MCE, and 
ECE. @@ -112,8 +113,8 @@ def ave_acc(self, true_labels, pred_labels): return np.mean( [ ( - true_labels[np.where(true_labels == _label)] - == pred_labels[np.where(true_labels == _label)] + true_labels[np.where(true_labels == _label)] + == pred_labels[np.where(true_labels == _label)] ).mean() for _label in np.unique(true_labels) ] @@ -136,7 +137,7 @@ def mce(self, true_labels, pred_probs): mce = 0.0 for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): in_bin = (confidences > bin_lower).astype(float) * ( - confidences <= bin_upper + confidences <= bin_upper ).astype(float) prop_in_bin = in_bin.astype(float).mean() in_bin = in_bin.squeeze(-1).astype(int) @@ -167,7 +168,7 @@ def ece(self, true_labels, pred_probs): ece = [] for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): in_bin = (confidences > bin_lower).astype(float) * ( - confidences <= bin_upper + confidences <= bin_upper ).astype(float) prop_in_bin = in_bin.astype(float).mean() in_bin = in_bin.squeeze(-1).astype(int) @@ -176,7 +177,7 @@ def ece(self, true_labels, pred_probs): accuracy_in_bin = accuracies[in_bin].astype(float).mean() avg_confidence_in_bin = confidences[in_bin].mean() avg_calibration = ( - np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin + np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin ) ece.append(avg_calibration) except ValueError: diff --git a/mlguess/torch/mixed_precision.py b/mlguess/torch/mixed_precision.py new file mode 100644 index 0000000..82131c8 --- /dev/null +++ b/mlguess/torch/mixed_precision.py @@ -0,0 +1,38 @@ +import torch + + +def parse_dtype(dtype_str: str) -> torch.dtype: + """ + Parses a string representation of a torch dtype and returns the corresponding torch.dtype object. + + Args: + dtype_str (str): The string representation of the dtype (e.g., "float32", "int64"). + + Returns: + torch.dtype: The corresponding torch.dtype object. + """ + dtype_map = { + "float32": torch.float32, + "float64": torch.float64, + "float16": torch.float16, + "float": torch.float32, # Alias for float32 + "double": torch.float64, # Alias for float64 + "half": torch.float16, # Alias for float16 + "bfloat16": torch.bfloat16, + "int8": torch.int8, + "uint8": torch.uint8, + "int16": torch.int16, + "int32": torch.int32, + "int64": torch.int64, + "short": torch.int16, # Alias for int16 + "long": torch.int64, # Alias for int64 + "bool": torch.bool + } + + # Convert the dtype string to lowercase for case-insensitive matching + dtype_str_lower = dtype_str.lower() + + if dtype_str_lower in dtype_map: + return dtype_map[dtype_str_lower] + else: + raise ValueError(f"Invalid dtype string: {dtype_str}") \ No newline at end of file diff --git a/mlguess/torch/models.py b/mlguess/torch/models.py index 9117b82..684a126 100644 --- a/mlguess/torch/models.py +++ b/mlguess/torch/models.py @@ -10,7 +10,6 @@ from mlguess.torch.checkpoint import load_model_state from mlguess.torch.layers import LinearNormalGamma - warnings.filterwarnings("ignore") logger = logging.getLogger(__name__) @@ -71,6 +70,7 @@ def init_weights(net, init_type='normal', init_gain=0.0, verbose=True): - 'kaiming': Kaiming initialization. - 'orthogonal': Orthogonal initialization. """ + def init_func(m): """Initialization function for network layers. 
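The new `mlguess/torch/mixed_precision.py` module above is self-contained, so its behavior can be checked directly; a short usage sketch of `parse_dtype`:

```python
import torch
from mlguess.torch.mixed_precision import parse_dtype

assert parse_dtype("float16") is torch.float16
assert parse_dtype("BFLOAT16") is torch.bfloat16  # matching is case-insensitive
assert parse_dtype("long") is torch.int64         # aliases resolve to canonical dtypes

try:
    parse_dtype("float8")  # not in the map
except ValueError as err:
    print(err)  # Invalid dtype string: float8
```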
@@ -95,9 +95,11 @@ def init_func(m): raise NotImplementedError('initialization method [%s] is not implemented' % init_type) if hasattr(m, 'bias') and m.bias is not None: init.constant_(m.bias.data, 0.0) - elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + elif classname.find( + 'BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. init.normal_(m.weight.data, 1.0, init_gain) init.constant_(m.bias.data, 0.0) + if verbose: logging.info('Initializing network with %s' % init_type) net.apply(init_func) @@ -116,6 +118,7 @@ class DNN(nn.Module): weight_init (bool): Whether to initialize weights. Default is False. num_layers (int): Number of layers to create if layer_size is a single number. Default is None. """ + def __init__(self, input_size, output_size, @@ -262,12 +265,12 @@ def predict_uncertainty(self, input, y_scaler=None): mu = y_scaler.inverse_transform(mu) mu = torch.from_numpy(mu).to(aleatoric.device) - # Torch version of some of the sklearn scalers -- this needs updated later - # MinMaxScaler inverse transform - # if y_scaler: - # min_val = torch.tensor(y_scaler.data_min_, device=mu.device) - # max_val = torch.tensor(y_scaler.data_max_, device=mu.device) - # mu = mu * (max_val - min_val) + min_val + # Torch version of some of the sklearn scalers -- this needs updated later + # MinMaxScaler inverse transform + # if y_scaler: + # min_val = torch.tensor(y_scaler.data_min_, device=mu.device) + # max_val = torch.tensor(y_scaler.data_max_, device=mu.device) + # mu = mu * (max_val - min_val) + min_val for i in range(mu.shape[-1]): aleatoric[:, i] *= self.training_var[i] @@ -332,6 +335,7 @@ def from_config(cls, conf, device="cpu"): return model + class CategoricalDNN(DNN): def calc_uncertainty(self, y_pred): diff --git a/mlguess/torch/regression_losses.py b/mlguess/torch/regression_losses.py index 54dd5b5..be69234 100644 --- a/mlguess/torch/regression_losses.py +++ b/mlguess/torch/regression_losses.py @@ -14,6 +14,7 @@ class EvidentialRegressionLoss: Args: coef (float, optional): Coefficient for the regularization term. Defaults to 1.0. """ + def __init__(self, coef=1.0): self.coef = coef @@ -35,10 +36,10 @@ def normal_inverse_gamma_nll(self, y, gamma, v, alpha, beta): """ two_blambda = 2 * beta * (1 + v) + tol nll = 0.5 * torch.log(np.pi / (v + tol)) \ - - alpha * torch.log(two_blambda + tol) \ - + (alpha + 0.5) * torch.log(v * (y - gamma) ** 2 + two_blambda + tol) \ - + torch.lgamma(alpha) \ - - torch.lgamma(alpha + 0.5) + - alpha * torch.log(two_blambda + tol) \ + + (alpha + 0.5) * torch.log(v * (y - gamma) ** 2 + two_blambda + tol) \ + + torch.lgamma(alpha) \ + - torch.lgamma(alpha + 0.5) return nll @@ -97,9 +98,9 @@ def modified_mse(gamma, nu, alpha, beta, target, reduction='mean'): Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ - mse = (gamma-target)**2 + mse = (gamma - target) ** 2 c = get_mse_coef(gamma, nu, alpha, beta, target).detach() - mod_mse = mse*c + mod_mse = mse * c if reduction == 'mean': return mod_mse.mean() @@ -131,7 +132,7 @@ def get_mse_coef(gamma, nu, alpha, beta, y): nu_eff = check_mse_efficiency_nu(gamma, nu, alpha, beta) delta = (gamma - y).abs() min_bound = torch.min(nu_eff, alpha_eff).min() - c = (min_bound.sqrt()/(delta + tol)).detach() + c = (min_bound.sqrt() / (delta + tol)).detach() return torch.clip(c, min=False, max=1.) 
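As a cross-check on the reflowed `normal_inverse_gamma_nll` above (renderable via the `sphinx.ext.mathjax` extension enabled earlier in this diff), the quantity computed term-by-term is the Normal-Inverse-Gamma negative log marginal likelihood of Amini et al. (2020):

$$
\mathrm{NLL}(y \mid \gamma, \nu, \alpha, \beta) = \frac{1}{2}\log\frac{\pi}{\nu} - \alpha\log\Omega + \left(\alpha + \frac{1}{2}\right)\log\!\left(\nu\,(y-\gamma)^2 + \Omega\right) + \log\frac{\Gamma(\alpha)}{\Gamma\!\left(\alpha + \frac{1}{2}\right)}, \qquad \Omega = 2\beta(1+\nu),
$$

with a small `tol` added inside each logarithm in the code for numerical stability.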
@@ -153,7 +154,7 @@ def check_mse_efficiency_alpha(nu, alpha, beta): where f => the NLL loss (BayesianDTI.loss.MarginalLikelihood) """ - right = (torch.exp((torch.digamma(alpha+0.5)-torch.digamma(alpha))) - 1)*2*beta*(1+nu) / (nu + 1e-8) + right = (torch.exp((torch.digamma(alpha + 0.5) - torch.digamma(alpha))) - 1) * 2 * beta * (1 + nu) / (nu + 1e-8) return right.detach() @@ -184,17 +185,19 @@ class EvidentialMarginalLikelihood(torch.nn.modules.loss._Loss): """Marginal likelihood error of prior network. The target value is not a distribution (mu, std), but a just value. - This is a negative log marginal likelihood, with integral mu and sigma. - - Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf + Reference: Amini et al. 2020 (https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf) Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean'): super(EvidentialMarginalLikelihood, self).__init__(size_average, reduce, reduction) def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, beta: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Args: + """ + Conduct the forward pass through the loss. + + Args: gamma (torch.Tensor): gamma output value of the evidential network nu (torch.Tensor): nu output value of the evidential network alpha (torch.Tensor): alpha output value of the evidential network @@ -203,16 +206,12 @@ def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, be Return: (Tensor) Negative log marginal likelihood of EvidentialNet - p(y|m) = Student-t(y; gamma, (beta(1+nu))/(nu*alpha) , 2*alpha) - then, the negative log likelihood is (CAUTION QUITE COMPLEX!) - NLL = -log(p(y|m)) = - log(3.14/nu)*0.5 - alpha*log(2*beta*(1 + nu)) + (alpha + 0.5)*log( nu(target - gamma)^2 + 2*beta(1 + nu) ) - + log(GammaFunc(alpha)/GammaFunc(alpha + 0.5)) + """ pi = torch.tensor(np.pi) - x1 = torch.log(pi/(nu + tol))*0.5 - x2 = -alpha*torch.log(2.*beta*(1. + nu) + tol) - x3 = (alpha + 0.5)*torch.log(nu*(target - gamma)**2 + 2.*beta*(1. + nu) + tol) + x1 = torch.log(pi / (nu + tol)) * 0.5 + x2 = -alpha * torch.log(2. * beta * (1. + nu) + tol) + x3 = (alpha + 0.5) * torch.log(nu * (target - gamma) ** 2 + 2. * beta * (1. + nu) + tol) x4 = torch.lgamma(alpha + tol) - torch.lgamma(alpha + 0.5 + tol) if self.reduction == 'mean': return (x1 + x2 + x3 + x4).mean() @@ -229,24 +228,28 @@ class EvidenceRegularizer(torch.nn.modules.loss._Loss): Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean', coef=0.1): super(EvidenceRegularizer, self).__init__(size_average, reduce, reduction) self.coef = coef def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Args: + """ + Forward pass through the loss. 
+ + Args: gamma (torch.Tensor): gamma output value of the evidential network nu (torch.Tensor): nu output value of the evidential network alpha (torch.Tensor): alpha output value of the evidential network target (torch.Tensor): target value - Return: + Returns: (Tensor) prior network regularization Loss = |y - gamma|*(2*nu + alpha) * factor """ - loss_value = torch.abs(target - gamma)*(2*nu + alpha) * self.coef + loss_value = torch.abs(target - gamma) * (2 * nu + alpha) * self.coef if self.reduction == 'mean': return loss_value.mean() elif self.reduction == 'sum': @@ -264,6 +267,7 @@ class LipschitzMSELoss(torch.nn.Module): coef (float, optional): Coefficient for the regularization term. Defaults to 0.1. reduction (str, optional): Specifies the method to reduce the loss over the batch. Can be 'mean', 'sum', or 'none'. Defaults to 'mean'. """ + def __init__(self, tol=1e-8, coef=0.1, reduction='mean'): super(LipschitzMSELoss, self).__init__() self.tol = tol diff --git a/mlguess/torch/scheduler.py b/mlguess/torch/scheduler.py index 0aaeb01..4f5b351 100644 --- a/mlguess/torch/scheduler.py +++ b/mlguess/torch/scheduler.py @@ -13,11 +13,11 @@ def load_scheduler(optimizer, conf): """Load a learning rate scheduler based on the configuration. Parameters: - - optimizer: The PyTorch optimizer. - - conf: The configuration dictionary. + optimizer: The PyTorch optimizer. + conf: The configuration dictionary. Returns: - - scheduler: The PyTorch learning rate scheduler. + scheduler: The PyTorch learning rate scheduler. """ conf = copy.deepcopy(conf) @@ -103,14 +103,18 @@ def lr_lambda_phase1(epoch, num_epochs=100, warmup_epochs=10): class CosineAnnealingWarmupRestarts(LRScheduler): - """optimizer (Optimizer): Wrapped optimizer. - first_cycle_steps (int): First cycle step size. - cycle_mult(float): Cycle steps magnification. Default: -1. - max_lr(float): First cycle's max learning rate. Default: 0.1. - min_lr(float): Min learning rate. Default: 0.001. - warmup_steps(int): Linear warmup step size. Default: 0. - gamma(float): Decrease rate of max learning rate by cycle. Default: 1. - last_epoch (int): The index of last epoch. Default: -1. + """ + Cosine Annealing learning rate scheduler. + + Attributes: + optimizer (Optimizer): Wrapped optimizer. + first_cycle_steps (int): First cycle step size. + cycle_mult(float): Cycle steps magnification. Default: -1. + max_lr(float): First cycle's max learning rate. Default: 0.1. + min_lr(float): Min learning rate. Default: 0.001. + warmup_steps(int): Linear warmup step size. Default: 0. + gamma(float): Decrease rate of max learning rate by cycle. Default: 1. + last_epoch (int): The index of last epoch. Default: -1. """ def __init__( @@ -190,14 +194,14 @@ def step(self, epoch=None): def annealed_probability(epoch, max_epochs=100, min_probability=0.01, max_probability=1.0): """Anneal the termination probability from 1 to a small value. - Parameters: - - epoch: The current epoch. - - max_epochs: The maximum number of epochs for annealing. - - min_probability: The minimum termination probability. - - max_probability: The maximum termination probability. + Args: + epoch: The current epoch. + max_epochs: The maximum number of epochs for annealing. + min_probability: The minimum termination probability. + max_probability: The maximum termination probability. Returns: - - termination_probability: The annealed termination probability. + termination_probability: The annealed termination probability. 
""" # Linear annealing schedule termination_probability = 1.0 - (epoch / max_epochs) * (1.0 - min_probability) diff --git a/mlguess/torch/trainer_classifier.py b/mlguess/torch/trainer_classifier.py index e691060..0e3619c 100644 --- a/mlguess/torch/trainer_classifier.py +++ b/mlguess/torch/trainer_classifier.py @@ -67,7 +67,8 @@ def __init__(self, model, rank, module=False, uncertainty=False): super(Trainer, self).__init__() self.model = model self.rank = rank - self.device = torch.device(f"cuda:{rank % torch.cuda.device_count()}") if torch.cuda.is_available() else torch.device("cpu") + self.device = torch.device( + f"cuda:{rank % torch.cuda.device_count()}") if torch.cuda.is_available() else torch.device("cpu") if module: self.model = self.model.module @@ -76,15 +77,15 @@ def __init__(self, model, rank, module=False, uncertainty=False): # Training function. def train_one_epoch( - self, - epoch, - conf, - trainloader, - optimizer, - criterion, - scaler, - scheduler, - metrics + self, + epoch, + conf, + trainloader, + optimizer, + criterion, + scaler, + scheduler, + metrics ): """Train the model for one epoch. @@ -191,7 +192,8 @@ def train_one_epoch( if self.rank == 0: batch_group_generator.set_description(to_print) - if conf['trainer']['use_scheduler'] and conf['trainer']['scheduler']['scheduler_type'] == "cosine-annealing": + if conf['trainer']['use_scheduler'] and conf['trainer']['scheduler'][ + 'scheduler_type'] == "cosine-annealing": scheduler.step() if i >= batches_per_epoch and i > 0: @@ -207,12 +209,12 @@ def train_one_epoch( return results_dict def validate( - self, - epoch, - conf, - valid_loader, - criterion, - metrics + self, + epoch, + conf, + valid_loader, + criterion, + metrics ): """Validate the model on the validation dataset. @@ -306,12 +308,12 @@ def validate( return results_dict def predict( - self, - conf, - test_loader, - criterion, - metrics, - split=None + self, + conf, + test_loader, + criterion, + metrics, + split=None ): """Make predictions with the model on the test dataset. @@ -413,17 +415,17 @@ def predict( } def fit( - self, - conf, - train_loader, - valid_loader, - optimizer, - train_criterion, - valid_criterion, - scaler, - scheduler, - metrics, - trial=False + self, + conf, + train_loader, + valid_loader, + optimizer, + train_criterion, + valid_criterion, + scaler, + scheduler, + metrics, + trial=False ): """Train and validate the model. 
@@ -560,23 +562,25 @@ def fit( if conf["trainer"]["mode"] != "fsdp": if self.rank == 0: - # Save the current model - logging.info(f"Saving model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") + logging.info( + f"Saving model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") state_dict = { "epoch": epoch, "model_state_dict": self.model.state_dict(), "optimizer_state_dict": optimizer.state_dict(), - 'scheduler_state_dict': scheduler.state_dict() if conf["trainer"]["use_scheduler"] else None, + 'scheduler_state_dict': scheduler.state_dict() if conf["trainer"][ + "use_scheduler"] else None, 'scaler_state_dict': scaler.state_dict() } torch.save(state_dict, f"{save_loc}/checkpoint.pt") else: - logging.info(f"Saving FSDP model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") + logging.info( + f"Saving FSDP model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") # Initialize the checkpoint I/O handler diff --git a/pyproject.toml b/pyproject.toml index 2b1be53..796679d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "miles-guess" -authors = [{name = "John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos", email = "milescore@ucar.edu"}] +authors = [{name = "University Corporation for Atmospheric Research", email = "milescore@ucar.edu"}] readme = "README.md" license = {file = "LICENSE"} dynamic = ["version"] @@ -32,8 +32,7 @@ dependencies = [ "nbsphinx", "myst_parser", "sphinx_book_theme", - "pytest", - "hagelslag" + "hagelslag", ] [project.optional-dependencies] @@ -41,7 +40,7 @@ tensorflow = ["tensorflow>=2.16.0"] tensorflow_gpu = ["tensorflow[and-cuda]>=2.16.0"] -torch = ["torch"] +torch = ["torch", "safetensors"] jax = ["jax"] @@ -55,4 +54,4 @@ version = {file = "mlguess/VERSION"} readme = {file = ["README.md"]} [tool.ruff] -pydocstyle.convention = "google" +lint.pydocstyle.convention = "google"