diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 91fd70f..f27575b 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -15,7 +15,6 @@ jobs: - uses: mamba-org/setup-micromamba@v1 with: environment-file: environment_torch.yml - activate-environment: test - shell: bash -l {0} run: | conda info @@ -28,12 +27,14 @@ jobs: run: | micromamba install ruff # stop the build if there are Python syntax errors or undefined names - ruff . --count --select=E9,F63,F7,F82 --show-source --statistics + ruff check --select=E9,F63,F7,F82 # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - ruff . --count --exit-zero --max-complexity=100 --max-line-length=127 --statistics + ruff check --output-format concise # Checking documentation errors - ruff . --count --select=D --exit-zero --max-complexity=100 --max-line-length=127 --statistics + ruff check --select=D + ruff check --select=D --statistics - name: Test with pytest shell: bash -l {0} run: | + export KERAS_BACKEND="torch" pytest diff --git a/applications/evaluate_ptype.py b/applications/evaluate_ptype.py index 77a7786..7cc1a73 100644 --- a/applications/evaluate_ptype.py +++ b/applications/evaluate_ptype.py @@ -10,12 +10,12 @@ import pandas as pd from sklearn.metrics import precision_recall_fscore_support -from ptype.reliability import ( +from mlguess.reliability import ( compute_calibration, reliability_diagram, reliability_diagrams, ) -from ptype.plotting import ( +from mlguess.plotting import ( plot_confusion_matrix, coverage_figures, ) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0630cbc..35c6d9c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,6 +17,7 @@ extensions = ['myst_parser', 'sphinx.ext.napoleon', 'sphinx.ext.intersphinx', + 'sphinx.ext.mathjax', 'nbsphinx'] templates_path = ['_templates'] diff --git a/docs/source/index.rst b/docs/source/index.rst index 023ce9b..160ef07 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,6 +11,8 @@ Welcome to miles-guess's documentation! :caption: Contents: install.rst + keras.md + torch.md mlguess.rst notebooks/classifier_example.ipynb notebooks/regression_example.ipynb diff --git a/docs/source/keras.md b/docs/source/keras.md index d1d8a14..7d3e722 100644 --- a/docs/source/keras.md +++ b/docs/source/keras.md @@ -1,7 +1,10 @@ +# Keras in MILES-GUESS + Welcome to the Keras users page. The instructions below outline how to compute various UQ quantities like aleatoric and epistemic using different modeling approaches. +## Regression Models -### 1a. Train/evaluate a deterministic multi-layer perceptrion (MLP) on the SL dataset: +### 1a. Train/evaluate a deterministic multi-layer perceptron (MLP) on the SL dataset: ```bash python3 applications/train_mlp_SL.py -c config/model_mlp_SL.yml ``` @@ -16,6 +19,8 @@ python applications/train_gaussian_SL.py -c config/model_gaussian_SL.yml python applications/train_evidential_SL.py -c config/model_evidential_SL.yml ``` +## Classification Models + ### 2a. Train a categorical MLP classifier on the p-type dataset: ```bash python applications/train_classifier_ptype.py -c config/model_classifier_ptype.yml @@ -213,4 +218,4 @@ Depending on the problem, a data field is customized and also present in the con ## ECHO hyperparameter optimization -Configuration files are also supplied for use with the Earth Computing Hyperparameter Optimization (ECHO) package. 
See the echo package https://github.com/NCAR/echo-opt/tree/main/echo for more details on the configuration fields. \ No newline at end of file +Configuration files are also supplied for use with the Earth Computing Hyperparameter Optimization (ECHO) package. See the echo package https://github.com/NCAR/echo-opt/tree/main/echo for more details on the configuration fields. diff --git a/docs/source/mlguess.keras.rst b/docs/source/mlguess.keras.rst index 80f59f9..19587e5 100644 --- a/docs/source/mlguess.keras.rst +++ b/docs/source/mlguess.keras.rst @@ -36,10 +36,10 @@ mlguess.keras.models module :undoc-members: :show-inheritance: -mlguess.keras.models\_deprecated module +mlguess.keras.deprecated module --------------------------------------- -.. automodule:: mlguess.keras.models_deprecated +.. automodule:: mlguess.keras.deprecated :members: :undoc-members: :show-inheritance: diff --git a/docs/source/torch.md b/docs/source/torch.md index fca43aa..03d82a0 100644 --- a/docs/source/torch.md +++ b/docs/source/torch.md @@ -1,3 +1,5 @@ +# PyTorch in MILES-GUESS + Welcome to the pyTorch users page. The instructions below outline how to compute various UQ quantities like aleatoric and epistemic using different modeling approaches. Email schreck@ucar.edu for questions/concerns/fixes/etc ## Regression usage @@ -158,4 +160,4 @@ scheduler: ### Classifier Models -* `output_activation`: Set to `softmax` for standard classification. If not set, the model will use evidential classification. \ No newline at end of file +* `output_activation`: Set to `softmax` for standard classification. If not set, the model will use evidential classification. diff --git a/environment_gpu.yml b/environment_gpu_tf.yml similarity index 91% rename from environment_gpu.yml rename to environment_gpu_tf.yml index f733088..7fb6c67 100644 --- a/environment_gpu.yml +++ b/environment_gpu_tf.yml @@ -1,10 +1,10 @@ -name: guess +name: guesstf channels: - conda-forge - nvidia dependencies: - - python=3.10 - - numpy<1.24 + - python=3.11 + - numpy<2 - scipy - matplotlib - xarray diff --git a/environment_gpu_torch.yml b/environment_gpu_torch.yml new file mode 100644 index 0000000..f6d9f74 --- /dev/null +++ b/environment_gpu_torch.yml @@ -0,0 +1,36 @@ +name: guesstorch +channels: + - conda-forge + - pytorch +dependencies: + - python=3.11 + - numpy<2 + - scipy + - matplotlib + - xarray + - metpy + - netcdf4 + - pandas + - pyyaml + - pytest + - pip + - pyproj + - jupyter + - jupyterlab + - cartopy + - tqdm + - seaborn + - sphinx + - numba + - properscoring + - pyarrow + - imbalanced-learn + - pytorch + - pytorch-cuda=12.1 + - pip: + - keras + - echo-opt + - hagelslag + - bridgescaler + - safetensors + - -e . diff --git a/environment_tensorflow.yml b/environment_tensorflow.yml index 48b56fb..a4f9460 100644 --- a/environment_tensorflow.yml +++ b/environment_tensorflow.yml @@ -1,9 +1,9 @@ -name: guess +name: guesstf channels: - conda-forge dependencies: - python=3.10 - - numpy<1.24 + - numpy<2 - scipy - matplotlib - xarray @@ -31,5 +31,4 @@ dependencies: - echo-opt - hagelslag - bridgescaler - - -e git+https://github.com/ai2es/ptype-physical.git#egg=ptype - -e . diff --git a/environment_torch.yml b/environment_torch.yml index 6e7c538..5b0785d 100644 --- a/environment_torch.yml +++ b/environment_torch.yml @@ -31,5 +31,5 @@ dependencies: - echo-opt - hagelslag - bridgescaler - - -e git+https://github.com/ai2es/ptype-physical.git#egg=ptype + - safetensors - -e .
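For readers wiring up the split TensorFlow/PyTorch environments above: Keras 3 chooses its compute backend from the `KERAS_BACKEND` environment variable at import time, which is why the CI workflow earlier in this diff exports `KERAS_BACKEND="torch"` before running pytest. A minimal sketch (backend names follow the Keras 3 convention):

```python
# Select the Keras 3 backend before keras is imported anywhere.
import os

os.environ["KERAS_BACKEND"] = "torch"  # or "tensorflow" / "jax"

import keras  # noqa: E402 -- must come after the env var is set

print(keras.backend.backend())  # -> "torch"
```

Setting the variable after `keras` has already been imported has no effect, so in scripts it belongs at the very top (or in the shell, as the workflow does).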
diff --git a/mlguess/VERSION b/mlguess/VERSION index 0136388..7e08acf 100644 --- a/mlguess/VERSION +++ b/mlguess/VERSION @@ -1 +1 @@ -2024.1.0b2 +2024.2.0 diff --git a/mlguess/keras/deprecated/models.py b/mlguess/keras/deprecated/models.py index 0540405..1d00667 100644 --- a/mlguess/keras/deprecated/models.py +++ b/mlguess/keras/deprecated/models.py @@ -97,7 +97,8 @@ def __init__( self.history = None def build_neural_network(self, inputs, outputs, last_layer="Dense"): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables. @@ -108,7 +109,6 @@ def build_neural_network(self, inputs, outputs, last_layer="Dense"): if self.activation == "leaky": self.activation = LeakyReLU() - if self.kernel_reg == "l1": self.kernel_reg = L1(self.l1_weight) elif self.kernel_reg == "l2": @@ -522,7 +522,8 @@ def __init__( class GaussianRegressorDNN(BaseRegressor): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers and provides evidential uncertainty estimation. Inherits from BaseRegressor. @@ -542,6 +543,7 @@ class GaussianRegressorDNN(BaseRegressor): model: Keras Model object. evidential_coef: Evidential regularization coefficient. metrics: Optional list of metrics to monitor during training. + """ def __init__( @@ -571,11 +573,13 @@ def __init__( metrics=None, eps=1e-7 ): - """Initialize the EvidentialRegressorDNN. + """ + Initialize the GaussianRegressorDNN. Args: coupling_coef: Coupling coeffient for loss fix evidential_coef: Evidential regularization coefficient. + """ super().__init__( # Call the constructor of the base class hidden_layers, @@ -606,7 +610,8 @@ def __init__( self.loss = gaussian_nll def build_neural_network(self, inputs, outputs, last_layer="DenseNormal"): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables. @@ -702,7 +707,8 @@ def predict_monte_carlo(self, x_test, forward_passes, scaler=None, batch_size=No class EvidentialRegressorDNN(BaseRegressor): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers and provides evidential uncertainty estimation. Inherits from BaseRegressor. @@ -722,6 +728,7 @@ class EvidentialRegressorDNN(BaseRegressor): model: Keras Model object. evidential_coef: Evidential regularization coefficient. metrics: Optional list of metrics to monitor during training. + """ def __init__( self, @@ -803,7 +810,8 @@ def __init__( logging.info(f"Using loss: {loss}") def build_neural_network(self, inputs, outputs): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables.
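For context on the `gaussian_nll` loss assigned in `GaussianRegressorDNN.__init__` above, a standalone sketch of the standard Gaussian negative log-likelihood follows; the exact mlguess implementation may differ in reduction and parameterization:

```python
import numpy as np

def gaussian_nll_sketch(y, mu, sigma, eps=1e-7):
    """Per-sample Gaussian negative log-likelihood (standard form)."""
    sigma = np.maximum(sigma, eps)  # guard against zero spread, cf. the eps=1e-7 arg above
    return 0.5 * np.log(2.0 * np.pi * sigma**2) + (y - mu) ** 2 / (2.0 * sigma**2)

# A perfect mean prediction is still penalized for an overstated spread:
print(gaussian_nll_sketch(y=1.0, mu=1.0, sigma=2.0))  # ~1.612
```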
@@ -905,19 +913,17 @@ def predict_dist_params(self, x, y_scaler=None, batch_size=None): return mu, v, alpha, beta - def predict_ensemble( - self, x_test, scaler=None, batch_size=None - ): + def predict_ensemble(self, x_test, scaler=None, batch_size=None): return super().predict_ensemble(x_test, scaler=scaler, batch_size=batch_size, num_outputs=3) - def predict_monte_carlo( - self, x_test, forward_passes, scaler=None, batch_size=None - ): - return super().predict_monte_carlo(x_test, forward_passes, scaler=scaler, batch_size=batch_size, num_outputs=3) + def predict_monte_carlo(self, x_test, forward_passes, scaler=None, batch_size=None): + return super().predict_monte_carlo(x_test, forward_passes, + scaler=scaler, batch_size=batch_size, num_outputs=3) class CategoricalDNN(object): - """A Dense Neural Network Model that can support arbitrary numbers of hidden layers. + """ + A Dense Neural Network Model that can support arbitrary numbers of hidden layers. Attributes: hidden_layers: Number of hidden layers @@ -941,6 +947,7 @@ class CategoricalDNN(object): decay: Level of decay to apply to learning rate verbose: Level of detail to provide during training (0 = None, 1 = Minimal, 2 = All) classifier: (boolean) If training on classes + """ def __init__( self, @@ -969,7 +976,7 @@ def __init__( decay=0, verbose=0, random_state=1000, - callbacks=[], + callbacks=None, balanced_classes=0, steps_per_epoch=0, ): @@ -997,6 +1004,8 @@ def __init__( self.use_dropout = use_dropout self.dropout_alpha = dropout_alpha self.epochs = epochs + if callbacks is None: + callbacks = [] self.callbacks = callbacks self.decay = decay self.verbose = verbose @@ -1007,7 +1016,8 @@ def __init__( self.steps_per_epoch = steps_per_epoch def build_neural_network(self, inputs, outputs): - """Create Keras neural network model and compile it. + """ + Create Keras neural network model and compile it. Args: inputs (int): Number of input predictor variables @@ -1066,13 +1076,15 @@ def build_neural_network(self, inputs, outputs): self.model.compile(optimizer=self.optimizer_obj, loss=self.loss) def build_from_sequential(self, model, optimizer="adam", loss="mse", metrics=None): - """Build the neural network model using a Keras Sequential model. + """ + Build the neural network model using a Keras Sequential model. Args: model (tf.keras.Sequential): Keras Sequential model to use. optimizer (str or tf.keras.optimizers.Optimizer): Optimizer for the model. loss (str or tf.keras.losses.Loss): Loss function for the model. metrics (list of str or tf.keras.metrics.Metric): Metrics for the model.
+ """ self.model = model diff --git a/mlguess/keras/layers.py b/mlguess/keras/layers.py index 0315359..4899fcc 100644 --- a/mlguess/keras/layers.py +++ b/mlguess/keras/layers.py @@ -1,8 +1,6 @@ import keras import keras.layers as layers import keras.ops as ops -from keras.src import activations -from keras.src.layers.layer import Layer @keras.saving.register_keras_serializable() diff --git a/mlguess/keras/models.py b/mlguess/keras/models.py index e97475f..43b6c40 100644 --- a/mlguess/keras/models.py +++ b/mlguess/keras/models.py @@ -1,13 +1,14 @@ import sys import keras import keras.ops as ops +import keras.layers as layers +import keras.optimizers as optimizers import numpy as np -from keras.regularizers import L1, L2, L1L2 -from keras.layers import Dense, GaussianNoise, Dropout +# from keras.layers import Dense, GaussianNoise, Dropout from mlguess.keras.layers import DenseNormalGamma, DenseNormal from mlguess.keras.losses import evidential_cat_loss, evidential_reg_loss, gaussian_nll from mlguess.keras.callbacks import ReportEpoch -from keras.optimizers import Adam, SGD +# from keras.optimizers import Adam, SGD @@ -42,9 +43,9 @@ class CategoricalDNN(keras.models.Model): classifier: (boolean) If training on classes Example: - When evidential==True, the output activation and the loss function will be overridden under the hood. When - evidential==False, it will use the parameters specified and ignore the annealing_coeff. - Note: Model compilation happens under the hood when .fit() is called. + When evidential==True, the output activation and the loss function will be overridden under the hood. When + evidential==False, it will use the parameters specified and ignore the annealing_coeff. + Note: Model compilation happens under the hood when .fit() is called.:: n_samples = 1000 n_features = 23 @@ -76,6 +77,7 @@ class CategoricalDNN(keras.models.Model): lr=0.0001) hist = model.fit(x_train, y_train) p = model.predict(x_train, return_uncertainties=False, batch_size=5000) + """ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activation="relu", @@ -83,8 +85,14 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat annealing_coeff=1.0, use_noise=False, noise_sd=0.0, lr=0.001, use_dropout=False, dropout_alpha=0.2, batch_size=128, epochs=2, kernel_reg=None, l1_weight=0.0, l2_weight=0.0, sgd_momentum=0.9, adam_beta_1=0.9, adam_beta_2=0.999, epsilon=1e-7, decay=0, verbose=0, random_state=1000, n_classes=2, - n_inputs=42, callbacks=[], **kwargs): + n_inputs=42, callbacks=None, **kwargs): + """ + Create Keras neural network model and compile it. 
+ Args: + inputs (int): Number of input predictor variables + outputs (int): Number of output predictor variables + """ super().__init__(**kwargs) self.hidden_layers = hidden_layers self.hidden_neurons = hidden_neurons @@ -115,63 +123,62 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat self.random_state = random_state self.n_classes = n_classes self.n_inputs = n_inputs - self.callbacks = callbacks + if callbacks is None: + self.callbacks = [] + else: + self.callbacks = callbacks + self.hyperparameters = ["hidden_layers", "hidden_neurons", "evidential", "activation", "output_activation", "optimizer", "sgd_momentum", "adam_beta_1", "adam_beta_2", "epsilon", "loss", "loss_weights", "annealing_coeff", "lr", "kernel_reg", "l1_weight", "l2_weight", "batch_size", "use_noise", "noise_sd", "use_dropout", "dropout_alpha", "epochs", "callbacks", "decay", "verbose", "random_state", "n_classes", "n_inputs"] - """ - Create Keras neural network model and compile it. - Args: - inputs (int): Number of input predictor variables - outputs (int): Number of output predictor variables - """ + if self.kernel_reg == "l1": - self.kernel_reg = L1(self.l1_weight) + self.kernel_reg = keras.regularizers.L1(self.l1_weight) elif self.kernel_reg == "l2": - self.kernel_reg = L2(self.l2_weight) + self.kernel_reg = keras.regularizers.L2(self.l2_weight) elif self.kernel_reg == "l1_l2": - self.kernel_reg = L1L2(self.l1_weight, self.l2_weight) + self.kernel_reg = keras.regularizers.L1L2(self.l1_weight, self.l2_weight) else: self.kernel_reg = None if self.optimizer == "adam": - self.optimizer_obj = Adam(learning_rate=self.lr, + self.optimizer_obj = optimizers.Adam(learning_rate=self.lr, beta_1=self.adam_beta_1, beta_2=self.adam_beta_2, epsilon=self.epsilon) elif self.optimizer == "sgd": - self.optimizer_obj = SGD(learning_rate=self.lr, momentum=self.sgd_momentum) + self.optimizer_obj = optimizers.SGD(learning_rate=self.lr, momentum=self.sgd_momentum) if self.evidential: self.output_activation = "linear" self.model_layers = [] - self.model_layers.append(Dense(self.n_inputs, + self.model_layers.append(layers.Dense(self.n_inputs, activation=self.activation, kernel_regularizer=self.kernel_reg, name="input_dense")) for h in range(self.hidden_layers): - self.model_layers.append(Dense(self.hidden_neurons, + self.model_layers.append(layers.Dense(self.hidden_neurons, activation=self.activation, kernel_regularizer=self.kernel_reg, name=f"dense_{h:02d}")) if self.use_dropout: - self.model_layers.append(Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) + self.model_layers.append(layers.Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) if self.use_noise: - self.model_layers.append(GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) + self.model_layers.append(layers.GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) - self.model_layers.append(Dense(self.n_classes, + self.model_layers.append(layers.Dense(self.n_classes, activation=self.output_activation, name="dense_output")) def call(self, inputs): mod = self.model_layers[0](inputs) - for l in range(1, len(self.model_layers)): - mod = self.model_layers[l](mod) + for layer in range(1, len(self.model_layers)): + mod = self.model_layers[layer](mod) return mod @@ -194,7 +201,10 @@ def fit(self, x=None, y=None, **kwargs): return hist def predict(self, x, return_uncertainties=True, **kwargs): - """Args: + """ + Make a prediction with the trained model. 
+ + Args: x: Input data batch_size: Size of batch to predict return_uncertainties: Returns derived uncertainties from evidential distribution parameters. @@ -213,7 +223,8 @@ def predict(self, x, return_uncertainties=True, **kwargs): else: return output - def calc_uncertainty(self, y_pred): + @staticmethod + def calc_uncertainty(y_pred): num_classes = y_pred.shape[-1] evidence = ops.relu(y_pred) alpha = evidence + 1 @@ -283,11 +294,11 @@ class RegressorDNN(keras.models.Model): metrics: Optional list of metrics to monitor during training. Example: - When evidential==True or uncertainty==True, the output activation and the loss function will be overridden - under the hood. If both are True, the evidential model will override. When both are set to False, - it will train a generic DNN with a linear output activation and the specified loss function. - 'evi_coeff' is only used when evidential==True and is otherwise ignored. - Note: Model compilation happens under the hood when .fit() is called. + When evidential==True or uncertainty==True, the output activation and the loss function will be overridden + under the hood. If both are True, the evidential model will override. When both are set to False, + it will train a generic DNN with a linear output activation and the specified loss function. + 'evi_coeff' is only used when evidential==True and is otherwise ignored. + Note: Model compilation happens under the hood when .fit() is called.:: n_samples = 1000 n_features = 23 @@ -325,6 +336,7 @@ class RegressorDNN(keras.models.Model): epochs=10) model.fit(x_train, y_train) p = model.predict(x_train, return_uncertainties=False) + """ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activation="relu", optimizer="adam", loss_weights=None, use_noise=False, noise_sd=0.01, lr=0.00001, use_dropout=False, dropout_alpha=0.1, @@ -371,48 +383,49 @@ def __init__(self, hidden_layers=2, hidden_neurons=64, evidential=False, activat "verbose", "n_inputs", "n_output_tasks", "epsilon", "evi_coeff", "uncertainty"] if self.kernel_reg == "l1": - self.kernel_reg = L1(self.l1_weight) + self.kernel_reg = keras.regularizers.L1(self.l1_weight) elif self.kernel_reg == "l2": - self.kernel_reg = L2(self.l2_weight) + self.kernel_reg = keras.regularizers.L2(self.l2_weight) elif self.kernel_reg == "l1_l2": - self.kernel_reg = L1L2(self.l1_weight, self.l2_weight) + self.kernel_reg = keras.regularizers.L1L2(self.l1_weight, self.l2_weight) else: self.kernel_reg = None if self.optimizer == "adam": - self.optimizer_obj = Adam(learning_rate=self.lr, + self.optimizer_obj = optimizers.Adam(learning_rate=self.lr, beta_1=self.adam_beta_1, beta_2=self.adam_beta_2, epsilon=self.epsilon) elif self.optimizer == "sgd": - self.optimizer_obj = SGD(learning_rate=self.lr, momentum=self.sgd_momentum) + self.optimizer_obj = optimizers.SGD(learning_rate=self.lr, momentum=self.sgd_momentum) self.model_layers = [] - self.model_layers.append(Dense(self.n_inputs, + self.model_layers.append(layers.Dense(self.n_inputs, activation=self.activation, kernel_regularizer=self.kernel_reg, name="input_dense")) for h in range(self.hidden_layers): - self.model_layers.append(Dense(self.hidden_neurons, + self.model_layers.append(layers.Dense(self.hidden_neurons, activation=self.activation, kernel_regularizer=self.kernel_reg, name=f"dense_{h:02d}")) if self.use_dropout: - self.model_layers.append(Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) + self.model_layers.append(layers.Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) if 
self.use_noise: - self.model_layers.append(GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) + self.model_layers.append(layers.GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) if self.evidential: self.model_layers.append(DenseNormalGamma(self.n_output_tasks, name="dense_output")) elif self.uncertainty: self.model_layers.append(DenseNormal(self.n_output_tasks, name="dense_output")) else: - self.model_layers.append(Dense(self.n_output_tasks, name="dense_output")) + self.model_layers.append(layers.Dense(self.n_output_tasks, name="dense_output")) + def call(self, inputs): layer_output = self.model_layers[0](inputs) - for l in range(1, len(self.model_layers)): - layer_output = self.model_layers[l](layer_output) + for layer in range(1, len(self.model_layers)): + layer_output = self.model_layers[layer](layer_output) return layer_output @@ -439,14 +452,14 @@ def predict(self, x, return_uncertainties=True, batch_size=1000, **kwargs): If return_uncertainties is True: np.array(mu, aleatoric uncertainty, epistemic uncertainty) Else If return_uncertainties is False: np.array(mu, gamma, alpha, beta) """ - if type(return_uncertainties) != bool: + if not isinstance(return_uncertainties, bool): raise ValueError("return_uncertainties must be a boolean") if (not self.evidential) and (not self.uncertainty) and return_uncertainties: raise NotImplementedError("You can only return uncertainty estimates when 'evidential' or 'uncertainty' is" " True. Otherwise you can set 'return_uncertainties' to False to return predictions.") - elif (self.evidential) and return_uncertainties: + elif self.evidential and return_uncertainties: return self.calc_uncertainties(super().predict(x, batch_size=batch_size)) else: diff --git a/mlguess/plotting.py b/mlguess/plotting.py index 8560f30..69b799a 100644 --- a/mlguess/plotting.py +++ b/mlguess/plotting.py @@ -41,12 +41,80 @@ def plot_confusion_matrix(y_true, y_pred, classes, model_name, normalize=False, color="white" if cm[i, j] > thresh else "black", fontsize=10) - if filename: - path = f'/glade/u/home/jwillson/winter-ptype/images/{model_name}/' - plt.savefig(path + filename, dpi=300, bbox_inches="tight") + if filename is not None: + plt.savefig(filename, dpi=300, bbox_inches="tight") return ax +def compute_cov(df, col="pred_conf", quan="uncertainty", ascending=False): + df = df.copy() + df = df.sort_values(col, ascending=ascending) + df["dummy"] = 1 + df[f"cu_{quan}"] = df[quan].cumsum() / df["dummy"].cumsum() + df[f"cu_{col}"] = df[col].cumsum() / df["dummy"].cumsum() + df[f"{col}_cov"] = df["dummy"].cumsum() / len(df) + return df + + +def coverage_figures( + test_data, output_cols, colors=None, title=None, save_location=None +): + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3.5), sharey="col") + + test_data["accuracy"] = ( + test_data["pred_label"] == test_data["true_label"] + ).values.astype(int) + + _test_data_sorted = compute_cov(test_data, col="pred_conf", quan="accuracy") + ax1.plot(_test_data_sorted["pred_conf_cov"], _test_data_sorted["cu_accuracy"]) + + num_classes = test_data["true_label"].nunique() + for label in range(num_classes): + cond = test_data["true_label"] == label + _test_data_sorted = compute_cov( + test_data[cond], col="pred_conf", quan="accuracy" + ) + ax2.plot( + _test_data_sorted["pred_conf_cov"], + _test_data_sorted["cu_accuracy"], + c=colors[label], + ) + + if "evidential" in test_data: + _test_data_sorted = compute_cov( + test_data, col="evidential", quan="accuracy", ascending=True + ) + ax1.plot( + _test_data_sorted["evidential_cov"], 
+ _test_data_sorted["cu_accuracy"], + ls="--", + ) + for label in range(num_classes): + c = test_data["true_label"] == label + _test_data_sorted = compute_cov( + test_data[c], col="evidential", quan="accuracy", ascending=True + ) + ax2.plot( + _test_data_sorted["evidential_cov"], + _test_data_sorted["cu_accuracy"], + c=colors[label], + ls="--", + ) + + if title is not None: + ax1.set_title(title) + + ax1.set_ylabel("Cumulative accuracy") + ax1.set_xlabel("Coverage (sorted by confidence/uncertainty)") + ax2.set_xlabel("Coverage (sorted by confidence/uncertainty)") + ax1.legend(["Confidence", "Uncertainty"], loc="best") + ax2.legend(output_cols, loc="best") + plt.tight_layout() + + if save_location: + plt.savefig(save_location, dpi=300, bbox_inches="tight") + def conus_plot(df, dataset = "mping", diff --git a/mlguess/reliability.py b/mlguess/reliability.py new file mode 100644 index 0000000..e3189a3 --- /dev/null +++ b/mlguess/reliability.py @@ -0,0 +1,296 @@ +import numpy as np +import matplotlib.pyplot as plt + + +# https://github.com/hollance/reliability-diagrams/blob/master/reliability_diagrams.py + +# This file combines some analysis and plotting functions + +def compute_calibration(true_labels, pred_labels, confidences, num_bins=10): + """Collects predictions into bins used to draw a reliability diagram. + The true_labels, pred_labels, confidences arguments must be NumPy arrays; + pred_labels and true_labels may contain numeric or string labels. + For a multi-class model, the predicted label and confidence should be those + of the highest scoring class. + + Args: + true_labels: the true labels for the test examples + pred_labels: the predicted labels for the test examples + confidences: the predicted confidences for the test examples + num_bins: number of bins + + Returns: + a dictionary containing the following NumPy arrays: + accuracies: the average accuracy for each bin + confidences: the average confidence for each bin + counts: the number of examples in each bin + bins: the confidence thresholds for each bin + avg_accuracy: the accuracy over the entire test set + avg_confidence: the average confidence over the entire test set + expected_calibration_error: a weighted average of all calibration gaps + max_calibration_error: the largest calibration gap across all bins + """ + if len(confidences) != len(pred_labels) or len(confidences) != len(true_labels): + raise TypeError("Lengths of confidences, pred_labels, and true_labels should be equal.") + + if num_bins <= 0: + raise TypeError("Number of bins should be greater than zero.") + + bins = np.linspace(0.0, 1.0, num_bins + 1) + indices = np.digitize(confidences, bins, right=True) + + bin_accuracies = np.zeros(num_bins, dtype=np.float64) + bin_confidences = np.zeros(num_bins, dtype=np.float64) + bin_counts = np.zeros(num_bins, dtype=np.int64) + + for b in range(num_bins): + selected = np.where(indices == b + 1)[0] + if len(selected) > 0: + bin_accuracies[b] = np.mean(true_labels[selected] == pred_labels[selected]) + bin_confidences[b] = np.mean(confidences[selected]) + bin_counts[b] = len(selected) + + avg_acc = np.sum(bin_accuracies * bin_counts) / np.sum(bin_counts) + avg_conf = np.sum(bin_confidences * bin_counts) / np.sum(bin_counts) + + gaps = np.abs(bin_accuracies - bin_confidences) + ece = np.sum(gaps * bin_counts) / np.sum(bin_counts) + mce = np.max(gaps) + + return {"accuracies": bin_accuracies, + "confidences": bin_confidences, + "counts": bin_counts, + "bins": bins, + "avg_accuracy": avg_acc, + "avg_confidence": avg_conf, + 
"expected_calibration_error": ece, + "max_calibration_error": mce} + + +def _reliability_diagram_subplot(ax, bin_data, + draw_ece=True, + draw_bin_importance=False, + title="Reliability Diagram", + xlabel="Confidence", + ylabel="Expected Accuracy", + ): + """ + Draws a reliability diagram into the specified subplot. + + Args: + ax (matplotlib.axes.Axes): The subplot to draw the reliability diagram on. + bin_data (dict): A dictionary containing the following keys: + - 'accuracies' (numpy.ndarray): Array of accuracy values for each bin. + - 'confidences' (numpy.ndarray): Array of confidence values for each bin. + - 'counts' (numpy.ndarray): Array of counts for each bin. + - 'bins' (numpy.ndarray): Array of bin edges. + draw_ece (bool, optional): Flag to draw the Expected Calibration Error (ECE) value on the diagram. Default is True. + draw_bin_importance (bool or str, optional): Flag to adjust the appearance of the bins based on their importance. + If False, all bins have equal appearance. If True, bins are adjusted based on their counts. + If 'alpha', bins are adjusted based on counts using varying alpha values. If 'width', bins are adjusted + based on counts using varying bar widths. Default is False. + title (str, optional): The title of the reliability diagram. Default is "Reliability Diagram". + xlabel (str, optional): The label for the x-axis. Default is "Confidence". + ylabel (str, optional): The label for the y-axis. Default is "Expected Accuracy". + + Returns: + plot + """ + + accuracies = bin_data["accuracies"] + confidences = bin_data["confidences"] + counts = bin_data["counts"] + bins = bin_data["bins"] + + bin_size = 1.0 / len(counts) + positions = bins[:-1] + bin_size / 2.0 + + widths = bin_size + alphas = 0.3 + min_count = np.min(counts) + max_count = np.max(counts) + normalized_counts = (counts - min_count) / (max_count - min_count) + + if draw_bin_importance == "alpha": + alphas = 0.2 + 0.8 * normalized_counts + elif draw_bin_importance == "width": + widths = 0.1 * bin_size + 0.9 * bin_size * normalized_counts + + colors = np.zeros((len(counts), 4)) + colors[:, 0] = 240 / 255. + colors[:, 1] = 60 / 255. + colors[:, 2] = 60 / 255. 
+ colors[:, 3] = alphas + + gap_plt = ax.bar(positions, np.abs(accuracies - confidences), + bottom=np.minimum(accuracies, confidences), width=widths, + edgecolor=colors, color=colors, linewidth=1, label="Gap") + + acc_plt = ax.bar(positions, 0, bottom=accuracies, width=widths, + edgecolor="black", color="black", alpha=1.0, linewidth=3, + label="Accuracy") + + ax.set_aspect("equal") + ax.plot([0, 1], [0, 1], linestyle="--", color="gray") + + if draw_ece: + ece = (bin_data["expected_calibration_error"] * 100) + ax.text(0.98, 0.02, "ECE=%.2f" % ece, color="black", + ha="right", va="bottom", transform=ax.transAxes) + + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + ax.legend(handles=[gap_plt, acc_plt]) + + +def _confidence_histogram_subplot(ax, bin_data, + draw_averages=True, + title="Examples per bin", + xlabel="Confidence", + ylabel="Count"): + """Draws a confidence histogram into a subplot.""" + counts = bin_data["counts"] + bins = bin_data["bins"] + + bin_size = 1.0 / len(counts) + positions = bins[:-1] + bin_size / 2.0 + + ax.bar(positions, counts, width=bin_size * 0.9) + + ax.set_xlim(0, 1) + ax.set_title(title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + if draw_averages: + acc_plt = ax.axvline(x=bin_data["avg_accuracy"], ls="solid", lw=3, + c="black", label="Accuracy") + conf_plt = ax.axvline(x=bin_data["avg_confidence"], ls="dotted", lw=3, + c="#444", label="Avg. confidence") + ax.legend(handles=[acc_plt, conf_plt]) + + +def _reliability_diagram_combined(bin_data, + draw_ece, draw_bin_importance, draw_averages, + title, figsize, dpi, return_fig): + """Draws a reliability diagram and confidence histogram using the output + from compute_calibration().""" + figsize = (figsize[0], figsize[0] * 1.4) + + fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=figsize, dpi=dpi, + gridspec_kw={"height_ratios": [4, 1]}) + + plt.tight_layout() + plt.subplots_adjust(hspace=-0.1) + + _reliability_diagram_subplot(ax[0], bin_data, draw_ece, draw_bin_importance, + title=title, xlabel="") + + # Draw the confidence histogram upside down. + orig_counts = bin_data["counts"] + bin_data["counts"] = -bin_data["counts"] + _confidence_histogram_subplot(ax[1], bin_data, draw_averages, title="") + bin_data["counts"] = orig_counts + + # Also negate the ticks for the upside-down histogram. + new_ticks = np.abs(ax[1].get_yticks()).astype(int) + ax[1].set_yticklabels(new_ticks) + + if return_fig: return fig + + +def reliability_diagram(true_labels, pred_labels, confidences, num_bins=10, + draw_ece=True, draw_bin_importance=False, + draw_averages=True, title="Reliability Diagram", + figsize=(6, 6), dpi=72, return_fig=False): + """Draws a reliability diagram and confidence histogram in a single plot. + + First, the model's predictions are divided up into bins based on their + confidence scores. + The reliability diagram shows the gap between average accuracy and average + confidence in each bin. These are the red bars. + The black line is the accuracy, the other end of the bar is the confidence. + Ideally, there is no gap and the black line is on the dotted diagonal. + In that case, the model is properly calibrated and we can interpret the + confidence scores as probabilities. + The confidence histogram visualizes how many examples are in each bin. + This is useful for judging how much each bin contributes to the calibration + error. + The confidence histogram also shows the overall accuracy and confidence.
+ The closer these two lines are together, the better the calibration. + + The ECE or Expected Calibration Error is a summary statistic that gives the + difference in expectation between confidence and accuracy. In other words, + it's a weighted average of the gaps across all bins. A lower ECE is better. + + Args: + true_labels: the true labels for the test examples + pred_labels: the predicted labels for the test examples + confidences: the predicted confidences for the test examples + num_bins: number of bins + draw_ece: whether to include the Expected Calibration Error + draw_bin_importance: whether to represent how much each bin contributes + to the total accuracy: False, "alpha", "widths" + draw_averages: whether to draw the overall accuracy and confidence in + the confidence histogram + title: optional title for the plot + figsize: setting for matplotlib; height is ignored + dpi: setting for matplotlib + return_fig: if True, returns the matplotlib Figure object + """ + bin_data = compute_calibration(true_labels, pred_labels, confidences, num_bins) + return _reliability_diagram_combined(bin_data, draw_ece, draw_bin_importance, + draw_averages, title, figsize=figsize, + dpi=dpi, return_fig=return_fig) + + +def reliability_diagrams(results, num_bins=10, + draw_ece=True, draw_bin_importance=False, + num_cols=4, dpi=72, return_fig=False): + """Draws reliability diagrams for one or more models. + + Args: + results: dictionary where the key is the model name and the value is + a dictionary containing the true labels, predicated labels, and + confidences for this model + num_bins: number of bins + draw_ece: whether to include the Expected Calibration Error + draw_bin_importance: whether to represent how much each bin contributes + to the total accuracy: False, "alpha", "widths" + num_cols: how wide to make the plot + dpi: setting for matplotlib + return_fig: if True, returns the matplotlib Figure object + """ + ncols = num_cols + nrows = (len(results) + ncols - 1) // ncols + figsize = (ncols * 4, nrows * 4) + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True, + figsize=figsize, dpi=dpi, constrained_layout=True) + + for i, (plot_name, data) in enumerate(results.items()): + y_true = data["true_labels"] + y_pred = data["pred_labels"] + y_conf = data["confidences"] + + bin_data = compute_calibration(y_true, y_pred, y_conf, num_bins) + + row = i // ncols + col = i % ncols + _reliability_diagram_subplot(ax[row, col], bin_data, draw_ece, + draw_bin_importance, + title=plot_name, # "\n".join(plot_name.split()), + xlabel="Confidence" if row == nrows - 1 else "", + ylabel="Expected Accuracy" if col == 0 else "") + + for i in range(i + 1, nrows * ncols): + row = i // ncols + col = i % ncols + ax[row, col].axis("off") + + if return_fig: return fig \ No newline at end of file diff --git a/mlguess/torch/class_losses.py b/mlguess/torch/class_losses.py index dd4856b..b3d5f7f 100644 --- a/mlguess/torch/class_losses.py +++ b/mlguess/torch/class_losses.py @@ -3,7 +3,6 @@ # Adapted from https://github.com/dougbrion/pytorch-classification-uncertainty/blob/master/losses.py - def get_device(): """Get the device for PyTorch operations. 
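Since `evaluate_ptype.py` now imports these helpers from `mlguess.reliability` (see the import change at the top of this diff), a usage sketch with synthetic labels:

```python
import numpy as np
from mlguess.reliability import compute_calibration, reliability_diagram

rng = np.random.default_rng(0)
true_labels = rng.integers(0, 3, size=500)       # 3-class toy problem
pred_labels = rng.integers(0, 3, size=500)
confidences = rng.uniform(1 / 3, 1.0, size=500)  # top-class confidence

stats = compute_calibration(true_labels, pred_labels, confidences, num_bins=10)
print(f"ECE: {stats['expected_calibration_error']:.3f}")

fig = reliability_diagram(true_labels, pred_labels, confidences,
                          num_bins=10, return_fig=True)
fig.savefig("reliability.png", dpi=300, bbox_inches="tight")
```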
diff --git a/mlguess/torch/distributed.py b/mlguess/torch/distributed.py index a1208b9..d5e1075 100644 --- a/mlguess/torch/distributed.py +++ b/mlguess/torch/distributed.py @@ -6,20 +6,18 @@ MixedPrecision, CPUOffload ) -from torch.distributed.fsdp.wrap import ( - transformer_auto_wrap_policy, - size_based_auto_wrap_policy, -) +from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy + from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( checkpoint_wrapper, CheckpointImpl, apply_activation_checkpointing, ) -from credit.models.checkpoint import ( +from mlguess.torch.checkpoint import ( TorchFSDPModel ) from torch.nn.parallel import DistributedDataParallel as DDP -from credit.mixed_precision import parse_dtype +from mlguess.torch.mixed_precision import parse_dtype import functools import logging diff --git a/mlguess/torch/layers.py b/mlguess/torch/layers.py index 053f4c2..7d0c413 100644 --- a/mlguess/torch/layers.py +++ b/mlguess/torch/layers.py @@ -1,4 +1,3 @@ - import torch from torch import nn from torch.nn.utils import spectral_norm as SpectralNorm @@ -29,9 +28,9 @@ def __init__(self, in_channels, out_channels, spectral_norm=True): """ super().__init__() if spectral_norm: - self.linear = SpectralNorm(nn.Linear(in_channels, out_channels*4)) + self.linear = SpectralNorm(nn.Linear(in_channels, out_channels * 4)) else: - self.linear = nn.Linear(in_channels, out_channels*4) + self.linear = nn.Linear(in_channels, out_channels * 4) def evidence(self, x): """Applies a log transformation to the input with a shift. diff --git a/mlguess/torch/mc_dropout.py b/mlguess/torch/mc_dropout.py index ffbc6d4..b0b924e 100644 --- a/mlguess/torch/mc_dropout.py +++ b/mlguess/torch/mc_dropout.py @@ -11,11 +11,12 @@ def enable_dropout(model): if m.__class__.__name__.startswith('Dropout'): m.train() + def monte_carlo_dropout(data_loader, forward_passes, model, n_classes, - n_samples, + n_samples, batch_size=1024, uncertainty=False): """Function to get the monte-carlo samples and uncertainty estimates @@ -43,7 +44,7 @@ def monte_carlo_dropout(data_loader, if uncertainty: evidence = relu_evidence(output) alpha = evidence + 1 - #u = num_classes / torch.sum(alpha, dim=1, keepdim=True) + # u = num_classes / torch.sum(alpha, dim=1, keepdim=True) output = alpha / torch.sum(alpha, dim=1, keepdim=True) else: output = F.softmax(output, dim=1) # shape (n_samples, n_classes) @@ -64,10 +65,10 @@ def monte_carlo_dropout(data_loader, if n_classes > 1: epsilon = sys.float_info.min # Calculating entropy across multiple MCD forward passes - entropy = -np.sum(mean*np.log(mean + epsilon), axis=-1) # shape (n_samples,) + entropy = -np.sum(mean * np.log(mean + epsilon), axis=-1) # shape (n_samples,) # Calculating mutual information across multiple MCD forward passes - mutual_info = entropy - np.mean(np.sum(-dropout_predictions*np.log(dropout_predictions + epsilon), + mutual_info = entropy - np.mean(np.sum(-dropout_predictions * np.log(dropout_predictions + epsilon), axis=-1), axis=0) # shape (n_samples,) results["entropy"] = entropy diff --git a/mlguess/torch/metrics.py b/mlguess/torch/metrics.py index bd84442..c97e675 100644 --- a/mlguess/torch/metrics.py +++ b/mlguess/torch/metrics.py @@ -2,6 +2,7 @@ from sklearn.metrics import precision_recall_fscore_support, roc_auc_score from hagelslag.evaluation.ProbabilityMetrics import DistributedROC + class MetricsCalculator: """A class to calculate various metrics for model evaluation, including CSI, average accuracy, precision, recall, F1 score, AUC, MCE, and 
ECE. @@ -112,8 +113,8 @@ def ave_acc(self, true_labels, pred_labels): return np.mean( [ ( - true_labels[np.where(true_labels == _label)] - == pred_labels[np.where(true_labels == _label)] + true_labels[np.where(true_labels == _label)] + == pred_labels[np.where(true_labels == _label)] ).mean() for _label in np.unique(true_labels) ] @@ -136,7 +137,7 @@ def mce(self, true_labels, pred_probs): mce = 0.0 for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): in_bin = (confidences > bin_lower).astype(float) * ( - confidences <= bin_upper + confidences <= bin_upper ).astype(float) prop_in_bin = in_bin.astype(float).mean() in_bin = in_bin.squeeze(-1).astype(int) @@ -167,7 +168,7 @@ def ece(self, true_labels, pred_probs): ece = [] for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers): in_bin = (confidences > bin_lower).astype(float) * ( - confidences <= bin_upper + confidences <= bin_upper ).astype(float) prop_in_bin = in_bin.astype(float).mean() in_bin = in_bin.squeeze(-1).astype(int) @@ -176,7 +177,7 @@ def ece(self, true_labels, pred_probs): accuracy_in_bin = accuracies[in_bin].astype(float).mean() avg_confidence_in_bin = confidences[in_bin].mean() avg_calibration = ( - np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin + np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin ) ece.append(avg_calibration) except ValueError: diff --git a/mlguess/torch/mixed_precision.py b/mlguess/torch/mixed_precision.py new file mode 100644 index 0000000..82131c8 --- /dev/null +++ b/mlguess/torch/mixed_precision.py @@ -0,0 +1,38 @@ +import torch + + +def parse_dtype(dtype_str: str) -> torch.dtype: + """ + Parses a string representation of a torch dtype and returns the corresponding torch.dtype object. + + Args: + dtype_str (str): The string representation of the dtype (e.g., "float32", "int64"). + + Returns: + torch.dtype: The corresponding torch.dtype object. + """ + dtype_map = { + "float32": torch.float32, + "float64": torch.float64, + "float16": torch.float16, + "float": torch.float32, # Alias for float32 + "double": torch.float64, # Alias for float64 + "half": torch.float16, # Alias for float16 + "bfloat16": torch.bfloat16, + "int8": torch.int8, + "uint8": torch.uint8, + "int16": torch.int16, + "int32": torch.int32, + "int64": torch.int64, + "short": torch.int16, # Alias for int16 + "long": torch.int64, # Alias for int64 + "bool": torch.bool + } + + # Convert the dtype string to lowercase for case-insensitive matching + dtype_str_lower = dtype_str.lower() + + if dtype_str_lower in dtype_map: + return dtype_map[dtype_str_lower] + else: + raise ValueError(f"Invalid dtype string: {dtype_str}") \ No newline at end of file diff --git a/mlguess/torch/models.py b/mlguess/torch/models.py index 9117b82..684a126 100644 --- a/mlguess/torch/models.py +++ b/mlguess/torch/models.py @@ -10,7 +10,6 @@ from mlguess.torch.checkpoint import load_model_state from mlguess.torch.layers import LinearNormalGamma - warnings.filterwarnings("ignore") logger = logging.getLogger(__name__) @@ -71,6 +70,7 @@ def init_weights(net, init_type='normal', init_gain=0.0, verbose=True): - 'kaiming': Kaiming initialization. - 'orthogonal': Orthogonal initialization. """ + def init_func(m): """Initialization function for network layers. 
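The new `mlguess/torch/mixed_precision.py` module above is self-contained, so its behavior can be checked directly; a short usage sketch of `parse_dtype`:

```python
import torch
from mlguess.torch.mixed_precision import parse_dtype

assert parse_dtype("float16") is torch.float16
assert parse_dtype("BFLOAT16") is torch.bfloat16  # matching is case-insensitive
assert parse_dtype("long") is torch.int64         # aliases resolve to canonical dtypes

try:
    parse_dtype("float8")  # not in the map
except ValueError as err:
    print(err)  # Invalid dtype string: float8
```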
@@ -95,9 +95,11 @@ def init_func(m): raise NotImplementedError('initialization method [%s] is not implemented' % init_type) if hasattr(m, 'bias') and m.bias is not None: init.constant_(m.bias.data, 0.0) - elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + elif classname.find( + 'BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. init.normal_(m.weight.data, 1.0, init_gain) init.constant_(m.bias.data, 0.0) + if verbose: logging.info('Initializing network with %s' % init_type) net.apply(init_func) @@ -116,6 +118,7 @@ class DNN(nn.Module): weight_init (bool): Whether to initialize weights. Default is False. num_layers (int): Number of layers to create if layer_size is a single number. Default is None. """ + def __init__(self, input_size, output_size, @@ -262,12 +265,12 @@ def predict_uncertainty(self, input, y_scaler=None): mu = y_scaler.inverse_transform(mu) mu = torch.from_numpy(mu).to(aleatoric.device) - # Torch version of some of the sklearn scalers -- this needs updated later - # MinMaxScaler inverse transform - # if y_scaler: - # min_val = torch.tensor(y_scaler.data_min_, device=mu.device) - # max_val = torch.tensor(y_scaler.data_max_, device=mu.device) - # mu = mu * (max_val - min_val) + min_val + # Torch version of some of the sklearn scalers -- this needs updated later + # MinMaxScaler inverse transform + # if y_scaler: + # min_val = torch.tensor(y_scaler.data_min_, device=mu.device) + # max_val = torch.tensor(y_scaler.data_max_, device=mu.device) + # mu = mu * (max_val - min_val) + min_val for i in range(mu.shape[-1]): aleatoric[:, i] *= self.training_var[i] @@ -332,6 +335,7 @@ def from_config(cls, conf, device="cpu"): return model + class CategoricalDNN(DNN): def calc_uncertainty(self, y_pred): diff --git a/mlguess/torch/regression_losses.py b/mlguess/torch/regression_losses.py index 54dd5b5..be69234 100644 --- a/mlguess/torch/regression_losses.py +++ b/mlguess/torch/regression_losses.py @@ -14,6 +14,7 @@ class EvidentialRegressionLoss: Args: coef (float, optional): Coefficient for the regularization term. Defaults to 1.0. """ + def __init__(self, coef=1.0): self.coef = coef @@ -35,10 +36,10 @@ def normal_inverse_gamma_nll(self, y, gamma, v, alpha, beta): """ two_blambda = 2 * beta * (1 + v) + tol nll = 0.5 * torch.log(np.pi / (v + tol)) \ - - alpha * torch.log(two_blambda + tol) \ - + (alpha + 0.5) * torch.log(v * (y - gamma) ** 2 + two_blambda + tol) \ - + torch.lgamma(alpha) \ - - torch.lgamma(alpha + 0.5) + - alpha * torch.log(two_blambda + tol) \ + + (alpha + 0.5) * torch.log(v * (y - gamma) ** 2 + two_blambda + tol) \ + + torch.lgamma(alpha) \ + - torch.lgamma(alpha + 0.5) return nll @@ -97,9 +98,9 @@ def modified_mse(gamma, nu, alpha, beta, target, reduction='mean'): Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ - mse = (gamma-target)**2 + mse = (gamma - target) ** 2 c = get_mse_coef(gamma, nu, alpha, beta, target).detach() - mod_mse = mse*c + mod_mse = mse * c if reduction == 'mean': return mod_mse.mean() @@ -131,7 +132,7 @@ def get_mse_coef(gamma, nu, alpha, beta, y): nu_eff = check_mse_efficiency_nu(gamma, nu, alpha, beta) delta = (gamma - y).abs() min_bound = torch.min(nu_eff, alpha_eff).min() - c = (min_bound.sqrt()/(delta + tol)).detach() + c = (min_bound.sqrt() / (delta + tol)).detach() return torch.clip(c, min=False, max=1.) 
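As a cross-check on the reflowed `normal_inverse_gamma_nll` above (renderable via the `sphinx.ext.mathjax` extension enabled earlier in this diff), the quantity computed term-by-term is the Normal-Inverse-Gamma negative log marginal likelihood of Amini et al. (2020):

$$
\mathrm{NLL}(y \mid \gamma, \nu, \alpha, \beta) = \frac{1}{2}\log\frac{\pi}{\nu} - \alpha\log\Omega + \left(\alpha + \frac{1}{2}\right)\log\!\left(\nu\,(y-\gamma)^2 + \Omega\right) + \log\frac{\Gamma(\alpha)}{\Gamma\!\left(\alpha + \frac{1}{2}\right)}, \qquad \Omega = 2\beta(1+\nu),
$$

with a small `tol` added inside each logarithm in the code for numerical stability.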
@@ -153,7 +154,7 @@ def check_mse_efficiency_alpha(nu, alpha, beta): where f => the NLL loss (BayesianDTI.loss.MarginalLikelihood) """ - right = (torch.exp((torch.digamma(alpha+0.5)-torch.digamma(alpha))) - 1)*2*beta*(1+nu) / (nu + 1e-8) + right = (torch.exp((torch.digamma(alpha + 0.5) - torch.digamma(alpha))) - 1) * 2 * beta * (1 + nu) / (nu + 1e-8) return right.detach() @@ -184,17 +185,19 @@ class EvidentialMarginalLikelihood(torch.nn.modules.loss._Loss): """Marginal likelihood error of prior network. The target value is not a distribution (mu, std), but a just value. - This is a negative log marginal likelihood, with integral mu and sigma. - - Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf + Reference: Amini et al. 2020 (https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf) Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean'): super(EvidentialMarginalLikelihood, self).__init__(size_average, reduce, reduction) def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, beta: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Args: + """ + Conduct the forward pass through the loss. + + Args: gamma (torch.Tensor): gamma output value of the evidential network nu (torch.Tensor): nu output value of the evidential network alpha (torch.Tensor): alpha output value of the evidential network @@ -203,16 +206,12 @@ def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, be Return: (Tensor) Negative log marginal likelihood of EvidentialNet - p(y|m) = Student-t(y; gamma, (beta(1+nu))/(nu*alpha) , 2*alpha) - then, the negative log likelihood is (CAUTION QUITE COMPLEX!) - NLL = -log(p(y|m)) = - log(3.14/nu)*0.5 - alpha*log(2*beta*(1 + nu)) + (alpha + 0.5)*log( nu(target - gamma)^2 + 2*beta(1 + nu) ) - + log(GammaFunc(alpha)/GammaFunc(alpha + 0.5)) + """ pi = torch.tensor(np.pi) - x1 = torch.log(pi/(nu + tol))*0.5 - x2 = -alpha*torch.log(2.*beta*(1. + nu) + tol) - x3 = (alpha + 0.5)*torch.log(nu*(target - gamma)**2 + 2.*beta*(1. + nu) + tol) + x1 = torch.log(pi / (nu + tol)) * 0.5 + x2 = -alpha * torch.log(2. * beta * (1. + nu) + tol) + x3 = (alpha + 0.5) * torch.log(nu * (target - gamma) ** 2 + 2. * beta * (1. + nu) + tol) x4 = torch.lgamma(alpha + tol) - torch.lgamma(alpha + 0.5 + tol) if self.reduction == 'mean': return (x1 + x2 + x3 + x4).mean() @@ -229,24 +228,28 @@ class EvidenceRegularizer(torch.nn.modules.loss._Loss): Reference: https://www.mit.edu/~amini/pubs/pdf/deep-evidential-regression.pdf Source: https://github.com/deargen/MT-ENet/tree/468822188f52e517b1ee8e386eea607b2b7d8829 """ + def __init__(self, size_average=None, reduce=None, reduction: str = 'mean', coef=0.1): super(EvidenceRegularizer, self).__init__(size_average, reduce, reduction) self.coef = coef def forward(self, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - """Args: + """ + Forward pass through the loss. 
+ + Args: gamma (torch.Tensor): gamma output value of the evidential network nu (torch.Tensor): nu output value of the evidential network alpha (torch.Tensor): alpha output value of the evidential network target (torch.Tensor): target value - Return: + Returns: (Tensor) prior network regularization Loss = |y - gamma|*(2*nu + alpha) * factor """ - loss_value = torch.abs(target - gamma)*(2*nu + alpha) * self.coef + loss_value = torch.abs(target - gamma) * (2 * nu + alpha) * self.coef if self.reduction == 'mean': return loss_value.mean() elif self.reduction == 'sum': @@ -264,6 +267,7 @@ class LipschitzMSELoss(torch.nn.Module): coef (float, optional): Coefficient for the regularization term. Defaults to 0.1. reduction (str, optional): Specifies the method to reduce the loss over the batch. Can be 'mean', 'sum', or 'none'. Defaults to 'mean'. """ + def __init__(self, tol=1e-8, coef=0.1, reduction='mean'): super(LipschitzMSELoss, self).__init__() self.tol = tol diff --git a/mlguess/torch/scheduler.py b/mlguess/torch/scheduler.py index 0aaeb01..4f5b351 100644 --- a/mlguess/torch/scheduler.py +++ b/mlguess/torch/scheduler.py @@ -13,11 +13,11 @@ def load_scheduler(optimizer, conf): """Load a learning rate scheduler based on the configuration. Parameters: - - optimizer: The PyTorch optimizer. - - conf: The configuration dictionary. + optimizer: The PyTorch optimizer. + conf: The configuration dictionary. Returns: - - scheduler: The PyTorch learning rate scheduler. + scheduler: The PyTorch learning rate scheduler. """ conf = copy.deepcopy(conf) @@ -103,14 +103,18 @@ def lr_lambda_phase1(epoch, num_epochs=100, warmup_epochs=10): class CosineAnnealingWarmupRestarts(LRScheduler): - """optimizer (Optimizer): Wrapped optimizer. - first_cycle_steps (int): First cycle step size. - cycle_mult(float): Cycle steps magnification. Default: -1. - max_lr(float): First cycle's max learning rate. Default: 0.1. - min_lr(float): Min learning rate. Default: 0.001. - warmup_steps(int): Linear warmup step size. Default: 0. - gamma(float): Decrease rate of max learning rate by cycle. Default: 1. - last_epoch (int): The index of last epoch. Default: -1. + """ + Cosine Annealing learning rate scheduler. + + Attributes: + optimizer (Optimizer): Wrapped optimizer. + first_cycle_steps (int): First cycle step size. + cycle_mult(float): Cycle steps magnification. Default: -1. + max_lr(float): First cycle's max learning rate. Default: 0.1. + min_lr(float): Min learning rate. Default: 0.001. + warmup_steps(int): Linear warmup step size. Default: 0. + gamma(float): Decrease rate of max learning rate by cycle. Default: 1. + last_epoch (int): The index of last epoch. Default: -1. """ def __init__( @@ -190,14 +194,14 @@ def step(self, epoch=None): def annealed_probability(epoch, max_epochs=100, min_probability=0.01, max_probability=1.0): """Anneal the termination probability from 1 to a small value. - Parameters: - - epoch: The current epoch. - - max_epochs: The maximum number of epochs for annealing. - - min_probability: The minimum termination probability. - - max_probability: The maximum termination probability. + Args: + epoch: The current epoch. + max_epochs: The maximum number of epochs for annealing. + min_probability: The minimum termination probability. + max_probability: The maximum termination probability. Returns: - - termination_probability: The annealed termination probability. + termination_probability: The annealed termination probability. 
""" # Linear annealing schedule termination_probability = 1.0 - (epoch / max_epochs) * (1.0 - min_probability) diff --git a/mlguess/torch/trainer_classifier.py b/mlguess/torch/trainer_classifier.py index e691060..0e3619c 100644 --- a/mlguess/torch/trainer_classifier.py +++ b/mlguess/torch/trainer_classifier.py @@ -67,7 +67,8 @@ def __init__(self, model, rank, module=False, uncertainty=False): super(Trainer, self).__init__() self.model = model self.rank = rank - self.device = torch.device(f"cuda:{rank % torch.cuda.device_count()}") if torch.cuda.is_available() else torch.device("cpu") + self.device = torch.device( + f"cuda:{rank % torch.cuda.device_count()}") if torch.cuda.is_available() else torch.device("cpu") if module: self.model = self.model.module @@ -76,15 +77,15 @@ def __init__(self, model, rank, module=False, uncertainty=False): # Training function. def train_one_epoch( - self, - epoch, - conf, - trainloader, - optimizer, - criterion, - scaler, - scheduler, - metrics + self, + epoch, + conf, + trainloader, + optimizer, + criterion, + scaler, + scheduler, + metrics ): """Train the model for one epoch. @@ -191,7 +192,8 @@ def train_one_epoch( if self.rank == 0: batch_group_generator.set_description(to_print) - if conf['trainer']['use_scheduler'] and conf['trainer']['scheduler']['scheduler_type'] == "cosine-annealing": + if conf['trainer']['use_scheduler'] and conf['trainer']['scheduler'][ + 'scheduler_type'] == "cosine-annealing": scheduler.step() if i >= batches_per_epoch and i > 0: @@ -207,12 +209,12 @@ def train_one_epoch( return results_dict def validate( - self, - epoch, - conf, - valid_loader, - criterion, - metrics + self, + epoch, + conf, + valid_loader, + criterion, + metrics ): """Validate the model on the validation dataset. @@ -306,12 +308,12 @@ def validate( return results_dict def predict( - self, - conf, - test_loader, - criterion, - metrics, - split=None + self, + conf, + test_loader, + criterion, + metrics, + split=None ): """Make predictions with the model on the test dataset. @@ -413,17 +415,17 @@ def predict( } def fit( - self, - conf, - train_loader, - valid_loader, - optimizer, - train_criterion, - valid_criterion, - scaler, - scheduler, - metrics, - trial=False + self, + conf, + train_loader, + valid_loader, + optimizer, + train_criterion, + valid_criterion, + scaler, + scheduler, + metrics, + trial=False ): """Train and validate the model. 
@@ -560,23 +562,25 @@ def fit( if conf["trainer"]["mode"] != "fsdp": if self.rank == 0: - # Save the current model - logging.info(f"Saving model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") + logging.info( + f"Saving model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") state_dict = { "epoch": epoch, "model_state_dict": self.model.state_dict(), "optimizer_state_dict": optimizer.state_dict(), - 'scheduler_state_dict': scheduler.state_dict() if conf["trainer"]["use_scheduler"] else None, + 'scheduler_state_dict': scheduler.state_dict() if conf["trainer"][ + "use_scheduler"] else None, 'scaler_state_dict': scaler.state_dict() } torch.save(state_dict, f"{save_loc}/checkpoint.pt") else: - logging.info(f"Saving FSDP model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") + logging.info( + f"Saving FSDP model, optimizer, grad scaler, and learning rate scheduler states to {save_loc}") # Initialize the checkpoint I/O handler diff --git a/pyproject.toml b/pyproject.toml index 2b1be53..796679d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "miles-guess" -authors = [{name = "John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos", email = "milescore@ucar.edu"}] +authors = [{name = "University Corporation for Atmospheric Research", email = "milescore@ucar.edu"}] readme = "README.md" license = {file = "LICENSE"} dynamic = ["version"] @@ -32,8 +32,7 @@ dependencies = [ "nbsphinx", "myst_parser", "sphinx_book_theme", - "pytest", - "hagelslag" + "hagelslag", ] [project.optional-dependencies] @@ -41,7 +40,7 @@ tensorflow = ["tensorflow>=2.16.0"] tensorflow_gpu = ["tensorflow[and-cuda]>=2.16.0"] -torch = ["torch"] +torch = ["torch", "safetensors"] jax = ["jax"] @@ -55,4 +54,4 @@ version = {file = "mlguess/VERSION"} readme = {file = ["README.md"]} [tool.ruff] -pydocstyle.convention = "google" +lint.pydocstyle.convention = "google"