diff --git a/peslearn/constants.py b/peslearn/constants.py index 1eb1962..6f68469 100644 --- a/peslearn/constants.py +++ b/peslearn/constants.py @@ -51,7 +51,7 @@ def pes(geom_vectors, cartesian=True): axis = 0 g = np.apply_along_axis(cart1d_to_distances1d, axis, g) newX = gp.transform_new_X(g, params, Xscaler) - E, cov = final.predict(newX, full_cov=False) + E, cov = model.predict_f_compiled(newX) e = gp.inverse_transform_new_y(E,yscaler) #e = e - (insert min energy here) #e *= 219474.63 ( convert units ) diff --git a/peslearn/ml/__init__.py b/peslearn/ml/__init__.py index bc3a5c0..ee51cd3 100644 --- a/peslearn/ml/__init__.py +++ b/peslearn/ml/__init__.py @@ -3,7 +3,20 @@ from . import neural_network from . import preprocessing_helper from . import model +from . import svigp +from . import mfgp +from . import mfmodel +from . import mfnn +from . import diff_nn +from . import gpytorch_gpr +from . import mfgp_nsp from .gaussian_process import GaussianProcess from .data_sampler import DataSampler from .neural_network import NeuralNetwork +from .svigp import SVIGP +from .mfgp import MFGP +from .mfmodel import MFModel +from .gpytorch_gpr import GaussianProcess as GpyGPR +from .mfgp_nsp import MFGP_NSP +#from .mfnn.dual import DualNN \ No newline at end of file diff --git a/peslearn/ml/diff_nn/__init__.py b/peslearn/ml/diff_nn/__init__.py new file mode 100755 index 0000000..0d5bb12 --- /dev/null +++ b/peslearn/ml/diff_nn/__init__.py @@ -0,0 +1,5 @@ +from . import diff_model +from . import diff_neural_network + +from .diff_model import DiffModel +from .diff_neural_network import DiffNeuralNetwork \ No newline at end of file diff --git a/peslearn/ml/diff_nn/diff_model.py b/peslearn/ml/diff_nn/diff_model.py new file mode 100755 index 0000000..9794cf1 --- /dev/null +++ b/peslearn/ml/diff_nn/diff_model.py @@ -0,0 +1,23 @@ +from ..model import Model +from ...constants import bohr2angstroms +import re + +class DiffModel(Model): + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, der_lvl=0, train_path=None, test_path=None, valid_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + nletters = re.findall(r"[A-Z]", self.molecule_type) + nnumbers = re.findall(r"\d", self.molecule_type) + nnumbers2 = [int(i) for i in nnumbers] + self.natoms = len(nletters) + sum(nnumbers2) - len(nnumbers2) + # Assuming Cartesian coordinates, and Cartesian gradients and Hessians in Bohr + ncart = self.natoms*3 + nhess = ncart**2 + if der_lvl == 1: + self.raw_grad = self.raw_X[:,ncart:2*ncart] + self.raw_X = self.raw_X[:, :ncart] + elif der_lvl == 2: + self.raw_hess = self.raw_X[:, 2*ncart:nhess+2*ncart] + self.raw_grad = self.raw_X[:, ncart:2*ncart] + self.raw_X = self.raw_X[:, :ncart] + else: + raise ValueError(f"Error: Invalid value for `der_lvl': {der_lvl}") diff --git a/peslearn/ml/diff_nn/diff_neural_network.py b/peslearn/ml/diff_nn/diff_neural_network.py new file mode 100644 index 0000000..4917a9b --- /dev/null +++ b/peslearn/ml/diff_nn/diff_neural_network.py @@ -0,0 +1,666 @@ +from ..neural_network import NeuralNetwork +from ..model import Model +from .diff_model import DiffModel +import torch +import torch.nn as nn +torch.set_num_threads(8) +print(torch.get_num_threads()) +from torch import autograd +import numpy as np +from collections import OrderedDict +import copy +from copy import deepcopy +from sklearn.model_selection import train_test_split +from .utils import Pip_B +from .utils.transform_deriv import degree_B1, 
degree_B2, morse, morse_B1, morse_B2 +from .utils.cart_dist import cart_dist_B_2 +import os +from ...constants import package_directory, hartree2cm +import re +from ..preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler + +class DiffNeuralNetwork(NeuralNetwork): + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, der_lvl = 0, + train_path=None, test_path=None, valid_path=None, + grad_input_obj=None, hess_input_obj=None, grad_data_path=None, hess_data_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + # Assume raw_X is Cartesian, if not we got problems. Same with grad and Hess data, assume Cartesian basis + # Calc. X in interatomic distance basis + nletters = re.findall(r"[A-Z]", self.molecule_type) + nnumbers = re.findall(r"\d", self.molecule_type) + nnumbers2 = [int(i) for i in nnumbers] + self.natoms = len(nletters) + sum(nnumbers2) - len(nnumbers2) + self.n_interatomics = int(0.5 * (self.natoms * self.natoms - self.natoms)) + + if self.pip: + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + self.pip_B = Pip_B(path, self.n_interatomics) + #self.raw_X_mod = self.raw_X.reshape((self.raw_X.shape[0], self.natoms, 3)) + #self.raw_Xr = np.zeros((self.raw_X.shape[0], self.n_interatomics)) + + self.train_grad = der_lvl == 1 or der_lvl == 2 + self.train_hess = der_lvl == 2 + self.grad = False + self.hess = False + + self.fullraw_X = deepcopy(self.raw_X) + self.fullraw_y = deepcopy(self.raw_y) + self.raw_Xr = self.cart_to_interatomic(self.raw_X) + #self.fullraw_X.reshape((-1,1)) + + #for atom in range(1, self.natoms): + # # Create an array of duplicated cartesian coordinates of this particular atom, for every geometry, which is the same shape as 'cartesians' + # tmp1 = np.broadcast_to(self.raw_X_mod[:,atom,:], (self.raw_X.shape[0], 3)) + # tmp2 = np.tile(tmp1, (self.natoms,1,1)).transpose(1,0,2) + # # Take the non-redundant norms of this atom to all atoms after it in cartesian array + # diff = tmp2[:, 0:atom,:] - self.raw_X_mod[:, 0:atom,:] + # norms = np.sqrt(np.einsum('...ij,...ij->...i', diff , diff)) + # # Fill in the norms into interatomic distances 2d array , n_interatomic_distances) + # if atom == 1: + # idx1, idx2 = 0, 1 + # if atom > 1: + # x = int((atom**2 - atom) / 2) + # idx1, idx2 = x, x + atom + # self.raw_Xr[:, idx1:idx2] = norms + if grad_input_obj is not None: + self.grad = True + self.grad_model = DiffModel(grad_data_path, grad_input_obj, molecule_type, molecule, der_lvl=1) + self.grad_offset = self.fullraw_X.shape[0] + #self.fullraw_X = np.vstack((self.fullraw_X, self.grad_model.raw_X)) + #self.fullraw_y = np.vstack((self.fullraw_y, self.grad_model.raw_y)) + #self.raw_Xr_grad = self.cart_to_interatomic(self.grad_model.raw_X) + #self.grad_model.raw_grad = self.grad_model.raw_grad[:,-1].reshape((-1,1)) + #print(self.grad_model.raw_grad) + #self.grad_model.raw_X = self.grad_model.raw_X[:,0:2]#.reshape((-1,1)) + self.fullraw_X = np.vstack((self.fullraw_X, self.grad_model.raw_X)) + self.fullraw_y = np.vstack((self.fullraw_y, self.grad_model.raw_y)) + + if self.grad_model.input_obj.keywords["validation_points"]: + self.nvalid_grad = self.grad_model.input_obj.keywords["validation_points"] + if (self.nvalid_grad + self.grad_model.ntrain + 1) > self.grad_model.n_datapoints: + raise Exception("Error: User-specified training set size and validation set size exceeds the size of the dataset.") + else: + self.nvalid_grad = 
round((self.grad_model.n_datapoints - self.grad_model.ntrain) / 2) + if hess_input_obj is not None: + self.hess = True + self.hess_model = DiffModel(hess_data_path, hess_input_obj, molecule_type, molecule, der_lvl=2) + self.raw_Xr_hess = self.cart_to_interatomic(self.hess_model.raw_X) + if self.hess_model.input_obj.keywords["validation_points"]: + self.nvalid_hess = self.hess_model.input_obj.keywords["validation_points"] + if (self.nvalid_hess + self.hess_model.ntrain + 1) > self.hess_model.n_datapoints: + raise Exception("Error: User-specified training set size and validation set size exceeds the size of the dataset.") + else: + self.nvalid_grad = round((self.grad_model.n_datapoints - self.grad_model.ntrain) / 2) + if not self.grad and not self.hess: + raise Exception("Not much point in using this Neural Network without gradients or Hessians") + + self.ndat_full = self.fullraw_X.shape[0] + self.fullraw_Xr = self.cart_to_interatomic(self.fullraw_X) + + def cart_to_interatomic(self, cartflat): + ndat = cartflat.shape[0] + cart_3d = cartflat.reshape((ndat, self.natoms, 3)) + Xr = np.zeros((ndat, self.n_interatomics)) + for atom in range(1, self.natoms): + # Create an array of duplicated cartesian coordinates of this particular atom, for every geometry, which is the same shape as 'cartesians' + tmp1 = np.broadcast_to(cart_3d[:,atom,:], (ndat, 3)) + tmp2 = np.tile(tmp1, (self.natoms,1,1)).transpose(1,0,2) + # Take the non-redundant norms of this atom to all atoms after it in cartesian array + diff = tmp2[:, 0:atom,:] - cart_3d[:, 0:atom,:] + norms = np.sqrt(np.einsum('...ij,...ij->...i', diff , diff)) + # Fill in the norms into interatomic distances 2d array , n_interatomic_distances) + if atom == 1: + idx1, idx2 = 0, 1 + if atom > 1: + x = int((atom**2 - atom) / 2) + idx1, idx2 = x, x + atom + Xr[:, idx1:idx2] = norms + return Xr + + def split_train_test(self, params, validation_size=None, grad_validation_size=None, hess_validation_size=None, precision=32): + # Do preprocess with interatomic distances + #self.full_X, self.full_y, self.Xscaler, self.yscaler = self.preprocess(params, self.fullraw_Xr, self.fullraw_y) + # All geometries and energies preprocessed + self.full_X, self.full_y, self.Xscaler, self.yscaler = self.preprocess(params, self.fullraw_Xr, self.fullraw_y) # Cartesian inputs + + # Full partitions of energy, gradient, and Hessian datasets + self.X = self.full_X[0:self.n_datapoints] + self.y = self.full_y[0:self.n_datapoints] + if self.grad: + # Cartesian + self.X_grad = self.full_X[self.grad_offset:self.grad_offset+self.grad_model.n_datapoints] + self.y_grad = self.full_y[self.grad_offset:self.grad_offset+self.grad_model.n_datapoints] + #self.X_grad = self.grad_model.raw_X + #self.y_grad = self.grad_model.raw_y + self.grad_grad = self.grad_model.raw_grad + if self.hess: + # Cartesian + self.X_hess = self.hess_model.raw_X + self.y_hess = self.hess_model.raw_y + self.grad_hess = self.hess_model.raw_grad + self.hess_hess = self.hess_model.raw_hess + + # Grads and Hess stay raw as transformations are not well defined + #if self.grad: + # self.X_grad, self.y_grad, self.Xscaler_grad, self.yscaler_grad = self.preprocess(params, self.grad_model.raw_X, self.grad_model.raw_y) + #if self.hess: + # self.X_hess, self.y_hess, self.Xscaler_hess, self.yscaler_hess = self.preprocess(params, self.hess_model.raw_X, self.hess_model.raw_y) + + if self.sampler == 'user_supplied': + # TODO: Not implemented + raise Exception("User supplied sampling not supported for differentiable neural networks") + 
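+ # NOTE: user-supplied sampling is not implemented for this class; the transforms below are unreachable after the raise above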
self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + #if self.grad: + # self.Xtr_grad = self.transform_new_X(self.grad_model.raw_Xtr, params, self.Xscaler_grad) + # self.ytr_grad = self.transform_new_y(self.grad_model.raw_ytr, self.yscaler_grad) + # self.Xtest_grad = self.transform_new_X(self.grad_model.raw_Xtest, params, self.Xscaler_grad) + # self.ytest_grad = self.transform_new_y(self.grad_model.raw_ytest, self.yscaler_grad) + #if self.hess: + # self.Xtr_hess = self.transform_new_X(self.hess_model.raw_Xtr, params, self.Xscaler_hess) + # self.ytr_hess = self.transform_new_y(self.hess_model.raw_ytr, self.yscaler_hess) + # self.Xtest_hess = self.transform_new_X(self.hess_model.raw_Xtest, params, self.Xscaler_hess) + # self.ytest_hess = self.transform_new_y(self.hess_model.raw_ytest, self.yscaler_hess) + + if self.valid_path: + self.Xvalid = self.transform_new_X(self.raw_Xvalid, params, self.Xscaler) + self.yvalid = self.transform_new_y(self.raw_yvalid, self.yscaler) + #if self.grad: + # self.Xvalid_grad = self.transform_new_X(self.grad_model.raw_Xvalid, params, self.Xscaler_grad) + # self.yvalid_grad = self.transform_new_y(self.grad_model.raw_yvalid, self.yscaler_grad) + #if self.hess: + # self.Xvalid_hess = self.transform_new_X(self.hess_model.raw_Xvalid, params, self.Xscaler_hess) + # self.yvalid_hess = self.transform_new_y(self.hess_model.raw_yvalid, self.yscaler_hess) + else: + raise Exception("Please provide a validation set for Neural Network training.") + + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + if self.grad: + self.full_indices_grad = np.arange(self.y_grad.shape[0]) + self.Xtr_grad = self.X_grad[self.grad_model.train_indices] + self.ytr_grad = self.y_grad[self.grad_model.train_indices] + self.gradtr_grad = self.grad_grad[self.grad_model.train_indices] + #if self.hess: + # self.full_indices_hess = np.arange(self.y_hess.shape[0]) + # self.Xtr_hess = self.X_hess[self.hess_model.train_indices] + # self.ytr_hess = self.y_hess[self.hess_model.train_indices] + #TODO: this is splitting validation data in the same way at every model build, not necessary. 
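+ # Hold out part of the test indices as a fixed validation set (fixed random_state keeps the split reproducible); the gradient data are split the same way below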
+ self.valid_indices, self.new_test_indices = train_test_split(self.test_indices, train_size = validation_size, random_state=42) + #if self.grad: + # self.valid_indices_grad, self.new_test_indices_grad = train_test_split(self.grad_model.test_indices, train_size = grad_validation_size, random_state=42) + #if self.hess: + # self.valid_indices_hess, self.new_test_indices_hess = train_test_split(self.hess_model.test_indices, train_size = hess_validation_size, random_state=42) + if validation_size: + self.Xvalid = self.X[self.valid_indices] + self.yvalid = self.y[self.valid_indices] + self.Xtest = self.X[self.new_test_indices] + self.ytest = self.y[self.new_test_indices] + if self.grad: + self.valid_indices_grad, self.new_test_indices_grad = train_test_split(self.grad_model.test_indices, train_size = grad_validation_size, random_state=42) + self.Xvalid_grad = self.X_grad[self.valid_indices_grad] + self.yvalid_grad = self.y_grad[self.valid_indices_grad] + self.gradvalid_grad = self.grad_grad[self.valid_indices_grad] + self.Xtest_grad = self.X_grad[self.new_test_indices_grad] + self.ytest_grad = self.y_grad[self.new_test_indices_grad] + self.gradtest_grad = self.grad_grad[self.new_test_indices_grad] + #if self.hess: + # self.Xvalid_hess = self.X_hess[self.valid_indices_hess] + # self.yvalid_hess = self.y_hess[self.valid_indices_hess] + # self.Xtest_hess = self.X_hess[self.new_test_indices_hess] + # self.ytest_hess = self.y_hess[self.new_test_indices_hess] + else: + raise Exception("Please specify a validation set size for Neural Network training.") + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32, requires_grad=True) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32, requires_grad=True) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float32, requires_grad=True) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float32) + self.X = torch.tensor(self.X,dtype=torch.float32, requires_grad=True) + self.y = torch.tensor(self.y,dtype=torch.float32) + + if self.grad: + self.Xtr_grad = torch.tensor(self.Xtr_grad, dtype=torch.float32, requires_grad=True) + self.ytr_grad = torch.tensor(self.ytr_grad, dtype=torch.float32) + self.gradtr_grad = torch.tensor(self.gradtr_grad, dtype=torch.float32, requires_grad=True) + self.Xtest_grad = torch.tensor(self.Xtest_grad, dtype=torch.float32, requires_grad=True) + self.ytest_grad = torch.tensor(self.ytest_grad, dtype=torch.float32) + self.gradtest_grad = torch.tensor(self.gradtest_grad, dtype=torch.float32, requires_grad=True) + self.Xvalid_grad = torch.tensor(self.Xvalid_grad, dtype=torch.float32, requires_grad=True) + self.yvalid_grad = torch.tensor(self.yvalid_grad, dtype=torch.float32) + self.gradvalid_grad = torch.tensor(self.gradvalid_grad, dtype=torch.float32, requires_grad=True) + # Full gradient data sets + self.X_grad = torch.tensor(self.X_grad, dtype=torch.float32, requires_grad=True) + self.y_grad = torch.tensor(self.y_grad, dtype=torch.float32) + self.grad_grad = torch.tensor(self.grad_grad, dtype=torch.float32, requires_grad=True) + + if False: + # WTF is all this? 
Consider renaming + #self.Xtr_grad = torch.tensor(self.Xtr_grad, dtype=torch.float32) + #self.ytr_grad = torch.tensor(self.ytr_grad, dtype=torch.float32) + # Geom of gradient training points + self.Xtr_t_grad = torch.tensor(self.full_X[self.grad_offset + self.grad_model.train_indices], dtype=torch.float32, requires_grad=True) + ## grad gradient training points + self.gradtr_grad = torch.tensor(self.grad_grad[self.grad_model.train_indices], dtype=torch.float32, requires_grad=True) + #self.gradtr_grad = torch.tensor(self.grad_model.raw_grad[self.grad_model.train_indices], dtype=torch.float32, requires_grad=True) + + #self.Xtest_grad = torch.tensor(self.Xtest_grad, dtype=torch.float32) + #self.ytest_grad = torch.tensor(self.ytest_grad, dtype=torch.float32) + #self.Xvalid_grad = torch.tensor(self.Xvalid_grad,dtype=torch.float32) + #self.yvalid_grad = torch.tensor(self.yvalid_grad,dtype=torch.float32) + + # Test geometries for grad + self.Xt_grad = torch.tensor(self.full_X[self.grad_offset:],dtype=torch.float32, requires_grad=True) + #self.X_grad = torch.tensor(self.X_grad,dtype=torch.float32, requires_grad=True) + # All grad energies + self.y_grad = torch.tensor(self.full_y[self.grad_offset:],dtype=torch.float32) + # All grad gradients + self.grad_grad = torch.tensor(self.grad_grad, dtype=torch.float32) + if self.hess: + self.Xtr_hess = torch.tensor(self.Xtr_hess, dtype=torch.float32) + self.ytr_hess = torch.tensor(self.ytr_hess, dtype=torch.float32) + self.Xtest_hess = torch.tensor(self.Xtest_hess, dtype=torch.float32) + self.ytest_hess = torch.tensor(self.ytest_hess, dtype=torch.float32) + self.Xvalid_hess = torch.tensor(self.Xvalid_hess,dtype=torch.float32) + self.yvalid_hess = torch.tensor(self.yvalid_hess,dtype=torch.float32) + self.X_hess = torch.tensor(self.X_hess,dtype=torch.float32) + self.y_hess = torch.tensor(self.y_hess,dtype=torch.float32) + elif precision == 64: + raise Exception("64 bit float in diff_neural_network not supported currently") + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float64) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float64) + self.X = torch.tensor(self.X,dtype=torch.float64) + self.y = torch.tensor(self.y,dtype=torch.float64) + if self.grad: + self.Xtr_grad = torch.tensor(self.Xtr_grad, dtype=torch.float64, requires_grad=True) + self.ytr_grad = torch.tensor(self.ytr_grad, dtype=torch.float64) + self.Xtest_grad = torch.tensor(self.Xtest_grad, dtype=torch.float64, requires_grad=True) + self.ytest_grad = torch.tensor(self.ytest_grad, dtype=torch.float64) + self.Xvalid_grad = torch.tensor(self.Xvalid_grad,dtype=torch.float64, requires_grad=True) + self.yvalid_grad = torch.tensor(self.yvalid_grad,dtype=torch.float64) + self.X_grad = torch.tensor(self.X_grad,dtype=torch.float64, requires_grad=True) + self.y_grad = torch.tensor(self.y_grad,dtype=torch.float64) + if self.hess: + self.Xtr_hess = torch.tensor(self.Xtr_hess, dtype=torch.float64) + self.ytr_hess = torch.tensor(self.ytr_hess, dtype=torch.float64) + self.Xtest_hess = torch.tensor(self.Xtest_hess, dtype=torch.float64) + self.ytest_hess = torch.tensor(self.ytest_hess, dtype=torch.float64) + self.Xvalid_hess = torch.tensor(self.Xvalid_hess,dtype=torch.float64) + self.yvalid_hess = torch.tensor(self.yvalid_hess,dtype=torch.float64) + self.X_hess = 
torch.tensor(self.X_hess,dtype=torch.float64) + self.y_hess = torch.tensor(self.y_hess,dtype=torch.float64) + else: + raise Exception("Invalid option for 'precision'") + + def build_model(self, params, maxit=1000, val_freq=10, es_patience=2, opt='lbfgs', tol=1, decay=False, verbose=False, precision=32, return_model=False): + #params["morse_transform"]["morse"] = False + #params["pip"]["pip"] = False + print("Hyperparameters: ", params) + self.split_train_test(params, validation_size=self.nvalid, precision=precision) # split data, according to scaling hp's + scale = params['scale_y'] # Find descaling factor to convert loss to original energy units + if scale == 'std': + loss_descaler = self.yscaler.var_[0] + if scale.startswith('mm'): + loss_descaler = (1/self.yscaler.scale_[0]**2) + activation = params['scale_X']['activation'] + if activation == 'tanh': + activ = nn.Tanh() + if activation == 'sigmoid': + activ = nn.Sigmoid() + inp_dim = self.X.shape[1] + #inp_dim = self.inp_dim + l = params['layers'] + torch.manual_seed(0) + depth = len(l) + structure = OrderedDict([('input', nn.Linear(inp_dim, l[0])), + ('activ_in' , activ)]) + model = nn.Sequential(structure) + for i in range(depth-1): + model.add_module('layer' + str(i), nn.Linear(l[i], l[i+1])) + model.add_module('activ' + str(i), activ) + model.add_module('output', nn.Linear(l[depth-1], 1)) + if precision == 64: # cast model to proper precision + model = model.double() + + metric = torch.nn.MSELoss() + # Define optimizer + if 'lr' in params: + lr = params['lr'] + elif opt == 'lbfgs': + lr = 0.5 + else: + lr = 0.1 + optimizer = self.get_optimizer(opt, model.parameters(), lr=lr) + # Define update variables for early stopping, decay, gradient explosion handling + prev_loss = 1.0 + es_tracker = 0 + best_val_error = None + failures = 0 + decay_attempts = 0 + prev_best = None + decay_start = False + #labmda = 1e-4 + for epoch in range(1,maxit): + #print(f"Begin epoch {epoch}") + def closure(): + optimizer.zero_grad() + y_pred = model(self.Xtr) + #l2_norm = sum(p.pow(2.0).sum() for p in model.parameters()) + loss = torch.sqrt(metric(y_pred, self.ytr)) + if self.train_grad: + #y_pred_grad = model(self.Xtr_t_grad) + y_pred_grad = model(self.Xtr_grad) + #gradspred, = autograd.grad(y_pred_grad, self.Xtr_t_grad, + # grad_outputs=y_pred_grad.data.new(y_pred_grad.shape).fill_(1), + # create_graph=True) + gradspred, = autograd.grad(y_pred_grad, self.Xtr_grad, + grad_outputs=y_pred_grad.data.new(y_pred_grad.shape).fill_(1), + create_graph=True) + gradpred_cart = self.transform_grad(self.grad_model.train_indices, gradspred, params, self.Xscaler, self.yscaler, precision=precision) + #grad_error = torch.sqrt(torch.sum((gradpred_cart - self.gradtr_grad)**2)) + grad_e_error = torch.sqrt(metric(y_pred_grad, self.ytr_grad)) + grad_grad_error = torch.sqrt(torch.mean(torch.sum(1.0*(gradpred_cart - self.gradtr_grad) ** 2,dim=1).reshape(-1,1))) + floss = loss + grad_e_error + grad_grad_error + if self.train_hess: + floss += 0.0 + if self.train_grad: + floss.backward() + return floss + else: + loss.backward() + return loss + optimizer.step(closure) + # validate + if epoch % val_freq == 0: + #if self.grad: + # valid_grad_pred = model(self.Xvalid_grad) + # valid_gradspred, = autograd.grad(valid_grad_pred, self.Xvalid_grad, + # grad_outputs=valid_grad_pred.data.new(valid_grad_pred.shape).fill_(1), + # create_graph=True) + with torch.no_grad(): + tmp_pred = model(self.Xvalid) + tmp_loss = metric(tmp_pred, self.yvalid) + val_error_rmse = np.sqrt(tmp_loss.item() * 
loss_descaler) * hartree2cm # loss_descaler converts MSE in scaled data domain to MSE in unscaled data domain + if self.train_grad: + valid_grad_pred = model(self.Xvalid_grad) + valid_grad_loss = metric(valid_grad_pred, self.yvalid_grad) + valid_grad_E_error_rmse = np.sqrt(valid_grad_loss.item() * loss_descaler) * hartree2cm + #valid_grad_cart = self.transform_grad(self.valid_indices_grad, valid_gradspred, params, self.Xscaler, self.yscaler, precision=precision) + #valid_grad_grad_error_rmse = torch.sqrt(torch.mean(torch.sum(1.0*(valid_grad_cart - self.grad_grad) ** 2,dim=1).reshape(-1,1))) * hartree2cm + # Add energy RMSE from gradient data set to valid_error_rmse + val_error_rmse = np.sqrt(((val_error_rmse**2) + (valid_grad_E_error_rmse**2))/2.0) + if best_val_error: + if val_error_rmse < best_val_error: + prev_best = best_val_error * 1.0 + best_val_error = val_error_rmse * 1.0 + else: + record = True + best_val_error = val_error_rmse * 1.0 + prev_best = best_val_error + if verbose: + print("Epoch {} Validation RMSE (cm-1): {:5.3f}".format(epoch, val_error_rmse)) + if decay_start: + scheduler.step(val_error_rmse) + + # Early Stopping + if epoch > 5: + # if current validation error is not the best (current - best > 0) and is within tol of previous error, the model is stagnant. + if ((val_error_rmse - prev_loss) < tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: current validation error is not the best (current - best > 0) and is greater than the best by tol, the model is overfitting. Bad epoch. + elif ((val_error_rmse - best_val_error) > tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: if the current validation error is a new record, but not significant, the model is stagnant + elif (prev_best - best_val_error) < 0.001: + es_tracker += 1 + # else: model set a new record validation error. Reset early stopping tracker + else: + es_tracker = 0 + #TODO this framework does not detect oscillatory behavior about 'tol', though this has not been observed to occur in any case + # Check status of early stopping tracker. First try decaying to see if stagnation can be resolved, if not then terminate training + if es_tracker > es_patience: + if decay: # if decay is set to true, if early stopping criteria is triggered, begin LR scheduler and go back to previous model state and attempt LR decay. + if decay_attempts < 1: + decay_attempts += 1 + es_tracker = 0 + if verbose: + print("Performance plateau detected. 
Reverting model state and decaying learning rate.") + decay_start = True + thresh = (0.1 / np.sqrt(loss_descaler)) / hartree2cm # threshold is 0.1 wavenumbers + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, threshold=thresh, threshold_mode='abs', min_lr=0.05, cooldown=2, patience=10, verbose=verbose) + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.9 + optimizer.load_state_dict(saved_optimizer_state_dict) + # Since learning rate is decayed, override tolerance, patience, validation frequency for high-precision + #tol = 0.05 + #es_patience = 100 + #val_freq = 1 + continue + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + + # Handle exploding gradients + if epoch > 10: + if (val_error_rmse > prev_loss*10): # detect large increases in loss + if epoch > 60: # distinguish between exploding gradients at near converged models and early on exploding grads + if verbose: + print("Exploding gradient detected. Resuming previous model state and decaying learning rate") + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.5 + optimizer.load_state_dict(saved_optimizer_state_dict) + failures += 1 # if + if failures > 2: + break + else: + continue + else: + break + if val_error_rmse != val_error_rmse: # detect NaN + break + if ((prev_loss < 1.0) and (precision == 32)): # if 32 bit precision and model is giving very high accuracy, kill so the accuracy does not go beyond 32 bit precision + break + prev_loss = val_error_rmse * 1.0 # save previous loss to track improvement + + # Periodically save model state so we can reset under instability/overfitting/performance plateau + if epoch % 50 == 0: + saved_model_state_dict = copy.deepcopy(model.state_dict()) + saved_optimizer_state_dict = copy.deepcopy(optimizer.state_dict()) + + with torch.no_grad(): + test_pred = model(self.Xtest) + test_loss = metric(test_pred, self.ytest) + test_error_rmse = np.sqrt(test_loss.item() * loss_descaler) * hartree2cm + val_pred = model(self.Xvalid) + val_loss = metric(val_pred, self.yvalid) + val_error_rmse = np.sqrt(val_loss.item() * loss_descaler) * hartree2cm + full_pred = model(self.X) + full_loss = metric(full_pred, self.y) + full_error_rmse = np.sqrt(full_loss.item() * loss_descaler) * hartree2cm + + # Error w/ grad + grad_full_pred = model(self.X_grad) + full_gradspred, = autograd.grad(grad_full_pred, self.X_grad, + grad_outputs=grad_full_pred.data.new(grad_full_pred.shape).fill_(1), + create_graph=True) + with torch.no_grad(): + grad_full_loss = metric(grad_full_pred, self.y_grad) + grad_test_loss = metric(grad_full_pred, self.y_grad) + grad_val_loss = metric(grad_full_pred, self.y_grad) + grad_full_E_error_rmse = np.sqrt(grad_full_loss.item() * loss_descaler) * hartree2cm + grad_test_E_error_rmse = np.sqrt(grad_test_loss.item() * loss_descaler) * hartree2cm + grad_val_E_error_rmse = np.sqrt(grad_val_loss.item() * loss_descaler) * hartree2cm + grad_full_gradcart = self.transform_grad(slice(self.grad_offset, self.ndat_full), full_gradspred, params, self.Xscaler, self.yscaler, precision=precision) + grad_test_gradcart = grad_full_gradcart[self.new_test_indices_grad] + grad_val_gradcart = grad_full_gradcart[self.valid_indices_grad] + + grad_full_grad_error_rmse = torch.sqrt(torch.mean(torch.sum(1.0*(grad_full_gradcart - 
self.grad_grad) ** 2,dim=1).reshape(-1,1))) * hartree2cm + grad_test_grad_error_rmse = torch.sqrt(torch.mean(torch.sum(1.0*(grad_test_gradcart - self.gradtest_grad) ** 2,dim=1).reshape(-1,1))) * hartree2cm + grad_val_grad_error_rmse = torch.sqrt(torch.mean(torch.sum(1.0*(grad_val_gradcart - self.gradvalid_grad) ** 2,dim=1).reshape(-1,1))) * hartree2cm + + #print(f"Grad. Energy Error: {full_grad_E_error_rmse.item()} Gradient Error: {full_grad_grad_error_rmse.item()}") + + output_str = " {} set RMSE (cm-1):" + print(f"{output_str.format('Test'):45s} {test_error_rmse:5.2f}") + print(f"{output_str.format('Validation'):45s} {val_error_rmse:5.2f}") + print(f"{output_str.format('Full'):45s} {full_error_rmse:5.2f}") + #print("Test set RMSE (cm-1): {:5.2f} Validation set RMSE (cm-1): {:5.2f} Full dataset RMSE (cm-1): {:5.2f}".format(test_error_rmse, val_error_rmse, full_error_rmse)) + if self.grad: + grad_output_str = " {} set RMSE {} (cm-1{}):" + print(f"{grad_output_str.format('Test','Energy',''):45s} {grad_test_E_error_rmse:5.2f}") + print(f"{grad_output_str.format('Validation','Energy',''):45s} {grad_val_E_error_rmse:5.2f}") + print(f"{grad_output_str.format('Full','Energy',''):45s} {grad_full_E_error_rmse:5.2f}") + print(f"{grad_output_str.format('Test','Gradient','/bohr'):45s} {grad_test_grad_error_rmse.item():5.2f}") + print(f"{grad_output_str.format('Validation','Gradient','/bohr'):45s} {grad_val_grad_error_rmse.item():5.2f}") + print(f"{grad_output_str.format('Full','Gradient','/bohr'):45s} {grad_full_grad_error_rmse.item():5.2f}") + #print("Grad. Test set RMSE Energy (cm-1): {:5.2f} Grad. Validation set RMSE Energy (cm-1): {:5.2f} Grad. Full dataset RMSE Energy (cm-1): {:5.2f}".format(grad_test_E_error_rmse, grad_val_E_error_rmse, grad_full_E_error_rmse)) + #print("Grad. Test set RMSE Gradient (cm-1/bohr): {:5.2f} Grad. Validation set RMSE Gradient (cm-1/bohr): {:5.2f} Grad. Full dataset RMSE Gradient (cm-1/bohr): {:5.2f}".format(grad_test_grad_error_rmse.item(), grad_val_grad_error_rmse.item(), grad_full_grad_error_rmse.item())) + #print(f"Grad. Energy Test set RMSE (cm-1): {full_grad_E_error_rmse.item():5.2f} ") + #print(f"Grad. Energy Test set RMSE (cm-1): {full_grad_E_error_rmse.item():5.2f} Gradient Error: {full_grad_grad_error_rmse.item()}") + #assert False + if return_model: + return model, test_error_rmse, val_error_rmse, full_error_rmse + else: + return test_error_rmse, val_error_rmse + + def holland(self, X, grad_vec): + # Assume PIP true, scale_X/y std, and noting else (default params of NN architecture search) + # Transfrom known gradient from Cartesian to PIP, NOT GENERALIZABLE!!! + print(grad_vec) + # Cart to dist + ndat = grad_vec.shape[0] + ncart = self.natoms*3 + nr = self.n_interatomics + X_r, B1_r, B2_r = cart_dist_B_2(X) # dr/dx + print(B1_r) + print("SVD") + print(np.linalg.svd(B1_r)[1][:,-1]) + A_r = np.zeros((ndat, ncart, nr)) + for i in range(ndat): + A_r[i,:,:] = np.linalg.pinv(B1_r[i,:,:]) + print(A_r) + print(np.dot(B1_r[0,:,:], A_r[0,:,:])) + print(np.dot(A_r[0,:,:], B1_r[0,:,:])) + # Calc. C + #C_r = np.einsum("naij,nir,njs->nars", B2_r, A_r, A_r) + + # Calc. Grad. and Hess. in interatomic dist. from Cart. 
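+ # Chain rule via the pseudoinverse A_r = pinv(B1_r) ~ dx/dr: dE/dr_p = sum_i (dE/dx_i) * A_r[i,p]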
+ G_r = np.einsum("ni,nir->nr", grad_vec, A_r) + print(G_r) + # Dist to PIP + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + X_p, degrees, B1_p, B2_p = self.pip_B.transform(path, X_r) + npip = X_p.shape[1] + A_p = np.zeros((ndat, nr, npip)) + for i in range(ndat): + A_p[i,:,:] = np.linalg.pinv(B1_p[i,:,:]) + G_p = np.einsum("ni,nir->nr", G_r, A_p) + print(G_p) + # PIP to std PIP + X_scale = self.Xscaler.transform(X_p) + G_scale = G_p / self.yscaler.scale_ + G_scale *= self.Xscaler.scale_[None,:] + print(G_scale) + return G_scale + + def preprocess(self, params, raw_X, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + #raw_X = deepcopy(raw_X_in) + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) + self.raw_Xm = deepcopy(raw_X) + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + raw_X, self.degrees = interatomics_to_fundinvar(raw_X,path) + self.raw_Xp = deepcopy(raw_X) + if params['pip']['degree_reduction']: + raw_X = degree_reduce(raw_X, self.degrees) + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X']['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + y = raw_y + yscaler = None + return X, y, Xscaler, yscaler + + def transform_grad(self, Xindices, grad, params, Xscaler=None, yscaler=None, precision=32): + if precision == 32: + dtype = torch.float32 + elif precision == 64: + dtype = torch.float64 + # Transform gradient from NN to Cartesian + scaler = torch.ones(grad.shape[1], dtype=dtype) + if yscaler: + # Multiply by stdev of E, dE/dX = dEmean/dX * sigma + if params['scale_y'] == "std": + #grad *= torch.from_numpy(yscaler.scale_) + scaler *= torch.tensor(yscaler.scale_, dtype=dtype) + else: + #grad /= torch.from_numpy(yscaler.scale_) + scaler /= torch.tensor(yscaler.scale_, dtype=dtype) + #grad *= np.sqrt(yscaler) + if Xscaler: + # Divide by stdev of X, dXmean/dX = 1/sigma + #X_std = torch.from_numpy(Xscaler.scale_[None,:]) + X_sc = torch.tensor(Xscaler.scale_, dtype=dtype) + #X = Xscaler.inverse_transform(X) + if params['scale_X']['scale_X'] == "std": + #grad *= X_std**-1 + scaler *= X_sc**-1 + else: + #grad *= X_std + scaler *= X_sc + # Rename lower grads TODO + scaled_grad = grad * scaler + if params['pip']['pip']: + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + #X, degrees, B1_p, B2_p = pip_B(path, X) + if params['pip']['degree_reduction']: + B1_dr = degree_B1(self.raw_Xp[Xindices,:], self.degrees) + #B2_dr = degree_B2(X, degrees) + scaled_grad *= torch.from_numpy(B1_dr) + if params['morse_transform']['morse']: + #X, degrees, B1_p, B2_p = pip_B(path, self.raw_Xm[Xindices,:]) + X, degrees, B1_p, B2_p = self.pip_B.transform(self.raw_Xm[Xindices,:]) + else: + #X, degrees, B1_p, B2_p = pip_B(path, self.fullraw_Xr[Xindices,:]) + X, degrees, B1_p, B2_p = self.pip_B.transform(self.fullraw_Xr[Xindices,:]) + scaled_grad = torch.einsum("np,npi->ni", scaled_grad, torch.tensor(B1_p, dtype=dtype)) + if params['morse_transform']['morse']: + B1_m = morse_B1(self.fullraw_Xr[Xindices,:], alpha=params['morse_transform']['morse_alpha']) + scaled_grad = scaled_grad * torch.tensor(B1_m, dtype=dtype) + #return scaled_grad + # r to Cart. 
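+ # Final chain-rule step back to Cartesians: dE/dx_i = sum_p (dE/dr_p) * B1_r[p,i], with B1_r = dr/dx from cart_dist_B_2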
+ X_r, B1_r, B2_r = cart_dist_B_2(self.fullraw_X[Xindices]) # dr/dx + #print(B1_r[5,:]) + grad_cart = torch.einsum("np,npi->ni", scaled_grad, torch.tensor(B1_r, dtype=dtype)) + return grad_cart + + def transform_hess(self, hess, params): + # Transform Hessian from NN to Cartesian + pass diff --git a/peslearn/ml/diff_nn/utils/__init__.py b/peslearn/ml/diff_nn/utils/__init__.py new file mode 100755 index 0000000..3095ce4 --- /dev/null +++ b/peslearn/ml/diff_nn/utils/__init__.py @@ -0,0 +1,5 @@ +from . import cart_dist +from . import pip +from . import transform_deriv + +from .pip import Pip_B diff --git a/peslearn/ml/diff_nn/utils/cart_dist.py b/peslearn/ml/diff_nn/utils/cart_dist.py new file mode 100755 index 0000000..c644072 --- /dev/null +++ b/peslearn/ml/diff_nn/utils/cart_dist.py @@ -0,0 +1,33 @@ +import numpy as np + +def cart_dist_B_2(X, do_hess=False): + ndat, nvar = X.shape + natom = round(nvar/3) + X = X.reshape((ndat,natom,3)) + nr = round((natom**2 - natom) / 2) + R = np.zeros((ndat, nr)) + ctr = 0 + for atomi in range(natom): + for atomj in range(atomi): + R[:,ctr] = np.linalg.norm(X[:,atomi,:] - X[:,atomj,:], axis=1) + ctr += 1 + B1 = np.zeros((ndat, nr, nvar)) + B2 = np.zeros((ndat, nr, nvar, nvar)) + for atomi in range(natom): + for atomj in range(atomi): + r = round((atomi*(atomi-1))/2) + atomj + B1[:, r, 3*atomi:3*(atomi+1)] = np.divide((X[:,atomi,:] - X[:,atomj,:]), R[:,r][:,None]) + B1[:, r, 3*atomj:3*(atomj+1)] = -B1[:, r, 3*atomi:3*(atomi+1)] + if do_hess: + for atomi in range(natom): + for atomj in range(atomi): + r = round((atomi*(atomi-1))/2) + atomj + v = X[:, atomi, :] - X[:, atomj, :] + matt = np.einsum("ni, nj -> nij", v, v, optimize="optimal") + matt *= (R[:,r][:,None,None]**-3.0) + matt -= (np.identity(3) * (R[:,r][:,None,None]**-1.0)) + B2[:, r, 3*atomi:3*(atomi+1), 3*atomi:3*(atomi+1)] = -1.0 * matt + B2[:, r, 3*atomj:3*(atomj+1), 3*atomj:3*(atomj+1)] = -1.0 * matt + B2[:, r, 3*atomi:3*(atomi+1), 3*atomj:3*(atomj+1)] = matt + B2[:, r, 3*atomj:3*(atomj+1), 3*atomi:3*(atomi+1)] = matt + return R, B1, B2 diff --git a/peslearn/ml/diff_nn/utils/pip.py b/peslearn/ml/diff_nn/utils/pip.py new file mode 100755 index 0000000..889df59 --- /dev/null +++ b/peslearn/ml/diff_nn/utils/pip.py @@ -0,0 +1,114 @@ +import numpy as np +import sympy +import re + +class Pip_B(): + def __init__(self, fn, nbonds): + # Returns PIPs from X (n interatomic distances), degrees of PIPs, + # Grad. of PIP wrt dist., and Hess. of PIP wrt dist. x dist. 
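+ # Derivatives of each polynomial are built symbolically with Sympy here and evaluated numerically on arrays in transform()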
+ + # Read and prep PIP strings + with open(fn, 'r') as f: + data = f.read() + data = re.sub('\^', '**', data) + + # Define variables for Sympy + symbols = "" + for i in range(nbonds): + symbols += f"x{i} " + + variables = sympy.symbols(symbols) + for i in range(1, nbonds+1): + data = re.sub('x{}(\D)'.format(str(i)), 'x{}\\1'.format(i-1), data) + + # Define PIP equations for Sympy + self.polys = re.findall("\]=(.+)",data) + + # Calculate PIP first and second derivatives and associated "lambdified" functions + self.grad = [] + self.grad_lambda = [] + self.hess = [] + self.hess_lambda = [] + for p in self.polys: + lil_grad = [] + lil_grad_lambda = [] + lil_hess = [] + lil_hess_lambda = [] + for x1 in variables: + d1 = sympy.diff(p, x1) + lil_grad.append(d1) + lil_grad_lambda.append(re.sub(r"x(\d+)", r"X[:,\1]", str(d1))) + liller_hess = [] + liller_hess_lambda = [] + for x2 in variables: + d1d2 = sympy.diff(d1, x2) + liller_hess.append(d1d2) + liller_hess_lambda.append(re.sub(r"x(\d+)", r"X[:,\1]", str(d1d2))) + lil_hess.append(liller_hess) + lil_hess_lambda.append(liller_hess_lambda) + self.grad.append(lil_grad) + self.grad_lambda.append(lil_grad_lambda) + self.hess.append(lil_hess) + self.hess_lambda.append(lil_hess_lambda) + + # Determine nonzero second derivatives w.r.t. polynomial and the Hessian row (xi) + self.grad_not_zero = [] + self.grad_not_const = [] + self.grad_const = [] + self.hess_not_const = [] + self.hess_const = [] + self.hess_not_zero = [] + + for pi in range(len(self.polys)): + for xi in range(nbonds): + if self.grad[pi][xi] != 0: + if "x" in str(self.grad[pi][xi]): + self.grad_not_const.append((pi, xi)) + else: + self.grad_const.append((pi, xi)) + self.grad_not_zero.append((pi, xi)) + for xj in range(nbonds): + if self.hess[pi][xi][xj] != 0: + if "x" in str(self.hess[pi][xi][xj]): + self.hess_not_const.append((pi, xi, xj)) + else: + self.hess_const.append((pi, xi, xj)) + self.hess_not_zero.append((pi, xi, xj)) + + + def transform(self, X, do_hess=False): + ndat, nbonds = X.shape + new_X = np.zeros((ndat, len(self.polys))) + # Evaluate polynomials + for i, p in enumerate(self.polys): # evaluate each FI + # convert the FI to a python expression of raw_X, e.g. 
x1 + x2 becomes raw_X[:,1] + raw_X[:,2] + eval_string = re.sub(r"(x)(\d+)", r"X[:,\2]", p) + # evaluate that column's FI from columns of raw_X + new_X[:,i] = eval(eval_string) + + # Evaluate polynomial derivatives + egrad = np.zeros((ndat, len(self.polys), nbonds)) + ehess = np.zeros((ndat, len(self.polys), nbonds, nbonds)) + + for pi, xi in self.grad_not_const: + egrad[:,pi,xi] = eval(self.grad_lambda[pi][xi]) + for pi, xi in self.grad_const: + egrad[:,pi,xi] = float(self.grad[pi][xi]) + + if do_hess: + for pi, xi, xj in self.hess_not_const: + ehess[:,pi,xi,xj] = eval(self.hess_lambda[pi][xi][xj]) + for pi, xi, xj in self.hess_const: + ehess[:,pi,xi,xj] = float(self.hess[pi][xi][xj]) + + degrees = [] + for p in self.polys: + # just checking the first term; assumes every term in each FI polynomial has the same degree (seems to always be true) + tmp = p.split('+')[0] + # count number of exponents and number of occurrences of character 'x' + exps = [int(i) - 1 for i in re.findall("\*\*(\d+)", tmp)] + ndegrees = len(re.findall("x", tmp)) + sum(exps) + degrees.append(ndegrees) + + return new_X, degrees, egrad, ehess # PIP values, degrees, B1, and B2 + diff --git a/peslearn/ml/diff_nn/utils/transform_deriv.py b/peslearn/ml/diff_nn/utils/transform_deriv.py new file mode 100755 index 0000000..3b38295 --- /dev/null +++ b/peslearn/ml/diff_nn/utils/transform_deriv.py @@ -0,0 +1,37 @@ +import numpy as np + +# Scaling functions (flattened to vectors): begin +def morse(x, alpha): + # TODO: verify the exponent sign convention matches the morse transform used in preprocessing (exp(-r/alpha)) + return np.exp(-x/alpha) + +def morse_B1(x, alpha=1.0): + # dm/dr + return -1.0 * (alpha**-1.0) * morse(x, alpha) #TODO + +def morse_B2(x, alpha=1.0): + return (alpha**-2.0) * morse(x, alpha) + +def scale_mean_B1(x, stds): + return np.array([[stds[i] for i in range(len(stds))] for n in range(len(stds))]) + #return x.std(axis=0)**-1 + +def scale_mm_B1(x, bmin, bmax): + xmin = x.min(axis=0) + xmax = x.max(axis=0) + return (bmax-bmin)/(xmax-xmin) + +def degree_B1(x, degrees): + pwr = np.power(degrees, -1.0) - 1 + return np.divide(np.power(x, pwr), degrees) + +def degree_B2(x, degrees): + pwr = np.power(degrees, -1.0) - 2 + factor = np.power(degrees,-2.0) - np.power(degrees,-1.0) + return np.multiply(np.power(x, pwr), factor) + +def dist(x): + pass + +def ddist(x): + pass diff --git a/peslearn/ml/gaussian_process.py b/peslearn/ml/gaussian_process.py index 8c00dc7..5300875 100644 --- a/peslearn/ml/gaussian_process.py +++ b/peslearn/ml/gaussian_process.py @@ -29,7 +29,7 @@ def set_default_hyperparameters(self): Set default hyperparameter space. If none is provided, default is used. 
""" self.hyperparameter_space = { - 'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]), + #'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]), 'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None]), } @@ -75,6 +75,7 @@ def split_train_test(self, params): self.ytest = self.y[self.test_indices] def build_model(self, params, nrestarts=10, maxit=1000, seed=0): + params['scale_X'] = 'std' print("Hyperparameters: ", params) self.split_train_test(params) np.random.seed(seed) # make GPy deterministic for a given hyperparameter config @@ -87,7 +88,11 @@ def build_model(self, params, nrestarts=10, maxit=1000, seed=0): ard_val = False kernel = RBF(dim, ARD=ard_val) # TODO add HP control of kernel self.model = GPRegression(self.Xtr, self.ytr, kernel=kernel, normalizer=False) - self.model.optimize_restarts(nrestarts, optimizer="lbfgsb", robust=True, verbose=False, max_iters=maxit, messages=False) + #self.model.optimize_restarts(nrestarts, optimizer="lbfgsb", robust=True, verbose=False, max_iters=maxit, messages=False) + self.model.optimize(optimizer="lbfgsb", max_iters=maxit, messages=False) + #TODO + err = self.vet_model(self.model) + #TODO gc.collect(2) #fixes some memory leak issues with certain BLAS configs def hyperopt_model(self, params): @@ -117,8 +122,7 @@ def vet_model(self, model): print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') print("Median error: {}".format(np.round(median_error[0],2)), end=' ') print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') - error_test_invcm = round(hartree2cm * error_test,2) - return error_test_invcm + return error_test def preprocess(self, params, raw_X, raw_y): """ @@ -135,7 +139,6 @@ def preprocess(self, params, raw_X, raw_y): raw_X, degrees = interatomics_to_fundinvar(raw_X,path) if params['pip']['degree_reduction']: raw_X = degree_reduce(raw_X, degrees) - if params['scale_X']: X, Xscaler = general_scaler(params['scale_X'], raw_X) else: @@ -157,7 +160,8 @@ def optimize_model(self): self.hyperopt_trials = Trials() self.itercount = 1 # keep track of hyperopt iterations if self.input_obj.keywords['rseed']: - rstate = np.random.RandomState(self.input_obj.keywords['rseed']) + rstate = np.random.default_rng(self.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.input_obj.keywords['rseed']) else: rstate = None best = fmin(self.hyperopt_model, diff --git a/peslearn/ml/gpflow_gpr.py b/peslearn/ml/gpflow_gpr.py new file mode 100644 index 0000000..b0a9d0e --- /dev/null +++ b/peslearn/ml/gpflow_gpr.py @@ -0,0 +1,289 @@ +import numpy as np +import sklearn.metrics +import json +import os +import re +import sys +import gc +from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, space_eval +# Shut up tensorflow +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'} +import tensorflow as tf +import gpflow +from .model import Model +from ..constants import hartree2cm, package_directory, gp_convenience_function +from ..utils.printing_helper import hyperopt_complete +from ..lib.path import fi_dir +from .data_sampler import DataSampler +from .preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler +import time + +class GaussianProcess(Model): + """ + Constructs a Gaussian Process Model using GPFlow + """ + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + 
self.set_default_hyperparameters() + + def set_default_hyperparameters(self): + """ + Set default hyperparameter space. If none is provided, default is used. + """ + self.hyperparameter_space = { + 'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]), + 'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None]), + } + + if self.input_obj.keywords['pes_format'] == 'interatomics': + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': True,'morse_alpha': hp.quniform('morse_alpha', 1, 2, 0.1)},{'morse': False}])) + else: + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': False}])) + if self.pip: + val = hp.choice('pip',[{'pip': True,'degree_reduction': hp.choice('degree_reduction', [True,False])}]) + self.set_hyperparameter('pip', val) + else: + self.set_hyperparameter('pip', hp.choice('pip', [{'pip': False}])) + + if self.input_obj.keywords['gp_ard'] == 'opt': # auto relevancy determination (independant length scales for each feature) + self.set_hyperparameter('ARD', hp.choice('ARD', [True,False])) + # TODO add optional space inclusions, something like: if option: self.hyperparameter_space['newoption'] = hp.choice(..) + + def split_train_test(self, params): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. + Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + """ + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + self.Xtest = self.X[self.test_indices] + self.ytest = self.y[self.test_indices] + + def build_model(self, params, nrestarts=10, maxiter=1000, seed=0): + """ + Optimizes model (with specified hyperparameters) using L-BFGS-B algorithm. Does this 'nrestarts' times and returns model with + greatest marginal log likelihood. + """ + # TODO Give user control over 'nrestarts', 'maxiter', optimization method, and kernel hyperparameter initiation. + print("********************************************\n\nHyperparameters: ", params) + self.split_train_test(params) + np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + tf.random.set_seed(0) + # TODO: ARD + # Optimize model several times with random parameter initiation. This will hopefully bypass the issue with Cholesky Decomposition + models = [] + for i in range(nrestarts): + model_i, opt_i = self.init_model(seed+10*i) + try: + # Do an opt + logs = opt_i.minimize(model_i.training_loss, model_i.trainable_variables, method='L-BFGS-B', options=dict(maxiter=maxiter)) + except tf.errors.InvalidArgumentError: + print("Optimization went wild, moving on to the next iteration. 
This is why we do restarts.") + + else: + # Wrap things up + models.append(model_i) + + self.model = sorted(models, key = lambda x: x.log_marginal_likelihood())[-1] + gpflow.utilities.print_summary(self.model) + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + def hyperopt_model(self, params): + # skip building this model if hyperparameter combination already attempted + for i in self.hyperopt_trials.results: + if 'memo' in i: + if params == i['memo']: + return {'loss': i['loss'], 'status': STATUS_OK, 'memo': 'repeat'} + if self.itercount > self.hp_maxit: + return {'loss': 0.0, 'status': STATUS_FAIL, 'memo': 'max iters reached'} + self.build_model(params) + error_test = self.vet_model(self.model) + self.itercount += 1 + return {'loss': error_test, 'status': STATUS_OK, 'memo': params} + + def predict(self, model, data_in): + prediction, v1 = model.predict_f(data_in, full_cov=False) + return prediction + + def vet_model(self, model): + """Convenience method for getting model errors of test and full datasets""" + pred_test = self.predict(model, self.Xtest) + pred_full = self.predict(model, self.X) + error_test = self.compute_error(self.ytest, pred_test, self.yscaler) + error_full, median_error, max_errors = self.compute_error(self.y, pred_full, yscaler=self.yscaler, max_errors=5) + print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + print("Median error: {}".format(np.round(median_error[0],2)), end=' ') + print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + return error_test + + def preprocess(self, params, raw_X, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + # TODO make more flexible. If keys don't exist, ignore them. smth like "if key: if param['key']: do transform" + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) # Transform to morse variables (exp(-r/alpha)) + # Transform to FIs, degree reduce if called + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... 
+ #path = os.path.join(package_directory, "lib", self.molecule_type, "output") + path = os.path.join(fi_dir, self.molecule_type, "output") + raw_X, degrees = interatomics_to_fundinvar(raw_X,path) + if params['pip']['degree_reduction']: + raw_X = degree_reduce(raw_X, degrees) + + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + y = raw_y + yscaler = None + return X, y, Xscaler, yscaler + + def optimize_model(self): + print("Beginning hyperparameter optimization...") + print("Trying {} combinations of hyperparameters".format(self.hp_maxit)) + print("Training with {} points (Full dataset contains {} points).".format(self.ntrain, self.n_datapoints)) + print("Using {} training set point sampling.".format(self.sampler)) + print("Errors are root-mean-square error in wavenumbers (cm-1)") + self.hyperopt_trials = Trials() + self.itercount = 1 # keep track of hyperopt iterations + if self.input_obj.keywords['rseed'] != None: + rstate = np.random.default_rng(self.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.input_obj.keywords['rseed']) + else: + rstate = None + best = fmin(self.hyperopt_model, + space=self.hyperparameter_space, + algo=tpe.suggest, + max_evals=self.hp_maxit*2, + rstate=rstate, + show_progressbar=False, + trials=self.hyperopt_trials) + hyperopt_complete() + print("Best performing hyperparameters are:") + final = space_eval(self.hyperparameter_space, best) + print(str(sorted(final.items()))) + self.optimal_hyperparameters = dict(final) + # obtain final model from best hyperparameters + print("Fine-tuning final model architecture...") + self.build_model(self.optimal_hyperparameters, nrestarts=10) + print("Final model performance (cm-1):") + self.test_error = self.vet_model(self.model) + self.save_model(self.optimal_hyperparameters) + + def save_model(self, params): + # Save model. + print("Saving ML model data...") + model_path = "model1_data" + while os.path.isdir(model_path): + new = int(re.findall("\d+", model_path)[0]) + 1 + model_path = re.sub("\d+",str(new), model_path) + os.mkdir(model_path) + os.chdir(model_path) + self.model.predict_f_compiled = tf.function(self.model.predict_f, input_signature=[tf.TensorSpec(shape=None, dtype = tf.float64)]) + tf.saved_model.save(self.model, './') + + with open('hyperparameters', 'w') as f: + print(params, file=f) + + if self.sampler == 'user_supplied': + self.traindata.to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.testdata.to_csv('test_set', sep=',', index=False, float_format='%12.12f') + else: + self.dataset.iloc[self.train_indices].to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.dataset.iloc[self.test_indices].to_csv('test_set', sep=',', index=False, float_format='%12.12f') + + self.dataset.to_csv('PES.dat', sep=',',index=False,float_format='%12.12f') + # write convenience function + with open('compute_energy.py', 'w+') as f: + print(self.write_convenience_function(), file=f) + + # print model performance + sys.stdout = open('performance', 'w') + self.vet_model(self.model) + sys.stdout = sys.__stdout__ + os.chdir("../") + + def transform_new_X(self, newX, params, Xscaler=None): + """ + Transform a new, raw input according to the model's transformation procedure + so that prediction can be made. 
+ """ + # ensure X dimension is n x m (n new points, m input variables) + if len(newX.shape) == 1: + newX = np.expand_dims(newX,0) + elif len(newX.shape) > 2: + raise Exception("Dimensions of input data is incorrect.") + if params['morse_transform']['morse']: + newX = morse(newX, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants for an N atom system with molecule type AxByCz... + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + newX, degrees = interatomics_to_fundinvar(newX,path) + if params['pip']['degree_reduction']: + newX = degree_reduce(newX, degrees) + if Xscaler: + newX = Xscaler.transform(newX) + return newX + + def transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.transform(newy) + return newy + + def inverse_transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.inverse_transform(newy) + return newy + + def write_convenience_function(self): + string = "from peslearn.ml import GaussianProcess\nfrom peslearn import InputProcessor\nimport tensorflow as tf\nimport gpflow\nimport numpy as np\nimport json\nfrom itertools import combinations\n\n" + if self.pip: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''), molecule_type='{}')\n".format(self.molecule_type) + else: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''))\n" + with open('hyperparameters', 'r') as f: + hyperparameters = f.read() + string += "params = {}\n".format(hyperparameters) + string += "X, y, Xscaler, yscaler = gp.preprocess(params, gp.raw_X, gp.raw_y)\n" + string += "model = tf.saved_model.load('./')\n" + string += gp_convenience_function + return string + + def init_model(self, seed): + # Initializes model and model parameters + dim = self.X.shape[1] + np.random.seed(seed) + r = np.random.rand(dim+2) + #lss = [] + #for i in range(dim): + # lss.append(r[i+2]* 10) + kernel = gpflow.kernels.RBF(variance = (r[0]*100), lengthscales = ([1.0] * dim)) + gpflow.kernels.White(variance = 10**(-8)) + model = gpflow.models.GPR(data = (self.Xtr, self.ytr), kernel = kernel) + gpflow.utilities.set_trainable(model.kernel.kernels[1].variance, False) + opt = gpflow.optimizers.Scipy() + return model, opt + diff --git a/peslearn/ml/gpflow_spgp.py b/peslearn/ml/gpflow_spgp.py new file mode 100644 index 0000000..5d8aa06 --- /dev/null +++ b/peslearn/ml/gpflow_spgp.py @@ -0,0 +1,66 @@ +import numpy as np +import tensorflow as tf +import gpflow +from .gaussian_process import GaussianProcess +import itertools +import gc + +class SVIGP(GaussianProcess): + + + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + + def build_model(self, params, nrestarts=10, maxiter=10000, seed=0, dont_do_it_jeffrey=False): + print("Hyperparameters: ", params) + # Jeffrey won't do it if he's using MFGP's + if not dont_do_it_jeffrey: + self.split_train_test(params) + np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + #TODO: ARD + + self.num_inducing = 100 + self.batchsize = 200 + self.Z = self.Xtr[np.random.choice(len(self.Xtr), self.num_inducing, replace=False),:].copy() + kernel = gpflow.kernels.RBF() + gpflow.kernels.White() + self.model = gpflow.models.SVGP(kernel, gpflow.likelihoods.Gaussian(), self.Z, num_data=len(self.Xtr)) + self.elbo = tf.function(self.model.elbo) + tensor_data = tuple(map(tf.convert_to_tensor, 
(self.Xtr, self.ytr))) + self.elbo(tensor_data) # run it once to trace & compile + self.train_dataset = tf.data.Dataset.from_tensor_slices((self.Xtr, self.ytr)).repeat().shuffle(len(self.Xtr)) + train_iter = iter(self.train_dataset.batch(self.batchsize)) + ground_truth = self.elbo(tensor_data).numpy() + evals = [self.elbo(minibatch).numpy() for minibatch in itertools.islice(train_iter, 100)] + #gpflow.set_trainable(self.model.inducing_variable, False) + self.logf = self.run_adam(self.model, maxiter) + print(self.logf[-10:]) + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + def run_adam(self, model, iterations): + """ + Utility function running the Adam optimizer + + :param model: GPflow model + :param interations: number of iterations + """ + # Create an Adam Optimizer action + logf = [] + train_iter = iter(self.train_dataset.batch(self.batchsize)) + training_loss = model.training_loss_closure(train_iter, compile=True) + optimizer = tf.optimizers.Adam() + + @tf.function + def optimization_step(): + optimizer.minimize(training_loss, self.model.trainable_variables) + + for step in range(iterations): + optimization_step() + if step % 10 == 0: + elbo = -training_loss().numpy() + logf.append(elbo) + return logf + + def predict(self, model, data_in): + prediction, v1 = model.predict_y(data_in) + return prediction + diff --git a/peslearn/ml/gpy_mfgp.py b/peslearn/ml/gpy_mfgp.py new file mode 100644 index 0000000..5300875 --- /dev/null +++ b/peslearn/ml/gpy_mfgp.py @@ -0,0 +1,268 @@ +import numpy as np +import sklearn.metrics +import json +import os +import re +import sys +import gc +from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, space_eval +from GPy.models import GPRegression +from GPy.kern import RBF + +from .model import Model +from ..constants import hartree2cm, package_directory, gp_convenience_function +from ..utils.printing_helper import hyperopt_complete +from ..lib.path import fi_dir +from .data_sampler import DataSampler +from .preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler + +class GaussianProcess(Model): + """ + Constructs a Gaussian Process Model using GPy + """ + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + self.set_default_hyperparameters() + + def set_default_hyperparameters(self): + """ + Set default hyperparameter space. If none is provided, default is used. 
+ """ + self.hyperparameter_space = { + #'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]), + 'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None]), + } + + if self.input_obj.keywords['pes_format'] == 'interatomics': + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': True,'morse_alpha': hp.quniform('morse_alpha', 1, 2, 0.1)},{'morse': False}])) + else: + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': False}])) + if self.pip: + val = hp.choice('pip',[{'pip': True,'degree_reduction': hp.choice('degree_reduction', [True,False])}]) + self.set_hyperparameter('pip', val) + else: + self.set_hyperparameter('pip', hp.choice('pip', [{'pip': False}])) + + if self.input_obj.keywords['gp_ard'] == 'opt': # auto relevancy determination (independant length scales for each feature) + self.set_hyperparameter('ARD', hp.choice('ARD', [True,False])) + #TODO add optional space inclusions, something like: if option: self.hyperparameter_space['newoption'] = hp.choice(..) + + def split_train_test(self, params): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. + Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + """ + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + self.Xtest = self.X[self.test_indices] + self.ytest = self.y[self.test_indices] + + def build_model(self, params, nrestarts=10, maxit=1000, seed=0): + params['scale_X'] = 'std' + print("Hyperparameters: ", params) + self.split_train_test(params) + np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + dim = self.X.shape[1] + if self.input_obj.keywords['gp_ard'] == 'opt': + ard_val = params['ARD'] + elif self.input_obj.keywords['gp_ard'] == 'true': + ard_val = True + else: + ard_val = False + kernel = RBF(dim, ARD=ard_val) # TODO add HP control of kernel + self.model = GPRegression(self.Xtr, self.ytr, kernel=kernel, normalizer=False) + #self.model.optimize_restarts(nrestarts, optimizer="lbfgsb", robust=True, verbose=False, max_iters=maxit, messages=False) + self.model.optimize(optimizer="lbfgsb", max_iters=maxit, messages=False) + #TODO + err = self.vet_model(self.model) + #TODO + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + def hyperopt_model(self, params): + # skip building this model if hyperparameter combination already attempted + for i in self.hyperopt_trials.results: + if 'memo' in i: + if params == i['memo']: + return {'loss': i['loss'], 'status': STATUS_OK, 'memo': 'repeat'} + if self.itercount > self.hp_maxit: + return {'loss': 0.0, 'status': STATUS_FAIL, 'memo': 'max iters reached'} + self.build_model(params) + error_test = self.vet_model(self.model) + self.itercount += 1 
+ return {'loss': error_test, 'status': STATUS_OK, 'memo': params} + + def predict(self, model, data_in): + prediction, v1 = model.predict(data_in, full_cov=False) + return prediction + + def vet_model(self, model): + """Convenience method for getting model errors of test and full datasets""" + pred_test = self.predict(model, self.Xtest) + pred_full = self.predict(model, self.X) + error_test = self.compute_error(self.ytest, pred_test, self.yscaler) + error_full, median_error, max_errors = self.compute_error(self.y, pred_full, yscaler=self.yscaler, max_errors=5) + print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + print("Median error: {}".format(np.round(median_error[0],2)), end=' ') + print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + return error_test + + def preprocess(self, params, raw_X, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + # TODO make more flexible. If keys don't exist, ignore them. smth like "if key: if param['key']: do transform" + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) # Transform to morse variables (exp(-r/alpha)) + # Transform to FIs, degree reduce if called + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... + #path = os.path.join(package_directory, "lib", self.molecule_type, "output") + path = os.path.join(fi_dir, self.molecule_type, "output") + raw_X, degrees = interatomics_to_fundinvar(raw_X,path) + if params['pip']['degree_reduction']: + raw_X = degree_reduce(raw_X, degrees) + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + y = raw_y + yscaler = None + return X, y, Xscaler, yscaler + + def optimize_model(self): + print("Beginning hyperparameter optimization...") + print("Trying {} combinations of hyperparameters".format(self.hp_maxit)) + print("Training with {} points (Full dataset contains {} points).".format(self.ntrain, self.n_datapoints)) + print("Using {} training set point sampling.".format(self.sampler)) + print("Errors are root-mean-square error in wavenumbers (cm-1)") + self.hyperopt_trials = Trials() + self.itercount = 1 # keep track of hyperopt iterations + if self.input_obj.keywords['rseed']: + rstate = np.random.default_rng(self.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.input_obj.keywords['rseed']) + else: + rstate = None + best = fmin(self.hyperopt_model, + space=self.hyperparameter_space, + algo=tpe.suggest, + max_evals=self.hp_maxit*2, + rstate=rstate, + show_progressbar=False, + trials=self.hyperopt_trials) + hyperopt_complete() + print("Best performing hyperparameters are:") + final = space_eval(self.hyperparameter_space, best) + print(str(sorted(final.items()))) + self.optimal_hyperparameters = dict(final) + # obtain final model from best hyperparameters + print("Fine-tuning final model architecture...") + self.build_model(self.optimal_hyperparameters, nrestarts=10, maxit=1000) + print("Final model performance (cm-1):") + self.test_error = self.vet_model(self.model) + self.save_model(self.optimal_hyperparameters) + + def save_model(self, params): + # Save model. Currently GPy requires saving training data in model for some reason. 
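For context on the serialization used below, a round-trip sketch of GPy's to_dict/from_dict machinery; this is a minimal example assuming a plain ARD-RBF regression model, with illustrative array shapes and file name:

import json
import numpy as np
from GPy.models import GPRegression
from GPy.kern import RBF
from GPy.core.model import Model

X, y = np.random.rand(20, 3), np.random.rand(20, 1)
m = GPRegression(X, y, kernel=RBF(3, ARD=True))
m.optimize(optimizer="lbfgsb", max_iters=100, messages=False)

# save_data=True embeds the training set in the JSON, so the model can be
# rebuilt later without access to the original dataset
with open("model.json", "w") as f:
    json.dump(m.to_dict(save_data=True), f)

with open("model.json", "r") as f:
    m2 = Model.from_dict(json.load(f))
mean, var = m2.predict(X[:5])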
+ model_dict = self.model.to_dict(save_data=True) + print("Saving ML model data...") + model_path = "model1_data" + while os.path.isdir(model_path): + new = int(re.findall("\d+", model_path)[0]) + 1 + model_path = re.sub("\d+",str(new), model_path) + os.mkdir(model_path) + os.chdir(model_path) + with open('model.json', 'w') as f: + json.dump(model_dict, f) + with open('hyperparameters', 'w') as f: + print(params, file=f) + + if self.sampler == 'user_supplied': + self.traindata.to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.testdata.to_csv('test_set', sep=',', index=False, float_format='%12.12f') + else: + self.dataset.iloc[self.train_indices].to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.dataset.iloc[self.test_indices].to_csv('test_set', sep=',', index=False, float_format='%12.12f') + + self.dataset.to_csv('PES.dat', sep=',',index=False,float_format='%12.12f') + # write convenience function + with open('compute_energy.py', 'w+') as f: + print(self.write_convenience_function(), file=f) + + # print model performance + sys.stdout = open('performance', 'w') + self.vet_model(self.model) + sys.stdout = sys.__stdout__ + os.chdir("../") + + def transform_new_X(self, newX, params, Xscaler=None): + """ + Transform a new, raw input according to the model's transformation procedure + so that prediction can be made. + """ + # ensure X dimension is n x m (n new points, m input variables) + if len(newX.shape) == 1: + newX = np.expand_dims(newX,0) + elif len(newX.shape) > 2: + raise Exception("Dimensions of input data is incorrect.") + if params['morse_transform']['morse']: + newX = morse(newX, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants for an N atom system with molecule type AxByCz... 
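The helpers in this class (preprocess, transform_new_X, predict, and the inverse transforms defined just below) are meant to be chained when evaluating new geometries. A hedged usage sketch, assuming a trained instance `gp` and the `params`, `Xscaler`, `yscaler` produced by preprocess(); the input row is illustrative:

import numpy as np

newX = np.array([[1.01, 1.01, 1.62]])                  # raw interatomic distances, one geometry
Xt = gp.transform_new_X(newX, params, gp.Xscaler)      # Morse/PIP/scaling, matching the training transform
E_scaled = gp.predict(gp.model, Xt)                    # GPy predictive mean in scaled units
E = gp.inverse_transform_new_y(E_scaled, gp.yscaler)   # back to the dataset's raw energy units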
+ path = os.path.join(package_directory, "lib", self.molecule_type, "output") + newX, degrees = interatomics_to_fundinvar(newX,path) + if params['pip']['degree_reduction']: + newX = degree_reduce(newX, degrees) + if Xscaler: + newX = Xscaler.transform(newX) + return newX + + def transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.transform(newy) + return newy + + def inverse_transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.inverse_transform(newy) + return newy + + def write_convenience_function(self): + string = "from peslearn.ml import GaussianProcess\nfrom peslearn import InputProcessor\nfrom GPy.core.model import Model\nimport numpy as np\nimport json\nfrom itertools import combinations\n\n" + if self.pip: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''), molecule_type='{}')\n".format(self.molecule_type) + else: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''))\n" + with open('hyperparameters', 'r') as f: + hyperparameters = f.read() + string += "params = {}\n".format(hyperparameters) + string += "X, y, Xscaler, yscaler = gp.preprocess(params, gp.raw_X, gp.raw_y)\n" + string += "model = Model('mymodel')\n" + string += "with open('model.json', 'r') as f:\n" + string += " model_dict = json.load(f)\n" + string += "final = model.from_dict(model_dict)\n\n" + string += gp_convenience_function + return string + + diff --git a/peslearn/ml/gpytorch_gpr.py b/peslearn/ml/gpytorch_gpr.py new file mode 100644 index 0000000..aecff78 --- /dev/null +++ b/peslearn/ml/gpytorch_gpr.py @@ -0,0 +1,336 @@ +import numpy as np +import sklearn.metrics +import json +import os +import re +import sys +import gc +from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, space_eval +import torch +import gpytorch +from .model import Model +from ..constants import hartree2cm, package_directory, gp_convenience_function +from ..utils.printing_helper import hyperopt_complete +from ..lib.path import fi_dir +from .data_sampler import DataSampler +from .preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler +import time + +class GPR(gpytorch.models.ExactGP): + def __init__(self, train_x, train_y, likelihood): + super(GPR, self).__init__(train_x, train_y, likelihood) + self.mean = gpytorch.means.ConstantMean() + self.kernel = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims = train_x.size()[1])) # + self.white_noise_module(train_x)) #This assume Xdata is Kij with Xdim len(j) + + def forward(self, x): + mean_x = self.mean(x) + kernel_x = self.kernel(x) + return gpytorch.distributions.MultivariateNormal(mean_x, kernel_x) + +class GaussianProcess(Model): + """ + Constructs a Gaussian Process Model using GPyTorch + """ + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + self.set_default_hyperparameters() + + def set_default_hyperparameters(self): + """ + Set default hyperparameter space. If none is provided, default is used. 
+ """ + self.hyperparameter_space = { + 'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]), + 'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None]), + } + + if self.input_obj.keywords['pes_format'] == 'interatomics': + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': True,'morse_alpha': hp.quniform('morse_alpha', 1, 2, 0.1)},{'morse': False}])) + else: + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': False}])) + if self.pip: + val = hp.choice('pip',[{'pip': True,'degree_reduction': hp.choice('degree_reduction', [True,False])}]) + self.set_hyperparameter('pip', val) + else: + self.set_hyperparameter('pip', hp.choice('pip', [{'pip': False}])) + + if self.input_obj.keywords['gp_ard'] == 'opt': # auto relevancy determination (independant length scales for each feature) + self.set_hyperparameter('ARD', hp.choice('ARD', [True,False])) + # TODO add optional space inclusions, something like: if option: self.hyperparameter_space['newoption'] = hp.choice(..) + + def split_train_test(self, params, precision=64): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. + Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + """ + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + self.Xtest = self.X[self.test_indices] + self.ytest = self.y[self.test_indices] + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.X = torch.tensor(self.X,dtype=torch.float32) + self.y = torch.tensor(self.y,dtype=torch.float32) + elif precision == 64: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.X = torch.tensor(self.X,dtype=torch.float64) + self.y = torch.tensor(self.y,dtype=torch.float64) + else: + raise Exception("Invalid option for 'precision'") + #momba = 100 + #self.Xtr *= momba + #self.ytr *= momba + #self.Xtest *= momba + #self.ytest *= momba + #self.ytr = self.ytr.squeeze() + #self.ytest = self.ytest.squeeze() + #self.y = self.y.squeeze() + def build_model(self, params, nrestarts=10, maxiter=1000, seed=0): + """ + Optimizes model (with specified hyperparameters) using L-BFGS-B algorithm. Does this 'nrestarts' times and returns model with + greatest marginal log likelihood. 
+ """ + # TODO Give user control over 'nrestarts', 'maxiter', optimization method, and kernel hyperparameter initiation. + params['scale_X'] = 'std' + print("********************************************\n\nHyperparameters: ", params) + self.split_train_test(params) + #np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + self.likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.model = GPR(self.Xtr, self.ytr.squeeze(), self.likelihood) + self.likelihood.train() + self.model.train() + + self.opt = torch.optim.Adam(self.model.parameters(), lr=0.1) + #self.opt = torch.optim.LBFGS(self.model.parameters(), max_iter=20) + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + for i in range(maxiter): + def closure(): + self.opt.zero_grad() + out = self.model(self.Xtr) + loss = -mll(out, torch.squeeze(self.ytr)) + #print(f'Iter {i + 1}/{maxiter} - Loss: {loss.item()} lengthscale: {self.model.kernel.base_kernel.lengthscale.detach().numpy()}, variance: {self.model.kernel.outputscale.item()}, noise: {self.model.likelihood.noise.item()}') + loss.backward() + #print(f'Iter {i + 1}/{maxiter} - Loss: {loss.item()} lengthscale: {self.model.kernel.base_kernel.lengthscale.detach().numpy()}, variance: {self.model.kernel.outputscale.item()}, noise: {self.model.likelihood.noise.item()}') + return loss + self.opt.step(closure) + + self.model.eval() + self.likelihood.eval() + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + def hyperopt_model(self, params): + # skip building this model if hyperparameter combination already attempted + for i in self.hyperopt_trials.results: + if 'memo' in i: + if params == i['memo']: + return {'loss': i['loss'], 'status': STATUS_OK, 'memo': 'repeat'} + if self.itercount > self.hp_maxit: + return {'loss': 0.0, 'status': STATUS_FAIL, 'memo': 'max iters reached'} + self.build_model(params) + error_test = self.vet_model(self.model) + self.itercount += 1 + return {'loss': error_test, 'status': STATUS_OK, 'memo': params} + + def predict(self, model, data_in): + xpred_dataloader = torch.utils.data.DataLoader(data_in, batch_size = 1024, shuffle = False) + prediction = torch.tensor([0.]) + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + for x_batch in xpred_dataloader: + pred = model(x_batch).mean.unsqueeze(1) + prediction = torch.cat([prediction, pred.squeeze(-1)]) + return prediction[1:].unsqueeze(1) + #with torch.no_grad(), gpytorch.settings.fast_pred_var(): + # prediction = model(data_in).mean.unsqueeze(1) + #return prediction + + def vet_model(self, model): + #pred_test = self.predict(model, self.model_l, self.Xtest) + pred_full = self.predict(model, self.X) + #error_test = self.compute_error(self.ytest.squeeze(), pred_test, self.yscaler) + error_full, median_error, max_errors = self.compute_error(self.y.squeeze(0), pred_full, yscaler=self.yscaler, max_errors=5) + #print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + print("Median error: {}".format(np.round(median_error,2)), end=' ') + print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + print("-"*128) + return error_full # was test + #"""Convenience method for getting model errors of test and full datasets""" + #pred_test = self.predict(model, self.Xtest) + #pred_full = self.predict(model, self.X) + #error_test = self.compute_error(self.ytest, pred_test, self.yscaler) + #error_full, median_error, max_errors 
= self.compute_error(self.y, pred_full, yscaler=self.yscaler, max_errors=5) + #print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + #print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + #print("Median error: {}".format(np.round(median_error[0],2)), end=' ') + #print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + #return error_test + + def preprocess(self, params, raw_X, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + # Add artificial noise in data to prevent numerical instabilities. + #bunkbed = raw_y.shape + #raw_y = raw_y + np.random.rand(bunkbed[0], bunkbed[1])*1e-6 + + # TODO make more flexible. If keys don't exist, ignore them. smth like "if key: if param['key']: do transform" + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) # Transform to morse variables (exp(-r/alpha)) + # Transform to FIs, degree reduce if called + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... + #path = os.path.join(package_directory, "lib", self.molecule_type, "output") + path = os.path.join(fi_dir, self.molecule_type, "output") + raw_X, degrees = interatomics_to_fundinvar(raw_X,path) + if params['pip']['degree_reduction']: + raw_X = degree_reduce(raw_X, degrees) + + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + y = raw_y + yscaler = None + return X, y, Xscaler, yscaler + + def optimize_model(self): + print("Beginning hyperparameter optimization...") + print("Trying {} combinations of hyperparameters".format(self.hp_maxit)) + print("Training with {} points (Full dataset contains {} points).".format(self.ntrain, self.n_datapoints)) + print("Using {} training set point sampling.".format(self.sampler)) + print("Errors are root-mean-square error in wavenumbers (cm-1)") + self.hyperopt_trials = Trials() + self.itercount = 1 # keep track of hyperopt iterations + if self.input_obj.keywords['rseed'] != None: + rstate = np.random.default_rng(self.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.input_obj.keywords['rseed']) + else: + rstate = None + best = fmin(self.hyperopt_model, + space=self.hyperparameter_space, + algo=tpe.suggest, + max_evals=self.hp_maxit*2, + rstate=rstate, + show_progressbar=False, + trials=self.hyperopt_trials) + hyperopt_complete() + print("Best performing hyperparameters are:") + final = space_eval(self.hyperparameter_space, best) + print(str(sorted(final.items()))) + self.optimal_hyperparameters = dict(final) + # obtain final model from best hyperparameters + print("Fine-tuning final model architecture...") + self.build_model(self.optimal_hyperparameters, nrestarts=10) + print("Final model performance (cm-1):") + self.test_error = self.vet_model(self.model) + self.save_model(self.optimal_hyperparameters) + + def save_model(self, params): + # Save model. 
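This save_model stores only a GPyTorch state_dict (see the torch.save call below); unlike the GPy JSON above, reloading requires rebuilding the model around the original training tensors first. A minimal reload sketch, assuming the GPR and likelihood classes defined in this file and that Xtr/ytr/Xnew have been recovered from the saved train_set (names are illustrative):

import torch
import gpytorch

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPR(Xtr, ytr.squeeze(), likelihood)            # same construction as build_model
model.load_state_dict(torch.load("model_state.pth"))   # restores kernel/noise hyperparameters
model.eval(); likelihood.eval()

with torch.no_grad(), gpytorch.settings.fast_pred_var():
    mean = model(Xnew).mean                            # predictive mean at new (scaled) inputs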
+ print("Saving ML model data...") + model_path = "model1_data" + while os.path.isdir(model_path): + new = int(re.findall("\d+", model_path)[0]) + 1 + model_path = re.sub("\d+",str(new), model_path) + os.mkdir(model_path) + os.chdir(model_path) + + torch.save(self.model.state_dict(), 'model_state.pth') + + with open('hyperparameters', 'w') as f: + print(params, file=f) + + if self.sampler == 'user_supplied': + self.traindata.to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.testdata.to_csv('test_set', sep=',', index=False, float_format='%12.12f') + else: + self.dataset.iloc[self.train_indices].to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.dataset.iloc[self.test_indices].to_csv('test_set', sep=',', index=False, float_format='%12.12f') + + self.dataset.to_csv('PES.dat', sep=',',index=False,float_format='%12.12f') + # write convenience function + with open('compute_energy.py', 'w+') as f: + print(self.write_convenience_function(), file=f) + + # print model performance + sys.stdout = open('performance', 'w') + self.vet_model(self.model) + sys.stdout = sys.__stdout__ + os.chdir("../") + + def transform_new_X(self, newX, params, Xscaler=None): + """ + Transform a new, raw input according to the model's transformation procedure + so that prediction can be made. + """ + # ensure X dimension is n x m (n new points, m input variables) + if len(newX.shape) == 1: + newX = np.expand_dims(newX,0) + elif len(newX.shape) > 2: + raise Exception("Dimensions of input data is incorrect.") + if params['morse_transform']['morse']: + newX = morse(newX, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants for an N atom system with molecule type AxByCz... + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + newX, degrees = interatomics_to_fundinvar(newX,path) + if params['pip']['degree_reduction']: + newX = degree_reduce(newX, degrees) + if Xscaler: + newX = Xscaler.transform(newX) + return newX + + def transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.transform(newy) + return newy + + def inverse_transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.inverse_transform(newy) + return newy + + def write_convenience_function(self): + # TODO + string = "from peslearn.ml import GaussianProcess\nfrom peslearn import InputProcessor\nimport tensorflow as tf\nimport gpflow\nimport numpy as np\nimport json\nfrom itertools import combinations\n\n" + if self.pip: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''), molecule_type='{}')\n".format(self.molecule_type) + else: + string += "gp = GaussianProcess('PES.dat', InputProcessor(''))\n" + with open('hyperparameters', 'r') as f: + hyperparameters = f.read() + string += "params = {}\n".format(hyperparameters) + string += "X, y, Xscaler, yscaler = gp.preprocess(params, gp.raw_X, gp.raw_y)\n" + string += "model = tf.saved_model.load('./')\n" + string += gp_convenience_function + return string + diff --git a/peslearn/ml/mfgp.py b/peslearn/ml/mfgp.py new file mode 100644 index 0000000..179f0ff --- /dev/null +++ b/peslearn/ml/mfgp.py @@ -0,0 +1,209 @@ +import numpy as np +from sklearn.utils import shuffle +import torch +import gpytorch +from gpytorch.kernels import ScaleKernel, RBFKernel +from .gpytorch_gpr import GaussianProcess +import itertools +import gc +from ..constants import hartree2cm + +class SVI(gpytorch.models.ApproximateGP): + def __init__(self, train_x, train_y, inducing_points): + 
variational_distribution = gpytorch.variational.TrilNaturalVariationalDistribution(inducing_points.size(0)) + variational_strategy = gpytorch.variational.VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + #variational_strategy = gpytorch.variational.CiqVariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + super(SVI, self).__init__(variational_strategy) + self.mean = gpytorch.means.ConstantMean() + self.covar = ScaleKernel(RBFKernel(ard_num_dims = train_x.size(1))) + def forward(self, x): + mean_x = self.mean(x) + covar_x = self.covar(x) + #np.savetxt('/home/smg13363/GPR_PES/gpytorch_test_space/spgp/benchmarks/array.dat', covar_x.detach().numpy()) + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + +class GP(gpytorch.models.ExactGP): + def __init__(self, train_x, train_y, likelihood): + super(GP, self).__init__(train_x, train_y, likelihood) + self.mean = gpytorch.means.ConstantMean() + self.covar = ScaleKernel(RBFKernel(ard_num_dims=1, active_dims=(1))) * ScaleKernel(RBFKernel(ard_num_dims=train_x.size()[1])) + ScaleKernel(RBFKernel(ard_num_dims=train_x.size()[1])) + + def forward(self, x): + mean_x = self.mean(x) + kernel_x = self.covar(x) + return gpytorch.distributions.MultivariateNormal(mean_x, kernel_x) + + +class MFGP(GaussianProcess): + def __init__(self, dataset_path, lf_dataset_path, input_obj, input_obj_l, molecule_type=None, molecule=None, train_path=None, test_path=None, train_path_low=None, test_path_low=None, epochs=(100,100), num_inducing=50, batchsize=100): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + self.m_low = GaussianProcess(lf_dataset_path, input_obj_l, molecule_type, molecule, train_path_low, test_path_low) + torch.set_default_tensor_type(torch.DoubleTensor) + #gpytorch.settings.tridiagonal_jitter(1e-5) + torch.set_default_dtype(torch.float64) + #gpytorch.settings.lazily_evaluate_kernels(False) + self.epochs_h = epochs[1] + self.epochs_l = epochs[0] + self.num_inducing = num_inducing + self.batchsize = batchsize + + """ + Process LF and HF data + Build models simultaneously, LF the HF + Vet based on HF model + Win? + """ + + def split_train_test(self, params, precision=64): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. 
+ Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + """ + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + self.X_l, self.y_l, self.Xscaler_l, self.yscaler_l = self.preprocess(params, self.m_low.raw_X, self.m_low.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + self.Xtest = self.X[self.test_indices] + self.ytest = self.y[self.test_indices] + + if self.m_low.sampler == 'user_supplied': + self.Xtr_l = self.transform_new_X(self.m_low.raw_Xtr, params, self.Xscaler_l) + self.ytr_l = self.transform_new_y(self.m_low.raw_ytr, self.yscaler_l) + self.Xtest_l = self.transform_new_X(self.m_low.raw_Xtest, params, self.Xscaler_l) + self.ytest_l = self.transform_new_y(self.m_low.raw_ytest, self.yscaler_l) + else: + self.Xtr_l = self.X_l[self.m_low.train_indices] + self.ytr_l = self.y_l[self.m_low.train_indices] + self.Xtest_l = self.X_l[self.m_low.test_indices] + self.ytest_l = self.y_l[self.m_low.test_indices] + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.X = torch.tensor(self.X, dtype=torch.float32) + self.y = torch.tensor(self.y, dtype=torch.float32) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float32) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float32) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float32) + self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float32) + self.X_l = torch.tensor(self.X_l, dtype=torch.float32) + self.y_l = torch.tensor(self.y_l, dtype=torch.float32) + + elif precision == 64: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.X = torch.tensor(self.X, dtype=torch.float64) + self.y = torch.tensor(self.y, dtype=torch.float64) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float64) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float64) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float64) + self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float64) + self.X_l = torch.tensor(self.X_l, dtype=torch.float64) + self.y_l = torch.tensor(self.y_l, dtype=torch.float64) + + else: + raise Exception("Invalid option for 'precision'") + + + + def build_model(self, params, nrestarts=10, maxiter=1000, seed=0): + self.split_train_test(params) + np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + print("\n") + print("-"*128) + print(f"\nParams: \n{params}") + # LF Training + self.Z = self.X_l[:self.num_inducing,:] + train_l_ds = 
torch.utils.data.TensorDataset(self.Xtr_l, self.ytr_l) + train_l_loader = torch.utils.data.DataLoader(train_l_ds, batch_size = self.batchsize, shuffle=True) + self.likelihood_l = gpytorch.likelihoods.GaussianLikelihood() + self.model_l = SVI(self.Xtr_l, self.ytr_l, inducing_points = self.Z) + self.model_l.train() + self.likelihood_l.train() + opt_ngd = gpytorch.optim.NGD(self.model_l.variational_parameters(), num_data=self.ytr_l.size(0), lr=0.01) + opt_hyp = torch.optim.Adam([{'params':self.model_l.parameters()}, {'params':self.likelihood_l.parameters()}], lr=0.01) + mll_l = gpytorch.mlls.VariationalELBO(self.likelihood_l, self.model_l, num_data=self.ytr_l.size(0)) + for i in range(self.epochs_l): + for x_batch, y_batch in train_l_loader: + opt_ngd.zero_grad() + opt_hyp.zero_grad() + out = self.model_l(x_batch) + loss = -mll_l(out, y_batch.squeeze()) + loss.backward() + opt_ngd.step() + opt_hyp.step() + print('\nLF Training Done') + self.model_l.eval() + self.likelihood_l.eval() + + # HF Training + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + mean_low = self.model_l(self.Xtr).mean.unsqueeze(-1) + + xx = torch.hstack((self.Xtr.squeeze(0), mean_low.squeeze(0))) + self.likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.model = GP(xx, self.ytr.squeeze(), self.likelihood) + self.likelihood.train() + self.model.train() + opt = torch.optim.Adam(self.model.parameters(), lr=0.1) + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + for i in range(self.epochs_h): + opt.zero_grad() + out = self.model(xx) + loss = -mll(out, torch.squeeze(self.ytr)) + loss.backward() + opt.step() + print('HF Training Done\n') + self.model.eval() + self.likelihood.eval() + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + + def predict(self, hf_model, lf_model, x_in): + #xpred_dataset = torch.utils.data.TensorDataset(x_in) + xpred_dataloader = torch.utils.data.DataLoader(x_in, batch_size = 1024, shuffle = False) + prediction = torch.tensor([0.]) + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + for x_batch in xpred_dataloader: + lf_pred = lf_model(x_batch).mean.unsqueeze(-1) + xx = torch.hstack((x_batch, lf_pred.squeeze(0))) + hfpred = hf_model(xx).mean.unsqueeze(1) + prediction = torch.cat([prediction, hfpred.squeeze(-1)]) + return prediction[1:].unsqueeze(1) + + def vet_model(self, model): + """Convenience method for getting model errors of test and full datasets""" + #pred_test = self.predict(model, self.model_l, self.Xtest) + pred_full = self.predict(model, self.model_l, self.X) + #error_test = self.compute_error(self.ytest.squeeze(), pred_test, self.yscaler) + error_full, median_error, max_errors = self.compute_error(self.y.squeeze(0), pred_full, yscaler=self.yscaler, max_errors=5) + #print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + print("Median error: {}".format(np.round(median_error,2)), end=' ') + print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + print("-"*128) + return error_full # was test + diff --git a/peslearn/ml/mfgp_nsp.py b/peslearn/ml/mfgp_nsp.py new file mode 100644 index 0000000..b7b4bab --- /dev/null +++ b/peslearn/ml/mfgp_nsp.py @@ -0,0 +1,235 @@ +import numpy as np +import torch +import gpytorch +from gpytorch.kernels import ScaleKernel, RBFKernel +from .gpytorch_gpr import GaussianProcess +import itertools +import gc +from ..constants import hartree2cm + +class 
SVI(gpytorch.models.ApproximateGP): + def __init__(self, train_x, train_y, inducing_points): + variational_distribution = gpytorch.variational.TrilNaturalVariationalDistribution(inducing_points.size(0)) + variational_strategy = gpytorch.variational.VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + #variational_strategy = gpytorch.variational.CiqVariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + super(SVI, self).__init__(variational_strategy) + self.mean = gpytorch.means.ConstantMean() + self.covar = ScaleKernel(RBFKernel(ard_num_dims = train_x.size(1))) + def forward(self, x): + mean_x = self.mean(x) + covar_x = self.covar(x) + #np.savetxt('/home/smg13363/GPR_PES/gpytorch_test_space/spgp/benchmarks/array.dat', covar_x.detach().numpy()) + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + +class LFGP(gpytorch.models.ExactGP): + def __init__(self, train_x, train_y, likelihood): + super(LFGP, self).__init__(train_x, train_y, likelihood) + self.mean_module = gpytorch.means.ConstantMean() + self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) + + def forward(self, x): + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + +class GP(gpytorch.models.ExactGP): + def __init__(self, train_x, train_y, likelihood): + super(GP, self).__init__(train_x, train_y, likelihood) + self.mean = gpytorch.means.ConstantMean() + self.covar = ScaleKernel(RBFKernel(active_dims=(1))) * ScaleKernel(RBFKernel() + ScaleKernel(RBFKernel())) + #self.covar = ScaleKernel(RBFKernel(ard_num_dims=1, active_dims=(1))) * ScaleKernel(RBFKernel(ard_num_dims=train_x.size()[1])) + ScaleKernel(RBFKernel(ard_num_dims=train_x.size()[1])) + + def forward(self, x): + mean_x = self.mean(x) + kernel_x = self.covar(x) + return gpytorch.distributions.MultivariateNormal(mean_x, kernel_x) + + +class MFGP_NSP(GaussianProcess): + def __init__(self, dataset_path, lf_dataset_path, input_obj, input_obj_l, molecule_type=None, molecule=None, train_path=None, test_path=None, train_path_low=None, test_path_low=None, epochs=(100,100)): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + self.m_low = GaussianProcess(lf_dataset_path, input_obj_l, molecule_type, molecule, train_path_low, test_path_low) + torch.set_default_tensor_type(torch.DoubleTensor) + #gpytorch.settings.tridiagonal_jitter(1e-5) + torch.set_default_dtype(torch.float64) + #gpytorch.settings.lazily_evaluate_kernels(False) + self.epochs_h = epochs[1] + self.epochs_l = epochs[0] + + """ + Process LF and HF data + Build models simultaneously, LF the HF + Vet based on HF model + Win? + """ + + def split_train_test(self, params, precision=64): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. 
+ Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + """ + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + self.X_l, self.y_l, self.Xscaler_l, self.yscaler_l = self.preprocess(params, self.m_low.raw_X, self.m_low.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + self.Xtest = self.X[self.test_indices] + self.ytest = self.y[self.test_indices] + + if self.m_low.sampler == 'user_supplied': + self.Xtr_l = self.transform_new_X(self.m_low.raw_Xtr, params, self.Xscaler_l) + self.ytr_l = self.transform_new_y(self.m_low.raw_ytr, self.yscaler_l) + self.Xtest_l = self.transform_new_X(self.m_low.raw_Xtest, params, self.Xscaler_l) + self.ytest_l = self.transform_new_y(self.m_low.raw_ytest, self.yscaler_l) + else: + self.Xtr_l = self.X_l[self.m_low.train_indices] + self.ytr_l = self.y_l[self.m_low.train_indices] + self.Xtest_l = self.X_l[self.m_low.test_indices] + self.ytest_l = self.y_l[self.m_low.test_indices] + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.X = torch.tensor(self.X, dtype=torch.float32) + self.y = torch.tensor(self.y, dtype=torch.float32) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float32) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float32) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float32) + self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float32) + self.X_l = torch.tensor(self.X_l, dtype=torch.float32) + self.y_l = torch.tensor(self.y_l, dtype=torch.float32) + + elif precision == 64: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.X = torch.tensor(self.X, dtype=torch.float64) + self.y = torch.tensor(self.y, dtype=torch.float64) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float64) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float64) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float64) + self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float64) + self.X_l = torch.tensor(self.X_l, dtype=torch.float64) + self.y_l = torch.tensor(self.y_l, dtype=torch.float64) + + else: + raise Exception("Invalid option for 'precision'") + + + + def build_model(self, params, nrestarts=10, maxiter=1000, seed=0): + self.split_train_test(params) + #np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + print("\n") + print("-"*128) + print(f"\nParams: \n{params}") # LF Training + #self.Z = self.X_l[:self.num_inducing,:] + #train_l_ds = 
torch.utils.data.TensorDataset(self.Xtr_l, self.ytr_l) + #train_l_loader = torch.utils.data.DataLoader(train_l_ds, batch_size = self.batchsize, shuffle=True) + self.likelihood_l = gpytorch.likelihoods.GaussianLikelihood() + self.model_l = LFGP(self.Xtr_l, self.ytr_l.squeeze(), self.likelihood_l) + self.model_l.train() + self.likelihood_l.train() + opt_l = torch.optim.Adam(self.model_l.parameters(), lr=0.1) + mll_l = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood_l, self.model_l) + #opt_ngd = gpytorch.optim.NGD(self.model_l.variational_parameters(), num_data=self.ytr_l.size(0), lr=0.01) + #opt_hyp = torch.optim.Adam([{'params':self.model_l.parameters()}, {'params':self.likelihood_l.parameters()}], lr=0.01) + #mll_l = gpytorch.mlls.VariationalELBO(self.likelihood_l, self.model_l, num_data=self.ytr_l.size(0)) + for i in range(self.epochs_l): + #opt_ngd.zero_grad() + #opt_hyp.zero_grad() + opt_l.zero_grad() + out = self.model_l(self.Xtr_l) + loss = -mll_l(out, torch.squeeze(self.ytr_l)) + loss.backward() + opt_l.step() + #opt_ngd.step() + #opt_hyp.step() + print('\nLF Training Done') + self.model_l.eval() + self.likelihood_l.eval() + + # HF Training + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + mean_low = self.model_l(self.Xtr).mean.unsqueeze(-1) + xx = torch.hstack((self.Xtr.squeeze(0), mean_low.squeeze(0))) + self.likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.model = GP(xx, self.ytr.squeeze(), self.likelihood) + self.likelihood.train() + self.model.train() + opt = torch.optim.Adam(self.model.parameters(), lr=0.1) + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + for i in range(self.epochs_h): + opt.zero_grad() + out = self.model(xx) + loss = -mll(out, torch.squeeze(self.ytr)) + loss.backward() + opt.step() + print('HF Training Done\n') + self.model.eval() + self.likelihood.eval() + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + + def predict(self, hf_model, lf_model, x_in): + xpred_dataloader = torch.utils.data.DataLoader(x_in, batch_size = 1024, shuffle = False) + prediction = torch.tensor([0.]) + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + for x_batch in xpred_dataloader: + lf_pred = lf_model(x_batch).mean.unsqueeze(-1) + xx = torch.hstack((x_batch, lf_pred.squeeze(0))) + hfpred = hf_model(xx).mean.unsqueeze(1) + prediction = torch.cat([prediction, hfpred.squeeze(-1)]) + return prediction[1:].unsqueeze(1) + + #with torch.no_grad(), gpytorch.settings.fast_pred_var(): + # lf_pred = lf_model(x_in).mean.unsqueeze(-1) + # xx = torch.hstack((x_in.squeeze(0), lf_pred.squeeze(0))) + # prediction = hf_model(xx).mean.unsqueeze(1) + #return prediction + + def vet_model(self, model): + """Convenience method for getting model errors of test and full datasets""" + #pred_test = self.predict(model, self.model_l, self.Xtest) + pred_full = self.predict(model, self.model_l, self.X) + #error_test = self.compute_error(self.ytest.squeeze(), pred_test, self.yscaler) + error_full, median_error, max_errors = self.compute_error(self.y.squeeze(0), pred_full, yscaler=self.yscaler, max_errors=5) + #print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + print("Median error: {}".format(np.round(median_error,2)), end=' ') + print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + print("-"*128) + return error_full # was test + + #"""Convenience method for getting model errors of test 
and full datasets""" + #pred_test = self.predict(model, self.model_l, self.Xtest) + #pred_full = self.predict(model, self.model_l, self.X) + #error_test = self.compute_error(self.ytest.squeeze(0), pred_test, self.yscaler) + #error_full, median_error, max_errors = self.compute_error(self.y.squeeze(0), pred_full, yscaler=self.yscaler, max_errors=5) + #print("Test Dataset {}".format(round(hartree2cm * error_test,2)), end=' ') + #print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ') + #print("Median error: {}".format(np.round(median_error[0],2)), end=' ') + #print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n') + #return error_test + diff --git a/peslearn/ml/mfmodel.py b/peslearn/ml/mfmodel.py new file mode 100644 index 0000000..ef9ffe6 --- /dev/null +++ b/peslearn/ml/mfmodel.py @@ -0,0 +1,398 @@ +import torch +import torch.nn as nn +import numpy as np +import pandas as pd +import os +from collections import OrderedDict +import re +import copy + +from .model import Model +from .data_sampler import DataSampler +from ..constants import hartree2cm, package_directory, nn_convenience_function +from .preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler +from ..utils.printing_helper import hyperopt_complete +from sklearn.model_selection import train_test_split +from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, space_eval +from .preprocessing_helper import sort_architectures + + +torch.set_printoptions(precision=15) + +class MFModel(Model): + """ + A class that handles data processing and other convenience functions for multifidelity models + """ + def __init__(self, dataset_paths, input_objs, molecule_type=None, molecule=None, train_paths=(None, None), test_paths=(None, None), valid_paths=(None, None)): #All input objs are tuples ordered high to low fidelity. Only works with 2 for now + print("Big BEEBUS") + self.m_high = Model(dataset_paths[0], input_objs[0], molecule_type, molecule, train_paths[0], test_paths[0], valid_paths[0]) + self.m_low = Model(dataset_paths[1], input_objs[1], molecule_type, molecule, train_paths[1], test_paths[1], valid_paths[1]) + self.molecule_type = molecule_type + self.molecule = molecule + self.set_default_hyperparameters() + self.initModel(self.m_high) + self.initModel(self.m_low) + + def initModel(self, m): + # Because I do not want to write everything in __init__ twice + m.trial_layers = m.input_obj.keywords['nas_trial_layers'] + if m.input_obj.keywords['validation_points']: + m.nvalid = m.input_obj.keywords['validation_points'] + if (m.nvalid + m.ntrain + 1) > m.n_datapoints: + raise Exception("Error: User-specified training set size and validation set size exceeds the size of the dataset.") + else: + m.nvalid = round((m.n_datapoints - m.ntrain) / 2) + + if m.pip: + if self.molecule_type: + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + m.inp_dim = len(open(path).readlines()) + if self.molecule: + path = os.path.join(package_directory, "lib", self.molecule.molecule_type, "output") + m.inp_dim = len(open(path).readlines()) + else: + m.inp_dim = m.raw_X.shape[1] + + def set_default_hyperparameters(self, nn_search_space=1): + """ + Set default hyperparameter space. If none is provided, default is used. + + Parameters + ---------- + nn_search_space : int + Which tier of default hyperparameter search spaces to use. 
Neural networks have too many hyperparameter configurations to search across, + so this option reduces the number of variable hyperparameters to search over. Generally, larger integer => more hyperparameters, and more iterations of hp_maxit are recommended. + """ + if nn_search_space == 1: + self.hyperparameter_space = { + 'scale_X': hp.choice('scale_X', + [ + {'scale_X': 'mm11', + 'activation': hp.choice('activ2', ['tanh'])}, + {'scale_X': 'std', + 'activation': hp.choice('activ3', ['tanh'])}, + ]), + 'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11']),} + # TODO make more expansive search spaces, benchmark them, expose them as input options + #elif nn_search_space == 2: + #elif nn_search_space == 3: + else: + raise Exception("Invalid search space specification") + + # Standard geometry transformations, always use these. + if self.m_high.input_obj.keywords['pes_format'] == 'interatomics': + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': True,'morse_alpha': hp.quniform('morse_alpha', 1, 2, 0.1)},{'morse': False}])) + else: + self.set_hyperparameter('morse_transform', hp.choice('morse_transform',[{'morse': False}])) + if self.m_high.pip: + val = hp.choice('pip',[{'pip': True,'degree_reduction': hp.choice('degree_reduction', [True,False])}]) + self.set_hyperparameter('pip', val) + else: + self.set_hyperparameter('pip', hp.choice('pip', [{'pip': False}])) + + def optimize_model(self): + if not self.m_high.input_obj.keywords['validation_points']: + print("Number of validation points not specified. Splitting test set in half --> 50% test, 50% validation") + print("Training with {} points. Validating with {} points. Full dataset contains {} points.".format(self.m_high.ntrain, self.m_high.nvalid, self.m_high.n_datapoints)) + print("Using {} training set point sampling.".format(self.m_high.sampler)) + print("Errors are root-mean-square error in wavenumbers (cm-1)") + print("Beginning hyperparameter optimization...") + print("Trying {} combinations of hyperparameters".format(self.m_high.hp_maxit)) + self.hyperopt_trials = Trials() + self.itercount = 1 + if self.m_high.input_obj.keywords['rseed']: + rstate = np.random.default_rng(self.m_high.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.m_high.input_obj.keywords['rseed']) + else: + rstate = None + best = fmin(self.hyperopt_model, + space=self.hyperparameter_space, + algo=tpe.suggest, + max_evals=self.m_high.hp_maxit*2, + rstate=rstate, + show_progressbar=False, + trials=self.hyperopt_trials) + hyperopt_complete() + print("Best performing hyperparameters are:") + final = space_eval(self.hyperparameter_space, best) + print(str(sorted(final.items()))) + self.optimal_hyperparameters = dict(final) + print("Optimizing learning rate...") + + if self.m_high.input_obj.keywords['nn_precision'] == 64: + precision = 64 + else: + precision = 32 + learning_rates = [1.0, 0.8, 0.6, 0.5, 0.4, 0.2] + val_errors = [] + for i in learning_rates: + self.optimal_hyperparameters['lr'] = i + test_error, val_error = self.build_model(self.optimal_hyperparameters, maxit=5000, val_freq=10, es_patience=5, opt='lbfgs', tol=0.5, decay=False, verbose=False, precision=precision) + val_errors.append(val_error) + best_lr = learning_rates[np.argsort(val_errors)[0]] + self.optimal_hyperparameters['lr'] = best_lr + print("Fine-tuning final model...") + model, test_error, val_error, full_error = self.build_model(self.optimal_hyperparameters, maxit=5000, val_freq=1, es_patience=100, opt='lbfgs', tol=0.1, decay=True, 
verbose=True,precision=precision,return_model=True) + performance = [test_error, val_error, full_error] + print("Model optimization complete. Saving final model...") + self.save_model(self.optimal_hyperparameters, model, performance) + + def preprocess_protocol(self, params): + """ + How do you want to handle preprocessing? Choose your own adventure!!! + Need to set values for: + self.X_h + self.y_h + self.Xscaler_h + self.yscaler_h + self.X_l + self.y_l + self.Xscaler_l + self.yscaler_l + """ + + # Default + self.X_h, self.y_h, self.Xscaler_h, self.yscaler_h = self.preprocess(params, self.m_high.raw_X, self.m_high.raw_y) + self.X_l, self.y_l, self.Xscaler_l, self.yscaler_l = self.preprocess(params, self.m_low.raw_X, self.m_low.raw_y) + + + def split_train_test(self, params, precision=32): + """ + Take raw dataset and apply hyperparameters/input keywords/preprocessing + and train/test (tr,test) splitting. + Assigns: + self.X : complete input data, transformed + self.y : complete output data, transformed + self.Xscaler : scaling transformer for inputs + self.yscaler : scaling transformer for outputs + self.Xtr : training input data, transformed + self.ytr : training output data, transformed + self.Xtest : test input data, transformed + self.ytest : test output data, transformed + self.Xvalid : validation input data, transformed + self.yvalid : validation output data, transformed + """ + + self.preprocess_protocol(params) + if self.m_high.sampler == 'user_supplied': + self.Xtr_h = self.transform_new_X(self.m_high.raw_Xtr, params, self.Xscaler_h) + self.ytr_h = self.transform_new_y(self.m_high.raw_ytr, self.yscaler_h) + self.Xtest_h = self.transform_new_X(self.m_high.raw_Xtest, params, self.Xscaler_h) + self.ytest_h = self.transform_new_y(self.m_high.raw_ytest, self.yscaler_h) + if self.m_high.valid_path: + self.Xvalid_h = self.transform_new_X(self.m_high.raw_Xvalid, params, self.Xscaler_h) + self.yvalid_h = self.transform_new_y(self.m_high.raw_yvalid, self.yscaler_h) + else: + raise Exception("Please provide a validation set for Neural Network training.") + else: + self.Xtr_h = self.X_h[self.m_high.train_indices] + self.ytr_h = self.y_h[self.m_high.train_indices] + #TODO: this is splitting validation data in the same way at every model build, not necessary. 
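A minimal sketch of the index split performed on the next line: the indices left over after training-set sampling are divided once into a validation block of size nvalid and a reduced test block (the arrays are illustrative; random_state=42 mirrors the call below):

import numpy as np
from sklearn.model_selection import train_test_split

test_indices = np.arange(100)      # indices not used for training
nvalid = 30
valid_idx, new_test_idx = train_test_split(test_indices, train_size=nvalid, random_state=42)
# valid_idx -> validation geometries, new_test_idx -> remaining test geometries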
+ self.valid_indices_h, self.new_test_indices_h = train_test_split(self.m_high.test_indices, train_size = self.m_high.nvalid, random_state=42) + if self.m_high.nvalid: + self.Xvalid_h = self.X_h[self.valid_indices_h] + self.yvalid_h = self.y_h[self.valid_indices_h] + self.Xtest_h = self.X_h[self.new_test_indices_h] + self.ytest_h = self.y_h[self.new_test_indices_h] + + else: + raise Exception("Please specify a validation set size for Neural Network training.") + + if self.m_low.sampler == 'user_supplied': + self.Xtr_l = self.transform_new_X(self.m_low.raw_Xtr, params, self.Xscaler_l) + self.ytr_l = self.transform_new_y(self.m_low.raw_ytr, self.yscaler_l) + self.Xtest_l = self.transform_new_X(self.m_low.raw_Xtest, params, self.Xscaler_l) + self.ytest_l = self.transform_new_y(self.m_low.raw_ytest, self.yscaler_l) + if self.m_low.valid_path: + self.Xvalid_l = self.transform_new_X(self.m_low.raw_Xvalid, params, self.Xscaler_l) + self.yvalid_l = self.transform_new_y(self.m_low.raw_yvalid, self.yscaler_l) + else: + raise Exception("Please provide a validation set for Neural Network training.") + else: + self.Xtr_l = self.X_l[self.m_low.train_indices] + self.ytr_l = self.y_l[self.m_low.train_indices] + #TODO: this is splitting validation data in the same way at every model build, not necessary. + self.valid_indices_l, self.new_test_indices_l = train_test_split(self.m_low.test_indices, train_size = self.m_low.nvalid, random_state=42) + if self.m_low.nvalid: + self.Xvalid_l = self.X_l[self.valid_indices_l] + self.yvalid_l = self.y_l[self.valid_indices_l] + self.Xtest_l = self.X_l[self.new_test_indices_l] + self.ytest_l = self.y_l[self.new_test_indices_l] + + else: + raise Exception("Please specify a validation set size for Neural Network training.") + + # convert to Torch Tensors + if precision == 32: + self.Xtr_h = torch.tensor(self.Xtr_h, dtype=torch.float32) + self.ytr_h = torch.tensor(self.ytr_h, dtype=torch.float32) + self.Xtest_h = torch.tensor(self.Xtest_h, dtype=torch.float32) + self.ytest_h = torch.tensor(self.ytest_h, dtype=torch.float32) + self.Xvalid_h = torch.tensor(self.Xvalid_h, dtype=torch.float32) + self.yvalid_h = torch.tensor(self.yvalid_h, dtype=torch.float32) + self.X_h = torch.tensor(self.X_h, dtype=torch.float32) + self.y_h = torch.tensor(self.y_h, dtype=torch.float32) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float32) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float32) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float32) + self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float32) + self.Xvalid_l = torch.tensor(self.Xvalid_l, dtype=torch.float32) + self.yvalid_l = torch.tensor(self.yvalid_l, dtype=torch.float32) + self.X_l = torch.tensor(self.X_l, dtype=torch.float32) + self.y_l = torch.tensor(self.y_l, dtype=torch.float32) + + elif precision == 64: + self.Xtr_h = torch.tensor(self.Xtr_h, dtype=torch.float64) + self.ytr_h = torch.tensor(self.ytr_h, dtype=torch.float64) + self.Xtest_h = torch.tensor(self.Xtest_h, dtype=torch.float64) + self.ytest_h = torch.tensor(self.ytest_h, dtype=torch.float64) + self.Xvalid_h = torch.tensor(self.Xvalid_h, dtype=torch.float64) + self.yvalid_h = torch.tensor(self.yvalid_h, dtype=torch.float64) + self.X_h = torch.tensor(self.X_h, dtype=torch.float64) + self.y_h = torch.tensor(self.y_h, dtype=torch.float64) + + self.Xtr_l = torch.tensor(self.Xtr_l, dtype=torch.float64) + self.ytr_l = torch.tensor(self.ytr_l, dtype=torch.float64) + self.Xtest_l = torch.tensor(self.Xtest_l, dtype=torch.float64) + 
self.ytest_l = torch.tensor(self.ytest_l, dtype=torch.float64) + self.Xvalid_l = torch.tensor(self.Xvalid_l, dtype=torch.float64) + self.yvalid_l = torch.tensor(self.yvalid_l, dtype=torch.float64) + self.X_l = torch.tensor(self.X_l, dtype=torch.float64) + self.y_l = torch.tensor(self.y_l, dtype=torch.float64) + + else: + raise Exception("Invalid option for 'precision'") + + def get_optimizer(self, opt_type, mdata, lr=0.1): + rate = lr + if opt_type == 'lbfgs': + #optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=20, max_eval=None, tolerance_grad=1e-5, tolerance_change=1e-9, history_size=100) # Defaults + #optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=100, max_eval=None, tolerance_grad=1e-10, tolerance_change=1e-14, history_size=200) + optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=20, max_eval=None, tolerance_grad=1e-8, tolerance_change=1e-12, history_size=100) + if opt_type == 'adam': + optimizer = torch.optim.Adam(mdata, lr=rate) + return optimizer + + def hyperopt_model(self, params): + """ + A Hyperopt-friendly wrapper for build_model + """ + # skip building this model if hyperparameter combination already attempted + for i in self.hyperopt_trials.results: + if 'memo' in i: + if params == i['memo']: + return {'loss': i['loss'], 'status': STATUS_OK, 'memo': 'repeat'} + if self.itercount > self.m_high.hp_maxit: + return {'loss': 0.0, 'status': STATUS_FAIL, 'memo': 'max iters reached'} + error_test, error_valid = self.build_model(params) + self.itercount += 1 + if np.isnan(error_valid): + return {'loss': 1e5, 'status': STATUS_FAIL, 'memo': 'nan'} + else: + return {'loss': error_valid, 'status': STATUS_OK, 'memo': params} + + def preprocess(self, params, raw_X, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... 
+ path = os.path.join(package_directory, "lib", self.molecule_type, "output") + raw_X, degrees = interatomics_to_fundinvar(raw_X,path) + if params['pip']['degree_reduction']: + raw_X = degree_reduce(raw_X, degrees) + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X']['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + y = raw_y + yscaler = None + return X, y, Xscaler, yscaler + + def save_model(self, params, model, performance): + print("Saving ML model data...") + model_path = "model1_data" + while os.path.isdir(model_path): + new = int(re.findall("\d+", model_path)[0]) + 1 + model_path = re.sub("\d+",str(new), model_path) + os.mkdir(model_path) + os.chdir(model_path) + torch.save(model, 'model.pt') + + with open('hyperparameters', 'w') as f: + print(params, file=f) + + test, valid, full = performance + with open('performance', 'w') as f: + print("Test set RMSE (cm-1): {:5.2f} Validation set RMSE (cm-1): {:5.2f} Full dataset RMSE (cm-1): {:5.2f}".format(test, valid, full), file=f) + + if self.m_high.sampler == 'user_supplied': + self.m_high.traindata.to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.m_high.validdata.to_csv('validation_set',sep=',',index=False,float_format='%12.12f') + self.m_high.testdata.to_csv('test_set', sep=',', index=False, float_format='%12.12f') + else: + self.m_high.dataset.iloc[self.m_high.train_indices].to_csv('train_set',sep=',',index=False,float_format='%12.12f') + self.m_high.dataset.iloc[self.valid_indices_h].to_csv('validation_set', sep=',', index=False, float_format='%12.12f') + self.m_high.dataset.iloc[self.new_test_indices_h].to_csv('test_set', sep=',', index=False, float_format='%12.12f') + + self.m_high.dataset.to_csv('PES.dat', sep=',',index=False,float_format='%12.12f') + with open('compute_energy.py', 'w+') as f: + print(self.write_convenience_function(), file=f) + os.chdir("../") + + def transform_new_X(self, newX, params, Xscaler=None): + """ + Transform a new, raw input according to the model's transformation procedure + so that prediction can be made. + """ + # ensure X dimension is n x m (n new points, m input variables) + if len(newX.shape) == 1: + newX = np.expand_dims(newX,0) + elif len(newX.shape) > 2: + raise Exception("Dimensions of input data is incorrect.") + if params['morse_transform']['morse']: + newX = morse(newX, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants for an N atom system with molecule type AxByCz... 
+ path = os.path.join(package_directory, "lib", self.molecule_type, "output") + newX, degrees = interatomics_to_fundinvar(newX,path) + if params['pip']['degree_reduction']: + newX = degree_reduce(newX, degrees) + if Xscaler: + newX = Xscaler.transform(newX) + return newX + + def transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.transform(newy) + return newy + + def inverse_transform_new_y(self, newy, yscaler=None): + if yscaler: + newy = yscaler.inverse_transform(newy) + return newy + + def write_convenience_function(self): + string = "from peslearn.ml import NeuralNetwork\nfrom peslearn import InputProcessor\nimport torch\nimport numpy as np\nfrom itertools import combinations\n\n" + if self.m_high.pip: + string += "nn = NeuralNetwork('PES.dat', InputProcessor(''), molecule_type='{}')\n".format(self.molecule_type) + else: + string += "nn = NeuralNetwork('PES.dat', InputProcessor(''))\n" + with open('hyperparameters', 'r') as f: + hyperparameters = f.read() + string += "params = {}\n".format(hyperparameters) + string += "X, y, Xscaler, yscaler = nn.preprocess(params, nn.raw_X, nn.raw_y)\n" + string += "model = torch.load('model.pt')\n" + string += nn_convenience_function + return string + + + + diff --git a/peslearn/ml/mfnn/__init__.py b/peslearn/ml/mfnn/__init__.py new file mode 100644 index 0000000..d8dfcf5 --- /dev/null +++ b/peslearn/ml/mfnn/__init__.py @@ -0,0 +1,9 @@ +from . import dual +from . import delta +from . import weight_transfer +from . import mknn + +from .dual import DualNN +from .delta import DeltaNN +from .weight_transfer import WTNN +from .mknn import MKNN diff --git a/peslearn/ml/mfnn/delta.py b/peslearn/ml/mfnn/delta.py new file mode 100644 index 0000000..1365b69 --- /dev/null +++ b/peslearn/ml/mfnn/delta.py @@ -0,0 +1,10 @@ +from copy import deepcopy +from ..neural_network import NeuralNetwork + +class DeltaNN(NeuralNetwork): + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None, valid_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + lf_E = self.raw_X[:,-1].reshape(-1,1) + self.raw_X = self.raw_X[:,:-1] + self.raw_y = deepcopy(self.raw_y) - lf_E # If modified in place (i.e. 
self.raw_y -= lf_E) then PES.dat will be modified to delta rather than HF_E + \ No newline at end of file diff --git a/peslearn/ml/mfnn/dual.py b/peslearn/ml/mfnn/dual.py new file mode 100644 index 0000000..b604d25 --- /dev/null +++ b/peslearn/ml/mfnn/dual.py @@ -0,0 +1,142 @@ +import torch +import numpy as np +from ..neural_network import NeuralNetwork +import os +from copy import deepcopy +from ...constants import package_directory +from ..preprocessing_helper import morse, interatomics_to_fundinvar, degree_reduce, general_scaler +from sklearn.model_selection import train_test_split + +torch.set_printoptions(precision=15) + +class DualNN(NeuralNetwork): + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None, valid_path=None): + #super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + self.trial_layers = self.input_obj.keywords['nas_trial_layers'] + self.set_default_hyperparameters() + + if self.input_obj.keywords['validation_points']: + self.nvalid = self.input_obj.keywords['validation_points'] + if (self.nvalid + self.ntrain + 1) > self.n_datapoints: + raise Exception("Error: User-specified training set size and validation set size exceeds the size of the dataset.") + else: + self.nvalid = round((self.n_datapoints - self.ntrain) / 2) + + if self.pip: + if molecule_type: + path = os.path.join(package_directory, "lib", molecule_type, "output") + self.inp_dim = len(open(path).readlines())+1 + if molecule: + path = os.path.join(package_directory, "lib", molecule.molecule_type, "output") + self.inp_dim = len(open(path).readlines())+1 + else: + self.inp_dim = self.raw_X.shape[1] + + def split_train_test(self, params, validation_size=None, precision=32): + self.X, self.y, self.Xscaler, self.yscaler, self.lf_E_scaler = self.preprocess(params, self.raw_X, self.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + if self.valid_path: + self.Xvalid = self.transform_new_X(self.raw_Xvalid, params, self.Xscaler) + self.yvalid = self.transform_new_y(self.raw_yvalid, self.yscaler) + else: + raise Exception("Please provide a validation set for Neural Network training.") + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + #TODO: this is splitting validation data in the same way at every model build, not necessary. 
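[editor's note] DeltaNN above and the DualNN preprocess method further down assume the same dataset convention: each raw_X row holds the geometry features followed by the low-fidelity energy in the last column, while raw_y holds the high-fidelity energy. A stand-alone illustration of that column split, with fabricated numbers and an invented feature count:

    import numpy as np

    # [r1, r2, r3, E_low] per row; E_high lives in raw_y (values are made up)
    raw_X = np.array([[1.2, 1.3, 1.4, -75.98],
                      [1.1, 1.5, 1.6, -75.91]])
    raw_y = np.array([[-76.12], [-76.05]])

    lf_E  = raw_X[:, -1].reshape(-1, 1)   # low-fidelity energy column
    geoms = raw_X[:, :-1]                 # geometry features only
    delta = raw_y - lf_E                  # DeltaNN's learning target
    # DualNN instead keeps lf_E as an extra input feature next to the geometry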
+ self.valid_indices, self.new_test_indices = train_test_split(self.test_indices, train_size = validation_size, random_state=42) + if validation_size: + self.Xvalid = self.X[self.valid_indices] + self.yvalid = self.y[self.valid_indices] + self.Xtest = self.X[self.new_test_indices] + self.ytest = self.y[self.new_test_indices] + + else: + raise Exception("Please specify a validation set size for Neural Network training.") + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float32) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float32) + self.X = torch.tensor(self.X,dtype=torch.float32) + self.y = torch.tensor(self.y,dtype=torch.float32) + elif precision == 64: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float64) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float64) + self.X = torch.tensor(self.X,dtype=torch.float64) + self.y = torch.tensor(self.y,dtype=torch.float64) + else: + raise Exception("Invalid option for 'precision'") + + def preprocess(self, params, raw_X_less, raw_y): + """ + Preprocess raw data according to hyperparameters + """ + lf_E = deepcopy(raw_X_less[:,-1].reshape(-1,1)) + raw_X = deepcopy(raw_X_less[:,:-1]) + if params['morse_transform']['morse']: + raw_X = morse(raw_X, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants form molecule type AxByCz... + path = os.path.join(package_directory, "lib", self.molecule_type, "output") + #lf_E = raw_X[:,-1] + raw_X, degrees = interatomics_to_fundinvar(raw_X,path) + #raw_X = np.hstack((raw_X, lf_E[:,None])) + if params['pip']['degree_reduction']: + #raw_X[:,:-1] = degree_reduce(raw_X[:,:-1], degrees) + raw_X = degree_reduce(raw_X, degrees) + if params['scale_X']: + X, Xscaler = general_scaler(params['scale_X']['scale_X'], raw_X) + else: + X = raw_X + Xscaler = None + if params['scale_y']: + lf_E, lf_E_scaler = general_scaler(params['scale_y'], lf_E) + y, yscaler = general_scaler(params['scale_y'], raw_y) + else: + lf_E_scaler = None + y = raw_y + yscaler = None + X = np.hstack((X, lf_E)) + #X = np.hstack((X, lf_E[:,None])) + return X, y, Xscaler, yscaler, lf_E_scaler + + def transform_new_X(self, newX, params, Xscaler=None, lf_E_scaler=None): + """ + Transform a new, raw input according to the model's transformation procedure + so that prediction can be made. + """ + # ensure X dimension is n x m (n new points, m input variables) + if len(newX.shape) == 1: + newX = np.expand_dims(newX,0) + elif len(newX.shape) > 2: + raise Exception("Dimensions of input data is incorrect.") + newX_geom = newX[:,:-1] + lf_E = newX[:,-1].reshape(-1,1) + if params['morse_transform']['morse']: + newX_geom = morse(newX_geom, params['morse_transform']['morse_alpha']) + if params['pip']['pip']: + # find path to fundamental invariants for an N atom system with molecule type AxByCz... 
+ path = os.path.join(package_directory, "lib", self.molecule_type, "output") + newX_geom, degrees = interatomics_to_fundinvar(newX_geom,path) + if params['pip']['degree_reduction']: + newX_geom = degree_reduce(newX_geom, degrees) + if Xscaler: + newX_geom = Xscaler.transform(newX_geom) + if lf_E_scaler: + lf_E = lf_E_scaler.transform(lf_E) + #lf_E = lf_E.reshape(-1,1) + return np.hstack((newX_geom, lf_E)) diff --git a/peslearn/ml/mfnn/mknn.py b/peslearn/ml/mfnn/mknn.py new file mode 100644 index 0000000..98e810b --- /dev/null +++ b/peslearn/ml/mfnn/mknn.py @@ -0,0 +1,218 @@ +import numpy as np +from .weight_transfer import WTNN +import torch +import torch.nn as nn +from collections import OrderedDict +from ...constants import hartree2cm +import copy + +class MKNNModel(nn.Module): + def __init__(self, inp_dim, layers, activ) -> None: + super(MKNNModel, self).__init__() + + depth = len(layers) + structure_lf = OrderedDict([('input', nn.Linear(inp_dim, layers[0])), + ('activ_in' , activ)]) + self.model_lf = nn.Sequential(structure_lf) + for i in range(depth-1): + self.model_lf.add_module('layer' + str(i), nn.Linear(layers[i], layers[i+1])) + self.model_lf.add_module('activ' + str(i), activ) + self.model_lf.add_module('output', nn.Linear(layers[depth-1], 1)) + + #structure_hf = OrderedDict([('input', nn.Linear(inp_dim+1, layers[0])), + # ('activ_in' , activ)]) # Add one to inp_dim for LF energy + #self.nonlinear_hf = nn.Sequential(structure_hf) # Nonlinear NN for HF prediction + #for i in range(depth-1): + # self.nonlinear_hf.add_module('layer' + str(i), nn.Linear(layers[i], layers[i+1])) + # self.nonlinear_hf.add_module('activ' + str(i), activ) + #self.nonlinear_hf.add_module('output', nn.Linear(layers[depth-1], 1)) + self.nonlinear_hf = nn.Sequential( + nn.Linear(inp_dim+1,32), + nn.Tanh(), + nn.Linear(32,32), + nn.Tanh(), + nn.Linear(32,32), + nn.Tanh(), + nn.Linear(32,1), + nn.Tanh()) + + self.linear_hf = nn.Linear(inp_dim+1,1) # Linear NN + + def forward(self, xh, xl): + yl = self.model_lf(xl) + yl_xh = self.model_lf(xh) + #print(xh.shape) + #print(yl_xh.shape) + hin = torch.cat((xh,yl_xh), dim=1) + nliny = self.nonlinear_hf(hin) + liny = self.linear_hf(hin) + yh = liny + nliny + return yh, yl + + +class MKNN(WTNN): + def __init__(self, dataset_path, dataset_path_lf, input_obj, input_obj_lf, molecule_type=None, molecule=None, train_path=None, test_path=None, valid_path=None): + super().__init__(dataset_path, dataset_path_lf, input_obj, input_obj_lf, molecule_type, molecule, train_path, test_path, valid_path) + + def build_model(self, params, maxit=1000, val_freq=10, es_patience=2, opt='lbfgs', tol=1, decay=False, verbose=False, precision=32, return_model=False): + print("Hyperparameters: ", params) + self.split_train_test(params, validation_size=self.nvalid, validation_size_lf=self.nvalid_lf, precision=precision) # split data, according to scaling hp's + scale = params['scale_y'] # Find descaling factor to convert loss to original energy units + if scale == 'std': + loss_descaler = self.yscaler.var_[0] + if scale.startswith('mm'): + loss_descaler = (1/self.yscaler.scale_[0]**2) + + activation = params['scale_X']['activation'] + if activation == 'tanh': + activ = nn.Tanh() + if activation == 'sigmoid': + activ = nn.Sigmoid() + + inp_dim = self.inp_dim + l = params['layers'] + torch.manual_seed(0) + + model = MKNNModel(inp_dim, l, activ) + + if precision == 64: # cast model to proper precision + model = model.double() + + metric = torch.nn.MSELoss() + # Define optimizer + if 'lr' in 
params: + lr = params['lr'] + elif opt == 'lbfgs': + lr = 0.5 + else: + lr = 0.1 + + optimizer = self.get_optimizer(opt, model.parameters(), lr=lr) + #optimizer = torch.optim.Adam(model.parameters(), lr=lr*0.01) + # Define update variables for early stopping, decay, gradient explosion handling + prev_loss = 1.0 + es_tracker = 0 + best_val_error = None + failures = 0 + decay_attempts = 0 + prev_best = None + decay_start = False + maxit += 5000 + labda = 1e-6 #l2_norm = sum(p.pow(2.0).sum() for p in model.parameters()) + for epoch in range(1,maxit): + def closure(): + optimizer.zero_grad() + y_pred_hf, y_pred_lf = model(self.Xtr, self.Xtr_lf) + loss = torch.sqrt(metric(y_pred_lf, self.ytr_lf)) + torch.sqrt(metric(y_pred_hf, self.ytr)) + labda*sum(p.pow(2.0).sum() for p in model.parameters()) # L2 regularization + loss.backward() + return loss + optimizer.step(closure) + # validate + if epoch % val_freq == 0: + with torch.no_grad(): + tmp_pred, trash = model(self.Xvalid, self.Xvalid) + tmp_loss = metric(tmp_pred, self.yvalid) + val_error_rmse = np.sqrt(tmp_loss.item() * loss_descaler) * hartree2cm # loss_descaler converts MSE in scaled data domain to MSE in unscaled data domain + if best_val_error: + if val_error_rmse < best_val_error: + prev_best = best_val_error * 1.0 + best_val_error = val_error_rmse * 1.0 + else: + record = True + best_val_error = val_error_rmse * 1.0 + prev_best = best_val_error + if verbose: + print("Epoch {} Validation RMSE (cm-1): {:5.3f}".format(epoch, val_error_rmse)) + if decay_start: + scheduler.step(val_error_rmse) + + # Early Stopping + if epoch > 5: + # if current validation error is not the best (current - best > 0) and is within tol of previous error, the model is stagnant. + if ((val_error_rmse - prev_loss) < tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: current validation error is not the best (current - best > 0) and is greater than the best by tol, the model is overfitting. Bad epoch. + elif ((val_error_rmse - best_val_error) > tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: if the current validation error is a new record, but not significant, the model is stagnant + elif (prev_best - best_val_error) < 0.001: + es_tracker += 1 + # else: model set a new record validation error. Reset early stopping tracker + else: + es_tracker = 0 + #TODO this framework does not detect oscillatory behavior about 'tol', though this has not been observed to occur in any case + # Check status of early stopping tracker. First try decaying to see if stagnation can be resolved, if not then terminate training + if es_tracker > es_patience: + if decay: # if decay is set to true, if early stopping criteria is triggered, begin LR scheduler and go back to previous model state and attempt LR decay. + if decay_attempts < 1: + decay_attempts += 1 + es_tracker = 0 + if verbose: + print("Performance plateau detected. 
Reverting model state and decaying learning rate.") + decay_start = True + thresh = (0.1 / np.sqrt(loss_descaler)) / hartree2cm # threshold is 0.1 wavenumbers + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, threshold=thresh, threshold_mode='abs', min_lr=0.05, cooldown=2, patience=10, verbose=verbose) + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.9 + optimizer.load_state_dict(saved_optimizer_state_dict) + # Since learning rate is decayed, override tolerance, patience, validation frequency for high-precision + #tol = 0.05 + #es_patience = 100 + #val_freq = 1 + continue + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + + # Handle exploding gradients + if epoch > 10: + if (val_error_rmse > prev_loss*10): # detect large increases in loss + if epoch > 60: # distinguish between exploding gradients at near converged models and early on exploding grads + if verbose: + print("Exploding gradient detected. Resuming previous model state and decaying learning rate") + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.5 + optimizer.load_state_dict(saved_optimizer_state_dict) + failures += 1 # if + if failures > 2: + break + else: + continue + else: + break + if val_error_rmse != val_error_rmse: # detect NaN + break + if ((prev_loss < 1.0) and (precision == 32)): # if 32 bit precision and model is giving very high accuracy, kill so the accuracy does not go beyond 32 bit precision + break + prev_loss = val_error_rmse * 1.0 # save previous loss to track improvement + + # Periodically save model state so we can reset under instability/overfitting/performance plateau + if epoch % 50 == 0: + saved_model_state_dict = copy.deepcopy(model.state_dict()) + saved_optimizer_state_dict = copy.deepcopy(optimizer.state_dict()) + + with torch.no_grad(): + train_pred, trash = model(self.Xtr, self.Xtr) + train_loss = metric(train_pred, self.ytr) + train_error_rmse = np.sqrt(train_loss.item() * loss_descaler) * hartree2cm + test_pred, trash = model(self.Xtest, self.Xtest) + test_loss = metric(test_pred, self.ytest) + test_error_rmse = np.sqrt(test_loss.item() * loss_descaler) * hartree2cm + val_pred, trash = model(self.Xvalid, self.Xvalid) + val_loss = metric(val_pred, self.yvalid) + val_error_rmse = np.sqrt(val_loss.item() * loss_descaler) * hartree2cm + full_pred, trash = model(self.X, self.X) + full_loss = metric(full_pred, self.y) + full_error_rmse = np.sqrt(full_loss.item() * loss_descaler) * hartree2cm + print("Test set RMSE (cm-1): {:5.2f} Validation set RMSE (cm-1): {:5.2f} Train set RMSE: {:5.2f} Full dataset RMSE (cm-1): {:5.2f}".format(test_error_rmse, val_error_rmse, train_error_rmse, full_error_rmse)) + if return_model: + return model, test_error_rmse, val_error_rmse, full_error_rmse + else: + return test_error_rmse, val_error_rmse diff --git a/peslearn/ml/mfnn/weight_transfer.py b/peslearn/ml/mfnn/weight_transfer.py new file mode 100644 index 0000000..f353374 --- /dev/null +++ b/peslearn/ml/mfnn/weight_transfer.py @@ -0,0 +1,400 @@ +from ..neural_network import NeuralNetwork +from ..model import Model +import torch +import torch.nn as nn +from collections import OrderedDict +from sklearn.model_selection import train_test_split +from ...constants import hartree2cm +import copy +import numpy as np + +class 
WTNN(NeuralNetwork): + def __init__(self, dataset_path, dataset_path_lf, input_obj, input_obj_lf, molecule_type=None, molecule=None, train_path=None, test_path=None, valid_path=None): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path) + self.lf_model = Model(dataset_path_lf, input_obj_lf, molecule_type, molecule, train_path, test_path, valid_path) # TODO: Train, test, valid paths are for HF model + if self.lf_model.input_obj.keywords['validation_points']: + self.nvalid_lf = self.lf_model.input_obj.keywords['validation_points'] + if (self.nvalid_lf + self.lf_model.ntrain + 1) > self.lf_model.n_datapoints: + raise Exception("Error: User-specified training set size and validation set size exceeds the size of the dataset.") + else: + self.nvalid_lf = round((self.lf_model.n_datapoints - self.lf_model.ntrain) / 2) + + def split_train_test(self, params, validation_size=None, validation_size_lf=None, precision=32): + self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) + self.X_lf, self.y_lf, self.Xscaler_lf, self.yscaler_lf = self.preprocess(params, self.lf_model.raw_X, self.lf_model.raw_y) + if self.sampler == 'user_supplied': + self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) + self.ytr = self.transform_new_y(self.raw_ytr, self.yscaler) + self.Xtest = self.transform_new_X(self.raw_Xtest, params, self.Xscaler) + self.ytest = self.transform_new_y(self.raw_ytest, self.yscaler) + + self.Xtr_lf = self.transform_new_X(self.lf_model.raw_Xtr, params, self.Xscaler_lf) + self.ytr_lf = self.transform_new_y(self.lf_model.raw_ytr, self.yscaler_lf) + self.Xtest_lf = self.transform_new_X(self.lf_model.raw_Xtest, params, self.Xscaler_lf) + self.ytest_lf = self.transform_new_y(self.lf_model.raw_ytest, self.yscaler_lf) + if self.valid_path: + self.Xvalid = self.transform_new_X(self.raw_Xvalid, params, self.Xscaler) + self.yvalid = self.transform_new_y(self.raw_yvalid, self.yscaler) + + self.Xvalid_lf = self.transform_new_X(self.lf_model.raw_Xvalid, params, self.Xscaler_lf) + self.yvalid_lf = self.transform_new_y(self.lf_model.raw_yvalid, self.yscaler_lf) + else: + raise Exception("Please provide a validation set for Neural Network training.") + else: + self.Xtr = self.X[self.train_indices] + self.ytr = self.y[self.train_indices] + + self.Xtr_lf = self.X_lf[self.lf_model.train_indices] + self.ytr_lf = self.y_lf[self.lf_model.train_indices] + #TODO: this is splitting validation data in the same way at every model build, not necessary. 
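[editor's note] WTNN's build_model (below) trains a single network in two stages: first on the abundant low-fidelity energies, then, keeping the same weights, on the scarce high-fidelity energies with a reduced learning rate. A self-contained sketch of that weight-transfer pattern with random stand-in data; all shapes, sizes, and learning rates here are invented for illustration.

    import torch
    import torch.nn as nn

    torch.manual_seed(0)
    X_lf, y_lf = torch.rand(500, 3), torch.rand(500, 1)   # many cheap points
    X_hf, y_hf = torch.rand(50, 3),  torch.rand(50, 1)    # few expensive points

    net = nn.Sequential(nn.Linear(3, 32), nn.Tanh(), nn.Linear(32, 1))
    mse = nn.MSELoss()

    # Stage 1: pre-train on the low-fidelity surface
    opt = torch.optim.Adam(net.parameters(), lr=1e-2)
    for _ in range(200):
        opt.zero_grad()
        loss = torch.sqrt(mse(net(X_lf), y_lf))   # RMSE loss, as in build_model
        loss.backward()
        opt.step()

    # Stage 2: fine-tune the *same* network on the high-fidelity surface
    opt = torch.optim.Adam(net.parameters(), lr=1e-3)  # smaller step, mirroring WTNN's lr*0.1
    for _ in range(200):
        opt.zero_grad()
        loss = torch.sqrt(mse(net(X_hf), y_hf))
        loss.backward()
        opt.step()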
+ self.valid_indices, self.new_test_indices = train_test_split(self.test_indices, train_size = validation_size, random_state=42) + self.valid_indices_lf, self.new_test_indices_lf = train_test_split(self.lf_model.test_indices, train_size = validation_size_lf, random_state=42) + if validation_size and validation_size_lf: + self.Xvalid = self.X[self.valid_indices] + self.yvalid = self.y[self.valid_indices] + self.Xtest = self.X[self.new_test_indices] + self.ytest = self.y[self.new_test_indices] + + self.Xvalid_lf = self.X_lf[self.valid_indices_lf] + self.yvalid_lf = self.y_lf[self.valid_indices_lf] + self.Xtest_lf = self.X_lf[self.new_test_indices_lf] + self.ytest_lf = self.y_lf[self.new_test_indices_lf] + + else: + raise Exception("Please specify a validation set size for Neural Network training.") + + # convert to Torch Tensors + if precision == 32: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float32) + self.ytr = torch.tensor(self.ytr, dtype=torch.float32) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float32) + self.ytest = torch.tensor(self.ytest, dtype=torch.float32) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float32) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float32) + self.X = torch.tensor(self.X,dtype=torch.float32) + self.y = torch.tensor(self.y,dtype=torch.float32) + + self.Xtr_lf = torch.tensor(self.Xtr_lf, dtype=torch.float32) + self.ytr_lf = torch.tensor(self.ytr_lf, dtype=torch.float32) + self.Xtest_lf = torch.tensor(self.Xtest_lf, dtype=torch.float32) + self.ytest_lf = torch.tensor(self.ytest_lf, dtype=torch.float32) + self.Xvalid_lf = torch.tensor(self.Xvalid_lf,dtype=torch.float32) + self.yvalid_lf = torch.tensor(self.yvalid_lf,dtype=torch.float32) + self.X_lf = torch.tensor(self.X_lf,dtype=torch.float32) + self.y_lf = torch.tensor(self.y_lf,dtype=torch.float32) + elif precision == 64: + self.Xtr = torch.tensor(self.Xtr, dtype=torch.float64) + self.ytr = torch.tensor(self.ytr, dtype=torch.float64) + self.Xtest = torch.tensor(self.Xtest, dtype=torch.float64) + self.ytest = torch.tensor(self.ytest, dtype=torch.float64) + self.Xvalid = torch.tensor(self.Xvalid,dtype=torch.float64) + self.yvalid = torch.tensor(self.yvalid,dtype=torch.float64) + self.X = torch.tensor(self.X,dtype=torch.float64) + self.y = torch.tensor(self.y,dtype=torch.float64) + + self.Xtr_lf = torch.tensor(self.Xtr_lf, dtype=torch.float64) + self.ytr_lf = torch.tensor(self.ytr_lf, dtype=torch.float64) + self.Xtest_lf = torch.tensor(self.Xtest_lf, dtype=torch.float64) + self.ytest_lf = torch.tensor(self.ytest_lf, dtype=torch.float64) + self.Xvalid_lf = torch.tensor(self.Xvalid_lf,dtype=torch.float64) + self.yvalid_lf = torch.tensor(self.yvalid_lf,dtype=torch.float64) + self.X_lf = torch.tensor(self.X_lf,dtype=torch.float64) + self.y_lf = torch.tensor(self.y_lf,dtype=torch.float64) + else: + raise Exception("Invalid option for 'precision'") + + def build_model(self, params, maxit=1000, val_freq=10, es_patience=2, opt='lbfgs', tol=1, decay=False, verbose=False, precision=32, return_model=False): + # LF Training + print("Hyperparameters: ", params) + self.split_train_test(params, validation_size=self.nvalid, validation_size_lf=self.nvalid_lf, precision=precision) # split data, according to scaling hp's + scale = params['scale_y'] # Find descaling factor to convert loss to original energy units + + if scale == 'std': + loss_descaler = self.yscaler_lf.var_[0] # Here + if scale.startswith('mm'): + loss_descaler = (1/self.yscaler_lf.scale_[0]**2) # Here + + activation = 
params['scale_X']['activation'] + if activation == 'tanh': + activ = nn.Tanh() + if activation == 'sigmoid': + activ = nn.Sigmoid() + + inp_dim = self.inp_dim + l = params['layers'] + torch.manual_seed(0) + depth = len(l) + structure = OrderedDict([('input', nn.Linear(inp_dim, l[0])), + ('activ_in' , activ)]) + + model = nn.Sequential(structure) # Here + for i in range(depth-1): + model.add_module('layer' + str(i), nn.Linear(l[i], l[i+1])) + model.add_module('activ' + str(i), activ) + model.add_module('output', nn.Linear(l[depth-1], 1)) + if precision == 64: # cast model to proper precision + model = model.double() + metric = torch.nn.MSELoss() + + # Define optimizer + if 'lr' in params: + lr = params['lr'] + elif opt == 'lbfgs': + lr = 0.5 + else: + lr = 0.1 + + optimizer = self.get_optimizer(opt, model.parameters(), lr=lr) + # Define update variables for early stopping, decay, gradient explosion handling + prev_loss = 1.0 + es_tracker = 0 + best_val_error = None + failures = 0 + decay_attempts = 0 + prev_best = None + decay_start = False + for epoch in range(1,maxit): + def closure(): + optimizer.zero_grad() + y_pred = model(self.Xtr_lf) + loss = torch.sqrt(metric(y_pred, self.ytr_lf)) # passing RMSE instead of MSE improves precision IMMENSELY + loss.backward() + return loss + optimizer.step(closure) + # validate + if epoch % val_freq == 0: + with torch.no_grad(): + tmp_pred = model(self.Xvalid_lf) + tmp_loss = metric(tmp_pred, self.yvalid_lf) + val_error_rmse = np.sqrt(tmp_loss.item() * loss_descaler) * hartree2cm # loss_descaler converts MSE in scaled data domain to MSE in unscaled data domain + if best_val_error: + if val_error_rmse < best_val_error: + prev_best = best_val_error * 1.0 + best_val_error = val_error_rmse * 1.0 + else: + record = True + best_val_error = val_error_rmse * 1.0 + prev_best = best_val_error + if verbose: + print("Epoch {} Validation RMSE (cm-1): {:5.3f}".format(epoch, val_error_rmse)) + if decay_start: + scheduler.step(val_error_rmse) + + # Early Stopping + if epoch > 5: + # if current validation error is not the best (current - best > 0) and is within tol of previous error, the model is stagnant. + if ((val_error_rmse - prev_loss) < tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: current validation error is not the best (current - best > 0) and is greater than the best by tol, the model is overfitting. Bad epoch. + elif ((val_error_rmse - best_val_error) > tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: if the current validation error is a new record, but not significant, the model is stagnant + elif (prev_best - best_val_error) < 0.001: + es_tracker += 1 + # else: model set a new record validation error. Reset early stopping tracker + else: + es_tracker = 0 + #TODO this framework does not detect oscillatory behavior about 'tol', though this has not been observed to occur in any case + # Check status of early stopping tracker. First try decaying to see if stagnation can be resolved, if not then terminate training + if es_tracker > es_patience: + if decay: # if decay is set to true, if early stopping criteria is triggered, begin LR scheduler and go back to previous model state and attempt LR decay. + if decay_attempts < 1: + decay_attempts += 1 + es_tracker = 0 + if verbose: + print("Performance plateau detected. 
Reverting model state and decaying learning rate.") + decay_start = True + thresh = (0.1 / np.sqrt(loss_descaler)) / hartree2cm # threshold is 0.1 wavenumbers + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, threshold=thresh, threshold_mode='abs', min_lr=0.05, cooldown=2, patience=10, verbose=verbose) + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.9 + optimizer.load_state_dict(saved_optimizer_state_dict) + # Since learning rate is decayed, override tolerance, patience, validation frequency for high-precision + #tol = 0.05 + #es_patience = 100 + #val_freq = 1 + continue + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + + # Handle exploding gradients + if epoch > 10: + if (val_error_rmse > prev_loss*10): # detect large increases in loss + if epoch > 60: # distinguish between exploding gradients at near converged models and early on exploding grads + if verbose: + print("Exploding gradient detected. Resuming previous model state and decaying learning rate") + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.5 + optimizer.load_state_dict(saved_optimizer_state_dict) + failures += 1 # if + if failures > 2: + break + else: + continue + else: + break + if val_error_rmse != val_error_rmse: # detect NaN + break + if ((prev_loss < 1.0) and (precision == 32)): # if 32 bit precision and model is giving very high accuracy, kill so the accuracy does not go beyond 32 bit precision + break + prev_loss = val_error_rmse * 1.0 # save previous loss to track improvement + + # Periodically save model state so we can reset under instability/overfitting/performance plateau + if epoch % 50 == 0: + saved_model_state_dict = copy.deepcopy(model.state_dict()) + saved_optimizer_state_dict = copy.deepcopy(optimizer.state_dict()) + + with torch.no_grad(): + test_pred = model(self.Xtest_lf) + test_loss = metric(test_pred, self.ytest_lf) + test_error_rmse = np.sqrt(test_loss.item() * loss_descaler) * hartree2cm + val_pred = model(self.Xvalid_lf) + val_loss = metric(val_pred, self.yvalid_lf) + val_error_rmse = np.sqrt(val_loss.item() * loss_descaler) * hartree2cm + full_pred = model(self.X_lf) + full_loss = metric(full_pred, self.y_lf) + full_error_rmse = np.sqrt(full_loss.item() * loss_descaler) * hartree2cm + print("LF: Test set RMSE (cm-1): {:5.2f} Validation set RMSE (cm-1): {:5.2f} Full dataset RMSE (cm-1): {:5.2f}".format(test_error_rmse, val_error_rmse, full_error_rmse)) + + # HF Training + + if scale == 'std': + loss_descaler = self.yscaler.var_[0] + if scale.startswith('mm'): + loss_descaler = (1/self.yscaler.scale_[0]**2) + + # Define update variables for early stopping, decay, gradient explosion handling + prev_loss = 1.0 + es_tracker = 0 + best_val_error = None + failures = 0 + decay_attempts = 0 + prev_best = None + decay_start = False + saved_optimizer_state_dict = copy.deepcopy(optimizer.state_dict()) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr * 0.1 + optimizer.load_state_dict(saved_optimizer_state_dict) + for epoch in range(1,maxit): + def closure(): + optimizer.zero_grad() + y_pred = model(self.Xtr) + loss = torch.sqrt(metric(y_pred, self.ytr)) # passing RMSE instead of MSE improves precision IMMENSELY + loss.backward() + return loss + optimizer.step(closure) + # validate + if epoch % 
val_freq == 0: + with torch.no_grad(): + tmp_pred = model(self.Xvalid) + tmp_loss = metric(tmp_pred, self.yvalid) + val_error_rmse = np.sqrt(tmp_loss.item() * loss_descaler) * hartree2cm # loss_descaler converts MSE in scaled data domain to MSE in unscaled data domain + if best_val_error: + if val_error_rmse < best_val_error: + prev_best = best_val_error * 1.0 + best_val_error = val_error_rmse * 1.0 + else: + record = True + best_val_error = val_error_rmse * 1.0 + prev_best = best_val_error + if verbose: + print("Epoch {} Validation RMSE (cm-1): {:5.3f}".format(epoch, val_error_rmse)) + if decay_start: + scheduler.step(val_error_rmse) + + # Early Stopping + if epoch > 5: + # if current validation error is not the best (current - best > 0) and is within tol of previous error, the model is stagnant. + if ((val_error_rmse - prev_loss) < tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: current validation error is not the best (current - best > 0) and is greater than the best by tol, the model is overfitting. Bad epoch. + elif ((val_error_rmse - best_val_error) > tol) and (val_error_rmse - best_val_error) > 0.0: + es_tracker += 1 + # else if: if the current validation error is a new record, but not significant, the model is stagnant + elif (prev_best - best_val_error) < 0.001: + es_tracker += 1 + # else: model set a new record validation error. Reset early stopping tracker + else: + es_tracker = 0 + #TODO this framework does not detect oscillatory behavior about 'tol', though this has not been observed to occur in any case + # Check status of early stopping tracker. First try decaying to see if stagnation can be resolved, if not then terminate training + if es_tracker > es_patience: + if decay: # if decay is set to true, if early stopping criteria is triggered, begin LR scheduler and go back to previous model state and attempt LR decay. + if decay_attempts < 1: + decay_attempts += 1 + es_tracker = 0 + if verbose: + print("Performance plateau detected. Reverting model state and decaying learning rate.") + decay_start = True + thresh = (0.1 / np.sqrt(loss_descaler)) / hartree2cm # threshold is 0.1 wavenumbers + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, threshold=thresh, threshold_mode='abs', min_lr=0.05, cooldown=2, patience=10, verbose=verbose) + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.9 + optimizer.load_state_dict(saved_optimizer_state_dict) + # Since learning rate is decayed, override tolerance, patience, validation frequency for high-precision + #tol = 0.05 + #es_patience = 100 + #val_freq = 1 + continue + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + else: + prev_loss = val_error_rmse * 1.0 + if verbose: + print('Early stopping termination') + break + + # Handle exploding gradients + if epoch > 10: + if (val_error_rmse > prev_loss*10): # detect large increases in loss + if epoch > 60: # distinguish between exploding gradients at near converged models and early on exploding grads + if verbose: + print("Exploding gradient detected. 
Resuming previous model state and decaying learning rate") + model.load_state_dict(saved_model_state_dict) + saved_optimizer_state_dict['param_groups'][0]['lr'] = lr*0.5 + optimizer.load_state_dict(saved_optimizer_state_dict) + failures += 1 # if + if failures > 2: + break + else: + continue + else: + break + if val_error_rmse != val_error_rmse: # detect NaN + break + if ((prev_loss < 1.0) and (precision == 32)): # if 32 bit precision and model is giving very high accuracy, kill so the accuracy does not go beyond 32 bit precision + break + prev_loss = val_error_rmse * 1.0 # save previous loss to track improvement + + # Periodically save model state so we can reset under instability/overfitting/performance plateau + if epoch % 50 == 0: + saved_model_state_dict = copy.deepcopy(model.state_dict()) + saved_optimizer_state_dict = copy.deepcopy(optimizer.state_dict()) + + with torch.no_grad(): + test_pred = model(self.Xtest) + test_loss = metric(test_pred, self.ytest) + test_error_rmse = np.sqrt(test_loss.item() * loss_descaler) * hartree2cm + val_pred = model(self.Xvalid) + val_loss = metric(val_pred, self.yvalid) + val_error_rmse = np.sqrt(val_loss.item() * loss_descaler) * hartree2cm + full_pred = model(self.X) + full_loss = metric(full_pred, self.y) + full_error_rmse = np.sqrt(full_loss.item() * loss_descaler) * hartree2cm + print("HF: Test set RMSE (cm-1): {:5.2f} Validation set RMSE (cm-1): {:5.2f} Full dataset RMSE (cm-1): {:5.2f}".format(test_error_rmse, val_error_rmse, full_error_rmse)) + + if return_model: + return model, test_error_rmse, val_error_rmse, full_error_rmse + else: + return test_error_rmse, val_error_rmse + diff --git a/peslearn/ml/model.py b/peslearn/ml/model.py index 4772074..00674ae 100644 --- a/peslearn/ml/model.py +++ b/peslearn/ml/model.py @@ -55,10 +55,20 @@ def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, t self.raw_Xvalid = self.validdata.values[:, :-1] self.raw_yvalid = self.validdata.values[:,-1].reshape(-1,1) + self.dataset = data.sort_values("E") self.n_datapoints = self.dataset.shape[0] - self.raw_X = self.dataset.values[:, :-1] - self.raw_y = self.dataset.values[:,-1].reshape(-1,1) + idx = 0 # My code I think... 
+ for i in self.dataset: + if 'E' in i: + idx += 1 + if idx > 1 and False: + self.raw_X = self.dataset.values[:,:-idx] + self.raw_y = self.dataset.values[:,-idx].reshape(-1,1) + self.raw_y_l = self.dataset.values[:,-1].reshape(-1,1) + else: + self.raw_X = self.dataset.values[:,:-1] + self.raw_y = self.dataset.values[:,-1].reshape(-1,1) self.input_obj = input_obj self.pip = False @@ -132,17 +142,12 @@ def interpret_dataset(self, path): #try: # data = pd.read_csv(path, sep=None) return data - - @abstractmethod def build_model(self): pass - @abstractmethod def save_model(self): pass - @abstractmethod def preprocess(self): pass - @abstractmethod def split_train_test(self): pass diff --git a/peslearn/ml/neural_network.py b/peslearn/ml/neural_network.py index 44e4d29..947c212 100644 --- a/peslearn/ml/neural_network.py +++ b/peslearn/ml/neural_network.py @@ -15,7 +15,7 @@ from sklearn.model_selection import train_test_split from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, space_eval from .preprocessing_helper import sort_architectures - +from copy import deepcopy torch.set_printoptions(precision=15) @@ -97,7 +97,8 @@ def optimize_model(self): self.hyperopt_trials = Trials() self.itercount = 1 if self.input_obj.keywords['rseed']: - rstate = np.random.RandomState(self.input_obj.keywords['rseed']) + rstate = np.random.default_rng(self.input_obj.keywords['rseed']) + #rstate = np.random.RandomState(self.input_obj.keywords['rseed']) else: rstate = None best = fmin(self.hyperopt_model, @@ -131,7 +132,6 @@ def optimize_model(self): print("Fine-tuning final model...") model, test_error, val_error, full_error = self.build_model(self.optimal_hyperparameters, maxit=5000, val_freq=1, es_patience=100, opt='lbfgs', tol=0.1, decay=True, verbose=True,precision=precision,return_model=True) performance = [test_error, val_error, full_error] - self.test_error = test_error print("Model optimization complete. 
Saving final model...") self.save_model(self.optimal_hyperparameters, model, performance) @@ -189,6 +189,7 @@ def split_train_test(self, params, validation_size=None, precision=32): self.Xvalid : validation input data, transformed self.yvalid : validation output data, transformed """ + old_raw_X = deepcopy(self.raw_X) self.X, self.y, self.Xscaler, self.yscaler = self.preprocess(params, self.raw_X, self.raw_y) if self.sampler == 'user_supplied': self.Xtr = self.transform_new_X(self.raw_Xtr, params, self.Xscaler) @@ -211,6 +212,28 @@ def split_train_test(self, params, validation_size=None, precision=32): self.Xtest = self.X[self.new_test_indices] self.ytest = self.y[self.new_test_indices] + #self.Xtmp = self.X[self.test_indices] + #self.ytmp = self.y[self.test_indices] + #if validation_size: + # self.Xvalid, self.Xtest, self.yvalid, self.ytest = train_test_split(self.Xtmp, + # self.ytmp, + # train_size = validation_size, + # random_state=42) + + ## temporary implementation: structure based validation set sample + #if validation_size: + # data = np.hstack((self.Xtmp, self.ytmp)) + # col = [str(i) for i in range(data.shape[1])] + # col[-1] = 'E' + # df = pd.DataFrame(data, columns=col) + # df.columns.values[-1] = 'E' + # sample = DataSampler(df, validation_size) + # sample.structure_based() + # validation_indices, test_indices = sample.get_indices() + # self.Xvalid = self.Xtmp[validation_indices] + # self.yvalid = self.ytmp[validation_indices] + # self.Xtest = self.Xtmp[test_indices] + # self.ytest = self.ytmp[test_indices] else: raise Exception("Please specify a validation set size for Neural Network training.") @@ -240,6 +263,7 @@ def get_optimizer(self, opt_type, mdata, lr=0.1): rate = lr if opt_type == 'lbfgs': #optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=20, max_eval=None, tolerance_grad=1e-5, tolerance_change=1e-9, history_size=100) # Defaults + #optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=100, max_eval=None, tolerance_grad=1e-10, tolerance_change=1e-14, history_size=200) optimizer = torch.optim.LBFGS(mdata, lr=rate, max_iter=20, max_eval=None, tolerance_grad=1e-8, tolerance_change=1e-12, history_size=100) if opt_type == 'adam': optimizer = torch.optim.Adam(mdata, lr=rate) @@ -310,12 +334,13 @@ def build_model(self, params, maxit=1000, val_freq=10, es_patience=2, opt='lbfgs decay_attempts = 0 prev_best = None decay_start = False - + #labmda = 1e-4 for epoch in range(1,maxit): def closure(): optimizer.zero_grad() y_pred = model(self.Xtr) - loss = torch.sqrt(metric(y_pred, self.ytr)) # passing RMSE instead of MSE improves precision IMMENSELY + #l2_norm = sum(p.pow(2.0).sum() for p in model.parameters()) + loss = torch.sqrt(metric(y_pred, self.ytr)) #+ labmda*l2_norm # passing RMSE instead of MSE improves precision IMMENSELY loss.backward() return loss optimizer.step(closure) diff --git a/peslearn/ml/preprocessing_helper.py b/peslearn/ml/preprocessing_helper.py index 2dd73be..e71e184 100644 --- a/peslearn/ml/preprocessing_helper.py +++ b/peslearn/ml/preprocessing_helper.py @@ -100,9 +100,12 @@ def sort_architectures(layers, inp_dim): size += out_dim * struct[-1] sizes.append(size) sorted_indices = np.argsort(sizes).tolist() - layers = np.asarray(layers) - layers = layers[sorted_indices].tolist() - return layers + print(layers) + print(sorted_indices) + #layers = np.array(layers) + #layers = layers[sorted_indices].tolist() + layers2 = [layers[i] for i in sorted_indices] + return layers2 diff --git a/peslearn/ml/svigp.py b/peslearn/ml/svigp.py new file mode 100644 index 
0000000..8db49f5 --- /dev/null +++ b/peslearn/ml/svigp.py @@ -0,0 +1,92 @@ +import numpy as np +import torch +import gpytorch +from .gpytorch_gpr import GaussianProcess +import itertools +import gc + +class SVI(gpytorch.models.ApproximateGP): + def __init__(self, train_x, train_y, inducing_points): + variational_distribution = gpytorch.variational.TrilNaturalVariationalDistribution(inducing_points.size(0)) + variational_strategy = gpytorch.variational.VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + #variational_strategy = gpytorch.variational.CiqVariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) + super(SVI, self).__init__(variational_strategy) + self.mean = gpytorch.means.ConstantMean() + self.covar = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims = train_x.size(1))) + def forward(self, x): + mean_x = self.mean(x) + covar_x = self.covar(x) + #np.savetxt('/home/smg13363/GPR_PES/gpytorch_test_space/spgp/benchmarks/array.dat', covar_x.detach().numpy()) + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + +class SVIGP(GaussianProcess): + def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, train_path=None, test_path=None, epochs=100, num_inducing=50, batchsize=100): + super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path) + torch.set_default_tensor_type(torch.DoubleTensor) + gpytorch.settings.verbose_linalg(True) + gpytorch.settings.tridiagonal_jitter(1e-5) + torch.set_default_dtype(torch.float64) + gpytorch.settings.lazily_evaluate_kernels(False) + self.epochs = epochs + self.num_inducing = num_inducing + self.batchsize = batchsize + + def build_model(self, params, nrestarts=10, maxiter=10000, seed=0): + print("Hyperparameters: ", params) + self.split_train_test(params) + np.random.seed(seed) # make GPy deterministic for a given hyperparameter config + #TODO: ARD + + #epochs = 1000 + #self.num_inducing = 100 + #self.batchsize = 300 + self.Z = self.X[:self.num_inducing,:] + #self.Z = torch.rand(self.Xtr.size(0), self.num_inducing) + #self.Z = self.Xtr[np.random.choice(len(self.Xtr), self.num_inducing, replace=False),:] + #scale_rand = 1e-4 + #ytritty = self.ytr + scale_rand * torch.Tensor(np.random.rand(self.ytr.size(0))-0.5).unsqueeze(dim=1) + + #train_ds = torch.utils.data.TensorDataset(self.Xtr, ytritty) + train_ds = torch.utils.data.TensorDataset(self.Xtr, self.ytr) + train_loader = torch.utils.data.DataLoader(train_ds, batch_size = self.batchsize, shuffle=True) + + self.likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.model = SVI(self.Xtr, self.ytr, inducing_points = self.Z) + + #cuda = 'cuda' + #self.Xtr = self.Xtr.cuda() + #self.ytr = self.ytr.cuda() + #self.model = self.model.to(cuda) + #self.likelihood = self.likelihood.to(cuda) + + self.model.train() + self.likelihood.train() + opt_ngd = gpytorch.optim.NGD(self.model.variational_parameters(), num_data=self.ytr.size(0), lr=0.01) + opt_hyp = torch.optim.Adam([{'params':self.model.parameters()}, {'params':self.likelihood.parameters()}], lr=0.01) + mll = gpytorch.mlls.VariationalELBO(self.likelihood, self.model, num_data=self.ytr.size(0)) + + for i in range(self.epochs): + print(f'\nEpoch {i}/{self.epochs}\n') + for x_batch, y_batch in train_loader: + #x_batch = x_batch.to(cuda) + #y_batch = y_batch.to(cuda) + opt_ngd.zero_grad() + opt_hyp.zero_grad() + out = self.model(x_batch) + loss = -mll(out, y_batch.squeeze()) + 
loss.backward() + opt_ngd.step() + opt_hyp.step() + #if i % 5 == 0: + # self.model.eval() + # self.likelihood.eval() + # print(f'\nEpoch {i}/{self.epochs}\n') + # self.vet_model(self.model) + # self.model.train() + # self.likelihood.train() + print('\nTraining Done\n') + self.model.eval() + self.likelihood.eval() + gc.collect(2) #fixes some memory leak issues with certain BLAS configs + + diff --git a/setup.py b/setup.py index 4828bd7..38e9ec7 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,15 @@ license='BSD-3C', packages=setuptools.find_packages(), install_requires=[ - 'numpy>=1.7','GPy>=1.9','scikit-learn>=0.20','pandas>=0.24','hyperopt>=0.1.1','cclib>=1.6', 'torch>=1.0.1' + 'numpy>=1.7', + 'GPy>=1.9', + 'scikit-learn>=0.20', + 'pandas>=0.24', + 'hyperopt>=0.1.1', + 'cclib>=1.6', + 'matplotlib==3.0.3', + 'torch>=1.0.1', + 'gpytorch>=1.9.0' ], extras_require={ 'docs': [ diff --git a/tests/grad_hess/sandbox_1d.py b/tests/grad_hess/sandbox_1d.py new file mode 100644 index 0000000..26520ac --- /dev/null +++ b/tests/grad_hess/sandbox_1d.py @@ -0,0 +1,75 @@ +import numpy as np +import torch +import torch.nn as nn +from torch import autograd +import matplotlib.pyplot as plt + +# x**2 +x_dat = torch.tensor([-5.0,-4.0,-3.0,-2.0,-1.0,0.0,1.0,2.0,3.0,4.0,5.0], requires_grad=True) +x_dat = x_dat[:,None] +print(x_dat.shape) +y_dat = torch.tensor([25.0,16.0,9.0,4.0,1.0,0.0,1.0,4.0,9.0,16.0,25.0]).reshape(-1,1) +grad = torch.tensor([-10.0,-8.0,-6.0,-4.0,-2.0,0.0,2.0,4.0,6.0,8.0,10.0]).reshape(-1,1) +hess = torch.tensor([2.0 for i in range(11)]).reshape(-1,1) +print(hess) +class NeuralNetwork(nn.Module): + def __init__(self): + super().__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(1, 10), + nn.Tanh(), + nn.Linear(10, 10), + nn.Tanh(), + nn.Linear(10, 1) + ) + + def forward(self, x): + #x = self.flatten(x) + logits = self.linear_relu_stack(x) + return logits + +model = NeuralNetwork() +optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) +def train(x, y, grad, model, optimizer, der=0): + model.train() + + # Compute prediction error + ypred = model(x) + gradspred, = autograd.grad(ypred, x, + grad_outputs=ypred.data.new(ypred.shape).fill_(1), + create_graph=True) + hesspred, = autograd.grad(gradspred, x, grad_outputs=ypred.data.new(ypred.shape).fill_(1), create_graph=True) + if der == 0: + loss = torch.mean((y - ypred) ** 2) + elif der == 1: + loss = torch.mean((y - ypred) ** 2 + (gradspred - grad)) + elif der == 2: + loss = torch.mean((y - ypred) ** 2 + 1.0*(gradspred - grad) ** 2 + 1.0*(hess - hesspred)**2) + # Backpropagation + loss.backward() + optimizer.step() + optimizer.zero_grad() + return loss.item(), gradspred, hesspred + #loss = loss.item() + #print(f"loss: {loss:>7f}") + +epochs = 5000 +for t in range(epochs): + err, gradspred, hesspred = train(x_dat, y_dat, grad, model, optimizer, der = 2) + if (t+1)%5000 == 0: + print(f"Epoch {t+1}\n-------------------------------") + print(err) + print(gradspred) + print(hesspred) +print("Done!") + +xs = np.linspace(-10,10,500, dtype=float) +xs_t = torch.tensor(xs, dtype=float, requires_grad=False) +xs_t = xs_t[:,None] +y_pred = model(xs_t.float()) + +plt.figure() +plt.plot(x_dat.detach().numpy(), y_dat.numpy(), "ko") +plt.plot(xs, y_pred.detach().numpy(), "b-") +plt.show() diff --git a/tests/grad_hess/sandbox_2d.py b/tests/grad_hess/sandbox_2d.py new file mode 100644 index 0000000..d3458c6 --- /dev/null +++ b/tests/grad_hess/sandbox_2d.py @@ -0,0 +1,178 @@ +import numpy as np +import torch +import 
diff --git a/tests/grad_hess/sandbox_2d.py b/tests/grad_hess/sandbox_2d.py
new file mode 100644
index 0000000..d3458c6
--- /dev/null
+++ b/tests/grad_hess/sandbox_2d.py
@@ -0,0 +1,178 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from torch import autograd
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import axes3d
+
+# x
+#x_dat = torch.tensor([-5.0,-4.0,-3.0,-2.0,-1.0,0.0,1.0,2.0,3.0,4.0,5.0], requires_grad=True)
+#x_dat = x_dat[:,None]
+#print(x_dat.shape)
+#y_dat = torch.tensor([25.0,16.0,9.0,4.0,1.0,0.0,1.0,4.0,9.0,16.0,25.0]).reshape(-1,1)
+#grad = torch.tensor([-10.0,-8.0,-6.0,-4.0,-2.0,0.0,2.0,4.0,6.0,8.0,10.0]).reshape(-1,1)
+#hess = torch.tensor([2.0 for i in range(11)]).reshape(-1,1)
+#print(hess)
+
+def f(x1, x2):
+    return (3*(x1*(x2**2)) + (x1**3) + x2)/400
+
+def df_x1(x1, x2):
+    return (3*(x2**2) + 3*(x1**2))/400
+
+def df_x2(x1, x2):
+    return (6*x1*x2+1)/400
+
+def d2f_x12(x1, x2):
+    return 6*x1/400
+
+def d2f_x22(x1, x2):
+    return 6*x1/400
+
+def d2f_x1x2(x1, x2):
+    return 6*x2/400
+
+g = np.linspace(-5,5,51)
+print(g)
+X, Y = np.meshgrid(g, g)
+print(np.shape(X))
+Z = f(X, Y)
+#fig = plt.figure()
+#ax = plt.axes(projection='3d')
+#ax.plot_wireframe(X, Y, Z)
+#plt.show()
+
+ndensity = 5
+g1 = np.linspace(-5,5,ndensity)
+X1, Y1 = np.meshgrid(g1, g1)
+Z1 = f(X1, Y1).flatten()
+#x_dat = torch.tensor()
+x_dat = torch.tensor(np.column_stack((X1.flatten(), Y1.flatten())), dtype=torch.float32, requires_grad=True)
+y_dat = torch.tensor(f(X1, Y1).flatten(), dtype=torch.float32, requires_grad=False).reshape(-1,1)
+grad = torch.tensor(np.column_stack((df_x1(X1, Y1).flatten(), df_x2(X1, Y1).flatten())), dtype=torch.float32)
+mn4 = np.column_stack((d2f_x12(X1, Y1).flatten(), d2f_x1x2(X1, Y1).flatten(), d2f_x1x2(X1, Y1).flatten(), d2f_x22(X1, Y1).flatten()))
+#print(mn4.reshape((25,2,2)))
+hess = torch.tensor(mn4.reshape((ndensity**2,2,2)), dtype=torch.float32)
+print(hess.size())
+
+class NeuralNetwork(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.flatten = nn.Flatten()
+        self.linear_relu_stack = nn.Sequential(
+            nn.Linear(2, 10),
+            nn.Tanh(),
+            nn.Linear(10, 10),
+            nn.Tanh(),
+            nn.Linear(10, 1)
+        )
+
+    def forward(self, x):
+        #x = self.flatten(x)
+        logits = self.linear_relu_stack(x)
+        return logits
+
+model = NeuralNetwork()
+optimizer = torch.optim.LBFGS(model.parameters(), lr=1e-3)
+def train(x, y, grad, hess, model, optimizer, der=0):
+    model.train()
+
+    # Compute prediction error
+    def closure():
+        optimizer.zero_grad()
+        ypred = model(x)
+        gradspred, = autograd.grad(ypred, x,
+                                   grad_outputs=ypred.data.new(ypred.shape).fill_(1),
+                                   create_graph=True)
+        s = gradspred.size()
+        hesspred = torch.zeros(s[0], s[1], s[1])
+        #for i in range(s[1]):
+        #    print(autograd.grad(gradspred[:,i], x, grad_outputs=torch.ones(25), create_graph=True))
+        #    hesspred[:,i,:], = autograd.grad(gradspred[:,i], x, grad_outputs=gradspred.data.new(gradspred.shape).fill_(1), create_graph=True)
+        hesspredi = [autograd.grad(gradspred[:,t], x, grad_outputs=gradspred.data.new(s[0]).fill_(1), create_graph=True)[0] for t in range(s[1])]
+        hesspred = torch.stack(hesspredi, dim=2)
+        #print(hesspred.size())
+        #beans = (hess - hesspred)**2
+        #beebus = (gradspred - grad)**2
+        #seebus = (y-ypred)**2
+        #print(seebus.size())
+        #print(beebus.size())
+        #print(torch.sum(beans, dim=2).size())
+        if der == 0:
+            loss = torch.sqrt(torch.mean((y - ypred) ** 2))
+        elif der == 1:
+            loss = torch.sqrt(torch.mean((y - ypred) ** 2 + (gradspred - grad)**2))
+        elif der == 2:
+            loss = torch.sqrt(torch.mean((y - ypred) ** 2 + torch.sum(1.0*(gradspred - grad) ** 2,dim=1).reshape(-1,1) + torch.sum(1.0*(hess - hesspred)**2, dim=(1,2)).reshape(-1,1)))
+        # Backpropagation
+        loss.backward()
+        return loss
+
+    optimizer.step(closure)
+    #optimizer.zero_grad()
+    #return loss.item(), gradspred, hesspred
+    #loss = loss.item()
+    #print(f"loss: {loss:>7f}")
+
+def test(x, y, grad, hess, model):
+    ypred = model(x)
+    gradspred, = autograd.grad(ypred, x,
+                               grad_outputs=ypred.data.new(ypred.shape).fill_(1),
+                               create_graph=True)
+    s = gradspred.size()
+    hesspred = torch.zeros(s[0], s[1], s[1])
+    hesspredi = [autograd.grad(gradspred[:,t], x, grad_outputs=gradspred.data.new(s[0]).fill_(1), create_graph=True)[0] for t in range(s[1])]
+    hesspred = torch.stack(hesspredi, dim=2)
+    loss = torch.sqrt(torch.mean((y - ypred) ** 2))
+    gradloss = torch.sqrt(torch.mean(torch.sum(1.0*(gradspred - grad) ** 2,dim=1).reshape(-1,1)))
+    hessloss = torch.sqrt(torch.mean(torch.sum(1.0*(hess - hesspred)**2, dim=(1,2)).reshape(-1,1)))
+
+    g1 = np.linspace(-5,5,51)
+    X1, Y1 = np.meshgrid(g1, g1)
+    #Z1 = f(X1, Y1).flatten()
+    #x_dat = torch.tensor()
+    x_dat = torch.tensor(np.column_stack((X1.flatten(), Y1.flatten())), dtype=torch.float32, requires_grad=True)
+    y_dat = torch.tensor(f(X1, Y1).flatten(), dtype=torch.float32, requires_grad=False).reshape(-1,1)
+    gradt = torch.tensor(np.column_stack((df_x1(X1, Y1).flatten(), df_x2(X1, Y1).flatten())), dtype=torch.float32)
+    mn4t = np.column_stack((d2f_x12(X1, Y1).flatten(), d2f_x1x2(X1, Y1).flatten(), d2f_x1x2(X1, Y1).flatten(), d2f_x22(X1, Y1).flatten()))
+    #print(mn4.reshape((25,2,2)))
+    hesst = torch.tensor(mn4t.reshape((51**2,2,2)), dtype=torch.float32)
+    ypredt = model(x_dat)
+    gradspred, = autograd.grad(ypredt, x_dat,
+                               grad_outputs=ypredt.data.new(ypredt.shape).fill_(1),
+                               create_graph=True)
+    s = gradspred.size()
+    hesspred = torch.zeros(s[0], s[1], s[1])
+    hesspredi = [autograd.grad(gradspred[:,t], x_dat, grad_outputs=gradspred.data.new(s[0]).fill_(1), create_graph=True)[0] for t in range(s[1])]
+    hesspred = torch.stack(hesspredi, dim=2)
+    test_err = torch.sqrt(torch.mean((y_dat - ypredt) ** 2))
+    test_grad = torch.sqrt(torch.mean(torch.sum(1.0*(gradspred - gradt) ** 2,dim=1).reshape(-1,1)))
+    test_hess = torch.sqrt(torch.mean(torch.sum(1.0*(hesst - hesspred)**2, dim=(1,2)).reshape(-1,1)))
+
+    return test_err, test_grad, test_hess, loss, gradloss, hessloss
+
+epochs = 1000
+for t in range(epochs):
+    #err, gradspred, hesspred = train(x_dat, y_dat, grad, model, optimizer, der = 2)
+    train(x_dat, y_dat, grad, hess, model, optimizer, der=0)
+    #if (t+1)%1000 == 0:
+    #    print(f"Epoch {t+1}\n-------------------------------")
+    #    print(err)
+    #    print(gradspred)
+    #    print(hesspred)
+print("Done!")
+test_err, test_grad, test_hess, loss, gradloss, hessloss = test(x_dat, y_dat, grad, hess, model)
+print(loss.item(), gradloss.item(), hessloss.item())
+print(test_err.item(), test_grad.item(), test_hess.item())
+g = np.linspace(-5,5,51)
+Xt, Yt = np.meshgrid(g, g)
+X = Xt.flatten()
+Y = Yt.flatten()
+xdat = torch.tensor(np.column_stack((X,Y)), dtype=torch.float32)
+Z = model(xdat)
+#print(X)
+fig = plt.figure()
+ax = plt.axes(projection='3d')
+ax.scatter(x_dat[:,0].detach().numpy(), x_dat[:,1].detach().numpy(), y_dat.numpy(), c="k")
+ax.plot_wireframe(Xt, Yt, Z.detach().numpy().reshape(51,51))
+plt.show()
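For reference, the analytic derivative targets of the 2-D test function f(x1, x2) = (3*x1*x2**2 + x1**3 + x2)/400 can be reproduced symbolically; a small sketch (sympy is assumed to be available here, it is not a dependency of these scripts):

    import sympy as sp

    x1, x2 = sp.symbols("x1 x2")
    f_sym = (3*x1*x2**2 + x1**3 + x2) / 400

    print(sp.diff(f_sym, x1))      # df/dx1     = (3*x1**2 + 3*x2**2)/400
    print(sp.diff(f_sym, x2))      # df/dx2     = (6*x1*x2 + 1)/400
    print(sp.diff(f_sym, x1, 2))   # d2f/dx1^2  = 6*x1/400
    print(sp.diff(f_sym, x2, 2))   # d2f/dx2^2  = 6*x1/400
    print(sp.diff(f_sym, x1, x2))  # d2f/dx1dx2 = 6*x2/400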