diff --git a/hidimstat/test/test_cpi.py b/hidimstat/test/test_cpi.py
index c7ce617..83da78d 100644
--- a/hidimstat/test/test_cpi.py
+++ b/hidimstat/test/test_cpi.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pandas as pd
+import pytest
 from sklearn.base import clone
+from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
@@ -91,3 +93,50 @@ def test_cpi(linear_scenario):
         groups=None,
     )
     vim = cpi.score(X_test, y_test_clf)
+
+
+def test_raises_value_error(
+    linear_scenario,
+):
+    # an unknown method name should raise a ValueError at construction
+    test_str = "unknown method"
+    with pytest.raises(ValueError):
+        CPI(
+            estimator=LinearRegression(),
+            imputation_model=LinearRegression(),
+            method=test_str,
+        )
+    X, y, beta = linear_scenario
+
+    # predict/score before fit should raise NotFittedError
+    with pytest.raises(NotFittedError):
+        cpi = CPI(
+            estimator=LinearRegression(),
+            imputation_model=LinearRegression(),
+            method="predict",
+        )
+        cpi.predict(X)
+    with pytest.raises(NotFittedError):
+        cpi = CPI(
+            estimator=LinearRegression(),
+            imputation_model=LinearRegression(),
+            method="predict",
+        )
+        cpi.score(X, y)
+    # with a fitted estimator but an unfitted CPI, predict/score should raise a ValueError
+    with pytest.raises(ValueError):
+        fitted_model = LinearRegression().fit(X, y)
+        cpi = CPI(
+            estimator=fitted_model,
+            imputation_model=LinearRegression(),
+            method="predict",
+        )
+        cpi.predict(X)
+    with pytest.raises(ValueError):
+        fitted_model = LinearRegression().fit(X, y)
+        cpi = CPI(
+            estimator=fitted_model,
+            imputation_model=LinearRegression(),
+            method="predict",
+        )
+        cpi.score(X, y)
diff --git a/hidimstat/utils.py b/hidimstat/utils.py
index a15c4d8..d0d9cd1 100644
--- a/hidimstat/utils.py
+++ b/hidimstat/utils.py
@@ -1,12 +1,14 @@
 import copy
 
 import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
+
+# import torch
+# import torch.nn as nn
+# import torch.nn.functional as F
 from sklearn.metrics import log_loss, mean_squared_error
 from sklearn.preprocessing import StandardScaler
-from torchmetrics import Accuracy
+
+# from torchmetrics import Accuracy
 
 
 def quantile_aggregation(pvals, gamma=0.5, gamma_min=0.05, adaptive=False):
@@ -282,603 +284,603 @@ def _check_vim_predict_method(method):
         )
 
 
-def sigmoid(x):
-    """
-    This function applies the sigmoid function element-wise to the input array x
-    """
-    return 1 / (1 + np.exp(-x))
-
-
-def softmax(x):
-    """
-    This function applies the softmax function element-wise to the input array x
-    """
-    # Ensure numerical stability by subtracting the maximum value of x from each element of x
-    # This prevents overflow errors when exponentiating large numbers
-    x = x - np.max(x, axis=-1, keepdims=True)
-    exp_x = np.exp(x)
-    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
-
-
-def relu(x):
-    """
-    This function applies the relu function element-wise to the input array x
-    """
-    return (abs(x) + x) / 2
-
-
-def relu_(x):
-    """
-    This function applies the derivative of the relu function element-wise
-    to the input array x
-    """
-    return (x > 0) * 1
-
-
-def convert_predict_proba(list_probs):
-    """
-    If the classification is done using a one-hot encoded variable, the list of
-    probabilites will be a list of lists for the probabilities of each of the categories.
-    This function takes the probabilities of having each category (=1 with binary) and stack
-    them into one ndarray.
- """ - if len(list_probs.shape) == 3: - list_probs = np.array(list_probs)[..., 1].T - return list_probs - - -def ordinal_encode(y): - """ - This function encodes the ordinal variable with a special gradual encoding storing also - the natural order information. - """ - list_y = [] - for y_col in range(y.shape[-1]): - # Retrieve the unique values - unique_vals = np.unique(y[:, y_col]) - # Mapping each unique value to its corresponding index - mapping_dict = {} - for i, val in enumerate(unique_vals): - mapping_dict[val] = i + 1 - # create a zero-filled array for the ordinal encoding - y_ordinal = np.zeros((len(y[:, y_col]), len(set(y[:, y_col])))) - # set the appropriate indices to 1 for each ordinal value and all lower ordinal values - for ind_el, el in enumerate(y[:, y_col]): - y_ordinal[ind_el, np.arange(mapping_dict[el])] = 1 - list_y.append(y_ordinal[:, 1:]) - - return list_y - - -def sample_predictions(predictions, random_state=None): - """ - This function samples from the same leaf node of the input sample - in both the regression and the classification cases - """ - rng = np.random.RandomState(random_state) - # print(predictions[..., rng.randint(predictions.shape[2]), :]) - # print(predictions.shape) - # exit(0) - return predictions[..., rng.randint(predictions.shape[2]), :] - - -def joblib_ensemble_dnnet( - X, - y, - problem_type="regression", - activation_outcome=None, - list_continuous=None, - list_grps=None, - sampling_with_repetition=False, - split_percentage=0.8, - group_stacking=False, - input_dimensions=None, - n_epoch=200, - batch_size=32, - beta1=0.9, - beta2=0.999, - lr=1e-3, - l1_weight=1e-2, - l2_weight=1e-2, - epsilon=1e-8, - random_state=None, -): - """ - This function implements the ensemble learning of the sub-DNN models - - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_train_samples, n_features) - The input samples. - y : array-like of shape (n_train_samples,) or (n_train_samples, n_outputs) - The target values (class labels in classification, real numbers in - regression). - problem_type : str, default='regression' - A classification or a regression problem. - activation_outcome : str, default=None - The activation function to apply in the outcome layer, "softmax" for - classification and "sigmoid" for both ordinal and binary cases. - list_continuous : list, default=None - The list of continuous variables. - list_grps : list of lists, default=None - A list collecting the indices of the groups' variables - while applying the stacking method. - sampling_with_repetition : bool, default=True - Application of sampling_with_repetition sampling for the training set. - split_percentage : float, default=0.8 - The training/validation cut for the provided data. - group_stacking : bool, default=False - Apply the stacking-based method for the provided groups. - input_dimensions : list, default=None - The cumsum of inputs after the linear sub-layers. - n_epoch : int, default=200 - The number of epochs for the DNN learner(s). - batch_size : int, default=32 - The number of samples per batch for training. - beta1 : float, default=0.9 - The exponential decay rate for the first moment estimates. - beta2 : float, default=0.999 - The exponential decay rate for the second moment estimates. - lr : float, default=1e-3 - The learning rate. - l1_weight : float, default=1e-2 - The L1-regularization paramter for weight decay. - l2_weight : float, default=0 - The L2-regularization paramter for weight decay. 
-    epsilon : float, default=1e-8
-        A small constant added to the denominator to prevent division by zero.
-    random_state : int, default=2023
-        Fixing the seeds of the random generator.
-
-    Returns
-    -------
-    current_model : list
-        The parameters of the sub-DNN model
-    scaler_x : list of Scikit-learn StandardScalers
-        The scalers for the continuous input variables.
-    scaler_y : Scikit-learn StandardScaler
-        The scaler for the continuous output variable.
-    pred_v : ndarray
-        The predictions of the sub-DNN model.
-    loss : float
-        The loss score of the sub-DNN model.
-    """
-
-    pred_v = np.empty(X.shape[0])
-    # Sampling and Train/Validate splitting
-    (
-        X_train_scaled,
-        y_train_scaled,
-        X_validation_scaled,
-        y_validation_scaled,
-        X_scaled,
-        y_validation,
-        scaler_x,
-        scaler_y,
-        valid_ind,
-    ) = create_X_y(
-        X,
-        y,
-        sampling_with_repetition=sampling_with_repetition,
-        split_percentage=split_percentage,
-        problem_type=problem_type,
-        list_continuous=list_continuous,
-        random_state=random_state,
-    )
-
-    current_model = dnn_net(
-        X_train_scaled,
-        y_train_scaled,
-        X_validation_scaled,
-        y_validation_scaled,
-        problem_type=problem_type,
-        n_epoch=n_epoch,
-        batch_size=batch_size,
-        beta1=beta1,
-        beta2=beta2,
-        lr=lr,
-        l1_weight=l1_weight,
-        l2_weight=l2_weight,
-        epsilon=epsilon,
-        list_grps=list_grps,
-        group_stacking=group_stacking,
-        input_dimensions=input_dimensions,
-        random_state=random_state,
-    )
-
-    if not group_stacking:
-        X_scaled_n = X_scaled.copy()
-    else:
-        X_scaled_n = np.zeros((X_scaled.shape[0], input_dimensions[-1]))
-        for grp_ind in range(len(list_grps)):
-            n_layer_stacking = len(current_model[3][grp_ind]) - 1
-            curr_pred = X_scaled[:, list_grps[grp_ind]].copy()
-            for ind_w_b in range(n_layer_stacking):
-                if ind_w_b == 0:
-                    curr_pred = relu(
-                        X_scaled[:, list_grps[grp_ind]].dot(
-                            current_model[3][grp_ind][ind_w_b]
-                        )
-                        + current_model[4][grp_ind][ind_w_b]
-                    )
-                else:
-                    curr_pred = relu(
-                        curr_pred.dot(current_model[3][grp_ind][ind_w_b])
-                        + current_model[4][grp_ind][ind_w_b]
-                    )
-            X_scaled_n[
-                :,
-                list(
-                    np.arange(input_dimensions[grp_ind], input_dimensions[grp_ind + 1])
-                ),
-            ] = (
-                curr_pred.dot(current_model[3][grp_ind][n_layer_stacking])
-                + current_model[4][grp_ind][n_layer_stacking]
-            )
-
-    n_layer = len(current_model[0]) - 1
-    for j in range(n_layer):
-        if j == 0:
-            pred = relu(X_scaled_n.dot(current_model[0][j]) + current_model[1][j])
-        else:
-            pred = relu(pred.dot(current_model[0][j]) + current_model[1][j])
-
-    pred = pred.dot(current_model[0][n_layer]) + current_model[1][n_layer]
-
-    if problem_type not in ("classification", "binary"):
-        if problem_type != "ordinal":
-            pred_v = pred * scaler_y.scale_ + scaler_y.mean_
-        else:
-            pred_v = activation_outcome[problem_type](pred)
-        loss = np.std(y_validation) ** 2 - mean_squared_error(
-            y_validation, pred_v[valid_ind]
-        )
-    else:
-        pred_v = activation_outcome[problem_type](pred)
-        loss = log_loss(
-            y_validation, np.ones(y_validation.shape) * np.mean(y_validation, axis=0)
-        ) - log_loss(y_validation, pred_v[valid_ind])
-
-    return (current_model, scaler_x, scaler_y, pred_v, loss)
-
-
-def initialize_weights(layer):
-    if isinstance(layer, nn.Linear):
-        layer.weight.data = (layer.weight.data.uniform_() - 0.5) * 0.2
-        layer.bias.data = (layer.bias.data.uniform_() - 0.5) * 0.1
-
-
-def Dataset_Loader(X, y, shuffle=False, batch_size=50):
-    if y.shape[-1] == 2:
-        y = y[:, [1]]
-    dataset = torch.utils.data.TensorDataset(
-        torch.from_numpy(X).float(), torch.from_numpy(y).float()
-    )
-
-    loader = torch.utils.data.DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle
-    )
-    return loader
-
-
-class DNN(nn.Module):
-    """
-    Feedfoward Neural Network with 4 hidden layers
-    """
-
-    def __init__(
-        self, input_dim, group_stacking, list_grps, output_dimension, problem_type
-    ):
-        super().__init__()
-        if problem_type == "classification":
-            self.accuracy = Accuracy(task="multiclass", num_classes=output_dimension)
-        else:
-            self.accuracy = Accuracy(task="binary")
-        self.list_grps = list_grps
-        self.group_stacking = group_stacking
-        if group_stacking:
-            self.layers_stacking = nn.ModuleList(
-                [
-                    nn.Linear(
-                        in_features=len(grp),
-                        out_features=input_dim[grp_ind + 1] - input_dim[grp_ind],
-                    )
-                    # nn.Sequential(
-                    #     nn.Linear(
-                    #         in_features=len(grp),
-                    #         # out_features=max(1, int(0.1 * len(grp))),
-                    #         out_features=input_dim[grp_ind + 1]
-                    #         - input_dim[grp_ind],
-                    #     ),
-                    #     nn.ReLU(),
-                    #     nn.Linear(
-                    #         in_features=max(1, int(0.1 * len(grp))),
-                    #         out_features=input_dim[grp_ind + 1]
-                    #         - input_dim[grp_ind],
-                    #     ),
-                    #     nn.ReLU(),
-                    #     nn.Linear(
-                    #         in_features=max(1, int(0.1 * len(grp))),
-                    #         out_features=input_dim[grp_ind + 1]
-                    #         - input_dim[grp_ind],
-                    #     ),
-                    # )
-                    for grp_ind, grp in enumerate(list_grps)
-                ]
-            )
-            input_dim = input_dim[-1]
-        self.layers = nn.Sequential(
-            # hidden layers
-            nn.Linear(input_dim, 50),
-            nn.ReLU(),
-            nn.Linear(50, 40),
-            nn.ReLU(),
-            nn.Linear(40, 30),
-            nn.ReLU(),
-            nn.Linear(30, 20),
-            nn.ReLU(),
-            # output layer
-            nn.Linear(20, output_dimension),
-        )
-        self.loss = 0
-
-    def forward(self, x):
-        if self.group_stacking:
-            list_stacking = [None] * len(self.layers_stacking)
-            for ind_layer, layer in enumerate(self.layers_stacking):
-                list_stacking[ind_layer] = layer(x[:, self.list_grps[ind_layer]])
-            x = torch.cat(list_stacking, dim=1)
-        return self.layers(x)
-
-    def training_step(self, batch, device, problem_type):
-        X, y = batch[0].to(device), batch[1].to(device)
-        y_pred = self(X)  # Generate predictions
-        if problem_type == "regression":
-            loss = F.mse_loss(y_pred, y)
-        elif problem_type == "classification":
-            loss = F.cross_entropy(y_pred, y)  # Calculate loss
-        else:
-            loss = F.binary_cross_entropy_with_logits(y_pred, y)
-        return loss
-
-    def validation_step(self, batch, device, problem_type):
-        X, y = batch[0].to(device), batch[1].to(device)
-        y_pred = self(X)  # Generate predictions
-        if problem_type == "regression":
-            loss = F.mse_loss(y_pred, y)
-            return {
-                "val_mse": loss,
-                "batch_size": len(X),
-            }
-        else:
-            if problem_type == "classification":
-                loss = F.cross_entropy(y_pred, y)  # Calculate loss
-            else:
-                loss = F.binary_cross_entropy_with_logits(y_pred, y)
-            acc = self.accuracy(y_pred, y.int())
-            return {
-                "val_loss": loss,
-                "val_acc": acc,
-                "batch_size": len(X),
-            }
-
-    def validation_epoch_end(self, outputs, problem_type):
-        if problem_type in ("classification", "binary"):
-            batch_losses = []
-            batch_accs = []
-            batch_sizes = []
-            for x in outputs:
-                batch_losses.append(x["val_loss"] * x["batch_size"])
-                batch_accs.append(x["val_acc"] * x["batch_size"])
-                batch_sizes.append(x["batch_size"])
-            self.loss = torch.stack(batch_losses).sum().item() / np.sum(
-                batch_sizes
-            )  # Combine losses
-            epoch_acc = torch.stack(batch_accs).sum().item() / np.sum(
-                batch_sizes
-            )  # Combine accuracies
-            return {"val_loss": self.loss, "val_acc": epoch_acc}
-        else:
-            batch_losses = [x["val_mse"] * x["batch_size"] for x in outputs]
-            batch_sizes = [x["batch_size"] for x in outputs]
-            self.loss = torch.stack(batch_losses).sum().item() / np.sum(
-                batch_sizes
-            )  # Combine losses
-            return {"val_mse": self.loss}
-
-    def epoch_end(self, epoch, result):
-        if len(result) == 2:
-            print(
-                "Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(
-                    epoch + 1, result["val_loss"], result["val_acc"]
-                )
-            )
-        else:
-            print("Epoch [{}], val_mse: {:.4f}".format(epoch + 1, result["val_mse"]))
-
-
-def evaluate(model, loader, device, problem_type):
-    outputs = [model.validation_step(batch, device, problem_type) for batch in loader]
-    return model.validation_epoch_end(outputs, problem_type)
-
-
-def dnn_net(
-    X_train,
-    y_train,
-    X_validation,
-    y_validation,
-    problem_type="regression",
-    n_epoch=200,
-    batch_size=32,
-    batch_size_validation=128,
-    beta1=0.9,
-    beta2=0.999,
-    lr=1e-3,
-    l1_weight=1e-2,
-    l2_weight=1e-2,
-    epsilon=1e-8,
-    list_grps=None,
-    group_stacking=False,
-    input_dimensions=None,
-    random_state=2023,
-    verbose=0,
-):
-    """
-    This function implements the training/validation process of the sub-DNN
-    models
-
-    Parameters
-    ----------
-    X_train : {array-like, sparse matrix} of shape (n_train_samples, n_features)
-        The training input samples.
-    y_train : {array-like} of shape (n_train_samples, )
-        The training output samples.
-    X_validation : {array-like, sparse matrix} of shape (n_validation_samples, n_features)
-        The validation input samples.
-    y_validation : {array-like} of shape (n_validation_samples, )
-        The validation output samples.
-    problem_type : str, default='regression'
-        A classification or a regression problem.
-    n_epoch : int, default=200
-        The number of epochs for the DNN learner(s).
-    batch_size : int, default=32
-        The number of samples per batch for training.
-    batch_size_validation : int, default=128
-        The number of samples per batch for validation.
-    beta1 : float, default=0.9
-        The exponential decay rate for the first moment estimates.
-    beta2 : float, default=0.999
-        The exponential decay rate for the second moment estimates.
-    lr : float, default=1e-3
-        The learning rate.
-    l1_weight : float, default=1e-2
-        The L1-regularization paramter for weight decay.
-    l2_weight : float, default=0
-        The L2-regularization paramter for weight decay.
-    epsilon : float, default=1e-8
-        A small constant added to the denominator to prevent division by zero.
-    list_grps : list of lists, default=None
-        A list collecting the indices of the groups' variables
-        while applying the stacking method.
-    group_stacking : bool, default=False
-        Apply the stacking-based method for the provided groups.
-    input_dimensions : list, default=None
-        The cumsum of inputs after the linear sub-layers.
-    random_state : int, default=2023
-        Fixing the seeds of the random generator.
-    verbose : int, default=0
-        If verbose > 0, the fitted iterations will be printed.
- """ - # Creating DataLoaders - train_loader = Dataset_Loader( - X_train, - y_train, - shuffle=True, - batch_size=batch_size, - ) - validate_loader = Dataset_Loader( - X_validation, y_validation, batch_size=batch_size_validation - ) - # Set the seed for PyTorch's random number generator - torch.manual_seed(random_state) - - # Set the seed for PyTorch's CUDA random number generator(s), if available - if torch.cuda.is_available(): - torch.cuda.manual_seed(random_state) - torch.cuda.manual_seed_all(random_state) - - # Specify whether to use GPU or CPU - # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - device = torch.device("cpu") - - if problem_type in ("regression", "binary"): - output_dimension = 1 - else: - output_dimension = y_train.shape[-1] - - # DNN model - input_dim = input_dimensions.copy() if group_stacking else X_train.shape[1] - model = DNN(input_dim, group_stacking, list_grps, output_dimension, problem_type) - model.to(device) - # Initializing weights/bias - model.apply(initialize_weights) - # Adam Optimizer - optimizer = torch.optim.Adam( - model.parameters(), lr=lr, betas=(beta1, beta2), eps=epsilon - ) - - best_loss = 1e100 - for epoch in range(n_epoch): - # Training Phase - model.train() - for batch in train_loader: - optimizer.zero_grad() - loss = model.training_step(batch, device, problem_type) - - loss.backward() - optimizer.step() - for name, param in model.named_parameters(): - if "bias" not in name: - # if name.split(".")[0] == "layers_stacking": - # param.data -= l2_weight * param.data - # else: - param.data -= ( - l1_weight * torch.sign(param.data) + l2_weight * param.data - ) - # Validation Phase - model.eval() - result = evaluate(model, validate_loader, device, problem_type) - if model.loss < best_loss: - best_loss = model.loss - dict_params = copy.deepcopy(model.state_dict()) - if verbose >= 2: - model.epoch_end(epoch, result) - - best_weight = [] - best_bias = [] - best_weight_stack = [[].copy() for _ in range(len(list_grps))] - best_bias_stack = [[].copy() for _ in range(len(list_grps))] - - for name, param in dict_params.items(): - if name.split(".")[0] == "layers": - if name.split(".")[-1] == "weight": - best_weight.append(param.numpy().T) - if name.split(".")[-1] == "bias": - best_bias.append(param.numpy()[np.newaxis, :]) - if name.split(".")[0] == "layers_stacking": - curr_ind = int(name.split(".")[1]) - if name.split(".")[-1] == "weight": - best_weight_stack[curr_ind].append(param.numpy().T) - if name.split(".")[-1] == "bias": - best_bias_stack[curr_ind].append(param.numpy()[np.newaxis, :]) - - return [ - best_weight, - best_bias, - best_loss, - best_weight_stack, - best_bias_stack, - ] - - -def compute_imp_std(pred_scores): - weights = np.array([el.shape[-2] for el in pred_scores]) - # Compute the mean of each fold over the number of observations - pred_mean = np.array([np.mean(el.copy(), axis=-2) for el in pred_scores]) - - # Weighted average - imp = np.average(pred_mean, axis=0, weights=weights) - - # Compute the standard deviation of each fold - # over the number of observations - pred_std = np.array( - [ - np.mean( - (el - imp[..., np.newaxis]) ** 2, - axis=-2, - ) - for el in pred_scores - ] - ) - std = np.sqrt(np.average(pred_std, axis=0, weights=weights) / (np.sum(weights) - 1)) - return (imp, std) +# def sigmoid(x): +# """ +# This function applies the sigmoid function element-wise to the input array x +# """ +# return 1 / (1 + np.exp(-x)) + + +# def softmax(x): +# """ +# This function applies the softmax function 
+#     """
+#     # Ensure numerical stability by subtracting the maximum value of x from each element of x
+#     # This prevents overflow errors when exponentiating large numbers
+#     x = x - np.max(x, axis=-1, keepdims=True)
+#     exp_x = np.exp(x)
+#     return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
+
+
+# def relu(x):
+#     """
+#     This function applies the relu function element-wise to the input array x
+#     """
+#     return (abs(x) + x) / 2
+
+
+# def relu_(x):
+#     """
+#     This function applies the derivative of the relu function element-wise
+#     to the input array x
+#     """
+#     return (x > 0) * 1
+
+
+# def convert_predict_proba(list_probs):
+#     """
+#     If the classification is done using a one-hot encoded variable, the list of
+#     probabilites will be a list of lists for the probabilities of each of the categories.
+#     This function takes the probabilities of having each category (=1 with binary) and stack
+#     them into one ndarray.
+#     """
+#     if len(list_probs.shape) == 3:
+#         list_probs = np.array(list_probs)[..., 1].T
+#     return list_probs
+
+
+# def ordinal_encode(y):
+#     """
+#     This function encodes the ordinal variable with a special gradual encoding storing also
+#     the natural order information.
+#     """
+#     list_y = []
+#     for y_col in range(y.shape[-1]):
+#         # Retrieve the unique values
+#         unique_vals = np.unique(y[:, y_col])
+#         # Mapping each unique value to its corresponding index
+#         mapping_dict = {}
+#         for i, val in enumerate(unique_vals):
+#             mapping_dict[val] = i + 1
+#         # create a zero-filled array for the ordinal encoding
+#         y_ordinal = np.zeros((len(y[:, y_col]), len(set(y[:, y_col]))))
+#         # set the appropriate indices to 1 for each ordinal value and all lower ordinal values
+#         for ind_el, el in enumerate(y[:, y_col]):
+#             y_ordinal[ind_el, np.arange(mapping_dict[el])] = 1
+#         list_y.append(y_ordinal[:, 1:])
+
+#     return list_y
+
+
+# def sample_predictions(predictions, random_state=None):
+#     """
+#     This function samples from the same leaf node of the input sample
+#     in both the regression and the classification cases
+#     """
+#     rng = np.random.RandomState(random_state)
+#     # print(predictions[..., rng.randint(predictions.shape[2]), :])
+#     # print(predictions.shape)
+#     # exit(0)
+#     return predictions[..., rng.randint(predictions.shape[2]), :]
+
+
+# def joblib_ensemble_dnnet(
+#     X,
+#     y,
+#     problem_type="regression",
+#     activation_outcome=None,
+#     list_continuous=None,
+#     list_grps=None,
+#     sampling_with_repetition=False,
+#     split_percentage=0.8,
+#     group_stacking=False,
+#     input_dimensions=None,
+#     n_epoch=200,
+#     batch_size=32,
+#     beta1=0.9,
+#     beta2=0.999,
+#     lr=1e-3,
+#     l1_weight=1e-2,
+#     l2_weight=1e-2,
+#     epsilon=1e-8,
+#     random_state=None,
+# ):
+#     """
+#     This function implements the ensemble learning of the sub-DNN models
+
+#     Parameters
+#     ----------
+#     X : {array-like, sparse matrix} of shape (n_train_samples, n_features)
+#         The input samples.
+#     y : array-like of shape (n_train_samples,) or (n_train_samples, n_outputs)
+#         The target values (class labels in classification, real numbers in
+#         regression).
+#     problem_type : str, default='regression'
+#         A classification or a regression problem.
+#     activation_outcome : str, default=None
+#         The activation function to apply in the outcome layer, "softmax" for
+#         classification and "sigmoid" for both ordinal and binary cases.
+#     list_continuous : list, default=None
+#         The list of continuous variables.
+#     list_grps : list of lists, default=None
+#         A list collecting the indices of the groups' variables
+#         while applying the stacking method.
+#     sampling_with_repetition : bool, default=True
+#         Application of sampling_with_repetition sampling for the training set.
+#     split_percentage : float, default=0.8
+#         The training/validation cut for the provided data.
+#     group_stacking : bool, default=False
+#         Apply the stacking-based method for the provided groups.
+#     input_dimensions : list, default=None
+#         The cumsum of inputs after the linear sub-layers.
+#     n_epoch : int, default=200
+#         The number of epochs for the DNN learner(s).
+#     batch_size : int, default=32
+#         The number of samples per batch for training.
+#     beta1 : float, default=0.9
+#         The exponential decay rate for the first moment estimates.
+#     beta2 : float, default=0.999
+#         The exponential decay rate for the second moment estimates.
+#     lr : float, default=1e-3
+#         The learning rate.
+#     l1_weight : float, default=1e-2
+#         The L1-regularization paramter for weight decay.
+#     l2_weight : float, default=0
+#         The L2-regularization paramter for weight decay.
+#     epsilon : float, default=1e-8
+#         A small constant added to the denominator to prevent division by zero.
+#     random_state : int, default=2023
+#         Fixing the seeds of the random generator.
+
+#     Returns
+#     -------
+#     current_model : list
+#         The parameters of the sub-DNN model
+#     scaler_x : list of Scikit-learn StandardScalers
+#         The scalers for the continuous input variables.
+#     scaler_y : Scikit-learn StandardScaler
+#         The scaler for the continuous output variable.
+#     pred_v : ndarray
+#         The predictions of the sub-DNN model.
+#     loss : float
+#         The loss score of the sub-DNN model.
+#     """
+
+#     pred_v = np.empty(X.shape[0])
+#     # Sampling and Train/Validate splitting
+#     (
+#         X_train_scaled,
+#         y_train_scaled,
+#         X_validation_scaled,
+#         y_validation_scaled,
+#         X_scaled,
+#         y_validation,
+#         scaler_x,
+#         scaler_y,
+#         valid_ind,
+#     ) = create_X_y(
+#         X,
+#         y,
+#         sampling_with_repetition=sampling_with_repetition,
+#         split_percentage=split_percentage,
+#         problem_type=problem_type,
+#         list_continuous=list_continuous,
+#         random_state=random_state,
+#     )
+
+#     current_model = dnn_net(
+#         X_train_scaled,
+#         y_train_scaled,
+#         X_validation_scaled,
+#         y_validation_scaled,
+#         problem_type=problem_type,
+#         n_epoch=n_epoch,
+#         batch_size=batch_size,
+#         beta1=beta1,
+#         beta2=beta2,
+#         lr=lr,
+#         l1_weight=l1_weight,
+#         l2_weight=l2_weight,
+#         epsilon=epsilon,
+#         list_grps=list_grps,
+#         group_stacking=group_stacking,
+#         input_dimensions=input_dimensions,
+#         random_state=random_state,
+#     )
+
+#     if not group_stacking:
+#         X_scaled_n = X_scaled.copy()
+#     else:
+#         X_scaled_n = np.zeros((X_scaled.shape[0], input_dimensions[-1]))
+#         for grp_ind in range(len(list_grps)):
+#             n_layer_stacking = len(current_model[3][grp_ind]) - 1
+#             curr_pred = X_scaled[:, list_grps[grp_ind]].copy()
+#             for ind_w_b in range(n_layer_stacking):
+#                 if ind_w_b == 0:
+#                     curr_pred = relu(
+#                         X_scaled[:, list_grps[grp_ind]].dot(
+#                             current_model[3][grp_ind][ind_w_b]
+#                         )
+#                         + current_model[4][grp_ind][ind_w_b]
+#                     )
+#                 else:
+#                     curr_pred = relu(
+#                         curr_pred.dot(current_model[3][grp_ind][ind_w_b])
+#                         + current_model[4][grp_ind][ind_w_b]
+#                     )
+#             X_scaled_n[
+#                 :,
+#                 list(
+#                     np.arange(input_dimensions[grp_ind], input_dimensions[grp_ind + 1])
+#                 ),
+#             ] = (
+#                 curr_pred.dot(current_model[3][grp_ind][n_layer_stacking])
+#                 + current_model[4][grp_ind][n_layer_stacking]
+#             )
+
+#     n_layer = len(current_model[0]) - 1
+#     for j in range(n_layer):
+#         if j == 0:
+#             pred = relu(X_scaled_n.dot(current_model[0][j]) + current_model[1][j])
+#         else:
+#             pred = relu(pred.dot(current_model[0][j]) + current_model[1][j])
+
+#     pred = pred.dot(current_model[0][n_layer]) + current_model[1][n_layer]
+
+#     if problem_type not in ("classification", "binary"):
+#         if problem_type != "ordinal":
+#             pred_v = pred * scaler_y.scale_ + scaler_y.mean_
+#         else:
+#             pred_v = activation_outcome[problem_type](pred)
+#         loss = np.std(y_validation) ** 2 - mean_squared_error(
+#             y_validation, pred_v[valid_ind]
+#         )
+#     else:
+#         pred_v = activation_outcome[problem_type](pred)
+#         loss = log_loss(
+#             y_validation, np.ones(y_validation.shape) * np.mean(y_validation, axis=0)
+#         ) - log_loss(y_validation, pred_v[valid_ind])
+
+#     return (current_model, scaler_x, scaler_y, pred_v, loss)
+
+
+# def initialize_weights(layer):
+#     if isinstance(layer, nn.Linear):
+#         layer.weight.data = (layer.weight.data.uniform_() - 0.5) * 0.2
+#         layer.bias.data = (layer.bias.data.uniform_() - 0.5) * 0.1
+
+
+# def Dataset_Loader(X, y, shuffle=False, batch_size=50):
+#     if y.shape[-1] == 2:
+#         y = y[:, [1]]
+#     dataset = torch.utils.data.TensorDataset(
+#         torch.from_numpy(X).float(), torch.from_numpy(y).float()
+#     )
+
+#     loader = torch.utils.data.DataLoader(
+#         dataset, batch_size=batch_size, shuffle=shuffle
+#     )
+#     return loader
+
+
+# class DNN(nn.Module):
+#     """
+#     Feedfoward Neural Network with 4 hidden layers
+#     """
+
+#     def __init__(
+#         self, input_dim, group_stacking, list_grps, output_dimension, problem_type
+#     ):
+#         super().__init__()
+#         if problem_type == "classification":
+#             self.accuracy = Accuracy(task="multiclass", num_classes=output_dimension)
+#         else:
+#             self.accuracy = Accuracy(task="binary")
+#         self.list_grps = list_grps
+#         self.group_stacking = group_stacking
+#         if group_stacking:
+#             self.layers_stacking = nn.ModuleList(
+#                 [
+#                     nn.Linear(
+#                         in_features=len(grp),
+#                         out_features=input_dim[grp_ind + 1] - input_dim[grp_ind],
+#                     )
+#                     # nn.Sequential(
+#                     #     nn.Linear(
+#                     #         in_features=len(grp),
+#                     #         # out_features=max(1, int(0.1 * len(grp))),
+#                     #         out_features=input_dim[grp_ind + 1]
+#                     #         - input_dim[grp_ind],
+#                     #     ),
+#                     #     nn.ReLU(),
+#                     #     nn.Linear(
+#                     #         in_features=max(1, int(0.1 * len(grp))),
+#                     #         out_features=input_dim[grp_ind + 1]
+#                     #         - input_dim[grp_ind],
+#                     #     ),
+#                     #     nn.ReLU(),
+#                     #     nn.Linear(
+#                     #         in_features=max(1, int(0.1 * len(grp))),
+#                     #         out_features=input_dim[grp_ind + 1]
+#                     #         - input_dim[grp_ind],
+#                     #     ),
+#                     # )
+#                     for grp_ind, grp in enumerate(list_grps)
+#                 ]
+#             )
+#             input_dim = input_dim[-1]
+#         self.layers = nn.Sequential(
+#             # hidden layers
+#             nn.Linear(input_dim, 50),
+#             nn.ReLU(),
+#             nn.Linear(50, 40),
+#             nn.ReLU(),
+#             nn.Linear(40, 30),
+#             nn.ReLU(),
+#             nn.Linear(30, 20),
+#             nn.ReLU(),
+#             # output layer
+#             nn.Linear(20, output_dimension),
+#         )
+#         self.loss = 0
+
+#     def forward(self, x):
+#         if self.group_stacking:
+#             list_stacking = [None] * len(self.layers_stacking)
+#             for ind_layer, layer in enumerate(self.layers_stacking):
+#                 list_stacking[ind_layer] = layer(x[:, self.list_grps[ind_layer]])
+#             x = torch.cat(list_stacking, dim=1)
+#         return self.layers(x)
+
+#     def training_step(self, batch, device, problem_type):
+#         X, y = batch[0].to(device), batch[1].to(device)
+#         y_pred = self(X)  # Generate predictions
+#         if problem_type == "regression":
+#             loss = F.mse_loss(y_pred, y)
+#         elif problem_type == "classification":
+#             loss = F.cross_entropy(y_pred, y)  # Calculate loss
+#         else:
+#             loss = F.binary_cross_entropy_with_logits(y_pred, y)
+#         return loss
+
+#     def validation_step(self, batch, device, problem_type):
+#         X, y = batch[0].to(device), batch[1].to(device)
+#         y_pred = self(X)  # Generate predictions
+#         if problem_type == "regression":
+#             loss = F.mse_loss(y_pred, y)
+#             return {
+#                 "val_mse": loss,
+#                 "batch_size": len(X),
+#             }
+#         else:
+#             if problem_type == "classification":
+#                 loss = F.cross_entropy(y_pred, y)  # Calculate loss
+#             else:
+#                 loss = F.binary_cross_entropy_with_logits(y_pred, y)
+#             acc = self.accuracy(y_pred, y.int())
+#             return {
+#                 "val_loss": loss,
+#                 "val_acc": acc,
+#                 "batch_size": len(X),
+#             }
+
+#     def validation_epoch_end(self, outputs, problem_type):
+#         if problem_type in ("classification", "binary"):
+#             batch_losses = []
+#             batch_accs = []
+#             batch_sizes = []
+#             for x in outputs:
+#                 batch_losses.append(x["val_loss"] * x["batch_size"])
+#                 batch_accs.append(x["val_acc"] * x["batch_size"])
+#                 batch_sizes.append(x["batch_size"])
+#             self.loss = torch.stack(batch_losses).sum().item() / np.sum(
+#                 batch_sizes
+#             )  # Combine losses
+#             epoch_acc = torch.stack(batch_accs).sum().item() / np.sum(
+#                 batch_sizes
+#             )  # Combine accuracies
+#             return {"val_loss": self.loss, "val_acc": epoch_acc}
+#         else:
+#             batch_losses = [x["val_mse"] * x["batch_size"] for x in outputs]
+#             batch_sizes = [x["batch_size"] for x in outputs]
+#             self.loss = torch.stack(batch_losses).sum().item() / np.sum(
+#                 batch_sizes
+#             )  # Combine losses
+#             return {"val_mse": self.loss}
+
+#     def epoch_end(self, epoch, result):
+#         if len(result) == 2:
+#             print(
+#                 "Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(
+#                     epoch + 1, result["val_loss"], result["val_acc"]
+#                 )
+#             )
+#         else:
+#             print("Epoch [{}], val_mse: {:.4f}".format(epoch + 1, result["val_mse"]))
+
+
+# def evaluate(model, loader, device, problem_type):
+#     outputs = [model.validation_step(batch, device, problem_type) for batch in loader]
+#     return model.validation_epoch_end(outputs, problem_type)
+
+
+# def dnn_net(
+#     X_train,
+#     y_train,
+#     X_validation,
+#     y_validation,
+#     problem_type="regression",
+#     n_epoch=200,
+#     batch_size=32,
+#     batch_size_validation=128,
+#     beta1=0.9,
+#     beta2=0.999,
+#     lr=1e-3,
+#     l1_weight=1e-2,
+#     l2_weight=1e-2,
+#     epsilon=1e-8,
+#     list_grps=None,
+#     group_stacking=False,
+#     input_dimensions=None,
+#     random_state=2023,
+#     verbose=0,
+# ):
+#     """
+#     This function implements the training/validation process of the sub-DNN
+#     models
+
+#     Parameters
+#     ----------
+#     X_train : {array-like, sparse matrix} of shape (n_train_samples, n_features)
+#         The training input samples.
+#     y_train : {array-like} of shape (n_train_samples, )
+#         The training output samples.
+#     X_validation : {array-like, sparse matrix} of shape (n_validation_samples, n_features)
+#         The validation input samples.
+#     y_validation : {array-like} of shape (n_validation_samples, )
+#         The validation output samples.
+#     problem_type : str, default='regression'
+#         A classification or a regression problem.
+#     n_epoch : int, default=200
+#         The number of epochs for the DNN learner(s).
+#     batch_size : int, default=32
+#         The number of samples per batch for training.
+#     batch_size_validation : int, default=128
+#         The number of samples per batch for validation.
+#     beta1 : float, default=0.9
+#         The exponential decay rate for the first moment estimates.
+#     beta2 : float, default=0.999
+#         The exponential decay rate for the second moment estimates.
+#     lr : float, default=1e-3
+#         The learning rate.
+#     l1_weight : float, default=1e-2
+#         The L1-regularization paramter for weight decay.
+#     l2_weight : float, default=0
+#         The L2-regularization paramter for weight decay.
+#     epsilon : float, default=1e-8
+#         A small constant added to the denominator to prevent division by zero.
+#     list_grps : list of lists, default=None
+#         A list collecting the indices of the groups' variables
+#         while applying the stacking method.
+#     group_stacking : bool, default=False
+#         Apply the stacking-based method for the provided groups.
+#     input_dimensions : list, default=None
+#         The cumsum of inputs after the linear sub-layers.
+#     random_state : int, default=2023
+#         Fixing the seeds of the random generator.
+#     verbose : int, default=0
+#         If verbose > 0, the fitted iterations will be printed.
+#     """
+#     # Creating DataLoaders
+#     train_loader = Dataset_Loader(
+#         X_train,
+#         y_train,
+#         shuffle=True,
+#         batch_size=batch_size,
+#     )
+#     validate_loader = Dataset_Loader(
+#         X_validation, y_validation, batch_size=batch_size_validation
+#     )
+#     # Set the seed for PyTorch's random number generator
+#     torch.manual_seed(random_state)
+
+#     # Set the seed for PyTorch's CUDA random number generator(s), if available
+#     if torch.cuda.is_available():
+#         torch.cuda.manual_seed(random_state)
+#         torch.cuda.manual_seed_all(random_state)
+
+#     # Specify whether to use GPU or CPU
+#     # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     device = torch.device("cpu")
+
+#     if problem_type in ("regression", "binary"):
+#         output_dimension = 1
+#     else:
+#         output_dimension = y_train.shape[-1]
+
+#     # DNN model
+#     input_dim = input_dimensions.copy() if group_stacking else X_train.shape[1]
+#     model = DNN(input_dim, group_stacking, list_grps, output_dimension, problem_type)
+#     model.to(device)
+#     # Initializing weights/bias
+#     model.apply(initialize_weights)
+#     # Adam Optimizer
+#     optimizer = torch.optim.Adam(
+#         model.parameters(), lr=lr, betas=(beta1, beta2), eps=epsilon
+#     )
+
+#     best_loss = 1e100
+#     for epoch in range(n_epoch):
+#         # Training Phase
+#         model.train()
+#         for batch in train_loader:
+#             optimizer.zero_grad()
+#             loss = model.training_step(batch, device, problem_type)
+
+#             loss.backward()
+#             optimizer.step()
+#             for name, param in model.named_parameters():
+#                 if "bias" not in name:
+#                     # if name.split(".")[0] == "layers_stacking":
+#                     #     param.data -= l2_weight * param.data
+#                     # else:
+#                     param.data -= (
+#                         l1_weight * torch.sign(param.data) + l2_weight * param.data
+#                     )
+#         # Validation Phase
+#         model.eval()
+#         result = evaluate(model, validate_loader, device, problem_type)
+#         if model.loss < best_loss:
+#             best_loss = model.loss
+#             dict_params = copy.deepcopy(model.state_dict())
+#         if verbose >= 2:
+#             model.epoch_end(epoch, result)
+
+#     best_weight = []
+#     best_bias = []
+#     best_weight_stack = [[].copy() for _ in range(len(list_grps))]
+#     best_bias_stack = [[].copy() for _ in range(len(list_grps))]
+
+#     for name, param in dict_params.items():
+#         if name.split(".")[0] == "layers":
+#             if name.split(".")[-1] == "weight":
+#                 best_weight.append(param.numpy().T)
+#             if name.split(".")[-1] == "bias":
+#                 best_bias.append(param.numpy()[np.newaxis, :])
+#         if name.split(".")[0] == "layers_stacking":
+#             curr_ind = int(name.split(".")[1])
+#             if name.split(".")[-1] == "weight":
+#                 best_weight_stack[curr_ind].append(param.numpy().T)
+#             if name.split(".")[-1] == "bias":
+#                 best_bias_stack[curr_ind].append(param.numpy()[np.newaxis, :])
+
+#     return [
+#         best_weight,
+#         best_bias,
+#         best_loss,
+#         best_weight_stack,
+#         best_bias_stack,
+#     ]
+
+
+# def compute_imp_std(pred_scores):
+#     weights = np.array([el.shape[-2] for el in pred_scores])
+#     # Compute the mean of each fold over the number of observations
+#     pred_mean = np.array([np.mean(el.copy(), axis=-2) for el in pred_scores])
+
+#     # Weighted average
+#     imp = np.average(pred_mean, axis=0, weights=weights)
+
+#     # Compute the standard deviation of each fold
+#     # over the number of observations
+#     pred_std = np.array(
+#         [
+#             np.mean(
+#                 (el - imp[..., np.newaxis]) ** 2,
+#                 axis=-2,
+#             )
+#             for el in pred_scores
+#         ]
+#     )
+#     std = np.sqrt(np.average(pred_std, axis=0, weights=weights) / (np.sum(weights) - 1))
+#     return (imp, std)