Skip to content

Commit

Permalink
Merge pull request #36 from wwu-mmll/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
ksarink authored Mar 5, 2021
2 parents 4217950 + 0355dd1 commit dd8501e
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 47 deletions.
6 changes: 3 additions & 3 deletions examples/heart_failure/heart_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
from photonai.optimization import FloatRange, IntegerRange

# setup training and test workflow
my_pipe = Hyperpipe('heart_failure_lasso',
outer_cv=ShuffleSplit(n_splits=100, test_size=0.2),
my_pipe = Hyperpipe('heart_failure',
outer_cv=ShuffleSplit(n_splits=10, test_size=0.2),
inner_cv=KFold(n_splits=10, shuffle=True),
use_test_set=False,
metrics=['balanced_accuracy', 'f1_score', 'matthews_corrcoef',
'sensitivity', 'specificity'],
best_config_metric='f1_score',
optimizer='switch',
optimizer_params={'name': 'sk_opt', 'n_configurations': 10},
project_folder='./tmpv2',
project_folder='./tmp',
cache_folder='./cache',
verbosity=0)

Expand Down
109 changes: 77 additions & 32 deletions photonai/base/hyperpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,79 @@ def score(self, data: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
scorer = Scorer.create(self.optimization.best_config_metric)
return scorer(y, predictions)

def _calculate_permutation_importances(self, **kwargs):
    """
    Compute permutation importances for the best config of every outer fold.

    Extracted from get_permutation_feature_importances to improve unit
    testing. For each outer fold, the fold's best configuration is re-fitted
    and sklearn's permutation_importance is evaluated either on the outer
    fold's test set, or — when no final test set was evaluated
    (eval_final_performance is False) — on the fold's inner CV splits.

    Parameters
    ----------
    **kwargs
        Forwarded verbatim to sklearn.inspection.permutation_importance
        (e.g. n_repeats, random_state).

    Returns
    -------
    dict
        {'mean': [...], 'std': [...]} — one entry per evaluated fold, each
        holding that fold's importances_mean / importances_std array.

    Raises
    ------
    ValueError
        If any outer fold has no best configuration.
    """
    importance_list = {'mean': list(), 'std': list()}

    def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwargs, fold_str):
        train_X, train_y, train_kwargs = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
                                                                     indices=train_idx)
        test_X, test_y, test_kwargs = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
                                                                  indices=test_idx)

        # fit fold's best model (again) -> to obtain that model's feature importances
        logger.photon_system_log("Permutation Importances: Fitting model for " + fold_str)
        pipeline.fit(train_X, train_y, **train_kwargs)

        # get feature importances
        logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)
        perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)

        # store into list (closure over importance_list)
        importance_list['mean'].append(perm_imps["importances_mean"])
        importance_list['std'].append(perm_imps["importances_std"])

        return perm_imps

    for outer_fold in self.results.outer_folds:

        if outer_fold.best_config is None:
            raise ValueError("Could not find a best config for outer fold " + str(outer_fold.fold_nr))

        pipe_copy = self.optimum_pipe.copy_me()

        # set pipe to config
        pipe_copy.set_params(**outer_fold.best_config.config_dict)

        if not self.results.hyperpipe_info.eval_final_performance:
            # Decide whether usable outer-CV training indices exist.
            # BUGFIX: the second check must be `elif`; with two independent
            # `if`s, a None best_config_score raised AttributeError on
            # `.training` instead of falling back to the whole dataset.
            no_outer_cv_indices = False
            if outer_fold.best_config.best_config_score is None:
                no_outer_cv_indices = True
            elif (outer_fold.best_config.best_config_score.training is None
                  or not outer_fold.best_config.best_config_score.training.indices):
                no_outer_cv_indices = True

            if no_outer_cv_indices:
                # no outer split available -> evaluate on the full dataset
                data_to_split, y_to_split, kwargs_to_split = self.data.X, self.data.y, self.data.kwargs
            else:
                logger.photon_system_log("Permutation Importances: Using inner_cv folds.")

                # restrict to this outer fold's training data
                idx = outer_fold.best_config.best_config_score.training.indices
                data_to_split, y_to_split, kwargs_to_split = PhotonDataHelper.split_data(self.data.X,
                                                                                         self.data.y,
                                                                                         self.data.kwargs,
                                                                                         indices=idx)

            # evaluate on each inner CV split of the fold's best config
            for inner_fold in outer_fold.best_config.inner_folds:
                train_and_get_fimps(pipe_copy,
                                    inner_fold.training.indices, inner_fold.validation.indices,
                                    data_to_split, y_to_split, kwargs_to_split,
                                    "inner fold " + str(inner_fold.fold_nr))

        else:
            # final performance was evaluated -> use the outer train/test split
            train_and_get_fimps(pipe_copy,
                                outer_fold.best_config.best_config_score.training.indices,
                                outer_fold.best_config.best_config_score.validation.indices,
                                self.data.X, self.data.y, self.data.kwargs,
                                "outer fold " + str(outer_fold.fold_nr))

    return importance_list

def get_permutation_feature_importances(self, **kwargs):
"""
Fits a model for the best config of each outer fold (using the training data of that fold).
Expand All @@ -1191,41 +1264,13 @@ def get_permutation_feature_importances(self, **kwargs):
"""

importance_list = {'mean': list(), 'std': list()}
pipe_copy = self.optimum_pipe.copy_me()
logger.photon_system_log("")
logger.photon_system_log("Computing permutation importances. This may take a while.")
logger.stars()
for outer_fold in self.results.outer_folds:

if outer_fold.best_config.best_config_score is None:
raise ValueError("Cannot compute permutation importances when use_test_set is false")


# prepare data
train_indices = outer_fold.best_config.best_config_score.training.indices
test_indices = outer_fold.best_config.best_config_score.validation.indices

train_X, train_y, train_kwargs = PhotonDataHelper.split_data(self.data.X,
self.data.y,
self.data.kwargs,
indices=train_indices)

test_X, test_y, test_kwargs = PhotonDataHelper.split_data(self.data.X,
self.data.y,
self.data.kwargs,
indices=test_indices)
# set pipe to config
pipe_copy.set_params(**outer_fold.best_config.config_dict)
logger.photon_system_log("Permutation Importances: Fitting model for outer fold " + str(outer_fold.fold_nr))
pipe_copy.fit(train_X, train_y, **train_kwargs)

logger.photon_system_log("Permutation Importances: Calculating performances for outer fold "
+ str(outer_fold.fold_nr))
outer_fold_perm_imps = permutation_importance(pipe_copy, test_X, test_y, **kwargs)
importance_list['mean'].append(outer_fold_perm_imps["importances_mean"])
importance_list['std'].append(outer_fold_perm_imps["importances_std"])

if self.optimum_pipe is None:
raise ValueError("Cannot calculate permutation importances when optimum_pipe is None (probably the "
"training and optimization procedure failed)")
importance_list = self._calculate_permutation_importances(**kwargs)
mean_importances = np.mean(np.array(importance_list["mean"]), axis=0)
std_importances = np.mean(np.array(importance_list["std"]), axis=0)
logger.stars()
Expand Down
4 changes: 2 additions & 2 deletions photonai/processing/results_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,9 +751,9 @@ def write_convenience_files(self):
self.write_predictions_file()

def convert_to_json_serializable(self, value):
if isinstance(value, (np.int, np.int32, np.int64)):
if isinstance(value, (int, np.int32, np.int64)):
return int(value)
if isinstance(value, (np.float, np.float32, np.float64)):
if isinstance(value, (float, np.float32, np.float64)):
if self.output_settings.reduce_space:
return round(float(value), 3)
return float(value)
Expand Down
57 changes: 51 additions & 6 deletions test/base_tests/test_hyperpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ def test_preprocessing(self):

def test_permutation_feature_importances(self):
hp = Hyperpipe('god',
inner_cv=self.inner_cv_object,
outer_cv=KFold(n_splits=3),
inner_cv=KFold(n_splits=2),
metrics=self.metrics,
best_config_metric=self.best_config_metric,
project_folder=self.tmp_folder_path,
Expand All @@ -197,16 +198,60 @@ def test_permutation_feature_importances(self):
score_element = svc.score(self.__X, self.__y)
self.assertAlmostEqual(score_photon, score_element)

permutation_score = hp.get_permutation_feature_importances(n_repeats=5, random_state=0)
self.assertTrue("mean" in permutation_score)
self.assertTrue("std" in permutation_score)
self.assertEqual(permutation_score["mean"].shape, (self.__X.shape[1],))
self.assertEqual(permutation_score["std"].shape, (self.__X.shape[1],))
# do it on outer folds
permutation_list_outer = hp._calculate_permutation_importances(n_repeats=5, random_state=0)
self.assertEqual(len(permutation_list_outer["mean"]), 3)

permutation_score_outer = hp.get_permutation_feature_importances(n_repeats=5, random_state=0)
self.assertTrue("mean" in permutation_score_outer)
self.assertTrue("std" in permutation_score_outer)
self.assertEqual(permutation_score_outer["mean"].shape, (self.__X.shape[1],))
self.assertEqual(permutation_score_outer["std"].shape, (self.__X.shape[1],))

# do it on inner folds but on training sets from outer split
hp.cross_validation.use_test_set = False
hp.fit(self.__X, self.__y)
permutation_list_inner = hp._calculate_permutation_importances(n_repeats=5)
self.assertEqual(len(permutation_list_inner["mean"]), 3*2)
permutation_score_inner = hp.get_permutation_feature_importances(n_repeats=5)
self.assertEqual(permutation_score_inner["mean"].shape, (self.__X.shape[1],))
self.assertEqual(permutation_score_inner["std"].shape, (self.__X.shape[1],))
# check that validation set permutation importances (inner folds) differ from those of test set (outer folds)
self.assertFalse(np.array_equal(permutation_score_outer["mean"], permutation_score_inner["mean"]))

# do it on inner folds only
hp.cross_validation.outer_folds = {}
hp.cross_validation.outer_cv = None
hp.cross_validation.use_test_set = False
hp.fit(self.__X, self.__y)
permutation_list_no_outer = hp._calculate_permutation_importances(n_repeats=5)
self.assertEqual(len(permutation_list_no_outer), 2)
permutation_score_no_outer = hp.get_permutation_feature_importances(n_repeats=5)
self.assertEqual(permutation_score_inner["mean"].shape, (self.__X.shape[1],))
self.assertEqual(permutation_score_inner["std"].shape, (self.__X.shape[1],))

# raise error
def fake_metric(y_true, y_pred):
return 'a'

hp = Hyperpipe('god',
outer_cv=KFold(n_splits=3),
inner_cv=KFold(n_splits=2),
metrics=[('fake_metric', fake_metric)],
best_config_metric='fake_metric',
project_folder=self.tmp_folder_path,
verbosity=0)
svc = PipelineElement('SVC')
hp += svc
try:
hp.fit(self.__X, self.__y)
except Exception as e:
# should produce an error so that hp.results.best_config is None.
pass
with self.assertRaises(ValueError):
hp.get_permutation_feature_importances(n_repeats=5)
with self.assertRaises(ValueError):
hp._calculate_permutation_importances(n_repeats=5)

def test_estimation_type(self):
def callback(X, y=None, **kwargs):
Expand Down
5 changes: 2 additions & 3 deletions test/integration_tests/test_architecture.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ def setUpClass(cls) -> None:
if cls.test_multiple_hyperpipes:
optimizer_list = ['random_grid_search', 'sk_opt']
eval_final_performance_list = [True, False]
inner_cv_list = [KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.2), LeaveOneOut()]
outer_cv_list = [None, KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.25),
LeaveOneOut()]
inner_cv_list = [KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.2)]
outer_cv_list = [None, KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.25)]
performance_constraints_list = [None]

combinations = list(product(optimizer_list, eval_final_performance_list, inner_cv_list, outer_cv_list,
Expand Down
2 changes: 1 addition & 1 deletion test/optimization_tests/sk_opt_tests/test_sk_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def setUp(self):
PipelineElement("SVC", hyperparameters={'C': FloatRange(1, 100)})]
self.optimizer = SkOptOptimizer()
self.optimizer_name = "sk_opt"
self.optimizer_params = None
self.optimizer_params = {'n_configurations': 10}

def test_ask_advanced(self):
with self.assertRaises(ValueError):
Expand Down

0 comments on commit dd8501e

Please sign in to comment.