diff --git a/documentation/docs/algorithms/hpos.md b/documentation/docs/algorithms/hpos.md
index 309aa9c4..d958de73 100644
--- a/documentation/docs/algorithms/hpos.md
+++ b/documentation/docs/algorithms/hpos.md
@@ -32,6 +32,11 @@ pipe = Hyperpipe("...",
'limit_in_minutes': 20})
```
+If the `optimizer_params` contain both a time limit and a limit on the number of
+configurations, both are enforced: the search stops as soon as either limit is reached.
+
+The default limit for Random Search is `n_configurations=10`.
+
Scikit-Optimize
Scikit-Optimize, or skopt, is a simple and efficient library to
minimize (very) expensive and noisy black-box functions.
diff --git a/examples/advanced/regression_with_constraints.py b/examples/advanced/regression_with_constraints.py
index aafe7c07..f614ce5a 100644
--- a/examples/advanced/regression_with_constraints.py
+++ b/examples/advanced/regression_with_constraints.py
@@ -4,6 +4,7 @@
from photonai.base import Hyperpipe, PipelineElement, OutputSettings
from photonai.optimization import MinimumPerformanceConstraint, DummyPerformanceConstraint, BestPerformanceConstraint, IntegerRange
+import matplotlib.pyplot as plt
my_pipe = Hyperpipe(name='constrained_forest_pipe',
optimizer='grid_search',
@@ -14,8 +15,8 @@
use_test_set=True,
verbosity=1,
project_folder='./tmp',
- output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
- save_output=True),
+ # output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
+ # save_output=True),
performance_constraints=[DummyPerformanceConstraint('mean_absolute_error'),
MinimumPerformanceConstraint('pearson_correlation', 0.65, 'any'),
BestPerformanceConstraint('mean_squared_error', 3, 'mean')])
@@ -26,3 +27,21 @@
X, y = load_boston(return_X_y=True)
my_pipe.fit(X, y)
+
+
+# Scatter plot of true vs. predicted targets (training and test set)
+
+train_df = my_pipe.results_handler.get_mean_train_predictions()
+pred_df = my_pipe.results_handler.get_test_predictions()
+
+max_value = int(max(max(pred_df['y_true']), max(pred_df['y_pred']), max(train_df['y_pred'])))
+
+fig, main_axes = plt.subplots()
+main_axes.plot(range(max_value), range(max_value), color='black')
+test_set = main_axes.scatter(pred_df["y_true"], pred_df["y_pred"], label="Test")
+train_set = main_axes.scatter(train_df["y_true"], train_df["y_pred"], label="Training")
+main_axes.legend(handles=[test_set, train_set], loc='lower right')
+main_axes.set_xlabel("y true")
+main_axes.set_ylabel("y predicted")
+
+plt.show()
diff --git a/examples/basic/imbalanced_data.py b/examples/basic/imbalanced_data.py
index f28a21fd..007a7a03 100644
--- a/examples/basic/imbalanced_data.py
+++ b/examples/basic/imbalanced_data.py
@@ -28,9 +28,14 @@
my_pipe += PipelineElement('StandardScaler')
tested_methods = Categorical(['RandomOverSampler', 'SMOTEENN', 'SVMSMOTE',
- 'BorderlineSMOTE', 'SMOTE', 'ClusterCentroids'])
+ 'BorderlineSMOTE', 'SMOTE'])
+
+# Only SMOTE is given a non-default input parameter here.
+# All other strategies keep their default settings.
+# Do not try to optimize over this parameter (i.e. do not put 'config' inside the 'hyperparameters').
my_pipe += PipelineElement('ImbalancedDataTransformer',
hyperparameters={'method_name': tested_methods},
+ config={"SMOTE": {"k_neighbors": 3}},
test_disabled=True)
my_pipe += PipelineElement("RandomForestClassifier", n_estimators=200)
diff --git a/photonai/__init__.py b/photonai/__init__.py
index 2d5ffbf2..28dfdda2 100644
--- a/photonai/__init__.py
+++ b/photonai/__init__.py
@@ -13,6 +13,6 @@
"""
-__version__ = '2.2.0'
+__version__ = '2.2.1'
# __all__ = ( )
diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py
index 8e460791..740ddfc5 100644
--- a/photonai/base/hyperpipe.py
+++ b/photonai/base/hyperpipe.py
@@ -305,9 +305,9 @@ def __init__(self, name: Optional[str],
- "grid_search": Optimizer that iteratively tests all possible hyperparameter combinations.
- "random_grid_search": A variation of the grid search optimization that randomly picks
hyperparameter combinations from all possible hyperparameter combinations.
- - "sk_opt": Scikit-Optimize based on theories of Baysian optimization.
+ - "sk_opt": Scikit-Optimize based on theories of Bayesian optimization.
- "random_search": randomly chooses hyperparameter from grid-free domain.
- - "smac": SMAC based on theories of Baysian optimization.
+ - "smac": SMAC based on theories of Bayesian optimization.
- "nevergrad": Nevergrad based on theories of evolutionary learning.
- In case an object is given:
@@ -359,7 +359,7 @@ def __init__(self, name: Optional[str],
test_size:
The amount of the data that should be left out if no outer_cv is given and
- eval_final_perfomance is set to True.
+ eval_final_performance is set to True.
calculate_metrics_per_fold:
If True, the metrics are calculated for each inner_fold.
@@ -377,11 +377,11 @@ def __init__(self, name: Optional[str],
gives only warn and error, 1 gives adds info and 2 adds debug.
learning_curves:
- Enables larning curve procedure. Evaluate learning process over
+ Enables learning curve procedure. Evaluate learning process over
different sizes of input. Depends on learning_curves_cut.
learning_curves_cut:
- The tested relativ cuts for data size.
+ The tested relative cuts for data size.
performance_constraints:
Objects that indicate whether a configuration should
@@ -439,7 +439,7 @@ def __init__(self, name: Optional[str],
learning_curves_cut=learning_curves_cut)
# ====================== Data ===========================
- self.data = Hyperpipe.Data()
+ self.data = Hyperpipe.Data(allow_multidim_targets=allow_multidim_targets)
# ====================== Output Folder and Log File Management ===========================
if output_settings:
@@ -1215,7 +1215,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar
no_outer_cv_indices = False
if outer_fold.best_config.best_config_score is None:
no_outer_cv_indices = True
- if outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
+ elif outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
no_outer_cv_indices = True
if no_outer_cv_indices:
diff --git a/photonai/modelwrapper/imbalanced_data_transformer.py b/photonai/modelwrapper/imbalanced_data_transformer.py
index 4c46e7fb..29100207 100644
--- a/photonai/modelwrapper/imbalanced_data_transformer.py
+++ b/photonai/modelwrapper/imbalanced_data_transformer.py
@@ -51,7 +51,7 @@ class ImbalancedDataTransformer(BaseEstimator, TransformerMixin):
'combine': ["SMOTEENN", "SMOTETomek"],
}
- def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
+ def __init__(self, method_name: str = 'RandomUnderSampler', config: dict = None):
"""
Instantiates an object that transforms the data into balanced groups according to the given method.
@@ -84,20 +84,33 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
- SMOTEENN,
- SMOTETomek.
- **kwargs:
- Any parameters to pass to the imbalance strategy object.
+ config:
+ Dictionary that maps a method name to a dict of keyword arguments
+ which is passed to the constructor of the selected imbalance strategy.
+ It is important that the key exactly matches the method_name.
+ If no key is found for a method, it is instantiated with its default settings.
+ Please do not use this parameter inside the 'hyperparameters' to optimize it.
"""
if not __found__:
raise ModuleNotFoundError("Module imblearn not found or not installed as expected. "
"Please install the requirements.txt in PHOTON main folder.")
+ self.config = config
+ self._method_name = None
self.method_name = method_name
self.needs_y = True
+ @property
+ def method_name(self):
+ return self._method_name
+
+ @method_name.setter
+ def method_name(self, value):
+
imbalance_type = ''
for group, possible_strategies in ImbalancedDataTransformer.IMBALANCED_DICT.items():
- if self.method_name in possible_strategies:
+ if value in possible_strategies:
imbalance_type = group
if imbalance_type == "oversampling":
@@ -115,8 +128,17 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
logger.error(msg)
raise ValueError(msg)
- desired_class = getattr(home, method_name)
- self.method = desired_class(**kwargs)
+ desired_class = getattr(home, value)
+ self._method_name = value
+ if self.config is not None and value in self.config:
+ if not isinstance(self.config[value], dict):
+ msg = "Please use for the imbalanced config a format like: " \
+ "config={'SMOTE': {'sampling_strategy': {0: 9, 1: 12}}}."
+ logger.error(msg)
+ raise ValueError(msg)
+ self.method = desired_class(**self.config[value])
+ else:
+ self.method = desired_class()
def fit_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
"""
diff --git a/photonai/modelwrapper/keras_base_estimator.py b/photonai/modelwrapper/keras_base_estimator.py
index e8a11b16..70d6401f 100644
--- a/photonai/modelwrapper/keras_base_estimator.py
+++ b/photonai/modelwrapper/keras_base_estimator.py
@@ -1,5 +1,5 @@
import warnings
-import keras
+import tensorflow.keras as keras
from sklearn.base import BaseEstimator
from photonai.photonlogger.logger import logger
diff --git a/photonai/modelwrapper/keras_base_models.py b/photonai/modelwrapper/keras_base_models.py
index 16048d7d..9f7f026b 100644
--- a/photonai/modelwrapper/keras_base_models.py
+++ b/photonai/modelwrapper/keras_base_models.py
@@ -1,13 +1,13 @@
import warnings
import numpy as np
-import keras
+import tensorflow.keras as keras
from typing import Union
-from keras.utils.all_utils import to_categorical
-from keras.layers import Dropout, Dense
-from keras.layers import BatchNormalization
-from keras.models import Sequential
-from keras.optimizers import Optimizer, adam_v2, rmsprop_v2, adadelta_v2, adagrad_v2, adamax_v2, nadam_v2, gradient_descent_v2
-from keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import Dropout, Dense
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Optimizer, Adam, RMSprop, Adadelta, Adagrad, Adamax, Nadam, SGD
+from tensorflow.keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
linear, exponential
from sklearn.base import ClassifierMixin, RegressorMixin
@@ -15,13 +15,13 @@
from photonai.modelwrapper.keras_base_estimator import KerasBaseEstimator
__supported_optimizers__ = {
- 'sgd': gradient_descent_v2.SGD,
- 'rmsprop': rmsprop_v2.RMSprop,
- 'adagrad': adagrad_v2.Adagrad,
- 'adadelta': adadelta_v2.Adadelta,
- 'adam': adam_v2.Adam,
- 'adamax': adamax_v2.Adamax,
- 'nadam': nadam_v2.Nadam
+ 'sgd': SGD,
+ 'rmsprop': RMSprop,
+ 'adagrad': Adagrad,
+ 'adadelta': Adadelta,
+ 'adam': Adam,
+ 'adamax': Adamax,
+ 'nadam': Nadam
}
__supported_activations__ = {
'softmax': softmax,
diff --git a/photonai/modelwrapper/keras_dnn_classifier.py b/photonai/modelwrapper/keras_dnn_classifier.py
index be03af7c..6a92ab57 100644
--- a/photonai/modelwrapper/keras_dnn_classifier.py
+++ b/photonai/modelwrapper/keras_dnn_classifier.py
@@ -1,6 +1,6 @@
import numpy as np
from typing import Union
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer
from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseClassifier
import photonai.modelwrapper.keras_base_models as keras_dnn_base_model
diff --git a/photonai/modelwrapper/keras_dnn_regressor.py b/photonai/modelwrapper/keras_dnn_regressor.py
index e9568fa0..d7340892 100644
--- a/photonai/modelwrapper/keras_dnn_regressor.py
+++ b/photonai/modelwrapper/keras_dnn_regressor.py
@@ -1,6 +1,6 @@
from typing import Union
import numpy as np
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer
import photonai.modelwrapper.keras_base_models as keras_dnn_base_model
from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseRegressor
diff --git a/photonai/optimization/random_search/random_search.py b/photonai/optimization/random_search/random_search.py
index d66e2cb0..88532da9 100644
--- a/photonai/optimization/random_search/random_search.py
+++ b/photonai/optimization/random_search/random_search.py
@@ -13,7 +13,7 @@ class RandomSearchOptimizer(PhotonSlaveOptimizer):
testing hyperparameter combinations without any grid.
"""
- def __init__(self, limit_in_minutes: Union[float, None] = 60, n_configurations: Union[int, None] = None):
+ def __init__(self, limit_in_minutes: Union[float, None] = None, n_configurations: Union[int, None] = 10):
"""
Initialize the object.
One of limit_in_minutes or n_configurations must differ from None.
@@ -74,19 +74,24 @@ def next_config_generator(self) -> Generator:
"""
while True:
- _ = (yield self._generate_config())
self.k_configutration += 1
+ new_config = True
if self.limit_in_minutes:
if self.start_time is None:
self.start_time = datetime.datetime.now()
self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)
if datetime.datetime.now() >= self.end_time:
- return
+ new_config = False
if self.n_configurations:
- if self.k_configutration >= self.n_configurations:
- return
+ if self.k_configutration >= self.n_configurations + 1:
+ new_config = False
+
+ if not new_config:
+ return
+
+ _ = (yield self._generate_config())
def _generate_config(self):
config = {}
diff --git a/photonai/processing/results_handler.py b/photonai/processing/results_handler.py
index 9009f6be..4e06de9a 100644
--- a/photonai/processing/results_handler.py
+++ b/photonai/processing/results_handler.py
@@ -447,6 +447,30 @@ def collect_fold_lists(score_info_list, fold_nr, predictions_filename=''):
return sorted_df.to_dict('list')
+ def get_mean_train_predictions(self, filename=''):
+ """
+ This function returns, per sample index, the MEAN prediction and the true target
+ for the TRAINING Set of the best configuration of each outer fold.
+ """
+ if self.results is None:
+ raise ValueError("Result tree information is needed but results attribute of object is None.")
+
+ score_info_list = list()
+ fold_nr_list = list()
+ for outer_fold in self.results.outer_folds:
+ score_info_list.append(outer_fold.best_config.best_config_score.training)
+ fold_nr_list.append(outer_fold.fold_nr)
+ infos = self.collect_fold_lists(score_info_list, fold_nr_list, filename)
+ infos = {key: np.array(value) for key, value in infos.items()}
+ num_items = np.unique(infos["indices"])
+ mean_pred = np.zeros(num_items.shape)
+ y_true = np.zeros(num_items.shape)
+ for i in num_items:
+ idx = (infos["indices"] == i)
+ mean_pred[i] = np.mean(infos["y_pred"][idx])
+ y_true[i] = infos["y_true"][idx][0]
+ return {'y_true': y_true, 'y_pred': mean_pred, 'indices': num_items}
+
def get_test_predictions(self, filename=''):
"""
This function returns the predictions, true targets, and fold index
diff --git a/photonai/requirements.txt b/photonai/requirements.txt
index a1047532..27106f51 100644
--- a/photonai/requirements.txt
+++ b/photonai/requirements.txt
@@ -2,7 +2,6 @@
numpy
matplotlib
scikit-learn
-keras<=2.6.0
pandas
plotly
imbalanced-learn
diff --git a/setup.py b/setup.py
index 4a0d3b7b..a68f3ce8 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
from setuptools import setup, find_packages
-__version__ = '2.2.0'
+__version__ = '2.2.1'
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
@@ -41,7 +41,6 @@
'numpy',
'matplotlib',
'scikit-learn',
- 'keras<=2.6.0',
'pandas',
'plotly',
'imbalanced-learn',
diff --git a/test/base_tests/test_hyperpipe.py b/test/base_tests/test_hyperpipe.py
index 35a92e68..ae2452ac 100644
--- a/test/base_tests/test_hyperpipe.py
+++ b/test/base_tests/test_hyperpipe.py
@@ -13,8 +13,7 @@
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline as SKLPipeline
from sklearn.preprocessing import StandardScaler
-from sklearn.inspection import permutation_importance
-from keras.metrics import Accuracy
+from tensorflow.keras.metrics import Accuracy
from photonai.base import PipelineElement, Hyperpipe, OutputSettings, Preprocessing, CallbackElement, Branch, Stack, \
Switch, ParallelBranch
diff --git a/test/modelwrapper_tests/test_imbalanced_data_transformer.py b/test/modelwrapper_tests/test_imbalanced_data_transformer.py
index b3d5aa73..5054a5c7 100644
--- a/test/modelwrapper_tests/test_imbalanced_data_transformer.py
+++ b/test/modelwrapper_tests/test_imbalanced_data_transformer.py
@@ -1,6 +1,11 @@
import numpy as np
+import pandas as pd
+from sklearn.datasets import make_classification
+from sklearn.model_selection import StratifiedShuffleSplit
from photonai.modelwrapper.imbalanced_data_transformer import ImbalancedDataTransformer
+from photonai.base import Hyperpipe, PipelineElement
+from photonai.optimization import Categorical
from test.modelwrapper_tests.test_base_model_wrapper import BaseModelWrapperTest
from imblearn.over_sampling._smote.tests import test_smote
@@ -28,28 +33,23 @@ def test_strategy_oversampling(self):
"""
sample test of different functions based on imblearn implementation for oversampling methods.
"""
- sampling_strategy = {0: 9, 1: 12}
imbalanced_data_transformer = ImbalancedDataTransformer(method_name='SMOTE',
- sampling_strategy = {0: 9, 1: 12},
- random_state = test_smote.RND_SEED)
+ config={"SMOTE": {"sampling_strategy": {0: 9, 1: 12},
+ "random_state": test_smote.RND_SEED}})
# test_sample_regular_half() -> smote
X_resampled, y_resampled = imbalanced_data_transformer.fit_transform(test_smote.X, test_smote.Y)
- X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [
- 1.25192108, -0.22367336
- ], [0.53366841, -0.30312976], [1.52091956, -0.49283504], [
- -0.28162401, -2.10400981
- ], [0.83680821, 1.72827342], [0.3084254, 0.33299982], [
- 0.70472253, -0.73309052
- ], [0.28893132, -0.38761769], [1.15514042, 0.0129463], [
- 0.88407872, 0.35454207
- ], [1.31301027, -0.92648734], [-1.11515198, -0.93689695], [
- -0.18410027, -0.45194484
- ], [0.9281014, 0.53085498], [-0.14374509, 0.27370049],
- [-0.41635887, -0.38299653], [0.08711622, 0.93259929],
- [1.70580611, -0.11219234], [0.36784496, -0.1953161]])
- y_gt = np.array(
- [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0])
+ X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
+ [1.25192108, -0.22367336], [0.53366841, -0.30312976],
+ [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
+ [0.83680821, 1.72827342], [0.3084254, 0.33299982],
+ [0.70472253, -0.73309052], [0.28893132, -0.38761769],
+ [1.15514042, 0.0129463], [0.88407872, 0.35454207],
+ [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
+ [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
+ [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
+ [0.08711622, 0.93259929], [1.70580611, -0.11219234], [0.36784496, -0.1953161]])
+ y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0])
test_smote.assert_allclose(X_resampled, X_gt, rtol=test_smote.R_TOL)
test_smote.assert_array_equal(y_resampled, y_gt)
@@ -57,13 +57,15 @@ def test_strategy_undersampling(self):
"""
sample test of different functions based on imblearn implementation for undersampling methods.
"""
- imbalanced_data_transformer = ImbalancedDataTransformer(method_name='InstanceHardnessThreshold',
- estimator=test_instance_hardness_threshold.ESTIMATOR,
- sampling_strategy={0: 6, 1: 8},
- random_state=test_instance_hardness_threshold.RND_SEED)
+ transformer = ImbalancedDataTransformer(method_name='InstanceHardnessThreshold',
+ config={"InstanceHardnessThreshold":
+ {"estimator": test_instance_hardness_threshold.ESTIMATOR,
+ "sampling_strategy": {0: 6, 1: 8},
+ "random_state": test_instance_hardness_threshold.RND_SEED}
+ })
- X_resampled, y_resampled = imbalanced_data_transformer.fit_resample(test_instance_hardness_threshold.X,
- test_instance_hardness_threshold.Y)
+ X_resampled, y_resampled = transformer.fit_resample(test_instance_hardness_threshold.X,
+ test_instance_hardness_threshold.Y)
assert X_resampled.shape == (15, 2)
assert y_resampled.shape == (15,)
@@ -72,7 +74,9 @@ def test_strategy_combine(self):
sample test of different functions based on imblearn implementation for oversampling methods.
"""
imbalanced_data_transformer = ImbalancedDataTransformer(method_name='SMOTETomek',
- random_state=test_smote_tomek.RND_SEED)
+ config={"SMOTETomek":
+ {"random_state": test_smote_tomek.RND_SEED}
+ })
X_resampled, y_resampled = imbalanced_data_transformer.fit_resample(test_smote_tomek.X, test_smote_tomek.Y)
X_gt = np.array(
[
@@ -97,3 +101,50 @@ def test_strategy_combine(self):
y_gt = np.array([1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0])
test_smote_tomek.assert_allclose(X_resampled, X_gt, rtol=test_smote_tomek.R_TOL)
test_smote_tomek.assert_array_equal(y_resampled, y_gt)
+
+ def test_config_parameter(self):
+ with self.assertRaises(ValueError):
+ ImbalancedDataTransformer(method_name='SMOTETomek', config={"SMOTETomek": test_smote_tomek.RND_SEED})
+
+ def test_different_strategies(self):
+ def target_relative(y_true, y_pred):
+ return (y_true == 0).sum() / len(y_true)
+
+ def target_absolute(y_true, y_pred):
+ return len(y_true)
+
+ X, y = make_classification(weights=[0.9, 0.1], n_samples=300)
+
+ my_pipe = Hyperpipe('balanced_pipe',
+ optimizer='grid_search',
+ metrics=['accuracy', target_relative, target_absolute],
+ best_config_metric="accuracy",
+ inner_cv=StratifiedShuffleSplit(n_splits=3, test_size=0.2),
+ verbosity=1,
+ project_folder='./tmp/')
+
+ my_pipe += PipelineElement('StandardScaler')
+
+ tested_methods = Categorical(['RandomOverSampler', 'RandomUnderSampler'])
+ my_pipe += PipelineElement('ImbalancedDataTransformer',
+ hyperparameters={'method_name': tested_methods},
+ test_disabled=False)
+ my_pipe += PipelineElement("LogisticRegression")
+ my_pipe.fit(X, y)
+
+ # -> test samples per strategy
+ test_perf = pd.DataFrame([], columns=["config", "acc", "class_distribution", "absolute_samples"])
+
+ for i, test_config_item in enumerate(my_pipe.results.outer_folds[0].tested_config_list):
+ config = test_config_item.config_dict["ImbalancedDataTransformer__method_name"]
+ acc = round(test_config_item.metrics_train[0].value, 3)
+ relative = round(test_config_item.metrics_train[2].value, 3)
+ absolute = round(test_config_item.metrics_train[4].value, 3)
+ test_perf = test_perf.append(pd.Series([config, acc, relative, absolute], index=test_perf.columns),
+ ignore_index=True)
+
+ self.assertGreater(test_perf[test_perf["config"] == "RandomOverSampler"]["absolute_samples"].tolist()[0],
+ test_perf[test_perf["config"] == "RandomUnderSampler"]["absolute_samples"].tolist()[0])
+
+ self.assertEqual(test_perf[test_perf["config"] == "RandomOverSampler"]["class_distribution"].tolist()[0],
+ test_perf[test_perf["config"] == "RandomUnderSampler"]["class_distribution"].tolist()[0])
diff --git a/test/modelwrapper_tests/test_keras_basic.py b/test/modelwrapper_tests/test_keras_basic.py
index 87e0c175..9996f17d 100644
--- a/test/modelwrapper_tests/test_keras_basic.py
+++ b/test/modelwrapper_tests/test_keras_basic.py
@@ -60,7 +60,7 @@ def test_tf_model(self):
reload_estinator = self.estimator_type()
reload_estinator.load("keras_example_saved_model")
- np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estinator.predict(self.X), decimal=4)
+ np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estinator.predict(self.X), decimal=3)
# remove saved keras files
for fname in os.listdir("."):
diff --git a/test/optimization_tests/random_search_tests/test_random_search.py b/test/optimization_tests/random_search_tests/test_random_search.py
index 07ce9402..b49d4261 100644
--- a/test/optimization_tests/random_search_tests/test_random_search.py
+++ b/test/optimization_tests/random_search_tests/test_random_search.py
@@ -34,14 +34,15 @@ def test_constraint_obligation(self):
RandomSearchOptimizer(n_configurations=-1, limit_in_minutes=-1)
def test_time_limit(self):
- self.optimizer = RandomSearchOptimizer(limit_in_minutes=0.05) # 3 seconds
+ self.optimizer = RandomSearchOptimizer(limit_in_minutes=1/60.) # 1 second
self.optimizer.prepare(pipeline_elements=self.pipeline_elements, maximize_metric=True)
- configs = []
- start = time.time()
- for config in self.optimizer.ask:
- configs.append(config)
- stop = time.time()
- self.assertAlmostEqual(stop-start, 3, 0)
+ config_iter = iter(self.optimizer.ask)
+ c1 = next(config_iter)
+ self.assertTrue(type(c1) == dict)
+ time.sleep(2)
+
+ with self.assertRaises(StopIteration):
+ config2 = next(config_iter)
def test_run(self):
pass
diff --git a/test/processing_tests/test_results.py b/test/processing_tests/test_results.py
index 94ba7186..3d31dfa5 100644
--- a/test/processing_tests/test_results.py
+++ b/test/processing_tests/test_results.py
@@ -139,6 +139,11 @@ def test_get_predictions(self):
self.assertTrue(np.array_equal(csv_file.y_true.values, self.y_true))
self.assertTrue(np.array_equal(csv_file.probabilities.values, self.y_true / 10))
+ training_preds = self.hyperpipe.results_handler.get_mean_train_predictions()
+ self.assertTrue(np.array_equal(training_preds['y_true'], self.y_true))
+ self.assertTrue(np.array_equal(training_preds['y_pred'], self.y_true))
+ self.assertTrue(np.array_equal(training_preds['indices'], range(len(self.X))))
+
def test_get_predictions_no_outer_cv_eval_final_performance_false(self):
self.hyperpipe += PipelineElement('PhotonTestXPredictor')
self.hyperpipe.cross_validation.outer_cv = None