Merge pull request #47 from wwu-mmll/develop
New PHOTONAI Version
jernsting authored Aug 3, 2022
2 parents 79d9725 + aa77b16 commit c9f9550
Showing 19 changed files with 211 additions and 77 deletions.
5 changes: 5 additions & 0 deletions documentation/docs/algorithms/hpos.md
@@ -32,6 +32,11 @@ pipe = Hyperpipe("...",
'limit_in_minutes': 20})
```

+If the `optimizer_params` contain both a time limit and a numerical limit,
+both limits are considered: the search aborts as soon as either of them is reached.
+
+The default limit for Random Search is `n_configurations=10`.
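
For illustration, a minimal sketch (not part of this commit) of a random-search pipe that sets both limits; the pipeline elements, metric, and dataset are hypothetical choices:

```python
from sklearn.datasets import load_diabetes
from sklearn.model_selection import KFold
from photonai.base import Hyperpipe, PipelineElement

# both limits set: the search stops at whichever limit is reached first
pipe = Hyperpipe('random_search_example',
                 optimizer='random_search',
                 optimizer_params={'n_configurations': 50,
                                   'limit_in_minutes': 10},
                 metrics=['mean_absolute_error'],
                 best_config_metric='mean_absolute_error',
                 inner_cv=KFold(n_splits=3),
                 project_folder='./tmp')
pipe += PipelineElement('StandardScaler')
pipe += PipelineElement('RandomForestRegressor')

X, y = load_diabetes(return_X_y=True)
pipe.fit(X, y)
```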

<h3>Scikit-Optimize</h3>
Scikit-Optimize, or skopt, is a simple and efficient library to
minimize (very) expensive and noisy black-box functions.
23 changes: 21 additions & 2 deletions examples/advanced/regression_with_constraints.py
@@ -4,6 +4,7 @@
from photonai.base import Hyperpipe, PipelineElement, OutputSettings
from photonai.optimization import MinimumPerformanceConstraint, DummyPerformanceConstraint, BestPerformanceConstraint, IntegerRange

+import matplotlib.pyplot as plt

my_pipe = Hyperpipe(name='constrained_forest_pipe',
optimizer='grid_search',
@@ -14,8 +15,8 @@
use_test_set=True,
verbosity=1,
project_folder='./tmp',
-output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
-save_output=True),
+# output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
+# save_output=True),
performance_constraints=[DummyPerformanceConstraint('mean_absolute_error'),
MinimumPerformanceConstraint('pearson_correlation', 0.65, 'any'),
BestPerformanceConstraint('mean_squared_error', 3, 'mean')])
@@ -26,3 +27,21 @@

X, y = load_boston(return_X_y=True)
my_pipe.fit(X, y)


+# plot a scatter plot of true vs. predicted values
+
+train_df = my_pipe.results_handler.get_mean_train_predictions()
+pred_df = my_pipe.results_handler.get_test_predictions()
+
+max_value = int(max(max(pred_df['y_true']), max(pred_df['y_pred']), max(train_df['y_pred'])))
+
+fig, main_axes = plt.subplots()
+main_axes.plot(range(max_value), range(max_value), color='black')
+test_set = main_axes.scatter(pred_df["y_true"], pred_df["y_pred"], label="Test")
+train_set = main_axes.scatter(train_df["y_true"], train_df["y_pred"], label="Training")
+main_axes.legend(handles=[test_set, train_set], loc='lower right')
+main_axes.set_xlabel("y true")
+main_axes.set_ylabel("y predicted")
+
+plt.show()
7 changes: 6 additions & 1 deletion examples/basic/imbalanced_data.py
@@ -28,9 +28,14 @@
my_pipe += PipelineElement('StandardScaler')

tested_methods = Categorical(['RandomOverSampler', 'SMOTEENN', 'SVMSMOTE',
-'BorderlineSMOTE', 'SMOTE', 'ClusterCentroids'])
+'BorderlineSMOTE', 'SMOTE'])

+# Only SMOTE gets a different input parameter here.
+# All other strategies keep their default settings.
+# Please do not try to optimize over this parameter (i.e., do not put 'config' inside the 'hyperparameters').
my_pipe += PipelineElement('ImbalancedDataTransformer',
hyperparameters={'method_name': tested_methods},
+config={"SMOTE": {"k_neighbors": 3}},
test_disabled=True)

my_pipe += PipelineElement("RandomForestClassifier", n_estimators=200)
2 changes: 1 addition & 1 deletion photonai/__init__.py
@@ -13,6 +13,6 @@
"""

-__version__ = '2.2.0'
+__version__ = '2.2.1'

# __all__ = ( )
14 changes: 7 additions & 7 deletions photonai/base/hyperpipe.py
@@ -305,9 +305,9 @@ def __init__(self, name: Optional[str],
- "grid_search": Optimizer that iteratively tests all possible hyperparameter combinations.
- "random_grid_search": A variation of the grid search optimization that randomly picks
hyperparameter combinations from all possible hyperparameter combinations.
- "sk_opt": Scikit-Optimize based on theories of Baysian optimization.
- "sk_opt": Scikit-Optimize based on theories of bayesian optimization.
- "random_search": randomly chooses hyperparameter from grid-free domain.
- "smac": SMAC based on theories of Baysian optimization.
- "smac": SMAC based on theories of bayesian optimization.
- "nevergrad": Nevergrad based on theories of evolutionary learning.
- In case an object is given:
@@ -359,7 +359,7 @@ def __init__(self, name: Optional[str],
test_size:
The amount of the data that should be left out if no outer_cv is given and
-eval_final_perfomance is set to True.
+eval_final_performance is set to True.
calculate_metrics_per_fold:
If True, the metrics are calculated for each inner_fold.
@@ -377,11 +377,11 @@ def __init__(self, name: Optional[str],
gives only warn and error, 1 adds info and 2 adds debug.
learning_curves:
-Enables larning curve procedure. Evaluate learning process over
+Enables learning curve procedure. Evaluates the learning process over
different sizes of input. Depends on learning_curves_cut.
learning_curves_cut:
-The tested relativ cuts for data size.
+The tested relative cuts for data size.
performance_constraints:
Objects that indicate whether a configuration should
@@ -439,7 +439,7 @@ def __init__(self, name: Optional[str],
learning_curves_cut=learning_curves_cut)

# ====================== Data ===========================
-self.data = Hyperpipe.Data()
+self.data = Hyperpipe.Data(allow_multidim_targets=allow_multidim_targets)

# ====================== Output Folder and Log File Management ===========================
if output_settings:
@@ -1215,7 +1215,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar
no_outer_cv_indices = False
if outer_fold.best_config.best_config_score is None:
no_outer_cv_indices = True
-if outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
+elif outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
no_outer_cv_indices = True

if no_outer_cv_indices:
34 changes: 28 additions & 6 deletions photonai/modelwrapper/imbalanced_data_transformer.py
@@ -51,7 +51,7 @@ class ImbalancedDataTransformer(BaseEstimator, TransformerMixin):
'combine': ["SMOTEENN", "SMOTETomek"],
}

-def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
+def __init__(self, method_name: str = 'RandomUnderSampler', config: dict = None):
"""
Instantiates an object that transforms the data into balanced groups according to the given method.
@@ -84,20 +84,33 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
- SMOTEENN,
- SMOTETomek.
-**kwargs:
-Any parameters to pass to the imbalance strategy object.
+config:
+Each strategy has a set of presets. This parameter is necessary
+to select the appropriate settings for the selected method.
+It is important that the key exactly matches the method_name.
+If no key is found for a method, the method starts with its default settings.
+Please do not use this parameter inside the 'hyperparameters' to optimize it.
"""
if not __found__:
raise ModuleNotFoundError("Module imblearn not found or not installed as expected. "
"Please install the requirements.txt in PHOTON main folder.")

+self.config = config
+self._method_name = None
self.method_name = method_name
self.needs_y = True

+@property
+def method_name(self):
+return self._method_name
+
+@method_name.setter
+def method_name(self, value):
+
imbalance_type = ''
for group, possible_strategies in ImbalancedDataTransformer.IMBALANCED_DICT.items():
-if self.method_name in possible_strategies:
+if value in possible_strategies:
imbalance_type = group

if imbalance_type == "oversampling":
@@ -115,8 +128,17 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
logger.error(msg)
raise ValueError(msg)

-desired_class = getattr(home, method_name)
-self.method = desired_class(**kwargs)
+desired_class = getattr(home, value)
+self._method_name = value
+if self.config is not None and value in self.config:
+if not isinstance(self.config[value], dict):
+msg = "Please use a format like " \
+"config={'SMOTE': {'sampling_strategy': {0: 9, 1: 12}}} for the imbalanced config."
+logger.error(msg)
+raise ValueError(msg)
+self.method = desired_class(**self.config[value])
+else:
+self.method = desired_class()

def fit_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
"""
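
For illustration, a minimal standalone sketch (not part of this commit) of the new `config` parameter; the toy data is hypothetical and the import path follows the file header above:

```python
import numpy as np
from photonai.modelwrapper.imbalanced_data_transformer import ImbalancedDataTransformer

# toy imbalanced data: 20 samples of class 0, 5 samples of class 1
X = np.random.rand(25, 4)
y = np.array([0] * 20 + [1] * 5)

# the config key must exactly match method_name;
# without a matching key, the strategy runs with its default settings
transformer = ImbalancedDataTransformer(method_name='SMOTE',
                                        config={'SMOTE': {'k_neighbors': 3}})
X_balanced, y_balanced = transformer.fit_transform(X, y)
print(np.bincount(y_balanced))  # classes are balanced after SMOTE
```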
2 changes: 1 addition & 1 deletion photonai/modelwrapper/keras_base_estimator.py
@@ -1,5 +1,5 @@
import warnings
-import keras
+import tensorflow.keras as keras
from sklearn.base import BaseEstimator

from photonai.photonlogger.logger import logger
28 changes: 14 additions & 14 deletions photonai/modelwrapper/keras_base_models.py
@@ -1,27 +1,27 @@
import warnings
import numpy as np
-import keras
+import tensorflow.keras as keras
from typing import Union
-from keras.utils.all_utils import to_categorical
-from keras.layers import Dropout, Dense
-from keras.layers import BatchNormalization
-from keras.models import Sequential
-from keras.optimizers import Optimizer, adam_v2, rmsprop_v2, adadelta_v2, adagrad_v2, adamax_v2, nadam_v2, gradient_descent_v2
-from keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import Dropout, Dense
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Optimizer, Adam, RMSprop, Adadelta, Adagrad, Adamax, Nadam, SGD
+from tensorflow.keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
linear, exponential
from sklearn.base import ClassifierMixin, RegressorMixin

from photonai.photonlogger.logger import logger
from photonai.modelwrapper.keras_base_estimator import KerasBaseEstimator

__supported_optimizers__ = {
-'sgd': gradient_descent_v2.SGD,
-'rmsprop': rmsprop_v2.RMSprop,
-'adagrad': adagrad_v2.Adagrad,
-'adadelta': adadelta_v2.Adadelta,
-'adam': adam_v2.Adam,
-'adamax': adamax_v2.Adamax,
-'nadam': nadam_v2.Nadam
+'sgd': SGD,
+'rmsprop': RMSprop,
+'adagrad': Adagrad,
+'adadelta': Adadelta,
+'adam': Adam,
+'adamax': Adamax,
+'nadam': Nadam
}
__supported_activations__ = {
'softmax': softmax,
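
As a quick check (not part of this commit) that the new `tensorflow.keras` import path resolves to the remapped classes, assuming TensorFlow 2.x is installed:

```python
from tensorflow.keras.optimizers import Adam

# instantiate one of the remapped optimizers via the new import path
opt = Adam(learning_rate=1e-3)
print(type(opt).__module__, type(opt).__name__)
```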
2 changes: 1 addition & 1 deletion photonai/modelwrapper/keras_dnn_classifier.py
@@ -1,6 +1,6 @@
import numpy as np
from typing import Union
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer

from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseClassifier
import photonai.modelwrapper.keras_base_models as keras_dnn_base_model
2 changes: 1 addition & 1 deletion photonai/modelwrapper/keras_dnn_regressor.py
@@ -1,6 +1,6 @@
from typing import Union
import numpy as np
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer
import photonai.modelwrapper.keras_base_models as keras_dnn_base_model

from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseRegressor
15 changes: 10 additions & 5 deletions photonai/optimization/random_search/random_search.py
@@ -13,7 +13,7 @@ class RandomSearchOptimizer(PhotonSlaveOptimizer):
testing hyperparameter combinations without any grid.
"""
-def __init__(self, limit_in_minutes: Union[float, None] = 60, n_configurations: Union[int, None] = None):
+def __init__(self, limit_in_minutes: Union[float, None] = None, n_configurations: Union[int, None] = 10):
"""
Initialize the object.
At least one of limit_in_minutes or n_configurations must not be None.
@@ -74,19 +74,24 @@ def next_config_generator(self) -> Generator:
"""
while True:
-_ = (yield self._generate_config())
self.k_configutration += 1
+new_config = True
if self.limit_in_minutes:
if self.start_time is None:
self.start_time = datetime.datetime.now()
self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)

if datetime.datetime.now() >= self.end_time:
-return
+new_config = False

if self.n_configurations:
-if self.k_configutration >= self.n_configurations:
-return
+if self.k_configutration >= self.n_configurations + 1:
+new_config = False
+
+if not new_config:
+return
+
+_ = (yield self._generate_config())

def _generate_config(self):
config = {}
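
For illustration, a standalone sketch (not part of this commit) of the generator logic above: the counter is incremented before the config is yielded, which is why the bound check reads `n_configurations + 1`:

```python
import datetime
from typing import Generator

def limited_configs(n_configurations: int = 10,
                    limit_in_minutes: float = None) -> Generator:
    """Yield dummy configs until either limit is hit (mirrors the logic above)."""
    k = 0
    end_time = None
    while True:
        k += 1
        if limit_in_minutes:
            if end_time is None:
                end_time = datetime.datetime.now() + datetime.timedelta(minutes=limit_in_minutes)
            if datetime.datetime.now() >= end_time:
                return
        if k >= n_configurations + 1:
            return
        yield {'config_nr': k}

print(len(list(limited_configs(n_configurations=10))))  # -> 10
```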
24 changes: 24 additions & 0 deletions photonai/processing/results_handler.py
@@ -447,6 +447,30 @@ def collect_fold_lists(score_info_list, fold_nr, predictions_filename=''):

return sorted_df.to_dict('list')

+def get_mean_train_predictions(self, filename=''):
+"""
+This function returns the MEAN predictions, true targets, and fold index
+for the TRAINING set of the best configuration of each outer fold.
+"""
+if self.results is None:
+raise ValueError("Result tree information is needed but results attribute of object is None.")
+
+score_info_list = list()
+fold_nr_list = list()
+for outer_fold in self.results.outer_folds:
+score_info_list.append(outer_fold.best_config.best_config_score.training)
+fold_nr_list.append(outer_fold.fold_nr)
+infos = self.collect_fold_lists(score_info_list, fold_nr_list, filename)
+infos = {key: np.array(value) for key, value in infos.items()}
+num_items = np.unique(infos["indices"])
+mean_pred = np.zeros(num_items.shape)
+y_true = np.zeros(num_items.shape)
+for i in num_items:
+idx = (infos["indices"] == i)
+mean_pred[i] = np.mean(infos["y_pred"][idx])
+y_true[i] = infos["y_true"][idx][0]
+return {'y_true': y_true, 'y_pred': mean_pred, 'indices': num_items}
+
def get_test_predictions(self, filename=''):
"""
This function returns the predictions, true targets, and fold index
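
For illustration, a sketch with hypothetical numbers (not part of this commit) of the per-index averaging in `get_mean_train_predictions`: a sample can land in the training set of several outer folds, so its training predictions are averaged per sample index:

```python
import numpy as np

# hypothetical collected fold info: sample 0 appears in two folds' training sets
infos = {'indices': np.array([0, 1, 0, 2]),
         'y_pred': np.array([1.0, 2.0, 3.0, 4.0]),
         'y_true': np.array([1.5, 2.5, 1.5, 4.5])}

num_items = np.unique(infos['indices'])           # array([0, 1, 2])
mean_pred = np.zeros(num_items.shape)
y_true = np.zeros(num_items.shape)
for i in num_items:
    idx = (infos['indices'] == i)
    mean_pred[i] = np.mean(infos['y_pred'][idx])  # sample 0 -> (1.0 + 3.0) / 2 = 2.0
    y_true[i] = infos['y_true'][idx][0]

print({'y_true': y_true, 'y_pred': mean_pred, 'indices': num_items})
```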
1 change: 0 additions & 1 deletion photonai/requirements.txt
@@ -2,7 +2,6 @@
numpy
matplotlib
scikit-learn
-keras<=2.6.0
pandas
plotly
imbalanced-learn
3 changes: 1 addition & 2 deletions setup.py
@@ -6,7 +6,7 @@
from setuptools import setup, find_packages


-__version__ = '2.2.0'
+__version__ = '2.2.1'

with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
Expand Down Expand Up @@ -41,7 +41,6 @@
'numpy',
'matplotlib',
'scikit-learn',
-'keras<=2.6.0',
'pandas',
'plotly',
'imbalanced-learn',
3 changes: 1 addition & 2 deletions test/base_tests/test_hyperpipe.py
@@ -13,8 +13,7 @@
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline as SKLPipeline
from sklearn.preprocessing import StandardScaler
-from sklearn.inspection import permutation_importance
-from keras.metrics import Accuracy
+from tensorflow.keras.metrics import Accuracy

from photonai.base import PipelineElement, Hyperpipe, OutputSettings, Preprocessing, CallbackElement, Branch, Stack, \
Switch, ParallelBranch
