Skip to content

Commit

Permalink
Merge pull request #155 from rvandewater/version_release
Browse files Browse the repository at this point in the history
Major version release with new hyperparameter optimization, polars preprocessing, modality selection, and other new features.
  • Loading branch information
rvandewater authored Oct 17, 2024
2 parents cdb5bf4 + 5372971 commit 4dd915e
Show file tree
Hide file tree
Showing 50 changed files with 1,894 additions and 593 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# the GitHub editor is 127 chars wide
flake8 . --count --max-complexity=20 --max-line-length=127 --statistics
flake8 . --count --max-complexity=30 --max-line-length=127 --statistics
# - name: Test with pytest
# run: python -m pytest ./tests/recipes
# If we want to test running the tool later on
Expand Down
18 changes: 18 additions & 0 deletions configs/prediction_models/BRFClassifier.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Settings for ImbLearn Balanced Random Forest Classifier.

# Common settings for ML models
include "configs/prediction_models/common/MLCommon.gin"

# Train params
train_common.model = @BRFClassifier

# Hyperparameter search space.
# NOTE(review): sibling configs (e.g. CBClassifier.gin) use 3-element tuples
# like (1e-4, 0.5, "log") as sampling ranges and bracketed lists as discrete
# choices. The bare tuples for min_samples_split / min_samples_leaf below may
# therefore be parsed as ranges rather than categorical choices — confirm
# against the tuning code.
model/hyperparameter.class_to_tune = @BRFClassifier
model/hyperparameter.n_estimators = [50, 100, 250, 500, 750,1000,1500]
model/hyperparameter.max_depth = [3, 5, 10, 15]
model/hyperparameter.min_samples_split = (2, 5, 10)
model/hyperparameter.min_samples_leaf = (1, 2, 4)
model/hyperparameter.max_features = ['sqrt', 'log2', 1.0]
model/hyperparameter.bootstrap = [True, False]
model/hyperparameter.class_weight = [None, 'balanced']


15 changes: 15 additions & 0 deletions configs/prediction_models/CBClassifier.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Settings for CatBoost classifier.

# Common settings for ML models
include "configs/prediction_models/common/MLCommon.gin"

# Train params
train_common.model = @CBClassifier

# Hyperparameter search space. Bracketed lists are discrete choices;
# (low, high, "log") presumably denotes a log-scaled sampling range —
# confirm against the tuning code.
model/hyperparameter.class_to_tune = @CBClassifier
model/hyperparameter.learning_rate = (1e-4, 0.5, "log")
model/hyperparameter.num_trees = [50, 100, 250, 500, 750,1000,1500]
model/hyperparameter.depth = [3, 5, 10, 15]
# Positive-class weight choices to counter class imbalance.
model/hyperparameter.scale_pos_weight = [1, 5, 10, 25, 50, 75, 99, 100, 1000]
# Number of splits for numerical feature discretization.
model/hyperparameter.border_count = [5, 10, 20, 50, 100, 200]
# L2 regularization on leaf values.
model/hyperparameter.l2_leaf_reg = [1, 3, 5, 7, 9]
6 changes: 3 additions & 3 deletions configs/prediction_models/GRU.gin
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ train_common.model = @GRUNet
# Optimizer params
optimizer/hyperparameter.class_to_tune = @Adam
optimizer/hyperparameter.weight_decay = 1e-6
optimizer/hyperparameter.lr = (1e-5, 3e-4)
optimizer/hyperparameter.lr = (1e-6, 1e-4, "log")

# Encoder params
model/hyperparameter.class_to_tune = @GRUNet
model/hyperparameter.num_classes = %NUM_CLASSES
model/hyperparameter.hidden_dim = (32, 256, "log-uniform", 2)
model/hyperparameter.layer_dim = (1, 3)
model/hyperparameter.hidden_dim = (32, 512, "log")
model/hyperparameter.layer_dim = (1, 10)

8 changes: 4 additions & 4 deletions configs/prediction_models/RFClassifier.gin
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ train_common.model = @RFClassifier

model/hyperparameter.class_to_tune = @RFClassifier
model/hyperparameter.n_estimators = (10, 50, 100, 200, 500)
model/hyperparameter.max_depth = (None, 5, 10, 20)
model/hyperparameter.max_depth = (5, 10, 20)
model/hyperparameter.min_samples_split = (2, 5, 10)
model/hyperparameter.min_samples_leaf = (1, 2, 4)
model/hyperparameter.max_features = ('sqrt', 'log2', None)
model/hyperparameter.bootstrap = (True, False)
model/hyperparameter.class_weight = (None, 'balanced')
model/hyperparameter.max_features = ['sqrt', 'log2', None]
model/hyperparameter.bootstrap = [True, False]
model/hyperparameter.class_weight = [None, 'balanced']


14 changes: 14 additions & 0 deletions configs/prediction_models/RUSBClassifier.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Settings for ImbLearn RUSBoost Classifier (random under-sampling + boosting).
# NOTE(review): the header previously read "Balanced Random Forest Classifier",
# apparently copy-pasted from BRFClassifier.gin; this file configures
# @RUSBClassifier.

# Common settings for ML models
include "configs/prediction_models/common/MLCommon.gin"

# Train params
train_common.model = @RUSBClassifier

# Hyperparameter search space.
# NOTE(review): the bare tuple for n_estimators may be parsed as a range
# rather than discrete choices — sibling configs use bracketed lists for
# categorical values; confirm against the tuning code.
model/hyperparameter.class_to_tune = @RUSBClassifier
model/hyperparameter.n_estimators = (10, 50, 100, 200, 500)
model/hyperparameter.learning_rate = (0.005, 1, "log")
model/hyperparameter.sampling_strategy = "auto"


6 changes: 3 additions & 3 deletions configs/prediction_models/TCN.gin
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ train_common.model = @TemporalConvNet
# Optimizer params
optimizer/hyperparameter.class_to_tune = @Adam
optimizer/hyperparameter.weight_decay = 1e-6
optimizer/hyperparameter.lr = (1e-5, 3e-4)
optimizer/hyperparameter.lr = (1e-6, 3e-4)

# Encoder params
model/hyperparameter.class_to_tune = @TemporalConvNet
model/hyperparameter.num_classes = %NUM_CLASSES
model/hyperparameter.max_seq_length = %HORIZON
model/hyperparameter.num_channels = (32, 256, "log-uniform", 2)
model/hyperparameter.kernel_size = (2, 32, "log-uniform", 2)
model/hyperparameter.num_channels = (32, 256, "log")
model/hyperparameter.kernel_size = (2, 128, "log")
model/hyperparameter.dropout = (0.0, 0.4)
14 changes: 7 additions & 7 deletions configs/prediction_models/Transformer.gin
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ train_common.model = @Transformer

optimizer/hyperparameter.class_to_tune = @Adam
optimizer/hyperparameter.weight_decay = 1e-6
optimizer/hyperparameter.lr = (1e-5, 3e-4)
optimizer/hyperparameter.lr = (1e-6, 1e-4)

# Encoder params
model/hyperparameter.class_to_tune = @Transformer
model/hyperparameter.ff_hidden_mult = 2
model/hyperparameter.l1_reg = 0.0
model/hyperparameter.ff_hidden_mult = (2,4,6,8)
model/hyperparameter.l1_reg = (0.0,1.0)
model/hyperparameter.num_classes = %NUM_CLASSES
model/hyperparameter.hidden = (32, 256, "log-uniform", 2)
model/hyperparameter.heads = (1, 8, "log-uniform", 2)
model/hyperparameter.hidden = (32, 512, "log")
model/hyperparameter.heads = (1, 8, "log")
model/hyperparameter.depth = (1, 3)
model/hyperparameter.dropout = (0.0, 0.4)
model/hyperparameter.dropout_att = (0.0, 0.4)
model/hyperparameter.dropout = 0 # no improvement (0.0, 0.4)
model/hyperparameter.dropout_att = (0.0, 1.0)


17 changes: 17 additions & 0 deletions configs/prediction_models/XGBClassifier.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Settings for XGBoost classifier.

# Common settings for ML models
include "configs/prediction_models/common/MLCommon.gin"

# Train params
train_common.model = @XGBClassifier

# Hyperparameter search space. Bracketed lists are discrete choices;
# (low, high, "log") presumably denotes a log-scaled sampling range —
# confirm against the tuning code.
model/hyperparameter.class_to_tune = @XGBClassifier
model/hyperparameter.learning_rate = (0.01, 0.1, "log")
model/hyperparameter.n_estimators = [50, 100, 250, 500, 750, 1000,1500,2000]
model/hyperparameter.max_depth = [3, 5, 10, 15]
# Positive-class weight choices to counter class imbalance.
model/hyperparameter.scale_pos_weight = [1, 5, 10, 15, 20, 25, 30, 35, 40, 50, 75, 99, 100, 1000]
model/hyperparameter.min_child_weight = [1, 0.5]
# Caps each tree's leaf output update; nonzero values help with extreme imbalance.
model/hyperparameter.max_delta_step = [0, 1, 2, 3, 4, 5, 10]
# Fraction of features sampled per tree.
model/hyperparameter.colsample_bytree = [0.1, 0.25, 0.5, 0.75, 1.0]
# Area under the precision-recall curve as the evaluation metric.
model/hyperparameter.eval_metric = "aucpr"
6 changes: 4 additions & 2 deletions configs/prediction_models/common/DLCommon.gin
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# Imports to register the models
import gin.torch.external_configurables
import icu_benchmarks.models.wrappers
import icu_benchmarks.models.dl_models
import icu_benchmarks.models.dl_models.rnn
import icu_benchmarks.models.dl_models.transformer
import icu_benchmarks.models.dl_models.tcn
import icu_benchmarks.models.utils

# Do not generate features from dynamic data
Expand All @@ -12,7 +14,7 @@ base_regression_preprocessor.generate_features = False

# Train params
train_common.optimizer = @Adam
train_common.epochs = 1000
train_common.epochs = 50
train_common.batch_size = 64
train_common.patience = 10
train_common.min_delta = 1e-4
Expand Down
2 changes: 1 addition & 1 deletion configs/prediction_models/common/DLTuning.gin
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
tune_hyperparameters.scopes = ["model", "optimizer"]
tune_hyperparameters.n_initial_points = 5
tune_hyperparameters.n_calls = 30
tune_hyperparameters.folds_to_tune_on = 2
tune_hyperparameters.folds_to_tune_on = 5
6 changes: 5 additions & 1 deletion configs/prediction_models/common/MLCommon.gin
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
# Imports to register the models
import gin.torch.external_configurables
import icu_benchmarks.models.wrappers
import icu_benchmarks.models.ml_models
import icu_benchmarks.models.ml_models.sklearn
import icu_benchmarks.models.ml_models.lgbm
import icu_benchmarks.models.ml_models.xgboost
import icu_benchmarks.models.ml_models.imblearn
import icu_benchmarks.models.ml_models.catboost
import icu_benchmarks.models.utils

# Patience for early stopping
Expand Down
6 changes: 3 additions & 3 deletions configs/prediction_models/common/MLTuning.gin
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Hyperparameter tuner settings for classical Machine Learning.
tune_hyperparameters.scopes = ["model"]
tune_hyperparameters.n_initial_points = 10
tune_hyperparameters.n_calls = 3
tune_hyperparameters.folds_to_tune_on = 1
tune_hyperparameters.n_initial_points = 5
tune_hyperparameters.n_calls = 30
tune_hyperparameters.folds_to_tune_on = 5
5 changes: 2 additions & 3 deletions configs/tasks/BinaryClassification.gin
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ DLPredictionWrapper.loss = @cross_entropy

# SELECTING PREPROCESSOR
preprocess.preprocessor = @base_classification_preprocessor
preprocess.modality_mapping = %modality_mapping
preprocess.vars = %vars
preprocess.use_static = True

# SELECTING DATASET
PredictionDataset.vars = %vars
PredictionDataset.ram_cache = True

include "configs/tasks/common/Dataloader.gin"

4 changes: 2 additions & 2 deletions configs/tasks/DatasetImputation.gin
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ preprocess.file_names = {
preprocess.preprocessor = @base_imputation_preprocessor

preprocess.vars = %vars
ImputationDataset.vars = %vars
ImputationDataset.ram_cache = True

include "configs/tasks/common/Dataloader.gin"

3 changes: 1 addition & 2 deletions configs/tasks/Regression.gin
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,5 @@ base_regression_preprocessor.outcome_min = 0
base_regression_preprocessor.outcome_max = 15

# SELECTING DATASET
PredictionDataset.vars = %vars
PredictionDataset.ram_cache = True
include "configs/tasks/common/Dataloader.gin"

8 changes: 8 additions & 0 deletions configs/tasks/common/Dataloader.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Shared dataset bindings included by the task configs: both the Pandas and
# Polars prediction dataset classes read the task's %vars macro and cache
# samples in RAM.
# Prediction
PredictionPandasDataset.vars = %vars
PredictionPandasDataset.ram_cache = True
PredictionPolarsDataset.vars = %vars
PredictionPolarsDataset.ram_cache = True
# Imputation
# NOTE(review): no Polars imputation dataset is bound here — confirm whether
# imputation is Pandas-only by design.
ImputationPandasDataset.vars = %vars
ImputationPandasDataset.ram_cache = True
8 changes: 8 additions & 0 deletions configs/tasks/common/PredictionTaskVariables.gin
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,12 @@ vars = {
"methb", "mg", "na", "neut", "o2sat", "pco2", "ph", "phos", "plt", "po2", "ptt", "resp", "sbp", "temp", "tnt", "urine",
"wbc"],
"STATIC": ["age", "sex", "height", "weight"],
}

# Mapping of modality groups to variable names, consumed via
# preprocess.modality_mapping for modality selection.
# NOTE(review): currently identical to the `vars` macro above — confirm
# whether this duplication is intentional or should reference %vars.
modality_mapping = {
    "DYNAMIC": ["alb", "alp", "alt", "ast", "be", "bicar", "bili", "bili_dir", "bnd", "bun", "ca", "cai", "ck", "ckmb", "cl",
    "crea", "crp", "dbp", "fgn", "fio2", "glu", "hgb", "hr", "inr_pt", "k", "lact", "lymph", "map", "mch", "mchc", "mcv",
    "methb", "mg", "na", "neut", "o2sat", "pco2", "ph", "phos", "plt", "po2", "ptt", "resp", "sbp", "temp", "tnt", "urine",
    "wbc"],
    "STATIC": ["age", "sex", "height", "weight"],
}
18 changes: 18 additions & 0 deletions docs/adding_model/RNN.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Settings for Recurrent Neural Network (RNN) models.
# (Documentation example for adding a new model.)

# Common settings for DL models
include "configs/prediction_models/common/DLCommon.gin"

# Train params
train_common.model = @RNNet

# Optimizer params
optimizer/hyperparameter.class_to_tune = @Adam
optimizer/hyperparameter.weight_decay = 1e-6
optimizer/hyperparameter.lr = (1e-5, 3e-4)

# Encoder params
# NOTE(review): this example still uses the old 4-element "log-uniform"
# range format, while the updated model configs (e.g. GRU.gin) now use
# 3-element (low, high, "log") tuples — confirm the tuner still accepts
# this form, or update the example to match.
model/hyperparameter.class_to_tune = @RNNet
model/hyperparameter.num_classes = %NUM_CLASSES
model/hyperparameter.hidden_dim = (32, 256, "log-uniform", 2)
model/hyperparameter.layer_dim = (1, 3)
Loading

0 comments on commit 4dd915e

Please sign in to comment.