From 672b243ce6f0b1cdf8a9f36f67c67d246a9d5472 Mon Sep 17 00:00:00 2001 From: Stefan Kahl Date: Fri, 20 Oct 2023 16:21:46 +0200 Subject: [PATCH] Dropout regularization --- README.adoc | 1 + config.py | 3 +++ model.py | 7 +++++++ train.py | 2 ++ 4 files changed, 13 insertions(+) diff --git a/README.adoc b/README.adoc index 1ca6ff92..de29921f 100644 --- a/README.adoc +++ b/README.adoc @@ -684,6 +684,7 @@ Here is a list of all command line arguments: --val_split, Validation split ratio. Defaults to 0.2. --learning_rate, Learning rate. Defaults to 0.01. --hidden_units, Number of hidden units. Defaults to 0. If set to >0, a two-layer classifier is used. +--dropout, Dropout rate. Values between 0 and 0.9. Defaults to 0. --mixup, Whether to use mixup for training. --upsampling_ratio, Balance train data and upsample minority classes. Values between 0 and 1. Defaults to 0. --upsampling_mode, Upsampling mode. Can be 'repeat', 'mean' or 'smote'. Defaults to 'repeat'. diff --git a/config.py b/config.py index 17e5b752..2913701e 100644 --- a/config.py +++ b/config.py @@ -127,6 +127,9 @@ # If >0, a two-layer classifier will be trained TRAIN_HIDDEN_UNITS: int = 0 +# Dropout rate for training +TRAIN_DROPOUT: float = 0.0 + # Whether to use mixup for training TRAIN_WITH_MIXUP: bool = False diff --git a/model.py b/model.py index bce59b89..a37d6c3f 100644 --- a/model.py +++ b/model.py @@ -135,8 +135,15 @@ def buildLinearClassifier(num_labels, input_size, hidden_units=0): # Hidden layer if hidden_units > 0: + # Dropout layer? + if cfg.TRAIN_DROPOUT > 0: + model.add(keras.layers.Dropout(cfg.TRAIN_DROPOUT)) model.add(keras.layers.Dense(hidden_units, activation="relu")) + # Dropout layer? 
+ if cfg.TRAIN_DROPOUT > 0: + model.add(keras.layers.Dropout(cfg.TRAIN_DROPOUT)) + # Classification layer model.add(keras.layers.Dense(num_labels)) diff --git a/train.py b/train.py index a8bba5f9..59526c1b 100644 --- a/train.py +++ b/train.py @@ -176,6 +176,7 @@ def trainModel(on_epoch_end=None): default=0, help="Number of hidden units. Defaults to 0. If set to >0, a two-layer classifier is used.", ) + parser.add_argument("--dropout", type=float, default=0.0, help="Dropout rate. Defaults to 0.") parser.add_argument("--mixup", action=argparse.BooleanOptionalAction, help="Whether to use mixup for training.") parser.add_argument("--upsampling_ratio", type=float, default=0.0, help="Balance train data and upsample minority classes. Values between 0 and 1. Defaults to 0.") parser.add_argument("--upsampling_mode", default="repeat", help="Upsampling mode. Can be 'repeat', 'mean' or 'smote'. Defaults to 'repeat'.") @@ -195,6 +196,7 @@ def trainModel(on_epoch_end=None): cfg.TRAIN_VAL_SPLIT = args.val_split cfg.TRAIN_LEARNING_RATE = args.learning_rate cfg.TRAIN_HIDDEN_UNITS = args.hidden_units + cfg.TRAIN_DROPOUT = min(max(0, args.dropout), 0.9) cfg.TRAIN_WITH_MIXUP = args.mixup cfg.UPSAMPLING_RATIO = min(max(0, args.upsampling_ratio), 1) cfg.UPSAMPLING_MODE = args.upsampling_mode