diff --git a/README.md b/README.md
index af22121a0..25afee748 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,23 @@
 # Keras Tuner

-An hyperparameter tuner for [Keras](https://keras.io), specifically for `tf.keras` with TensorFlow 2.0.
+A hyperparameter tuner for [Keras](https://keras.io), specifically for `tf.keras` with TensorFlow 2.0.

-**Status: Beta.**
+Full documentation and tutorials are available on the [Keras Tuner website](https://keras-team.github.io/keras-tuner/).

 ## Installation

 Requirements:

 - Python 3.6
-- TensorFlow 2.0 (Beta-1 release or higher)
+- TensorFlow 2.0

-Installation process:
+Install the latest release:
+
+```
+pip install -U keras-tuner
+```
+
+Install from source:

 ```
 git clone https://github.com/keras-team/keras-tuner.git
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 56d15693e..8dc8128c7 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -11,6 +11,7 @@ nav:
 - Home: index.md
 - Tutorials:
   - Distributed Tuning: tutorials/distributed-tuning.md
+  - Subclassing Tuner for Custom Training Loops: tutorials/subclass-tuner.md
 - Documentation:
   - HyperModels: documentation/hypermodels.md
   - HyperParameters: documentation/hyperparameters.md
diff --git a/docs/templates/tutorials/distributed-tuning.md b/docs/templates/tutorials/distributed-tuning.md
index 90edc7cc7..2892c1eab 100644
--- a/docs/templates/tutorials/distributed-tuning.md
+++ b/docs/templates/tutorials/distributed-tuning.md
@@ -58,8 +58,8 @@ def build_model(hp):
     x = inputs
     for i in range(hp.Int('conv_layers', 1, 3, default=3)):
         x = tf.keras.layers.Conv2D(
-            filters=hp.Int('filters_' + str(i), 4, 32, step=4, default=8)
-            kernel_size=hp.Int('kernel_size_' + str(i), 3, 5)
+            filters=hp.Int('filters_' + str(i), 4, 32, step=4, default=8),
+            kernel_size=hp.Int('kernel_size_' + str(i), 3, 5),
             activation='relu',
             padding='same')(x)

diff --git a/docs/templates/tutorials/subclass-tuner.md b/docs/templates/tutorials/subclass-tuner.md
new file mode 100644
index 000000000..79b3878a0
--- /dev/null
+++ b/docs/templates/tutorials/subclass-tuner.md
@@ -0,0 +1,180 @@
+# Subclassing Tuner for Custom Training Loops
+
+The `Tuner` class at `kerastuner.engine.tuner.Tuner` can be subclassed to support advanced uses such as:
+
+- Custom training loops (GANs, reinforcement learning, etc.)
+- Adding hyperparameters outside of the model building function (preprocessing, data augmentation, test-time augmentation, etc.)
+
+This tutorial does not cover subclassing to support non-Keras models. For that use case, subclass the `kerastuner.engine.base_tuner.BaseTuner` class instead (see `kerastuner.tuners.sklearn.Sklearn` for an example).
+
+### Understanding the search process
+
+Any arguments passed to `Tuner.search` are forwarded directly to `Tuner.run_trial`, along with a `Trial` object that contains information about the current trial, including its hyperparameters and status. Typically, `Tuner.run_trial` is the only method that users need to override when subclassing `Tuner`.
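+
+For example, a `tf.data.Dataset` can be forwarded through `search` to `run_trial` under whatever name you choose. The snippet below is a minimal sketch of that flow; `train_ds` is an illustrative name, and `tuner` and `mnist_train` are assumed to be set up as in the end-to-end example further down.
+
+```python
+import kerastuner as kt
+
+
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, train_ds):
+        # `trial` carries this trial's hyperparameters and status;
+        # `train_ds` arrives exactly as it was passed to `search`.
+        model = self.hypermodel.build(trial.hyperparameters)
+        ...
+
+
+# The `train_ds` keyword argument is forwarded to `run_trial`.
+tuner.search(train_ds=mnist_train)
+```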
+
+### Overriding `run_trial`
+
+There are two ways to write `run_trial`. One is to leverage `Tuner`'s built-in callback hooks, which send the value of the `objective` to the `Oracle` and save the latest state of the Model. These hooks are:
+
+* `self.on_epoch_end`: Must be called. Reports results to the `Oracle` and saves the Model. The `logs` dictionary passed to this method must contain the `objective` name.
+* `self.on_epoch_begin`, `self.on_batch_begin`, `self.on_batch_end`: Optional. These methods do nothing in `Tuner`, but they are useful to provide as hooks if you expect users of your subclass to create further subclasses that override these parts of the training process.
+
+```python
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, ...):
+        model = self.hypermodel.build(trial.hyperparameters)
+        for epoch in range(10):
+            epoch_loss = ...
+            self.on_epoch_end(trial, model, epoch, logs={'loss': epoch_loss})
+```
+
+Alternatively, you can directly call the methods used to report results to the `Oracle` and save the Model. This allows more flexibility for use cases where there is no natural concept of epoch, or where you do not want to report results to the `Oracle` after each epoch. These methods are:
+
+* `self.oracle.update_trial`: Reports current results to the `Oracle`. The `metrics` dictionary passed to this method must contain the `objective` name.
+* `self.save_model`: Saves the trained model.
+
+```python
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, ...):
+        model = self.hypermodel.build(trial.hyperparameters)
+        score = ...
+        self.oracle.update_trial(trial.trial_id, {'score': score})
+        self.save_model(trial.trial_id, model)
+```
+
+### Adding HyperParameters during preprocessing, evaluation, etc.
+
+New `HyperParameter`s can be defined anywhere in `run_trial`, in the same way that `HyperParameter`s are defined in a `HyperModel`. These hyperparameters take on their default value the first time they are encountered, and are thereafter tuned by the `Oracle`.
+
+```python
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, ...):
+        hp = trial.hyperparameters
+        model = self.hypermodel.build(hp)
+
+        batch_size = hp.Int('batch_size', 32, 128, step=32)
+        random_flip = hp.Boolean('random_flip')
+        ...
+```
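+
+As one possible continuation of the snippet above (a sketch only: it assumes the dataset yields `(image, label)` pairs, and `tf.image.random_flip_left_right` is used purely as an example of preprocessing controlled by a hyperparameter), these values can drive the input pipeline directly inside `run_trial`:
+
+```python
+import kerastuner as kt
+import tensorflow as tf
+
+
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, train_ds):
+        hp = trial.hyperparameters
+        model = self.hypermodel.build(hp)
+
+        batch_size = hp.Int('batch_size', 32, 128, step=32)
+        random_flip = hp.Boolean('random_flip')
+
+        # Apply the tuned preprocessing choices to the input pipeline.
+        if random_flip:
+            train_ds = train_ds.map(
+                lambda image, label: (tf.image.random_flip_left_right(image), label))
+        train_ds = train_ds.batch(batch_size)
+        ...
+```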
+
+### End-to-end Example
+
+```python
+import kerastuner as kt
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+
+def build_model(hp):
+    """Builds a convolutional model."""
+    inputs = tf.keras.Input(shape=(28, 28, 1))
+    x = inputs
+    for i in range(hp.Int('conv_layers', 1, 3, default=3)):
+        x = tf.keras.layers.Conv2D(
+            filters=hp.Int('filters_' + str(i), 4, 32, step=4, default=8),
+            kernel_size=hp.Int('kernel_size_' + str(i), 3, 5),
+            activation='relu',
+            padding='same')(x)
+
+        if hp.Choice('pooling' + str(i), ['max', 'avg']) == 'max':
+            x = tf.keras.layers.MaxPooling2D()(x)
+        else:
+            x = tf.keras.layers.AveragePooling2D()(x)
+
+        x = tf.keras.layers.BatchNormalization()(x)
+        x = tf.keras.layers.ReLU()(x)
+
+    if hp.Choice('global_pooling', ['max', 'avg']) == 'max':
+        x = tf.keras.layers.GlobalMaxPooling2D()(x)
+    else:
+        x = tf.keras.layers.GlobalAveragePooling2D()(x)
+    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
+
+    model = tf.keras.Model(inputs, outputs)
+
+    optimizer = hp.Choice('optimizer', ['adam', 'sgd'])
+    model.compile(
+        optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
+    return model
+
+
+class MyTuner(kt.Tuner):
+
+    def run_trial(self, trial, train_ds):
+        hp = trial.hyperparameters
+
+        # Hyperparameters can be added anywhere inside `run_trial`.
+        # When the first trial is run, they take on their default values.
+        # Afterwards, they are tuned by the `Oracle`.
+        train_ds = train_ds.batch(
+            hp.Int('batch_size', 32, 128, step=32, default=64))
+
+        model = self.hypermodel.build(trial.hyperparameters)
+        lr = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log', default=1e-3)
+        optimizer = tf.keras.optimizers.Adam(lr)
+        epoch_loss_metric = tf.keras.metrics.Mean()
+
+        @tf.function
+        def run_train_step(data):
+            images = tf.dtypes.cast(data['image'], 'float32') / 255.
+            labels = data['label']
+            with tf.GradientTape() as tape:
+                logits = model(images)
+                loss = tf.keras.losses.sparse_categorical_crossentropy(
+                    labels, logits)
+                # Add any regularization losses.
+                if model.losses:
+                    loss += tf.math.add_n(model.losses)
+            gradients = tape.gradient(loss, model.trainable_variables)
+            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+            epoch_loss_metric.update_state(loss)
+            return loss
+
+        # `self.on_epoch_end` reports results to the `Oracle` and saves the
+        # current state of the Model. The other hooks called here only log
+        # values for display, but they can also be overridden. For use cases
+        # where there is no natural concept of epoch, you do not have to call
+        # any of these hooks; in that case, call `self.oracle.update_trial`
+        # and `self.save_model` manually instead.
+        for epoch in range(10):
+            print('Epoch: {}'.format(epoch))
+
+            self.on_epoch_begin(trial, model, epoch, logs={})
+            for batch, data in enumerate(train_ds):
+                self.on_batch_begin(trial, model, batch, logs={})
+                batch_loss = float(run_train_step(data))
+                self.on_batch_end(trial, model, batch, logs={'loss': batch_loss})
+
+                if batch % 100 == 0:
+                    loss = epoch_loss_metric.result().numpy()
+                    print('Batch: {}, Average Loss: {}'.format(batch, loss))
+
+            epoch_loss = epoch_loss_metric.result().numpy()
+            self.on_epoch_end(trial, model, epoch, logs={'loss': epoch_loss})
+            epoch_loss_metric.reset_states()
+
+
+def main():
+    tuner = MyTuner(
+        oracle=kt.oracles.BayesianOptimization(
+            objective=kt.Objective('loss', 'min'),
+            max_trials=2),
+        hypermodel=build_model,
+        directory='results',
+        project_name='mnist_custom_training')
+
+    mnist_data = tfds.load('mnist')
+    mnist_train, mnist_test = mnist_data['train'], mnist_data['test']
+    mnist_train = mnist_train.shuffle(1000)
+
+    tuner.search(train_ds=mnist_train)
+
+    best_hps = tuner.get_best_hyperparameters()[0]
+    print(best_hps.values)
+
+    best_model = tuner.get_best_models()[0]
+
+
+if __name__ == '__main__':
+    main()
+```
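+
+In `main()` above, `mnist_test` is loaded but never used. As a hedged follow-up (a sketch, not part of the original example; it reuses the `tf` import and the `tuner` from `main()`), the best model can be evaluated on it, provided the test images receive the same preprocessing that `run_train_step` applies during training:
+
+```python
+def evaluate_best_model(tuner, mnist_test):
+    """Evaluates the best model found by the search on the MNIST test split."""
+    best_model = tuner.get_best_models()[0]
+
+    def to_inputs(data):
+        # Match the preprocessing used in `run_train_step`.
+        image = tf.dtypes.cast(data['image'], 'float32') / 255.
+        return image, data['label']
+
+    test_ds = mnist_test.map(to_inputs).batch(64)
+    # `build_model` compiled the model with a loss and metrics, so
+    # `evaluate` can be called directly.
+    loss, accuracy = best_model.evaluate(test_ds)
+    print('Test loss: {}, test accuracy: {}'.format(loss, accuracy))
+```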