diff --git a/intro.ipynb b/intro.ipynb
index d4f22a41bd..d7e4fbb656 100644
--- a/intro.ipynb
+++ b/intro.ipynb
@@ -1,5 +1,21 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"DISABLE_MLFLOW_INTEGRATION\"] = \"true\"\n",
+    "os.environ[\"WANDB_DISABLED\"] = \"true\""
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -18,23 +34,28 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
    "source": [
+    "# Basic configuration for a QLoRA fine-tune of TinyLlama\n",
     "cfg = DictDefault({\n",
     "    \"base_model\": \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\",\n",
-    "    \"load_in_8bit\": True,\n",
+    "    \"model_kwargs\": {\"from_tf\": True},\n",
+    "    \"load_in_4bit\": True,\n",
     "    \"datasets\": [{\"path\": \"mhenrichsen/alpaca_2k_test\", \"type\": \"alpaca\"}],\n",
     "    \"val_set_size\": 0.1,\n",
     "    \"output_dir\": \"./lora-out\",\n",
     "    \"sequence_len\": 2048,\n",
     "    \"sample_packing\": True,\n",
     "    \"pad_to_sequence_len\": True,\n",
-    "    \"adapter\": \"lora\",\n",
+    "    \"adapter\": \"qlora\",\n",
     "    \"lora_r\": 32,\n",
     "    \"lora_alpha\": 16,\n",
     "    \"lora_dropout\": 0.1,\n",
-    "    \"lora_target_linear\": true,\n",
+    "    \"lora_target_linear\": True,\n",
     "    \"gradient_accumulation_steps\": 4,\n",
-    "    \"micro_batch_size\": 2,\n",
+    "    \"micro_batch_size\": 1,\n",
+    "    \"eval_batch_size\": 1,\n",
     "    \"num_epochs\": 1,\n",
     "    \"optimizer\": \"adamw_bnb_8bit\",\n",
     "    \"lr_scheduler\": \"cosine\",\n",
@@ -47,13 +68,12 @@
     "    \"evals_per_epoch\": 4,\n",
     "    \"saves_per_epoch\": 1,\n",
     "    \"weight_decay\": 0.0,\n",
-    "    \"wandb_disabled\": True,\n",
     "})\n"
    ],
    "metadata": {
     "collapsed": false,
     "pycharm": {
-     "name": "#%% md\n"
+     "name": "#%%\n"
     }
    }
   },
@@ -62,6 +82,7 @@
    "execution_count": null,
    "outputs": [],
    "source": [
+    "# Kick off the training\n",
     "model, tokenizer = do_train(cfg, TrainerCliArgs())\n"
    ],
    "metadata": {
diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 18dc353a23..6a07658df6 100644
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -745,9 +745,10 @@ def build(self, total_num_steps):
         training_arguments_kwargs[
             "per_device_train_batch_size"
         ] = self.cfg.micro_batch_size
-        training_arguments_kwargs[
-            "per_device_eval_batch_size"
-        ] = self.cfg.eval_batch_size
+        if self.cfg.eval_batch_size:
+            training_arguments_kwargs[
+                "per_device_eval_batch_size"
+            ] = self.cfg.eval_batch_size
         training_arguments_kwargs[
             "gradient_accumulation_steps"
         ] = self.cfg.gradient_accumulation_steps
diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py
index 40be0d9ac8..8f33665c69 100644
--- a/src/axolotl/utils/bench.py
+++ b/src/axolotl/utils/bench.py
@@ -20,7 +20,8 @@ def wrapper(*args, **kwargs):
             device = kwargs.get("device", args[0] if args else None)
 
             if (
-                not torch.cuda.is_available()
+                device is None
+                or not torch.cuda.is_available()
                 or device == "auto"
                 or torch.device(device).type == "cpu"
             ):
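
Note on the bench.py hunk: the decorator resolves the device from the call's args/kwargs rather than the wrapped function's defaults, so a bare call leaves `device` as None and `torch.device(None)` raises a TypeError when CUDA is available. The sketch below is an illustrative, self-contained reconstruction of that guard under the assumption that the surrounding decorator looks roughly like this; the names `check_cuda_device`, `gpu_memory_usage`, and the 0.0 default are used for illustration, not quoted from the patched file.

import functools

import torch


def check_cuda_device(default_value):
    """Return default_value instead of calling the wrapped function
    when there is no usable CUDA device to measure."""

    def deco(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Device comes from the call site, not the function's defaults,
            # so a bare call leaves it as None.
            device = kwargs.get("device", args[0] if args else None)

            if (
                device is None  # checked first, before torch.device(None) can raise
                or not torch.cuda.is_available()
                or device == "auto"
                or torch.device(device).type == "cpu"
            ):
                return default_value
            return func(*args, **kwargs)

        return wrapper

    return deco


@check_cuda_device(0.0)
def gpu_memory_usage(device=0):
    # Allocated CUDA memory in GiB for the given device.
    return torch.cuda.memory_allocated(device) / 1024**3


print(gpu_memory_usage())          # device resolves to None -> returns 0.0
print(gpu_memory_usage(device=0))  # measures device 0, or 0.0 without CUDA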