diff --git a/examples/ruGPT3XL_finetune_example.ipynb b/examples/ruGPT3XL_finetune_example.ipynb new file mode 100644 index 0000000..3622f77 --- /dev/null +++ b/examples/ruGPT3XL_finetune_example.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run finetuning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Prepare your files and split them into shards. All files will be split between all GPUs. All paths to text files should be stored in `files.list`\n", + "\n", + "Example of `files.list` for a single GPU:\n", + "\n", + "```text\n", + "data/train.txt\n", + "```\n", + "\n", + "* Add your path to the template script in our repo [scripts/deepspeed_gpt3_xl_finetune.sh](https://github.com/sberbank-ai/ru-gpts/blob/master/scripts/deepspeed_gpt3_xl_finetune.sh)\n", + "\n", + "* `cd scripts` to go to the scripts dir\n", + "\n", + "* run the command\n", + "\n", + "```bash\n", + "sh deepspeed_gpt3_xl_finetune.sh\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7TbybJfIpBVa" + }, + "source": [ + "### Test model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Load model" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "EnTy1SEajpPV" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "Z-DSEz0ljpPV" + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"../../ru-gpts/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "_9GABoxNVpH4" + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"USE_DEEPSPEED\"] = \"1\"" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "_HeCFnJEjpPV" + }, + "outputs": [], + "source": [ + "from src.xl_wrapper import RuGPT3XL" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OdyughHDjpPV" + }, + "source": [ + "Note! seq_len is the max sequence length used in the generation process. Max available seq_len is 2048 (in tokens).\n", + "Also, inference takes around 10 GB of GPU memory." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "56aNJNPYjpPW", + "outputId": "aa154214-82b6-4604-d0b7-8ad7629aa704", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> initializing model parallel with size 1\n", + "> initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/user/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Use alternating sparse & dense attention layers\n" + ] + } + ], + "source": [ + "gpt = RuGPT3XL.from_pretrained(\n", + " \"sberbank-ai/rugpt3xl\",\n", + " weights_path=\"/path/2/save/model/train-iters/mp_rank_00_model_states.pt\",\n", + " deepspeed_config_path=\"../src/deepspeed_config/gpt3_xl_sparse_2048.json\",\n", + " seq_len=512\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of generation" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7Qw65CVzjpPZ", + "outputId": "79a31fb8-656d-4923-f208-1589d675d7c7" + }, + "outputs": [], + "source": [ + "def generate(text, additional_len=20):\n", + " return filter_resuls(gpt.generate(\n", + " text=text,\n", + " max_length=len(gpt.tokenizer.encode(text)) + additional_len,\n", + " num_beams=10,\n", + " eos_token_id=gpt.tokenizer.eos_token_id,\n", + " num_return_sequences=1,\n", + " ))[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def filter_resuls(nr):\n", + " return [x[:x.find(\"<|endoftext|>\")] for x in nr]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Фрегат «Адмирал Макаров» вошел в Средиземное море\\u2028Большой противолодочный корабль «Вице-адмирал Кулак'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generate(\"Фрегат «Адмирал Макаров»\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "ruGPT3XL_generation", + "provenance": [] + 
}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}