diff --git a/src/python/examples/pymarian-EMNLP24.ipynb b/src/python/examples/pymarian-EMNLP24.ipynb new file mode 100755 index 000000000..83ddd1784 --- /dev/null +++ b/src/python/examples/pymarian-EMNLP24.ipynb @@ -0,0 +1,1159 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "P3V5qVVV11Md" + }, + "source": [ + "# Pymarian Quick Start\n", + "\n", + "Last updated: 2024-10-18\n", + "\n", + ">This notebook accompanies Pymarian demo paper @ EMNLP24 Demo.\n", + ">* OpenReview link: https://openreview.net/forum?id=3BKsyqIieh\n", + ">* ArXiv : https://arxiv.org/abs/2408.11853\n", + ">* Benchmarking Scripts: https://github.com/thammegowda/017-pymarian\n", + "\n", + "\n", + "In this notebook, we demonstrate how to work with Pymarian APIs.\n", + "* Evaluator\n", + "* Translator\n", + "* Trainer\n", + "\n", + "---\n", + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9KoqMLQ8qhhq", + "outputId": "82db372f-60ad-4b91-e441-8f405aa6cae5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pymarian==1.12.31\n", + " Downloading pymarian-1.12.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)\n", + "Collecting portalocker (from pymarian==1.12.31)\n", + " Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from pymarian==1.12.31) (6.0.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from pymarian==1.12.31) (4.66.5)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from pymarian==1.12.31) (2.32.3)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from pymarian==1.12.31) (0.23.5)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub->pymarian==1.12.31) (3.15.4)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->pymarian==1.12.31) (2024.6.1)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->pymarian==1.12.31) (24.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->pymarian==1.12.31) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->pymarian==1.12.31) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->pymarian==1.12.31) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->pymarian==1.12.31) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->pymarian==1.12.31) (2024.7.4)\n", + "Downloading pymarian-1.12.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (602.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m602.7/602.7 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading portalocker-2.10.1-py3-none-any.whl (18 kB)\n", + "Installing collected packages: portalocker, pymarian\n", + "Successfully installed portalocker-2.10.1 pymarian-1.12.31\n" + ] + } + ], + "source": [ + "# Install pymarian\n", + "!pip install pymarian==1.12.31" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u-GeQtsoxct5", + "outputId": "896068cd-d55c-43e1-c88b-82b161ce5caf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"pymarian-eval 1.12.31\n" + ] + } + ], + "source": [ + "!pymarian-eval --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ff26zQAFPh6p", + "outputId": "1cfef76e-954e-42ff-e2f5-f1f2a7be0c1c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python 3.10.12 (main, Jul 29 2024, 16:56:48) [GCC 11.4.0]; pymarian 1.12.31\n" + ] + } + ], + "source": [ + "import sys\n", + "import urllib\n", + "import tarfile\n", + "from pathlib import Path\n", + "from huggingface_hub import hf_hub_download as hf_get\n", + "import pymarian\n", + "print(f'Python {sys.version}; pymarian {pymarian.__version__}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8E4ft-ek1SUf" + }, + "source": [ + "---\n", + "## Evaluator\n", + "\n", + "\n", + "NOTE: run `huggingface-cli login` for accessing gated models such as cometkiwi22 or newer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 243, + "referenced_widgets": [ + "637ca7ba08e949fea3785eff934f020a", + "523886666955408ead223a9a4000f875", + "4e465ef0da6e42ffb3e5c16662df9091", + "dc33862700374d37ae3a2e6a1a475a0f", + "2b229f0467e549828ec45cc2827bca11", + "027ebfb5f34f4fda96b401a8b155476a", + "cc82d496719b404caf0bf81a7b5a97fe", + "9ede3132d44d4f4b861b4e420cea24be", + "ebf25c1c64ae4081aedb89fb152a1ded", + "2d60f2ba4d58406f987b36494ef03f82", + "4f0f617b5e9a463080f69e758f0f2d28", + "fc179b3b59a94d6f8030280ba52b3c5a", + "984f2ee5ee4241c599d7d505122830ce", + "ae68418d1a034d0787dd2d2ebd9e7981", + "4ec14312802e4a16b13b76001853d659", + "ec6263930052437492882363b59bd385", + "4760b72992d542028064a7f05e80b517", + "6e76351324f84e4fb45cc9440faca4e5", + "6313d37086a848148893de8a18f42de5", + "56426bccb6cc4902ba2ef498af311972", + "65242377d0ee40e28fb47ddafadd9140", + "43b6326733b54938a32ec5b6d9fd641b" + ] + }, + "id": 
"T8ay4DGo1Pv9", + "outputId": "79907f72-15cc-44c5-b712-fef483a831ee" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "637ca7ba08e949fea3785eff934f020a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "marian.model.bin: 0%| | 0.00/2.28G [00:00 /root/tmp/marian-tests-data/deu-eng/sample.5k.deu <==\n", + "Steigt Gold auf 10.000 Dollar?\n", + "SAN FRANCISCO – Es war noch nie leicht, ein rationales Gespräch über den Wert von Gold zu führen.\n", + "In letzter Zeit allerdings ist dies schwieriger denn je, ist doch der Goldpreis im letzten Jahrzehnt um über 300 Prozent angestiegen.\n", + "Erst letzten Dezember verfassten meine Kollegen Martin Feldstein und Nouriel Roubini Kommentare, in denen sie mutig die vorherrschende optimistische Marktstimmung hinterfragten und sehr überlegt auf die Risiken des Goldes  hinwiesen.\n", + "\n", + "==> /root/tmp/marian-tests-data/deu-eng/sample.5k.eng <==\n", + "$10,000 Gold?\n", + "SAN FRANCISCO – It has never been easy to have a rational conversation about the value of gold.\n", + "Lately, with gold prices up more than 300% over the last decade, it is harder than ever.\n", + "Just last December, fellow economists Martin Feldstein and Nouriel Roubini each penned op-eds bravely questioning bullish market sentiment, sensibly pointing out gold’s 
risks.\n" + ] + } + ], + "source": [ + "data_url = \"https://textmt.blob.core.windows.net/www/data/marian-tests-data.tgz\"\n", + "data_dir = Path.home() / 'tmp' / 'marian-tests-data'\n", + "data_dir.mkdir(parents=True, exist_ok=True)\n", + "vocab_file = data_dir / 'deu-eng/vocab.8k.spm'\n", + "train_src = data_dir / 'deu-eng/sample.5k.deu'\n", + "train_tgt = train_src.with_suffix('.eng')\n", + "\n", + "if not train_tgt.exists():  # download only when the extracted target-side file is missing\n", + "    print(f\"Downloading data package... to {data_dir}\")\n", + "    safe_extract = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}  # PEP 706: reject unsafe archive members when this Python supports it\n", + "    with urllib.request.urlopen(data_url) as response, tarfile.open(fileobj=response, mode=\"r|gz\") as tar:\n", + "        tar.extractall(path=data_dir.parent, **safe_extract)\n", + "    print(\"Downloaded the data package\")\n", + "\n", + "!head -n4 {train_src} {train_tgt}\n", + "\n", + "vocab_file = str(vocab_file)\n", + "train_src = str(train_src)\n", + "train_tgt = str(train_tgt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fT7JnH5d1uoQ" + }, + "outputs": [], + "source": [ + "from pymarian import Trainer\n", + "args = {\n", + " 'type': 'transformer',\n", + " 'dim_emb': 512,\n", + " 'enc_depth': 6,\n", + " 'dec_depth': 6,\n", + " 'tied_embeddings_all': True,\n", + " 'transformer_heads': 8,\n", + " 'transformer_dim_ffn': 2048,\n", + " 'transformer_ffn_activation': 'relu',\n", + " 'transformer_dropout': 0.1,\n", + " 'cost_type': 'ce-mean-words',\n", + " 'max_length': 80,\n", + " 'mini_batch_fit': False,\n", + " 'maxi_batch': 256,\n", + " 'optimizer_params': [0.9, 0.98, 1e-09],\n", + " 'sync_sgd': True,\n", + " 'learn_rate': 0.0003,\n", + " 'lr_decay_inv_sqrt': [16000],\n", + " 'lr_warmup': 16000,\n", + " 'label_smoothing': 0.1,\n", + " 'clip_norm': 0,\n", + " 'exponential_smoothing': 0.0001,\n", + " 'early_stopping': 8,\n", + " 'keep_best': True,\n", + " 'beam_size': 2,\n", + " 'normalize': 1,\n", + " 'valid_metrics': ['ce-mean-words', 'bleu', 'perplexity'],\n", + " 'valid_mini_batch': 16,\n", + " 'mini_batch': '1Mt',\n", + " 'after': '100e', 
# stop after 100 epochs\n", + " 'valid_freq': '100Mt', # validate every 100M target labels\n", + " 'disp_freq': '50kt',\n", + " 'disp_first': 10,\n", + " 'save_freq': '100Mt',\n", + " 'vocabs': [vocab_file, vocab_file],\n", + " 'train_sets': [train_src, train_tgt],\n", + " 'quiet': False,\n", + "}\n", + "\n", + "args['model'] = f'{data_dir.parent}/model.npz'\n", + "\n", + "trainer = Trainer(**args)\n", + "trainer.train()\n", + "\n", + "# careful with notebook retaining objects in memory\n", + "# you can't create a second object if the first one is still consuming GPU RAM\n", + "del trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DnCvzd6ojTMB", + "outputId": "4ac287a1-8743-4eac-86af-06e684a7ad15" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "marian-models\t model.iter200.npz.decoder.yml model.npz\t\t model.npz.progress.yml\n", + "marian-tests-data model.iter400.npz\t\t model.npz.decoder.yml model.npz.yml\n", + "model.iter200.npz model.iter400.npz.decoder.yml model.npz.optimizer.npz\n" + ] + } + ], + "source": [ + "!ls {data_dir.parent}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tSu_g0RvkIyZ" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "027ebfb5f34f4fda96b401a8b155476a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2b229f0467e549828ec45cc2827bca11": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2d60f2ba4d58406f987b36494ef03f82": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "43b6326733b54938a32ec5b6d9fd641b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4760b72992d542028064a7f05e80b517": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e465ef0da6e42ffb3e5c16662df9091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9ede3132d44d4f4b861b4e420cea24be", + "max": 2277369907, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_ebf25c1c64ae4081aedb89fb152a1ded", + "value": 2277369907 + } + }, + "4ec14312802e4a16b13b76001853d659": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_65242377d0ee40e28fb47ddafadd9140", + "placeholder": "​", + "style": "IPY_MODEL_43b6326733b54938a32ec5b6d9fd641b", + "value": " 5.07M/5.07M [00:00<00:00, 11.2MB/s]" + } + }, + "4f0f617b5e9a463080f69e758f0f2d28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "523886666955408ead223a9a4000f875": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_027ebfb5f34f4fda96b401a8b155476a", + "placeholder": "​", + "style": "IPY_MODEL_cc82d496719b404caf0bf81a7b5a97fe", + "value": "marian.model.bin: 100%" + } + }, + "56426bccb6cc4902ba2ef498af311972": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6313d37086a848148893de8a18f42de5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "637ca7ba08e949fea3785eff934f020a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_523886666955408ead223a9a4000f875", + "IPY_MODEL_4e465ef0da6e42ffb3e5c16662df9091", + "IPY_MODEL_dc33862700374d37ae3a2e6a1a475a0f" + ], + "layout": "IPY_MODEL_2b229f0467e549828ec45cc2827bca11" + } + }, + "65242377d0ee40e28fb47ddafadd9140": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6e76351324f84e4fb45cc9440faca4e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } + }, + "984f2ee5ee4241c599d7d505122830ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4760b72992d542028064a7f05e80b517", + "placeholder": "​", + "style": "IPY_MODEL_6e76351324f84e4fb45cc9440faca4e5", + "value": "vocab.spm: 100%" + } + }, + "9ede3132d44d4f4b861b4e420cea24be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"ae68418d1a034d0787dd2d2ebd9e7981": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6313d37086a848148893de8a18f42de5", + "max": 5069051, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_56426bccb6cc4902ba2ef498af311972", + "value": 5069051 + } + }, + "cc82d496719b404caf0bf81a7b5a97fe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dc33862700374d37ae3a2e6a1a475a0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2d60f2ba4d58406f987b36494ef03f82", + "placeholder": "​", + "style": "IPY_MODEL_4f0f617b5e9a463080f69e758f0f2d28", + "value": " 2.28G/2.28G [01:36<00:00, 24.3MB/s]" + } + }, + "ebf25c1c64ae4081aedb89fb152a1ded": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ec6263930052437492882363b59bd385": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc179b3b59a94d6f8030280ba52b3c5a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_984f2ee5ee4241c599d7d505122830ce", + "IPY_MODEL_ae68418d1a034d0787dd2d2ebd9e7981", + "IPY_MODEL_4ec14312802e4a16b13b76001853d659" + ], + "layout": "IPY_MODEL_ec6263930052437492882363b59bd385" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}