diff --git a/README.md b/README.md
index ea6ec138eb..103f57543e 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
+## What's New
+- Added `Mistral-7b-example`: A comprehensive example for fine-tuning Mistral-7b model. [Check it out here](https://github.com/Tilemachoc/axolotl/tree/mistral-7b-example/examples/mistral/Mistral-7b-example).
+
 # Axolotl
 
 Axolotl is a tool designed to streamline the fine-tuning of various AI models, offering support for multiple configurations and architectures.
diff --git a/examples/mistral/Mistral-7b-example/README.md b/examples/mistral/Mistral-7b-example/README.md
new file mode 100644
index 0000000000..2d5ac87a17
--- /dev/null
+++ b/examples/mistral/Mistral-7b-example/README.md
@@ -0,0 +1,12 @@
+# Description
+This repository presents an in-depth guide for fine-tuning Mistral-7b or any other compatible model using Axolotl, tailored specifically for chatbot development. It streamlines the process of fine-tuning and uploading the enhanced model to HuggingFace 🤗, thereby serving as an invaluable tool for developers in the AI and chatbot domain.
+
+**What’s Inside:**
+
+Beginner-Friendly Instructions: Comprehensive steps to guide you through fine-tuning your chosen model, including details on the data structure (jsonl), configuration, and the code itself.
+
+Hardware Utilized: For reference, the fine-tuning in this guide was performed using 4x NVIDIA GeForce RTX 3090 (rented 2.1.2-cuda12.1-cudnn8-devel).
+
+**Uploading to HuggingFace 🤗:**
+To upload your fine-tuned model to Hugging Face, include the following files:
+![Screenshot 2024-01-19 213932](https://github.com/OpenAccess-AI-Collective/axolotl/assets/138583191/d660eb84-2d76-46a1-9846-cf0aeb3006d9)
diff --git a/examples/mistral/Mistral-7b-example/code.ipynb b/examples/mistral/Mistral-7b-example/code.ipynb
new file mode 100644
index 0000000000..756988006c
--- /dev/null
+++ b/examples/mistral/Mistral-7b-example/code.ipynb
@@ -0,0 +1,970 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3fe31229-8f6b-48bc-a86d-af8e5466d11c",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GPU available? True\n",
+      "BF16 is supported? True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check if GPU is available I used 4x NVIDIA GeForce RTX 3090 (rented 2.1.2-cuda12.1-cudnn8-devel)\n",
+    "import torch\n",
+    "print('GPU available?', torch.cuda.is_available())\n",
+    "print('BF16 is supported?', torch.cuda.is_bf16_supported())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1dee845b-f3cb-4b1e-bdd9-1a918eac140b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting huggingface_hub\n",
+      "  Downloading huggingface_hub-0.20.1-py3-none-any.whl.metadata (12 kB)\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (3.9.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2023.10.0)\n",
+      "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n",
+      "Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.65.0)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (4.7.1)\n",
+      "Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface_hub) (23.1)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (2.0.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (3.4)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (1.26.18)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface_hub) (2023.7.22)\n",
+      "Downloading huggingface_hub-0.20.1-py3-none-any.whl (330 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.1/330.1 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: huggingface_hub\n",
+      "Successfully installed huggingface_hub-0.20.1\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install huggingface_hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "88731672-9050-4034-8266-11aaace2a44e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import notebook_login"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "6b5aa7d7-3b18-4c14-afd4-043c2c545259",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "60df98d7b0294289aad8b6c8cd023c3b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#Login to huggingface so you can push the model to hub later\n",
+    "import sys\n",
+    "stdout = sys.stdout\n",
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b74d0635-5033-4494-b7bd-ff6822103d93",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#I noticed that when you use notebook_login() nothing gets printed after so we use sys \n",
+    "sys.stdout = stdout"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "e3c3b088-45e7-484b-ae39-66beabc48da8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into 'axolotl'...\n",
+      "remote: Enumerating objects: 235, done.\u001b[K\n",
+      "remote: Counting objects: 100% (235/235), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (207/207), done.\u001b[K\n",
+      "remote: Total 235 (delta 48), reused 123 (delta 13), pack-reused 0\u001b[K\n",
+      "Receiving objects: 100% (235/235), 1.46 MiB | 11.65 MiB/s, done.\n",
+      "Resolving deltas: 100% (48/48), done.\n"
+     ]
+    }
+   ],
+   "source": [
+    "#axolotl\n",
+    "!git clone -b main --depth 1 https://github.com/OpenAccess-AI-Collective/axolotl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "66927751-4fd6-4477-97fc-6ab08c9d9a74",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/axolotl\n"
+     ]
+    }
+   ],
+   "source": [
+    "cd axolotl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fcccf8da-353b-4d70-8f55-5cfe08c7e6b9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (23.1)\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0mObtaining file:///axolotl\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting auto-gptq==0.5.1\n",
+      "  Downloading auto_gptq-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
+      "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (23.1)\n",
+      "Collecting peft==0.6.0\n",
+      "  Downloading peft-0.6.0-py3-none-any.whl.metadata (23 kB)\n",
+      "Collecting transformers==4.36.2\n",
+      "  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m126.8/126.8 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting tokenizers==0.15.0\n",
+      "  Downloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
+      "Collecting bitsandbytes>=0.41.1\n",
+      "  Downloading bitsandbytes-0.41.3.post2-py3-none-any.whl.metadata (9.8 kB)\n",
+      "Collecting accelerate==0.24.1\n",
+      "  Downloading accelerate-0.24.1-py3-none-any.whl.metadata (18 kB)\n",
+      "Collecting addict\n",
+      "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
+      "Collecting fire\n",
+      "  Downloading fire-0.5.0.tar.gz (88 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.3/88.3 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hRequirement already satisfied: PyYAML>=6.0 in /opt/conda/lib/python3.10/site-packages (6.0.1)\n",
+      "Collecting datasets>=2.15.0\n",
+      "  Downloading datasets-2.16.0-py3-none-any.whl.metadata (20 kB)\n",
+      "Collecting sentencepiece\n",
+      "  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting wandb\n",
+      "  Downloading wandb-0.16.1-py3-none-any.whl.metadata (9.8 kB)\n",
+      "Collecting einops\n",
+      "  Downloading einops-0.7.0-py3-none-any.whl.metadata (13 kB)\n",
+      "Collecting optimum==1.13.2\n",
+      "  Downloading optimum-1.13.2.tar.gz (300 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.0/301.0 kB\u001b[0m \u001b[31m72.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n",
+      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
+      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting hf_transfer\n",
+      "  Downloading hf_transfer-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting colorama\n",
+      "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
+      "Collecting numba\n",
+      "  Downloading numba-0.58.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)\n",
+      "Requirement already satisfied: numpy>=1.24.4 in /opt/conda/lib/python3.10/site-packages (1.26.0)\n",
+      "Collecting bert-score==0.3.13\n",
+      "  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.1/61.1 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting evaluate==0.4.0\n",
+      "  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting rouge-score==0.1.2\n",
+      "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting scipy\n",
+      "  Downloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.4/60.4 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting scikit-learn==1.2.2\n",
+      "  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
+      "\u001b[?25hCollecting pynvml\n",
+      "  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting art\n",
+      "  Downloading art-6.1-py3-none-any.whl.metadata (69 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.9/69.9 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting fschat==0.2.34\n",
+      "  Downloading fschat-0.2.34-py3-none-any.whl.metadata (20 kB)\n",
+      "Collecting gradio==3.50.2\n",
+      "  Downloading gradio-3.50.2-py3-none-any.whl.metadata (17 kB)\n",
+      "Collecting tensorboard\n",
+      "  Downloading tensorboard-2.15.1-py3-none-any.whl.metadata (1.7 kB)\n",
+      "Collecting s3fs\n",
+      "  Downloading s3fs-2023.12.2-py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting gcsfs\n",
+      "  Downloading gcsfs-2023.12.2.post1-py2.py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting xformers==0.0.23\n",
+      "  Downloading xformers-0.0.23-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.0 kB)\n",
+      "Collecting deepspeed\n",
+      "  Downloading deepspeed-0.12.6.tar.gz (1.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m109.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting flash-attn==2.3.3\n",
+      "  Downloading flash_attn-2.3.3.tar.gz (2.3 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m111.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate==0.24.1) (5.9.0)\n",
+      "Requirement already satisfied: torch>=1.10.0 in /opt/conda/lib/python3.10/site-packages (from accelerate==0.24.1) (2.1.1)\n",
+      "Requirement already satisfied: huggingface-hub in /opt/conda/lib/python3.10/site-packages (from accelerate==0.24.1) (0.20.1)\n",
+      "Collecting rouge (from auto-gptq==0.5.1)\n",
+      "  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)\n",
+      "Collecting gekko (from auto-gptq==0.5.1)\n",
+      "  Downloading gekko-1.0.6-py3-none-any.whl (12.2 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.2/12.2 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hCollecting safetensors (from auto-gptq==0.5.1)\n",
+      "  Downloading safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
+      "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from auto-gptq==0.5.1) (4.65.0)\n",
+      "Collecting pandas>=1.0.1 (from bert-score==0.3.13)\n",
+      "  Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n",
+      "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from bert-score==0.3.13) (2.31.0)\n",
+      "Collecting matplotlib (from bert-score==0.3.13)\n",
+      "  Downloading matplotlib-3.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)\n",
+      "Collecting dill (from evaluate==0.4.0)\n",
+      "  Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n",
+      "Collecting xxhash (from evaluate==0.4.0)\n",
+      "  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
+      "Collecting multiprocess (from evaluate==0.4.0)\n",
+      "  Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)\n",
+      "Requirement already satisfied: fsspec>=2021.05.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]>=2021.05.0->evaluate==0.4.0) (2023.10.0)\n",
+      "Collecting responses<0.19 (from evaluate==0.4.0)\n",
+      "  Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
+      "Collecting ninja (from flash-attn==2.3.3)\n",
+      "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n",
+      "Collecting aiohttp (from fschat==0.2.34)\n",
+      "  Downloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)\n",
+      "Collecting fastapi (from fschat==0.2.34)\n",
+      "  Downloading fastapi-0.108.0-py3-none-any.whl.metadata (24 kB)\n",
+      "Collecting httpx (from fschat==0.2.34)\n",
+      "  Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\n",
+      "Collecting markdown2[all] (from fschat==0.2.34)\n",
+      "  Downloading markdown2-2.4.12-py2.py3-none-any.whl.metadata (2.0 kB)\n",
+      "Collecting nh3 (from fschat==0.2.34)\n",
+      "  Downloading nh3-0.2.15-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
+      "Requirement already satisfied: prompt-toolkit>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from fschat==0.2.34) (3.0.36)\n",
+      "Collecting pydantic<2,>=1 (from fschat==0.2.34)\n",
+      "  Downloading pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (149 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m42.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting rich>=10.0.0 (from fschat==0.2.34)\n",
+      "  Downloading rich-13.7.0-py3-none-any.whl.metadata (18 kB)\n",
+      "Collecting shortuuid (from fschat==0.2.34)\n",
+      "  Downloading shortuuid-1.0.11-py3-none-any.whl (10 kB)\n",
+      "Collecting tiktoken (from fschat==0.2.34)\n",
+      "  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
+      "Collecting uvicorn (from fschat==0.2.34)\n",
+      "  Downloading uvicorn-0.25.0-py3-none-any.whl.metadata (6.4 kB)\n",
+      "Collecting aiofiles<24.0,>=22.0 (from gradio==3.50.2)\n",
+      "  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n",
+      "Collecting altair<6.0,>=4.2.0 (from gradio==3.50.2)\n",
+      "  Downloading altair-5.2.0-py3-none-any.whl.metadata (8.7 kB)\n",
+      "Collecting ffmpy (from gradio==3.50.2)\n",
+      "  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting gradio-client==0.6.1 (from gradio==3.50.2)\n",
+      "  Downloading gradio_client-0.6.1-py3-none-any.whl.metadata (7.1 kB)\n",
+      "Collecting importlib-resources<7.0,>=1.3 (from gradio==3.50.2)\n",
+      "  Downloading importlib_resources-6.1.1-py3-none-any.whl.metadata (4.1 kB)\n",
+      "Requirement already satisfied: jinja2<4.0 in /opt/conda/lib/python3.10/site-packages (from gradio==3.50.2) (3.1.2)\n",
+      "Requirement already satisfied: markupsafe~=2.0 in /opt/conda/lib/python3.10/site-packages (from gradio==3.50.2) (2.1.1)\n",
+      "Collecting orjson~=3.0 (from gradio==3.50.2)\n",
+      "  Downloading orjson-3.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pillow<11.0,>=8.0 in /opt/conda/lib/python3.10/site-packages (from gradio==3.50.2) (10.0.1)\n",
+      "Collecting pydub (from gradio==3.50.2)\n",
+      "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
+      "Collecting python-multipart (from gradio==3.50.2)\n",
+      "  Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting semantic-version~=2.0 (from gradio==3.50.2)\n",
+      "  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
+      "Requirement already satisfied: typing-extensions~=4.0 in /opt/conda/lib/python3.10/site-packages (from gradio==3.50.2) (4.7.1)\n",
+      "Collecting websockets<12.0,>=10.0 (from gradio==3.50.2)\n",
+      "  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting coloredlogs (from optimum==1.13.2)\n",
+      "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from optimum==1.13.2) (1.11.1)\n",
+      "Collecting absl-py (from rouge-score==0.1.2)\n",
+      "  Downloading absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)\n",
+      "Collecting nltk (from rouge-score==0.1.2)\n",
+      "  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m90.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: six>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from rouge-score==0.1.2) (1.16.0)\n",
+      "Collecting joblib>=1.1.1 (from scikit-learn==1.2.2)\n",
+      "  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)\n",
+      "Collecting threadpoolctl>=2.0.0 (from scikit-learn==1.2.2)\n",
+      "  Downloading threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers==4.36.2) (3.9.0)\n",
+      "Collecting regex!=2019.12.17 (from transformers==4.36.2)\n",
+      "  Downloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate==0.24.1) (3.1)\n",
+      "Collecting pyarrow>=8.0.0 (from datasets>=2.15.0)\n",
+      "  Downloading pyarrow-14.0.2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)\n",
+      "Collecting pyarrow-hotfix (from datasets>=2.15.0)\n",
+      "  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n",
+      "Collecting hjson (from deepspeed)\n",
+      "  Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting py-cpuinfo (from deepspeed)\n",
+      "  Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)\n",
+      "Collecting termcolor (from fire)\n",
+      "  Downloading termcolor-2.4.0-py3-none-any.whl.metadata (6.1 kB)\n",
+      "Requirement already satisfied: decorator>4.1.2 in /opt/conda/lib/python3.10/site-packages (from gcsfs) (5.1.1)\n",
+      "INFO: pip is looking at multiple versions of gcsfs to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting gcsfs\n",
+      "  Downloading gcsfs-2023.12.1-py2.py3-none-any.whl.metadata (1.6 kB)\n",
+      "  Downloading gcsfs-2023.12.0-py2.py3-none-any.whl.metadata (1.6 kB)\n",
+      "  Downloading gcsfs-2023.10.0-py2.py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting google-auth>=1.2 (from gcsfs)\n",
+      "  Downloading google_auth-2.25.2-py2.py3-none-any.whl.metadata (4.7 kB)\n",
+      "Collecting google-auth-oauthlib (from gcsfs)\n",
+      "  Downloading google_auth_oauthlib-1.2.0-py2.py3-none-any.whl.metadata (2.7 kB)\n",
+      "Collecting google-cloud-storage (from gcsfs)\n",
+      "  Downloading google_cloud_storage-2.14.0-py2.py3-none-any.whl.metadata (6.1 kB)\n",
+      "Collecting llvmlite<0.42,>=0.41.0dev0 (from numba)\n",
+      "  Downloading llvmlite-0.41.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)\n",
+      "Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs)\n",
+      "  Downloading aiobotocore-2.9.0-py3-none-any.whl.metadata (20 kB)\n",
+      "INFO: pip is looking at multiple versions of s3fs to determine which version is compatible with other requirements. This could take a while.\n",
+      "Collecting s3fs\n",
+      "  Downloading s3fs-2023.12.1-py3-none-any.whl.metadata (1.6 kB)\n",
+      "  Downloading s3fs-2023.10.0-py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting aiobotocore~=2.7.0 (from s3fs)\n",
+      "  Downloading aiobotocore-2.7.0-py3-none-any.whl.metadata (20 kB)\n",
+      "Collecting grpcio>=1.48.2 (from tensorboard)\n",
+      "  Downloading grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n",
+      "Collecting markdown>=2.6.8 (from tensorboard)\n",
+      "  Downloading Markdown-3.5.1-py3-none-any.whl.metadata (7.1 kB)\n",
+      "Collecting protobuf<4.24,>=3.19.6 (from tensorboard)\n",
+      "  Downloading protobuf-4.23.4-cp37-abi3-manylinux2014_x86_64.whl.metadata (540 bytes)\n",
+      "Requirement already satisfied: setuptools>=41.0.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard) (68.0.0)\n",
+      "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)\n",
+      "  Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n",
+      "Collecting werkzeug>=1.0.1 (from tensorboard)\n",
+      "  Downloading werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)\n",
+      "Requirement already satisfied: Click!=8.0.0,>=7.1 in /opt/conda/lib/python3.10/site-packages (from wandb) (8.1.7)\n",
+      "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)\n",
+      "  Downloading GitPython-3.1.40-py3-none-any.whl.metadata (12 kB)\n",
+      "Collecting sentry-sdk>=1.0.0 (from wandb)\n",
+      "  Downloading sentry_sdk-1.39.1-py2.py3-none-any.whl.metadata (9.7 kB)\n",
+      "Collecting docker-pycreds>=0.4.0 (from wandb)\n",
+      "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
+      "Collecting setproctitle (from wandb)\n",
+      "  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)\n",
+      "Collecting appdirs>=1.4.3 (from wandb)\n",
+      "  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)\n",
+      "Collecting botocore<1.31.65,>=1.31.16 (from aiobotocore~=2.7.0->s3fs)\n",
+      "  Downloading botocore-1.31.64-py3-none-any.whl.metadata (6.1 kB)\n",
+      "Collecting wrapt<2.0.0,>=1.10.10 (from aiobotocore~=2.7.0->s3fs)\n",
+      "  Downloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
+      "Collecting aioitertools<1.0.0,>=0.5.1 (from aiobotocore~=2.7.0->s3fs)\n",
+      "  Downloading aioitertools-0.11.0-py3-none-any.whl (23 kB)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->fschat==0.2.34) (23.1.0)\n",
+      "Collecting multidict<7.0,>=4.5 (from aiohttp->fschat==0.2.34)\n",
+      "  Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m37.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting yarl<2.0,>=1.0 (from aiohttp->fschat==0.2.34)\n",
+      "  Downloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n",
+      "Collecting frozenlist>=1.1.1 (from aiohttp->fschat==0.2.34)\n",
+      "  Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
+      "Collecting aiosignal>=1.1.2 (from aiohttp->fschat==0.2.34)\n",
+      "  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
+      "Collecting async-timeout<5.0,>=4.0 (from aiohttp->fschat==0.2.34)\n",
+      "  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\n",
+      "Requirement already satisfied: jsonschema>=3.0 in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio==3.50.2) (4.20.0)\n",
+      "Requirement already satisfied: toolz in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=4.2.0->gradio==3.50.2) (0.12.0)\n",
+      "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb)\n",
+      "  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n",
+      "Collecting cachetools<6.0,>=2.0.0 (from google-auth>=1.2->gcsfs)\n",
+      "  Downloading cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)\n",
+      "Collecting pyasn1-modules>=0.2.1 (from google-auth>=1.2->gcsfs)\n",
+      "  Downloading pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting rsa<5,>=3.1.4 (from google-auth>=1.2->gcsfs)\n",
+      "  Downloading rsa-4.9-py3-none-any.whl (34 kB)\n",
+      "Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib->gcsfs)\n",
+      "  Downloading requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)\n",
+      "Collecting contourpy>=1.0.1 (from matplotlib->bert-score==0.3.13)\n",
+      "  Downloading contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)\n",
+      "Collecting cycler>=0.10 (from matplotlib->bert-score==0.3.13)\n",
+      "  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
+      "Collecting fonttools>=4.22.0 (from matplotlib->bert-score==0.3.13)\n",
+      "  Downloading fonttools-4.47.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (157 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.2/157.2 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib->bert-score==0.3.13)\n",
+      "  Downloading kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (6.4 kB)\n",
+      "Collecting pyparsing>=2.3.1 (from matplotlib->bert-score==0.3.13)\n",
+      "  Downloading pyparsing-3.1.1-py3-none-any.whl.metadata (5.1 kB)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.10/site-packages (from matplotlib->bert-score==0.3.13) (2.8.2)\n",
+      "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas>=1.0.1->bert-score==0.3.13) (2023.3.post1)\n",
+      "Collecting tzdata>=2022.1 (from pandas>=1.0.1->bert-score==0.3.13)\n",
+      "  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m72.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit>=3.0.0->fschat==0.2.34) (0.2.5)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->bert-score==0.3.13) (2.0.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->bert-score==0.3.13) (3.4)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->bert-score==0.3.13) (1.26.18)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->bert-score==0.3.13) (2023.7.22)\n",
+      "Collecting markdown-it-py>=2.2.0 (from rich>=10.0.0->fschat==0.2.34)\n",
+      "  Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.0.0->fschat==0.2.34) (2.15.1)\n",
+      "Collecting h11>=0.8 (from uvicorn->fschat==0.2.34)\n",
+      "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting humanfriendly>=9.1 (from coloredlogs->optimum==1.13.2)\n",
+      "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hCollecting starlette<0.33.0,>=0.29.0 (from fastapi->fschat==0.2.34)\n",
+      "  Downloading starlette-0.32.0.post1-py3-none-any.whl.metadata (5.8 kB)\n",
+      "Collecting typing-extensions~=4.0 (from gradio==3.50.2)\n",
+      "  Downloading typing_extensions-4.9.0-py3-none-any.whl.metadata (3.0 kB)\n",
+      "Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 (from google-cloud-storage->gcsfs)\n",
+      "  Downloading google_api_core-2.15.0-py3-none-any.whl.metadata (2.7 kB)\n",
+      "Collecting google-cloud-core<3.0dev,>=2.3.0 (from google-cloud-storage->gcsfs)\n",
+      "  Downloading google_cloud_core-2.4.1-py2.py3-none-any.whl.metadata (2.7 kB)\n",
+      "Collecting google-resumable-media>=2.6.0 (from google-cloud-storage->gcsfs)\n",
+      "  Downloading google_resumable_media-2.7.0-py2.py3-none-any.whl.metadata (2.2 kB)\n",
+      "Collecting google-crc32c<2.0dev,>=1.0 (from google-cloud-storage->gcsfs)\n",
+      "  Downloading google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32 kB)\n",
+      "Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx->fschat==0.2.34) (4.2.0)\n",
+      "Collecting httpcore==1.* (from httpx->fschat==0.2.34)\n",
+      "  Downloading httpcore-1.0.2-py3-none-any.whl.metadata (20 kB)\n",
+      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx->fschat==0.2.34) (1.3.0)\n",
+      "Collecting wavedrom (from markdown2[all]->fschat==0.2.34)\n",
+      "  Downloading wavedrom-2.0.3.post3.tar.gz (137 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.7/137.7 kB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->optimum==1.13.2) (1.3.0)\n",
+      "Collecting jmespath<2.0.0,>=0.7.1 (from botocore<1.31.65,>=1.31.16->aiobotocore~=2.7.0->s3fs)\n",
+      "  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
+      "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb)\n",
+      "  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n",
+      "Collecting googleapis-common-protos<2.0.dev0,>=1.56.2 (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs)\n",
+      "  Downloading googleapis_common_protos-1.62.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
+      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2) (2023.12.1)\n",
+      "Requirement already satisfied: referencing>=0.28.4 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2) (0.32.0)\n",
+      "Requirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==3.50.2) (0.15.2)\n",
+      "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich>=10.0.0->fschat==0.2.34)\n",
+      "  Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n",
+      "Collecting pyasn1<0.6.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs)\n",
+      "  Downloading pyasn1-0.5.1-py2.py3-none-any.whl.metadata (8.6 kB)\n",
+      "Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs)\n",
+      "  Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m151.7/151.7 kB\u001b[0m \u001b[31m50.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio->httpx->fschat==0.2.34) (1.0.4)\n",
+      "Collecting svgwrite (from wavedrom->markdown2[all]->fschat==0.2.34)\n",
+      "  Downloading svgwrite-1.4.3-py3-none-any.whl (67 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.1/67.1 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading accelerate-0.24.1-py3-none-any.whl (261 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m261.4/261.4 kB\u001b[0m \u001b[31m53.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading auto_gptq-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m89.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading fschat-0.2.34-py3-none-any.whl (220 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m220.1/220.1 kB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading gradio-3.50.2-py3-none-any.whl (20.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.3/20.3 MB\u001b[0m \u001b[31m82.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading peft-0.6.0-py3-none-any.whl (134 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.9/134.9 kB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading transformers-4.36.2-py3-none-any.whl (8.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.2/8.2 MB\u001b[0m \u001b[31m90.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading xformers-0.0.23-cp310-cp310-manylinux2014_x86_64.whl (213.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading gradio_client-0.6.1-py3-none-any.whl (299 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m299.2/299.2 kB\u001b[0m \u001b[31m64.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading bitsandbytes-0.41.3.post2-py3-none-any.whl (92.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading datasets-2.16.0-py3-none-any.whl (507 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m87.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.4 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m36.4/36.4 MB\u001b[0m \u001b[31m77.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading art-6.1-py3-none-any.whl (599 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m599.8/599.8 kB\u001b[0m \u001b[31m96.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading einops-0.7.0-py3-none-any.whl (44 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading gcsfs-2023.10.0-py2.py3-none-any.whl (33 kB)\n",
+      "Downloading hf_transfer-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.9 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.9/3.9 MB\u001b[0m \u001b[31m99.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading numba-0.58.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading s3fs-2023.10.0-py3-none-any.whl (28 kB)\n",
+      "Downloading tensorboard-2.15.1-py3-none-any.whl (5.5 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m96.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading wandb-0.16.1-py3-none-any.whl (2.1 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m99.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading absl_py-2.0.0-py3-none-any.whl (130 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading aiobotocore-2.7.0-py3-none-any.whl (73 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.5/73.5 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
+      "Downloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m99.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading altair-5.2.0-py3-none-any.whl (996 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m996.9/996.9 kB\u001b[0m \u001b[31m110.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading GitPython-3.1.40-py3-none-any.whl (190 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.6/190.6 kB\u001b[0m \u001b[31m47.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading google_auth-2.25.2-py2.py3-none-any.whl (184 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m184.2/184.2 kB\u001b[0m \u001b[31m44.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading google_auth_oauthlib-1.2.0-py2.py3-none-any.whl (24 kB)\n",
+      "Downloading grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m102.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading importlib_resources-6.1.1-py3-none-any.whl (33 kB)\n",
+      "Downloading joblib-1.3.2-py3-none-any.whl (302 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.2/302.2 kB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading llvmlite-0.41.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading Markdown-3.5.1-py3-none-any.whl (102 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m34.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading matplotlib-3.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m99.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading orjson-3.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.7/138.7 kB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m96.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading protobuf-4.23.4-cp37-abi3-manylinux2014_x86_64.whl (304 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.5/304.5 kB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pyarrow-14.0.2-cp310-cp310-manylinux_2_28_x86_64.whl (38.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.0/38.0 MB\u001b[0m \u001b[31m78.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading pydantic-1.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m95.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (773 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m774.0/774.0 kB\u001b[0m \u001b[31m116.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading rich-13.7.0-py3-none-any.whl (240 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m240.6/240.6 kB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading safetensors-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m102.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading sentry_sdk-1.39.1-py2.py3-none-any.whl (254 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m254.1/254.1 kB\u001b[0m \u001b[31m71.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m104.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading threadpoolctl-3.2.0-py3-none-any.whl (15 kB)\n",
+      "Downloading uvicorn-0.25.0-py3-none-any.whl (60 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.3/60.3 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading werkzeug-3.0.1-py3-none-any.whl (226 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m67.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading fastapi-0.108.0-py3-none-any.whl (92 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n",
+      "Downloading google_cloud_storage-2.14.0-py2.py3-none-any.whl (121 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 kB\u001b[0m \u001b[31m36.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading httpx-0.26.0-py3-none-any.whl (75 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading nh3-0.2.15-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m108.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m66.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
+      "Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
+      "Downloading termcolor-2.4.0-py3-none-any.whl (7.7 kB)\n",
+      "Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m101.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n",
+      "Downloading botocore-1.31.64-py3-none-any.whl (11.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.3/11.3 MB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading cachetools-5.3.2-py3-none-any.whl (9.3 kB)\n",
+      "Downloading contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (310 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.7/310.7 kB\u001b[0m \u001b[31m69.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
+      "Downloading fonttools-4.47.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m102.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (239 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.5/239.5 kB\u001b[0m \u001b[31m71.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading google_api_core-2.15.0-py3-none-any.whl (121 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.0/122.0 kB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading google_cloud_core-2.4.1-py2.py3-none-any.whl (29 kB)\n",
+      "Downloading google_resumable_media-2.7.0-py2.py3-none-any.whl (80 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.6/80.6 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m102.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.5/87.5 kB\u001b[0m \u001b[31m25.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pyparsing-3.1.1-py3-none-any.whl (103 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.1/103.1 kB\u001b[0m \u001b[31m32.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading starlette-0.32.0.post1-py3-none-any.whl (70 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.0/70.0 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (80 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.3/80.3 kB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (301 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.6/301.6 kB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading markdown2-2.4.12-py2.py3-none-any.whl (41 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.2/41.2 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading googleapis_common_protos-1.62.0-py2.py3-none-any.whl (228 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m228.7/228.7 kB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading pyasn1-0.5.1-py2.py3-none-any.whl (84 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.9/84.9 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading smmap-5.0.1-py3-none-any.whl (24 kB)\n",
+      "Building wheels for collected packages: flash-attn, optimum, rouge-score, deepspeed, fire, ffmpy, wavedrom\n",
+      "  Building wheel for flash-attn (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for flash-attn: filename=flash_attn-2.3.3-cp310-cp310-linux_x86_64.whl size=57042553 sha256=b1df92cb5bd7657d38b789dd48e907aa3e0bd2715c817eb85f3c4320bb11fb3f\n",
+      "  Stored in directory: /root/.cache/pip/wheels/e5/e6/fa/941802ec61d1afd320d27160ab1db98e6dba65381f84b76d4a\n",
+      "  Building wheel for optimum (pyproject.toml) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for optimum: filename=optimum-1.13.2-py3-none-any.whl size=395599 sha256=ff3a73120e1b6eeeda28f76e3fc8cd4cd826e5d66c869b7848ba150e7af79c62\n",
+      "  Stored in directory: /root/.cache/pip/wheels/6e/b7/2c/79405d98f0943373d8546daeae25a3d377f7659ca0cbe48699\n",
+      "  Building wheel for rouge-score (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=8118ecbbcd3529085e794c803f0ddb182fc6c6d3e8a494103b49a94abf1bec37\n",
+      "  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
+      "  Building wheel for deepspeed (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for deepspeed: filename=deepspeed-0.12.6-py3-none-any.whl size=1306729 sha256=35c46b6f0275b0d3063522e0af4f3cbd9ec1c310114d8917d87cbe2bf43346e2\n",
+      "  Stored in directory: /root/.cache/pip/wheels/a3/dc/a2/f585faaed4dec84108916dcc8e8a7c129a216df8202ca32984\n",
+      "  Building wheel for fire (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116934 sha256=e76d5185f237f34ec69bb8aa657497bef07408978e4f7efdaef48663bb8cd4ef\n",
+      "  Stored in directory: /root/.cache/pip/wheels/90/d4/f7/9404e5db0116bd4d43e5666eaa3e70ab53723e1e3ea40c9a95\n",
+      "  Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for ffmpy: filename=ffmpy-0.3.1-py3-none-any.whl size=5579 sha256=da3b54dc0ac1a825a1a233315970ac80b8b4c53ebd9cb2a2cfdeab118f453a64\n",
+      "  Stored in directory: /root/.cache/pip/wheels/01/a6/d1/1c0828c304a4283b2c1639a09ad86f83d7c487ef34c6b4a1bf\n",
+      "  Building wheel for wavedrom (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for wavedrom: filename=wavedrom-2.0.3.post3-py2.py3-none-any.whl size=30052 sha256=7f0cbd15d63ee9c120190bac122ab51bbbfc91ee374bc3c046fadb320816c17e\n",
+      "  Stored in directory: /root/.cache/pip/wheels/9c/52/8c/38b454b42f712f325e26f633287484c7dc1ad469e1580c5954\n",
+      "Successfully built flash-attn optimum rouge-score deepspeed fire ffmpy wavedrom\n",
+      "Installing collected packages: sentencepiece, pydub, py-cpuinfo, ninja, nh3, hjson, ffmpy, bitsandbytes, appdirs, addict, xxhash, wrapt, werkzeug, websockets, tzdata, typing-extensions, threadpoolctl, termcolor, tensorboard-data-server, svgwrite, smmap, shortuuid, setproctitle, sentry-sdk, semantic-version, scipy, safetensors, rouge, regex, python-multipart, pyparsing, pynvml, pyasn1, pyarrow-hotfix, pyarrow, protobuf, orjson, oauthlib, multidict, mdurl, markdown2, markdown, llvmlite, kiwisolver, joblib, jmespath, importlib-resources, humanfriendly, hf_transfer, h11, grpcio, google-crc32c, gekko, frozenlist, fonttools, einops, docker-pycreds, dill, cycler, contourpy, colorama, cachetools, async-timeout, art, aioitertools, aiofiles, absl-py, yarl, wavedrom, uvicorn, tiktoken, scikit-learn, rsa, responses, requests-oauthlib, pydantic, pyasn1-modules, pandas, numba, nltk, multiprocess, matplotlib, markdown-it-py, httpcore, googleapis-common-protos, google-resumable-media, gitdb, fire, coloredlogs, botocore, aiosignal, xformers, tokenizers, starlette, rouge-score, rich, httpx, google-auth, GitPython, flash-attn, deepspeed, aiohttp, accelerate, wandb, transformers, gradio-client, google-auth-oauthlib, google-api-core, fastapi, altair, aiobotocore, tensorboard, s3fs, peft, gradio, google-cloud-core, fschat, datasets, bert-score, optimum, google-cloud-storage, evaluate, auto-gptq, gcsfs, axolotl\n",
+      "  Attempting uninstall: typing-extensions\n",
+      "    Found existing installation: typing_extensions 4.7.1\n",
+      "    Uninstalling typing_extensions-4.7.1:\n",
+      "      Successfully uninstalled typing_extensions-4.7.1\n",
+      "  Running setup.py develop for axolotl\n",
+      "Successfully installed GitPython-3.1.40 absl-py-2.0.0 accelerate-0.24.1 addict-2.4.0 aiobotocore-2.7.0 aiofiles-23.2.1 aiohttp-3.9.1 aioitertools-0.11.0 aiosignal-1.3.1 altair-5.2.0 appdirs-1.4.4 art-6.1 async-timeout-4.0.3 auto-gptq-0.5.1 axolotl-0.3.0 bert-score-0.3.13 bitsandbytes-0.41.3.post2 botocore-1.31.64 cachetools-5.3.2 colorama-0.4.6 coloredlogs-15.0.1 contourpy-1.2.0 cycler-0.12.1 datasets-2.16.0 deepspeed-0.12.6 dill-0.3.7 docker-pycreds-0.4.0 einops-0.7.0 evaluate-0.4.0 fastapi-0.108.0 ffmpy-0.3.1 fire-0.5.0 flash-attn-2.3.3 fonttools-4.47.0 frozenlist-1.4.1 fschat-0.2.34 gcsfs-2023.10.0 gekko-1.0.6 gitdb-4.0.11 google-api-core-2.15.0 google-auth-2.25.2 google-auth-oauthlib-1.2.0 google-cloud-core-2.4.1 google-cloud-storage-2.14.0 google-crc32c-1.5.0 google-resumable-media-2.7.0 googleapis-common-protos-1.62.0 gradio-3.50.2 gradio-client-0.6.1 grpcio-1.60.0 h11-0.14.0 hf_transfer-0.1.4 hjson-3.1.0 httpcore-1.0.2 httpx-0.26.0 humanfriendly-10.0 importlib-resources-6.1.1 jmespath-1.0.1 joblib-1.3.2 kiwisolver-1.4.5 llvmlite-0.41.1 markdown-3.5.1 markdown-it-py-3.0.0 markdown2-2.4.12 matplotlib-3.8.2 mdurl-0.1.2 multidict-6.0.4 multiprocess-0.70.15 nh3-0.2.15 ninja-1.11.1.1 nltk-3.8.1 numba-0.58.1 oauthlib-3.2.2 optimum-1.13.2 orjson-3.9.10 pandas-2.1.4 peft-0.6.0 protobuf-4.23.4 py-cpuinfo-9.0.0 pyarrow-14.0.2 pyarrow-hotfix-0.6 pyasn1-0.5.1 pyasn1-modules-0.3.0 pydantic-1.10.13 pydub-0.25.1 pynvml-11.5.0 pyparsing-3.1.1 python-multipart-0.0.6 regex-2023.12.25 requests-oauthlib-1.3.1 responses-0.18.0 rich-13.7.0 rouge-1.0.1 rouge-score-0.1.2 rsa-4.9 s3fs-2023.10.0 safetensors-0.4.1 scikit-learn-1.2.2 scipy-1.11.4 semantic-version-2.10.0 sentencepiece-0.1.99 sentry-sdk-1.39.1 setproctitle-1.3.3 shortuuid-1.0.11 smmap-5.0.1 starlette-0.32.0.post1 svgwrite-1.4.3 tensorboard-2.15.1 tensorboard-data-server-0.7.2 termcolor-2.4.0 threadpoolctl-3.2.0 tiktoken-0.5.2 tokenizers-0.15.0 transformers-4.36.2 typing-extensions-4.8.0 tzdata-2023.3 uvicorn-0.25.0 wandb-0.16.1 wavedrom-2.0.3.post3 websockets-11.0.3 werkzeug-3.0.1 wrapt-1.16.0 xformers-0.0.23 xxhash-3.4.1 yarl-1.9.4\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0mCollecting git+https://github.com/huggingface/peft.git\n",
+      "  Cloning https://github.com/huggingface/peft.git to /tmp/pip-req-build-hka8xgk2\n",
+      "  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-req-build-hka8xgk2\n",
+      "  Resolved https://github.com/huggingface/peft.git to commit cf04d0353f0343cbf66627228c4495f51669af34\n",
+      "  Installing build dependencies ... \u001b[?25ldone\n",
+      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
+      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
+      "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (1.26.0)\n",
+      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (23.1)\n",
+      "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (5.9.0)\n",
+      "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (6.0.1)\n",
+      "Requirement already satisfied: torch>=1.13.0 in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (2.1.1)\n",
+      "Requirement already satisfied: transformers in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (4.36.2)\n",
+      "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (4.65.0)\n",
+      "Requirement already satisfied: accelerate>=0.21.0 in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (0.24.1)\n",
+      "Requirement already satisfied: safetensors in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (0.4.1)\n",
+      "Requirement already satisfied: huggingface-hub>=0.17.0 in /opt/conda/lib/python3.10/site-packages (from peft==0.7.2.dev0) (0.20.1)\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft==0.7.2.dev0) (3.9.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft==0.7.2.dev0) (2023.10.0)\n",
+      "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft==0.7.2.dev0) (2.31.0)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.17.0->peft==0.7.2.dev0) (4.8.0)\n",
+      "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft==0.7.2.dev0) (1.11.1)\n",
+      "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft==0.7.2.dev0) (3.1)\n",
+      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.13.0->peft==0.7.2.dev0) (3.1.2)\n",
+      "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers->peft==0.7.2.dev0) (2023.12.25)\n",
+      "Requirement already satisfied: tokenizers<0.19,>=0.14 in /opt/conda/lib/python3.10/site-packages (from transformers->peft==0.7.2.dev0) (0.15.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.13.0->peft==0.7.2.dev0) (2.1.1)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft==0.7.2.dev0) (2.0.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft==0.7.2.dev0) (3.4)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft==0.7.2.dev0) (1.26.18)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.17.0->peft==0.7.2.dev0) (2023.7.22)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.13.0->peft==0.7.2.dev0) (1.3.0)\n",
+      "Building wheels for collected packages: peft\n",
+      "  Building wheel for peft (pyproject.toml) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for peft: filename=peft-0.7.2.dev0-py3-none-any.whl size=169456 sha256=4c70d23e759fa6abb3827fb2f3a8683be3b24d78777d0f403bbc2c0548e5dd4b\n",
+      "  Stored in directory: /tmp/pip-ephem-wheel-cache-my5ncou6/wheels/d7/c7/de/1368fac8590e1b103ddc2ec2a28ad51d83aded1a3830e8a087\n",
+      "Successfully built peft\n",
+      "Installing collected packages: peft\n",
+      "  Attempting uninstall: peft\n",
+      "    Found existing installation: peft 0.6.0\n",
+      "    Uninstalling peft-0.6.0:\n",
+      "      Successfully uninstalled peft-0.6.0\n",
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "axolotl 0.3.0 requires peft==0.6.0, but you have peft 0.7.2.dev0 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mSuccessfully installed peft-0.7.2.dev0\n",
+      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+      "\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "#instaling what is needed inside axolotl file\n",
+    "!pip install packaging\n",
+    "!pip install -e '.[flash-attn,deepspeed]'\n",
+    "!pip install -U git+https://github.com/huggingface/peft.git"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "82d1a380-1e87-48fe-89fe-25331326014d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The following values were not passed to `accelerate launch` and had defaults used instead:\n",
+      "\t`--num_processes` was set to a value of `3`\n",
+      "\t\tMore than one GPU was found, enabling multi-GPU training.\n",
+      "\t\tIf this was unintended please pass in `--num_processes=1`.\n",
+      "\t`--num_machines` was set to a value of `1`\n",
+      "\t`--mixed_precision` was set to a value of `'no'`\n",
+      "\t`--dynamo_backend` was set to a value of `'no'`\n",
+      "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
+      "/opt/conda/lib/python3.10/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations\n",
+      "  warnings.warn(\n",
+      "[2023-12-28 15:44:09,979] [INFO] [datasets.<module>:58] [PID:2814] PyTorch version 2.1.1 available.\n",
+      "/opt/conda/lib/python3.10/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations\n",
+      "  warnings.warn(\n",
+      "[2023-12-28 15:44:10,011] [INFO] [datasets.<module>:58] [PID:2812] PyTorch version 2.1.1 available.\n",
+      "[2023-12-28 15:44:10,013] [INFO] [datasets.<module>:58] [PID:2813] PyTorch version 2.1.1 available.\n",
+      "[2023-12-28 15:44:10,805] [INFO] [axolotl.normalize_config:150] [PID:2814] [RANK:2] GPU memory usage baseline: 0.000GB (+0.317GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:10,830] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2023-12-28 15:44:10,842] [INFO] [axolotl.normalize_config:150] [PID:2813] [RANK:1] GPU memory usage baseline: 0.000GB (+0.317GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:10,865] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2023-12-28 15:44:10,869] [INFO] [axolotl.normalize_config:150] [PID:2812] [RANK:0] GPU memory usage baseline: 0.000GB (+0.351GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:10,887] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
+      "[2023-12-28 15:44:10,961] [INFO] [comm.py:637:init_distributed] cdb=None\n",
+      "[2023-12-28 15:44:10,994] [INFO] [comm.py:637:init_distributed] cdb=None\n",
+      "[2023-12-28 15:44:11,015] [INFO] [comm.py:637:init_distributed] cdb=None\n",
+      "[2023-12-28 15:44:11,015] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n",
+      "                                 dP            dP   dP \n",
+      "                                 88            88   88 \n",
+      "      .d8888b. dP.  .dP .d8888b. 88 .d8888b. d8888P 88 \n",
+      "      88'  `88  `8bd8'  88'  `88 88 88'  `88   88   88 \n",
+      "      88.  .88  .d88b.  88.  .88 88 88.  .88   88   88 \n",
+      "      `88888P8 dP'  `dP `88888P' dP `88888P'   dP   dP \n",
+      "                                                       \n",
+      "                                                       \n",
+      "\n",
+      "[2023-12-28 15:44:11,412] [DEBUG] [axolotl.load_tokenizer:184] [PID:2812] [RANK:0] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,412] [DEBUG] [axolotl.load_tokenizer:185] [PID:2812] [RANK:0] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,412] [DEBUG] [axolotl.load_tokenizer:186] [PID:2812] [RANK:0] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,412] [DEBUG] [axolotl.load_tokenizer:187] [PID:2812] [RANK:0] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:11,413] [INFO] [axolotl.load_tokenized_prepared_datasets:143] [PID:2812] [RANK:0] Loading prepared dataset from disk at tilemachos/GF_new.json/1adc45d2edc1e98ce657814412c6593c...\u001b[39m\n",
+      "[2023-12-28 15:44:11,415] [INFO] [axolotl.load_tokenized_prepared_datasets:145] [PID:2812] [RANK:0] Prepared dataset loaded from disk...\u001b[39m\n",
+      "[2023-12-28 15:44:11,432] [DEBUG] [axolotl.load_tokenizer:184] [PID:2814] [RANK:2] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,432] [DEBUG] [axolotl.load_tokenizer:185] [PID:2814] [RANK:2] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,432] [DEBUG] [axolotl.load_tokenizer:186] [PID:2814] [RANK:2] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,432] [DEBUG] [axolotl.load_tokenizer:187] [PID:2814] [RANK:2] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:11,530] [DEBUG] [axolotl.load_tokenizer:184] [PID:2813] [RANK:1] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,531] [DEBUG] [axolotl.load_tokenizer:185] [PID:2813] [RANK:1] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,531] [DEBUG] [axolotl.load_tokenizer:186] [PID:2813] [RANK:1] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:11,531] [DEBUG] [axolotl.load_tokenizer:187] [PID:2813] [RANK:1] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:12,158] [INFO] [axolotl.load_tokenized_prepared_datasets:143] [PID:2813] [RANK:1] Loading prepared dataset from disk at tilemachos/GF_new.json/1adc45d2edc1e98ce657814412c6593c...\u001b[39m\n",
+      "[2023-12-28 15:44:12,158] [INFO] [axolotl.load_tokenized_prepared_datasets:143] [PID:2814] [RANK:2] Loading prepared dataset from disk at tilemachos/GF_new.json/1adc45d2edc1e98ce657814412c6593c...\u001b[39m\n",
+      "[2023-12-28 15:44:12,160] [INFO] [axolotl.load_tokenized_prepared_datasets:145] [PID:2813] [RANK:1] Prepared dataset loaded from disk...\u001b[39m\n",
+      "[2023-12-28 15:44:12,161] [INFO] [axolotl.load_tokenized_prepared_datasets:145] [PID:2814] [RANK:2] Prepared dataset loaded from disk...\u001b[39m\n",
+      "[2023-12-28 15:44:12,236] [DEBUG] [axolotl.log:60] [PID:2812] [RANK:0] total_num_tokens: 28120\u001b[39m\n",
+      "[2023-12-28 15:44:12,238] [DEBUG] [axolotl.log:60] [PID:2812] [RANK:0] `total_supervised_tokens: 7990`\u001b[39m\n",
+      "[2023-12-28 15:44:12,238] [DEBUG] [axolotl.log:60] [PID:2812] [RANK:0] total_num_steps: 6\u001b[39m\n",
+      "[2023-12-28 15:44:12,242] [DEBUG] [axolotl.train.log:60] [PID:2812] [RANK:0] loading tokenizer... mistralai/Mistral-7B-v0.1\u001b[39m\n",
+      "[2023-12-28 15:44:12,518] [DEBUG] [axolotl.load_tokenizer:184] [PID:2812] [RANK:0] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,518] [DEBUG] [axolotl.load_tokenizer:185] [PID:2812] [RANK:0] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,518] [DEBUG] [axolotl.load_tokenizer:186] [PID:2812] [RANK:0] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,518] [DEBUG] [axolotl.load_tokenizer:187] [PID:2812] [RANK:0] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:12,518] [DEBUG] [axolotl.train.log:60] [PID:2812] [RANK:0] loading model and peft_config...\u001b[39m\n",
+      "[2023-12-28 15:44:12,589] [DEBUG] [axolotl.load_tokenizer:184] [PID:2814] [RANK:2] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,589] [DEBUG] [axolotl.load_tokenizer:185] [PID:2814] [RANK:2] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,589] [DEBUG] [axolotl.load_tokenizer:186] [PID:2814] [RANK:2] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,589] [DEBUG] [axolotl.load_tokenizer:187] [PID:2814] [RANK:2] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:12,599] [DEBUG] [axolotl.load_tokenizer:184] [PID:2813] [RANK:1] EOS: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,599] [DEBUG] [axolotl.load_tokenizer:185] [PID:2813] [RANK:1] BOS: 1 / <s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,599] [DEBUG] [axolotl.load_tokenizer:186] [PID:2813] [RANK:1] PAD: 2 / </s>\u001b[39m\n",
+      "[2023-12-28 15:44:12,599] [DEBUG] [axolotl.load_tokenizer:187] [PID:2813] [RANK:1] UNK: 0 / <unk>\u001b[39m\n",
+      "[2023-12-28 15:44:13,049] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 291, num_elems = 7.24B\n",
+      "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:11<00:00,  5.81s/it]\n",
+      "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:11<00:00,  5.98s/it]\n",
+      "[2023-12-28 15:44:25,395] [INFO] [axolotl.load_model:503] [PID:2813] [RANK:1] GPU memory usage after model load: 7.576GB (+0.524GB cache, +0.708GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:25,399] [INFO] [axolotl.load_model:526] [PID:2813] [RANK:1] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
+      "[2023-12-28 15:44:25,403] [INFO] [axolotl.load_model:538] [PID:2813] [RANK:1] converting modules to torch.bfloat16 for flash attention\u001b[39m\n",
+      "trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836\n",
+      "[2023-12-28 15:44:25,480] [INFO] [axolotl.load_model:568] [PID:2813] [RANK:1] GPU memory usage after adapters: 7.589GB (+1.501GB cache, +0.708GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:25,572] [INFO] [axolotl.load_model:503] [PID:2814] [RANK:2] GPU memory usage after model load: 7.576GB (+0.410GB cache, +0.708GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:25,576] [INFO] [axolotl.load_model:526] [PID:2814] [RANK:2] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
+      "[2023-12-28 15:44:25,580] [INFO] [axolotl.load_model:538] [PID:2814] [RANK:2] converting modules to torch.bfloat16 for flash attention\u001b[39m\n",
+      "trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836\n",
+      "[2023-12-28 15:44:25,660] [INFO] [axolotl.load_model:568] [PID:2814] [RANK:2] GPU memory usage after adapters: 7.589GB (+1.388GB cache, +0.708GB misc)\u001b[39m\n",
+      "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:12<00:00,  6.30s/it]\n",
+      "[2023-12-28 15:44:26,170] [INFO] [axolotl.load_model:503] [PID:2812] [RANK:0] GPU memory usage after model load: 7.576GB (+0.776GB cache, +0.741GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:26,177] [INFO] [axolotl.load_model:526] [PID:2812] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training\u001b[39m\n",
+      "[2023-12-28 15:44:26,181] [INFO] [axolotl.load_model:538] [PID:2812] [RANK:0] converting modules to torch.bfloat16 for flash attention\u001b[39m\n",
+      "trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836\n",
+      "[2023-12-28 15:44:26,259] [INFO] [axolotl.load_model:568] [PID:2812] [RANK:0] GPU memory usage after adapters: 7.589GB (+1.753GB cache, +0.741GB misc)\u001b[39m\n",
+      "[2023-12-28 15:44:26,293] [INFO] [axolotl.train.log:60] [PID:2812] [RANK:0] Pre-saving adapter config to ./out\u001b[39m\n",
+      "[2023-12-28 15:44:26,296] [INFO] [axolotl.train.log:60] [PID:2812] [RANK:0] Starting trainer...\u001b[39m\n",
+      "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n",
+      "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n",
+      "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n",
+      "Detected CUDA files, patching ldflags\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n",
+      "Building extension module fused_adam...\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
+      "ninja: no work to do.\n",
+      "Loading extension module fused_adam...\n",
+      "Time to load fused_adam op: 0.05891108512878418 seconds\n",
+      "Loading extension module fused_adam...\n",
+      "Time to load fused_adam op: 0.10173463821411133 seconds\n",
+      "Loading extension module fused_adam...\n",
+      "Time to load fused_adam op: 0.10152459144592285 seconds\n",
+      "/opt/conda/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1699449201336/work/torch/csrc/tensor/python_tensor.cpp:83.)\n",
+      "  self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n",
+      "/opt/conda/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1699449201336/work/torch/csrc/tensor/python_tensor.cpp:83.)\n",
+      "  self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n",
+      "/opt/conda/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py:96: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/conda/conda-bld/pytorch_1699449201336/work/torch/csrc/tensor/python_tensor.cpp:83.)\n",
+      "  self._dummy_overflow_buf = get_accelerator().IntTensor([0])\n",
+      "Parameter Offload: Total persistent parameters: 3674112 in 193 params\n",
+      "  0%|                                                    | 0/17 [00:00<?, ?it/s]/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
+      "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
+      "/opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
+      "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
+      "/opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:322: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
+      "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
+      "{'loss': 2.0448, 'learning_rate': 2e-05, 'epoch': 0.06}                         \n",
+      "  6%|██▌                                         | 1/17 [00:28<07:32, 28.30s/it]\n",
+      "  0%|                                                     | 0/3 [00:00<?, ?it/s]\u001b[A\n",
+      " 67%|██████████████████████████████               | 2/3 [00:03<00:01,  1.85s/it]\u001b[A\n",
+      "                                                                                \u001b[A\n",
+      "\u001b[A{'eval_loss': 1.9694719314575195, 'eval_runtime': 11.391, 'eval_samples_per_second': 1.492, 'eval_steps_per_second': 0.263, 'epoch': 0.06}\n",
+      "  6%|██▌                                         | 1/17 [00:39<07:32, 28.30s/it]\n",
+      "100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.65s/it]\u001b[A\n",
+      "                                                                                \u001b[A[2023-12-28 15:45:35,358] [INFO] [axolotl.callbacks.on_step_end:122] [PID:2812] [RANK:0] GPU memory usage while training: 12.210GB (+4.259GB cache, +0.776GB misc)\u001b[39m\n",
+      " 12%|█████▏                                      | 2/17 [01:04<08:18, 33.20s/it][2023-12-28 15:45:35,358] [INFO] [axolotl.callbacks.on_step_end:122] [PID:2814] [RANK:2] GPU memory usage while training: 12.269GB (+4.522GB cache, +0.743GB misc)\u001b[39m\n",
+      "[2023-12-28 15:45:35,358] [INFO] [axolotl.callbacks.on_step_end:122] [PID:2813] [RANK:1] GPU memory usage while training: 12.283GB (+4.493GB cache, +0.743GB misc)\u001b[39m\n",
+      "{'loss': 2.0022, 'learning_rate': 4e-05, 'epoch': 0.12}                         \n",
+      "{'loss': 2.1054, 'learning_rate': 6e-05, 'epoch': 0.17}                         \n",
+      "{'loss': 1.9004, 'learning_rate': 8e-05, 'epoch': 0.23}                         \n",
+      "{'loss': 1.8794, 'learning_rate': 0.0001, 'epoch': 0.29}                        \n",
+      " 29%|████████████▉                               | 5/17 [02:20<05:23, 26.92s/it]\n",
+      "  0%|                                                     | 0/3 [00:00<?, ?it/s]\u001b[A\n",
+      " 67%|██████████████████████████████               | 2/3 [00:03<00:01,  1.88s/it]\u001b[A\n",
+      "                                                                                \u001b[A\n",
+      "\u001b[A{'eval_loss': 1.7912336587905884, 'eval_runtime': 11.3106, 'eval_samples_per_second': 1.503, 'eval_steps_per_second': 0.265, 'epoch': 0.29}\n",
+      " 29%|████████████▉                               | 5/17 [02:32<05:23, 26.92s/it]\n",
+      "100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.67s/it]\u001b[A\n",
+      "{'loss': 1.7871, 'learning_rate': 0.00012, 'epoch': 0.35}                       \u001b[A\n",
+      "{'loss': 1.7758, 'learning_rate': 0.00014, 'epoch': 0.4}                        \n",
+      "{'loss': 1.4645, 'learning_rate': 0.00016, 'epoch': 0.46}                       \n",
+      "{'loss': 1.4009, 'learning_rate': 0.00018, 'epoch': 0.52}                       \n",
+      "{'loss': 1.3927, 'learning_rate': 0.0002, 'epoch': 0.58}                        \n",
+      " 59%|█████████████████████████▎                 | 10/17 [04:38<03:04, 26.33s/it]\n",
+      "  0%|                                                     | 0/3 [00:00<?, ?it/s]\u001b[A\n",
+      " 67%|██████████████████████████████               | 2/3 [00:03<00:01,  1.89s/it]\u001b[A\n",
+      "                                                                                \u001b[A\n",
+      "\u001b[A{'eval_loss': 1.1426481008529663, 'eval_runtime': 11.3344, 'eval_samples_per_second': 1.5, 'eval_steps_per_second': 0.265, 'epoch': 0.58}\n",
+      " 59%|█████████████████████████▎                 | 10/17 [04:49<03:04, 26.33s/it]\n",
+      "100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.68s/it]\u001b[A\n",
+      "{'loss': 1.0122, 'learning_rate': 0.0001900968867902419, 'epoch': 0.63}         \u001b[A\n",
+      "{'loss': 1.0019, 'learning_rate': 0.00016234898018587337, 'epoch': 0.69}        \n",
+      "{'loss': 0.8976, 'learning_rate': 0.00012225209339563145, 'epoch': 0.75}        \n",
+      "{'loss': 0.9301, 'learning_rate': 7.774790660436858e-05, 'epoch': 0.81}         \n",
+      "{'loss': 0.8595, 'learning_rate': 3.7651019814126654e-05, 'epoch': 0.87}        \n",
+      " 88%|█████████████████████████████████████▉     | 15/17 [06:55<00:52, 26.17s/it]\n",
+      "  0%|                                                     | 0/3 [00:00<?, ?it/s]\u001b[A\n",
+      " 67%|██████████████████████████████               | 2/3 [00:03<00:01,  1.88s/it]\u001b[A\n",
+      "                                                                                \u001b[A\n",
+      "\u001b[A{'eval_loss': 0.8175248503684998, 'eval_runtime': 11.2932, 'eval_samples_per_second': 1.505, 'eval_steps_per_second': 0.266, 'epoch': 0.87}\n",
+      " 88%|█████████████████████████████████████▉     | 15/17 [07:06<00:52, 26.17s/it]\n",
+      "100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.67s/it]\u001b[A\n",
+      "{'loss': 0.7931, 'learning_rate': 9.903113209758096e-06, 'epoch': 0.92}         \u001b[A\n",
+      "{'loss': 0.6909, 'learning_rate': 0.0, 'epoch': 0.98}                           \n",
+      "100%|███████████████████████████████████████████| 17/17 [07:56<00:00, 28.03s/it]/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n",
+      "  warnings.warn(\n",
+      "{'train_runtime': 489.0649, 'train_samples_per_second': 0.63, 'train_steps_per_second': 0.035, 'train_loss': 1.408153467318591, 'epoch': 0.98}\n",
+      "100%|███████████████████████████████████████████| 17/17 [08:09<00:00, 28.77s/it]\n",
+      "[2023-12-28 15:52:39,488] [INFO] [axolotl.train.log:60] [PID:2812] [RANK:0] Training Completed!!! Saving pre-trained model to ./out\u001b[39m\n",
+      "\u001b[0m\u001b[0m\u001b[0m"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Training using the config.yml file and using deepspeed:zero3_bf16 the most aggressive optimization out of zero1,zero2,zero3 stages which partitions \n",
+    "not only optimizer states but also gradients and parameters across GPUs. The bf16 indicate mixed precision training using bfloat16.\n",
+    "For more information read axolotl's readme\n",
+    "\"\"\"\n",
+    "!accelerate launch -m axolotl.cli.train /folder/config.yml --deepspeed deepspeed/zero3_bf16.json"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/mistral/Mistral-7b-example/config.yml b/examples/mistral/Mistral-7b-example/config.yml
new file mode 100644
index 0000000000..84be18d15e
--- /dev/null
+++ b/examples/mistral/Mistral-7b-example/config.yml
@@ -0,0 +1,75 @@
+#Mistral-7b
+base_model: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: true
+
+load_in_8bit: true
+load_in_4bit: false
+strict: false
+
+datasets:
+  - path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
+    #for type,conversation arguments read axolotl readme and pick what is suited for your project, I wanted a chatbot and put sharegpt and chatml
+    type: sharegpt
+    conversation: chatml
+dataset_prepared_path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
+val_set_size: 0.05
+output_dir: ./out
+
+#using lora for lower cost
+adapter: lora
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - v_proj
+
+sequence_len: 512
+sample_packing: false
+pad_to_sequence_len: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+#only 2 epochs because of small dataset
+gradient_accumulation_steps: 3
+micro_batch_size: 2
+num_epochs: 2
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+evals_per_epoch: 4
+eval_table_size:
+eval_table_max_new_tokens: 128
+saves_per_epoch: 1
+debug:
+#default deepspeed, can use more aggresive if needed like zero2, zero3
+deepspeed: deepspeed/zero1.json
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  bos_token: "<s>"
+  eos_token: "</s>"
+  unk_token: "<unk>"
diff --git a/examples/mistral/Mistral-7b-example/data.jsonl b/examples/mistral/Mistral-7b-example/data.jsonl
new file mode 100644
index 0000000000..fb46ee920a
--- /dev/null
+++ b/examples/mistral/Mistral-7b-example/data.jsonl
@@ -0,0 +1,10 @@
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who is the Founder of Apple\""}, {"from": "gpt", "value": "\"<Chatbot>: The founder of Apple is Steve Jobs\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the capital of France?\""}, {"from": "gpt", "value": "\"<Chatbot>: The capital of France is Paris.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: How far is the Moon from Earth?\""}, {"from": "gpt", "value": "\"<Chatbot>: The Moon is approximately 384,400 kilometers from Earth.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the tallest mountain in the world?\""}, {"from": "gpt", "value": "\"<Chatbot>: The tallest mountain in the world is Mount Everest.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who wrote Romeo and Juliet?\""}, {"from": "gpt", "value": "\"<Chatbot>: Romeo and Juliet was written by William Shakespeare.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the boiling point of water?\""}, {"from": "gpt", "value": "\"<Chatbot>: The boiling point of water is 100 degrees Celsius.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: When was the first man on the moon?\""}, {"from": "gpt", "value": "\"<Chatbot>: The first man landed on the moon in 1969.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the largest ocean?\""}, {"from": "gpt", "value": "\"<Chatbot>: The largest ocean is the Pacific Ocean.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who invented the telephone?\""}, {"from": "gpt", "value": "\"<Chatbot>: The telephone was invented by Alexander Graham Bell.\""}]}
+{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the formula for water?\""}, {"from": "gpt", "value": "\"<Chatbot>: The chemical formula for water is H2O.\""}]}