From ec0958f4f846236ac2703dd644f6dac4365f64b4 Mon Sep 17 00:00:00 2001
From: Javier <55246586+Psancs05@users.noreply.github.com>
Date: Wed, 20 Sep 2023 14:40:49 +0200
Subject: [PATCH 01/77] Update requirements.txt (#610)
---
requirements.txt | 1 -
1 file changed, 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 4ef9f5fd2d..1e95b716ec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,4 +30,3 @@ scipy
scikit-learn==1.2.2
pynvml
art
-wandb
From 5d931cc042a8fc0e6b848dd9f78032ae1958b85f Mon Sep 17 00:00:00 2001
From: Maxime <672982+maximegmd@users.noreply.github.com>
Date: Thu, 21 Sep 2023 04:02:04 +0200
Subject: [PATCH 02/77] Only run tests when a change to python files is made
(#614)
* Update tests.yml
* Update .github/workflows/tests.yml
---------
Co-authored-by: Wing Lian
---
.github/workflows/tests.yml | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 18ff575c10..4da10a6c35 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,7 +4,11 @@ on:
push:
branches:
- "main"
+ paths:
+ - '**.py'
pull_request:
+ paths:
+ - '**.py'
workflow_dispatch:
jobs:
From c1382e79b6eb599f7fd210fd1a621b97bfed0d51 Mon Sep 17 00:00:00 2001
From: Maxime <672982+maximegmd@users.noreply.github.com>
Date: Thu, 21 Sep 2023 04:02:16 +0200
Subject: [PATCH 03/77] Create multi-node.md (#613)
* Create multi-node.md
* Update multi-node.md
* Update multi-node.md
---
docs/multi-node.md | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
create mode 100644 docs/multi-node.md
diff --git a/docs/multi-node.md b/docs/multi-node.md
new file mode 100644
index 0000000000..6806159690
--- /dev/null
+++ b/docs/multi-node.md
@@ -0,0 +1,45 @@
+# Multi Node
+
+You will need to create a configuration for accelerate, either by running `accelerate config` and following the instructions, or by using one of the presets below:
+
+~/.cache/huggingface/accelerate/default_config.yaml
+```yaml
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: FSDP
+downcast_bf16: 'no'
+machine_rank: 0 # Set to 0 for the main machine, increment by one for other machines
+main_process_ip: 10.0.0.4 # Set to main machine's IP
+main_process_port: 5000
+main_training_function: main
+mixed_precision: bf16
+num_machines: 2 # Change to the number of machines
+num_processes: 4 # Total number of GPUs across all machines (for example, if you have 2 machines with 4 GPUs each, put 8)
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
+```
+
+Configure your model to use FSDP, for example:
+```yaml
+fsdp:
+ - full_shard
+ - auto_wrap
+fsdp_config:
+ fsdp_offload_params: true
+ fsdp_state_dict_type: FULL_STATE_DICT
+ fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+```
+
+## Machine configuration
+
+On each machine you need a copy of Axolotl; we suggest checking out the same commit on every machine to ensure compatibility.
+
+You will also need to have the same configuration file for your model on each machine.
+
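+A minimal sketch of preparing one machine is shown below (the commit SHA is a placeholder for whichever commit you are standardizing on):
+```bash
+git clone https://github.com/OpenAccess-AI-Collective/axolotl
+cd axolotl
+git checkout <commit-sha>   # use the same commit on every machine
+pip3 install packaging
+pip3 install -e '.[flash-attn,deepspeed]'
+```
+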
+On the main machine only, make sure the port you set as `main_process_port` is open for TCP traffic and reachable by the other machines.
+
+All that's left is to launch with accelerate on each machine as you normally would; once accelerate has been launched on every machine, the processes will rendezvous and training will start.
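+
+For example, a minimal sketch (assuming your model config is saved as `config.yml` on every machine; adapt the path to your setup):
+```bash
+# Run the same command on every machine; machine_rank in each machine's
+# accelerate config determines which node acts as rank 0.
+accelerate launch -m axolotl.cli.train config.yml
+```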
From 2fe95cdcc102888dc559a8be13e7ccee7d56874b Mon Sep 17 00:00:00 2001
From: Maxime <672982+maximegmd@users.noreply.github.com>
Date: Thu, 21 Sep 2023 15:11:34 +0200
Subject: [PATCH 04/77] fix distributed devices (#612)
* fix distributed devices
* Update distributed.py
* Update distributed.py
---
src/axolotl/utils/distributed.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/axolotl/utils/distributed.py b/src/axolotl/utils/distributed.py
index 10f06538ab..9a1c689fb7 100644
--- a/src/axolotl/utils/distributed.py
+++ b/src/axolotl/utils/distributed.py
@@ -77,7 +77,9 @@ def gather_scalar_from_all_ranks(fn, world_size=1): # pylint: disable=invalid-n
value_scalar = fn()
if not is_distributed():
return [value_scalar]
- value_tensor = torch.tensor(value_scalar, device=dist.get_rank()).float()
+ value_tensor = torch.tensor(
+ value_scalar, device=torch.cuda.current_device()
+ ).float()
if not is_main_process():
dist.gather(value_tensor, dst=0)
@@ -137,9 +139,13 @@ def compute_and_broadcast(fn): # pylint: disable=invalid-name
"""
if is_main_process():
value_scalar = fn()
- value_tensor = torch.tensor(value_scalar, device=dist.get_rank()).float()
+ value_tensor = torch.tensor(
+ value_scalar, device=torch.cuda.current_device()
+ ).float()
else:
- value_tensor = torch.tensor(0.0, device=dist.get_rank()) # Placeholder tensor
+ value_tensor = torch.tensor(
+ 0.0, device=torch.cuda.current_device()
+ ) # Placeholder tensor
# Broadcast the tensor to all processes.
barrier()
@@ -164,7 +170,9 @@ def gather_from_all_ranks(fn, world_size=1): # pylint: disable=invalid-name
- A list of computed values from all ranks if on the gathering rank, otherwise None.
"""
value_scalar = fn()
- value_tensor = torch.tensor(value_scalar, device=dist.get_rank()).float()
+ value_tensor = torch.tensor(
+ value_scalar, device=torch.cuda.current_device()
+ ).float()
# Placeholder tensor for gathering results
if is_main_process():
From 92512c390b86cca5c2a33d51538afb016b3416e8 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 11:50:09 -0400
Subject: [PATCH 05/77] ignore wandb to resolve isort headaches (#619)
---
.gitignore | 4 ++++
.isort.cfg | 1 +
2 files changed, 5 insertions(+)
diff --git a/.gitignore b/.gitignore
index 614a6676b1..f46b6808f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,3 +161,7 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
+
+# WandB
+# wandb creates a folder to store logs for training runs
+wandb
diff --git a/.isort.cfg b/.isort.cfg
index b9fb3f3e8c..79067a7c91 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,2 +1,3 @@
[settings]
profile=black
+known_third_party=wandb
From 196ff1181ebd7dcb6c14d592627ed81c8b434a69 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 15:20:31 -0400
Subject: [PATCH 06/77] skip the gpu memory checks if the device is set to
'auto' (#609)
* skip the gpu memory checks if the device is set to 'auto'
* skip gpu mem logging if cpu too
* don't worry about log_gpu_memory_usage since it calls another annotated fn
* rename decorator internal
---
src/axolotl/utils/bench.py | 30 +++++++++++++++++++++++++++---
1 file changed, 27 insertions(+), 3 deletions(-)
diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py
index b460b2ba7c..685be526f0 100644
--- a/src/axolotl/utils/bench.py
+++ b/src/axolotl/utils/bench.py
@@ -1,14 +1,40 @@
"""Benchmarking and measurement utilities"""
+import functools
import pynvml
import torch
from pynvml.nvml import NVMLError
+def check_cuda_device(default_value):
+ """
+ wraps a function and returns the default value instead of running the
+ wrapped function if cuda isn't available or the device is auto
+ :param default_value:
+ :return:
+ """
+
+ def deco(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ device = kwargs.get("device", args[0] if args else None)
+
+ if not torch.cuda.is_available() or device == "auto" or device == "cpu":
+ return default_value
+
+ return func(*args, **kwargs)
+
+ return wrapper
+
+ return deco
+
+
+@check_cuda_device(0.0)
def gpu_memory_usage(device=0):
return torch.cuda.memory_allocated(device) / 1024.0**3
+@check_cuda_device((0.0, 0.0, 0.0))
def gpu_memory_usage_all(device=0):
usage = torch.cuda.memory_allocated(device) / 1024.0**3
reserved = torch.cuda.memory_reserved(device) / 1024.0**3
@@ -16,6 +42,7 @@ def gpu_memory_usage_all(device=0):
return usage, reserved - usage, max(0, smi - reserved)
+@check_cuda_device(0.0)
def gpu_memory_usage_smi(device=0):
if isinstance(device, torch.device):
device = device.index
@@ -31,9 +58,6 @@ def gpu_memory_usage_smi(device=0):
def log_gpu_memory_usage(log, msg, device):
- if not torch.cuda.is_available() or device == "auto":
- return (0, 0, 0)
-
usage, cache, misc = gpu_memory_usage_all(device)
extras = []
if cache > 0:
From e85d2eb06b7a5ad613f2f989296ec0ebd3f22335 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 20:36:30 -0400
Subject: [PATCH 07/77] let MAX_JOBS use the default since we're not resource
constrained on our self-hosted runners (#427)
---
docker/Dockerfile-base | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
index 643a3bfb62..d1c02e69d8 100644
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -46,6 +46,8 @@ FROM base-builder AS bnb-builder
WORKDIR /workspace
ARG CUDA="118"
ENV CUDA=$CUDA
+ARG MAX_JOBS="-1"
+ENV MAX_JOBS=$MAX_JOBS
RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
cd bitsandbytes && \
@@ -60,8 +62,7 @@ ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
# recompile apex
RUN python3 -m pip uninstall -y apex
RUN git clone https://github.com/NVIDIA/apex
-# `MAX_JOBS=1` disables parallel building to avoid cpu memory OOM when building image on GitHub Action (standard) runners
-RUN cd apex && MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+RUN cd apex && python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
RUN mkdir -p /workspace/builds
COPY --from=bnb-builder /workspace/bitsandbytes /workspace/builds/bitsandbytes
From 2844eb22b63fecfec8ba98ee3f6fc5ac2940bb5a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 21:51:09 -0400
Subject: [PATCH 08/77] run eval on the first step to get a baseline (#617)
* run eval on the first step to get a baseline
* wandb keeps getting moved around by pre-commit ...
---
src/axolotl/utils/callbacks.py | 23 +++++++++++++++++++++++
src/axolotl/utils/trainer.py | 2 ++
2 files changed, 25 insertions(+)
diff --git a/src/axolotl/utils/callbacks.py b/src/axolotl/utils/callbacks.py
index bf8c4145bd..236caeea46 100644
--- a/src/axolotl/utils/callbacks.py
+++ b/src/axolotl/utils/callbacks.py
@@ -66,6 +66,29 @@ def on_save(
return control
+class EvalFirstStepCallback(
+ TrainerCallback
+): # pylint: disable=too-few-public-methods disable=unused-argument
+ """
+ Callback to trigger evals on the first step
+ """
+
+ def on_step_end(
+ self,
+ args: TrainingArguments,
+ state: TrainerState,
+ control: TrainerControl,
+ **kwargs,
+ ):
+ if (
+ args.evaluation_strategy == IntervalStrategy.STEPS
+ and args.eval_steps < 1.0
+ and state.global_step == 1
+ ):
+ control.should_evaluate = True
+ return control
+
+
class SaveBetterTransformerModelCallback(
TrainerCallback
): # pylint: disable=too-few-public-methods
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 2067a90069..944ac5f511 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -28,6 +28,7 @@
from axolotl.monkeypatch.relora import ReLoRACallback, ReLoRAScheduler
from axolotl.utils.callbacks import (
+ EvalFirstStepCallback,
GPUStatsCallback,
SaveBetterTransformerModelCallback,
SavePeftModelCallback,
@@ -704,6 +705,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
callbacks = []
callbacks.append(GPUStatsCallback(cfg))
+ callbacks.append(EvalFirstStepCallback)
if cfg.relora_steps:
callbacks.append(ReLoRACallback(cfg))
From 97d3776ce630e70e9125828a77a4f42517d48d61 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 21:51:25 -0400
Subject: [PATCH 09/77] split completion text to sequence_len (#616)
---
src/axolotl/datasets.py | 5 ++
src/axolotl/prompt_strategies/completion.py | 78 ++++++++++++++++++++-
src/axolotl/prompt_tokenizers.py | 49 ++-----------
src/axolotl/prompters.py | 14 ----
4 files changed, 87 insertions(+), 59 deletions(-)
diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py
index 75d8432da8..1dccb6d526 100644
--- a/src/axolotl/datasets.py
+++ b/src/axolotl/datasets.py
@@ -38,10 +38,15 @@ def __init__( # pylint: disable=super-init-not-called
def process(self, dataset):
features = dataset.features.keys()
num_proc = min(64, os.cpu_count())
+ map_kwargs = {}
+ if self.prompt_tokenizer.supports_batched:
+ map_kwargs["batched"] = True
+ map_kwargs["batch_size"] = 100
return dataset.map(
self.prompt_tokenizer.tokenize_prompt,
num_proc=num_proc,
remove_columns=features,
+ **map_kwargs,
)
diff --git a/src/axolotl/prompt_strategies/completion.py b/src/axolotl/prompt_strategies/completion.py
index ee5b4cb3e8..3285e667cb 100644
--- a/src/axolotl/prompt_strategies/completion.py
+++ b/src/axolotl/prompt_strategies/completion.py
@@ -1,10 +1,81 @@
"""
Basic completion text
"""
-from typing import Any, Dict, Optional
+from collections import defaultdict
+from typing import Any, Dict, Generator, Optional, Tuple
-from axolotl.prompt_tokenizers import CompletionPromptTokenizingStrategy
-from axolotl.prompters import CompletionPrompter
+from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy
+
+
+class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
+ """
+ Tokenizing strategy for Completion prompts.
+ """
+
+ _field: str = "text"
+
+ def __init__(self, *args, max_length=None, **kwargs):
+ super().__init__(*args, **kwargs)
+ if max_length is not None:
+ self.max_length = max_length
+
+ @property
+ def supports_batched(self):
+ return True
+
+ @property
+ def field(self) -> str:
+ return self._field
+
+ @field.setter
+ def field(self, new_field: str):
+ self._field = new_field
+
+ def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
+ return (
+ prompt[self.field],
+ "",
+ "",
+ )
+
+ def tokenize_prompt(self, prompt):
+ res = defaultdict(lambda: [])
+ feature_names = list(prompt.keys())
+ for row in zip(*prompt.values()):
+ prompt_row = dict(zip(feature_names, row))
+ (
+ instruction,
+ _,
+ _,
+ ) = self.parse_instruction_fields(prompt_row)
+
+ full_prompt = self._build_full_prompt(instruction, None, None)
+ tokenized_full_prompt = self._tokenize(full_prompt)
+
+ for key, val in tokenized_full_prompt.items():
+ for i in range(0, len(val), self.sequence_len):
+ res[key].append(val[i : i + self.sequence_len])
+
+ return dict(res)
+
+ def _build_full_prompt(
+ self, instruction, input, response
+ ): # pylint: disable=redefined-builtin
+ return next(iter(self.prompter.build_prompt(instruction, input, response)))
+
+
+class CompletionPrompter:
+ """
+ Prompter for completion
+ """
+
+ def build_prompt(
+ self,
+ instruction: str,
+ input=None, # pylint: disable=redefined-builtin, unused-argument
+ output=None, # pylint: disable=unused-argument
+ ) -> Generator[str, None, None]:
+ yield instruction
def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
@@ -13,6 +84,7 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
+ max_length=cfg.sequence_len * 64,
)
if ds_cfg and "field" in ds_cfg:
strat.field = ds_cfg["field"]
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index f30d0e3832..9553e3e8fe 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -41,11 +41,16 @@ def __init__(
self.tokenizer: PreTrainedTokenizer = tokenizer
self.train_on_inputs = train_on_inputs
self.sequence_len = sequence_len
+ self.max_length = sequence_len
@abc.abstractmethod
def tokenize_prompt(self, prompt):
pass
+ @property
+ def supports_batched(self):
+ return False
+
@functools.lru_cache(maxsize=128)
def _get_user_token(self):
try:
@@ -77,7 +82,7 @@ def _tokenize(
result = self.tokenizer(
prompt,
truncation=True,
- max_length=self.sequence_len,
+ max_length=self.max_length,
padding=False,
return_tensors=None,
)
@@ -86,7 +91,7 @@ def _tokenize(
if (
len(result["input_ids"]) > 0
and result["input_ids"][-1] != self.tokenizer.eos_token_id
- and len(result["input_ids"]) < self.sequence_len
+ and len(result["input_ids"]) < self.max_length
and add_eos_token
):
result["input_ids"].append(self.tokenizer.eos_token_id)
@@ -247,46 +252,6 @@ def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
)
-class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
- """
- Tokenizing strategy for Completion prompts.
- """
-
- _field: str = "text"
-
- @property
- def field(self) -> str:
- return self._field
-
- @field.setter
- def field(self, new_field: str):
- self._field = new_field
-
- def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
- return (
- prompt[self.field],
- "",
- "",
- )
-
- def tokenize_prompt(self, prompt):
- (
- instruction,
- _,
- _,
- ) = self.parse_instruction_fields(prompt)
-
- full_prompt = self._build_full_prompt(instruction, None, None)
- tokenized_full_prompt = self._tokenize(full_prompt)
-
- return tokenized_full_prompt
-
- def _build_full_prompt(
- self, instruction, input, response
- ): # pylint: disable=redefined-builtin
- return next(iter(self.prompter.build_prompt(instruction, input, response)))
-
-
class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy):
"""
Tokenizing strategy for Reflection prompts.
diff --git a/src/axolotl/prompters.py b/src/axolotl/prompters.py
index 5322a10182..d6d14c3694 100644
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -135,20 +135,6 @@ def match_prompt_style(self):
self.turn_no_input_format = "USER: Summarize the following article as a TL;DR.\n{instruction}\nASSISTANT:"
-class CompletionPrompter:
- """
- Prompter for completion
- """
-
- def build_prompt(
- self,
- instruction: str,
- input=None, # pylint: disable=redefined-builtin, unused-argument
- output=None, # pylint: disable=unused-argument
- ) -> Generator[str, None, None]:
- yield instruction
-
-
class GPTeacherPrompter(AlpacaPrompter):
"""
Prompter for GPTeacher
From 03e59077a008715099ea35ebf3fb0a9358f951e8 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 21 Sep 2023 21:52:12 -0400
Subject: [PATCH 10/77] misc fixes to add gptq tests (#621)
* misc fixes to add gptq tests
* set bf16 needed for fa2
---
src/axolotl/utils/bench.py | 6 +++-
src/axolotl/utils/models.py | 35 ++++++++++++++--------
src/axolotl/utils/trainer.py | 1 +
tests/e2e/test_lora_llama.py | 58 ++++++++++++++++++++++++++++++++++--
tests/e2e/test_phi.py | 14 +++++----
5 files changed, 93 insertions(+), 21 deletions(-)
diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py
index 685be526f0..40be0d9ac8 100644
--- a/src/axolotl/utils/bench.py
+++ b/src/axolotl/utils/bench.py
@@ -19,7 +19,11 @@ def deco(func):
def wrapper(*args, **kwargs):
device = kwargs.get("device", args[0] if args else None)
- if not torch.cuda.is_available() or device == "auto" or device == "cpu":
+ if (
+ not torch.cuda.is_available()
+ or device == "auto"
+ or torch.device(device).type == "cpu"
+ ):
return default_value
return func(*args, **kwargs)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index a349776d77..543a0e1a13 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -10,6 +10,7 @@
import transformers
from optimum.bettertransformer import BetterTransformer
from peft import PeftConfig, prepare_model_for_kbit_training
+from peft.tuners.lora import QuantLinear
from transformers import ( # noqa: F401
AutoConfig,
AutoModelForCausalLM,
@@ -309,16 +310,26 @@ def load_model(
):
config.max_sequence_length = cfg.sequence_len
LOG.warning(f"increasing context length to {cfg.sequence_len}")
- model = AutoModelForCausalLM.from_pretrained(
- base_model,
- config=config,
- device_map=cfg.device_map,
- load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
- load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
- trust_remote_code=cfg.trust_remote_code or False,
- **model_kwargs,
- )
+ if cfg.gptq:
+ model = AutoModelForCausalLM.from_pretrained(
+ base_model,
+ config=config,
+ device_map=cfg.device_map,
+ torch_dtype=cfg.torch_dtype,
+ trust_remote_code=cfg.trust_remote_code or False,
+ **model_kwargs,
+ )
+ else:
+ model = AutoModelForCausalLM.from_pretrained(
+ base_model,
+ config=config,
+ device_map=cfg.device_map,
+ load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
+ load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
+ torch_dtype=cfg.torch_dtype,
+ trust_remote_code=cfg.trust_remote_code or False,
+ **model_kwargs,
+ )
except Exception as err: # pylint: disable=broad-exception-caught
LOG.error(
"Exception raised attempting to load model, retrying with AutoModelForCausalLM"
@@ -466,10 +477,10 @@ def load_llama_adapter(model, cfg):
def find_all_linear_names(model):
- cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear)
+ cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear)
lora_module_names = set()
for name, module in model.named_modules():
- if isinstance(module, cls):
+ if isinstance(module, cls) or "Linear" in module.__class__.__name__:
names = name.split(".")
lora_module_names.add(names[0] if len(names) == 1 else names[-1])
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 944ac5f511..a4ec1553ef 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -676,6 +676,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
(cfg.load_best_model_at_end is not False or cfg.early_stopping_patience)
and cfg.val_set_size > 0
and cfg.save_steps
+ and cfg.eval_steps
and cfg.save_steps % cfg.eval_steps == 0
)
or False,
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index fbca33633e..7d4b75cceb 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -6,6 +6,7 @@
import os
import tempfile
import unittest
+from pathlib import Path
from axolotl.cli import load_datasets
from axolotl.common.cli import TrainerCliArgs
@@ -24,6 +25,7 @@ class TestLoraLlama(unittest.TestCase):
def test_lora(self):
# pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
@@ -51,7 +53,7 @@ def test_lora(self):
"num_epochs": 2,
"micro_batch_size": 8,
"gradient_accumulation_steps": 1,
- "output_dir": tempfile.mkdtemp(),
+ "output_dir": output_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch",
"lr_scheduler": "cosine",
@@ -62,9 +64,11 @@ def test_lora(self):
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
def test_lora_packing(self):
# pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
cfg = DictDefault(
{
"base_model": "JackFram/llama-68m",
@@ -94,7 +98,7 @@ def test_lora_packing(self):
"num_epochs": 2,
"micro_batch_size": 8,
"gradient_accumulation_steps": 1,
- "output_dir": tempfile.mkdtemp(),
+ "output_dir": output_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch",
"lr_scheduler": "cosine",
@@ -105,3 +109,53 @@ def test_lora_packing(self):
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
+
+ def test_lora_gptq(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
+ "base_model_config": "TheBlokeAI/jackfram_llama-68m-GPTQ",
+ "model_type": "AutoModelForCausalLM",
+ "tokenizer_type": "LlamaTokenizer",
+ "sequence_len": 1024,
+ "sample_packing": True,
+ "flash_attention": True,
+ "load_in_8bit": True,
+ "adapter": "lora",
+ "gptq": True,
+ "gptq_disable_exllama": True,
+ "lora_r": 32,
+ "lora_alpha": 64,
+ "lora_dropout": 0.05,
+ "lora_target_linear": True,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "save_steps": 0.5,
+ "micro_batch_size": 8,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ }
+ )
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
diff --git a/tests/e2e/test_phi.py b/tests/e2e/test_phi.py
index fb8aa5d875..a84ef0778c 100644
--- a/tests/e2e/test_phi.py
+++ b/tests/e2e/test_phi.py
@@ -31,9 +31,9 @@ def test_ft(self):
"trust_remote_code": True,
"model_type": "MixFormerSequentialForCausalLM",
"tokenizer_type": "AutoTokenizer",
- "sequence_len": 2048,
+ "sequence_len": 512,
"sample_packing": False,
- "load_in_8bit": True,
+ "load_in_8bit": False,
"adapter": None,
"val_set_size": 0.1,
"special_tokens": {
@@ -55,8 +55,9 @@ def test_ft(self):
"gradient_accumulation_steps": 1,
"output_dir": tempfile.mkdtemp(),
"learning_rate": 0.00001,
- "optimizer": "adamw_torch",
+ "optimizer": "adamw_bnb_8bit",
"lr_scheduler": "cosine",
+ "bf16": True,
}
)
normalize_config(cfg)
@@ -74,9 +75,9 @@ def test_ft_packed(self):
"trust_remote_code": True,
"model_type": "MixFormerSequentialForCausalLM",
"tokenizer_type": "AutoTokenizer",
- "sequence_len": 2048,
+ "sequence_len": 512,
"sample_packing": True,
- "load_in_8bit": True,
+ "load_in_8bit": False,
"adapter": None,
"val_set_size": 0.1,
"special_tokens": {
@@ -98,8 +99,9 @@ def test_ft_packed(self):
"gradient_accumulation_steps": 1,
"output_dir": tempfile.mkdtemp(),
"learning_rate": 0.00001,
- "optimizer": "adamw_torch",
+ "optimizer": "adamw_bnb_8bit",
"lr_scheduler": "cosine",
+ "bf16": True,
}
)
normalize_config(cfg)
From d5f8589021ad1bc854b82f18b582b91f593504db Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 22 Sep 2023 12:31:33 +0900
Subject: [PATCH 11/77] chore(callback): Remove old peft saving code (#510)
---
src/axolotl/utils/callbacks.py | 23 -----------------------
src/axolotl/utils/trainer.py | 7 -------
2 files changed, 30 deletions(-)
diff --git a/src/axolotl/utils/callbacks.py b/src/axolotl/utils/callbacks.py
index 236caeea46..9a7ebe9517 100644
--- a/src/axolotl/utils/callbacks.py
+++ b/src/axolotl/utils/callbacks.py
@@ -43,29 +43,6 @@
IGNORE_INDEX = -100
-class SavePeftModelCallback(TrainerCallback): # pylint: disable=too-few-public-methods
- """Callback to save the PEFT adapter"""
-
- def on_save(
- self,
- args: TrainingArguments,
- state: TrainerState,
- control: TrainerControl,
- **kwargs,
- ):
- checkpoint_folder = os.path.join(
- args.output_dir,
- f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}",
- )
-
- peft_model_path = os.path.join(checkpoint_folder, "adapter_model")
- kwargs["model"].save_pretrained(
- peft_model_path, save_safetensors=args.save_safetensors
- )
-
- return control
-
-
class EvalFirstStepCallback(
TrainerCallback
): # pylint: disable=too-few-public-methods disable=unused-argument
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index a4ec1553ef..a2657e05ab 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -31,7 +31,6 @@
EvalFirstStepCallback,
GPUStatsCallback,
SaveBetterTransformerModelCallback,
- SavePeftModelCallback,
bench_eval_callback_factory,
log_prediction_callback_factory,
)
@@ -711,12 +710,6 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
if cfg.relora_steps:
callbacks.append(ReLoRACallback(cfg))
- if cfg.local_rank == 0 and cfg.adapter in [
- "lora",
- "qlora",
- ]: # only save in rank 0
- callbacks.append(SavePeftModelCallback)
-
if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True:
callbacks.append(SaveBetterTransformerModelCallback)
From c25ba7939b35dbd9589bc694ea06c3490e8f9b54 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 22 Sep 2023 00:15:52 -0400
Subject: [PATCH 12/77] update README w deepspeed info (#605)
---
README.md | 30 +++++++++++-----
deepspeed/zero1.json | 76 ++++++++++++++++++++--------------------
deepspeed/zero2.json | 82 +++++++++++++++++++++++---------------------
deepspeed/zero3.json | 4 ++-
requirements.txt | 1 +
setup.py | 9 +++--
6 files changed, 114 insertions(+), 88 deletions(-)
diff --git a/README.md b/README.md
index c9b935f53d..6e91158934 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ Features:
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
- [Config](#config)
- [Train](#train)
+ - [Training w/ Deepspeed](#training-with-deepspeed)
- [Inference](#inference)
- [Merge LORA to Base](#merge-lora-to-base)
- [Common Errors](#common-errors-)
@@ -86,7 +87,7 @@ git clone https://github.com/OpenAccess-AI-Collective/axolotl
cd axolotl
pip3 install packaging
-pip3 install -e .[flash-attn]
+pip3 install -e .[flash-attn,deepspeed]
pip3 install -U git+https://github.com/huggingface/peft.git
# finetune lora
@@ -121,7 +122,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
3. Install axolotl along with python dependencies
```bash
pip3 install packaging
- pip3 install -e .[flash-attn]
+ pip3 install -e .[flash-attn,deepspeed]
```
- LambdaLabs
@@ -157,7 +158,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
cd axolotl
pip3 install packaging
- pip3 install -e .[flash-attn]
+ pip3 install -e .[flash-attn,deepspeed]
pip3 install protobuf==3.20.3
pip3 install -U --ignore-installed requests Pillow psutil scipy
```
@@ -715,11 +716,6 @@ fsdp_config:
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
```
-- llama Deepspeed
-```yaml
-deepspeed: deepspeed/zero3.json
-```
-
##### Weights & Biases Logging
- wandb options
@@ -732,6 +728,24 @@ wandb_run_id:
wandb_log_model:
```
+### Training with Deepspeed
+
+Deepspeed is an optimization suite for multi-GPU systems that lets you train much larger models than would
+typically fit into your GPUs' VRAM. More information about the various deepspeed optimization types
+is available at https://huggingface.co/docs/accelerate/main/en/usage_guides/deepspeed#what-is-integrated
+
+We provide several default deepspeed JSON configurations for ZeRO stages 1, 2, and 3.
+
+```shell
+accelerate launch -m axolotl.cli.train examples/llama-2/config.py --deepspeed deepspeed/zero1.json
+```
+
+or, in your axolotl config:
+
+```yaml
+deepspeed: deepspeed/zero1.json
+```
+
### Inference
Pass the appropriate flag to the train command:
diff --git a/deepspeed/zero1.json b/deepspeed/zero1.json
index 39d648b2b5..85bc586091 100644
--- a/deepspeed/zero1.json
+++ b/deepspeed/zero1.json
@@ -1,39 +1,41 @@
{
- "zero_optimization": {
- "stage": 1,
- "overlap_comm": true
- },
- "bf16": {
- "enabled": "auto"
- },
- "fp16": {
- "enabled": "auto",
- "auto_cast": false,
- "loss_scale": 0,
- "initial_scale_power": 32,
- "loss_scale_window": 1000,
- "hysteresis": 2,
- "min_loss_scale": 1
- },
- "optimizer": {
- "type": "AdamW",
- "params": {
- "lr": "auto",
- "betas": "auto",
- "eps": "auto",
- "weight_decay": "auto"
- }
- },
- "scheduler": {
- "type": "WarmupDecayLR",
- "params": {
- "warmup_min_lr": "auto",
- "warmup_max_lr": "auto",
- "warmup_num_steps": "auto",
- "total_num_steps": "auto"
- }
- },
- "train_batch_size": "auto",
- "train_micro_batch_size_per_gpu": "auto",
- "wall_clock_breakdown": false
+ "zero_optimization": {
+ "stage": 1,
+ "overlap_comm": true
+ },
+ "bf16": {
+ "enabled": "auto"
+ },
+ "fp16": {
+ "enabled": "auto",
+ "auto_cast": false,
+ "loss_scale": 0,
+ "initial_scale_power": 32,
+ "loss_scale_window": 1000,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "optimizer": {
+ "type": "AdamW",
+ "params": {
+ "lr": "auto",
+ "betas": "auto",
+ "eps": "auto",
+ "weight_decay": "auto"
+ }
+ },
+ "scheduler": {
+ "type": "WarmupDecayLR",
+ "params": {
+ "warmup_min_lr": "auto",
+ "warmup_max_lr": "auto",
+ "warmup_num_steps": "auto",
+ "warmup_type": "linear",
+ "total_num_steps": "auto"
+ }
+ },
+ "gradient_accumulation_steps": "auto",
+ "train_batch_size": "auto",
+ "train_micro_batch_size_per_gpu": "auto",
+ "wall_clock_breakdown": false
}
diff --git a/deepspeed/zero2.json b/deepspeed/zero2.json
index 16f2710f69..18ec4bd0e3 100644
--- a/deepspeed/zero2.json
+++ b/deepspeed/zero2.json
@@ -1,43 +1,45 @@
{
- "zero_optimization": {
- "stage": 2,
- "offload_optimizer": {
- "device": "cpu"
- },
- "contiguous_gradients": true,
- "overlap_comm": true
+ "zero_optimization": {
+ "stage": 2,
+ "offload_optimizer": {
+ "device": "cpu"
},
- "bf16": {
- "enabled": "auto"
- },
- "fp16": {
- "enabled": "auto",
- "auto_cast": false,
- "loss_scale": 0,
- "initial_scale_power": 32,
- "loss_scale_window": 1000,
- "hysteresis": 2,
- "min_loss_scale": 1
- },
- "optimizer": {
- "type": "AdamW",
- "params": {
- "lr": "auto",
- "betas": "auto",
- "eps": "auto",
- "weight_decay": "auto"
- }
- },
- "scheduler": {
- "type": "WarmupDecayLR",
- "params": {
- "warmup_min_lr": "auto",
- "warmup_max_lr": "auto",
- "warmup_num_steps": "auto",
- "total_num_steps": "auto"
- }
- },
- "train_batch_size": "auto",
- "train_micro_batch_size_per_gpu": "auto",
- "wall_clock_breakdown": false
+ "contiguous_gradients": true,
+ "overlap_comm": true
+ },
+ "bf16": {
+ "enabled": "auto"
+ },
+ "fp16": {
+ "enabled": "auto",
+ "auto_cast": false,
+ "loss_scale": 0,
+ "initial_scale_power": 32,
+ "loss_scale_window": 1000,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "optimizer": {
+ "type": "AdamW",
+ "params": {
+ "lr": "auto",
+ "betas": "auto",
+ "eps": "auto",
+ "weight_decay": "auto"
+ }
+ },
+ "scheduler": {
+ "type": "WarmupDecayLR",
+ "params": {
+ "warmup_min_lr": "auto",
+ "warmup_max_lr": "auto",
+ "warmup_num_steps": "auto",
+ "warmup_type": "linear",
+ "total_num_steps": "auto"
+ }
+ },
+ "gradient_accumulation_steps": "auto",
+ "train_batch_size": "auto",
+ "train_micro_batch_size_per_gpu": "auto",
+ "wall_clock_breakdown": false
}
diff --git a/deepspeed/zero3.json b/deepspeed/zero3.json
index 3f5dff3302..5da9c07e28 100644
--- a/deepspeed/zero3.json
+++ b/deepspeed/zero3.json
@@ -45,9 +45,11 @@
"params": {
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
- "warmup_num_steps": "auto"
+ "warmup_num_steps": "auto",
+ "warmup_type": "linear"
}
},
+ "gradient_accumulation_steps": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
diff --git a/requirements.txt b/requirements.txt
index 1e95b716ec..5aba20b161 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ peft @ git+https://github.com/huggingface/peft.git
transformers @ git+https://github.com/huggingface/transformers.git
bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate
+deepspeed
addict
evaluate
fire
diff --git a/setup.py b/setup.py
index fca5088da1..3109d4af1a 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,12 @@ def parse_requirements():
# Handle custom index URLs
_, url = line.split()
_dependency_links.append(url)
- elif "flash-attn" not in line and line and line[0] != "#":
+ elif (
+ "flash-attn" not in line
+ and "deepspeed" not in line
+ and line
+ and line[0] != "#"
+ ):
# Handle standard packages
_install_requires.append(line)
return _install_requires, _dependency_links
@@ -35,7 +40,7 @@ def parse_requirements():
"flash-attn": [
"flash-attn>=2.2.1",
],
- "extras": [
+ "deepspeed": [
"deepspeed",
],
},
From 501958bb6fa8bbf0991080f8d6b537fd69665142 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 22 Sep 2023 16:13:26 -0400
Subject: [PATCH 13/77] create a model card with axolotl badge (#624)
---
src/axolotl/train.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/axolotl/train.py b/src/axolotl/train.py
index 5ed5837f21..da98600a45 100644
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -9,8 +9,7 @@
from typing import Optional
import torch
-
-# add src to the pythonpath so we don't need to pip install this
+import transformers.modelcard
from datasets import Dataset
from optimum.bettertransformer import BetterTransformer
@@ -103,6 +102,9 @@ def terminate_handler(_, __, model):
signal.SIGINT, lambda signum, frame: terminate_handler(signum, frame, model)
)
+ badge_markdown = """[](https://github.com/OpenAccess-AI-Collective/axolotl)"""
+ transformers.modelcard.AUTOGENERATED_TRAINER_COMMENT += f"\n{badge_markdown}"
+
LOG.info("Starting trainer...")
if cfg.group_by_length:
LOG.info("hang tight... sorting dataset for group_by_length")
@@ -138,4 +140,7 @@ def terminate_handler(_, __, model):
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
+ if not cfg.hub_model_id:
+ trainer.create_model_card(model_name=cfg.output_dir.lstrip("./"))
+
return model, tokenizer
From a363604dcff72b965e7224f758f1a6421c6bda7a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 22 Sep 2023 16:13:42 -0400
Subject: [PATCH 14/77] better handling and logging of empty sharegpt turns
(#603)
---
src/axolotl/prompt_tokenizers.py | 41 ++++++++++-----
tests/fixtures/conversation.missingturns.json | 52 +++++++++++++++++++
tests/test_prompt_tokenizers.py | 26 ++++++++++
3 files changed, 105 insertions(+), 14 deletions(-)
create mode 100644 tests/fixtures/conversation.missingturns.json
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index 9553e3e8fe..a74da71da5 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -358,10 +358,12 @@ def tokenize_prompt(self, prompt):
):
if isinstance(part, tuple):
if part[0] == "USER:":
- part = part[0] + part[1] if not user_token else part[1]
+ turn = part[0] + part[1] if not user_token else part[1]
# this is still the user query, we should
+ if not part[1].strip():
+ LOG.warning(f"user turn has empty text: {prompt}")
res = self._tokenize(
- part.strip(),
+ turn.strip(),
add_eos_token=False,
strip_bos_token=True,
)
@@ -371,10 +373,12 @@ def tokenize_prompt(self, prompt):
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
elif part[0] == "ASSISTANT:":
# TODO label assistant token/tokens w/ IGNORE_TOKEN_ID
- part = part[0] + part[1] if not assistant_token else part[1]
- # this should be the assistent response, should end with an eos token
+ turn = part[0] + part[1] if not assistant_token else part[1]
+ # this should be the assistant response, should end with an eos token
+ if not part[1].strip():
+ LOG.warning(f"assistant turn has empty text: {prompt}")
res = self._tokenize(
- part.strip(),
+ turn.strip(),
add_eos_token=True,
strip_bos_token=True,
)
@@ -409,22 +413,31 @@ def tokenize_prompt(self, prompt):
raise InvalidDataException(str(err)) from err
def _tokenize(self, prompt, add_eos_token=True, strip_bos_token=False):
- result = self.tokenizer(
- prompt,
- truncation=True,
- max_length=self.sequence_len,
- padding=False,
- return_tensors=None,
- )
+ if not prompt.strip():
+ LOG.warning("Empty text requested for tokenization.")
+ result = BatchEncoding(data={"input_ids": [], "attention_mask": []})
+ else:
+ result = self.tokenizer(
+ prompt,
+ truncation=True,
+ max_length=self.sequence_len,
+ padding=False,
+ return_tensors=None,
+ )
if (
- result["input_ids"][-1] != self.tokenizer.eos_token_id
+ len(result["input_ids"]) > 0
+ and result["input_ids"][-1] != self.tokenizer.eos_token_id
and len(result["input_ids"]) < self.sequence_len
and add_eos_token
):
result["input_ids"].append(self.tokenizer.eos_token_id)
result["attention_mask"].append(1)
- if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token:
+ if (
+ len(result["input_ids"]) > 0
+ and result["input_ids"][0] == self.tokenizer.bos_token_id
+ and strip_bos_token
+ ):
result["input_ids"] = result["input_ids"][1:]
result["attention_mask"] = result["attention_mask"][1:]
diff --git a/tests/fixtures/conversation.missingturns.json b/tests/fixtures/conversation.missingturns.json
new file mode 100644
index 0000000000..b5b962d859
--- /dev/null
+++ b/tests/fixtures/conversation.missingturns.json
@@ -0,0 +1,52 @@
+{
+ "conversations": [
+ {
+ "from": "human",
+ "value": "how could i implement a minesweeper algorithm that utilises algebraic topology to solve boards?"
+ },
+ {
+ "from": "gpt",
+ "value": ""
+ },
+ {
+ "from": "human",
+ "value": "could you give me the gist of how it could though?"
+ },
+ {
+ "from": "gpt",
+ "value": "Sure. The basic idea would be to use algebraic topology to analyze the structure of the Minesweeper game board and determine the locations of mines based on certain topological invariants. This could involve representing the game board as a topological space and using topological invariants such as homotopy groups and Betti numbers to infer information about the location of mines on the board. However, as I mentioned before, implementing this would require a deep understanding of both algebraic topology and the game of Minesweeper, so it's not something that can be explained in a simple answer."
+ },
+ {
+ "from": "human",
+ "value": "give me a specific example"
+ },
+ {
+ "from": "gpt",
+ "value": "As a specific example, suppose we have a Minesweeper game board with the following configuration:\n```\n 1 1 1 1 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 1 1 1 1\n```\nIn this case, the game board can be represented as a topological space, where each square on the board corresponds to a point in the space. The mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space. Using algebraic topology, we can analyze the structure of this space and determine the locations of the mines based on certain topological invariants.\n\nFor instance, one way to do this is to compute the homotopy groups of the space. Homotopy groups are a way of describing the holes in a space, and they can be used to infer information about the locations of the mines on the Minesweeper board. In the case of the board above, we can compute the first homotopy group of the space, which describes the number and \"shape\" of the holes in the space. In this case, the first homotopy group would tell us that there are four holes in the space, arranged in a square shape.\n\nBased on this information, we can infer that the mines in the game are located in the four corners of the board, as shown below:\n```\n 1 1 1 1 1\n 1 M ? ? 1\n 1 ? ? ? 1\n 1 ? ? M 1\n 1 1 1 1 1\n```\nOf course, this is just one example of how algebraic topology could be used to solve Minesweeper. There are many other possible approaches, and the specific details of how to implement them would depend on the specific game board and the topological invariants being used. As I mentioned before, implementing a solution like this would require a deep understanding of both algebraic topology and the game of Minesweeper."
+ },
+ {
+ "from": "human",
+ "value": "assuming i was someone with a deep understanding of both, could you explain the same concept to me with that assumption?"
+ },
+ {
+ "from": "gpt",
+ "value": "Sure. Assuming you have a deep understanding of both algebraic topology and the game of Minesweeper, here is how you might use algebraic topology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Use topological invariants to infer information about the locations of the mines on the board. There are many possible topological invariants that could be used for this, such as homotopy groups, Betti numbers, or persistent homology.\n3. Use the information inferred from the topological invariants to determine the locations of the mines on the board. For example, if the first homotopy group of the space indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using algebraic topology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the topological invariants being used. However, this should give you a general idea of how algebraic topology can be used to solve Minesweeper."
+ },
+ {
+ "from": "human",
+ "value": "how would i do it with persistent homology or any of the other topological invariants?"
+ },
+ {
+ "from": "gpt",
+ "value": "To use persistent homology or any of the other topological invariants to solve a Minesweeper game board, you would follow the same basic steps as described above, but with some differences in the specific details.\n\nFor instance, here is how you might use persistent homology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Compute the persistent homology of the space. Persistent homology is a way of describing the holes in a space over time, and it can be used to infer information about the locations of the mines on the Minesweeper board.\n3. Use the information from the persistent homology to determine the locations of the mines on the board. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using persistent homology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the specific details of the persistent homology computation. However, this should give you a general idea of how persistent homology can be used to solve Minesweeper."
+ },
+ {
+ "from": "human",
+ "value": "how could i implement this in python?"
+ },
+ {
+ "from": "gpt",
+ "value": "To implement a Minesweeper solver that uses algebraic topology in Python, you would need to do the following:\n\n1. Install a Python package for algebraic topology. There are several options available, such as Gudhi, Dionysus, or scikit-tda. These packages provide algorithms and data structures for computing topological invariants, such as homotopy groups, Betti numbers, or persistent homology.\n2. Represent the Minesweeper game board as a topological space. This could be done using a data structure provided by the algebraic topology package, such as a simplicial complex or a Vietoris-Rips complex. Each square on the board would correspond to a point in the space, and the mines on the board would be represented as \"holes\" in the space.\n3. Compute the topological invariants of the space. This could be done using algorithms provided by the algebraic topology package. For example, you could compute the persistent homology of the space using the persistent homology algorithm.\n4. Use the information from the topological invariants to determine the locations of the mines on the board. This could be done using a combination of mathematical reasoning and programming. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then you could use a piece of code to infer that the mines are located in the four corners of the board.\n5. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you could use a piece of code to automatically click on the safe squares and clear the board.\n\nOf course, this is just one possible approach to implementing a Minesweeper solver that uses algebraic topology in Python. There may be other ways to do it, depending on the specific details of the implementation. However, this should give you a general idea of how to get started with such a project."
+ }
+ ]
+}
diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py
index 1dd511f6b8..a455d8b513 100644
--- a/tests/test_prompt_tokenizers.py
+++ b/tests/test_prompt_tokenizers.py
@@ -3,7 +3,9 @@
import logging
import unittest
from pathlib import Path
+from typing import Optional
+import pytest
from transformers import AutoTokenizer, LlamaTokenizer
from axolotl.prompt_strategies.alpaca_chat import NoSystemPrompter
@@ -29,6 +31,12 @@ class TestPromptTokenizationStrategies(unittest.TestCase):
Test class for prompt tokenization strategies.
"""
+ _caplog: Optional[pytest.LogCaptureFixture] = None
+
+ @pytest.fixture(autouse=True)
+ def inject_fixtures(self, caplog):
+ self._caplog = caplog
+
def setUp(self) -> None:
# pylint: disable=duplicate-code
self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
@@ -64,6 +72,24 @@ def test_sharegpt_integration(self):
self.assertEqual(len(example[fields]), len(tokenized_conversation[fields]))
self.assertEqual(example[fields], tokenized_conversation[fields])
+ def test_sharegpt_warnings_integration(self):
+ with open(
+ Path(__file__).parent / "fixtures/conversation.missingturns.json",
+ encoding="utf-8",
+ ) as fin:
+ data = fin.read()
+ conversation = json.loads(data)
+ prompter = ShareGPTPrompter("chat")
+ strat = ShareGPTPromptTokenizingStrategy(
+ prompter,
+ self.tokenizer,
+ False,
+ 2048,
+ )
+ with self._caplog.at_level(logging.WARNING):
+ strat.tokenize_prompt(conversation)
+ assert "assistant turn has empty text" in self._caplog.records[1].message
+
def test_no_sys_prompt(self):
"""
tests the interface between the user and assistant parts
From 923eb913047824897d7f58722c8940ddbd82c847 Mon Sep 17 00:00:00 2001
From: Maxime <672982+maximegmd@users.noreply.github.com>
Date: Fri, 22 Sep 2023 22:17:50 +0200
Subject: [PATCH 15/77] tweak: improve base builder for smaller layers (#500)
---
docker/Dockerfile-base | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
index d1c02e69d8..5d1e2c2b89 100644
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -13,16 +13,14 @@ ARG CUDA="118"
ENV PYTHON_VERSION=$PYTHON_VERSION
-RUN apt-get update
-RUN apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/*
-
-RUN wget \
+RUN apt-get update \
+  && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/* \
+ && wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& mkdir /root/.conda \
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
- && rm -f Miniconda3-latest-Linux-x86_64.sh
-
-RUN conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
+ && rm -f Miniconda3-latest-Linux-x86_64.sh \
+ && conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
From 67b98886306aba93232cb8766f95126566c23a9d Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Sat, 23 Sep 2023 13:11:27 +0900
Subject: [PATCH 16/77] Feat(doc): Add eval_sample_packing to doc (#625)
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 6e91158934..6ec81eed3c 100644
--- a/README.md
+++ b/README.md
@@ -493,6 +493,8 @@ pad_to_sequence_len:
max_packed_sequence_len: 1024
# use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true'
sample_packing:
+# set to 'false' if getting errors during eval with sample_packing on.
+eval_sample_packing:
# you can set these packing optimizations AFTER starting a training at least once.
# The trainer will provide recommended values for these values.
sample_packing_eff_est:
From 4fecbfe5e1cb8649d1c8cf741aec897a8677389c Mon Sep 17 00:00:00 2001
From: mhenrichsen
Date: Sun, 24 Sep 2023 18:52:53 +0200
Subject: [PATCH 17/77] default model changed
---
examples/llama-2/lora.yml | 4 ++--
examples/llama-2/qlora.yml | 4 ++--
examples/llama-2/relora.yml | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 4fdcb04092..3134648fdb 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -1,5 +1,5 @@
-base_model: meta-llama/Llama-2-7b-hf
-base_model_config: meta-llama/Llama-2-7b-hf
+base_model: NousResearch/Llama-2-7b-hf
+base_model_config: NousResearch/Llama-2-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index ef20d9fbe3..54255320cc 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -1,5 +1,5 @@
-base_model: meta-llama/Llama-2-7b-hf
-base_model_config: meta-llama/Llama-2-7b-hf
+base_model: NousResearch/Llama-2-7b-hf
+base_model_config: NousResearch/Llama-2-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index b59a7da04c..2b1fad55d8 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -1,5 +1,5 @@
-base_model: meta-llama/Llama-2-7b-hf
-base_model_config: meta-llama/Llama-2-7b-hf
+base_model: NousResearch/Llama-2-7b-hf
+base_model_config: NousResearch/Llama-2-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
From cfbce020e97986a785a7371e18c5e7f7c58bae5a Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Mon, 25 Sep 2023 13:48:18 +0900
Subject: [PATCH 18/77] Fix: Fail bf16 check when running on cpu during merge
(#631)
---
src/axolotl/utils/config.py | 2 +-
tests/test_validation.py | 23 +++++++++++++++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 1c0487ff8e..cb18380cb7 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -94,7 +94,7 @@ def validate_config(cfg):
if not cfg.bf16 and not cfg.bfloat16:
LOG.info("bf16 support detected, but not enabled for this configuration.")
else:
- if cfg.bf16 or cfg.bfloat16:
+ if not cfg.merge_lora and (cfg.bf16 or cfg.bfloat16):
raise ValueError(
"bf16 requested, but AMP is not supported on this GPU. Requires Ampere series or above."
)
diff --git a/tests/test_validation.py b/tests/test_validation.py
index f250e5cb47..d7935c1a54 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -351,3 +351,26 @@ def test_packing(self):
regex_exp = r".*set only one of max_packed_sequence_len \(deprecated soon\) or sample_packing.*"
with pytest.raises(ValueError, match=regex_exp):
validate_config(cfg)
+
+ def test_merge_lora_no_bf16_fail(self):
+ """
+ This is assumed to be run on a CPU machine, so bf16 is not supported.
+ """
+
+ cfg = DictDefault(
+ {
+ "bf16": True,
+ }
+ )
+
+ with pytest.raises(ValueError, match=r".*AMP is not supported on this GPU*"):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "bf16": True,
+ "merge_lora": True,
+ }
+ )
+
+ validate_config(cfg)
From 5e5296a77cf185418d9d8f8f4e4b01fdf394d0ce Mon Sep 17 00:00:00 2001
From: Fernando Tarin Morales
Date: Tue, 26 Sep 2023 00:50:14 +0900
Subject: [PATCH 19/77] Added quotes to the pip install -e command to fix an
incompatibility with shells that do glob expansion like zsh (#632)
---
README.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 6ec81eed3c..25044a2361 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ git clone https://github.com/OpenAccess-AI-Collective/axolotl
cd axolotl
pip3 install packaging
-pip3 install -e .[flash-attn,deepspeed]
+pip3 install -e '.[flash-attn,deepspeed]'
pip3 install -U git+https://github.com/huggingface/peft.git
# finetune lora
@@ -122,7 +122,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
3. Install axolotl along with python dependencies
```bash
pip3 install packaging
- pip3 install -e .[flash-attn,deepspeed]
+ pip3 install -e '.[flash-attn,deepspeed]'
```
- LambdaLabs
@@ -158,7 +158,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
cd axolotl
pip3 install packaging
- pip3 install -e .[flash-attn,deepspeed]
+ pip3 install -e '.[flash-attn,deepspeed]'
pip3 install protobuf==3.20.3
pip3 install -U --ignore-installed requests Pillow psutil scipy
```
From 19a600a8b859c40cf4c3749b0b8a3db17b82a0c0 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 26 Sep 2023 22:53:28 +0900
Subject: [PATCH 20/77] Feat: Add support for upstream FA2 (#626)
* Feat: Add support for upstream FA2
* chore: add is_falcon_derived_model: true to examples
* chore: add config to readme for documentation
* feat: add extra model types
* fix: remove old falcon flash patch
* chore: pin transformers and accelerate
---
README.md | 4 +
examples/falcon/config-7b-lora.yml | 1 +
examples/falcon/config-7b-qlora.yml | 1 +
examples/falcon/config-7b.yml | 1 +
requirements.txt | 4 +-
.../monkeypatch/falcon_attn_hijack_flash.py | 101 ------------------
src/axolotl/utils/config.py | 16 +++
src/axolotl/utils/models.py | 20 ++--
8 files changed, 31 insertions(+), 117 deletions(-)
delete mode 100644 src/axolotl/monkeypatch/falcon_attn_hijack_flash.py
diff --git a/README.md b/README.md
index 25044a2361..c2d4d8ef8c 100644
--- a/README.md
+++ b/README.md
@@ -408,6 +408,10 @@ tokenizer_legacy:
# this is reported to improve training speed on some models
resize_token_embeddings_to_32x:
+# used to identify if the model is falcon/llama based
+is_falcon_derived_model:
+is_llama_derived_model:
+
# whether you are training a 4-bit GPTQ quantized model
gptq: true
gptq_groupsize: 128 # group size
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index a5cbdc00df..738068a474 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -3,6 +3,7 @@ base_model_config: tiiuae/falcon-7b
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
+is_falcon_derived_model: true
load_in_8bit: true
load_in_4bit: false
gptq: false
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index 72b09b87d9..554081fcba 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -6,6 +6,7 @@ base_model_config: tiiuae/falcon-7b
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
+is_falcon_derived_model: true
load_in_8bit: false
# enable 4bit for QLoRA
load_in_4bit: true
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 46f4caff15..25e67a53b1 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -3,6 +3,7 @@ base_model_config: tiiuae/falcon-7b
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
+is_falcon_derived_model: true
load_in_8bit: false
load_in_4bit: false
gptq: false
diff --git a/requirements.txt b/requirements.txt
index 5aba20b161..33a2157d96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,9 +4,9 @@ torch==2.0.1
auto-gptq
packaging
peft @ git+https://github.com/huggingface/peft.git
-transformers @ git+https://github.com/huggingface/transformers.git
+transformers @ git+https://github.com/huggingface/transformers.git@0ac3875011d32dc85e0e83970507e3afe8f0febb
bitsandbytes>=0.41.1
-accelerate @ git+https://github.com/huggingface/accelerate
+accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
deepspeed
addict
evaluate
diff --git a/src/axolotl/monkeypatch/falcon_attn_hijack_flash.py b/src/axolotl/monkeypatch/falcon_attn_hijack_flash.py
deleted file mode 100644
index ed11c55234..0000000000
--- a/src/axolotl/monkeypatch/falcon_attn_hijack_flash.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""
-Flash Attention monkey patch for Falcon
-
-copied from https://github.com/pacman100/DHS-LLM-Workshop/blob/main/chat_assistant/training/falcon_flash_attn_monkey_patch.py
-"""
-
-from typing import Optional, Tuple
-
-import torch
-import transformers
-from flash_attn import flash_attn_func
-
-
-def forward(
- self,
- hidden_states: torch.Tensor,
- alibi: Optional[torch.Tensor],
- attention_mask: torch.Tensor, # pylint: disable=unused-argument
- layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
- head_mask: Optional[torch.Tensor] = None, # pylint: disable=unused-argument
- use_cache: bool = False,
- output_attentions: bool = False, # pylint: disable=unused-argument
-):
- fused_qkv = self.query_key_value(
- hidden_states
- ) # [batch_size, seq_length, 3 x hidden_size]
- num_kv_heads = (
- self.num_heads if self.new_decoder_architecture else self.num_kv_heads
- )
- # 3 x [batch_size, seq_length, num_heads, head_dim]
- (
- query_layer,
- key_layer,
- value_layer,
- ) = self._split_heads( # pylint: disable=protected-access
- fused_qkv
- )
-
- batch_size, query_length, _, _ = query_layer.shape
-
- query_layer = query_layer.transpose(1, 2).reshape(
- batch_size * self.num_heads, query_length, self.head_dim
- )
- key_layer = key_layer.transpose(1, 2).reshape(
- batch_size * num_kv_heads,
- query_length,
- self.head_dim,
- )
- value_layer = value_layer.transpose(1, 2).reshape(
- batch_size * num_kv_heads, query_length, self.head_dim
- )
-
- past_kv_length = 0 if layer_past is None else layer_past[0].shape[1]
- query_layer, key_layer = self.maybe_rotary(query_layer, key_layer, past_kv_length)
-
- if layer_past is not None:
- past_key, past_value = layer_past
- # concatenate along seq_length dimension:
- # - key: [batch_size * self.num_heads, kv_length, head_dim]
- # - value: [batch_size * self.num_heads, kv_length, head_dim]
- key_layer = torch.cat((past_key, key_layer), dim=1)
- value_layer = torch.cat((past_value, value_layer), dim=1)
-
- # unused
- # _, kv_length, _ = key_layer.shape
- if use_cache:
- present = (key_layer, value_layer)
- else:
- present = None
- # unused
- # attention_mask_float = (attention_mask * 1.0).masked_fill(attention_mask, float("-1e9")).to(query_layer.dtype)
- query_layer_ = (
- query_layer.reshape(batch_size, self.num_heads, -1, self.head_dim)
- .transpose(1, 2)
- .to(torch.bfloat16)
- )
- key_layer_ = (
- key_layer.reshape(batch_size, num_kv_heads, -1, self.head_dim)
- .transpose(1, 2)
- .to(torch.bfloat16)
- )
- value_layer_ = (
- value_layer.reshape(batch_size, num_kv_heads, -1, self.head_dim)
- .transpose(1, 2)
- .to(torch.bfloat16)
- )
-
- if alibi is not None:
- raise ValueError("`alibi` is not supported when `use_flash_attn` is True")
-
- # below output will have shape (batch_size, seqlen, nheads, headdim)
- attn_output = flash_attn_func(query_layer_, key_layer_, value_layer_, causal=True)
- attn_output = attn_output.reshape(
- batch_size, query_length, self.num_heads * self.head_dim
- )
- output_tensor = self.dense(attn_output)
- return output_tensor, present
-
-
-def replace_falcon_attn_with_flash_attn():
- transformers.models.falcon.modeling_falcon.FalconAttention.forward = forward
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index cb18380cb7..3a574cefcc 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -86,6 +86,22 @@ def normalize_config(cfg):
or (cfg.model_type and "llama" in cfg.model_type.lower())
)
+ # figure out if the model is falcon
+ cfg.is_falcon_derived_model = (
+ (
+ hasattr(model_config, "model_type")
+ and model_config.model_type
+ in [
+ "falcon",
+ "RefinedWebModel",
+ "RefinedWeb",
+ ]
+ )
+ or cfg.is_falcon_derived_model
+ or "falcon" in cfg.base_model
+ or (cfg.model_type and "rwforcausallm" in cfg.model_type.lower())
+ )
+
log_gpu_memory_usage(LOG, "baseline", cfg.device)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 543a0e1a13..361440931f 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -114,25 +114,13 @@ def load_model(
replace_btlm_attn_with_flash_attn(cfg.base_model)
- if hasattr(model_config, "model_type") and model_config.model_type in [
- "falcon",
- "RefinedWebModel",
- "RefinedWeb",
- ]:
- if cfg.flash_attention:
- from axolotl.monkeypatch.falcon_attn_hijack_flash import (
- replace_falcon_attn_with_flash_attn,
- )
-
- replace_falcon_attn_with_flash_attn()
-
- if cfg.is_llama_derived_model and cfg.flash_attention:
+ if cfg.is_llama_derived_model and cfg.flash_attention and cfg.sample_packing:
if cfg.device not in ["mps", "cpu"] and not inference:
from axolotl.monkeypatch.llama_attn_hijack_flash import (
replace_llama_attn_with_flash_attn,
)
- LOG.info("patching with flash attention")
+ LOG.info("patching with flash attention for sample packing")
replace_llama_attn_with_flash_attn(packed=cfg.sample_packing)
elif cfg.is_llama_derived_model and cfg.xformers_attention:
from axolotl.monkeypatch.llama_attn_hijack_xformers import (
@@ -213,6 +201,10 @@ def load_model(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
)
+ # sample packing uses custom FA2 patch
+ if cfg.flash_attention and not cfg.sample_packing:
+ if cfg.is_llama_derived_model or cfg.is_falcon_derived_model:
+ model_kwargs["use_flash_attention_2"] = True
try:
if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
from transformers import LlamaForCausalLM
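A hedged sketch of the dispatch this patch sets up: falcon detection in `normalize_config` plus the decision of when to pass transformers' `use_flash_attention_2` kwarg instead of the sample-packing monkeypatch. The functions below are standalone restatements, not the axolotl API; the dict keys follow the YAML config names.

```python
from typing import Optional


def is_falcon_derived(model_type: Optional[str], base_model: str, explicit_flag: bool) -> bool:
    """Mirror of the falcon detection added to normalize_config()."""
    return (
        model_type in ("falcon", "RefinedWebModel", "RefinedWeb")
        or explicit_flag
        or "falcon" in base_model
        or (model_type is not None and "rwforcausallm" in model_type.lower())
    )


def use_upstream_fa2(cfg: dict) -> bool:
    """Sample packing keeps the custom patch; other llama/falcon-derived
    configs with flash_attention enabled use transformers' built-in FA2 kwarg."""
    if not cfg.get("flash_attention") or cfg.get("sample_packing"):
        return False
    return bool(cfg.get("is_llama_derived_model") or cfg.get("is_falcon_derived_model"))


# e.g. a falcon config with flash_attention: true and no sample_packing would get
# model_kwargs["use_flash_attention_2"] = True under this rule.
print(use_upstream_fa2({"flash_attention": True, "is_falcon_derived_model": True}))  # True
```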
From d887ad86c3bd04c2e2b87af16a29a3427634a0db Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 26 Sep 2023 10:13:20 -0400
Subject: [PATCH 21/77] eval_table isn't quite stable enough to be in default
llama configs (#637)
---
examples/llama-2/lora.yml | 2 +-
examples/llama-2/qlora.yml | 2 +-
examples/llama-2/tiny-llama.yml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 3134648fdb..106de6e320 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -56,7 +56,7 @@ flash_attention: true
warmup_steps: 10
eval_steps: 20
-eval_table_size: 5
+eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
debug:
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 54255320cc..870c3fa848 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -58,7 +58,7 @@ flash_attention: true
warmup_steps: 10
eval_steps: 20
-eval_table_size: 5
+eval_table_size:
save_steps:
debug:
deepspeed:
diff --git a/examples/llama-2/tiny-llama.yml b/examples/llama-2/tiny-llama.yml
index a53c9c831b..d556a6d719 100644
--- a/examples/llama-2/tiny-llama.yml
+++ b/examples/llama-2/tiny-llama.yml
@@ -56,7 +56,7 @@ flash_attention: true
warmup_steps: 10
eval_steps: 20
-eval_table_size: 5
+eval_table_size:
save_steps:
debug:
deepspeed:
From e8cbf50be698413f205bd7a603c01e2aa1d83231 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 27 Sep 2023 11:12:08 -0400
Subject: [PATCH 22/77] attention_mask not needed for training (#642)
* attention_mask not needed for training
* specifically don't use attention mask for phi
* use a different check for phi
* small fixes since phi removed some values from their config
---
src/axolotl/models/phi/modeling_mixformer_sequential.py | 8 ++------
src/axolotl/utils/data.py | 2 +-
src/axolotl/utils/trainer.py | 9 ++++++++-
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/axolotl/models/phi/modeling_mixformer_sequential.py b/src/axolotl/models/phi/modeling_mixformer_sequential.py
index 27bede5f01..fd2ec054c5 100644
--- a/src/axolotl/models/phi/modeling_mixformer_sequential.py
+++ b/src/axolotl/models/phi/modeling_mixformer_sequential.py
@@ -711,12 +711,8 @@ def __init__(
self.resid_dropout = nn.Dropout(config.resid_pdrop)
self.block_idx = block_idx
- self.mixer = MHA(config=config, **mixer, layer_idx=block_idx)
- mlp_cls = mlp.pop("mlp_cls")
- if mlp_cls == "fused_mlp":
- self.mlp = FusedMLP(config=config, **mlp)
- else:
- self.mlp = MLP(config=config, **mlp)
+ self.mixer = MHA(config, layer_idx=block_idx)
+ self.mlp = MLP(config)
def forward(
self,
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index 2dc2d82b28..9792371c7c 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -76,7 +76,7 @@ def prepare_dataset(cfg, tokenizer):
with zero_first(is_main_process()):
train_dataset, eval_dataset = process_datasets_for_packing(
- cfg, train_dataset, eval_dataset
+ cfg, train_dataset, eval_dataset, tokenizer
)
if cfg.max_steps:
total_num_steps = min(
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index a2657e05ab..aee2a1b99e 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -397,7 +397,7 @@ def disable_datasets_caching():
set_caching_enabled(True)
-def process_datasets_for_packing(cfg, train_dataset, eval_dataset):
+def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
with zero_first(is_main_process()):
train_dataset = train_dataset.filter(drop_long, num_proc=os.cpu_count())
@@ -414,6 +414,13 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset):
eval_dataset = eval_dataset.map(
add_position_ids, num_proc=os.cpu_count()
)
+
+ # Phi doesn't want the attention_mask feature when training
+ if "CodeGenTokenizer" in tokenizer.__class__.__name__:
+ train_dataset = train_dataset.remove_columns("attention_mask")
+ if eval_dataset:
+ eval_dataset = eval_dataset.remove_columns("attention_mask")
+
return train_dataset, eval_dataset
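A short sketch of the new dataset step, using the `datasets` library directly. The toy dataset below is made up; the check keys off the tokenizer class name exactly as the patch does, since Phi's tokenizer is CodeGen-based.

```python
from datasets import Dataset


def maybe_drop_attention_mask(ds: Dataset, tokenizer) -> Dataset:
    # Phi models train without an attention_mask, so drop the column
    # before collation when the tokenizer is CodeGen-based.
    if "CodeGenTokenizer" in tokenizer.__class__.__name__:
        ds = ds.remove_columns("attention_mask")
    return ds


toy = Dataset.from_dict({"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]})
# maybe_drop_attention_mask(toy, tokenizer)  # column removed only for CodeGen tokenizers
```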
From 60c7c48c97a380aefa558bbb39c7108bd5130581 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 27 Sep 2023 12:10:32 -0400
Subject: [PATCH 23/77] update for recent transformers updates (#636)
* update for recent transformers updates
* fix checkpoint forward kwargs
* just pass args into torch checkpoint
---
.../monkeypatch/llama_attn_hijack_flash.py | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
index d172d302d9..97f0477649 100644
--- a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
@@ -99,6 +99,7 @@ def flashattn_forward(
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
+ padding_mask: Optional[torch.LongTensor] = None, # pylint: disable=unused-argument
cu_seqlens: Optional[torch.Tensor] = None,
max_seqlen: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
@@ -476,6 +477,13 @@ def llama_model_forward(
dtype=torch.bool,
device=inputs_embeds.device,
)
+ padding_mask = None
+ else:
+ if 0 in attention_mask:
+ padding_mask = attention_mask
+ else:
+ padding_mask = None
+
attention_mask = (
self._prepare_decoder_attention_mask( # pylint: disable=protected-access
attention_mask,
@@ -510,7 +518,9 @@ def llama_model_forward(
def create_custom_forward(module):
def custom_forward(*inputs):
# None for past_key_value
- return module(*inputs)
+ return module(
+ *inputs,
+ )
return custom_forward
@@ -519,9 +529,10 @@ def custom_forward(*inputs):
hidden_states,
attention_mask,
position_ids,
- None,
+ past_key_value,
output_attentions,
None,
+ padding_mask,
cu_seqlens,
max_seqlen,
)
@@ -533,6 +544,7 @@ def custom_forward(*inputs):
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
+ padding_mask=padding_mask,
cu_seqlens=cu_seqlens,
max_seqlen=max_seqlen,
)
@@ -579,6 +591,7 @@ def forward(
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
+ padding_mask: Optional[torch.LongTensor] = None,
cu_seqlens: Optional[torch.Tensor] = None,
max_seqlen: Optional[torch.Tensor] = None,
) -> Tuple[
@@ -611,6 +624,7 @@ def forward(
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
+ padding_mask=padding_mask,
cu_seqlens=cu_seqlens,
max_seqlen=max_seqlen,
)
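A hedged restatement of the `padding_mask` handling introduced here: recent transformers versions forward a `padding_mask` into the decoder layers, and it should only be non-None when the attention mask actually contains a padded (zero) position. The helper below is a standalone sketch of the logic added to `llama_model_forward()`.

```python
from typing import Optional

import torch


def derive_padding_mask(attention_mask: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
    """Standalone sketch; the patch inlines this check rather than calling a helper."""
    if attention_mask is None:
        return None
    # Only propagate a padding mask when at least one position is masked out.
    return attention_mask if (attention_mask == 0).any() else None


print(derive_padding_mask(torch.tensor([[1, 1, 1]])))  # None: no padding present
print(derive_padding_mask(torch.tensor([[1, 1, 0]])))  # tensor([[1, 1, 0]])
```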
From e7d3e2dbb6784d3bb7ebdf2e6c48ed458ed5a184 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 27 Sep 2023 12:10:45 -0400
Subject: [PATCH 24/77] use fastchat conversations template (#578)
* use fastchat conversations template
* require fastchat (fschat) pip install
* handle roles dynamically from conversation
* tweak fastchat conversation with a monkeypatch to get individual turns
* fix up so it works with multiple conversation styles, and don't strip the turns
* fix sharegpt fixture now that we're using a more correct tokenization
* use a new prompter and support fastchat conversation type
* use sharegpt from prompt strategies now
* update docs, add chatml template
* add a newline after im_end token
* ensure we correctly set system message
* update per PR feedback to handle deprecated sharegpt types
* don't add duplicate wandb req
* make sharegpt fields configurable from yml
* llama2 fixes
* don't fail fatally when turns are improper
---
README.md | 7 +-
requirements.txt | 1 +
.../fastchat_conversation_turns.py | 174 ++++++++++++++++++
.../{sharegpt_simple.py => sharegpt.py} | 33 +++-
.../prompt_strategies/sharegpt_jokes.py | 4 +-
src/axolotl/prompt_tokenizers.py | 24 ++-
src/axolotl/prompters.py | 131 +++++--------
src/axolotl/utils/config.py | 19 ++
src/axolotl/utils/data.py | 11 --
src/axolotl/utils/tokenization.py | 1 +
tests/fixtures/conversation.tokenized.json | 2 +-
tests/test_prompt_tokenizers.py | 6 +-
tests/test_validation.py | 23 +++
13 files changed, 324 insertions(+), 112 deletions(-)
create mode 100644 src/axolotl/monkeypatch/fastchat_conversation_turns.py
rename src/axolotl/prompt_strategies/{sharegpt_simple.py => sharegpt.py} (63%)
diff --git a/README.md b/README.md
index c2d4d8ef8c..3b3bc9985a 100644
--- a/README.md
+++ b/README.md
@@ -180,7 +180,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
```json
{"instruction": "...", "input": "...", "output": "..."}
```
-- `sharegpt:chat`: conversations where `from` is `human`/`gpt`
+- `sharegpt`: conversations where `from` is `human`/`gpt`
```json
{"conversations": [{"from": "...", "value": "..."}]}
```
@@ -269,11 +269,11 @@ Have dataset(s) in one of the following format (JSONL recommended):
```json
{"prompt": "...", "generation": "..."}
```
-- `sharegpt_simple.load_role`: conversations where `role` is used instead of `from`
+- `sharegpt.load_role`: conversations where `role` is used instead of `from`
```json
{"conversations": [{"role": "...", "value": "..."}]}
```
-- `sharegpt_simple.load_guanaco`: conversations where `from` is `prompter`/`assistant` instead of default sharegpt
+- `sharegpt.load_guanaco`: conversations where `from` is `prompter`/`assistant` instead of default sharegpt
```json
{"conversations": [{"from": "...", "value": "..."}]}
```
@@ -443,6 +443,7 @@ datasets:
data_files: # Optional[str] path to source data files
shards: # Optional[int] number of shards to split data into
name: # Optional[str] name of dataset configuration to load
+ conversation: # Optional[str] fastchat conversation type, only used with type: sharegpt
# custom user prompt
- path: repo
diff --git a/requirements.txt b/requirements.txt
index 33a2157d96..7616d1fb0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,3 +31,4 @@ scipy
scikit-learn==1.2.2
pynvml
art
+fschat==0.2.29
diff --git a/src/axolotl/monkeypatch/fastchat_conversation_turns.py b/src/axolotl/monkeypatch/fastchat_conversation_turns.py
new file mode 100644
index 0000000000..19313fb7e2
--- /dev/null
+++ b/src/axolotl/monkeypatch/fastchat_conversation_turns.py
@@ -0,0 +1,174 @@
+"""
+monkeypatch to add a get_turns method
+"""
+
+import logging
+from typing import Generator, Tuple
+
+from fastchat.conversation import SeparatorStyle
+
+LOG = logging.getLogger("axolotl.monkeypatch.fastchat_conversation_turns")
+
+
+def get_prompt(self) -> str:
+ ret = ""
+ for role, msg in self.get_turns():
+ ret += role + msg
+ return ret
+
+
+def get_turns( # pylint: disable=too-many-return-statements
+ self,
+) -> Generator[Tuple[str, str], None, None]:
+ """Get the prompt for generation."""
+ system_prompt = self.system_template.format(system_message=self.system_message)
+ if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
+ yield "", system_prompt + self.sep
+ for role, message in self.messages:
+ if message:
+ yield role + ": ", message + self.sep
+ else:
+ yield role + ":", ""
+ return
+ if self.sep_style == SeparatorStyle.ADD_COLON_TWO:
+ seps = [self.sep, self.sep2]
+ yield "", system_prompt + seps[0]
+ for i, (role, message) in enumerate(self.messages):
+ if message:
+ yield role + ": ", message + seps[i % 2]
+ else:
+ yield role + ":", ""
+ return
+ if self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
+ yield "", system_prompt + self.sep
+ for role, message in self.messages:
+ if message:
+ yield role + ": ", message + self.sep
+ else:
+ yield role + ": ", "" # must be end with a space
+ return
+ if self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
+ yield "", "" if system_prompt == "" else system_prompt + self.sep
+ for role, message in self.messages:
+ if message:
+ yield role + "\n", message + self.sep
+ else:
+ yield role + "\n", ""
+ return
+ if self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
+ yield "", system_prompt
+ for role, message in self.messages:
+ if message:
+ yield role, message + self.sep
+ else:
+ yield role, ""
+ return
+ if self.sep_style == SeparatorStyle.NO_COLON_TWO:
+ seps = [self.sep, self.sep2]
+ yield "", system_prompt
+ for i, (role, message) in enumerate(self.messages):
+ if message:
+ yield role, message + seps[i % 2]
+ else:
+ yield role, ""
+ return
+ if self.sep_style == SeparatorStyle.RWKV:
+ yield "", system_prompt
+ for i, (role, message) in enumerate(self.messages):
+ if message:
+ yield role + ": ", message.replace("\r\n", "\n").replace(
+ "\n\n", "\n"
+ ) + "\n\n"
+ else:
+ yield role + ":", ""
+ return
+ if self.sep_style == SeparatorStyle.LLAMA2:
+ seps = [self.sep, self.sep2]
+ if self.system_message:
+ yield "", system_prompt
+ else:
+ yield "", "[INST] "
+ for i, (role, message) in enumerate(self.messages[1:]):
+ if message:
+ yield role + " ", message + seps[i % 2]
+ else:
+ yield role, ""
+ return
+ if self.sep_style == SeparatorStyle.CHATGLM:
+ # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
+ # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
+ round_add_n = 1 if self.name == "chatglm2" else 0
+ if system_prompt:
+ yield "", system_prompt + self.sep
+
+ for i, (role, message) in enumerate(self.messages):
+ if i % 2 == 0:
+ yield "", f"[Round {i//2 + round_add_n}]{self.sep}"
+
+ if message:
+ yield f"{role}:", f"{message}{self.sep}"
+ else:
+ yield f"{role}:", ""
+ return
+ if self.sep_style == SeparatorStyle.CHATML:
+ yield "", "" if system_prompt == "" else system_prompt + self.sep + "\n"
+ for role, message in self.messages:
+ if message:
+ yield role + "\n", message + self.sep + "\n"
+ else:
+ yield role + "\n", ""
+ return
+ if self.sep_style == SeparatorStyle.CHATINTERN:
+ # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
+ seps = [self.sep, self.sep2]
+ yield "", system_prompt
+ for i, (role, message) in enumerate(self.messages):
+            prefix = "<s>" if i % 2 == 0 else ""
+ if message:
+ yield prefix + role + ":", message + seps[i % 2] + "\n"
+ else:
+ yield role + ":", ""
+ return
+ if self.sep_style == SeparatorStyle.DOLLY:
+ seps = [self.sep, self.sep2]
+ yield "", system_prompt
+ for i, (role, message) in enumerate(self.messages):
+ if message:
+ suffix = "\n\n" if i % 2 == 1 else ""
+ yield role + ":\n", message + seps[i % 2] + suffix
+ else:
+ yield role + ":\n", ""
+ return
+ if self.sep_style == SeparatorStyle.PHOENIX:
+ yield "", system_prompt
+ for role, message in self.messages:
+ if message:
+                yield role + ": ", "<s>" + message + "</s>"
+            else:
+                yield role + ": " + "<s>", ""
+ return
+ if self.sep_style == SeparatorStyle.ROBIN:
+ yield "", system_prompt + self.sep
+ for role, message in self.messages:
+ if message:
+ yield role + ":\n", message + self.sep
+ else:
+ yield role + ":\n", ""
+ return
+ if self.sep_style == SeparatorStyle.FALCON_CHAT:
+ if self.system_message:
+ yield "", system_prompt + self.sep
+ for role, message in self.messages:
+ if message:
+ yield role + ": ", message + self.sep
+ else:
+ yield role + ":", ""
+ else:
+ raise ValueError(f"Invalid style: {self.sep_style}")
+
+
+def add_get_turns_to_conversation():
+ import fastchat.conversation
+
+ fastchat.conversation.Conversation.get_turns = get_turns
+ fastchat.conversation.Conversation.get_prompt = get_prompt
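An illustrative usage of the patched `get_turns()`: after `add_get_turns_to_conversation()` is applied, each turn comes back as a `(role, message)` pair, which is what the tokenizing strategy needs in order to mask user turns separately from assistant turns. The sample messages are made up.

```python
from fastchat.conversation import get_conv_template

from axolotl.monkeypatch.fastchat_conversation_turns import (
    add_get_turns_to_conversation,
)

add_get_turns_to_conversation()

conv = get_conv_template("vicuna_v1.1")
conv.append_message(conv.roles[0], "How do I reverse a list in Python?")
conv.append_message(conv.roles[1], "Use my_list[::-1] or my_list.reverse().")

for role, message in conv.get_turns():
    print(repr(role), repr(message))
```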
diff --git a/src/axolotl/prompt_strategies/sharegpt_simple.py b/src/axolotl/prompt_strategies/sharegpt.py
similarity index 63%
rename from src/axolotl/prompt_strategies/sharegpt_simple.py
rename to src/axolotl/prompt_strategies/sharegpt.py
index bfe0d164bc..da36e778e0 100644
--- a/src/axolotl/prompt_strategies/sharegpt_simple.py
+++ b/src/axolotl/prompt_strategies/sharegpt.py
@@ -1,12 +1,35 @@
"""Module containing the SimpleShareGPTPromptTokenizingStrategy class"""
+from typing import Any, Dict, Optional
+
+from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
-from axolotl.prompters import PromptStyle, ShareGPTPrompter
+from axolotl.prompters import ShareGPTPrompterV2
+
+register_conv_template(
+ Conversation(
+ name="chatml",
+ system_template="<|im_start|>system\n{system_message}",
+ system_message="You are a helpful assistant.",
+ roles=["<|im_start|>user", "<|im_start|>assistant"],
+ sep_style=SeparatorStyle.CHATML,
+ sep="<|im_end|>\n",
+ )
+)
-def load(tokenizer, cfg):
+def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
+ conversation = (
+ ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None
+ )
+ field_human = ds_cfg["field_human"] if ds_cfg and "field_human" in ds_cfg else None
+ field_model = ds_cfg["field_model"] if ds_cfg and "field_model" in ds_cfg else None
return SimpleShareGPTPromptTokenizingStrategy(
- ShareGPTPrompter(PromptStyle.CHAT.value),
+ ShareGPTPrompterV2(
+ conversation=conversation,
+ role_key_model=field_model,
+ role_key_human=field_human,
+ ),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
@@ -15,7 +38,7 @@ def load(tokenizer, cfg):
def load_role(tokenizer, cfg):
return SimpleRoleShareGPTPromptTokenizingStrategy(
- ShareGPTPrompter(PromptStyle.CHAT.value),
+ ShareGPTPrompterV2(),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
@@ -24,7 +47,7 @@ def load_role(tokenizer, cfg):
def load_guanaco(tokenizer, cfg):
return GuanacoShareGPTPromptTokenizingStrategy(
- ShareGPTPrompter(PromptStyle.CHAT.value),
+ ShareGPTPrompterV2(),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
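For reference, a small sketch of the dataset-level options the new `load()` signature reads. The dict mirrors what a `datasets:` entry in the YAML would provide; the values are illustrative.

```python
# Illustrative ds_cfg, as load(tokenizer, cfg, ds_cfg=...) would receive it.
ds_cfg = {
    "conversation": "chatml",    # optional fastchat template name
    "field_human": "user",       # optional override for the human role key
    "field_model": "assistant",  # optional override for the model role key
}

conversation = ds_cfg.get("conversation")
field_human = ds_cfg.get("field_human")
field_model = ds_cfg.get("field_model")
# These feed straight into ShareGPTPrompterV2(conversation=...,
# role_key_human=..., role_key_model=...); unset keys fall back to the
# sharegpt defaults ("human"/"gpt") and fastchat's vicuna_v1.1 template.
```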
diff --git a/src/axolotl/prompt_strategies/sharegpt_jokes.py b/src/axolotl/prompt_strategies/sharegpt_jokes.py
index ac424bf7c7..404302c81e 100644
--- a/src/axolotl/prompt_strategies/sharegpt_jokes.py
+++ b/src/axolotl/prompt_strategies/sharegpt_jokes.py
@@ -1,11 +1,11 @@
"""Module for Jokes prompts using sharegpt style """
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
-from axolotl.prompters import PromptStyle, ShareGPTPrompter
+from axolotl.prompters import ShareGPTPrompterV2
def load(tokenizer, cfg):
return SimpleJokesShareGPTPromptTokenizingStrategy(
- ShareGPTPrompter(PromptStyle.CHAT.value),
+ ShareGPTPrompterV2(),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index a74da71da5..31bb73c2f7 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -6,8 +6,12 @@
import logging
from typing import Dict, List, Tuple, Union
+from fastchat.conversation import Conversation
from transformers import BatchEncoding, PreTrainedTokenizer
+from axolotl.monkeypatch.fastchat_conversation_turns import (
+ add_get_turns_to_conversation,
+)
from axolotl.prompters import IGNORE_TOKEN_ID
LOG = logging.getLogger("axolotl")
@@ -18,6 +22,8 @@
LLAMA_DEFAULT_BOS_TOKEN = "<s>" # nosec
LLAMA_DEFAULT_UNK_TOKEN = "<unk>" # nosec
+add_get_turns_to_conversation()
+
class InvalidDataException(Exception):
"""
@@ -352,18 +358,21 @@ def tokenize_prompt(self, prompt):
result, current_len = tokenize_prompt_default()
user_token = self._get_user_token()
assistant_token = self._get_assistant_token()
+ conversation: Conversation = (
+ self.prompter._conversation # pylint: disable=protected-access
+ )
try:
for _, part in enumerate(
self.prompter.build_prompt(self.get_conversation_thread(prompt))
):
if isinstance(part, tuple):
- if part[0] == "USER:":
+ if conversation.roles[0] in part[0]:
turn = part[0] + part[1] if not user_token else part[1]
# this is still the user query, we should
if not part[1].strip():
LOG.warning(f"user turn has empty text: {prompt}")
res = self._tokenize(
- turn.strip(),
+ turn,
add_eos_token=False,
strip_bos_token=True,
)
@@ -371,14 +380,14 @@ def tokenize_prompt(self, prompt):
res["input_ids"] = [user_token, *res["input_ids"]]
# everything from this is masked out from the labels
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
- elif part[0] == "ASSISTANT:":
+ elif conversation.roles[1] in part[0]:
# TODO label assistant token/tokens w/ IGNORE_TOKEN_ID
turn = part[0] + part[1] if not assistant_token else part[1]
# this should be the assistant response, should end with an eos token
if not part[1].strip():
LOG.warning(f"assistant turn has empty text: {prompt}")
res = self._tokenize(
- turn.strip(),
+ turn,
add_eos_token=True,
strip_bos_token=True,
)
@@ -389,16 +398,17 @@ def tokenize_prompt(self, prompt):
]
# not masked out from labels
labels = copy.deepcopy(res["input_ids"])
- elif part[0] == "SYSTEM:":
- part = part[1] # Ignore the system role from preamble
+ elif part[0] == "":
+ turn = part[1]
# this is only ever the first part, should include the bos token and the user query
res = self._tokenize(
- part.strip(), add_eos_token=False, strip_bos_token=False
+ turn, add_eos_token=False, strip_bos_token=False
)
# everything from this is masked out from the labels
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
else:
LOG.warning(f"unhandled role: {part[0]}")
+ continue
# pylint: disable=duplicate-code
result, current_len = parse_tokenized_to_result(
diff --git a/src/axolotl/prompters.py b/src/axolotl/prompters.py
index d6d14c3694..370e15091c 100644
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -1,9 +1,10 @@
"""Module containing prompters"""
-import dataclasses
import logging
-from enum import Enum, auto
-from typing import Generator, List, Optional, Tuple, Union
+from enum import Enum
+from typing import Generator, Optional, Union
+
+from fastchat.conversation import Conversation, get_conv_template
LOG = logging.getLogger("axolotl")
IGNORE_TOKEN_ID = -100
@@ -214,53 +215,6 @@ def build_prompt(
yield res
-class SeparatorStyle(Enum):
- """Different separator style."""
-
- SINGLE = auto()
- TWO = auto()
- DOLLY = auto()
-
-
-# TODO clean this 💩 up
-@dataclasses.dataclass
-class Conversation:
- """A class that keeps all conversation history."""
-
- system: str
- roles: List[str]
- messages: List[List[str]]
- offset: int
- sep_style: SeparatorStyle = SeparatorStyle.SINGLE
- sep: str = "###"
- sep2: Optional[str] = None
-
- def get_prompt(self) -> Generator[Tuple[str, str], None, None]:
- # seps = [self.sep, self.sep2]
- preamble = self.system + self.sep
- yield ("SYSTEM:", preamble)
- for _, (role, message) in enumerate(self.messages):
- if message:
- yield (role + ":", " " + message)
- else:
- LOG.warning(f"role with empty message: {role}")
- yield (role + ":", "")
-
- def copy(self):
- return Conversation(
- system=self.system,
- roles=self.roles,
- messages=[[x, y] for x, y in self.messages],
- offset=self.offset,
- sep_style=self.sep_style,
- sep=self.sep,
- sep2=self.sep2,
- )
-
- def append_message(self, role, message):
- self.messages.append([role, message])
-
-
SHAREGPT_ASSERTION_FAILED_ROLE = (
"Role did not alternate between turns (gpt and human). Please check your data."
)
@@ -271,28 +225,27 @@ class ShareGPTPrompter: # pylint: disable=too-few-public-methods
A prompter that generates prompts for the ShareGPT
"""
- def __init__(self, prompt_style=None, system_prompt: Optional[str] = None):
- if prompt_style != PromptStyle.CHAT.value:
- raise ValueError(
- f"unsupported prompt_style for ShareGPTPrompter({prompt_style})"
- )
- system: str = (
- system_prompt
- if system_prompt
- else (
- "A chat between a curious user and an artificial intelligence assistant. "
- "The assistant gives helpful, detailed, and polite answers to the user's questions."
- )
- )
- self._conversation = Conversation(
- system=system,
- roles=["USER", "ASSISTANT"],
- messages=[],
- offset=0,
- sep_style=SeparatorStyle.TWO,
- sep=" ",
- sep2=" ",
- )
+ role_key_human = "human"
+ role_key_model = "gpt"
+
+ def __init__(
+ self,
+ prompt_style=None, # pylint: disable=unused-argument
+ conversation: Optional[Union[str, Conversation]] = None,
+ role_key_human: Optional[str] = None,
+ role_key_model: Optional[str] = None,
+ ):
+ if conversation:
+ if isinstance(conversation, Conversation):
+ self._conversation = conversation
+ else:
+ self._conversation = get_conv_template(conversation)
+ else:
+ self._conversation = get_conv_template("vicuna_v1.1")
+ if role_key_human:
+ self.role_key_human = role_key_human
+ if role_key_model:
+ self.role_key_model = role_key_model
def build_prompt(self, source) -> Generator[str, None, None]:
if len(source) < 2:
@@ -306,17 +259,14 @@ def build_prompt(self, source) -> Generator[str, None, None]:
# Add the conversation system prompt if provided, otherwise use the default one
if source[0]["from"] == "system":
- conv.system = source[0]["value"]
+ conv.set_system_message(source[0]["value"])
source.pop(0)
- roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
+ roles = {self.role_key_human: conv.roles[0], self.role_key_model: conv.roles[1]}
try:
# Apply prompt templates
- if (
- source[0]["from"] not in roles
- or roles[source[0]["from"]] != conv.roles[0]
- ):
+ if source[0]["from"] not in roles:
# Skip the first one if it is not from human
source = source[1:]
except IndexError as err:
@@ -326,8 +276,29 @@ def build_prompt(self, source) -> Generator[str, None, None]:
conv.messages = []
for j, sentence in enumerate(source):
role = roles[sentence["from"]]
- assert role == conv.roles[j % 2], SHAREGPT_ASSERTION_FAILED_ROLE
+ if role != conv.roles[j % 2]:
+ LOG.warning(f"{SHAREGPT_ASSERTION_FAILED_ROLE}: {sentence}")
conv.append_message(role, sentence["value"])
- for part in conv.get_prompt():
+ for part in conv.get_turns():
+ if part[0] and not part[1]:
+ LOG.warning(f"role with empty message: {part[0]}")
yield part
+
+
+class ShareGPTPrompterV2(ShareGPTPrompter):
+ """
+ A V2 prompter that generates prompts for the ShareGPT
+ """
+
+ def __init__(
+ self,
+ conversation: Optional[Union[str, Conversation]] = None,
+ role_key_human: Optional[str] = None,
+ role_key_model: Optional[str] = None,
+ ):
+ super().__init__(
+ conversation=conversation,
+ role_key_human=role_key_human,
+ role_key_model=role_key_model,
+ )
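An illustrative sketch of driving the new `ShareGPTPrompterV2` directly. `build_prompt()` iterates the patched `get_turns()`, so the monkeypatch has to be applied first; the sample conversation is made up and the default fastchat `vicuna_v1.1` template is used.

```python
from axolotl.monkeypatch.fastchat_conversation_turns import (
    add_get_turns_to_conversation,
)
from axolotl.prompters import ShareGPTPrompterV2

add_get_turns_to_conversation()  # build_prompt() relies on conv.get_turns()

prompter = ShareGPTPrompterV2()  # defaults to fastchat's "vicuna_v1.1" template
sample = [
    {"from": "human", "value": "What is 2 + 2?"},
    {"from": "gpt", "value": "2 + 2 equals 4."},
]
for part in prompter.build_prompt(sample):
    print(part)
```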
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 3a574cefcc..1dfdab2605 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -278,6 +278,25 @@ def validate_config(cfg):
"`model_type: MixFormerSequentialForCausalLM` required for sample_packing"
)
+ if cfg.datasets:
+ for idx, ds_cfg in enumerate(cfg.datasets):
+ if ds_cfg.type == "sharegpt:chat":
+ LOG.warning(
+ PendingDeprecationWarning(
+ "`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead."
+ )
+ )
+ cfg.datasets[idx].type = "sharegpt"
+ if "sharegpt_simple" in ds_cfg.type:
+ LOG.warning(
+ PendingDeprecationWarning(
+ "`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead."
+ )
+ )
+ cfg.datasets[idx].type = cfg.datasets[idx].type.replace(
+ "sharegpt_simple", "sharegpt"
+ )
+
# TODO
# MPT 7b
# https://github.com/facebookresearch/bitsandbytes/issues/25
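A minimal restatement of the deprecation shim this hunk adds: legacy dataset `type` values are rewritten in place to the new `sharegpt` strategy names, with a PendingDeprecationWarning logged. The helper below is standalone, not the axolotl API.

```python
def normalize_dataset_type(ds_type: str) -> str:
    """Sketch of the in-place rewrite validate_config() performs per dataset."""
    if ds_type == "sharegpt:chat":
        return "sharegpt"
    if "sharegpt_simple" in ds_type:
        return ds_type.replace("sharegpt_simple", "sharegpt")
    return ds_type


print(normalize_dataset_type("sharegpt:chat"))              # sharegpt
print(normalize_dataset_type("sharegpt_simple.load_role"))  # sharegpt.load_role
```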
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index 9792371c7c..eeac15d302 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -25,7 +25,6 @@
GPTeacherPromptTokenizingStrategy,
JeopardyPromptTokenizingStrategy,
OpenAssistantPromptTokenizingStrategy,
- ShareGPTPromptTokenizingStrategy,
SummarizeTLDRPromptTokenizingStrategy,
)
from axolotl.prompters import (
@@ -35,7 +34,6 @@
MultipleChoiceConcisePrompter,
MultipleChoiceExplainPrompter,
ReflectAlpacaPrompter,
- ShareGPTPrompter,
SummarizeTLDRPrompter,
)
from axolotl.utils.dict import DictDefault
@@ -320,15 +318,6 @@ def for_d_in_datasets(dataset_configs):
)
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
datasets.append(ds_wrapper)
- elif d_base_type == "sharegpt":
- ds_strategy = ShareGPTPromptTokenizingStrategy(
- ShareGPTPrompter(d_prompt_style),
- tokenizer,
- cfg.train_on_inputs,
- cfg.sequence_len,
- )
- ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
- datasets.append(ds_wrapper)
else:
suffix = ""
if ":load_" in d.type:
diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py
index 4746ceddef..2b7608b636 100644
--- a/src/axolotl/utils/tokenization.py
+++ b/src/axolotl/utils/tokenization.py
@@ -33,5 +33,6 @@ def check_example_labels(example, tokenizer, text_only=False):
LOG.info(" ".join(colored_tokens))
LOG.info("\n\n\n")
+ print(" ".join(colored_tokens))
return " ".join(colored_tokens)
diff --git a/tests/fixtures/conversation.tokenized.json b/tests/fixtures/conversation.tokenized.json
index 0ac93713bc..ea4fca0990 100644
--- a/tests/fixtures/conversation.tokenized.json
+++ b/tests/fixtures/conversation.tokenized.json
@@ -1 +1 @@
-{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 
3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 
29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 
310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 29889, 2], "attention_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 
9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 
28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 
26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 
679, 4687, 411, 1316, 263, 2060, 29889, 2]}
+{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 29871, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 29871, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 
937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 
408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 
8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2], "attention_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 
20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 
1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 
278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 
29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2]}
diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py
index a455d8b513..9ee5ea588d 100644
--- a/tests/test_prompt_tokenizers.py
+++ b/tests/test_prompt_tokenizers.py
@@ -21,7 +21,7 @@
AlpacaPromptTokenizingStrategy,
ShareGPTPromptTokenizingStrategy,
)
-from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompter
+from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompterV2
LOG = logging.getLogger("axolotl")
@@ -60,7 +60,7 @@ def test_sharegpt_integration(self):
) as fin:
data = fin.read()
tokenized_conversation = json.loads(data)
- prompter = ShareGPTPrompter("chat")
+ prompter = ShareGPTPrompterV2()
strat = ShareGPTPromptTokenizingStrategy(
prompter,
self.tokenizer,
@@ -79,7 +79,7 @@ def test_sharegpt_warnings_integration(self):
) as fin:
data = fin.read()
conversation = json.loads(data)
- prompter = ShareGPTPrompter("chat")
+ prompter = ShareGPTPrompterV2()
strat = ShareGPTPromptTokenizingStrategy(
prompter,
self.tokenizer,
diff --git a/tests/test_validation.py b/tests/test_validation.py
index d7935c1a54..b9a57c2e9a 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -374,3 +374,26 @@ def test_merge_lora_no_bf16_fail(self):
)
validate_config(cfg)
+
+ def test_sharegpt_deprecation(self):
+ cfg = DictDefault(
+ {"datasets": [{"path": "lorem/ipsum", "type": "sharegpt:chat"}]}
+ )
+ with self._caplog.at_level(logging.WARNING):
+ validate_config(cfg)
+ assert any(
+ "`type: sharegpt:chat` will soon be deprecated." in record.message
+ for record in self._caplog.records
+ )
+ assert cfg.datasets[0].type == "sharegpt"
+
+ cfg = DictDefault(
+ {"datasets": [{"path": "lorem/ipsum", "type": "sharegpt_simple:load_role"}]}
+ )
+ with self._caplog.at_level(logging.WARNING):
+ validate_config(cfg)
+ assert any(
+ "`type: sharegpt_simple` will soon be deprecated." in record.message
+ for record in self._caplog.records
+ )
+ assert cfg.datasets[0].type == "sharegpt:load_role"
From 895f0a0723e20359b865a7f414faa06103bef7f9 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 27 Sep 2023 12:11:07 -0400
Subject: [PATCH 25/77] skip some flash attn patches unless explicitly enabled
(#643)
* skip some flash attn patches unless explicitly enabled
* make the other patches optional
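
In practice the extra patches become opt-in. A minimal hedged sketch of the gating pattern (the helper name `maybe_patch_flash_cross_entropy` is hypothetical; the real change lives in `replace_llama_attn_with_flash_attn` in the diff below):

```python
import logging
from functools import partial

import transformers

LOG = logging.getLogger("axolotl")


def maybe_patch_flash_cross_entropy(enabled: bool = False) -> None:
    # Only attempt the fused CrossEntropyLoss patch when explicitly enabled;
    # by default transformers' implementation is left untouched.
    if not enabled:
        return
    try:
        from flash_attn.losses.cross_entropy import CrossEntropyLoss

        LOG.info("patching with flash_attn.losses.cross_entropy")
        transformers.models.llama.modeling_llama.CrossEntropyLoss = partial(
            CrossEntropyLoss, inplace_backward=True
        )
    except ImportError:
        LOG.info("optimized flash-attention CrossEntropyLoss not found")
```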
---
README.md | 2 +
.../monkeypatch/llama_attn_hijack_flash.py | 54 +++++++++++--------
src/axolotl/utils/models.py | 6 ++-
3 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/README.md b/README.md
index 3b3bc9985a..7f3230423c 100644
--- a/README.md
+++ b/README.md
@@ -636,6 +636,8 @@ flash_optimum:
xformers_attention:
# whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
flash_attention:
+flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only
+flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only
# whether to use scaled-dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
index 97f0477649..4f6b715756 100644
--- a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
@@ -38,7 +38,11 @@
LOG = logging.getLogger("axolotl")
-def replace_llama_attn_with_flash_attn(packed: Optional[bool] = False):
+def replace_llama_attn_with_flash_attn(
+ packed: Optional[bool] = False,
+ cross_entropy: Optional[bool] = False,
+ rms_norm: Optional[bool] = False,
+):
transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = ( # pylint: disable=protected-access
_prepare_decoder_attention_mask
)
@@ -49,33 +53,37 @@ def replace_llama_attn_with_flash_attn(packed: Optional[bool] = False):
llama_model_forward
)
- try:
- from flash_attn.losses.cross_entropy import CrossEntropyLoss
+    # skip unless explicitly enabled
+ if cross_entropy:
+ try:
+ from flash_attn.losses.cross_entropy import CrossEntropyLoss
- LOG.info("patching with flash_attn.losses.cross_entropy")
- transformers.models.llama.modeling_llama.CrossEntropyLoss = partial(
- CrossEntropyLoss, inplace_backward=True
- )
- except ImportError:
- LOG.info(
- "optimized flash-attention CrossEntropyLoss not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=xentropy_cuda_lib&subdirectory=csrc/xentropy'`)"
- )
+ LOG.info("patching with flash_attn.losses.cross_entropy")
+ transformers.models.llama.modeling_llama.CrossEntropyLoss = partial(
+ CrossEntropyLoss, inplace_backward=True
+ )
+ except ImportError:
+ LOG.info(
+ "optimized flash-attention CrossEntropyLoss not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=xentropy_cuda_lib&subdirectory=csrc/xentropy'`)"
+ )
- try:
- from flash_attn.ops.rms_norm import RMSNorm
+    # skip unless explicitly enabled
+ if rms_norm:
+ try:
+ from flash_attn.ops.rms_norm import RMSNorm
- class LlamaRMSNorm(RMSNorm):
- """Patched LLamaRMSNorm"""
+ class LlamaRMSNorm(RMSNorm):
+ """Patched LLamaRMSNorm"""
- def __init__(self, hidden_size, eps=1e-6):
- super().__init__(hidden_size, eps=eps)
+ def __init__(self, hidden_size, eps=1e-6):
+ super().__init__(hidden_size, eps=eps)
- LOG.info("patching with flash_attn.ops.rms_norm")
- transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
- except ImportError:
- LOG.info(
- "optimized flash-attention RMSNorm not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
- )
+ LOG.info("patching with flash_attn.ops.rms_norm")
+ transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
+ except ImportError:
+ LOG.info(
+ "optimized flash-attention RMSNorm not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
+ )
# Disable the transformation of the attention mask in LlamaModel as the flash attention
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 361440931f..07cdc4d6ed 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -121,7 +121,11 @@ def load_model(
)
LOG.info("patching with flash attention for sample packing")
- replace_llama_attn_with_flash_attn(packed=cfg.sample_packing)
+ replace_llama_attn_with_flash_attn(
+ packed=cfg.sample_packing,
+ cross_entropy=cfg.flash_attn_cross_entropy,
+ rms_norm=cfg.flash_attn_rms_norm,
+ )
elif cfg.is_llama_derived_model and cfg.xformers_attention:
from axolotl.monkeypatch.llama_attn_hijack_xformers import (
hijack_llama_attention,
From d1236f2c418f4efa2e36be82fe5cd640968f8908 Mon Sep 17 00:00:00 2001
From: Felix Yan
Date: Wed, 27 Sep 2023 19:12:10 +0300
Subject: [PATCH 26/77] Correct typos in datasets.py (#639)
---
src/axolotl/datasets.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py
index 1dccb6d526..32b2e0cc2d 100644
--- a/src/axolotl/datasets.py
+++ b/src/axolotl/datasets.py
@@ -22,7 +22,7 @@ class TokenizedPromptDataset(Dataset):
"""
Dataset that returns tokenized prompts from a stream of text files.
Args:
- prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for proccessing the data.
+ prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data.
dataset (dataset.Dataset): Dataset with text files.
"""
@@ -55,7 +55,7 @@ class ConstantLengthDataset(IterableDataset):
"""
Iterable dataset that returns constant length chunks of tokens from stream of text files.
Args:
- tokenizer (Tokenizer): The processor used for proccessing the data.
+ tokenizer (Tokenizer): The processor used for processing the data.
dataset (dataset.Dataset): Dataset with text files.
seq_length (int): Length of token sequences to return.
"""
From 8fe0e633d2715d84333ce68fdd45d2d975638a54 Mon Sep 17 00:00:00 2001
From: Ethan Smith
Date: Wed, 27 Sep 2023 10:41:31 -0700
Subject: [PATCH 27/77] Fix bug in dataset loading (#284)
* Fix bug in dataset loading
This fixes a bug when loading datasets: `d.data_files` can be a list, so it cannot always be passed directly to `hf_hub_download`.
* Check type of data_files, and load accordingly
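
A hedged sketch of the same check outside the diff context (`resolve_data_files` is a hypothetical helper, not the function patched below), assuming `huggingface_hub` is installed:

```python
from huggingface_hub import hf_hub_download


def resolve_data_files(repo_id, data_files):
    # Normalize a single filename to a list, then download each file from the
    # dataset repo; mirrors the str/list handling added in data.py below.
    if isinstance(data_files, str):
        data_files = [data_files]
    elif not isinstance(data_files, list):
        raise ValueError("data_files must be either a string or list of strings")
    return [
        hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=name)
        for name in data_files
    ]
```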
---
src/axolotl/utils/data.py | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index eeac15d302..2713796774 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -205,11 +205,26 @@ def for_d_in_datasets(dataset_configs):
use_auth_token=use_auth_token,
)
else:
- fp = hf_hub_download(
- repo_id=d.path,
- repo_type="dataset",
- filename=d.data_files,
- )
+ if isinstance(d.data_files, str):
+ fp = hf_hub_download(
+ repo_id=d.path,
+ repo_type="dataset",
+ filename=d.data_files,
+ )
+ elif isinstance(d.data_files, list):
+ fp = []
+ for file in d.data_files:
+ fp.append(
+ hf_hub_download(
+ repo_id=d.path,
+ repo_type="dataset",
+ filename=file,
+ )
+ )
+ else:
+ raise ValueError(
+ "data_files must be either a string or list of strings"
+ )
ds = load_dataset(
"json", name=d.name, data_files=fp, streaming=False, split=None
)
From 85b0be2ba7251e7e4cdc4be8e31dae4f88be6a8f Mon Sep 17 00:00:00 2001
From: Napuh <55241721+Napuh@users.noreply.github.com>
Date: Wed, 27 Sep 2023 23:43:35 +0200
Subject: [PATCH 28/77] Warn users to login to HuggingFace (#645)
* added warning if user is not logged in to HF
* updated doc to suggest logging in to HF
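
The new check boils down to a `whoami()` call; a hedged standalone sketch of the same verification (mirroring, not replacing, the `check_user_token` helper added below):

```python
from huggingface_hub import HfApi
from huggingface_hub.utils import LocalTokenNotFoundError

try:
    # Succeeds only when a token is available locally or via the environment.
    print(HfApi().whoami())
except LocalTokenNotFoundError:
    print("not logged in; run `huggingface-cli login` to use gated models/datasets")
```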
---
README.md | 5 +++++
scripts/finetune.py | 2 ++
src/axolotl/cli/__init__.py | 15 +++++++++++++++
src/axolotl/cli/train.py | 2 ++
4 files changed, 24 insertions(+)
diff --git a/README.md b/README.md
index 7f3230423c..d502eec0b5 100644
--- a/README.md
+++ b/README.md
@@ -124,6 +124,11 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
pip3 install packaging
pip3 install -e '.[flash-attn,deepspeed]'
```
+ 4. (Optional) Log in to Hugging Face to use gated models/datasets.
+ ```bash
+ huggingface-cli login
+ ```
+ Get the token at huggingface.co/settings/tokens
- LambdaLabs
diff --git a/scripts/finetune.py b/scripts/finetune.py
index 7b6751e31c..118a97b844 100644
--- a/scripts/finetune.py
+++ b/scripts/finetune.py
@@ -7,6 +7,7 @@
from axolotl.cli import (
check_accelerate_default_config,
+ check_user_token,
do_inference,
do_merge_lora,
load_cfg,
@@ -31,6 +32,7 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
)
parsed_cfg = load_cfg(config, **kwargs)
check_accelerate_default_config()
+ check_user_token()
parser = transformers.HfArgumentParser((TrainerCliArgs))
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
return_remaining_strings=True
diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py
index 90e1d508b0..c3b580391a 100644
--- a/src/axolotl/cli/__init__.py
+++ b/src/axolotl/cli/__init__.py
@@ -14,6 +14,8 @@
# add src to the pythonpath so we don't need to pip install this
from accelerate.commands.config import config_args
from art import text2art
+from huggingface_hub import HfApi
+from huggingface_hub.utils import LocalTokenNotFoundError
from transformers import GenerationConfig, TextStreamer
from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer
@@ -247,3 +249,16 @@ def check_accelerate_default_config():
LOG.warning(
f"accelerate config file found at {config_args.default_yaml_config_file}. This can lead to unexpected errors"
)
+
+
+def check_user_token():
+ # Verify if token is valid
+ api = HfApi()
+ try:
+ user_info = api.whoami()
+ return bool(user_info)
+ except LocalTokenNotFoundError:
+ LOG.warning(
+ "Error verifying HuggingFace token. Remember to log in using `huggingface-cli login` and get your access token from https://huggingface.co/settings/tokens if you want to use gated models or datasets."
+ )
+ return False
diff --git a/src/axolotl/cli/train.py b/src/axolotl/cli/train.py
index 72a9250c8d..c64755872b 100644
--- a/src/axolotl/cli/train.py
+++ b/src/axolotl/cli/train.py
@@ -8,6 +8,7 @@
from axolotl.cli import (
check_accelerate_default_config,
+ check_user_token,
load_cfg,
load_datasets,
print_axolotl_text_art,
@@ -21,6 +22,7 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
print_axolotl_text_art()
parsed_cfg = load_cfg(config, **kwargs)
check_accelerate_default_config()
+ check_user_token()
parser = transformers.HfArgumentParser((TrainerCliArgs))
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
return_remaining_strings=True
From b6ab8aad62f5b9f75e3456a4f60fa0e2040cf34b Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 27 Sep 2023 18:41:00 -0400
Subject: [PATCH 29/77] Mistral flash attn packing (#646)
* add mistral monkeypatch
* add arg for decoder attention mask
* fix lint for duplicate code
* make sure to update transformers too
* tweak install for e2e
* move mistral patch to conditional
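
For context, sample packing feeds flash-attention's varlen kernel with cumulative sequence lengths derived from position ids. A hedged toy illustration of those values (not axolotl's `get_cu_seqlens_from_pos_ids` implementation):

```python
import torch


def cu_seqlens_from_position_ids(position_ids: torch.Tensor):
    # Position ids restart at 0 for each packed sample, so the zeros mark
    # sequence boundaries; cu_seqlens is the running offset of each sample.
    pos = position_ids.squeeze(0)
    starts = torch.nonzero(pos == 0, as_tuple=False).squeeze(-1)
    ends = torch.cat([starts[1:], torch.tensor([pos.numel()])])
    lengths = ends - starts
    cu_seqlens = torch.cumsum(torch.cat([torch.tensor([0]), lengths]), dim=0)
    return cu_seqlens.to(torch.int32), int(lengths.max())


# three packed samples of lengths 3, 2 and 4 in a single row
pos = torch.tensor([[0, 1, 2, 0, 1, 0, 1, 2, 3]])
print(cu_seqlens_from_position_ids(pos))  # (tensor([0, 3, 5, 9], dtype=torch.int32), 4)
```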
---
.github/workflows/tests.yml | 5 +-
requirements.txt | 2 +-
.../monkeypatch/mistral_attn_hijack_flash.py | 401 ++++++++++++++++++
src/axolotl/utils/models.py | 8 +
4 files changed, 412 insertions(+), 4 deletions(-)
create mode 100644 src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4da10a6c35..b4637fd67f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -44,7 +44,7 @@ jobs:
- name: Install dependencies
run: |
- pip3 install -e .
+ pip3 install -U -e .
pip3 install -r requirements-tests.txt
- name: Run tests
@@ -69,8 +69,7 @@ jobs:
- name: Install dependencies
run: |
- pip3 install -e .
- pip3 install flash-attn
+ pip3 install -U -e .[flash-attn]
pip3 install -r requirements-tests.txt
- name: Run e2e tests
diff --git a/requirements.txt b/requirements.txt
index 7616d1fb0a..18659daec5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ torch==2.0.1
auto-gptq
packaging
peft @ git+https://github.com/huggingface/peft.git
-transformers @ git+https://github.com/huggingface/transformers.git@0ac3875011d32dc85e0e83970507e3afe8f0febb
+transformers @ git+https://github.com/huggingface/transformers.git@78dd120
bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
deepspeed
diff --git a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
new file mode 100644
index 0000000000..f53d5d0071
--- /dev/null
+++ b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
@@ -0,0 +1,401 @@
+"""Flash attention monkey patch for mistral model"""
+# pylint: disable=duplicate-code
+
+import logging
+import math
+from typing import List, Optional, Tuple, Union
+
+import torch
+import transformers
+from einops import rearrange
+from torch import nn
+from transformers.modeling_outputs import BaseModelOutputWithPast
+from transformers.models.mistral.modeling_mistral import (
+ MistralDecoderLayer as OriginalMistralDecoderLayer,
+)
+from transformers.models.mistral.modeling_mistral import apply_rotary_pos_emb, repeat_kv
+
+from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
+
+try:
+ from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports
+ flash_attn_varlen_qkvpacked_func,
+ )
+except ImportError:
+ from flash_attn.flash_attn_interface import (
+ flash_attn_unpadded_qkvpacked_func as flash_attn_varlen_qkvpacked_func,
+ )
+
+
+LOG = logging.getLogger("axolotl.monkeypatch.mistral")
+
+
+def replace_mistral_attn_with_flash_attn(
+ packed: Optional[bool] = False,
+):
+ transformers.models.mistral.modeling_mistral.MistralModel._prepare_decoder_attention_mask = ( # pylint: disable=protected-access
+ _prepare_decoder_attention_mask
+ )
+ transformers.models.mistral.modeling_mistral.MistralAttention.forward = (
+ flashattn_forward
+ )
+ if packed:
+ transformers.models.mistral.modeling_mistral.MistralDecoderLayer = (
+ MistralDecoderLayer
+ )
+ transformers.models.mistral.modeling_mistral.MistralModel.forward = (
+ mistral_model_forward
+ )
+
+
+# Disable the transformation of the attention mask in MistralModel as the flash attention
+# requires the attention mask to be the same as the key_padding_mask
+def _prepare_decoder_attention_mask(
+ self,
+ attention_mask,
+ input_shape,
+ inputs_embeds,
+ past_key_values_length,
+ sliding_window,
+): # pylint: disable=unused-argument
+ # [bsz, seq_len]
+ return attention_mask
+
+
+def flashattn_forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: bool = False,
+ use_cache: bool = False,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ max_seqlen: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ bsz, q_len, _ = hidden_states.size()
+
+ query_states = self.q_proj(hidden_states)
+ key_states = self.k_proj(hidden_states)
+ value_states = self.v_proj(hidden_states)
+
+ query_states = query_states.view(
+ bsz, q_len, self.num_heads, self.head_dim
+ ).transpose(1, 2)
+ key_states = key_states.view(
+ bsz, q_len, self.num_key_value_heads, self.head_dim
+ ).transpose(1, 2)
+ value_states = value_states.view(
+ bsz, q_len, self.num_key_value_heads, self.head_dim
+ ).transpose(1, 2)
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+ cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+ query_states, key_states = apply_rotary_pos_emb(
+ query_states, key_states, cos, sin, position_ids
+ )
+
+ if past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+
+ past_key_value = (key_states, value_states) if use_cache else None
+
+ # repeat k/v heads if n_kv_heads < n_heads
+ key_states = repeat_kv(key_states, self.num_key_value_groups)
+ value_states = repeat_kv(value_states, self.num_key_value_groups)
+
+ if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1:
+ # special handling using sample packing
+ qkv = torch.stack(
+ [query_states, key_states, value_states], dim=2
+ ) # [bsz, nh, 3, q_len, hd]
+ qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd]
+ qkv = rearrange(qkv, "b s ... -> (b s) ...")
+
+ output = flash_attn_varlen_qkvpacked_func(
+ qkv, cu_seqlens, max_seqlen, 0.0, softmax_scale=None, causal=True
+ )
+ output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
+ attn_output = output
+ if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+ attn_output = rearrange(attn_output, "b s h d -> b s (h d)")
+ attn_weights = None
+ else:
+ attn_weights = torch.matmul(
+ query_states, key_states.transpose(2, 3)
+ ) / math.sqrt(self.head_dim)
+ if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+ )
+
+ attn_weights = attn_weights + attention_mask
+
+ # upcast attention to fp32
+ attn_weights = nn.functional.softmax(
+ attn_weights, dim=-1, dtype=torch.float32
+ ).to(query_states.dtype)
+ attn_output = torch.matmul(attn_weights, value_states)
+
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.transpose(1, 2).contiguous()
+ attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+ attn_output = self.o_proj(attn_output)
+
+ if not output_attentions:
+ attn_weights = None
+
+ return attn_output, attn_weights, past_key_value
+
+
+def mistral_model_forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+) -> Union[Tuple, BaseModelOutputWithPast]:
+ output_attentions = (
+ output_attentions
+ if output_attentions is not None
+ else self.config.output_attentions
+ )
+ output_hidden_states = (
+ output_hidden_states
+ if output_hidden_states is not None
+ else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
+ # retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError(
+ "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
+ )
+ if input_ids is not None:
+ batch_size, seq_length = input_ids.shape
+ elif inputs_embeds is not None:
+ batch_size, seq_length, _ = inputs_embeds.shape
+ else:
+ raise ValueError(
+ "You have to specify either decoder_input_ids or decoder_inputs_embeds"
+ )
+
+ seq_length_with_past = seq_length
+ past_key_values_length = 0
+
+ if past_key_values is not None:
+ past_key_values_length = past_key_values[0][0].shape[2]
+ seq_length_with_past = seq_length_with_past + past_key_values_length
+
+ cu_seqlens = None
+ max_seqlen = None
+ if position_ids is None:
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
+ position_ids = torch.arange(
+ past_key_values_length,
+ seq_length + past_key_values_length,
+ dtype=torch.long,
+ device=device,
+ )
+ position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+ else:
+ position_ids = position_ids.view(-1, seq_length).long()
+ cu_seqlens, max_seqlen = get_cu_seqlens_from_pos_ids(position_ids)
+ cu_seqlens = cu_seqlens.squeeze()
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids)
+ # embed positions
+ if attention_mask is None:
+ attention_mask = torch.ones(
+ (batch_size, seq_length_with_past),
+ dtype=torch.bool,
+ device=inputs_embeds.device,
+ )
+ attention_mask = (
+ self._prepare_decoder_attention_mask( # pylint: disable=protected-access
+ attention_mask,
+ (batch_size, seq_length),
+ inputs_embeds,
+ past_key_values_length,
+ sliding_window=self.config.sliding_window,
+ )
+ )
+
+ hidden_states = inputs_embeds
+
+ if self.gradient_checkpointing and self.training:
+ if use_cache:
+ transformers.logger.warning_once(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+ )
+ use_cache = False
+
+ # decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ next_decoder_cache = () if use_cache else None
+
+ for idx, decoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ past_key_value = past_key_values[idx] if past_key_values is not None else None
+
+ if self.gradient_checkpointing and self.training:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ # None for past_key_value
+ return module(*inputs)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(decoder_layer),
+ hidden_states,
+ attention_mask,
+ position_ids,
+ past_key_value,
+ output_attentions,
+ None,
+ cu_seqlens,
+ max_seqlen,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ cu_seqlens=cu_seqlens,
+ max_seqlen=max_seqlen,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+
+ hidden_states = self.norm(hidden_states)
+
+ # add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = next_decoder_cache if use_cache else None
+ if not return_dict:
+ return tuple(
+ v
+ for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
+ if v is not None
+ )
+ return BaseModelOutputWithPast(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ )
+
+
+class MistralDecoderLayer(OriginalMistralDecoderLayer):
+ """
+ patched version of MistralDecoderLayer to pass through the precalculated cu_seqlens
+ """
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: Optional[bool] = False,
+ use_cache: Optional[bool] = False,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ max_seqlen: Optional[torch.Tensor] = None,
+ ) -> Tuple[
+ torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]
+ ]:
+ """
+ Args:
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+ attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ use_cache (`bool`, *optional*):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+ (see `past_key_values`).
+ past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
+            cu_seqlens (`torch.Tensor`, *optional*): cumulative sequence lengths when using sample packing
+ """
+
+ residual = hidden_states
+
+ hidden_states = self.input_layernorm(hidden_states)
+
+ # Self Attention
+ hidden_states, self_attn_weights, present_key_value = self.self_attn(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ cu_seqlens=cu_seqlens,
+ max_seqlen=max_seqlen,
+ )
+ hidden_states = residual + hidden_states
+
+ # Fully Connected
+ residual = hidden_states
+ hidden_states = self.post_attention_layernorm(hidden_states)
+ hidden_states = self.mlp(hidden_states)
+ hidden_states = residual + hidden_states
+
+ outputs = (hidden_states,)
+
+ if output_attentions:
+ outputs += (self_attn_weights,)
+
+ if use_cache:
+ outputs += (present_key_value,)
+
+ return outputs
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 07cdc4d6ed..63e34293ee 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -150,6 +150,14 @@ def load_model(
# Note: This might overwrite previous additional_special_tokens
tokenizer.add_special_tokens({"additional_special_tokens": [MEM_TOKEN]})
+ if cfg.is_mistral_derived_model and cfg.flash_attention:
+ from axolotl.monkeypatch.mistral_attn_hijack_flash import (
+ replace_mistral_attn_with_flash_attn,
+ )
+
+ LOG.info("patching with flash attention")
+ replace_mistral_attn_with_flash_attn(packed=cfg.sample_packing)
+
if cfg.is_llama_derived_model and cfg.xpos_rope:
from axolotl.monkeypatch.xpos_rope_llama_monkey_patch import (
replace_llama_rope_with_xpos_rope,
From 383f88d7a71286e5c74f50e3caf893bd9d071fcf Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Thu, 28 Sep 2023 10:14:41 +0900
Subject: [PATCH 30/77] Fix(cfg): Add validation for save_strategy and
eval_strategy (#633)
* Fix(cfg): Check save_strategy cfg conflict with save_steps
* Fix(cfg): Check evaluation_strategy cfg conflict with eval_steps
* chore: add extra check for steps only
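For illustration, a minimal sketch of the new behaviour (it mirrors the tests added in this patch and assumes an importable axolotl checkout):

```python
# Minimal sketch of the new config validation: a save_strategy that
# conflicts with save_steps is rejected, while a consistent pair passes.
from axolotl.utils.config import validate_config
from axolotl.utils.dict import DictDefault

conflicting = DictDefault({"save_strategy": "epoch", "save_steps": 10})
try:
    validate_config(conflicting)
except ValueError as err:
    print(f"rejected as expected: {err}")

# a steps-based strategy with save_steps is still accepted unchanged
validate_config(DictDefault({"save_strategy": "steps", "save_steps": 10}))
```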
---
src/axolotl/utils/config.py | 18 ++++
src/axolotl/utils/trainer.py | 15 +---
tests/test_validation.py | 168 +++++++++++++++++++++++++++++++++++
3 files changed, 190 insertions(+), 11 deletions(-)
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 1dfdab2605..ac067b5055 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -296,6 +296,24 @@ def validate_config(cfg):
cfg.datasets[idx].type = cfg.datasets[idx].type.replace(
"sharegpt_simple", "sharegpt"
)
+ if cfg.save_strategy and cfg.save_steps and cfg.save_strategy != "steps":
+ raise ValueError(
+ "save_strategy and save_steps mismatch. Please set save_strategy to 'steps' or remove save_steps."
+ )
+
+ if (
+ cfg.evaluation_strategy
+ and cfg.eval_steps
+ and cfg.evaluation_strategy != "steps"
+ ):
+ raise ValueError(
+ "evaluation_strategy and eval_steps mismatch. Please set evaluation_strategy to 'steps' or remove eval_steps."
+ )
+
+ if cfg.val_set_size == 0 and (cfg.eval_steps or cfg.evaluation_strategy):
+ raise ValueError(
+ "eval_steps and evaluation_strategy are not supported with val_set_size == 0"
+ )
# TODO
# MPT 7b
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index aee2a1b99e..3c75e4ec53 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -604,26 +604,19 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
"sample_packing_efficiency"
] = cfg.sample_packing_eff_est
- if cfg.eval_steps and cfg.evaluation_strategy:
- # assume if the user set both, they know what they're doing
- training_arguments_kwargs["evaluation_strategy"] = cfg.evaluation_strategy
+ if cfg.eval_steps:
+ training_arguments_kwargs["evaluation_strategy"] = "steps"
training_arguments_kwargs["eval_steps"] = cfg.eval_steps
+ elif cfg.evaluation_strategy:
+ training_arguments_kwargs["evaluation_strategy"] = cfg.evaluation_strategy
elif cfg.val_set_size == 0:
# no eval set, so don't eval
training_arguments_kwargs["evaluation_strategy"] = "no"
- elif cfg.evaluation_strategy and cfg.evaluation_strategy in ["epoch", "no"]:
- # if explicitly set for epoch, just set, and eval steps don't matter
- training_arguments_kwargs["evaluation_strategy"] = cfg.evaluation_strategy
- elif cfg.eval_steps:
- # steps isn't used w/ epochs
- training_arguments_kwargs["evaluation_strategy"] = "steps"
- training_arguments_kwargs["eval_steps"] = cfg.eval_steps
else:
# we have an eval set, but no steps defined, default to use epoch
training_arguments_kwargs["evaluation_strategy"] = "epoch"
if cfg.save_steps:
- # save_steps implies save_strategy of steps
training_arguments_kwargs["save_strategy"] = "steps"
training_arguments_kwargs["save_steps"] = cfg.save_steps
elif cfg.save_strategy:
diff --git a/tests/test_validation.py b/tests/test_validation.py
index b9a57c2e9a..35d90a2cb4 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -397,3 +397,171 @@ def test_sharegpt_deprecation(self):
for record in self._caplog.records
)
assert cfg.datasets[0].type == "sharegpt:load_role"
+
+ def test_no_conflict_save_strategy(self):
+ cfg = DictDefault(
+ {
+ "save_strategy": "epoch",
+ "save_steps": 10,
+ }
+ )
+
+ with pytest.raises(
+ ValueError, match=r".*save_strategy and save_steps mismatch.*"
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "save_strategy": "no",
+ "save_steps": 10,
+ }
+ )
+
+ with pytest.raises(
+ ValueError, match=r".*save_strategy and save_steps mismatch.*"
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "save_strategy": "steps",
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "save_strategy": "steps",
+ "save_steps": 10,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "save_steps": 10,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "save_strategy": "no",
+ }
+ )
+
+ validate_config(cfg)
+
+ def test_no_conflict_eval_strategy(self):
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "epoch",
+ "eval_steps": 10,
+ }
+ )
+
+ with pytest.raises(
+ ValueError, match=r".*evaluation_strategy and eval_steps mismatch.*"
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "no",
+ "eval_steps": 10,
+ }
+ )
+
+ with pytest.raises(
+ ValueError, match=r".*evaluation_strategy and eval_steps mismatch.*"
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "steps",
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "steps",
+ "eval_steps": 10,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "eval_steps": 10,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "no",
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "epoch",
+ "val_set_size": 0,
+ }
+ )
+
+ with pytest.raises(
+ ValueError,
+ match=r".*eval_steps and evaluation_strategy are not supported with val_set_size == 0.*",
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "eval_steps": 10,
+ "val_set_size": 0,
+ }
+ )
+
+ with pytest.raises(
+ ValueError,
+ match=r".*eval_steps and evaluation_strategy are not supported with val_set_size == 0.*",
+ ):
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "val_set_size": 0,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "eval_steps": 10,
+ "val_set_size": 0.01,
+ }
+ )
+
+ validate_config(cfg)
+
+ cfg = DictDefault(
+ {
+ "evaluation_strategy": "epoch",
+ "val_set_size": 0.01,
+ }
+ )
+
+ validate_config(cfg)
From eb41f76f9286127c7e77dfe09bd59fbae0fb6fcb Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Thu, 28 Sep 2023 20:15:00 +0900
Subject: [PATCH 31/77] Feat: Add example for Mistral (#644)
* Feat: Add example for Mistral
* chore: turn off flash
* chore: add is_mistral_derived_model
* chore: update following PR
---
README.md | 3 +-
examples/mistral/config.yml | 62 +++++++++++++++++++++++++++++++++++++
src/axolotl/utils/config.py | 17 ++++++++--
3 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 examples/mistral/config.yml
diff --git a/README.md b/README.md
index d502eec0b5..3a1eb0cd7a 100644
--- a/README.md
+++ b/README.md
@@ -413,9 +413,10 @@ tokenizer_legacy:
# this is reported to improve training speed on some models
resize_token_embeddings_to_32x:
-# used to identify if the model is falcon/llama based
+# used to identify which model family the base model is derived from
is_falcon_derived_model:
is_llama_derived_model:
+is_mistral_derived_model:
# whether you are training a 4-bit GPTQ quantized model
gptq: true
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
new file mode 100644
index 0000000000..d199f947be
--- /dev/null
+++ b/examples/mistral/config.yml
@@ -0,0 +1,62 @@
+base_model: mistralai/Mistral-7B-v0.1
+base_model_config: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: true
+
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+
+datasets:
+ - path: mhenrichsen/alpaca_2k_test
+ type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.01
+output_dir: ./out
+
+sequence_len: 8192
+sample_packing:
+pad_to_sequence_len:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 3
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+eval_steps: 20
+eval_table_size: 5
+eval_table_max_new_tokens: 128
+save_steps:
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+ bos_token: ""
+ eos_token: ""
+ unk_token: ""
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index ac067b5055..67f9490c47 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -82,7 +82,7 @@ def normalize_config(cfg):
cfg.is_llama_derived_model = (
(hasattr(model_config, "model_type") and model_config.model_type == "llama")
or cfg.is_llama_derived_model
- or "llama" in cfg.base_model
+ or "llama" in cfg.base_model.lower()
or (cfg.model_type and "llama" in cfg.model_type.lower())
)
@@ -98,10 +98,23 @@ def normalize_config(cfg):
]
)
or cfg.is_falcon_derived_model
- or "falcon" in cfg.base_model
+ or "falcon" in cfg.base_model.lower()
or (cfg.model_type and "rwforcausallm" in cfg.model_type.lower())
)
+ cfg.is_mistral_derived_model = (
+ (
+ hasattr(model_config, "model_type")
+ and model_config.model_type
+ in [
+ "mistral",
+ ]
+ )
+ or cfg.is_mistral_derived_model
+ or "mistral" in cfg.base_model.lower()
+ or (cfg.model_type and "mistral" in cfg.model_type.lower())
+ )
+
log_gpu_memory_usage(LOG, "baseline", cfg.device)
From b88f51512aa1524b60893ba84266088d922d84df Mon Sep 17 00:00:00 2001
From: Adarsh Shirawalmath <114558126+adarshxs@users.noreply.github.com>
Date: Thu, 28 Sep 2023 19:54:56 +0530
Subject: [PATCH 32/77] Update mistral/README.md (#647)
---
examples/mistral/README.md | 12 ++++++++++++
1 file changed, 12 insertions(+)
create mode 100644 examples/mistral/README.md
diff --git a/examples/mistral/README.md b/examples/mistral/README.md
new file mode 100644
index 0000000000..d1efb2cab3
--- /dev/null
+++ b/examples/mistral/README.md
@@ -0,0 +1,12 @@
+**Mistral 7B** is a language model with 7.3 billion parameters, showing notable performance across a variety of benchmarks.
+
+Fine-tune:
+```shell
+accelerate launch -m axolotl.cli.train examples/mistral/config.yml
+
+```
+
+If you run into CUDA OOM, use deepspeed with config zero2.json:
+```shell
+accelerate launch -m axolotl.cli.train examples/mistral/config.yml --deepspeed deepspeed/zero2.json
+```
From b2edaaeff6db708164c3e764bdd97e8f227c1b0f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 28 Sep 2023 10:57:37 -0400
Subject: [PATCH 33/77] fix for flash attn w mistral w/o sample packing (#648)
---
.../monkeypatch/mistral_attn_hijack_flash.py | 236 ++++++++++++++----
1 file changed, 188 insertions(+), 48 deletions(-)
diff --git a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
index f53d5d0071..21a6ee0842 100644
--- a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
@@ -2,13 +2,17 @@
# pylint: disable=duplicate-code
import logging
-import math
from typing import List, Optional, Tuple, Union
import torch
import transformers
from einops import rearrange
-from torch import nn
+from flash_attn.bert_padding import pad_input, unpad_input
+from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports
+ flash_attn_kvpacked_func,
+ flash_attn_varlen_kvpacked_func,
+ flash_attn_varlen_qkvpacked_func,
+)
from transformers.modeling_outputs import BaseModelOutputWithPast
from transformers.models.mistral.modeling_mistral import (
MistralDecoderLayer as OriginalMistralDecoderLayer,
@@ -17,16 +21,6 @@
from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
-try:
- from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports
- flash_attn_varlen_qkvpacked_func,
- )
-except ImportError:
- from flash_attn.flash_attn_interface import (
- flash_attn_unpadded_qkvpacked_func as flash_attn_varlen_qkvpacked_func,
- )
-
-
LOG = logging.getLogger("axolotl.monkeypatch.mistral")
@@ -108,6 +102,15 @@ def flashattn_forward(
key_states = repeat_kv(key_states, self.num_key_value_groups)
value_states = repeat_kv(value_states, self.num_key_value_groups)
+ if self.training:
+ # during training q,k,v always have same seqlen
+ assert key_states.shape == query_states.shape
+ is_causal = True
+ else:
+ # turn off FA causal mask after first inference autoregressive iteration
+ # only on first autoregressive step q,k,v have same seqlen
+ is_causal = key_states.shape == query_states.shape
+
if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1:
# special handling using sample packing
qkv = torch.stack(
@@ -120,46 +123,84 @@ def flashattn_forward(
qkv, cu_seqlens, max_seqlen, 0.0, softmax_scale=None, causal=True
)
output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
- attn_output = output
- if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim):
- raise ValueError(
- f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is"
- f" {attn_output.size()}"
- )
- attn_output = rearrange(attn_output, "b s h d -> b s (h d)")
- attn_weights = None
+ elif query_states.shape == key_states.shape:
+ query_states = query_states.transpose(1, 2)
+ key_states = key_states.transpose(1, 2)
+ value_states = value_states.transpose(1, 2)
+ qkv_unpad, cu_seqlens_q, max_seqlen_q, _, output_pad_fn = generate_qkv(
+ query_states,
+ key_states,
+ value_states,
+ qkvpacked=True,
+ # We have disabled _prepare_decoder_attention_mask in LlamaModel
+ # the attention_mask should be the same as the key_padding_mask
+ key_padding_mask=attention_mask,
+ query_padding_mask=attention_mask[:, -query_states.size(1) :]
+ if attention_mask is not None
+ else None,
+ )
+ output_unpad = flash_attn_varlen_qkvpacked_func(
+ qkv_unpad,
+ cu_seqlens_q,
+ max_seqlen_q,
+ 0.0,
+ softmax_scale=None,
+ causal=is_causal,
+ )
+ output = output_pad_fn(output_unpad)
else:
- attn_weights = torch.matmul(
- query_states, key_states.transpose(2, 3)
- ) / math.sqrt(self.head_dim)
- if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
- raise ValueError(
- f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
- f" {attn_weights.size()}"
+ query_states = query_states.transpose(1, 2)
+ key_states = key_states.transpose(1, 2)
+ value_states = value_states.transpose(1, 2)
+ if attention_mask is None or attention_mask.all().item():
+ output = flash_attn_kvpacked_func(
+ query_states,
+ torch.stack([key_states, value_states], 2),
+ causal=is_causal,
)
-
- if attention_mask is not None:
- if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
- raise ValueError(
- f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
- )
-
- attn_weights = attn_weights + attention_mask
-
- # upcast attention to fp32
- attn_weights = nn.functional.softmax(
- attn_weights, dim=-1, dtype=torch.float32
- ).to(query_states.dtype)
- attn_output = torch.matmul(attn_weights, value_states)
-
- if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
- raise ValueError(
- f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
- f" {attn_output.size()}"
+ else:
+ ( # pylint: disable=unbalanced-tuple-unpacking
+ q_unpad,
+ kv_unpad,
+ cu_seqlens_q,
+ cu_seqlens_k,
+ max_seqlen_q,
+ max_seqlen_k,
+ _,
+ _,
+ output_pad_fn,
+ ) = generate_qkv(
+ query_states,
+ key_states,
+ value_states,
+ kvpacked=True,
+ key_padding_mask=attention_mask,
+ query_padding_mask=attention_mask[:, -query_states.size(1) :]
+ if attention_mask is not None
+ else None,
+ )
+ if q_unpad.dtype != kv_unpad.dtype:
+ kv_unpad = kv_unpad.to(q_unpad.dtype)
+ output_unpad = flash_attn_varlen_kvpacked_func(
+ q_unpad,
+ kv_unpad,
+ cu_seqlens_q,
+ cu_seqlens_k,
+ max_seqlen_q,
+ max_seqlen_k,
+ 0.0,
+ softmax_scale=None,
+ causal=is_causal,
)
+ output = output_pad_fn(output_unpad)
- attn_output = attn_output.transpose(1, 2).contiguous()
- attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+ attn_output = output
+ if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+ attn_output = rearrange(attn_output, "b s h d -> b s (h d)")
attn_output = self.o_proj(attn_output)
@@ -169,6 +210,105 @@ def flashattn_forward(
return attn_output, attn_weights, past_key_value
+# based on https://github.com/Dao-AILab/flash-attention/blob/364a5b/tests/test_flash_attn.py#L38
+def generate_qkv(
+ q,
+ k,
+ v,
+ query_padding_mask=None,
+ key_padding_mask=None,
+ kvpacked=False,
+ qkvpacked=False,
+): # pylint: disable=invalid-name,unnecessary-lambda-assignment
+ """
+ Arguments:
+ q: (batch_size, seqlen_q, nheads, d)
+ k: (batch_size, seqlen_k, nheads_k, d)
+ v: (batch_size, seqlen_k, nheads_k, d)
+ query_padding_mask: (batch_size, seqlen), bool
+ key_padding_mask: (batch_size, seqlen), bool
+ """
+ assert not (kvpacked and qkvpacked)
+ batch_size, seqlen_q, nheads, d = q.shape
+ _, seqlen_k, nheads_k, _ = k.shape
+ assert k.shape == (batch_size, seqlen_k, nheads_k, d)
+ assert v.shape == (batch_size, seqlen_k, nheads_k, d)
+
+ if query_padding_mask is not None:
+ q_unpad, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input(
+ q, query_padding_mask
+ )
+
+ output_pad_fn = lambda output_unpad: pad_input( # noqa: E731
+ output_unpad, indices_q, batch_size, seqlen_q
+ )
+
+ else:
+ q_unpad = rearrange(q, "b s h d -> (b s) h d")
+ cu_seqlens_q = torch.arange(
+ 0,
+ (batch_size + 1) * seqlen_q,
+ step=seqlen_q,
+ dtype=torch.int32,
+ device=q_unpad.device,
+ )
+ max_seqlen_q = seqlen_q
+
+ output_pad_fn = lambda output_unpad: rearrange( # noqa: E731
+ output_unpad, "(b s) h d -> b s h d", b=batch_size
+ )
+
+ if key_padding_mask is not None:
+ k_unpad, _, cu_seqlens_k, max_seqlen_k = unpad_input(k, key_padding_mask)
+ v_unpad, _, _, _ = unpad_input(v, key_padding_mask)
+ else:
+ k_unpad = rearrange(k, "b s h d -> (b s) h d")
+ v_unpad = rearrange(v, "b s h d -> (b s) h d")
+ cu_seqlens_k = torch.arange(
+ 0,
+ (batch_size + 1) * seqlen_k,
+ step=seqlen_k,
+ dtype=torch.int32,
+ device=k_unpad.device,
+ )
+ max_seqlen_k = seqlen_k
+
+ if qkvpacked:
+ assert nheads == nheads_k
+ qkv_unpad = torch.stack([q_unpad, k_unpad, v_unpad], dim=1)
+ qkv = torch.stack([q, k, v], dim=2)
+ return (qkv_unpad, cu_seqlens_q, max_seqlen_q, qkv, output_pad_fn)
+
+ if kvpacked:
+ kv_unpad = torch.stack([k_unpad, v_unpad], dim=1)
+ kv = torch.stack([k, v], dim=2)
+ return (
+ q_unpad,
+ kv_unpad,
+ cu_seqlens_q,
+ cu_seqlens_k,
+ max_seqlen_q,
+ max_seqlen_k,
+ q,
+ kv,
+ output_pad_fn,
+ )
+
+ return (
+ q_unpad,
+ k_unpad,
+ v_unpad,
+ cu_seqlens_q,
+ cu_seqlens_k,
+ max_seqlen_q,
+ max_seqlen_k,
+ q,
+ k,
+ v,
+ output_pad_fn,
+ )
+
+
def mistral_model_forward(
self,
input_ids: torch.LongTensor = None,
From 8662e8ffe89d11569c412c65d8b4785ae87d326f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 28 Sep 2023 12:21:51 -0400
Subject: [PATCH 34/77] don't strip the prompt for check since we don't strip
to tokenize anymore (#650)
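A minimal sketch of the difference this makes (hypothetical strings, not the axolotl implementation): since prompts are no longer stripped before tokenization, a whitespace-only segment now reaches the tokenizer instead of being short-circuited as empty.

```python
# Hypothetical illustration of the guard change in _tokenize:
# only truly empty prompts are skipped now; whitespace is preserved.
for prompt in ["", " ", "Hello"]:
    old_skips = not prompt.strip()  # previous check
    new_skips = not prompt          # check after this patch
    print(repr(prompt), "old skips:", old_skips, "new skips:", new_skips)
```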
---
src/axolotl/prompt_tokenizers.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index 31bb73c2f7..1b39336642 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -81,7 +81,7 @@ def _tokenize(
self, prompt: str, add_eos_token: bool = True, strip_bos_token: bool = False
) -> BatchEncoding:
result: BatchEncoding
- if not prompt.strip():
+ if not prompt:
LOG.warning("Empty text requested for tokenization.")
result = BatchEncoding(data={"input_ids": [], "attention_mask": []})
else:
From 409ca0f21cc5811a0956ffe0bffa04fe9e8d45e7 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 28 Sep 2023 20:14:14 -0400
Subject: [PATCH 35/77] add support for defined train split (#654)
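A minimal sketch of what `train_on_split` selects (using the `datasets` library directly; the split names of the referenced dataset are an assumption based on the README example below):

```python
# Minimal sketch: when a DatasetDict has no "train" split, the split named
# by train_on_split is used for training instead.
from datasets import load_dataset

ds = load_dataset("knowrohit07/know_sql")  # split names are dataset-dependent
print(list(ds.keys()))
train_ds = ds["validation"]  # what `train_on_split: validation` resolves to
```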
---
README.md | 10 +++++
src/axolotl/prompt_strategies/context_qa.py | 41 +++++++++++++++++++++
src/axolotl/utils/data.py | 10 +++++
3 files changed, 61 insertions(+)
diff --git a/README.md b/README.md
index 3a1eb0cd7a..3f1767ea48 100644
--- a/README.md
+++ b/README.md
@@ -250,6 +250,10 @@ Have dataset(s) in one of the following format (JSONL recommended):
```json
{"article": "...", "question": "...", "answer": "..."}
```
+- `context_qa.load_v2`: in context question answering (alternate)
+ ```json
+ {"context": "...", "question": "...", "answer": "..."}
+ ```
- `context_qa.load_404`: in context question answering from an article, with default response for no answer from context
```json
{"article": "...", "unanswerable_question": "..."}
@@ -356,6 +360,12 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
- path: data.jsonl # or json
ds_type: json # see other options below
type: alpaca
+
+ # dataset with splits, but no train split
+ dataset:
+ - path: knowrohit07/know_sql
+ type: context_qa.load_v2
+ train_on_split: validation
```
- loading
diff --git a/src/axolotl/prompt_strategies/context_qa.py b/src/axolotl/prompt_strategies/context_qa.py
index f7027c7e24..f87dd8b5cd 100644
--- a/src/axolotl/prompt_strategies/context_qa.py
+++ b/src/axolotl/prompt_strategies/context_qa.py
@@ -24,6 +24,15 @@ def load(tokenizer, cfg):
)
+def load_v2(tokenizer, cfg):
+ return ContextQaV2PromptTokenizingStrategy(
+ ContextV2Prompter(),
+ tokenizer,
+ cfg.train_on_inputs,
+ cfg.sequence_len,
+ )
+
+
class AlpacaContextPrompter(AlpacaPrompter):
"""
Customized system prompted for concise QA
@@ -50,6 +59,38 @@ def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
)
+class ContextQaV2PromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
+ """
+ Tokenization Strategy to combine in-context article with a question and answer
+ """
+
+ def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
+ return (
+ "Context: "
+ + prompt["context"]
+ + "\nQuestion: "
+ + prompt["question"]
+ + "\n",
+ "",
+ "Answer: " + prompt["answer"],
+ )
+
+
+class ContextV2Prompter(AlpacaPrompter):
+ """
+ Customized system prompted for concise QA
+ """
+
+ system_prompt = ""
+ system_no_input_prompt = ""
+
+ def match_prompt_style(self):
+ # pylint: disable=duplicate-code
+ self.turn_format = "{instruction}\n{input}"
+ self.turn_no_input_format = "{instruction}"
+ self.system_format = "{system}"
+
+
class AlpacaMissingInfoContextPromptTokenizingStrategy(
InstructionPromptTokenizingStrategy
):
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index 2713796774..34a5baaffd 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -247,6 +247,16 @@ def for_d_in_datasets(dataset_configs):
d_prompt_style = d_type_split[1] if len(d_type_split) > 1 else None
if "train" in ds:
ds = ds["train"]
+ elif (
+ isinstance(ds, DatasetDict)
+ and d.train_on_split
+ and d.train_on_split in ds
+ ):
+ ds = ds[d.train_on_split]
+ elif isinstance(ds, DatasetDict):
+ raise ValueError(
+                    f"no train split found for dataset {d.path}, you may specify a split with 'train_on_split'"
+ )
if (
"input_ids" in ds.features
and "attention_mask" in ds.features
From 590d6032fdcbe915a1f7c7d8456c28db9f2d3861 Mon Sep 17 00:00:00 2001
From: ich <25612731+ein-ich@users.noreply.github.com>
Date: Fri, 29 Sep 2023 04:54:10 +0200
Subject: [PATCH 36/77] Fix bug when using pretokenized datasets (#652)
* fix pretokenized datasets readme
* check if dataset type is not set to handle pretokenized datasets
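For reference, a minimal sketch (assuming the `datasets` library; the token ids are arbitrary placeholders) of a valid pretokenized dataset with the exact expected columns and no `type:` set in the config:

```python
# Minimal sketch of a pretokenized dataset: the columns must be exactly
# input_ids, attention_mask and labels; token ids here are placeholders.
from datasets import Dataset

rows = {
    "input_ids": [[1, 15043, 2], [1, 3057, 29991, 2]],
    "attention_mask": [[1, 1, 1], [1, 1, 1, 1]],
    "labels": [[1, 15043, 2], [1, 3057, 29991, 2]],
}
ds = Dataset.from_dict(rows)
print(ds.column_names)  # ['input_ids', 'attention_mask', 'labels']
```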
---
README.md | 2 +-
src/axolotl/utils/config.py | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 3f1767ea48..593eff6c31 100644
--- a/README.md
+++ b/README.md
@@ -317,7 +317,7 @@ Using file:
#### How to use your custom pretokenized dataset
- Do not pass a `type:`
-- Dataset must contain `input_ids`, `attention_mask`, `labels` in columns
+- The dataset columns must be exactly `input_ids`, `attention_mask`, `labels`
### Config
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 67f9490c47..5a034ea0f2 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -293,6 +293,8 @@ def validate_config(cfg):
if cfg.datasets:
for idx, ds_cfg in enumerate(cfg.datasets):
+ if not ds_cfg.type:
+ continue
if ds_cfg.type == "sharegpt:chat":
LOG.warning(
PendingDeprecationWarning(
From 9ec20777ba3a6320875b36d0ce4de04a65c57797 Mon Sep 17 00:00:00 2001
From: Kyle Corbitt
Date: Thu, 28 Sep 2023 21:22:22 -0700
Subject: [PATCH 37/77] Make dataset_processes configurable (#651)
I'm using the Axolotl script to train models on https://modal.com's serverless GPUs. Unfortunately, their environment seems to have a bug where running `datasets.filter` with too high a `num_proc` throws an error and dies.
This PR adds a new configuration option, `dataset_processes`, which lets you explicitly set the number of processes used to map/filter the dataset. If not set, it defaults to the current behavior of using `os.cpu_count()`.
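A minimal sketch of what the cap amounts to (hypothetical values, calling the `datasets` library directly rather than through axolotl):

```python
# Minimal sketch: dataset_processes simply bounds num_proc for dataset
# map/filter calls instead of always using os.cpu_count().
import os
from datasets import Dataset

dataset_processes = 2  # hypothetical value a user might set in their config
ds = Dataset.from_dict({"text": ["short", "a much longer example", "mid"]})
filtered = ds.filter(
    lambda row: len(row["text"]) > 4,
    num_proc=min(dataset_processes, os.cpu_count() or 1),
)
print(len(filtered))  # 2
```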
---
README.md | 3 +++
src/axolotl/utils/config.py | 2 ++
src/axolotl/utils/trainer.py | 16 +++++++++++-----
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 593eff6c31..60013df938 100644
--- a/README.md
+++ b/README.md
@@ -487,6 +487,9 @@ datasets:
dataset_prepared_path: data/last_run_prepared
# push prepared dataset to hub
push_dataset_to_hub: # repo path
+# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()`
+# if not set.
+dataset_processes: # defaults to os.cpu_count() if not set
# push checkpoints to hub
hub_model_id: # repo path to push finetuned model
# how to push checkpoints to hub
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 5a034ea0f2..a8c41d95b0 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -75,6 +75,8 @@ def normalize_config(cfg):
else:
cfg.torch_dtype = torch.float32
+ cfg.dataset_processes = cfg.dataset_processes or os.cpu_count()
+
model_config = load_model_config(cfg)
cfg.model_config_type = model_config.model_type
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 3c75e4ec53..80ee5c8c61 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -400,19 +400,25 @@ def disable_datasets_caching():
def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
with zero_first(is_main_process()):
- train_dataset = train_dataset.filter(drop_long, num_proc=os.cpu_count())
+ train_dataset = train_dataset.filter(drop_long, num_proc=cfg.dataset_processes)
if eval_dataset:
- eval_dataset = eval_dataset.filter(drop_long, num_proc=os.cpu_count())
+ eval_dataset = eval_dataset.filter(
+ drop_long, num_proc=cfg.dataset_processes
+ )
if cfg.group_by_length:
- train_dataset = train_dataset.map(add_length, num_proc=os.cpu_count())
+ train_dataset = train_dataset.map(
+ add_length, num_proc=cfg.dataset_processes
+ )
if cfg.sample_packing:
- train_dataset = train_dataset.map(add_position_ids, num_proc=os.cpu_count())
+ train_dataset = train_dataset.map(
+ add_position_ids, num_proc=cfg.dataset_processes
+ )
if cfg.eval_sample_packing is not False:
if eval_dataset:
eval_dataset = eval_dataset.map(
- add_position_ids, num_proc=os.cpu_count()
+ add_position_ids, num_proc=cfg.dataset_processes
)
# Phi doesn't want the attention_mask feature when training
From 5b0bc48fbc52f3053806bd47159f110ee99c5df5 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 29 Sep 2023 00:22:40 -0400
Subject: [PATCH 38/77] add mistral e2e tests (#649)
* mistral e2e tests
* make sure to enable flash attention for the e2e tests
* use latest transformers full sha
* uninstall first
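To run the new tests outside CI, something like the following should work (a sketch, assuming a CUDA GPU with flash-attn and the packages from requirements-tests.txt installed):

```python
# Minimal sketch for running the new Mistral e2e tests locally from the
# repository root; the test file is added in this patch.
import pytest

pytest.main(["tests/e2e/test_mistral.py", "-v"])
```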
---
.github/workflows/tests.yml | 1 +
requirements.txt | 2 +-
tests/e2e/test_mistral.py | 208 ++++++++++++++++++++++++++++++++++++
3 files changed, 210 insertions(+), 1 deletion(-)
create mode 100644 tests/e2e/test_mistral.py
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b4637fd67f..419382df74 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -69,6 +69,7 @@ jobs:
- name: Install dependencies
run: |
+ pip3 uninstall -y transformers accelerate
pip3 install -U -e .[flash-attn]
pip3 install -r requirements-tests.txt
diff --git a/requirements.txt b/requirements.txt
index 18659daec5..b02e656eb2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ torch==2.0.1
auto-gptq
packaging
peft @ git+https://github.com/huggingface/peft.git
-transformers @ git+https://github.com/huggingface/transformers.git@78dd120
+transformers @ git+https://github.com/huggingface/transformers.git@5e11d72d4d0939138fbabfebe9a69d2061519547
bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
deepspeed
diff --git a/tests/e2e/test_mistral.py b/tests/e2e/test_mistral.py
new file mode 100644
index 0000000000..4212d36408
--- /dev/null
+++ b/tests/e2e/test_mistral.py
@@ -0,0 +1,208 @@
+"""
+E2E tests for mistral
+"""
+
+import logging
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from transformers.utils import is_torch_bf16_gpu_available
+
+from axolotl.cli import load_datasets
+from axolotl.common.cli import TrainerCliArgs
+from axolotl.train import train
+from axolotl.utils.config import normalize_config
+from axolotl.utils.dict import DictDefault
+
+LOG = logging.getLogger("axolotl.tests.e2e")
+os.environ["WANDB_DISABLED"] = "true"
+
+
+class TestMistral(unittest.TestCase):
+ """
+ Test case for Llama models using LoRA
+    Test case for Mistral models
+
+ def test_lora(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sequence_len": 1024,
+ "load_in_8bit": True,
+ "adapter": "lora",
+ "lora_r": 32,
+ "lora_alpha": 64,
+ "lora_dropout": 0.05,
+ "lora_target_linear": True,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
+
+ def test_lora_packing(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sample_packing": True,
+ "sequence_len": 1024,
+ "load_in_8bit": True,
+ "adapter": "lora",
+ "lora_r": 32,
+ "lora_alpha": 64,
+ "lora_dropout": 0.05,
+ "lora_target_linear": True,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
+
+ def test_ft(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sequence_len": 1024,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ if is_torch_bf16_gpu_available():
+ cfg.bf16 = True
+ else:
+ cfg.fp16 = True
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "pytorch_model.bin").exists()
+
+ def test_ft_packing(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sample_packing": True,
+ "sequence_len": 1024,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ if is_torch_bf16_gpu_available():
+ cfg.bf16 = True
+ else:
+ cfg.fp16 = True
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "pytorch_model.bin").exists()
From a7e56d83c2c6cf35dee515ad92da46d887a60ffd Mon Sep 17 00:00:00 2001
From: Napuh <55241721+Napuh@users.noreply.github.com>
Date: Mon, 2 Oct 2023 14:40:05 +0200
Subject: [PATCH 39/77] removed duplicate from requirements.txt (#661)
---
requirements.txt | 1 -
1 file changed, 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index b02e656eb2..cf4ce1d832 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,6 @@ bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
deepspeed
addict
-evaluate
fire
PyYAML>=6.0
datasets
From f4868d733cf69d854f682d0e97e7c044841338e6 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 2 Oct 2023 08:43:40 -0400
Subject: [PATCH 40/77] make sure we also run CI tests when requirements.txt
changes (#663)
---
.github/workflows/tests.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 419382df74..04fe53c00a 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,9 +6,11 @@ on:
- "main"
paths:
- '**.py'
+ - 'requirements.txt'
pull_request:
paths:
- '**.py'
+ - 'requirements.txt'
workflow_dispatch:
jobs:
From e50a64e85ecd69b7a596eccbf9be90d72d0b02b6 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 2 Oct 2023 21:07:24 -0400
Subject: [PATCH 41/77] prepared dataset caching, other misc fixes (#665)
* prepared dataset caching, other misc fixes
* also don't load from disk cache unless explicit
---
examples/cerebras/qlora.yml | 2 +-
examples/code-llama/13b/lora.yml | 2 +-
examples/code-llama/13b/qlora.yml | 2 +-
examples/code-llama/34b/lora.yml | 2 +-
examples/code-llama/34b/qlora.yml | 2 +-
examples/code-llama/7b/lora.yml | 2 +-
examples/code-llama/7b/qlora.yml | 2 +-
examples/falcon/config-7b-lora.yml | 2 +-
examples/falcon/config-7b-qlora.yml | 2 +-
examples/falcon/config-7b.yml | 2 +-
examples/gptj/qlora.yml | 2 +-
examples/jeopardy-bot/config.yml | 2 +-
examples/llama-2/gptq-lora.yml | 2 +-
examples/llama-2/lora.yml | 2 +-
examples/llama-2/qlora.yml | 2 +-
examples/llama-2/relora.yml | 2 +-
examples/llama-2/tiny-llama.yml | 2 +-
examples/mistral/config.yml | 2 +-
examples/mpt-7b/config.yml | 2 +-
examples/openllama-3b/config.yml | 2 +-
examples/openllama-3b/lora.yml | 2 +-
examples/openllama-3b/qlora.yml | 2 +-
examples/phi/phi-ft.yml | 2 +-
examples/phi/phi-qlora.yml | 2 +-
examples/pythia-12b/config.yml | 2 +-
examples/pythia/lora.yml | 2 +-
examples/redpajama/config-3b.yml | 2 +-
examples/replit-3b/config-lora.yml | 2 +-
examples/xgen-7b/xgen-7b-8k-qlora.yml | 2 +-
src/axolotl/cli/__init__.py | 2 +-
src/axolotl/utils/data.py | 6 +++---
src/axolotl/utils/tokenization.py | 3 ++-
32 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index eec16193d6..a13517f3ef 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -7,7 +7,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
lora_model_dir:
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 071b2bc051..91807846b7 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./lora-out
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index 3de4b3b4b0..9fa05ffab6 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./qlora-out
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index e52c11181f..a342b6ebc5 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./lora-out
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 7e6da2e968..1501dd9a38 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./qlora-out
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 5e9d12e7c1..638dddc435 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./lora-out
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index b24b5c1626..5b3b338222 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./qlora-out
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 738068a474..f45deb6437 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -12,7 +12,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: lora
lora_model_dir:
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index 554081fcba..f593419651 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -18,7 +18,7 @@ datasets:
data_files:
- Chain-of-Thought/formatted_cot_data/gsm8k_train.json
type: "alpaca:chat"
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
# enable QLoRA
adapter: qlora
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 25e67a53b1..777a97b31f 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -12,7 +12,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter:
lora_model_dir:
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index b41c946fc4..696747dfec 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -7,7 +7,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
lora_model_dir:
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index 1aaa1b5572..32e7a34ee5 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -6,7 +6,7 @@ load_in_8bit: false
datasets:
- path: openaccess-ai-collective/jeopardy
type: jeopardy
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.02
adapter:
lora_model_dir:
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 0d96e4610b..257433f26f 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -15,7 +15,7 @@ hf_use_auth_token: true
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: lora
lora_model_dir:
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 106de6e320..8c0e3e910c 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./lora-out
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 870c3fa848..b8209934cc 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./qlora-out
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 2b1fad55d8..9f27cafeaa 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./relora-out
diff --git a/examples/llama-2/tiny-llama.yml b/examples/llama-2/tiny-llama.yml
index d556a6d719..0b56ea7d3e 100644
--- a/examples/llama-2/tiny-llama.yml
+++ b/examples/llama-2/tiny-llama.yml
@@ -12,7 +12,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./lora-out
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index d199f947be..e644bec507 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -11,7 +11,7 @@ strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
output_dir: ./out
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 0efd28ceea..8d9b429b1d 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -6,7 +6,7 @@ load_in_8bit: false
datasets:
- path: vicgalle/alpaca-gpt4
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.02
adapter:
lora_model_dir:
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index 961aeabdaa..dd11d53b00 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -9,7 +9,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.02
adapter:
lora_model_dir:
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index 17fa7fa8bd..fad3fb5511 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -9,7 +9,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.02
adapter: lora
lora_model_dir:
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index deba03fd54..80d4d727bd 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -9,7 +9,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
lora_model_dir:
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index 9eb1080494..668eea317d 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -13,7 +13,7 @@ datasets:
- path: garage-bAInd/Open-Platypus
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.05
output_dir: ./phi-sft-out
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index f3a06c39d5..a548b3f052 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -13,7 +13,7 @@ datasets:
- path: garage-bAInd/Open-Platypus
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.05
output_dir: ./phi-sft-out
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index dc06eb6b6d..4e0e1523a4 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -10,7 +10,7 @@ device_map: auto
datasets:
- path: vicgalle/alpaca-gpt4
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.05
adapter:
lora_model_dir:
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index dfe7ef846f..f9a83c059a 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -4,7 +4,7 @@ load_in_8bit: true
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.05
adapter: lora
lora_model_dir:
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 10297d860b..97f31c87ac 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -7,7 +7,7 @@ load_in_8bit: false
datasets:
- path: vicgalle/alpaca-gpt4
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.02
adapter:
lora_model_dir:
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index 71a80d17df..d345e25a0b 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -5,7 +5,7 @@ load_in_8bit: false
datasets:
- path: vicgalle/alpaca-gpt4
type: alpaca
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.05
adapter: lora
lora_model_dir:
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index 0f215afb94..352dcb610a 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -16,7 +16,7 @@ datasets:
data_files:
- openassistant_best_replies_train.jsonl
type: "completion"
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
# enable QLoRA
adapter: qlora
diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py
index c3b580391a..c21d931705 100644
--- a/src/axolotl/cli/__init__.py
+++ b/src/axolotl/cli/__init__.py
@@ -51,7 +51,7 @@ def print_axolotl_text_art(suffix=None):
def get_multi_line_input() -> Optional[str]:
- print("Give me an instruction (Ctrl + D to finish): ")
+ print("Give me an instruction (Ctrl + D to submit): ")
instruction = ""
for line in sys.stdin:
instruction += line # pylint: disable=consider-using-join
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index 34a5baaffd..bdad21fb11 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -122,7 +122,7 @@ def load_tokenized_prepared_datasets(
if dataset:
...
- elif any(prepared_ds_path.glob("*")):
+ elif cfg.dataset_prepared_path and any(prepared_ds_path.glob("*")):
LOG.info(f"Loading prepared dataset from disk at {prepared_ds_path}...")
dataset = load_from_disk(str(prepared_ds_path))
LOG.info("Prepared dataset loaded from disk...")
@@ -357,7 +357,7 @@ def for_d_in_datasets(dataset_configs):
if len(datasets) > 1:
LOG.info("shuffle merged datasets")
dataset = dataset.shuffle(seed=seed)
- if cfg.local_rank == 0:
+ if cfg.local_rank == 0 and cfg.dataset_prepared_path:
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
dataset.save_to_disk(prepared_ds_path)
if cfg.push_dataset_to_hub:
@@ -425,7 +425,7 @@ def load_prepare_datasets(
if dataset:
...
- elif any(prepared_ds_path.glob("*")):
+ elif cfg.dataset_prepared_path and any(prepared_ds_path.glob("*")):
LOG.info(
f"Loading prepared packed dataset from disk at {prepared_ds_path}..."
)
diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py
index 2b7608b636..deb9bb00ec 100644
--- a/src/axolotl/utils/tokenization.py
+++ b/src/axolotl/utils/tokenization.py
@@ -31,7 +31,8 @@ def check_example_labels(example, tokenizer, text_only=False):
)
colored_tokens.append(colored_token)
- LOG.info(" ".join(colored_tokens))
+ delimiter = "" if text_only else " "
+ LOG.info(delimiter.join(colored_tokens))
LOG.info("\n\n\n")
print(" ".join(colored_tokens))
From f34648c8b9dc254034926a42edefe4ae28a43855 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 2 Oct 2023 21:07:41 -0400
Subject: [PATCH 42/77] remove patch fix for phi (#664)
---
src/axolotl/utils/models.py | 15 ---------------
1 file changed, 15 deletions(-)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 63e34293ee..3287c0ee93 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -1,5 +1,4 @@
"""Module for models and model loading"""
-import importlib
import logging
import math
import os
@@ -176,20 +175,6 @@ def load_model(
LOG.info("patching _expand_mask")
hijack_expand_mask()
- # special handling b/c remote MixFormers code doesn't have _no_split_modules set
- if (
- "MixFormerSequentialConfig" in model_config.__class__.__name__
- and cfg.model_type == "AutoModelForCausalLM"
- ):
- module_name = model_config.__class__.__module__.replace(
- ".configuration_mixformer_sequential", ".modeling_mixformer_sequential"
- )
- modeling_phi = importlib.import_module(module_name)
- # pylint:disable=protected-access
- modeling_phi.MixFormerSequentialForCausalLM._no_split_modules = [
- "ParallelBlock"
- ]
-
model_kwargs = {}
if cfg.model_revision:
model_kwargs["revision"] = cfg.model_revision
From 2642caedf2b61c32a1ba5a2b9307a743131e3fa6 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 2 Oct 2023 21:08:07 -0400
Subject: [PATCH 43/77] refactor to set eval_batch_size earlier if unset, so we
can warn if mismatched (#662)
---
README.md | 2 +-
src/axolotl/utils/config.py | 7 +++++++
src/axolotl/utils/trainer.py | 4 +---
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 60013df938..c2d8e7d8c6 100644
--- a/README.md
+++ b/README.md
@@ -571,7 +571,7 @@ torch_compile_backend: # Optional[str]
# training hyperparameters
gradient_accumulation_steps: 1
micro_batch_size: 2
-eval_batch_size: 2
+eval_batch_size:
num_epochs: 3
warmup_steps: 100
learning_rate: 0.00003
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index a8c41d95b0..9503d838cb 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -49,6 +49,8 @@ def normalize_config(cfg):
cfg.batch_size = (
cfg.batch_size or cfg.micro_batch_size * cfg.gradient_accumulation_steps
)
+ if cfg.eval_batch_size is None:
+ cfg.eval_batch_size = cfg.micro_batch_size
cfg.world_size = int(os.environ.get("WORLD_SIZE", 1))
cfg.local_rank = int(os.environ.get("LOCAL_RANK", 0))
cfg.eval_table_size = cfg.eval_table_size or 0
@@ -157,6 +159,11 @@ def validate_config(cfg):
"batch_size is not recommended. Please use gradient_accumulation_steps instead.",
"To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.",
)
+ if cfg.eval_batch_size != cfg.micro_batch_size:
+ LOG.warning(
+ "eval_batch_size != micro_batch_size. This can lead to VRAM instability."
+ )
+
if cfg.load_4bit:
raise ValueError("cfg.load_4bit parameter has been deprecated")
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 80ee5c8c61..a10a2b0e77 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -668,9 +668,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
max_steps=total_num_steps if cfg.max_steps else -1,
max_seq_length=cfg.sequence_len,
per_device_train_batch_size=cfg.micro_batch_size,
- per_device_eval_batch_size=cfg.eval_batch_size
- if cfg.eval_batch_size is not None
- else cfg.micro_batch_size,
+ per_device_eval_batch_size=cfg.eval_batch_size,
gradient_accumulation_steps=cfg.gradient_accumulation_steps,
eval_accumulation_steps=cfg.gradient_accumulation_steps,
num_train_epochs=cfg.num_epochs,
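Taken together, the config.py and trainer.py hunks in this patch behave roughly like the sketch below (`cfg` is a dict-like config object, as elsewhere in Axolotl): the eval batch size is filled in early so the trainer can pass it through unconditionally, and validation warns when it diverges from the train micro batch size.

```python
import logging

LOG = logging.getLogger("axolotl")


def normalize_eval_batch_size(cfg):
    # If the user left eval_batch_size unset, mirror the train micro batch size.
    if cfg.eval_batch_size is None:
        cfg.eval_batch_size = cfg.micro_batch_size


def warn_on_batch_size_mismatch(cfg):
    # Different train/eval per-device batch sizes can cause uneven VRAM usage.
    if cfg.eval_batch_size != cfg.micro_batch_size:
        LOG.warning(
            "eval_batch_size != micro_batch_size. This can lead to VRAM instability."
        )
```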
From 90e0d673f76f84478856434deb6024c5c869a5ad Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 3 Oct 2023 23:38:42 +0900
Subject: [PATCH 44/77] Feat: Add config yaml to section for reproduction in
bug-report.yaml (#667)
* Update bug-report.yaml
* Update bug-report.yaml
* Update bug-report.yaml
---
.github/ISSUE_TEMPLATE/bug-report.yaml | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yaml b/.github/ISSUE_TEMPLATE/bug-report.yaml
index cca243fdf2..5cf332587a 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yaml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yaml
@@ -53,6 +53,13 @@ body:
validations:
required: true
+ - type: textarea
+ id: config
+ attributes:
+ label: Config yaml
+ description: |
+ Please attach the config yaml!
+
- type: textarea
id: possible-solution
attributes:
From 697c50d40872436c5f01216b264bea558eddcd76 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 4 Oct 2023 20:40:47 +0900
Subject: [PATCH 45/77] Feat: Allow usage of native Mistral FA when no
sample_packing (#669)
* Allow usage of native Mistral FA when no sample_packing
* fix: do not apply custom patch when sample_pack off
* chore: lint
* chore: pin transformer to v4.35.0.dev0
* fix: split sample_packing to separate test
---
requirements.txt | 2 +-
src/axolotl/utils/models.py | 8 +-
tests/e2e/test_mistral.py | 92 ---------------------
tests/e2e/test_mistral_samplepack.py | 118 +++++++++++++++++++++++++++
4 files changed, 125 insertions(+), 95 deletions(-)
create mode 100644 tests/e2e/test_mistral_samplepack.py
diff --git a/requirements.txt b/requirements.txt
index cf4ce1d832..554f2ec69a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ torch==2.0.1
auto-gptq
packaging
peft @ git+https://github.com/huggingface/peft.git
-transformers @ git+https://github.com/huggingface/transformers.git@5e11d72d4d0939138fbabfebe9a69d2061519547
+transformers @ git+https://github.com/huggingface/transformers.git@bd6205919aad4d3a2300a39a98a642f1cc3a5348
bitsandbytes>=0.41.1
accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
deepspeed
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 3287c0ee93..6e520bd50e 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -149,7 +149,7 @@ def load_model(
# Note: This might overwrite previous additional_special_tokens
tokenizer.add_special_tokens({"additional_special_tokens": [MEM_TOKEN]})
- if cfg.is_mistral_derived_model and cfg.flash_attention:
+ if cfg.is_mistral_derived_model and cfg.flash_attention and cfg.sample_packing:
from axolotl.monkeypatch.mistral_attn_hijack_flash import (
replace_mistral_attn_with_flash_attn,
)
@@ -200,7 +200,11 @@ def load_model(
)
# sample packing uses custom FA2 patch
if cfg.flash_attention and not cfg.sample_packing:
- if cfg.is_llama_derived_model or cfg.is_falcon_derived_model:
+ if (
+ cfg.is_llama_derived_model
+ or cfg.is_falcon_derived_model
+ or cfg.is_mistral_derived_model
+ ):
model_kwargs["use_flash_attention_2"] = True
try:
if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
diff --git a/tests/e2e/test_mistral.py b/tests/e2e/test_mistral.py
index 4212d36408..f3098f0583 100644
--- a/tests/e2e/test_mistral.py
+++ b/tests/e2e/test_mistral.py
@@ -71,53 +71,6 @@ def test_lora(self):
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(output_dir) / "adapter_model.bin").exists()
- def test_lora_packing(self):
- # pylint: disable=duplicate-code
- output_dir = tempfile.mkdtemp()
- cfg = DictDefault(
- {
- "base_model": "openaccess-ai-collective/tiny-mistral",
- "base_model_config": "openaccess-ai-collective/tiny-mistral",
- "flash_attention": True,
- "sample_packing": True,
- "sequence_len": 1024,
- "load_in_8bit": True,
- "adapter": "lora",
- "lora_r": 32,
- "lora_alpha": 64,
- "lora_dropout": 0.05,
- "lora_target_linear": True,
- "val_set_size": 0.1,
- "special_tokens": {
- "unk_token": "",
- "bos_token": "",
- "eos_token": "",
- },
- "datasets": [
- {
- "path": "mhenrichsen/alpaca_2k_test",
- "type": "alpaca",
- },
- ],
- "num_epochs": 2,
- "micro_batch_size": 2,
- "gradient_accumulation_steps": 1,
- "output_dir": output_dir,
- "learning_rate": 0.00001,
- "optimizer": "adamw_torch",
- "lr_scheduler": "cosine",
- "max_steps": 20,
- "save_steps": 10,
- "eval_steps": 10,
- }
- )
- normalize_config(cfg)
- cli_args = TrainerCliArgs()
- dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
- train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
- assert (Path(output_dir) / "adapter_model.bin").exists()
-
def test_ft(self):
# pylint: disable=duplicate-code
output_dir = tempfile.mkdtemp()
@@ -161,48 +114,3 @@ def test_ft(self):
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
assert (Path(output_dir) / "pytorch_model.bin").exists()
-
- def test_ft_packing(self):
- # pylint: disable=duplicate-code
- output_dir = tempfile.mkdtemp()
- cfg = DictDefault(
- {
- "base_model": "openaccess-ai-collective/tiny-mistral",
- "base_model_config": "openaccess-ai-collective/tiny-mistral",
- "flash_attention": True,
- "sample_packing": True,
- "sequence_len": 1024,
- "val_set_size": 0.1,
- "special_tokens": {
- "unk_token": "",
- "bos_token": "",
- "eos_token": "",
- },
- "datasets": [
- {
- "path": "mhenrichsen/alpaca_2k_test",
- "type": "alpaca",
- },
- ],
- "num_epochs": 2,
- "micro_batch_size": 2,
- "gradient_accumulation_steps": 1,
- "output_dir": output_dir,
- "learning_rate": 0.00001,
- "optimizer": "adamw_torch",
- "lr_scheduler": "cosine",
- "max_steps": 20,
- "save_steps": 10,
- "eval_steps": 10,
- }
- )
- if is_torch_bf16_gpu_available():
- cfg.bf16 = True
- else:
- cfg.fp16 = True
- normalize_config(cfg)
- cli_args = TrainerCliArgs()
- dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
- train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
- assert (Path(output_dir) / "pytorch_model.bin").exists()
diff --git a/tests/e2e/test_mistral_samplepack.py b/tests/e2e/test_mistral_samplepack.py
new file mode 100644
index 0000000000..623d20b0c2
--- /dev/null
+++ b/tests/e2e/test_mistral_samplepack.py
@@ -0,0 +1,118 @@
+"""
+E2E tests for Mistral with sample packing
+"""
+
+import logging
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from transformers.utils import is_torch_bf16_gpu_available
+
+from axolotl.cli import load_datasets
+from axolotl.common.cli import TrainerCliArgs
+from axolotl.train import train
+from axolotl.utils.config import normalize_config
+from axolotl.utils.dict import DictDefault
+
+LOG = logging.getLogger("axolotl.tests.e2e")
+os.environ["WANDB_DISABLED"] = "true"
+
+
+class TestMistral(unittest.TestCase):
+ """
+    Test case for Mistral models with sample packing
+ """
+
+ def test_lora_packing(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sample_packing": True,
+ "sequence_len": 1024,
+ "load_in_8bit": True,
+ "adapter": "lora",
+ "lora_r": 32,
+ "lora_alpha": 64,
+ "lora_dropout": 0.05,
+ "lora_target_linear": True,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "adapter_model.bin").exists()
+
+ def test_ft_packing(self):
+ # pylint: disable=duplicate-code
+ output_dir = tempfile.mkdtemp()
+ cfg = DictDefault(
+ {
+ "base_model": "openaccess-ai-collective/tiny-mistral",
+ "base_model_config": "openaccess-ai-collective/tiny-mistral",
+ "flash_attention": True,
+ "sample_packing": True,
+ "sequence_len": 1024,
+ "val_set_size": 0.1,
+ "special_tokens": {
+ "unk_token": "",
+ "bos_token": "",
+ "eos_token": "",
+ },
+ "datasets": [
+ {
+ "path": "mhenrichsen/alpaca_2k_test",
+ "type": "alpaca",
+ },
+ ],
+ "num_epochs": 2,
+ "micro_batch_size": 2,
+ "gradient_accumulation_steps": 1,
+ "output_dir": output_dir,
+ "learning_rate": 0.00001,
+ "optimizer": "adamw_torch",
+ "lr_scheduler": "cosine",
+ "max_steps": 20,
+ "save_steps": 10,
+ "eval_steps": 10,
+ }
+ )
+ if is_torch_bf16_gpu_available():
+ cfg.bf16 = True
+ else:
+ cfg.fp16 = True
+ normalize_config(cfg)
+ cli_args = TrainerCliArgs()
+ dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+ train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+ assert (Path(output_dir) / "pytorch_model.bin").exists()
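The models.py change earlier in this patch boils down to the selection logic sketched here: the custom flash-attention monkeypatch is only applied when sample packing is on, while plain flash attention for Mistral now goes through transformers' native `use_flash_attention_2` flag. `cfg` and `model_kwargs` are illustrative, and the patch call signature is simplified.

```python
def select_mistral_attention(cfg, model_kwargs: dict) -> None:
    """Sketch: custom FA2 patch only for sample packing, native FA2 otherwise."""
    if cfg.is_mistral_derived_model and cfg.flash_attention and cfg.sample_packing:
        # Sample packing needs the custom patch with per-sequence position_ids.
        from axolotl.monkeypatch.mistral_attn_hijack_flash import (
            replace_mistral_attn_with_flash_attn,
        )

        replace_mistral_attn_with_flash_attn()  # exact arguments omitted in this sketch
    elif cfg.flash_attention and not cfg.sample_packing and (
        cfg.is_llama_derived_model
        or cfg.is_falcon_derived_model
        or cfg.is_mistral_derived_model
    ):
        # Without packing, rely on transformers' built-in Flash Attention 2 support.
        model_kwargs["use_flash_attention_2"] = True
```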
From e62d5901b55653e6a24ce28fc323669179e28e45 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 4 Oct 2023 20:41:26 +0900
Subject: [PATCH 46/77] chore: Clean up repetitive model kwargs (#670)
---
src/axolotl/utils/models.py | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 6e520bd50e..6c8e7b8f0c 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -176,6 +176,10 @@ def load_model(
hijack_expand_mask()
model_kwargs = {}
+
+ model_kwargs["device_map"] = cfg.device_map
+ model_kwargs["torch_dtype"] = cfg.torch_dtype
+
if cfg.model_revision:
model_kwargs["revision"] = cfg.model_revision
if cfg.gptq:
@@ -206,6 +210,7 @@ def load_model(
or cfg.is_mistral_derived_model
):
model_kwargs["use_flash_attention_2"] = True
+
try:
if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
from transformers import LlamaForCausalLM
@@ -220,10 +225,8 @@ def load_model(
model = LlamaForCausalLM.from_pretrained(
base_model,
config=config,
- device_map=cfg.device_map,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
**model_kwargs,
)
# elif model_type == "GPTNeoXForCausalLM" and cfg.flash_attention:
@@ -257,28 +260,22 @@ def load_model(
model = MixFormerSequentialForCausalLM.from_pretrained(
base_model,
- device_map=cfg.device_map,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
**model_kwargs,
)
elif model_type and not cfg.trust_remote_code:
if cfg.gptq:
model = AutoModelForCausalLM.from_pretrained(
base_model,
- device_map=cfg.device_map,
- torch_dtype=cfg.torch_dtype,
trust_remote_code=cfg.trust_remote_code or False,
**model_kwargs,
)
else:
model = getattr(transformers, model_type).from_pretrained(
base_model,
- device_map=cfg.device_map,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
trust_remote_code=cfg.trust_remote_code or False,
**model_kwargs,
)
@@ -307,8 +304,6 @@ def load_model(
model = AutoModelForCausalLM.from_pretrained(
base_model,
config=config,
- device_map=cfg.device_map,
- torch_dtype=cfg.torch_dtype,
trust_remote_code=cfg.trust_remote_code or False,
**model_kwargs,
)
@@ -316,10 +311,8 @@ def load_model(
model = AutoModelForCausalLM.from_pretrained(
base_model,
config=config,
- device_map=cfg.device_map,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
trust_remote_code=cfg.trust_remote_code or False,
**model_kwargs,
)
@@ -330,10 +323,8 @@ def load_model(
LOG.exception(err)
model = AutoModelForCausalLM.from_pretrained(
base_model,
- device_map=cfg.device_map,
load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
- torch_dtype=cfg.torch_dtype,
trust_remote_code=cfg.trust_remote_code or False,
**model_kwargs,
)
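The refactor above follows the pattern sketched here: arguments shared by every `from_pretrained` branch (device map, dtype, revision) live in a single `model_kwargs` dict and are splatted into each call, so only branch-specific options remain inline. `cfg` is illustrative.

```python
from transformers import AutoModelForCausalLM


def load_with_shared_kwargs(base_model: str, cfg):
    # Collect kwargs that are identical across every loading branch.
    model_kwargs = {
        "device_map": cfg.device_map,
        "torch_dtype": cfg.torch_dtype,
    }
    if cfg.model_revision:
        model_kwargs["revision"] = cfg.model_revision

    # Each branch then only adds what is specific to it (quantization flags, config, ...).
    return AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
        load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
        trust_remote_code=cfg.trust_remote_code or False,
        **model_kwargs,
    )
```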
From 43856c0a393fb7c4c44c56dc1a35ab7bc4bd52fd Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 4 Oct 2023 21:32:19 +0900
Subject: [PATCH 47/77] Fix(version): Update FA to work with Mistral SWA (#673)
---
requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 554f2ec69a..cced386735 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ addict
fire
PyYAML>=6.0
datasets
-flash-attn>=2.2.1
+flash-attn>=2.3.0
sentencepiece
wandb
einops
From e0b7eeabfd0f997d41964d1301900a6b277b4146 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 6 Oct 2023 03:50:49 +0900
Subject: [PATCH 48/77] Fix(tokenizer): Set rstrip,lstrip,norm to False (#678)
---
src/axolotl/utils/models.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 6c8e7b8f0c..7ef22cf5f0 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -11,6 +11,7 @@
from peft import PeftConfig, prepare_model_for_kbit_training
from peft.tuners.lora import QuantLinear
from transformers import ( # noqa: F401
+ AddedToken,
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
@@ -82,9 +83,16 @@ def load_tokenizer(cfg):
if cfg.special_tokens:
for k, val in cfg.special_tokens.items():
- tokenizer.add_special_tokens({k: val})
+ tokenizer.add_special_tokens(
+ {k: AddedToken(val, rstrip=False, lstrip=False, normalized=False)}
+ )
if cfg.tokens:
- tokenizer.add_tokens(list(cfg.tokens))
+ tokenizer.add_tokens(
+ [
+ AddedToken(token, rstrip=False, lstrip=False, normalized=False)
+ for token in cfg.tokens
+ ]
+ )
return tokenizer
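A small, self-contained illustration of the `AddedToken` usage introduced above; the tokenizer repo and the `<|mem|>` string are placeholders, while `[PAD]` matches the pad token Axolotl already registers. Disabling `rstrip`/`lstrip`/`normalized` keeps whitespace around special tokens and their exact spelling intact.

```python
from transformers import AddedToken, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

# Register special tokens without stripping adjacent whitespace or normalizing them.
tokenizer.add_special_tokens(
    {"pad_token": AddedToken("[PAD]", rstrip=False, lstrip=False, normalized=False)}
)
tokenizer.add_tokens(
    [AddedToken("<|mem|>", rstrip=False, lstrip=False, normalized=False)]
)
```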
From 69fac9a0205c3b4da5ef528585d6be9492673812 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 6 Oct 2023 03:56:18 +0900
Subject: [PATCH 49/77] Fix: Future deprecation warning with use_auth_token
(#680)
---
src/axolotl/utils/data.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index bdad21fb11..bac7d96c9e 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -114,7 +114,7 @@ def load_tokenized_prepared_datasets(
if cfg.push_dataset_to_hub:
dataset = load_dataset(
f"{cfg.push_dataset_to_hub}/{ds_hash}",
- use_auth_token=use_auth_token,
+ token=use_auth_token,
)
dataset = dataset["train"]
except Exception: # pylint: disable=broad-except # nosec
@@ -155,7 +155,7 @@ def for_d_in_datasets(dataset_configs):
d.path,
name=d.name,
streaming=True,
- use_auth_token=use_auth_token,
+ token=use_auth_token,
)
ds_from_hub = True
except FileNotFoundError:
@@ -202,7 +202,7 @@ def for_d_in_datasets(dataset_configs):
name=d.name,
streaming=False,
data_files=d.data_files,
- use_auth_token=use_auth_token,
+ token=use_auth_token,
)
else:
if isinstance(d.data_files, str):
@@ -417,7 +417,7 @@ def load_prepare_datasets(
)
dataset = load_dataset(
f"{cfg.push_dataset_to_hub}/{ds_hash}",
- use_auth_token=use_auth_token,
+ token=use_auth_token,
)
dataset = dataset["train"]
except Exception: # pylint: disable=broad-except # nosec
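For reference, the call-site change looks like the sketch below: recent `datasets` releases accept `token=` where `use_auth_token=` is being deprecated. The repo id is a placeholder.

```python
from datasets import load_dataset

# `token` supersedes the deprecated `use_auth_token` keyword for private/gated datasets.
dataset = load_dataset(
    "your-org/your-prepared-dataset",  # placeholder repo id
    token=True,  # or an explicit Hugging Face access token string
)
```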
From 133e676bcc7ad961cbf78cb4823948fbc1287ef1 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 6 Oct 2023 04:09:14 +0900
Subject: [PATCH 50/77] Feat: Set WORKDIR to /workspace/axolotl (#679)
---
docker/Dockerfile | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 8608e2348b..7b121aaa7b 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -12,17 +12,18 @@ RUN apt-get update && \
WORKDIR /workspace
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
+
+WORKDIR /workspace/axolotl
+
# If AXOLOTL_EXTRAS is set, append it in brackets
-RUN cd axolotl && \
- if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
+RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
pip install -e .[flash-attn,$AXOLOTL_EXTRAS]; \
else \
pip install -e .[flash-attn]; \
fi
# fix so that git fetch/pull from remote works
-RUN cd axolotl && \
- git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
+RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
git config --get remote.origin.fetch
# helper for huggingface-login cli
From eb480dfd68afced2c9a10632e4ff4c3cab762d78 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 6 Oct 2023 04:12:54 +0900
Subject: [PATCH 51/77] Fix: ValueError with FA + Mistral when
padding_side=right (#681)
* Fix: ValueError when FA + Mistral when padding_side=right
* fix: remove tokenizer class check
---
src/axolotl/utils/models.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 7ef22cf5f0..c05bccbf08 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -81,6 +81,9 @@ def load_tokenizer(cfg):
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if cfg.is_mistral_derived_model:
+ tokenizer.padding_side = "left"
+
if cfg.special_tokens:
for k, val in cfg.special_tokens.items():
tokenizer.add_special_tokens(
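The ValueError in question comes from transformers' Flash Attention 2 path for Mistral, which expects left padding for batched generation. A minimal illustration (model name shown only as the obvious example; the pad-token assignment is needed because the Mistral tokenizer ships without one):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.pad_token = tokenizer.eos_token  # Mistral has no pad token by default
tokenizer.padding_side = "left"  # required by Mistral's native FA2 path for batched generation

batch = tokenizer(
    ["first prompt", "a much longer second prompt"],
    padding=True,
    return_tensors="pt",
)
```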
From 2d60ba3a6ea4def14e6ab974299322a0bf90d5bb Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 5 Oct 2023 16:03:43 -0400
Subject: [PATCH 52/77] flash_attention + sample packing for stablelm 3b (#671)
* stablelm epoch fa patch
* is causal for fa
* working stablelm fa w packing
* chore: pre-commit linting
---
.../monkeypatch/btlm_attn_hijack_flash.py | 4 +-
.../monkeypatch/stablelm_attn_hijack_flash.py | 415 ++++++++++++++++++
src/axolotl/utils/models.py | 11 +
3 files changed, 429 insertions(+), 1 deletion(-)
create mode 100644 src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
diff --git a/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
index be5a705595..1275906804 100644
--- a/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
@@ -7,6 +7,7 @@
from typing import Optional, Tuple
import torch
+from accelerate import init_empty_weights
from flash_attn.flash_attn_interface import flash_attn_func
from transformers import AutoConfig, AutoModelForCausalLM
@@ -17,7 +18,8 @@ def replace_btlm_attn_with_flash_attn(model_name="cerebras/btlm-3b-8k-base"):
# this is a wonky hack to get the remotely loaded module
model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
# we need to load the model here in order for modeling_btlm to be available
- AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+ with init_empty_weights():
+ AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
module_name = model_config.__class__.__module__.replace(
".configuration_btlm", ".modeling_btlm"
)
diff --git a/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
new file mode 100644
index 0000000000..0269f90157
--- /dev/null
+++ b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
@@ -0,0 +1,415 @@
+# coding=utf-8
+# Copyright 2023 Stability AI, EleutherAI, and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This code is based off the following work:
+# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
+# https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py
+""" PyTorch StableLM Epoch model. """
+import importlib
+import math
+from typing import Optional, Tuple, Union
+
+import torch
+import torch.utils.checkpoint
+from accelerate import init_empty_weights
+from einops import rearrange
+from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports
+ flash_attn_varlen_qkvpacked_func,
+)
+from torch import nn
+from transformers import AutoConfig, AutoModelForCausalLM
+from transformers.modeling_outputs import BaseModelOutputWithPast
+from transformers.utils import logging
+
+from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
+
+logger = logging.get_logger(__name__)
+
+
+def replace_stablelm_attn_with_flash_attn(model_name="stabilityai/stablelm-3b-4e1t"):
+ # this is a wonky hack to get the remotely loaded module
+ model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+ # we need to load the model here in order for modeling_stablelm_epoch to be available
+ with init_empty_weights():
+ AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+ module_name = model_config.__class__.__module__.replace(
+ ".configuration_stablelm_epoch", ".modeling_stablelm_epoch"
+ )
+ modeling_stablelm = importlib.import_module(module_name)
+ modeling_stablelm.Attention.forward = ( # pylint: disable=protected-access
+ flashattn_attn
+ )
+ modeling_stablelm.StableLMEpochModel.forward = ( # pylint: disable=protected-access
+ stablelm_model_forward
+ )
+ modeling_stablelm.DecoderLayer.forward = ( # pylint: disable=protected-access
+ decoder_layer_forward
+ )
+
+
+def rotate_half(x: torch.Tensor):
+ """Rotates half the hidden dims of the input."""
+ # pylint: disable=invalid-name
+ x1, x2 = torch.chunk(x, 2, dim=-1)
+ return torch.cat((-x2, x1), dim=-1)
+
+
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
+ # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
+ # pylint: disable=invalid-name
+ cos = cos.squeeze(1).squeeze(0) # [seq_len, dim]
+ sin = sin.squeeze(1).squeeze(0) # [seq_len, dim]
+ cos = cos[position_ids].unsqueeze(1) # [batch_size, 1, seq_len, dim]
+ sin = sin[position_ids].unsqueeze(1) # [batch_size, 1, seq_len, dim]
+ q_embed = (q * cos) + (rotate_half(q) * sin)
+ k_embed = (k * cos) + (rotate_half(k) * sin)
+ return q_embed, k_embed
+
+
+def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
+ """
+ This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
+ num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
+ """
+ batch, num_key_value_heads, slen, head_dim = hidden_states.shape
+ if n_rep == 1:
+ return hidden_states
+ hidden_states = hidden_states[:, :, None, :, :].expand(
+ batch, num_key_value_heads, n_rep, slen, head_dim
+ )
+ return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
+
+
+def flashattn_attn(
+ self,
+ hidden_states: torch.FloatTensor,
+ attention_mask: torch.FloatTensor,
+ position_ids: torch.LongTensor,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: Optional[bool] = False, # pylint: disable=unused-argument
+ use_cache: Optional[bool] = False,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ max_seqlen: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ bsz, q_len, _ = hidden_states.size()
+
+ query_states = self.q_proj(hidden_states)
+ key_states = self.k_proj(hidden_states)
+ value_states = self.v_proj(hidden_states)
+
+ query_states = query_states.view(
+ bsz, q_len, self.num_heads, self.head_dim
+ ).transpose(1, 2)
+ key_states = key_states.view(
+ bsz, q_len, self.num_key_value_heads, self.head_dim
+ ).transpose(1, 2)
+ value_states = value_states.view(
+ bsz, q_len, self.num_key_value_heads, self.head_dim
+ ).transpose(1, 2)
+
+ query_rot = query_states[..., : self.rotary_ndims]
+ query_pass = query_states[..., self.rotary_ndims :]
+ key_rot = key_states[..., : self.rotary_ndims]
+ key_pass = key_states[..., self.rotary_ndims :]
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+ cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+ query_states, key_states = apply_rotary_pos_emb(
+ query_rot, key_rot, cos, sin, position_ids
+ )
+
+ # [batch_size, num_heads, seq_len, head_dim]
+ query_states = torch.cat((query_states, query_pass), dim=-1)
+ key_states = torch.cat((key_states, key_pass), dim=-1)
+
+ if past_key_value is not None:
+ # Reuse k, v, self_attention
+ key_states = torch.cat((past_key_value[0], key_states), dim=2)
+ value_states = torch.cat((past_key_value[1], value_states), dim=2)
+
+ past_key_value = (key_states, value_states) if use_cache else None
+
+ # Repeat k/v heads if n_kv_heads < n_heads
+ key_states = repeat_kv(key_states, self.num_key_value_groups)
+ value_states = repeat_kv(value_states, self.num_key_value_groups)
+
+ if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1:
+ # special handling using sample packing
+ qkv = torch.stack(
+ [query_states, key_states, value_states], dim=2
+ ) # [bsz, nh, 3, q_len, hd]
+ qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd]
+ qkv = rearrange(qkv, "b s ... -> (b s) ...")
+ softmax_scale = None
+
+ output = flash_attn_varlen_qkvpacked_func(
+ qkv, cu_seqlens, max_seqlen, 0.0, softmax_scale=softmax_scale, causal=True
+ )
+
+ attn_output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
+ attn_output = rearrange(attn_output, "b s h d -> b s (h d)")
+ else:
+ attn_weights = torch.matmul(
+ query_states, key_states.transpose(2, 3)
+ ) / math.sqrt(self.head_dim)
+
+ if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+ )
+ attn_weights = attn_weights + attention_mask
+
+ # Upcast attention to fp32
+ attn_weights = nn.functional.softmax(
+ attn_weights, dim=-1, dtype=torch.float32
+ ).to(query_states.dtype)
+ attn_output = torch.matmul(attn_weights, value_states)
+
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ # Merge heads
+ attn_output = attn_output.transpose(1, 2).contiguous()
+ attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+ # Final linear projection
+ attn_output = self.o_proj(attn_output)
+
+ return attn_output, None, past_key_value
+
+
+def decoder_layer_forward(
+ self,
+ hidden_states: Optional[torch.FloatTensor],
+ attention_mask: Optional[torch.FloatTensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: Optional[bool] = False,
+ use_cache: Optional[bool] = False,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ max_seqlen: Optional[torch.Tensor] = None,
+) -> Union[
+ Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]
+]:
+ # pylint: disable=duplicate-code
+ residual = hidden_states
+
+ hidden_states = self.input_layernorm(hidden_states)
+
+ # Self Attention
+ hidden_states, self_attn_weights, present_key_value = self.self_attn(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ cu_seqlens=cu_seqlens,
+ max_seqlen=max_seqlen,
+ )
+ hidden_states = residual + hidden_states
+
+ # Fully Connected
+ residual = hidden_states
+ hidden_states = self.post_attention_layernorm(hidden_states)
+ hidden_states = self.mlp(hidden_states)
+ hidden_states = residual + hidden_states
+
+ outputs = (hidden_states,)
+
+ if output_attentions:
+ outputs += (self_attn_weights,)
+
+ if use_cache:
+ outputs += (present_key_value,)
+
+ return outputs
+
+
+def stablelm_model_forward(
+ self,
+ input_ids: Optional[torch.LongTensor] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+) -> Union[Tuple, BaseModelOutputWithPast]:
+ # pylint: disable=duplicate-code
+ output_attentions = (
+ output_attentions
+ if output_attentions is not None
+ else self.config.output_attentions
+ )
+ output_hidden_states = (
+ output_hidden_states
+ if output_hidden_states is not None
+ else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+ return_dict = (
+ return_dict if return_dict is not None else self.config.use_return_dict
+ )
+
+ # Retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError(
+ "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
+ )
+ if input_ids is not None:
+ batch_size, seq_length = input_ids.shape
+ elif inputs_embeds is not None:
+ batch_size, seq_length, _ = inputs_embeds.shape
+ else:
+ raise ValueError(
+ "You have to specify either decoder_input_ids or decoder_inputs_embeds"
+ )
+
+ seq_length_with_past = seq_length
+ past_key_values_length = 0
+
+ if past_key_values is not None:
+ past_key_values_length = past_key_values[0][0].shape[2]
+ seq_length_with_past = seq_length_with_past + past_key_values_length
+
+ cu_seqlens = None
+ max_seqlen = None
+ if position_ids is None:
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
+ position_ids = torch.arange(
+ past_key_values_length,
+ seq_length + past_key_values_length,
+ dtype=torch.long,
+ device=device,
+ )
+ position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+ else:
+ position_ids = position_ids.view(-1, seq_length).long()
+ cu_seqlens, max_seqlen = get_cu_seqlens_from_pos_ids(position_ids)
+ cu_seqlens = cu_seqlens.squeeze()
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids)
+ # Embed positions
+ if attention_mask is None:
+ attention_mask = torch.ones(
+ (batch_size, seq_length_with_past),
+ dtype=torch.bool,
+ device=inputs_embeds.device,
+ )
+ attention_mask = (
+ self._prepare_decoder_attention_mask( # pylint: disable=protected-access
+ attention_mask,
+ (batch_size, seq_length),
+ inputs_embeds,
+ past_key_values_length,
+ )
+ )
+
+ hidden_states = inputs_embeds
+
+ if self.gradient_checkpointing and self.training:
+ if use_cache:
+ logger.warning(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+ )
+ use_cache = False
+
+ # Decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ next_decoder_cache = () if use_cache else None
+
+ for idx, decoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ past_key_value = past_key_values[idx] if past_key_values is not None else None
+
+ if self.gradient_checkpointing and self.training:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ # None for past_key_value
+ return module(*inputs)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(decoder_layer),
+ hidden_states,
+ attention_mask,
+ position_ids,
+ past_key_value,
+ output_attentions,
+ None,
+ cu_seqlens,
+ max_seqlen,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ cu_seqlens=cu_seqlens,
+ max_seqlen=max_seqlen,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+
+ hidden_states = self.norm(hidden_states)
+
+ # Add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = next_decoder_cache if use_cache else None
+ if not return_dict:
+ return tuple(
+ v
+ for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
+ if v is not None
+ )
+ return BaseModelOutputWithPast(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ )
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index c05bccbf08..aa6049bd3e 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -124,6 +124,17 @@ def load_model(
replace_btlm_attn_with_flash_attn(cfg.base_model)
+ if (
+ hasattr(model_config, "model_type")
+ and model_config.model_type == "stablelm_epoch"
+ ):
+ if cfg.flash_attention and cfg.sample_packing:
+ from axolotl.monkeypatch.stablelm_attn_hijack_flash import (
+ replace_stablelm_attn_with_flash_attn,
+ )
+
+ replace_stablelm_attn_with_flash_attn(cfg.base_model)
+
if cfg.is_llama_derived_model and cfg.flash_attention and cfg.sample_packing:
if cfg.device not in ["mps", "cpu"] and not inference:
from axolotl.monkeypatch.llama_attn_hijack_flash import (
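As a quick sanity check on the grouped-query-attention helper defined in the new monkeypatch module, `repeat_kv` expands `(batch, num_key_value_heads, seqlen, head_dim)` to `(batch, num_key_value_heads * n_rep, seqlen, head_dim)`. The snippet below assumes the module is importable (it pulls in flash-attn at import time):

```python
import torch

from axolotl.monkeypatch.stablelm_attn_hijack_flash import repeat_kv

# 1 sample, 2 KV heads, 5 tokens, head_dim 4; repeat each KV head 3 times.
kv = torch.randn(1, 2, 5, 4)
expanded = repeat_kv(kv, n_rep=3)
assert expanded.shape == (1, 6, 5, 4)  # num_key_value_heads * n_rep attention heads
```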
From d4a88e4eca44240b531b64ec9373bb7ac06a1bbb Mon Sep 17 00:00:00 2001
From: Abhishek Mishra
Date: Fri, 6 Oct 2023 17:35:56 +0530
Subject: [PATCH 53/77] Adding qlora config for Mistral (#675)
* Adding qlora config for Mistral
Contains a fix for the Mistral FA issue - ValueError: You are attempting to perform batched generation with padding_side='right'; this may lead to unexpected behaviour for the Flash Attention version of Mistral. Make sure to call tokenizer.padding_side = 'left' before tokenizing the input.
The fix for now is to set sample_packing: true and pad_to_sequence_len: true.
* Renamed to qlora.yml
---
examples/mistral/qlora.yml | 79 ++++++++++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
create mode 100644 examples/mistral/qlora.yml
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
new file mode 100644
index 0000000000..5a131c5f36
--- /dev/null
+++ b/examples/mistral/qlora.yml
@@ -0,0 +1,79 @@
+base_model: mistralai/Mistral-7B-v0.1
+base_model_config: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: true
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+ - path: mhenrichsen/alpaca_2k_test
+ type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.01
+output_dir: ./qlora-out
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 8192
+sample_packing: True
+pad_to_sequence_len: True
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
+lora_target_modules:
+ - gate_proj
+ - down_proj
+ - up_proj
+ - q_proj
+ - v_proj
+ - k_proj
+ - o_proj
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+eval_steps: 20
+eval_table_size: 5
+eval_table_max_new_tokens: 128
+save_steps:
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+ bos_token: ""
+ eos_token: ""
+ unk_token: ""
\ No newline at end of file
From 669f1d052c996a6b6c12bd146e15be30edb9be9d Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Sat, 7 Oct 2023 01:33:43 +0900
Subject: [PATCH 54/77] Fix: Higher vram usage for mistral and sample_packing
(#691)
* Fix: Higher vram usage for mistral and sample_packing
* chore: update comment
* chore: lint
---
examples/mistral/qlora.yml | 8 ++++----
src/axolotl/utils/models.py | 3 ++-
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 5a131c5f36..9c64a8c2dd 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -36,10 +36,10 @@ lora_target_modules:
- k_proj
- o_proj
-wandb_project:
-wandb_entity:
+wandb_project:
+wandb_entity:
wandb_watch:
-wandb_run_id:
+wandb_run_id:
wandb_log_model:
gradient_accumulation_steps: 4
@@ -76,4 +76,4 @@ fsdp_config:
special_tokens:
bos_token: ""
eos_token: ""
- unk_token: ""
\ No newline at end of file
+ unk_token: ""
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index aa6049bd3e..2c60f00c2b 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -81,7 +81,8 @@ def load_tokenizer(cfg):
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
os.environ["TOKENIZERS_PARALLELISM"] = "false"
- if cfg.is_mistral_derived_model:
+ # Mistral's official FA implementation requires left padding
+ if cfg.is_mistral_derived_model and cfg.flash_attention and not cfg.sample_packing:
tokenizer.padding_side = "left"
if cfg.special_tokens:
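After this follow-up, the tokenizer rule reduces to the sketch below: left padding is only forced for Mistral when the native FA2 path is actually used, i.e. flash attention on and sample packing off (`cfg` is illustrative).

```python
def maybe_force_left_padding(cfg, tokenizer) -> None:
    # Mistral's official FA implementation requires left padding;
    # the custom sample-packing patch does not.
    if cfg.is_mistral_derived_model and cfg.flash_attention and not cfg.sample_packing:
        tokenizer.padding_side = "left"
```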
From 4c8ddf2c6f5c2509cbc87d9804f9bd6e42dc0c1a Mon Sep 17 00:00:00 2001
From: mhenrichsen
Date: Fri, 6 Oct 2023 22:58:13 +0200
Subject: [PATCH 55/77] new lr, sample pack
---
examples/mistral/config.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index e644bec507..7924fc682c 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -16,8 +16,8 @@ val_set_size: 0.01
output_dir: ./out
sequence_len: 8192
-sample_packing:
-pad_to_sequence_len:
+sample_packing: true
+pad_to_sequence_len: true
wandb_project:
wandb_entity:
@@ -30,7 +30,7 @@ micro_batch_size: 2
num_epochs: 3
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
-learning_rate: 0.0002
+learning_rate: 0.000005
train_on_inputs: false
group_by_length: false
@@ -59,4 +59,4 @@ fsdp_config:
special_tokens:
bos_token: ""
eos_token: ""
- unk_token: ""
+ unk_token: ""
\ No newline at end of file
From de87ea68f6230d4998bfca3cf3fd86b29374b298 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 6 Oct 2023 22:38:15 -0400
Subject: [PATCH 56/77] fix multiline for docker (#694)
---
docker/Dockerfile-base | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
index 5d1e2c2b89..ab1a667b5b 100644
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -14,7 +14,7 @@ ARG CUDA="118"
ENV PYTHON_VERSION=$PYTHON_VERSION
RUN apt-get update \
- && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/*
+ && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/* \
&& wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& mkdir /root/.conda \
From 83a950bb87c4a47670668ad9abb1d6b09c6a3b69 Mon Sep 17 00:00:00 2001
From: mhenrichsen
Date: Sat, 7 Oct 2023 11:04:35 +0200
Subject: [PATCH 57/77] lint
---
examples/mistral/config.yml | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index 7924fc682c..08e3a02b19 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -59,4 +59,5 @@ fsdp_config:
special_tokens:
bos_token: ""
eos_token: ""
- unk_token: ""
\ No newline at end of file
+
+ unk_token: ""
From aca03983157612918520c4cbb1a08a9c535daf01 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 7 Oct 2023 12:20:45 -0400
Subject: [PATCH 58/77] apex not needed as amp is part of pytorch (#696)
---
docker/Dockerfile-base | 5 -----
1 file changed, 5 deletions(-)
diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
index ab1a667b5b..8f33ff2295 100644
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -57,11 +57,6 @@ FROM base-builder
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
-# recompile apex
-RUN python3 -m pip uninstall -y apex
-RUN git clone https://github.com/NVIDIA/apex
-RUN cd apex && python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
-
RUN mkdir -p /workspace/builds
COPY --from=bnb-builder /workspace/bitsandbytes /workspace/builds/bitsandbytes
From 7f2618b5f4eb4c8b2969182e80ce373c2017806a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 7 Oct 2023 12:23:31 -0400
Subject: [PATCH 59/77] add docker images for pytorch 2.10 (#697)
---
.github/workflows/base.yml | 5 +++++
.github/workflows/main.yml | 10 ++++++++++
2 files changed, 15 insertions(+)
diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index d755b63263..5f08854842 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -25,6 +25,11 @@ jobs:
python_version: "3.10"
pytorch: 2.0.1
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
+ - cuda: "118"
+ cuda_version: 11.8.0
+ python_version: "3.10"
+ pytorch: 2.1.0
+ torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
steps:
- name: Checkout
uses: actions/checkout@v3
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a5b4d30379..5fdd2d705d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -23,6 +23,11 @@ jobs:
python_version: "3.10"
pytorch: 2.0.1
axolotl_extras:
+ - cuda: 118
+ cuda_version: 11.8.0
+ python_version: "3.10"
+ pytorch: 2.1.0
+ axolotl_extras:
runs-on: [self-hosted, gpu, docker]
steps:
- name: Checkout
@@ -68,6 +73,11 @@ jobs:
pytorch: 2.0.1
axolotl_extras:
is_latest: true
+ - cuda: 118
+ cuda_version: 11.8.0
+ python_version: "3.10"
+ pytorch: 2.1.0
+ axolotl_extras:
runs-on: [self-hosted, gpu, docker]
steps:
- name: Checkout
From f91db198f36f329ad6d5f5f95217b4f9b7309633 Mon Sep 17 00:00:00 2001
From: mhenrichsen
Date: Sat, 7 Oct 2023 20:19:25 +0200
Subject: [PATCH 60/77] fix unneeded space (#699)
---
examples/mistral/config.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index 08e3a02b19..2a4498a119 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -59,5 +59,4 @@ fsdp_config:
special_tokens:
bos_token: ""
eos_token: ""
-
unk_token: ""
From 77c84e02fd1a7eef25cccc5b8104178d980851c7 Mon Sep 17 00:00:00 2001
From: "seungduk.kim.2304"
Date: Mon, 9 Oct 2023 02:37:54 +0900
Subject: [PATCH 61/77] Update README with some explanations (#700)
* Update README with some explanations
* revert commit-hook change
* add more explanation about batch size and gradient accum
* do not use latex format
* decorate
* git hook again
* Attach a link that explains about LoRA hyperparameters
* update table of content
* Explanation about lora_modules_to_save
---
README.md | 250 ++++++++++++++++++++++++++++++++++++------------------
1 file changed, 167 insertions(+), 83 deletions(-)
diff --git a/README.md b/README.md
index c2d8e7d8c6..e277120e62 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,10 @@ Features:
- [Supported Features](#axolotl-supports)
- [Quickstart](#quickstart-)
- [Installation](#installation)
- - [Docker Installation](#environment)
- - [Conda/Pip venv Installation](#condapip-venv)
- - [LambdaLabs Installation](#lambdalabs)
+ - [Docker](#docker)
+ - [Conda/Pip venv](#condapip-venv)
+ - [LambdaLabs](#lambdalabs)
+ - [Windows](#windows)
- [Dataset](#dataset)
- [How to Add Custom Prompts](#how-to-add-custom-prompts)
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
@@ -50,7 +51,7 @@ Features:
Axolotl provides a unified repository for fine-tuning
a variety of AI models with ease
- Go ahead and axolotl questions!!
+ Go ahead and Axolotl questions!!
@@ -102,7 +103,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
### Environment
-- Docker
+#### Docker
```bash
docker run --gpus '"all"' --rm -it winglian/axolotl:main-py3.10-cu118-2.0.1
```
@@ -114,12 +115,12 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
docker compose up -d
```
-- Conda/Pip venv
+#### Conda/Pip venv
1. Install python >=**3.9**
2. Install pytorch stable https://pytorch.org/get-started/locally/
- 3. Install axolotl along with python dependencies
+ 3. Install Axolotl along with python dependencies
```bash
pip3 install packaging
pip3 install -e '.[flash-attn,deepspeed]'
@@ -130,7 +131,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
```
Get the token at huggingface.co/settings/tokens
-- LambdaLabs
+#### LambdaLabs
Click to Expand
@@ -174,7 +175,8 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
```
-- Windows: Please use WSL or Docker!
+#### Windows
+Please use WSL or Docker!
### Dataset
@@ -396,15 +398,15 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
All yaml options
```yaml
-# this is the huggingface model that contains *.pt, *.safetensors, or *.bin files
-# this can also be a relative path to a model on disk
+# This is the huggingface model that contains *.pt, *.safetensors, or *.bin files
+# This can also be a relative path to a model on disk
base_model: ./llama-7b-hf
-# you can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc)
+# You can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc)
base_model_ignore_patterns:
-# if the base_model repo on hf hub doesn't include configuration .json files,
-# you can set that here, or leave this empty to default to base_model
+# If the base_model repo on hf hub doesn't include configuration .json files,
+# You can set that here, or leave this empty to default to base_model
base_model_config: ./llama-7b-hf
-# you can specify to choose a specific model revision from huggingface hub
+# You can specify to choose a specific model revision from huggingface hub
model_revision:
# Optional tokenizer configuration override in case you want to use a different tokenizer
# than the one defined in the base model
@@ -419,23 +421,24 @@ trust_remote_code:
tokenizer_use_fast:
# Whether to use the legacy tokenizer setting, defaults to True
tokenizer_legacy:
-# resize the model embeddings when new tokens are added to multiples of 32
-# this is reported to improve training speed on some models
+# Resize the model embeddings when new tokens are added to multiples of 32
+# This is reported to improve training speed on some models
resize_token_embeddings_to_32x:
-# used to identify which the model is based on
+# Used to identify which the model is based on
is_falcon_derived_model:
is_llama_derived_model:
+# Please note that if you set this to true, `padding_side` will be set to "left" by default
is_mistral_derived_model:
-# whether you are training a 4-bit GPTQ quantized model
+# Whether you are training a 4-bit GPTQ quantized model
gptq: true
gptq_groupsize: 128 # group size
gptq_model_v1: false # v1 or v2
-# this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
+# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
load_in_8bit: true
-# use bitsandbytes 4 bit
+# Use bitsandbytes 4 bit
load_in_4bit:
# Use CUDA bf16
@@ -449,9 +452,9 @@ tf32: true # require >=ampere
bfloat16: true # require >=ampere
float16: true
-# a list of one or more datasets to finetune the model with
+# A list of one or more datasets to finetune the model with
datasets:
- # hf dataset repo | "json" for local dataset, make sure to fill data_files
+ # HuggingFace dataset repo | "json" for local dataset, make sure to fill data_files
- path: vicgalle/alpaca-gpt4
# The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection]
type: alpaca # format | format: (chat/instruct) | .load_
@@ -461,16 +464,16 @@ datasets:
name: # Optional[str] name of dataset configuration to load
conversation: # Optional[str] fastchat conversation type, only used with type: sharegpt
- # custom user prompt
+ # Custom user prompt
- path: repo
type:
- # the below are defaults. only set what's needed.
+ # The below are defaults. only set what's needed.
system_prompt: ""
field_system: system
field_instruction: instruction
field_output: input
- # customizable to be single line or multi-line
+ # Customizable to be single line or multi-line
system_format: "{system}"
# 'format' can include {input}
format: |-
@@ -479,13 +482,13 @@ datasets:
# 'no_input_format' cannot include {input}
no_input_format: "{instruction} "
- # for completions datsets, uses the provided field if not `text`
+  # For completion datasets, uses the provided field if not `text`
field:
-# axolotl attempts to save the dataset as an arrow after packing the data together so
+# Axolotl attempts to save the dataset as an arrow after packing the data together so
# subsequent training attempts load faster, relative path
dataset_prepared_path: data/last_run_prepared
-# push prepared dataset to hub
+# Push prepared dataset to hub
push_dataset_to_hub: # repo path
# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()`
# if not set.
@@ -495,8 +498,8 @@ hub_model_id: # repo path to push finetuned model
# how to push checkpoints to hub
# https://huggingface.co/docs/transformers/v4.31.0/en/main_classes/trainer#transformers.TrainingArguments.hub_strategy
hub_strategy:
-# whether to use hf `use_auth_token` for loading datasets. Useful for fetching private datasets
-# required to be true when used in combination with `push_dataset_to_hub`
+# Whether to use hf `use_auth_token` for loading datasets. Useful for fetching private datasets
+# Required to be true when used in combination with `push_dataset_to_hub`
hf_use_auth_token: # boolean
# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc. 0 for no eval.
val_set_size: 0.04
@@ -505,30 +508,34 @@ dataset_shard_num:
# Index of shard to use for whole dataset
dataset_shard_idx:
-# the maximum length of an input to train with, this should typically be less than 2048
+# The maximum length of an input to train with, this should typically be less than 2048
# as most models have a token/context limit of 2048
sequence_len: 2048
-# pad inputs so each step uses constant sized buffers
-# this will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently
+# Pad inputs so each step uses constant sized buffers
+# This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently
pad_to_sequence_len:
-# max sequence length to concatenate training samples together up to
-# inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
+# Max sequence length to concatenate training samples together up to
+# Inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
# FutureWarning: This will soon be DEPRECATED
max_packed_sequence_len: 1024
-# use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true'
+# Use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true'
sample_packing:
-# set to 'false' if getting errors during eval with sample_packing on.
+# Set to 'false' if getting errors during eval with sample_packing on.
eval_sample_packing:
-# you can set these packing optimizations AFTER starting a training at least once.
+# You can set these packing optimizations AFTER starting a training at least once.
# The trainer will provide recommended values for these values.
sample_packing_eff_est:
total_num_tokens:
-# if you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model
+# If you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model
adapter: lora
-# if you already have a lora model trained that you want to load, put that here
-# lora hyperparameters
+# If you already have a lora model trained that you want to load, put that here.
+# This means after training, if you want to test the model, you should set this to the value of `lora_out_dir`.
lora_model_dir:
+
+# LoRA hyperparameters
+# For more details about the following options, see:
+# https://www.anyscale.com/blog/fine-tuning-llms-lora-or-full-parameter-an-in-depth-analysis-with-llama-2
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
@@ -540,36 +547,48 @@ lora_target_modules:
# - gate_proj
# - down_proj
# - up_proj
-lora_target_linear: # if true, will target all linear layers
+lora_target_linear: # If true, will target all linear layers
+
+# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
+# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
+# `embed_tokens` converts tokens to embeddings, and `lm_head` converts embeddings to token probabilities.
+# https://github.com/huggingface/peft/issues/334#issuecomment-1561727994
lora_modules_to_save:
# - embed_tokens
# - lm_head
+
+# Once you complete training, the model will be saved to the following directory.
+# If you merge the adapter to the base model, a subdirectory `merged` will be created under this directory.
+# Make sure `lora_model_dir` points to this directory if you want to use the trained model.
lora_out_dir:
lora_fan_in_fan_out: false
# ReLoRA configuration
-# must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed
-relora_steps: # number of steps per ReLoRA restart
-relora_warmup_steps: # number of per-restart warmup steps
-relora_cpu_offload: # true to perform lora weight merges on cpu during restarts, for modest gpu memory savings
+# Must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed
+relora_steps: # Number of steps per ReLoRA restart
+relora_warmup_steps: # Number of per-restart warmup steps
+relora_cpu_offload: # True to perform lora weight merges on cpu during restarts, for modest gpu memory savings
# wandb configuration if you're using it
wandb_mode: # "offline" to save run metadata locally and not sync to the server, "disabled" to turn off wandb
-wandb_project: # your wandb project name
-wandb_entity: # a wandb Team name if using a Team
+wandb_project: # Your wandb project name
+wandb_entity: # A wandb Team name if using a Team
wandb_watch:
-wandb_run_id: # set the name of your wandb run
+wandb_run_id: # Set the name of your wandb run
wandb_log_model: # "checkpoint" to log model to wandb Artifacts every `save_steps` or "end" to log only at the end of training
-# where to save the finished model to
+# Where to save the full-finetuned model to
output_dir: ./completed-model
-# whether to use torch.compile and which backend to use
+# Whether to use torch.compile and which backend to use
torch_compile: # bool
torch_compile_backend: # Optional[str]
-# training hyperparameters
+# Training hyperparameters
+
+# If greater than 1, the optimizer update will be skipped and the gradients will be accumulated over the given number of steps before the weights are updated.
gradient_accumulation_steps: 1
+# The number of samples to include in each batch. This is the number of samples sent to each GPU.
micro_batch_size: 2
eval_batch_size:
num_epochs: 3
@@ -577,44 +596,47 @@ warmup_steps: 100
learning_rate: 0.00003
lr_quadratic_warmup:
logging_steps:
-save_strategy: # set to `no` to skip checkpoint saves
-save_steps: # leave empty to save at each epoch
-eval_steps: # leave empty to eval at each epoch
-save_total_limit: # checkpoints saved at a time
+save_strategy: # Set to `no` to skip checkpoint saves
+save_steps: # Leave empty to save at each epoch
+eval_steps: # Leave empty to eval at each epoch
+save_total_limit: # Maximum number of checkpoints to keep at a time
+# Maximum number of iterations to train for. It takes precedence over num_epochs, which means that
+# if both are set, num_epochs will not be guaranteed.
+# e.g., when 1 epoch is 1000 steps => `num_epochs: 2` and `max_steps: 100` will train for 100 steps
max_steps:
-eval_table_size: # approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0
-eval_table_max_new_tokens: # total number of tokens generated for predictions sent to wandb. Default is 128
+eval_table_size: # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0
+eval_table_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
-# save model as safetensors (require safetensors package)
+# Save model as safetensors (requires the safetensors package)
save_safetensors:
-# whether to mask out or include the human's prompt from the training labels
+# Whether to mask out or include the human's prompt from the training labels
train_on_inputs: false
-# group similarly sized data to minimize padding
-# may be slower to start, as it must download and sort the entire dataset
-# note that training loss may have an oscillating pattern with this enabled
+# Group similarly sized data to minimize padding.
+# May be slower to start, as it must download and sort the entire dataset.
+# Note that training loss may have an oscillating pattern with this enabled.
group_by_length: false
# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
gradient_checkpointing: false
-# stop training after this many evaluation losses have increased in a row
+# Stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
early_stopping_patience: 3
-# specify a scheduler and kwargs to use with the optimizer
+# Specify a scheduler and kwargs to use with the optimizer
lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine
lr_scheduler_kwargs:
-# for one_cycle optim
-lr_div_factor: # learning rate div factor
+# For one_cycle optim
+lr_div_factor: # Learning rate div factor
-# for log_sweep optim
+# For log_sweep optim
log_sweep_min_lr:
log_sweep_max_lr:
-# specify optimizer
+# Specify optimizer
# Valid values are driven by the Transformers OptimizerNames class, see:
# https://github.com/huggingface/transformers/blob/95b374952dc27d8511541d6f5a4e22c9ec11fb24/src/transformers/training_args.py#L134
#
@@ -640,7 +662,7 @@ log_sweep_max_lr:
# - paged_lion_32bit
# - paged_lion_8bit
optimizer:
-# specify weight decay
+# Specify weight decay
weight_decay:
# adamw hyperparams
adam_beta1:
@@ -649,49 +671,51 @@ adam_epsilon:
# Gradient clipping max norm
max_grad_norm:
-# whether to bettertransformers
+# Whether to use BetterTransformers
flash_optimum:
-# whether to use xformers attention patch https://github.com/facebookresearch/xformers:
+# Whether to use xformers attention patch https://github.com/facebookresearch/xformers:
xformers_attention:
-# whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
+# Whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
flash_attention:
flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only
flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only
-# whether to use scaled-dot-product attention
+# Whether to use scaled-dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
# Landmark attention (only llama)
landmark_attention:
# xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
-# llama only
+# LLaMA only
xpos_rope:
# RoPE Scaling https://github.com/huggingface/transformers/pull/24653
rope_scaling:
type: # linear | dynamic
factor: # float
-# resume from a specific checkpoint dir
+# Resume from a specific checkpoint dir
resume_from_checkpoint:
-# if resume_from_checkpoint isn't set and you simply want it to start where it left off
-# be careful with this being turned on between different models
+# If resume_from_checkpoint isn't set and you simply want it to start where it left off, set this to true.
+# Be careful with this being turned on between different models.
auto_resume_from_checkpoints: false
-# don't mess with this, it's here for accelerate and torchrun
+# Don't mess with this, it's here for accelerate and torchrun
local_rank:
-# add or change special tokens
+# Add or change special tokens.
+# If you add tokens here, you don't need to add them to the `tokens` list.
special_tokens:
# bos_token: ""
# eos_token: ""
# unk_token: ""
-# add extra tokens
+
+# Add extra tokens.
tokens:
# FSDP
fsdp:
fsdp_config:
-# Deepspeed config path
+# Deepspeed config path. e.g., deepspeed/zero3.json
deepspeed:
# Advanced DDP Arguments
@@ -717,6 +741,66 @@ strict:
+
+#### Understanding of batch size and gradient accumulation steps
+
+Gradient accumulation means accumulating gradients over several mini-batches and updating the model weights afterward. When the samples in each batch are diverse, this technique doesn't significantly impact learning.
+
+This method allows training with larger effective batch sizes without needing proportionally more memory. Here's why:
+
+1. **Memory Consumption with Batch Size**: The primary reason increasing the batch size impacts memory is the storage requirement for intermediate activations. When you forward propagate a batch through a network, you have to store the activations at each layer for each sample in the batch, because these activations are used during backpropagation to compute gradients. Therefore, larger batches mean more activations, leading to greater GPU memory consumption.
+
+2. **Gradient Accumulation**: With gradient accumulation, you're effectively simulating a larger batch size by accumulating gradients over several smaller batches (or micro-batches). However, at any given time, you're only forward and backward propagating a micro-batch. This means you only store activations for the micro-batch, not the full accumulated batch. As a result, you can simulate the effect of a larger batch size without the memory cost of storing activations for a large batch.
+
+**Example 1:**
+
+- Micro batch size: 3
+- Gradient accumulation steps: 2
+- Number of GPUs: 3
+- Total batch size = 3 * 2 * 3 = 18
+
+```
+| GPU 1 | GPU 2 | GPU 3 |
+|----------------|----------------|----------------|
+| S1, S2, S3 | S4, S5, S6 | S7, S8, S9 |
+| e1, e2, e3 | e4, e5, e6 | e7, e8, e9 |
+|----------------|----------------|----------------|
+| → (accumulate) | → (accumulate) | → (accumulate) |
+|----------------|----------------|----------------|
+| S10, S11, S12 | S13, S14, S15 | S16, S17, S18 |
+| e10, e11, e12 | e13, e14, e15 | e16, e17, e18 |
+|----------------|----------------|----------------|
+| → (apply) | → (apply) | → (apply) |
+
+Accumulated gradient for the weight w1 after the second iteration (considering all GPUs):
+Total gradient for w1 = e1 + e2 + e3 + e4 + e5 + e6 + e7 + e8 + e9 + e10 + e11 + e12 + e13 + e14 + e15 + e16 + e17 + e18
+
+Weight update for w1:
+w1_new = w1_old - learning rate × (Total gradient for w1 / 18)
+```
+
+**Example 2:**
+
+- Micro batch size: 2
+- Gradient accumulation steps: 1
+- Number of GPUs: 3
+- Total batch size = 2 * 1 * 3 = 6
+
+```
+| GPU 1 | GPU 2 | GPU 3 |
+|-----------|-----------|-----------|
+| S1, S2 | S3, S4 | S5, S6 |
+| e1, e2 | e3, e4 | e5, e6 |
+|-----------|-----------|-----------|
+| → (apply) | → (apply) | → (apply) |
+
+Accumulated gradient for the weight w1 (considering all GPUs):
+Total gradient for w1 = e1 + e2 + e3 + e4 + e5 + e6
+
+Weight update for w1:
+w1_new = w1_old - learning rate × (Total gradient for w1 / 6)
+```
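+
+Below is a minimal PyTorch sketch (not Axolotl's trainer; the model, data, and variable names are illustrative placeholders) showing how the optimizer step is deferred while gradients accumulate over micro-batches:
+
+```python
+import torch
+import torch.nn as nn
+
+torch.manual_seed(0)
+model = nn.Linear(16, 1)
+optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
+loss_fn = nn.MSELoss()
+
+gradient_accumulation_steps = 2
+micro_batch_size = 3
+
+# Toy dataset: 12 samples -> 4 micro-batches -> 2 optimizer updates
+inputs = torch.randn(12, 16)
+targets = torch.randn(12, 1)
+
+optimizer.zero_grad()
+for start in range(0, inputs.size(0), micro_batch_size):
+    x = inputs[start : start + micro_batch_size]
+    y = targets[start : start + micro_batch_size]
+    loss = loss_fn(model(x), y)
+    # Scale so the accumulated gradient approximates the mean over the full effective batch
+    (loss / gradient_accumulation_steps).backward()
+    micro_step = start // micro_batch_size + 1
+    if micro_step % gradient_accumulation_steps == 0:
+        optimizer.step()       # weights are only updated every N micro-batches
+        optimizer.zero_grad()
+```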
+
+
+
### Train
Run
From 295b2662e1a9176e18492463627890c4de0a4cb2 Mon Sep 17 00:00:00 2001
From: lukemarsden
Date: Tue, 10 Oct 2023 07:14:23 +0100
Subject: [PATCH 62/77] Get qlora mistral-7b fine tuning working on a single
4090 (#708)
---
examples/mistral/qlora.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 9c64a8c2dd..3d22c70346 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -43,7 +43,7 @@ wandb_run_id:
wandb_log_model:
gradient_accumulation_steps: 4
-micro_batch_size: 4
+micro_batch_size: 2
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
From 11c48c5e039b1b08941b02d356d814bc62e8ac1f Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 10 Oct 2023 21:08:17 +0900
Subject: [PATCH 63/77] fix(doc): Add note on inference w sample packing (#712)
---
README.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/README.md b/README.md
index e277120e62..1610f7bda8 100644
--- a/README.md
+++ b/README.md
@@ -876,6 +876,10 @@ Pass the appropriate flag to the train command:
--base_model="./completed-model" --prompter=None --load_in_8bit=True
```
+Please use `--sample_packing False` if you have it enabled and receive an error similar to the one below:
+
+> RuntimeError: stack expects each tensor to be equal size, but got [1, 32, 1, 128] at entry 0 and [1, 32, 8, 128] at entry 1
+
### Merge LORA to base
Add below flag to train command above
From ace70b33c6f04f5fe9f8fbb47ae386218067ce22 Mon Sep 17 00:00:00 2001
From: atgctg <105969161+atgctg@users.noreply.github.com>
Date: Tue, 10 Oct 2023 14:32:20 +0200
Subject: [PATCH 64/77] Fix: lowercase `True` values in config (#713)
* Fix: lowercase `True` values in config
* Fix: lowercase `True` values in config
---
examples/mistral/qlora.yml | 4 ++--
examples/pythia/lora.yml | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 3d22c70346..09639d006a 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -19,8 +19,8 @@ adapter: qlora
lora_model_dir:
sequence_len: 8192
-sample_packing: True
-pad_to_sequence_len: True
+sample_packing: true
+pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index f9a83c059a..6ff036621f 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -28,8 +28,8 @@ num_epochs: 3
learning_rate: 0.00001
train_on_inputs: false
group_by_length: false
-bf16: True
-tf32: True
+bf16: true
+tf32: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
From 5855dded3db7a205bc0d8476caf2fa46a0404c13 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 10 Oct 2023 21:51:56 +0900
Subject: [PATCH 65/77] fix(doc): update default doc according to arg (#714)
---
README.md | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 1610f7bda8..f22ccb5939 100644
--- a/README.md
+++ b/README.md
@@ -469,12 +469,13 @@ datasets:
type:
# The below are defaults. only set what's needed.
system_prompt: ""
+ system_format: "{system}"
field_system: system
field_instruction: instruction
- field_output: input
+ field_input: input
+ field_output: output
# Customizable to be single line or multi-line
- system_format: "{system}"
# 'format' can include {input}
format: |-
User: {instruction} {input}
@@ -482,7 +483,7 @@ datasets:
# 'no_input_format' cannot include {input}
no_input_format: "{instruction} "
- # For completions datasets, uses the provided field if not `text`
+ # For `completion` datasets only, uses the provided field instead of the `text` column
field:
# Axolotl attempts to save the dataset as an arrow after packing the data together so
From 490923fb78e0646c2c0ba427628a0daa72b86996 Mon Sep 17 00:00:00 2001
From: Jan Philipp Harries <2862336+jphme@users.noreply.github.com>
Date: Wed, 11 Oct 2023 13:28:12 +0200
Subject: [PATCH 66/77] Save Axolotl config as WandB artifact (#716)
---
src/axolotl/cli/__init__.py | 1 +
src/axolotl/utils/callbacks.py | 24 ++++++++++++++++++++++++
src/axolotl/utils/trainer.py | 4 ++++
3 files changed, 29 insertions(+)
diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py
index c21d931705..07a6209e46 100644
--- a/src/axolotl/cli/__init__.py
+++ b/src/axolotl/cli/__init__.py
@@ -194,6 +194,7 @@ def load_cfg(config: Path = Path("examples/"), **kwargs):
# load the config from the yaml file
with open(config, encoding="utf-8") as file:
cfg: DictDefault = DictDefault(yaml.safe_load(file))
+ cfg.axolotl_config_path = config
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
# then overwrite the value
cfg_keys = cfg.keys()
diff --git a/src/axolotl/utils/callbacks.py b/src/axolotl/utils/callbacks.py
index 9a7ebe9517..458e537c63 100644
--- a/src/axolotl/utils/callbacks.py
+++ b/src/axolotl/utils/callbacks.py
@@ -514,3 +514,27 @@ def log_table_from_dataloader(name: str, table_dataloader):
return control
return LogPredictionCallback
+
+
+class SaveAxolotlConfigtoWandBCallback(TrainerCallback):
+ """Callback to save axolotl config to wandb"""
+
+ def __init__(self, axolotl_config_path):
+ self.axolotl_config_path = axolotl_config_path
+
+ def on_train_begin(
+ self,
+ args: AxolotlTrainingArguments, # pylint: disable=unused-argument
+ state: TrainerState, # pylint: disable=unused-argument
+ control: TrainerControl,
+ **kwargs, # pylint: disable=unused-argument
+ ):
+ if is_main_process():
+ try:
+ artifact = wandb.Artifact(name="axolotl-config", type="config")
+ artifact.add_file(local_path=self.axolotl_config_path)
+ wandb.run.log_artifact(artifact)
+ LOG.info("Axolotl config has been saved to WandB as an artifact.")
+ except (FileNotFoundError, ConnectionError) as err:
+ LOG.warning(f"Error while saving Axolotl config to WandB: {err}")
+ return control
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index a10a2b0e77..ee8c634966 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -30,6 +30,7 @@
from axolotl.utils.callbacks import (
EvalFirstStepCallback,
GPUStatsCallback,
+ SaveAxolotlConfigtoWandBCallback,
SaveBetterTransformerModelCallback,
bench_eval_callback_factory,
log_prediction_callback_factory,
@@ -775,6 +776,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
LogPredictionCallback = log_prediction_callback_factory(trainer, tokenizer)
trainer.add_callback(LogPredictionCallback(cfg))
+ if cfg.use_wandb:
+ trainer.add_callback(SaveAxolotlConfigtoWandBCallback(cfg.axolotl_config_path))
+
if cfg.do_bench_eval:
trainer.add_callback(bench_eval_callback_factory(trainer, tokenizer))
From 1c412c7e9dd228209ad63afdcd5dc430a1ef82ab Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 07:46:07 -0400
Subject: [PATCH 67/77] improve handling of the prepared ds path and other cfg
defaults (#701)
---
src/axolotl/cli/inference.py | 1 +
src/axolotl/cli/train.py | 13 +++++++++++++
src/axolotl/common/const.py | 5 +++++
src/axolotl/utils/data.py | 4 ++--
4 files changed, 21 insertions(+), 2 deletions(-)
create mode 100644 src/axolotl/common/const.py
diff --git a/src/axolotl/cli/inference.py b/src/axolotl/cli/inference.py
index f3daac83dd..91405d8c66 100644
--- a/src/axolotl/cli/inference.py
+++ b/src/axolotl/cli/inference.py
@@ -14,6 +14,7 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
# pylint: disable=duplicate-code
print_axolotl_text_art()
parsed_cfg = load_cfg(config, **kwargs)
+ parsed_cfg.sample_packing = False
parser = transformers.HfArgumentParser((TrainerCliArgs))
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
return_remaining_strings=True
diff --git a/src/axolotl/cli/train.py b/src/axolotl/cli/train.py
index c64755872b..b49cbc6b60 100644
--- a/src/axolotl/cli/train.py
+++ b/src/axolotl/cli/train.py
@@ -1,10 +1,12 @@
"""
CLI to run training on a model
"""
+import logging
from pathlib import Path
import fire
import transformers
+from colorama import Fore
from axolotl.cli import (
check_accelerate_default_config,
@@ -14,8 +16,11 @@
print_axolotl_text_art,
)
from axolotl.common.cli import TrainerCliArgs
+from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
from axolotl.train import train
+LOG = logging.getLogger("axolotl.cli.train")
+
def do_cli(config: Path = Path("examples/"), **kwargs):
# pylint: disable=duplicate-code
@@ -27,6 +32,14 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
return_remaining_strings=True
)
+ if parsed_cli_args.prepare_ds_only and not parsed_cfg.dataset_prepared_path:
+ msg = (
+ Fore.RED
+ + "--prepare_ds_only called without dataset_prepared_path set."
+ + Fore.RESET
+ )
+ LOG.warning(msg)
+ parsed_cfg.dataset_prepared_path = DEFAULT_DATASET_PREPARED_PATH
dataset_meta = load_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args)
if parsed_cli_args.prepare_ds_only:
diff --git a/src/axolotl/common/const.py b/src/axolotl/common/const.py
new file mode 100644
index 0000000000..fd34ad4694
--- /dev/null
+++ b/src/axolotl/common/const.py
@@ -0,0 +1,5 @@
+"""
+Various shared constants
+"""
+
+DEFAULT_DATASET_PREPARED_PATH = "last_run_prepared"
diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
index bac7d96c9e..c944dd27ba 100644
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -16,6 +16,7 @@
from huggingface_hub import hf_hub_download
from transformers import PreTrainedTokenizerBase
+from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
from axolotl.datasets import ConstantLengthDataset, TokenizedPromptDataset
from axolotl.prompt_strategies import load
from axolotl.prompt_tokenizers import (
@@ -44,7 +45,6 @@
)
LOG = logging.getLogger("axolotl")
-DEFAULT_DATASET_PREPARED_PATH = "last_run_prepared"
def md5(to_hash: str, encoding: str = "utf-8") -> str:
@@ -357,7 +357,7 @@ def for_d_in_datasets(dataset_configs):
if len(datasets) > 1:
LOG.info("shuffle merged datasets")
dataset = dataset.shuffle(seed=seed)
- if cfg.local_rank == 0 and cfg.dataset_prepared_path:
+ if cfg.local_rank == 0:
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
dataset.save_to_disk(prepared_ds_path)
if cfg.push_dataset_to_hub:
From 2aa1f714641da62a19a1100299c07b6f41985620 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 08:57:28 -0400
Subject: [PATCH 68/77] fix pytorch 2.1.0 build, add multipack docs (#722)
---
.github/workflows/main.yml | 1 +
docker/Dockerfile | 4 +++
docs/multipack.md | 51 ++++++++++++++++++++++++++++++++++++++
3 files changed, 56 insertions(+)
create mode 100644 docs/multipack.md
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5fdd2d705d..f84f7f7a9a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -51,6 +51,7 @@ jobs:
build-args: |
BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
CUDA=${{ matrix.cuda }}
+ PYTORCH_VERSION=${{ matrix.pytorch }}
file: ./docker/Dockerfile
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7b121aaa7b..ff47548bc4 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,6 +5,9 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
ARG AXOLOTL_EXTRAS=""
ARG CUDA="118"
ENV BNB_CUDA_VERSION=$CUDA
+ARG PYTORCH_VERSION="2.0.1"
+
+ENV PYTORCH_VERSION=$PYTORCH_VERSION
RUN apt-get update && \
apt-get install -y vim curl
@@ -16,6 +19,7 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
WORKDIR /workspace/axolotl
# If AXOLOTL_EXTRAS is set, append it in brackets
+RUN sed -i "s/torch==.*/torch==$PYTORCH_VERSION/" requirements.txt
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
pip install -e .[flash-attn,$AXOLOTL_EXTRAS]; \
else \
diff --git a/docs/multipack.md b/docs/multipack.md
new file mode 100644
index 0000000000..2a55148b2a
--- /dev/null
+++ b/docs/multipack.md
@@ -0,0 +1,51 @@
+# Multipack
+
+4k context, bsz = 4,
+each character represents 256 tokens,
+X represents a padding token.
+
+```
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+[[ A A A A A A A A A A A ]
+ B B B B B B ]
+ C C C C C C C ]
+ D D D D ]]
+
+[[ E E E E E E E E ]
+ [ F F F F ]
+ [ G G G ]
+ [ H H H H ]]
+
+[[ I I I ]
+ [ J J J ]
+ [ K K K K K]
+ [ L L L ]]
+```
+
+After padding to the longest input in each step:
+```
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+[[ A A A A A A A A A A A ]
+ B B B B B B X X X X X X ]
+ C C C C C C C X X X X ]
+ D D D D X X X X X X X ]]
+
+[[ E E E E E E E E ]
+ [ F F F F X X X X ]
+ [ G G G X X X X X ]
+ [ H H H H X X X X ]]
+
+[[ I I I X X ]
+ [ J J J X X ]
+ [ K K K K K ]
+ [ L L L X X ]]
+```
+
+With packing (note it's the same effective number of tokens per step, but a true bsz of 1):
+```
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+[[ A A A A A A A A A A A B B B B B
+ B C C C C C C C D D D D E E E E
+ E E E E F F F F F G G G H H H H
+ I I I J J J J K K K K K L L L X ]]
+```
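+
+As an illustration only (Axolotl's multipack sampler is more sophisticated), a simple first-fit greedy packer shows the idea of filling a fixed token budget with whole samples instead of padding each one:
+
+```python
+def pack_greedy(lengths, budget=4096):
+    """Assign sample indices to bins so the total length per bin stays within the budget."""
+    bins = []
+    for idx, length in enumerate(lengths):
+        for packed in bins:
+            if sum(lengths[i] for i in packed) + length <= budget:
+                packed.append(idx)
+                break
+        else:
+            bins.append([idx])
+    return bins
+
+# Token lengths for samples A..L from the diagrams above (1 character = 256 tokens)
+lengths = [11, 6, 7, 4, 8, 4, 3, 4, 3, 3, 5, 3]
+print(pack_greedy([l * 256 for l in lengths]))
+```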
From 3bd95283905db3e751a242aa387b3f5659f883ab Mon Sep 17 00:00:00 2001
From: Maxime <672982+maximegmd@users.noreply.github.com>
Date: Fri, 13 Oct 2023 16:00:42 +0200
Subject: [PATCH 69/77] add noisy embedding (#721)
* add noisy embedding
* fix format
* Update README.md
* Update README.md
* linter issues
* caseus fixes
---------
Co-authored-by: Maxime
---
README.md | 5 +++
.../monkeypatch/llama_embeddings_hijack.py | 40 +++++++++++++++++++
.../monkeypatch/mistral_embeddings_hijack.py | 40 +++++++++++++++++++
src/axolotl/utils/models.py | 20 ++++++++++
4 files changed, 105 insertions(+)
create mode 100644 src/axolotl/monkeypatch/llama_embeddings_hijack.py
create mode 100644 src/axolotl/monkeypatch/mistral_embeddings_hijack.py
diff --git a/README.md b/README.md
index f22ccb5939..57447b36de 100644
--- a/README.md
+++ b/README.md
@@ -672,6 +672,11 @@ adam_epsilon:
# Gradient clipping max norm
max_grad_norm:
+# Augmentation techniques
+# NEFT https://arxiv.org/abs/2310.05914, set this to a number (paper default is 5) to add noise to embeddings
+# Currently only supported on Llama and Mistral
+noisy_embedding_alpha:
+
 # Whether to use BetterTransformers
flash_optimum:
# Whether to use xformers attention patch https://github.com/facebookresearch/xformers:
diff --git a/src/axolotl/monkeypatch/llama_embeddings_hijack.py b/src/axolotl/monkeypatch/llama_embeddings_hijack.py
new file mode 100644
index 0000000000..654ca3ba82
--- /dev/null
+++ b/src/axolotl/monkeypatch/llama_embeddings_hijack.py
@@ -0,0 +1,40 @@
+"""
+patch to add noisy embeddings per https://arxiv.org/abs/2310.05914
+"""
+
+import torch
+import transformers.models.llama.modeling_llama
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+
+def replace_llama_embeddings_with_uniform_distribution(noise_alpha=5):
+ # pylint: disable=duplicate-code
+ def noised_embed(orig_embed, noise_alpha, model):
+ def new_func(input_ids):
+ # during training, we add noise to the embedding
+ # during generation, we don't add noise to the embedding
+ if model.training:
+ embed_init = orig_embed(input_ids)
+ dims = torch.tensor(embed_init.size(1) * embed_init.size(2))
+ mag_norm = noise_alpha / torch.sqrt(dims)
+ return embed_init + torch.zeros_like(embed_init).uniform_(
+ -mag_norm, mag_norm
+ )
+ return orig_embed(input_ids)
+
+ return new_func
+
+ def post_init(orig_post_init):
+ def new_func(self):
+ orig_post_init(self)
+ self.embed_tokens.forward = noised_embed(
+ self.embed_tokens.forward, noise_alpha, self
+ )
+
+ return new_func
+
+ transformers.models.llama.modeling_llama.LlamaModel.post_init = post_init(
+ transformers.models.llama.modeling_llama.LlamaModel.post_init
+ )
diff --git a/src/axolotl/monkeypatch/mistral_embeddings_hijack.py b/src/axolotl/monkeypatch/mistral_embeddings_hijack.py
new file mode 100644
index 0000000000..ed5f259650
--- /dev/null
+++ b/src/axolotl/monkeypatch/mistral_embeddings_hijack.py
@@ -0,0 +1,40 @@
+"""
+patch to add noisy embeddings per https://arxiv.org/abs/2310.05914
+"""
+
+import torch
+import transformers.models.mistral.modeling_mistral
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+
+def replace_mistral_embeddings_with_uniform_distribution(noise_alpha=5):
+ # pylint: disable=duplicate-code
+ def noised_embed(orig_embed, noise_alpha, model):
+ def new_func(input_ids):
+ # during training, we add noise to the embedding
+ # during generation, we don't add noise to the embedding
+ if model.training:
+ embed_init = orig_embed(input_ids)
+ dims = torch.tensor(embed_init.size(1) * embed_init.size(2))
+ mag_norm = noise_alpha / torch.sqrt(dims)
+ return embed_init + torch.zeros_like(embed_init).uniform_(
+ -mag_norm, mag_norm
+ )
+ return orig_embed(input_ids)
+
+ return new_func
+
+ def post_init(orig_post_init):
+ def new_func(self):
+ orig_post_init(self)
+ self.embed_tokens.forward = noised_embed(
+ self.embed_tokens.forward, noise_alpha, self
+ )
+
+ return new_func
+
+ transformers.models.mistral.modeling_mistral.MistralModel.post_init = post_init(
+ transformers.models.mistral.modeling_mistral.MistralModel.post_init
+ )
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 2c60f00c2b..c133e9eb61 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -180,6 +180,26 @@ def load_model(
LOG.info("patching with flash attention")
replace_mistral_attn_with_flash_attn(packed=cfg.sample_packing)
+ if cfg.is_llama_derived_model and cfg.noisy_embedding_alpha:
+ from axolotl.monkeypatch.llama_embeddings_hijack import (
+ replace_llama_embeddings_with_uniform_distribution,
+ )
+
+ LOG.info("patching with noisy embeddings")
+ replace_llama_embeddings_with_uniform_distribution(
+ noise_alpha=cfg.noisy_embedding_alpha
+ )
+
+ if cfg.is_mistral_derived_model and cfg.noisy_embedding_alpha:
+ from axolotl.monkeypatch.mistral_embeddings_hijack import (
+ replace_mistral_embeddings_with_uniform_distribution,
+ )
+
+ LOG.info("patching with noisy embeddings")
+ replace_mistral_embeddings_with_uniform_distribution(
+ noise_alpha=cfg.noisy_embedding_alpha
+ )
+
if cfg.is_llama_derived_model and cfg.xpos_rope:
from axolotl.monkeypatch.xpos_rope_llama_monkey_patch import (
replace_llama_rope_with_xpos_rope,
From bfbdba8614f55d430affc880fc4e3d9b5c33b13a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 10:27:56 -0400
Subject: [PATCH 70/77] pin xformers >= 0.0.22 (#724)
---
requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index cced386735..1092ebddfb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ flash-attn>=2.3.0
sentencepiece
wandb
einops
-xformers
+xformers>=0.0.22
optimum
hf_transfer
colorama
From f30afe4544982140f9ca60c6428a47419f0f9346 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 11:04:39 -0400
Subject: [PATCH 71/77] misc sharegpt fixes (#723)
* support for sharegpt with assistant talking first, better masking of assistant token, allow remap of roles from dataset
* invalid role is actually not possible
* update tokenized fixture for corrected labels
---
src/axolotl/prompt_tokenizers.py | 68 +++++++++++-----------
src/axolotl/prompters.py | 6 +-
tests/fixtures/conversation.tokenized.json | 2 +-
tests/test_prompt_tokenizers.py | 67 +++++++++++++++++++++
4 files changed, 107 insertions(+), 36 deletions(-)
diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
index 1b39336642..4e30b81a71 100644
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -2,7 +2,6 @@
import abc
import copy
-import functools
import logging
from typing import Dict, List, Tuple, Union
@@ -57,26 +56,6 @@ def tokenize_prompt(self, prompt):
def supports_batched(self):
return False
- @functools.lru_cache(maxsize=128)
- def _get_user_token(self):
- try:
- id_or_ids = self.tokenizer.convert_tokens_to_ids("<|USER|>")
- if isinstance(id_or_ids, (int,)):
- return id_or_ids
- except KeyError:
- pass
- return False
-
- @functools.lru_cache(maxsize=128)
- def _get_assistant_token(self):
- try:
- id_or_ids = self.tokenizer.convert_tokens_to_ids("<|ASSISTANT|>")
- if isinstance(id_or_ids, (int,)):
- return id_or_ids
- except KeyError:
- pass
- return False
-
def _tokenize(
self, prompt: str, add_eos_token: bool = True, strip_bos_token: bool = False
) -> BatchEncoding:
@@ -356,18 +335,34 @@ def get_conversation_thread(self, prompt):
def tokenize_prompt(self, prompt):
result, current_len = tokenize_prompt_default()
- user_token = self._get_user_token()
- assistant_token = self._get_assistant_token()
conversation: Conversation = (
- self.prompter._conversation # pylint: disable=protected-access
+ self.prompter._conversation.copy() # pylint: disable=protected-access
)
+
+ # support for custom roles from the dataset, only useful for vicuna style prompts/roles
+ role_remap = []
+ if (
+ conversation.name == "vicuna_v1.1"
+ and "roles" in prompt
+ and len(prompt["roles"]) >= 2
+ ):
+ role_remap = [
+ {"from": conversation.roles[0], "to": prompt["roles"][0]},
+ {"from": conversation.roles[1], "to": prompt["roles"][1]},
+ ]
+
try:
for _, part in enumerate(
self.prompter.build_prompt(self.get_conversation_thread(prompt))
):
if isinstance(part, tuple):
if conversation.roles[0] in part[0]:
- turn = part[0] + part[1] if not user_token else part[1]
+ role = (
+ part[0].replace(role_remap[0]["from"], role_remap[0]["to"])
+ if role_remap
+ else part[0]
+ )
+ turn = role + part[1]
# this is still the user query, we should
if not part[1].strip():
LOG.warning(f"user turn has empty text: {prompt}")
@@ -376,13 +371,16 @@ def tokenize_prompt(self, prompt):
add_eos_token=False,
strip_bos_token=True,
)
- if user_token:
- res["input_ids"] = [user_token, *res["input_ids"]]
# everything from this is masked out from the labels
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
elif conversation.roles[1] in part[0]:
# TODO label assistant token/tokens w/ IGNORE_TOKEN_ID
- turn = part[0] + part[1] if not assistant_token else part[1]
+ role = (
+ part[0].replace(role_remap[1]["from"], role_remap[1]["to"])
+ if role_remap
+ else part[0]
+ )
+ turn = role + part[1]
# this should be the assistant response, should end with an eos token
if not part[1].strip():
LOG.warning(f"assistant turn has empty text: {prompt}")
@@ -391,13 +389,17 @@ def tokenize_prompt(self, prompt):
add_eos_token=True,
strip_bos_token=True,
)
- if assistant_token:
- res["input_ids"] = [
- assistant_token,
- *res["input_ids"],
- ]
+ role_res = self._tokenize(
+ role.rstrip(),
+ add_eos_token=False,
+ strip_bos_token=True,
+ )
# not masked out from labels
labels = copy.deepcopy(res["input_ids"])
+ len_role = len(role_res["input_ids"])
+ labels[:len_role] = [IGNORE_TOKEN_ID] * min(
+ len_role, len(labels)
+ )
elif part[0] == "":
turn = part[1]
# this is only ever the first part, should include the bos token and the user query
diff --git a/src/axolotl/prompters.py b/src/axolotl/prompters.py
index 370e15091c..7cd89886aa 100644
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -274,9 +274,11 @@ def build_prompt(self, source) -> Generator[str, None, None]:
raise err
conv.messages = []
- for j, sentence in enumerate(source):
+ for _, sentence in enumerate(source):
role = roles[sentence["from"]]
- if role != conv.roles[j % 2]:
+ if len(conv.messages) > 0 and (
+ (role == conv.messages[-1][0]) or (role not in conv.roles)
+ ):
LOG.warning(f"{SHAREGPT_ASSERTION_FAILED_ROLE}: {sentence}")
conv.append_message(role, sentence["value"])
diff --git a/tests/fixtures/conversation.tokenized.json b/tests/fixtures/conversation.tokenized.json
index ea4fca0990..4b7334cc15 100644
--- a/tests/fixtures/conversation.tokenized.json
+++ b/tests/fixtures/conversation.tokenized.json
@@ -1 +1 @@
-{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 29871, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 29871, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 
937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 
408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 
8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2], "attention_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 
20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 
1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 
278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 
29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2]}
+{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 29871, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 29871, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 
937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 
408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 
8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2], "attention_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 739, 29915, 29879, 1950, 304, 671, 21531, 
20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 
1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 
7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 
445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2]}
diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py
index 9ee5ea588d..0635bd718b 100644
--- a/tests/test_prompt_tokenizers.py
+++ b/tests/test_prompt_tokenizers.py
@@ -90,6 +90,73 @@ def test_sharegpt_warnings_integration(self):
strat.tokenize_prompt(conversation)
assert "assistant turn has empty text" in self._caplog.records[1].message
+ def test_sharegpt_warnings_turns(self):
+ conversation = {
+ "conversations": [
+ {"from": "system", "value": "lorem"},
+ {"from": "gpt", "value": "ipsum"},
+ {"from": "human", "value": "dolor"},
+ {"from": "human", "value": "dolor"},
+ {"from": "gpt", "value": "sit"},
+ ]
+ }
+ prompter = ShareGPTPrompterV2()
+ strat = ShareGPTPromptTokenizingStrategy(
+ prompter,
+ self.tokenizer,
+ False,
+ 2048,
+ )
+ with self._caplog.at_level(logging.WARNING):
+ strat.tokenize_prompt(conversation)
+ assert (
+ "Role did not alternate between turns (gpt and human)"
+ in self._caplog.records[0].message
+ )
+
+ def test_sharegpt_changes_roles(self):
+ conversation = {
+ "roles": ["USER", "CHARACTER"],
+ "conversations": [
+ {"from": "system", "value": "lorem"},
+ {"from": "gpt", "value": "ipsum"},
+ {"from": "human", "value": "dolor"},
+ {"from": "gpt", "value": "sit"},
+ ],
+ }
+ prompter = ShareGPTPrompterV2()
+ strat = ShareGPTPromptTokenizingStrategy(
+ prompter,
+ self.tokenizer,
+ False,
+ 2048,
+ )
+ with self._caplog.at_level(logging.WARNING):
+ res = strat.tokenize_prompt(conversation)
+ assert "CHARACTER" in self.tokenizer.decode(res["input_ids"])
+
+ def test_sharegpt_assistant_label_ignore(self):
+ conversation = {
+ "roles": ["user", "assistant"],
+ "conversations": [
+ {"from": "system", "value": "lorem"},
+ {"from": "gpt", "value": "ipsum"},
+ {"from": "human", "value": "dolor"},
+ {"from": "gpt", "value": "sit"},
+ ],
+ }
+ prompter = ShareGPTPrompterV2()
+ strat = ShareGPTPromptTokenizingStrategy(
+ prompter,
+ self.tokenizer,
+ False,
+ 2048,
+ )
+ with self._caplog.at_level(logging.WARNING):
+ res = strat.tokenize_prompt(conversation)
+ idx = res["input_ids"].index(20255) # assistant token
+ assert res["labels"][idx] == -100
+
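(Aside, not part of the patch: the `-100` label asserted in the test above is the default `ignore_index` of PyTorch's cross-entropy loss, which is why masked tokens contribute no training loss. A minimal sketch, assuming stock PyTorch:)

```python
# Minimal sketch (stock PyTorch, not axolotl code): labels equal to -100 are
# skipped by cross_entropy, so masked prompt/system tokens add no loss.
import torch
import torch.nn.functional as F

logits = torch.randn(3, 10)          # 3 token positions, vocab size 10
labels = torch.tensor([4, -100, 7])  # the middle position is masked out
loss = F.cross_entropy(logits, labels)  # ignore_index defaults to -100
print(loss)
```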
def test_no_sys_prompt(self):
"""
tests the interface between the user and assistant parts
From 8d288a2ad48aefca02e2fc9773b1e63859a39411 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 11:19:30 -0400
Subject: [PATCH 72/77] workaround for installing xformers w torch 2.1.0 (#725)
---
setup.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/setup.py b/setup.py
index 3109d4af1a..1ceda5c044 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,14 @@ def parse_requirements():
):
# Handle standard packages
_install_requires.append(line)
+
+ # TODO(wing) remove once xformers release supports torch 2.1.0
+ if "torch==2.1.0" in _install_requires:
+ _install_requires.pop(_install_requires.index("xformers>=0.0.22"))
+ _install_requires.append(
+ "git+https://github.com/facebookresearch/xformers.git@main#egg=xformers"
+ )
+
return _install_requires, _dependency_links
From 7f2027d93f571ceffd56db79b66cf2c50f4c68e2 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 13 Oct 2023 15:21:17 -0400
Subject: [PATCH 73/77] tweak for xformers install w pytorch 2.1.0 (#727)
---
setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py
index 1ceda5c044..ada5fcb289 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ def parse_requirements():
if "torch==2.1.0" in _install_requires:
_install_requires.pop(_install_requires.index("xformers>=0.0.22"))
_install_requires.append(
- "git+https://github.com/facebookresearch/xformers.git@main#egg=xformers"
+ "xformers @ git+https://github.com/facebookresearch/xformers.git@main"
)
return _install_requires, _dependency_links
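(Aside: a minimal sketch, not the project's actual `parse_requirements`, of the combined effect of the two setup.py patches above when torch 2.1.0 is pinned; the second patch switches to the PEP 508 `name @ url` form that setuptools accepts in `install_requires`:)

```python
# Sketch (assumed starting list): rewrite the xformers pin to a git source
# when torch 2.1.0 is present, using the "name @ url" direct-reference form.
install_requires = ["torch==2.1.0", "xformers>=0.0.22"]

if "torch==2.1.0" in install_requires:
    install_requires.remove("xformers>=0.0.22")
    install_requires.append(
        "xformers @ git+https://github.com/facebookresearch/xformers.git@main"
    )

print(install_requires)
# ['torch==2.1.0', 'xformers @ git+https://github.com/facebookresearch/xformers.git@main']
```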
From 3553172e3cd98beca1fc10406648a2c9af5ac63b Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 14 Oct 2023 09:27:07 -0400
Subject: [PATCH 74/77] fixes for alpaca w chatml, and don't include
attention_mask w mistral for flash attention (#728)
---
src/axolotl/prompt_strategies/alpaca_chat.py | 12 ++++++++----
src/axolotl/utils/trainer.py | 4 +++-
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/axolotl/prompt_strategies/alpaca_chat.py b/src/axolotl/prompt_strategies/alpaca_chat.py
index 17fe69be7b..975fee889e 100644
--- a/src/axolotl/prompt_strategies/alpaca_chat.py
+++ b/src/axolotl/prompt_strategies/alpaca_chat.py
@@ -1,6 +1,6 @@
-"""Module containing the AlpacaQAPromptTokenizingStrategy class"""
+"""Module for Alpaca prompt strategy classes"""
-from typing import Tuple
+from typing import Any, Dict, Optional, Tuple
from axolotl.prompt_tokenizers import (
AlpacaPromptTokenizingStrategy,
@@ -9,9 +9,13 @@
from axolotl.prompters import AlpacaPrompter, PromptStyle, UnpromptedPrompter
-def load(tokenizer, cfg):
+def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
+ prompt_style = PromptStyle.CHAT.value
+ if ds_cfg and "conversation" in ds_cfg:
+ prompt_style = ds_cfg["conversation"]
+
return AlpacaPromptTokenizingStrategy(
- AlpacaPrompter(PromptStyle.CHAT.value),
+ AlpacaPrompter(prompt_style),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
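(Aside: a hypothetical helper, not from the patch, mirroring how the new `ds_cfg` parameter lets a dataset entry's `conversation` key override the default `chat` prompt style:)

```python
# Hypothetical helper (names assumed) mirroring the new ds_cfg handling in load().
def resolve_prompt_style(ds_cfg=None, default="chat"):
    if ds_cfg and "conversation" in ds_cfg:
        return ds_cfg["conversation"]
    return default

assert resolve_prompt_style({"conversation": "chatml"}) == "chatml"  # per-dataset override
assert resolve_prompt_style() == "chat"                              # fallback
```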
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index ee8c634966..820202b80b 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -423,7 +423,9 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
)
# Phi doesn't want the attention_mask feature when training
- if "CodeGenTokenizer" in tokenizer.__class__.__name__:
+ if "CodeGenTokenizer" in tokenizer.__class__.__name__ or (
+ cfg.is_mistral_derived_model and cfg.flash_attention
+ ):
train_dataset = train_dataset.remove_columns("attention_mask")
if eval_dataset:
eval_dataset = eval_dataset.remove_columns("attention_mask")
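(Aside: a small sketch, assuming the Hugging Face `datasets` library, of the `remove_columns` call that this hunk now also applies for Mistral-derived models with flash attention:)

```python
# Sketch: dropping the attention_mask column from a tokenized dataset.
from datasets import Dataset

ds = Dataset.from_dict({"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]})
ds = ds.remove_columns("attention_mask")
print(ds.column_names)  # ['input_ids']
```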
From e1b214c62b24594465d756452008e83f5bd82c68 Mon Sep 17 00:00:00 2001
From: Casper
Date: Sat, 14 Oct 2023 15:28:12 +0200
Subject: [PATCH 75/77] Clarify custom format example (#729)
* Clarify custom prompt format
* Simplify format
---
README.md | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 57447b36de..c70abf6480 100644
--- a/README.md
+++ b/README.md
@@ -297,25 +297,24 @@ Have dataset(s) in one of the following format (JSONL recommended):
#### How to add custom prompts
-Using yaml. Example:
+For a dataset that is preprocessed for instruction purposes:
+
+```json
+{"instruction": "...", "output": "..."}
+```
+
+You can use this example in your YAML config:
+
```yaml
datasets:
- path: repo
type:
system_prompt: ""
- no_input_format: |-
- User: {instruction}<|end_of_turn|>
- Assistant:
- format: |-
- User: {instruction}
- {input}<|end_of_turn|>
- Assistant:
+ field_system: system
+ format: "[INST] {instruction} [/INST]"
+ no_input_format: "[INST] {instruction} [/INST]"
```
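(Aside, not part of the README diff: how the `format` template above would render, using a made-up instruction:)

```python
# Illustration only: filling the format template with a hypothetical instruction.
fmt = "[INST] {instruction} [/INST]"
print(fmt.format(instruction="Summarize the following paragraph."))
# [INST] Summarize the following paragraph. [/INST]
```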
-Using file:
-1. Add your method to a file in [prompt_strategies](src/axolotl/prompt_strategies). Please see other files as example.
-2. Use your custom file name as the dataset type `.load_`.
-
#### How to use your custom pretokenized dataset
- Do not pass a `type:`
From a045db02146751548fec57a5d3f31382ce4e5959 Mon Sep 17 00:00:00 2001
From: Casper
Date: Mon, 16 Oct 2023 21:13:46 +0200
Subject: [PATCH 76/77] Mistral: Sliding Window Attention with Flash Attention
and Sample Packing (#732)
* Implement Mistral FA + SWA + Sample Packing
* Handle unbroadcastable tensor
* chore: lint
* Simplify _prepare_decoder_attention_mask
* Uncomment window size
* Upgrade flash-attn to minimum of 2.3.0 to support SWA
* Add original condition to avoid error during inference
* chore: lint
* use torchscript to prevent oom
* chore: pylint
---------
Co-authored-by: Wing Lian
---
setup.py | 2 +-
.../monkeypatch/mistral_attn_hijack_flash.py | 109 +++++++++++++++++-
2 files changed, 105 insertions(+), 6 deletions(-)
diff --git a/setup.py b/setup.py
index ada5fcb289..e3ee54350b 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ def parse_requirements():
dependency_links=dependency_links,
extras_require={
"flash-attn": [
- "flash-attn>=2.2.1",
+ "flash-attn>=2.3.0",
],
"deepspeed": [
"deepspeed",
diff --git a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
index 21a6ee0842..26b511d06e 100644
--- a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
@@ -14,6 +14,9 @@
flash_attn_varlen_qkvpacked_func,
)
from transformers.modeling_outputs import BaseModelOutputWithPast
+from transformers.models.mistral.modeling_mistral import (
+ MistralAttention as OriginalMistralAttention,
+)
from transformers.models.mistral.modeling_mistral import (
MistralDecoderLayer as OriginalMistralDecoderLayer,
)
@@ -42,6 +45,44 @@ def replace_mistral_attn_with_flash_attn(
)
+@torch.jit.script
+def _make_sliding_window_causal_mask(
+ bsz: int,
+ tgt_len: int,
+ dtype: torch.dtype,
+ device: torch.device,
+ past_key_values_length: int = 0,
+ sliding_window: int = 4096,
+):
+ """
+ Make causal mask used for sliding window attention
+ """
+ tensor = torch.full(
+ (tgt_len, tgt_len),
+ fill_value=1,
+ device=device,
+ )
+ mask = torch.tril(tensor, diagonal=0)
+ # make the mask banded to account for sliding window
+    # NOTE: as of 2023-10-14 the HF implementation passes the wrong diagonal to torch.triu; it needs the +1 applied here
+ mask = torch.triu(mask, diagonal=-sliding_window + 1)
+ mask = torch.log(mask).to(dtype)
+
+ if past_key_values_length > 0:
+ mask = torch.cat(
+ [
+ torch.zeros(
+ tgt_len, past_key_values_length, dtype=dtype, device=device
+ ),
+ mask,
+ ],
+ dim=-1,
+ )
+ return mask[None, None, :, :].expand(
+ bsz, 1, tgt_len, tgt_len + past_key_values_length
+ )
+
+
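(Aside: an editor's toy illustration, not part of the patch, of the banded causal mask the function above builds, for `tgt_len=5` and `sliding_window=3`; `0` means "attend", `-inf` means masked:)

```python
# Toy version of the banded causal mask for tgt_len=5, sliding_window=3.
import torch

tgt_len, sliding_window = 5, 3
ones = torch.full((tgt_len, tgt_len), fill_value=1.0)
mask = torch.tril(ones, diagonal=0)                    # causal lower triangle
mask = torch.triu(mask, diagonal=-sliding_window + 1)  # keep a band of width 3
print(torch.log(mask))  # row i attends to columns max(0, i - 2) .. i
```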
# Disable the transformation of the attention mask in LlamaModel as the flash attention
# requires the attention mask to be the same as the key_padding_mask
def _prepare_decoder_attention_mask(
@@ -53,11 +94,29 @@ def _prepare_decoder_attention_mask(
sliding_window,
): # pylint: disable=unused-argument
# [bsz, seq_len]
+ if attention_mask is None:
+ return attention_mask
+
+    # NOTE: the attention mask and the sliding-window mask are only broadcastable in certain scenarios.
+    # Without attention_mask.shape[0] == 1, an error is triggered after the eval loss step, but only when wandb is enabled.
+ if input_shape[-1] > 1 and attention_mask.shape[0] == 1:
+ sliding_window_mask = _make_sliding_window_causal_mask(
+ bsz=input_shape[0],
+ tgt_len=input_shape[1],
+ dtype=inputs_embeds.dtype,
+ device=inputs_embeds.device,
+ past_key_values_length=past_key_values_length,
+ sliding_window=sliding_window,
+ )
+ attention_mask = attention_mask + sliding_window_mask
+ else:
+ LOG.info("skipping sliding window mask, not broadcastable with attention mask")
+
return attention_mask
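(Aside: a toy check with assumed shapes, not from the patch, of why the batch dimension of the 2-D packed attention mask must be 1 for the addition above to broadcast against the 4-D sliding-window mask:)

```python
# Broadcasting check with assumed shapes.
import torch

bsz, tgt_len = 2, 5
sliding = torch.zeros(bsz, 1, tgt_len, tgt_len)  # [bsz, 1, tgt_len, src_len]
ok = torch.zeros(1, tgt_len) + sliding           # [1, seq_len] broadcasts fine
print(ok.shape)                                  # torch.Size([2, 1, 5, 5])

try:
    _ = torch.zeros(bsz, tgt_len) + sliding      # [2, 5] does not broadcast here
except RuntimeError as err:
    print("not broadcastable:", err)
```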
def flashattn_forward(
- self,
+ self: OriginalMistralAttention,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
@@ -91,10 +150,41 @@ def flashattn_forward(
query_states, key_states, cos, sin, position_ids
)
+ use_sliding_windows = (
+        hasattr(self.config, "sliding_window")
+ and kv_seq_len > self.config.sliding_window
+ )
+
+ if use_sliding_windows:
+ window_size = (self.config.sliding_window, self.config.sliding_window)
+ else:
+ window_size = (-1, -1)
+
if past_key_value is not None:
- # reuse k, v, self_attention
- key_states = torch.cat([past_key_value[0], key_states], dim=2)
- value_states = torch.cat([past_key_value[1], value_states], dim=2)
+            # Activate slicing cache only if the config has a `sliding_window` attribute
+ if (
+ hasattr(self.config, "sliding_window")
+ and kv_seq_len > self.config.sliding_window
+ ):
+ slicing_tokens = kv_seq_len - self.config.sliding_window
+
+ past_key = past_key_value[0]
+ past_value = past_key_value[1]
+
+ past_key = past_key[:, :, slicing_tokens:, :].contiguous()
+ past_value = past_value[:, :, slicing_tokens:, :].contiguous()
+
+ if past_key.shape[-2] != self.config.sliding_window - 1:
+ raise ValueError(
+                    f"past key must have a shape of (`batch_size, num_heads, self.config.sliding_window-1, head_dim`), got"
+ f" {past_key.shape}"
+ )
+
+ past_key_value = (past_key, past_value) if use_cache else None
+
+ if past_key_value is not None:
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
past_key_value = (key_states, value_states) if use_cache else None
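(Aside: an editor's sketch with toy shapes, not from the patch, of the KV-cache trimming above, which keeps only the last `sliding_window - 1` cached positions before the new keys are appended:)

```python
# KV-cache trimming with toy shapes.
import torch

sliding_window, kv_seq_len = 4, 6
past_key = torch.randn(1, 8, kv_seq_len - 1, 64)  # (bsz, heads, cached_len, head_dim)
slicing_tokens = kv_seq_len - sliding_window
past_key = past_key[:, :, slicing_tokens:, :].contiguous()
print(past_key.shape)  # torch.Size([1, 8, 3, 64]) -> sliding_window - 1 positions kept
```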
@@ -120,7 +210,13 @@ def flashattn_forward(
qkv = rearrange(qkv, "b s ... -> (b s) ...")
output = flash_attn_varlen_qkvpacked_func(
- qkv, cu_seqlens, max_seqlen, 0.0, softmax_scale=None, causal=True
+ qkv,
+ cu_seqlens,
+ max_seqlen,
+ 0.0,
+ softmax_scale=None,
+ causal=True,
+ window_size=window_size,
)
output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
elif query_states.shape == key_states.shape:
@@ -146,6 +242,7 @@ def flashattn_forward(
0.0,
softmax_scale=None,
causal=is_causal,
+ window_size=window_size,
)
output = output_pad_fn(output_unpad)
else:
@@ -157,6 +254,7 @@ def flashattn_forward(
query_states,
torch.stack([key_states, value_states], 2),
causal=is_causal,
+ window_size=window_size,
)
else:
( # pylint: disable=unbalanced-tuple-unpacking
@@ -191,6 +289,7 @@ def flashattn_forward(
0.0,
softmax_scale=None,
causal=is_causal,
+ window_size=window_size,
)
output = output_pad_fn(output_unpad)
From 91a016f410f7e9b65c05cb829ea2c39ac4917568 Mon Sep 17 00:00:00 2001
From: mhenrichsen
Date: Wed, 18 Oct 2023 16:21:34 +0200
Subject: [PATCH 77/77] badge (#739)
* badge
* fixed text
---
image/sticker_fixed.png | Bin 0 -> 379330 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 image/sticker_fixed.png
diff --git a/image/sticker_fixed.png b/image/sticker_fixed.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f5c67a1922120993bd350eb1213e2d331478b76
GIT binary patch
literal 379330
[base85-encoded binary literal for image/sticker_fixed.png elided (379330 bytes)]
z!!C{OJKtVx4MR)*?#t(&1{AR=f_Xr7_&J=d}+1=6wGlZu_
zRIO58K9Ab*oqpC>9=pP&LIByed8bZB(j>pMg%z?S=yx&mS7T>4t?rQlg}5I@+!AeOhF@
zNbo>8M|pw>2-DYkkq*xw>y>UhGx&p-$DU(MqeM*6XYVQ3xA*D%^yu_*vt0Y9ytWhV
zBmW-(6C=3`bU<7|n7$t~h!au2d(XxrLF=7r{Gah+@!NG6InUL>$3h|Pk4&7UoHlEU
zbDyUq(8)aCPc7XWG8UcsO4M7*WfLz{3M5m^MSA>I9~CM*9HO$uAtb^{EA6vwFMs)C
zCTnIWWYN>awfgr`SKO3|YxC;ixh~Z}mYCDwO3dwevyjzzx)99eeDT2BgWbzL3A<0k
z*jo+)pH;GG1S|giFdR@CFR@Gw+a{9zCV2y$(Lud2|CA#g4x4Bp|1@V
zRc=i&9=%txs6w1J6WtFC5cku%O+w}9Fbn-V=vVphU3+;F!r}st=ICOz82Rp%i8s$+S72oLyaS#eL_^GFE!Y3f7?8iU(D3k
zE-5daJxG$^&y>Y>KIa1V4=@drf_J<2!D2#SMB@fN*0ld)+u-*P)|>Uyw8ZC@_P>(N
zJA6&Z(z@BIkN~5Gq+Zn;g;warQNZJo~obR*~MR*Z-vP6%SBO)uL0>g7mxkpG|xl7;U9~MpJtsve9SMmx38!tdo&j
zo>0|cV~;%BM}1l#_%xp*bE%bhvfoS^zv;;$65HRmd>e&Y!o!J!|#NUnC`+r@gC!H|;4puH`!I>#d((oR^jy
znIJY@QpdCrD--Ww#@5KzgXx5)=>pPQ$9pbcP990`){d2&bR$Axybe+IL3oJR4`QVS
z{(su`azi*be;sMdig~RJ69_hodT$<=t@6Eip#PR{PFOKCJUm<(gw^1B0R?O5+a%Br
z$Kvdv*8MQSpo1gji1|KemxFv>7kS>!HX7cTZSOWY0l{3UeblnH%
z731a0V>ky46^g_mlRX}0^qB^s2r~(Wk)19)Gi;Gy(jRAQPiQ7cACPdmP?mv4TblI>
zald}?oip73Br<0dCK+}Qkx$5y9Cfyj7r3XN6zu1C=4|<-Wc~k?91UH!tG;n=x~z9k1*4`f;i6p?xAYLD
zW6JT8rwW$F$bqMgJ)xJbb+ZDmIGYdl0(P&L2PDMoKBY?@0042)7$cAKSkDZy?6cD
zR5Qcs7T8{Vd%l+Cg30|6GH>-YvINyfwTN5NBQ5xVA(aV6d-?Ke77fvVeRF1mF$*hV
z=u`mNS}RSm=|S_PI*+Q3CF*WM@3O?R?5JQ~c=_OXk?_dvR{6*Ys^%H-yEBOJ_jHlM
z!cdUx{B;vG1;RaB6&4<7_s*B0U6buwthV3siiKX#x$!DRk;<=5+w5VvmursKPo`YP
zC?E$O4;R{rWi6l4&H@yrCo_E(&1I4me=mRg`}{}+H!H5~{j>b{_^*cZ1&7&IZq76+<3MtKzn%sT)mrzl?Ie
zPScZxuI+Y#PZGGYh}1@VvGzN=&S^XY8^gU$C1;0s+)rI4sdWhXt2`i&>8D~wBx4oD
zU0fjkDIef?LU2~RY2P9O?r2A;eh7#hBxD&;5?c$`i6DH`4{~T+%mU#S5=AAaJ~Abe
zusSd;JXzMkdw^Tjr>Uoa-q9nX@fIvfdmnjqN8O)TRWR)M?o2=AmSY^
z3Ed0%kmXQkxkq5NJX0_87Orz*4ZaPb$X;?MhgAWS!|}^XL8IC`#CqrTp$gg0KBGqH
zNPmbLKIcG!rrXK1NDk#@JX>YSu9ntX+{Z_h(DG30DPqcl3|5A4b`kHEG!2AJ3G4t
z(#R>>Q+y@cXYPQ@5c8*(Es+>Z@%RVn{h50+aF#;|$n<+JNSK>-&=dCB;2oY+Chb0)
zOwvsaKkyWlYZi>e)nn-ZZn?nP*5&Vn!K<+KkGYGjBo0+NAsJy|nIf$~E-z@bip&Ms
zWbKME%bkkK=JR$Fx=AUE2Fq4)RV{6~Yh@J`+|*DqwzJf|a&q1W7b4cJ7%L%f;sQS6
zFgL$hj)g?7$`9tpEd$QC6#Mf)uis~3jxg8DEnLjmEpE@YtzSGl`wack7jyR$2hJZ}
z4W!-O;sp7f$lK2(cWU-X>7rQoESv|dtHC%fa&J1J%m9*U;7QK7m{^1%LV*pm2hcm$9Sc~FKP
zC)U6GZG*e6G`Y+L%i|^qeFHLX1_--mB1$NP)8^VwnMvs6q`vFiz(IrJ(XrHdCtoa?cw7Rr*jFOO!2X+^ye7=LYu@Hrq=|&F(q*G_dMd
z4{{STCDZBoZO3qJH~k|0anXI>!e0h2wZ7S_!f_&o=*@qDQcUqf>-n0mr_yEtJ452L
z&iV(I`l*L%)XP@J?sjn?7@*&vj-M#zpDn-aO{_UG@+QE09^ul2cVfik?l48N9ag|d76{Tei
zgwg1NTwOdPXy3}B*XJ%@>jazWRf-Db&UJEh0osiDMNR<>HMPRNAn=Jv#sE<;6H%=6
z{ibtH3F`9F(hXuiLS;GHfA-98f!S4JFa}QAhf<^(y&eD&Jl?N0aS6I8Eyg+mj%p&~
z3lxxNlbW3prePRq+Xnp#dMw%`VLvi1RUvSDVtj3jep!+<3|=#t@{)T{<@`X6OQ%yI&k#shIlIyhC{vrLCZv-2QBGeBBGf~#%`OO6Q{Hi%(yGlJ|&HF4#=O~v=R#ti0ez6AR@2|!PlkZB}VM1-VuVio>>vZF%d+CO~X9Oy0Y9PS>xBsP}>D=n~
z@pLTFd!?Kan0xgs;3Gyaf1zFMggj^7n(I{4;aCz7EZ}gKyOMa9xt-Eq&l%b
z_g$+Vz<*<9w8kjniIFd4PkXwZG5ff0;_#8G-wz!WmV=O;p!reVLIjE8Yv}We+4%a?
zx`^uY9>Vsx;M1?Gqfc{GR5VOMxLD$nk~#lWjs)LM)JqmPR;1I`VRql=ZOVy&FwTl^
z%%%WH#!Qxk)#o(f<5}j(Um@?iHCdF^#9_b2ivQJ-MgwtAEERJ`x>C
z{FTF6!Sq8di-)61=SjEW6XTAhLu3*oR|Hr@E;S4DbsI)*F2ViFcOVsD~loxgFcow;n;18de;KkG_ZYqW1JL
zg5Q6{E7#r{WxRkL}v(e^lU3eTjHzX4o-3Mux|TVnljp>93{w^?ZcQ$)Bpn+Qj2{
z>`AO)mi@`P;n)-}(=NaiFDw*S*#8EE#+=0{_*Qos1_zJhh&~s(g!_gG8K~<7d?$RW_UPL<|v7ndq8*Gb=Kre3m`|
zS>IgvLkZSZUbObxR?{lbQsnHq{5jXb9G#B;Zwm7gRtYsGY{-GPyn2*|r#XmjgEaco
z^C-OKvqvSc9c4^Py35a>xae%}_giaN8x7Ma**zU+hDMWv7z(@16EvMbRIDgxL&3}a
za3EK^#;IrJz?rk_si3a
z$~%vL&m$!0R?q?av;ZS)ybb?3$QeROX8oUZ2t@|Lh;_2lP4seE&8DoB?3jT@YNS&L
zM)7{4d{Gd$s?yqX=LZ)o=A+b-$
zl6?XENr}1$bgYpfpN$3UER>8<3leJdQ2gsCD6+uxKDxUpm@4VXgC%-a25I!z{sbD`
z6Ki
zB})mgmN0IZI2;RWDyz@w{h#D`Qtkd^}(S8p+7*|BvLB-C=-K2h?sdSbP5T6IX
zv#$&i;U7#+G_TK+u__t5Y;uX&vnk|Vd9Z+R&p+>k)Mgi2!oXAYT}N$r8P8WAUrQzM
zCA=o@(j#q|Was(68xz|$ytufod*2}tr5SQ@(wD1omG-2dQKhrEsc!ny<93Mbzj#ul
zzbv&}v^Hxh`-lGAA}cnd8PYSqp)(IAP)>bs7Kd|uS_z$FCcaW2RDqR>e^OvT5hFGZ
z@^#9~oyg5h94iZJZ(t*ye!95#Ur@DQSM`B!jIhxA(vjKO13cV+
zthqmqjb;o7HkJZZH0FVX@yQcH_%y2u0KY%1Ux&tGNMFzT3ZZAag`_|;C0wV}=|C!g
ztGYm~{4`hp4-$c+jld3<=ZgWGrUyb^I?m)}FOAxzjM1T!GZX|;^{4J$tj}K4JUfzH
z%$Vr-ih!ar1IbA=@5Qg2=u~}S=@i}*U3Q;dwP;&&KYYQ1;fB5cq_M0C0yqE}1+Vp>
zf9ARvau^06gm77!1Q3Y9g`WM{LioA#>tM@Lh;uSb(swjN#Ry?V)RJAcd@U*Z#Uwx?
z;Ji=?oSkLpi4c{^N<4wl=2{NTQ(1=kfhx8aBkumRa11!5Kud15;G%MpWB){OLx$cP
z70?6ncbM!3^&Y0u&TwN|+B&LxcXPcnKlU=4;G25hN%OwQ*k2o&`RdZ2Fwbk11HI>J
zT%RVozBRV9NBz3v?8%qftl0@@({aY-td0YI{791NQgwj;8N?IIqvVGjhOLt?^;?ds^ne=IQ;`gefxWXiL5q599IM7H3w~OP{Zi(y
zwqqLa7uLofqApLgd4nMO#ms%~2j!~S4EP}oB5(bNA8heQJMYx<^kO;Veq8dsj9PGK
zq`wX)pyDn3XEWf;9$PIhr}6`nU8(UNr!wDN!qb}T2O;RaT}fv_qX=H#0zh+$rU=zd
zysfhm_>EwCufyjxu7JAUy^dFfb0j2div|wSAJqU^2=7#zYah)&Z0Vy%n7Cx30@?OW
z$PW**$tQHeo8YUbo&S|OOo}7ckB5Pnk5had?(YGn#iq1lx3-2$?-F+k+C}L{zICDp
zL)?nKDsjCvJUv^`NtA#z6{3~*wVr-z#i#My>r{7GfVpLet>3#_-A3i?HD2GFw&da@z0(J
zBmr3f0@M&klo!-QfIa3Qjfd0Tj$Fn-PKdumzDh8JbbxE3evxh!)tVK7nF%p1iiC^B
z4e2K`01JpNstyYq)PJ%?u)}#zDAu}Dg-AF-mSjQ(CZP&Qcp@x?qxJOIiCX`|IUOh;
zGlb8}O~ZO&A$B2W9EAzln$zRD$X2uEixG4RVS2CB6hgojJIMI%S8~`RwQZKmcR`!<&_%_%n|h8R~<
z&cVP^dB?PiG3Gy55jnaH0$FffcaWyrYMrM-7R2hr&gxh3HM_J|jJ*05o#Sfc7oRQq
zqfcnm!igDG;p#xDSMTbEww)pm1el=F7fnEic2!$;El)#!)`LLtJEp7h@+f($9IStW
zD00*P*2JT_Pp*ynurKbONJ$=$f@5e*xp8hPLu>{t;)Ph?=m5e}?<7RFLNnYBs+c)A
zWbpmpwEBbPpTg0~>SwvQ02kQe;D$wymN-?x!=1PfCLu77s&jIewkmYLD{(1`8|Z9@
z*a6{;mQz2IHFm#x-VvNB6)GV3tqCE`HL;a4?~qN?MZwdVz-i_O5zXymw*0Jsx{UuG
zo(Y=&cVqqy*IpWB8n@e^5zMqsQ1YaDp?m+8Zv}HudJUdj=;XTLUAPMiS+NVW6&qO=
z98cV<(sZ!uJ{?8hdq59wLu>_}ec-R$bQqxI^SW14yYN5;)YgJ3vm7qs7iV$dA$d0K
zSTg(Ax!lPUY(aVPGI8PYOAO+}_&H2jQ$8k3U8JbB&p`j6rSWP%Ae_z(QeOISo?ed6
z55m|j5gZ#J1RE3xARt6kxLB1mk~mHykoF50UQol4L5W}VltIyv@ZxB<77U!7~Nmw86-rO(jT
z$L01I#~Kqm%pY`aW5*8g7iwGA%vQ7FKckUln#hfL?q@E{XoVF6fEtq}7$YpdSB&p-
z7X>~XPG8)9$HPwKVFAB%2m&W|rYW2se7B6DJVU^{JS+OB@88|qq(j;&s9G)xGClU%
z!V%$#<)~z0V0i3COe6@Fi#h>sE4v<#eNU{A0@X=jRG%P$o3jq+oPDOoSs!bcV=8Cf
zOrRN4!;)D-G?(<_nCHjeTdlV$!VPz0u{UEjEhoIGHuVBS(tg5;qk3tEOKJq%Bt58I
za_GhWELOV1e8DrbKMsGb282zlEr_u}mLc)HGu7K?xIIq7Bomns0xI
zseXQ6hNEWAIQ97;ky=BwLY&~j#CV#3fHA{u3rCKkdHGZb;@nuHsKIaHZ`=vI(YgMc
zw^*U*BnrH43~DrTY2961-b>8E5Suak3?k^m4GE3GjTjz
zTox9V@FJSHt|MbfQ(@1kc=S*Qb`vr|nv=H(iFI<{cXf@!*Hd|UkQfiV(_&JhiVPi2
zr%2}jY6FuIgwf5GkNO`WEG)^$2=G?uFDY-8Mo>?x2j438NFTsV1XJmjGCGXAP^Nde
z(#KQ6itpV`SYD-4ie6UZQAzow#B-EI@r0pky)}0Cf5;*W_<)$C3K`Vs>f+QIpSv5x
zO`^LeGF;cN#vi&!T27m3op0*G6|aSd8=_<;yOA=|Uq)Cp*WY{D^AvLi)k>dhS8Vl3
z;ft?Uo0;}U6{B-AWA+MYc~$>6>K<^cPD#M!kf)VAE^Tp$=
z#GvR;+2@7i0$56IR`M<}(&gW4*VnreioI|e=7`Rb-~h`_l|@N@^jmouQ6
zoA|_;Tanj-j;~Dtc42vmoj2P-9)axBu;+^sK1q1V-}IRE*GSJ-&RI5yOuWoaWFiZ;
zgl^a!pnlyv6pf?#prtTf(0t^)TQ(M?F;-B8M5=G@lpF3U_I`Vp@1wgImGTplteDB4
zBFL_J6urwB-cRGe53ngejAsHX7LpibG^E0AgC56aNaAUZ;s@yFvgpeCv9=mJrl!%~
zIEcmzZmWMPSGlSB%#jmuO7l6*6{0-K`CW~!v})`DcMSgYz$!=b6O4I#H9R^U^V{HHNNI8wb6wHrNvBx2{;)-j&s|Q
zq=kzh9xIJ)7^#w7(;Dtc=)?g`iM+7-W0HV^2*yMPMec_E%Cg4ID?2IIko#VnsjYtG
zG<=purD54z8ao>dm3rERr@vj(wNX4gd;R|O+Vvn^l`dUP}8@*>J
zxE^rd`@cJr^xsX|Kf%V0eT4er01#I$K4#JAn1{u;gi+^d%cU3dqT&tz0ZN#e;)VSI
z+*|e1he^|ZuLmVV-o)lXD4iufEg^AW|KDna$kJ)1R957ll+2O`0RIV&gCWX7_vR9pNmg_$l(Fbh7eZ>z=(0OhJjIx~6}z#tq(^2_=@qsXl>Qzb`H3Cht|uL$9B
z0LpnmF~0+V;8cGmd9XUE%<+ZfOQHm8R(VM&lXTtpYbA^I%gvY_zAQj<^RKK=D}Iv5
z?t!G+6e-tRWgph#0&o1p-|Eb8CrX)dfWaZ?#qGO^sq8J!zeXDt@cr&*i!9#ef7_em
zAF)ci+TiA9P$u07@RtZy2dLp_I3pdli+*~<Gu>l0KOO7rmZ2=G@YL^Ei@nneNQ$DCV??;iQQ_iUrsI%Dg
zJUf&^*yXQBuYnyt_lBRWvb(Wk5h4JmBqG#BcT{%9Rf*<{7CuvtWGPd@h?=?FIXu0*
zK8{DO0dFFkIO>^*xY5*<85IE}2Nhu>SLajSQ8)n|Ad8iOIv&Vgs7?gHp=KbU-*P;9
z;r=+uWhqA>4WG+JgIHr}VKK&fSNZGLrgR@3OLO~DxOe(8%?Hg%Nn>1i-iWQS^2*X=
zrZ0V)@IAvw#IiGy#?__Z+kMy9&3r5dKA?a!stGm#n2o_gH#Z28?BJq~PyfQo%FXD#
zA4K~@ctr;egl`Q*2t(s|`(wjyof)C%WD@S|ZOZM;@bDfRsX62=3BMgcon8Zw$BLl!
zGjT4yNHdOq4_88E$|bbH7F6fchOfq8hC8d1CJDk!W192$uO*i>0tKU!ytBMpbv^0j
zEny?vm
z|K9|8BKENa#=nhwtOwsA%wZPk&^1||H6Cs%jQdW!7qb&5w_MI?Y>j>6??zdCgm`S{
zHt?pw&|!DC7m)O{+!`x}4@7+rvSUlW=q-65Q3_2SQcN{Ik=AEmi^}L(I&AeT{|o>0
zAc;IMrM3|++d=%)&)6VlR!DcLqJQv_&mF+ap}`X
zbS&a)plYsiEKH}^kummVs7mLan4x=XA8Juk1)gRPGr^FG+`d$jGBTHpQ7@>khnw=@
z!yb}U`tyUyqp)3CK0g0-R7W!iLMRq9tB}7!t^b>(k}6m)2%KQYmiB#@jZYPlN-Fg+
z-cgi;6%#IDohU8K7`uSGn6tqkVIg~~(>k6j7h6n^%4)-kTRqn=(|
zJGXb`&T#Z`n1;3J;1oCDr|NrPA7j%0;Drzs6%`D15?LOJ@`4N+O5)`%mc?V_SjJ*k
zT_gfYL+sx(3XJLRtrs|pa?qoI=!tK2Ec4V_yfLJ~RrR?n2v@!_IFl3$$zm+T04}a#
z;%GKD`{?<#$56gw^BwVT#DKV}FBs^}{%(yxIPQ+8Rl6ABBQ?`#H>{k?`7e5i
Sknvj1|&OI492PlI+efm6Tlb{1xNP-$$i
zP=um^-55LG$D%gJ)7Ai&yjgb*Y|CLQViIoyCuaiK$8%@&r#fKM!H70`^iwS$2^ONT
zhj}S#4W%>F-N+e#x+dYgtTsGU~`2Hz7QWo
z6dxNHSH4uR|lU+G%x?_I{JTQewU#~9Nj?DmfB_VM(($0>ww{_aPF=e_+A_2*%M
zYcLt}bC_<8IrBB%bswxp_?Hc5s^SE)T83Kdl!X=D+LCbbUR3|Yzwq}w`7p1MRVN__Vk
zNTrrEd@91Bw&_IB{Md(9o0dp~eoAoCK#pHTu9sf1aZK5G)!x9clMR2V7U1GOtC`O~
zED%9!6;R=QvauZ_=F=0+%fz%wM|Jqwu#UU{`c&m!(VC7knkN-&HNYH*Tap2)mVU>gdDwcAZ{c+qK
zn@pkrm4?KM3Qrmk`=GK1v*~hZxc`TNp|rTYI_6eO)hDdpPK|WOJ1L&Caiv{sH_V0i
z(}fPN*wn$0BC~S&{1O(6@pLC>Fwyfy=svq{eXVFx8?=9YR%X%p#xJs>(RyyB>8GnA
zf_J|C?40dCle|CwFi)tq5@T0%X0)}n&tQMLcF0m@p8~+nI>(gtdx*Zsc`J0YRm}EV
zr#Q0rqZa$mGzh&g?#|Z2Pif^e6olXN!XC`1VPTOam)xn!1v%fTXTX!PAOKmoT@(p3
zGma)(&T7fshoCVgHXPPDVOVP{N$jUsoY9MVG2x;|R)m6#E}rUN&YvMM6}~s`@x@z8
zBqVp(<5iqw;GWkIJ6^oRRR{oL;N*eZ(rPj!J}=>Ycf9rcCjg}8xk(8aQB+N$WqXwn
zh6k{Esk=mnBDW%R@Y(gp`1p=;a|ovfGFE;>@}oLH4~ENX)@bt$U7(?l<$&1^&VPEE
zp3IfQlS%;3!ZX{5Ij}6glK91#!u;0YlyVzRy^4I60{n}W8PGTTtMfgQ?ukfE?DRXhLLlQa_a2A2z^A0*I-z*Ekf
zNhoW$)n?JN_$e2&RKtpfACVcS)>ZY>Cc(GpH$09z%!O&n2H#~9Zs(+Maug^Vn`I-T
z_>mY&jbqDa8YCWf@f0>E4N!5}yoIC7`)tnQABCSJi*gm#yE)wyyia=YGQOXKjB5|o
zd-Yss+Wj%C&(q@u4DBzPdgRc5&=BdM9ewW8%|+BTTH1PDy-PO0j~+t*=0xkb?!x)L
zAJLz;X9tD%4dUTUPmJhgo9PGgIh6<$%`(f&NhouEKYc);t@&n;XprG
z;Fdi>fkUUYXfg0C8=l7G8g~vcq%^ESq7wQ@<_84FhX6qm_yY5_rPMvlR;Aiqq*3B^
zHrG<^K%Sn`iSq8E#^dJuEJsH`GI1L`o@+WBfPU*8U(N8%U2CU$OZ?fa(05d0dy@5^
z-L86@1V`B1DJS*o^#Yd`gRxxz(DECU#%dEyLbrggMjN`=TZNs
zdg`(DyPX#_LiYE?6WqTkE!fiLpFc#0DjF}(%fjw0|C~yRY>Lm>{d%U+#w<6>1`{Dh
z_np*#g9yA&fK*$WGJPVi(7ULIn#KmJt6c*^KX$o*&P4FXOaOtjLwWt@T%9(){;UBy
zOQ3{1Q(lKRV*ZVZ`pNs3=KW<2Rb)y(;;~L*P8`?u2P(vcFLBX655ra^h{4k5V|anh
zW>8mg3Z9C#r@tLgGTs9!{e-HofRVJ(-H8Ed_6z}w`gdk5ta~%#W_wLF799_Jsm6k4
z-9`(s)IFh?vlF}zZs)OGPUd5R`$JBb56|>s&DJvu-zi}cp|VwF{rH1zbb(q
zqX1coMOkQl@w5zf6$7s*UxB+KHc>iyZPDw6DIPcdW^
zmX(C8B6!(^F{khQe1Gxa_4&=g&iN
z%TUarH+Wq6ny8`W4uw*^){+hs;zog=OBC}Ijo=86l4Y}49NwbCSE4d=Ux&erzAbg0HI0+g-K5a7z*2M7mmMuK
zYkfwzKQ(0-5)^zdvN+~v-kSE^h&403`I7Op9(OjLEcz|?#)^pJVXiCGvl~X#?8D4l
zb9ha^BNzN~09`&!HNg|umrk7Hkdb%47^Uz-KBAU}|MzVC%M1+j#&<-{(giM9=2lTT
zgUav`)@$<+9~x=ot8PQV#3SDpd+vfYt&GS>N%_{?wqYnw91y@;aP1)+B@xy4(W23M
z&CkOM30G57TOF~Hbw5FJp8U{vHbPv)RVN(v5go)uvcE<9N#j)oeN
z#x~56X?^q=0OWTcut%%gc2-g!4vo0;eOj1_f5YW`V4i9%XnW>BIgB7Sl*!)ab7tV$AaQ;wWEb8@|6R$)xd%3|7)ciJO;iXy
zgw!GhZBR?rg&TI!i>V>yKADczbC}haYQBbV(|!d$;_F?7
zCr8KiuPW04(c9i_I0({M8s9!^D1~DWxMSTqwam8d*sh{Oz~Mf55(EgSgUvta{+*c1
zNA&37+uj#_m;FI9S$y5`>pHu$`tOLV#g{aq=wuARo?KT>j_aGVi!
zQ=>0KDZlsbcibbqE4aB%Q3Ekna#i;367d!Y>l;%eqp%9rxuPOl0Z{3&vq~DI$V`Wc
zx)ncKk}(?SdgL&Hh}D?zNC4e6Yq5Ijl`rwFl9T{LOb$sykih0F^dER*_Ko3k!
z&4~lIVtgkotXDbbX3n8Z1xNEu#LNt~uSV;gbe`W)XnbC9<3itNA5N5%&$>we%jJB>
zh-QIIPztaTV|amQ(}l##XS&tmvmw9x3WoHwOVJ
zoTcmMTfT+mbHCL)vg9GnLe}I_n4XiP`EYA^z>tJ^&FI=v-OG5N!?xz?)*|CIu&2Z8
zV4`dG>9k{fVpD3(=k6rQ(3jq%W1z8(jHi8jOlbXSW7UqrbEF{E_o$uH@JGElTFL0i
z=DV|!-no=ms@ei?R#^R!7w-;1ZiQ(EPTS_!bTF1~4+QW{|8R1P?(y%J*5%6nP_&qm
zKEMs!9mzP?+wRt*mC<Hs5t$WFaq4Y4vr`cKk2f>w4LMJ_!&J-+pRFyqsX
zk5Pev%gRl&;fv!=(WV4;mNovA%XbtE!IPgT(}rF(G+W}G0D0s9Q7M!mx}c-3K)d#b
z@qes1vymuq2ORqRAWM(UOCceKnZ6?W)1RQe{VR(`OP=fAv8cBq=YG-k3#H6sSfuaV
zx`#Q~oSL{f+Ki43#9k(*KWJ)#*>oz6jHkDdC_Ee2b}iU+H-KQ>;rzJ&Dv(4gV?ot%
zjDc_9jat^1!ttiKHD{G3&bZGoK@aBrUtj{kR}*KH!wB4>Kn?5QjYhDE{`BDhIe_LU
zGzmZImrB
zT!3*M+;QKdy1yf4fsNISoA#2y>V(X!!1>u(Z>STJZR@7&cZ{3gC}-{mV&6qyuY@W}
zl=BV#Lht_?QhM4*@)unm8AAU)X{Xl>K%Z1~`JV=re!Q+mcL=@yLSt`yh(MuK_W3q7
z?CXAXoBS*w#(x}=`Fu1J*Wq+H?%UeQc4Y$*E$ag*{>K|)G65(hzWr)xDM@0#^Cvv4
zuoHFDG`h}ZVZ!*qtNmHFu;~!3pyQ@|G6wAJE$>S>f|Fp&RuRegLfHm>he3
z%JB)z=QVf^+lcScrHc}?%@ih?&9XEN1_8kSR
zRLsTAN~`nC7kx9lEw3MsU-qWf+b=I!w_e7_+fLC8pN6g~+MO?52Qq!<@|BLhA!3le
zx1DxqIju=5R;FxyHST!nIfL1u#VCE~_*?Gz>VEpL&-R<|E~@4OW!+e^urzh4Rg3el
z|0>Feu}pzUhxeV8&tJjPWq%s&uOKq`oF%Ni9WPNkJw1Hx&I6c(7@2KtcH|oecT_*Q
zW)J)J^6V*iI>-^Y&YKcfvrc~vao_tXc#be{7wG3Y){RZv#{|cg%c0J4cNd2(oix2|
ztqpe$*>q?5J-(l^NZfMt6^*Lzie1lJ3f#>J6C0kKHXqxCQu+`hl_kAJR&GMA*_(EL?d5DR;mN*Sr;zp?1>;kqNy}uv90Jv5+wTCJFyK_#
zqv1vx%jiU1Gr5SckcZ4PfQqw#5tkXRbfOax27T-YVu@6qwQn!wzuJ1$;D`B}-T;
zKcdAU?Fj6GrhK@bJ+Z7<5H&x0kMcN~k5UL%{(k#<^J6*n$t&IKk6S8gm-Cp+zVK}?
zp4da*`24c{%g1rChr%R7_c2|I(NvNBao_e`D^Chx=kpDi?on+0Hfi~^8UuDEo;uHl84{HI{gC_xYnCAWC+Zwo87{~G^JsSxz9V?aQ&1_1x&ipY$N>|=4f
z6Z`z#$xGXv1U9i%G}i$fA_m2b7dvmIfeaL%L1~{Oej(1XC9N+(lociup3y-IR8u{i#|bZ^UGq&(KnU@nFBDMw3Py=?cn;fDw_wC0P=+p+S5~5Ypb>@F3X9$q&-gYyGR(i4o-G>3FYku#H
zEu*M&e2S)|LNdKJn;6&Jv?iWN8ep~uqauDl@nOrNJaGz!iaaB8jaU=5zy!2AS*&~=
zukHy7=^xIeAcoyXMnH1r8MI!k@M2RoJld_)8QfWwgpqV^k}L
zK;%Q2O`QFaOymvX3pgW8IEKD}z3w1i@KCV{%0dM(UXgp$-W4<4s$koc439oiH@|ZT
z{l=D)5sscpWl0f41D~^@8lj6i5fnnA^s!PMKR>`ZJEw`$15O>GQ`&4!oq@+-j@H5i
zREK_WG<3M2$wCXsvldsQCdQXgwv-d%dB6)y9`qX-I-!McuGU=qoz%Lun=@sMr;|jh
z794LdT!cNXp#`sQ5sMlY-q<7RsLjQKh>pw(uc+pXXU;&AJHjH6Suv#PHd{JuJm_bU
z>3f*LNAWel*rHoF@0#=Wa7FW%f%JNH1Y||aRnK^Fb#eR*H8Kt_
zm#sAIwKpW1HH>B&Vf8+>>Nn}_@sX~-6lb0|vKPqIxEauCZv1zfLmo3fyPhmsqzO3h
zgy}pNkXgSo*$I=U&4fv5Bau{7MVuV!UDmoEgsfre*5CQ2e9K?no+o_7YB{Klj1h$1
zi>*EGGYr0b5Ota5(ayYhIm03s`1^X=bhmYu>2#dX(Q(~xNbdG`o?Ybl*E$q>cjd@K
zu@x3Cz9hsu3RCdT@mwy$a$Icj;CrO5l0d=uZaGv0^Vty}*WdJ7XOo7=)aLh}F9%B^
z>O#WsSs{OPA|EUK4Ooe@ACx)vR$_)>)lC0t06
z_f={~R1^QVC`WU(-K&obt1rD$7_nZdpb@jDu4H9wcp9JU1Fj@PmqQ@ZU$lU4d>=jl
zsox(??K6(LU5pNeeH^QlEIy-MNB1mxCj!0SR!U?WmUWM5%*g>|r_{J@HpGbBc8>d;
z8p1ARkGdA8?ROc4QIuj|r^xmVUV7}_c#bs0h(YhUKQzMthnT0w;%Eo2bze%imlM9@
zpD!@~V^NE>mCYWxkz(BuaEd0_?4u~3``iwos}iS2uA)Iz)x^9+27_cy6&BstS+KW7
z1HCSE6?{C%9rSyvXe8e@s_T{$+|l?#*{i8yhyjRsuqGA`k(;
zPZNV8t;#o2czEW
zui@h+b|DI($goB4?A3mI0fV>k@fRhBe)V;uk}66GtLfB2i?!)Til64)2ueg1
zVqVuNp}r0E#Xfu80xi~6)BB<|zBb-HQ~mwdv5=GpUBmlpAW`~Dt~O(<$53&h@MW)5
zVVcKLa7X*zFRP07+b8WGide#ja_gSvL%j@*I^@hcMg&oHup!L9GElXIJ%~G1k-m+v
zU!Y;*;O^ztF72Vo@WC5mWhyYiMPZuP&_s41FpLA|<5sk@7AiD1S~PD5I$}uzKxX68H#|w
zjEXPh#2&M&f(VCfA6aAPiaN|Vlrx}P`rtLKPSQiHn03?w)yy^GUDBFGnCf)16>Y4d
zShwEjBFA3}@y*2|Z3&__M{$|^Ws?P#*yAAVHmN->`t%oNKmDNhtaZQ^7I6o<+8N*P
zH$z(%>{`3}`Jguq9rw!|?83VN%yH|F$CuSC`
zlXC9Kt<;zxW;&V*0~p6f;9v1K+m8tYYE&tSs9#KgonuljQ_OfRP@r`1#C}l`5TR0>
zqRpTBcot_emRicjC`EfrPEA%w^P1Q_R#(tfLOw(SRt2Z(8h^!rs0wTo9@W97Hc3Ap
zYZW04T;|d5$iBM6%!%EJ@NhaIIf{8Xeq6j7xIb?Rl;-u2xKZi`4;+?Oo))C;K6o%-
z@$%+Gv=K~0S8Dc_(k#Z24X0EH-m|aB=cMRRnU>XOmc3P6_2-TB{XE4YE)PY
z`I1TAsh!pi=>9d3VM!m~dk0Q%h1q(|V~V&HzH#}~>`cNnYS?;SE%tIhpJd>BpB*a0
z-hLgyy2m@Z{&MxC!QSw&fYCr&y*ToiZ;|PKh}Ge_m;5vL)oYjyoB*dPaw;VD1SQ~G
zbsyGB^#To{!eoJg$I-&~Ey9=$BlH+;YLY%gpDk-%aHV=DG0utw0olT_2`@
zj_top>+X#%sa7HfR9OXNPVv9oymSn}_{ZVImE|H-e?Z=pKNlbiWa11Y+ljTatfkVz
zmsbvH+XgbZ04%>2|D24$?xfs{CouwtZ#PvKHvKuN-Zr!>*k7=9ee@liniB&S`JNV-fK>l7b^EMGTU8}C&bZPyVlMt%*BEf|`T3PV%=-vk3Bt_*W-pFiL1S;VfbrRM-$78$nU
zS6P^bp+v;6{Zh@BmIg-ykte3=m2F6>I&;l0Tbe=&Sn}j2F^>lhBdk*Pd}YpZ^2LsO
zdaZa+4+*xb9~XeGT*@_<^jZjtmIVc{>2q$l11I
zkRNbpomS0+ZpQd%8l?LpJlWbN0B|1ua5IYCPKgu2_xdrRPz}NJwCR&c7uoBo81*G<;
zbZ`NmxhBU~(C-!jvz%+4c;G_tI0{rY_RIl5J_q%j|0Q+wC!P8c`RU_kLSQ?p1d$bp
z;*FKB3u!RNiDUaEgsj}=7bj0oEkr5=pAuwplmPSj#OHG3Q6a$J%0AgA(0z{p{ULXj
zx=G||j>O1%5|m{{_v1?6w&{UD`*Zt6;hardmCE8!3Wy?XgF(gjB->&;_$C4W7ig%>
z$I8*d@b69Inhf~>nM35G?Ni!FQNgw!18wCJRc>wTD`MM>D8wt|00V@+|f>DVRGKz~k;QZbz
z_%Kw7J^}4k^H^sFkmJuV<`cPZ%$q+I>j5?W#8xehFIk}8bHy-IyV**~lpwvj1SZES
zZ3KdZ=%gG2Tt5_0a3HrVN;j(UrTX92yy>q!J8iC2jHbdpdL@uGPsCytk}!E}QX(|w
zWaYn?#>9yJ(K0~(Mv5bY{`$XCa>g0}XL}=L2|os+Cq<|6=@_WRW!w!mBMmlnWrUp&
zIw}&7ipdDw{etCpanT?jPc%u*Sbma0>p+GOQ6#iC-GJN1laWQw9bL@5YsP{hKdH&t
zgDQx)H$pO+^OA{jSCwzj^4JUv;_lq>!Z)9CFg=NPU4xDQGn-&ldyC2Aq%53!f5dk0
z8I7Z;tSlY2ui@8k`q|vjx!HLNmwW1%wu=ScWxKm|Im$oqo0CtJi^=2dmveb`f6t?v
zoc2Yp?(qUSjUvk$ybrRG*1n}ZKI>PU)ZbB@tqOP_v(-cYjQiGS8F!WIpFsOl<33Ql
zt}2zcC%|vN}S$aM9Aj0NaL|Upc;N
z;vU5!jQ%F0u&E)e@h03gLMO+T`fUPt^fP|_l?kp?&Dq~MEjEioy@=2z-I5IEx=I5h
zWxZ3PAtfUY)rpQ^0cgQX#Fh^(aK@Tz20AMuf2Kdq@ggTN1rVE4$MVpynRr+R(
zp_a5J*ctC%VwzHiB_10)MV{w#s9WpcPk5`
zcdNrFc6U#Ouh1AiME^1fMdL27KBeP=G0|tQi;{|VL94maf4uSZt*ahjlsExs+Qn#!
z(z?h|%UtsES!-KU?8Tbr*~`_k&&4i>uRCnNi|@l78~FnFK;}aN2j%t7a10mI{*~*>
zO~VCajM!eX=^oz*oaHcEgK`)2xw#}pU`r=p7F-@@QbtQnjRI7ZmsbP#Aa?RD+X|VF
z5MPvxu4+zh%T4{JA*KBdV-2Il}X(hA-t7!D$%3SIo2%b-Biak
z7}2A7)+|9>fKr|&eq5{C7CP?<~Po{y=7$GM{py>
z`qw2bw&fCOq4ERmP1r}msPf>}7!0!DC}Pnq{Qf7ZC4&JCzQl{yT(mKfKn3sSIch@JL`L+rJ1FW
zM`du|PeXwrc$b5T>|(0YzG@^&-@Jk(Ssoo(O8Wy7C?7lr~Z
zEjJB5=YL+2&uhpkydP66d_1~l35p@kg5+S4_wW#9VnEOb-2ZSBO_0qkz`FG!1k&6b
zrHo+G*k20NRT`p!w;`dFOV*)c!($nsQ-$ZR)Xp5Op~;?42S@^_rsj~9@olh)>5xFbkAqkVDa4NZOIF|Zn?&=vY)7MD>`wTCF`84eI$wl%ZK4w`{^{;C2>>~*)pJLN
z&waGO*&Y=a)#~IfGI`4<7UfL)ZWLIXmeRX4+MH(H8RnmV(c2+b6nO-h(J{Fu2}l@0
zN&tg<F#mnEcxPeTMd9hR2Ctl}2j6X|r=-0(45<2@0
z!vwdg;6!~l?okaMdUh3Bcuakw&+NXQq{uNLBJ2|1|
z1Oi~4J)j>~D-fhBeG{Fh*e?zb4>k*CdBopOhwxAx^(peRpqrbPVrbSm)yZ`+If8W5
zVpa3s4f^y3^E?&FoxRq$652b^EmMesS4AT9{Tgw;yHcP2_p5er)!stnqC$y;K=;p+bAylr{BUx@E+&KW&Jltx_!4r-ABofZ
z{jkR)e+3bL)z7+S9iKE(>|)uJ`V^&=yD6ZW6BO(dZC>-`eLrLGRoFfgq31bADrVilyYzOE!_b`fnHK*0
zY?}D*kK|$~2-ws={mMhaqI5o*5?~ShEdmEK4YylrW%d*X{3PH`A{k}g?vEi`_l=^m
zZ+pDS;PTX3YZb*!(D>r8urN7##CLsDmKd-T2PIng&F!@pOX0n5r$dYtl9rEZBhn;x
zZKKnH>0Q(KB`P|`gAh6c9}lOJHfw+0sQ{NumqiRkU}9n-0U#hCC?xV{&`L7|iF?Q^
z=iqK~6;wYrT?bR$3(>$y_(_+{vglE-s^#L)U6OGFP>BKFW|(3=w=;CQDw;Ipitk8N
zw9vNBTf*!M?!Wvr3Rg6Ybl`efL{q3PSQ{lzaU_c}U;oWB1TN2;`o{jb$!qJHdBkay
zrEV}qh-lCL<~NUS(L&dsD!1ET6eMj`iuTS<*~G^FjM87U%&oVOx~b!=9qw*n$o<^b
zA|duyk7-Nv=NsPd-Zfc2vSK3rn_}>$M>JO)OxBS&^(3@_uiv9}mxj(b%CndhR7m+2
zt4u>18W{UK+Q*bYVWF}A$I~~k$Juq^PHZ-4Y^$-^*l29qwrv|t8Z}NDPHfw@ZRgDU
zopYUkFxNbLuf1-pd(AD48#=nx7rd&R_Wzg~5i*rtL70l!#F|-caZC=4#vTii&1HZ1
z?0Bdcr*SA6TraR95y5|k;&UE1GEchCI7((@0A28
zPN$f?yQ?1%6qE#}sHoVT(V#(3D|~UjizpbD@wi&GQ;o1H5`l`P;jvIV#S=f!55F~sjKb$%E}x>`7@y{(3$qP#%2&LuiO&zNAM=(
zBF(Q3pQb|jR9E*rejnVoS;#}*()fBUG6-g?TKPj7=+rDSwUwZq|NjLOB14$f>y!to
zb=ykD6lSH+;>NHL6Cx^je#g1ty|P&2|w1rg+sRE
zSxZNn8~5;+V|-a7AZs-JhIV4p?+vX8UeXIaCbzC2DQENsOtiGm`E?PuKycMO;K@s
z?i@v0SXg)m1)~>zWph%mVy>5JmOjG#dsR};pOOFp`7v|t_pR~k!t07!%;Ysdlfx+y
zV!?iMSw8cjs1kV6;V>2AH8(((LI|>3np-Hw{)lvzPydy7K;W9wJ#+DcREYbTbe__t
zm|w8R&5#=~j`bS@3OO_)SU`8_{pkBRzkcWc!}K1gni#OC3{h
z%ike}mzAzeOV>J#m!Rk@XWCT%WL)nbjFry(u4kAD?c!p3K127B7StPqkU*~Z%SN5U
zU#dj`ARRFyQ*(wC8gL9Dt7kA4?|S%6ITzpgPu6s;krL1i@iZ|dAlr3=NEXuG&O)!*
z*5Y(<8xRn9jUwxx)h1k?2Oj_zlMcRn+3upseaej_180<77>sIlcYDG_mq`7x;`%)K
zojvK07XmB>KR1`dehru43q+yM1Ot)p?|^x{@l&UvbeVVsh8FBrxqpRzi}Vd4=W}@l
zsZ{`--{J<0vVIu3NItLWC40wTu{(aG$;lisCZgUGu>-(Jh~dCF|MhJz@GjOSkw{d@
zn@4gWN{<8ww9KaXZ90Fv7Dw9%&a)`@TTKkBCgMz?IzJmXI
zF+=Y&<}eD01ZIi#V!5S9x~hz!lZ4|Ui|gNKq(8HTris`~0??DR#3@LuiXnd(Ib?kM
zMO%r6I)nzPR$KCE6*XIFBoP3QMf*%b^V_5~iDE1b&9v2SqYWw=qvh(Wo6Z{x>x@0x
z3~b&~byi!rAHPJ1ajW%~gSK8S{ITQhzSZmF_4iL0qL;U^@3iVQYMn*{2uDogy8JLI
z4tEY$GxhExNyW;DVHN3&5JJDk-^%Aopl!0A&%=(>S9c&NpC
z{g5deCd5j38c+&_2@`?3V#3_l^$iM245FZ*K!_&vUqJ!(d?MvH1xQ&iM|~eJcz?lJ
zCaP^^g=7eM`*`Z%!hXlUT~*M;jqP~F3u@>PMt?3OeO{!-6S`?5>!TDFemP*6NGHdN
z7g6(YdniZ$f!>5S(!xbtv)`ujA>6@W+WP84foR)FPA^Oj2g!_3{6TAsy
z%frJGD=eOK*PD6Y(i<$zblBptGpN+N#8+?V;OBA=oGto=m#JQ9}g@PVEDm+NfTnsnM?UGJ6hVpM>yQSLwW^!<)$k42S4VVU)1wp+G3l49Nqu~CH!X3E5r
z=vC{O6|`CkjPDK~@Yr<({eyFJb)}_8`whQ&HcDUZdf42BHu#m6qU0$z
zS}kPh;cTv|TW&l?G+#0139J<>c{>;Oj7vJS^*gcq+r
zY`b@Y!vXtr>eaG2Y&5jAe?`la=oTw;CcX;*rBi8vlb4_ks7_CM{1XO$h~Z$*a`8ol
zC}%{#U>$s45wBeyUoEc`ScW;a5O2N+c}0bVM*tvja`QQl;unZLOH2L5LLf_8ViFui
znq$=}ObZDy>|g5_NAMe#7#3Uv_V|ho1`wbbc+%Y3P0SDc(jc?wrtc9%dv(>w-Gf6{
zvJ&G4k3sv50bm7mo3Oyqrjkd|%>NZ*3yxmPAS|j>Lj#PP86QZe-W`rGX;z}V6X6KU
zPJrA{czPiW1j42XGW$#<-&%b_;iZ^~atH{{e$`l#r#;z|*7LMZ5el?k^d`ch)@4qv
zW7`f+AlOLpHaBAxYah&w48YytFq~EV;3{{*#^!!P&!}(j%2Z2lL|Gu2Y}1
zp=aIOxy)jcK>q3T-mZ!OgM_QsZc-<9ccg&F{d&IAfag~ISF8H4FMW!23&?J8JPj8;
z^kb?@s=QuZ6U$Jx|nIhqyZ_NPn6@pGlC4=Xy#9K%m5NR
z0P(lXR@p*w9&YXvS01G_vzm;t-e#T-Mw!F$bLq?FM(2gUs*IO*nXKj<(TnDRf#}14
zfH~!W-|@nZ3iek;5^-H#IE!6V1${8cFp!WY4r2}`UxAz?hFHlfCB)t2wht|^NpSQt
zGc)Ji)mtr}_a}48rei5pzeom*%s4$}f}oD3a&kG`!t7&Dt#TiC2N2dc>}?m?LuV3L
zTp#EAB;r6axbN$|+r_FJ0c+|D2aBQ`mi1C4jn=;iDEbr2M3=p=Eu=1M8u7jhUiVw9
zVP4PSeaF2av+GBT^)}5|uZ6pV``<|eZY5nm*K|VAhHgA>pYP9>YRsx=Qvf6dS5k{}2vVH|b`OJ~1>L^d;KTB~K%&^Avxy%Ldp1XMjl%x;k
zb0tt~ITWoItBq}az=MOEFPHsJ?@t~#crX~$Y*zO(L5nk28g0D|`tK^#Yj}8g%v`4u
z$rLWhC089yBt&*$00NsC88t$m)f2pe9*&%w=vUj@%C)j^@O;pWBs4oaeQy{En17a2
z`b}yTQB4516G#8`=Z*jj42&OxLc;Zu5k|6LGhH^H*x>ix7o{NQd874`lsXhE6l{DP
zvTwy9r1VMGC>(h`{V4iCgnIeBR%~$5-lea1^VRCf`7TTKmj#u?kjKy43keiNak2`D
ztF5jt&`-VOYe{GKZn`xcMNgQE@*oQld;y?hy#IAd%~B(af{%B-yWM4@ty0sj(F-=O
z(G#NrMZZZB``Z1G_Sb_Kg@eL#B-&-t?ZQgO=rUWf;zUs|vJvol48bHXnz?2qk13`p
zOcw0VR*5M*kn_k$2f{Q)fIG;-)1%$?WNiOg6l{0&zI(JKO$9z&;-C-0CJMj)7D|Vp
z&LKO>iA;9UdKlHM&`3>e4hPyW*G9NXdMsxz!-|L!0(_M|jGh+^ngq7ubXuM2+S0KY
zv;a`j0B0NFRn(?V9cS=sc^|KnG=pMfttxmWQteM!;jT$NgotB;bACS7d!iYABgWzt
zbfRy(ZZ{gy1G@(YCtuGuq1fp}kbLse%;P$&ck2UpAa~y)sc!%%B
zoqnZle>w|A(hvR4Yis9c-x|{Rp!xOa_DJw3l|rYmG0axGm#fRwZo_m3J-o&Vn#Mh#<2+)my4_=6J3Hzids`Z=Glf=w0=
z5!0nzo9Bn>5JYSMRxAPCO$^WGJ{?wW?<&?aFX%^6m*e2ig!^z2hBN}Y?U!%oP6L*G62x`GS)Gle>=Nw<1a40YG1pJSX45J;fI^FrQyVc
z0&S=LDb7q$HIqQuu`c*XQ1r)K^1-I>xz$-2pKae^#IAXTXI-5AkR-R!4n0$&lu0e-
zLxvNXXT~_Ia#;OJ2I#%Kz3T#1&c&Cl|KSRUa@*bRo~TNzRc{sa^hP37W)O0@R{q*C
zBH(cm>h3`HdwG4ZMutQqUu)67jlC8OHiB1v=`Ch3x(!;O6Tipnh3{k-ITU$8c++hi
z<>^gHOpcrEkO4z0xUM3B=HP}&EIYrr*l7JZWX?$hvPzbtQ~XV%*KYl#X=Ylz(N?Zf
zI+>l$aJf-b+I=HlEYf30WbRL;>rC?kpUn^KD%(mM!9l+dW?vSo=BpSFqqetmBLarg*{Otw3Q7_LLWk%5AK_2*uM9BE+&
zsjzsrFaK76)Ad#_$$dledx)$_$PeeE@%rOJr@51doUnA7D$N=-3(4U&0&URyv(;pK
zMx#=~2kTYqHPU8OEMvRc*6Is<43cNHdR>krqpY=2zP>v;#8CNVG`^pDH;;EumsWkg
zSb7#SlRPK^y;-2J=AJW;9>~
zUtcuX>DMs`e7ukcPljcwe-Y$+IaO9c?Baxnhll)_GWqYh5M7!5XF2!#XE^bD(Qqwd
zPz>4tq%U0HSLTjGNP;-3vT^3IO3!P4dchw6ItdF@_t!kKBxJV*hjbw%Acr_aLc)m$
z>8xMS={N%+ow5G9sUz9=!poI;%F?9`>1?sVjYmaXzFemYH~J7A(&l|}q@ol&Pr%X1
zapu$|ol(c<^R^Wd9qM+%?Wc_|E$8kz{ylCi{5Ni9lDuz_4fJMHsjT*&?>6~6iHVQ_
zfI@wgZ_l&^pDWI$uY2Tah0?Mn{Du+a2>4eQC#JF-?Vd!?fI$0T{PXjLAZdWh!_7f6
z+74+V1fgK3;|1;;1IW040~{dzS%O%CU1m42_pM&*q+|6x86aDwKsio>@ZE;;2(00Hw40gu*UpptNtA#Dea0M
zFjHK=HT{`IhstnMnk&hwa8IyjQx%RDYtr0Q5Rs!%D5ZFKcsOAiHF`j~MH0=ei(Dxx
zI=Vu;ORseolb?^Dvs&AI_qDEgEUn%g0LT^e|Ls<#b$2*n{_p`4Pw434k|-?CLT>If
z*8hv>X3&F9k|qrU=}uGlp(lmS;a(BPgYX}yP)+V17DF^rYpGRgFb0o8D;BKa_nn8ordaynmJ8G(M&2^zbgJZB(2LYVImx_q3(Ji-f(`}lbR)i~)
zu3?oTJ@mwh09ZXEX;2^lxVV;M3Hj@Vz=%luYQU8Ucg0>jE&hBVbcEdJ2$7!{Kasz-ml>Eo@u+s+gQg$1D{P!~sl2bWM5bT`@5&;(?uXm=-#u&R_+=hobL6v78idPWMkVrUY#oFK`=i+
zv+P__jY0pna(TuIEg(N{iprdds$ygu!KXwvb6IS7`;ysc7&Xl&gWvwLl`h@M^!7Kg
z{|&kS$1gKv1dw1_>1r8`6^jNvUQ7q^c<`hI`MWNDYO`BP4M~`fSSdx|Z7QN3${!US
z8b`Q(Wq5s<6e>Ebw$@)#vYAm=Jx+VlD+3oXj9q&%%1p+_=KKW#TPH^s{(wob+3mu3
zD6XMPx5;H?*|A8!6E>^P3wMsc*=|i7{N-{3CQj>(nN>WMTI-)xAh4dBNL>Qky`I3pveZyD0+Rn-(_5Qr?E9OkNap}nJH@-pYdUwNJ9a;T-qXE0i$wTO6qir#|w@oO#{~dQ>$R=W1)(T
z`x7DpT^RpRH6)=+3Nf^o%0x;efCe>hubjTGxmHq5e$PYj+Vi>M`q(|uR{I~V%D-m?
zUN=2I2g}dpR!x4^GcLIu@KCgHDutY1g&H??9YfN;%%lS1bb%VXN#Sxh8|AL)XnkQ7
z0fLU8rJgC^2Y_#{vF(xY<;oQS;l?=yZUBx;Z&12RDG;Q>Pc%Ywj05*UO2~
zOH=k*jW{YE*myfcXa-nvJl+J>^FLkRR~IU@+3g&6cEuKB>KlS}vzu(|G4Dq*6RvLC
zc?HJ<;Q$xb?DV;;U&UkUnY<~Wfg2mF4SqfkA^W5)KM#3&T7;%@;^~^3BJQ|!Q#fOJ
z$|SX-(+!|RgV_U^@64IXD?tO1uQJS9NY_kBxOsIlS2BOi5DOF_mKgPZf!sa?nP~fL
znv1ln?Cc}7Gjj;?r-)Nwax`nHa%=g<8xqbxr-3LlhHR<&{v6N6s)xl7pi0Rk+t}=|
zR8bbYO`XAJJJv)Nk$~I%{vN_6loK|MUdl$?IALC&Hj04n$vg14OwW}^^{=w+QFU%{
zgL($LF^5Vfk+!$N=DQ~l+Kf?ILzlP=G&f7@2pliv`L{nPgr!2fpkywTz
zCKDilI`SGN^ruKAx7}S`l^gP7XXnCH4tJl|NeYv`;b83v5xPL_bW3{aQq}=ocnmI^
z@##N!(=ks^Z%ejS20YGqTxQb4pYQ!MDwm+YlOj_QmymFWXqzecz9>@-15idSAttd@
zuFgp*@ad`%5gsn#x`it%H}-h=b({!2&ocadmQ7iV)+i)c53>HGmhPx}oy$2nH`$o-
zDuyf^Wh#xqI+MjT3X^W|&L^+ZGZ-AgeygJ|3moZ!BSm>M1rVLi=y~)0(xL8t;re>1
z3$$!pt>rZIcDpe9>?8Acdrv7j(huv#8>bBFUKwb09a9TOArf%?%xM5>-$C12%~y}y
zlAvSmsFRMNgOPrdf>v(ST2LaB00Vz*-1*?oCP0%C$67m3C~-&0BX^{UYrm{O{^MT|
zM)lAltA?NwT&dH*L&`I1!NJ?oR!cYa_gzftoaGN5V40ienIA|)YDMA{kKAFkQ02V0
zG7(WWtah<#;yfkgx!!869xDx6zaA;wsp@<GkeNV(ZeHyXvq@F
zv5GA=vb3<0Ez4Ho
zUq44r%W{r3W$8}J(w#o9deO77=+a}EJ^)JLY{|pLsy80y81vs>G-%M|pxEDngF#$&DK8FToXvB=%^jO)nr1^ov(C<7Z{
z2wV}KM{X6CW|EDP
zYSJTT1OS#C$7old4_?t&7)uP<8B`$vi4h*V!!uOz5Yp<@OZLkJJh8a48GOua^s*@j
z&~kX@6Uk$|D&jGY1N8^BvUktVKU9V4dc~ZMCJAtGm<`mr`a?oSG3n~c+t3p;{V4!n
zeeQnX8Dh@PFZ)dm4NSkv6d$74S&xSE7~Ch@L*eg_w}$wWn_-hA`==HKKU?iE$>h6(
zU`bN|%_6YqlEjdYFO;`C7LAIex>YD!pkKTn9Q!)GYD)e->)7Z+ZSan{+h9iN1K{Q~>01t$D?{oPMMrr{``QrP?bFDO;9L-t1
zXs1)7v7$^m4Z00gnjKnD;DgUglr1)O{@m_%`RvweZpX{WHaX
zB9{USTAp#+Q?rIqekx2_QGPCn!%YVXi}Ft#?;GXGrA^fvn~lz;lN)UumW%g;6WPw`
z^pSC~Gp7&kR~vZDJ|16S5LS!j8}2^&&n+#^*o^00ZbJ1MeTBX9IRZ+xlD}~~8`G||
z>P-5guqM{VwOPMB@14n1)H4ua9ok7O$9k$1`XEYWGY)r7OcNRSwsphZ1d!ky#2<^o
zkQRbR4P<+}%_lP&{c>L+EsREC8gg!5Cia;mgoOhY8U(39y=JH&t%JO2h+^Bj6L`PT
z!2OdHlmW3NB-&h##2DBv1#RZUadJ#j{R19&CM+r)FBl}X?Ox0`$hAbLG!Fd6yO(3IvSzGuvG2-3sGo=5*^z35(Y;IS=x9#i={i0Q=R#{@aJ7Qwf?gy#Txj?6#
z%P{~TWVsN-e+=@-Gft8PW_e;Hc*zolK(Nhw#*Oj?$@|Hze|bjkl=FPLj4YeA(d*(g
zd+J@|Kw;*cf|QI@zuohJl7mOT(+8vqNi%25>k4#Q%;$gvz5eMCC0~hL9@jS0=Er#x
zsDx?tu0#L|5fS(8OB@2mE4yVjr|}}%-5@wI4t+&w4N?qTDe>dUyy?fJEwg>zkn~8-
z#nE!%eT4+UM`bS|L~w~b-!^m2?k@t~Mkd{olx{MiFK(ZJn}^0?eLh$1&0a**?=P2!
z&gPX%K?Y~;Q^wLiKurqR4^*hAmis@+ei=R6o^j>fD?}+nS{=DE&{Lpq%tD--`!hH!
zRx4?ug0UeR^z6
zn@x+ofRNRO-kHU?cN3FAF(WqX5$NtL-36EJZWGNerrd4aY!4R+f3eXvw)ydfzK?;R
zNGF%s?7|FF|LypjuwP%>NLc=OE``EyHY$J?3?kw}z{ns$kVm|{pdjGnYq^UNbo!)c
zGr^w6qWn=seo^9%KV2>(`ISm_gTzhQGI+@2C(MozaIiQ)8jZx~APHPlz~}HR8bu_G
zo#IiT-sa|;-|A0#WgVZK5vy{Ijx6f2WHSYi&wI^45h$2@TzaqrNW<5Za@p_G1)g(}
zWTkb5o@zBjXap~mEbWRnq#(^=IEjG{{QgCL=L#__nSvJvrco`KUmZD>_!qT)-U)Am
z=@cUg?x#Q&jPN$(`-d`V_rX|tjaFZ?Xe9d6)s{I&VMRDlHA$}vsY0hiOFn-+v%$1V
zxi?+CB8^dVH0oA&eLc4DV0-7M?P?R9eqABfd0CY8LZ>eB)cT0{Ud8khPgNWRDx{?P
z@#n`Ib&~>0tVARqMTaU*^VPcJ)8tlmY=l9KqE#wFVOL}C5pRZA1!9WB
z@l7(IXkfiWn|I4ZiNZS0rt(Okip4c!JqT0I^>P_hurdkl
zF0+^HO>%|gXlUqVSiKg9K?cwcpO3#FL0_}MB$LPGcJKjj^}$xV(j;<%w@JW0L!BQB7KzQiB2AkF(hMUy<_VJSsIqkCPSX_uaLEsO0c5F%J&mlnfML;;nD_i}mMC8B;^AIr`9w1ft06L~n^#@Mw
zuEr1R(8kh-KCPNx_*=>cg?q*g`;)Cl*MWakeWHIc7R^||4sS8J3pZ6v^+RWK*jsH>
zzJLJKMw@H8dimYq7^heNFm9)6G!}!?&N-`8)2*e&3_~H*nlj{4jow`%K7xn2extK4
zh-tBZ-KtoswJV0l>L*MEe@aCX>Wm{DtIF#@|!}QK;zqNXSG~gD=*N)
z(?p}yv7wF*QauS
zF42mM2O3J1rAGn<9WH*)2}JPe8Z%a5#S5UPG}5W{lKs=k(%aykcD(*e7fZ)nfH+`t
zZL`C#G0Qvt((`)X5+0M;!Hx&Gs#Zd&4D}7
zB+@N0ALLh~SG+7(UwPtb7yZI6Do9_=+xW>eaPe;2<*vMlear6T?d}3;(=m6e73O=@
zh>-+RkP@GP9a7{cq3M&WI(bjTc6@s_yD>_7Ofj0~iTki?I&8hTp9tk;
z`$m4P3p;I+rBh;34r0?;LT{oAx+!Dv7AiG!k32(ujEO}U90Yt>9Dequ@1fc^l+kxzao{{}}XpRLs4_@VuDX)=*Y)?;F!)g>0aSXEe5sEL=kOCd8()civ~@cj;p-ay4{
z=pMJvnNJ*1r$k4yVc?dQjZNmUQo!@BpQ3-O#dQuIT`JuD{PcWh90=9vHNscwOBJ8?
zSGi|n!FcC&fQDELEluF%FS#$gL1Gp>)@YYwACDO$*?X{p-2HMuG>0M4V)BWB#}3rbxtq%yMhXOw*eT<
ziUpVFAsbjh6A#66dQm~a|Cn-AkrZW8e+7Nmrfu?Mb$I_o?I3E&!hXZbT~9=!zAwbv
zt^Uu0Uz|e4%D!wx)bATo);=_)y@mA53}!3|$4^(L1+X58Sx_Nex>7-CCGXdd!9PMj
zO66-w@%s&{QXj1r;@BJUxm-0)dJlM9PdKcX>AgGn?@*U*wkLCW&l=s%C~LN%T;u&Z
ztQRU-{nx?0J2u7-?zV