From fcacae3fbe4c37098cbf8a1f9e0f7a538e92dcd5 Mon Sep 17 00:00:00 2001
From: Sean Lee
Date: Thu, 14 Nov 2024 14:24:16 +0800
Subject: [PATCH] add llama finetuning example

---
 docs/notes/training.rst | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/docs/notes/training.rst b/docs/notes/training.rst
index b02755d..e83a5b9 100644
--- a/docs/notes/training.rst
+++ b/docs/notes/training.rst
@@ -74,6 +74,35 @@ You can train a powerful sentence embedding model using the `angle-trainer` cli
 
 b. LLaMA-based
 
+   .. code-block:: bash
+
+      BiLLM_START_INDEX=0 WANDB_MODE=disabled CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=2345 -m angle_emb.angle_trainer \
+      --train_name_or_path SeanLee97/all_nli_angle_format_b \
+      --save_dir ckpts/llama7b-nli \
+      --model_name_or_path NousResearch/Llama-2-7b-chat-hf \
+      --pooling_strategy avg \
+      --maxlen 60 \
+      --ibn_w 20.0 \
+      --cosine_w 0.0 \
+      --angle_w 1.0 \
+      --learning_rate 2e-4 \
+      --prompt_template "Represent the following sentence for semantic textual similarity: {text} <|endoftext|>" \
+      --apply_lora 1 --lora_r 64 --lora_alpha 128 --lora_dropout 0.1 \
+      --load_kbit 4 \
+      --is_llm 1 \
+      --push_to_hub 1 --hub_model_id SeanLee97/test-llama7b-nli --hub_private_repo 1 \
+      --logging_steps 5 \
+      --save_steps 50 \
+      --warmup_steps 50 \
+      --batch_size 120 \
+      --gradient_accumulation_steps 32 \
+      --epochs 2 \
+      --fp16 1
+
+
+
+c. BiLLaMA-based
+
    .. code-block:: bash
 
       BiLLM_START_INDEX=0 WANDB_MODE=disabled CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=2345 -m angle_emb.angle_trainer \
@@ -91,6 +120,7 @@ You can train a powerful sentence embedding model using the `angle-trainer` cli
       --is_llm 1 \
       --apply_billm 1 \
       --billm_model_class LlamaForCausalLM \
+      --prompt_template "Represent the following sentence for semantic textual similarity: {text} <|endoftext|>" \
       --push_to_hub 1 --hub_model_id SeanLee97/test-billm-llama7b-nli --hub_private_repo 1 \
       --logging_steps 5 \
       --save_steps 50 \
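
Usage note: once training finishes, the LoRA adapter pushed via --push_to_hub
can be loaded for inference with angle_emb. Below is a minimal sketch, not the
library's canonical recipe: it assumes the hub ids from the patch above, and
that the installed angle_emb release supports `pretrained_lora_path` in
`AnglE.from_pretrained` and a `prompt=` keyword on `encode` (check against
your version). The prompt string must match the --prompt_template used at
training time.

    # Hypothetical inference sketch; model/adapter ids mirror the patch above.
    from angle_emb import AnglE

    angle = AnglE.from_pretrained(
        'NousResearch/Llama-2-7b-chat-hf',                  # base model from --model_name_or_path
        pretrained_lora_path='SeanLee97/test-llama7b-nli',  # adapter pushed by --push_to_hub
        pooling_strategy='avg',                             # must match --pooling_strategy
    ).cuda()

    # Reuse the exact training prompt so embeddings stay consistent.
    prompt = ('Represent the following sentence for semantic textual '
              'similarity: {text} <|endoftext|>')
    vecs = angle.encode(
        [{'text': 'The weather is great today.'},
         {'text': 'It is sunny outside.'}],
        prompt=prompt,   # inputs are dicts so the {text} slot can be filled
        to_numpy=True,
    )
    print(vecs.shape)    # e.g. (2, hidden_size)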