From 643f868416d5d2e89430ee0326f25d1b70bda485 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 23 Feb 2024 14:48:43 +0800
Subject: [PATCH] modify scripts

---
 scripts/finetune.sh               | 48 --------------------------
 scripts/finetune_full_schedule.sh | 48 --------------------------
 scripts/finetune_lora.sh          | 49 ---------------------------
 scripts/finetune_qlora.sh         | 50 ---------------------------
 scripts/finetune_sqa.sh           | 36 --------------------
 scripts/pretrain.sh               | 46 -------------------------
 scripts/pretrain_xformers.sh      | 44 ------------------------
 scripts/zero2.json                | 23 -------------
 scripts/zero3.json                | 28 ---------------
 scripts/zero3_offload.json        | 56 -------------------------------
 10 files changed, 428 deletions(-)
 delete mode 100644 scripts/finetune.sh
 delete mode 100644 scripts/finetune_full_schedule.sh
 delete mode 100644 scripts/finetune_lora.sh
 delete mode 100644 scripts/finetune_qlora.sh
 delete mode 100644 scripts/finetune_sqa.sh
 delete mode 100644 scripts/pretrain.sh
 delete mode 100644 scripts/pretrain_xformers.sh
 delete mode 100644 scripts/zero2.json
 delete mode 100644 scripts/zero3.json
 delete mode 100644 scripts/zero3_offload.json

diff --git a/scripts/finetune.sh b/scripts/finetune.sh
deleted file mode 100644
index c36c3ea..0000000
--- a/scripts/finetune.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
diff --git a/scripts/finetune_full_schedule.sh b/scripts/finetune_full_schedule.sh
deleted file mode 100644
index 2ae157c..0000000
--- a/scripts/finetune_full_schedule.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_158k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
-    --num_train_epochs 3 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
diff --git a/scripts/finetune_lora.sh b/scripts/finetune_lora.sh
deleted file mode 100644
index 0456106..0000000
--- a/scripts/finetune_lora.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --lora_enable True \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --lazy_preprocess True \
-    --dataloader_num_workers 4 \
-    --report_to wandb
diff --git a/scripts/finetune_qlora.sh b/scripts/finetune_qlora.sh
deleted file mode 100644
index 05744d9..0000000
--- a/scripts/finetune_qlora.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-################## VICUNA ##################
-# PROMPT_VERSION=v1
-# MODEL_VERSION="vicuna-v1-3-7b"
-################## VICUNA ##################
-
-################## LLaMA-2 ##################
-# PROMPT_VERSION="llava_llama_2"
-# MODEL_VERSION="llama-2-7b-chat"
-################## LLaMA-2 ##################
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --lora_enable True \
-    --bits 4 \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path ./playground/data/llava_instruct_80k.json \
-    --image_folder /path/to/coco/train2017 \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --lazy_preprocess True \
-    --dataloader_num_workers 4 \
-    --report_to wandb
diff --git a/scripts/finetune_sqa.sh b/scripts/finetune_sqa.sh
deleted file mode 100644
index 146a8cb..0000000
--- a/scripts/finetune_sqa.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path lmsys/vicuna-13b-v1.3 \
-    --version $PROMPT_VERSION \
-    --data_path /Data/ScienceQA/data/scienceqa/llava_train_QCM-LEA.json \
-    --image_folder /Data/ScienceQA/data/scienceqa/images/train \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --pretrain_mm_mlp_adapter ./checkpoints/huggingface/liuhaotian/llava-pretrain-vicuna-13b-v1.3/mm_projector.bin \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-vicuna-13b-v1.3-pretrain_lcs558k_plain-ScienceQA_QCM_LEA-12e \
-    --num_train_epochs 12 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 50000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-5 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
diff --git a/scripts/pretrain.sh b/scripts/pretrain.sh
deleted file mode 100644
index cb70599..0000000
--- a/scripts/pretrain.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-# MODEL_VERSION=vicuna-v1-3-7b
-# MODEL_VERSION=llama-2-7b-chat
-
-########### DO NOT CHANGE ###########
-########### USE THIS FOR BOTH ###########
-PROMPT_VERSION=plain
-########### DO NOT CHANGE ###########
-
-deepspeed llava/train/train_mem.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path /path/to/pretrain_data.json \
-    --image_folder /path/to/images \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --tune_mm_mlp_adapter True \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 16 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 24000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-3 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
diff --git a/scripts/pretrain_xformers.sh b/scripts/pretrain_xformers.sh
deleted file mode 100644
index 17c6fa4..0000000
--- a/scripts/pretrain_xformers.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-# Uncomment and set the following variables correspondingly to run this script:
-
-# MODEL_VERSION=vicuna-v1-3-7b
-# MODEL_VERSION=llama-2-7b-chat
-
-########### DO NOT CHANGE ###########
-########### USE THIS FOR BOTH ###########
-PROMPT_VERSION=plain
-########### DO NOT CHANGE ###########
-
-deepspeed llava/train/train_xformers.py \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path ./checkpoints/$MODEL_VERSION \
-    --version $PROMPT_VERSION \
-    --data_path /path/to/pretrain_data.json \
-    --image_folder /path/to/images \
-    --vision_tower openai/clip-vit-large-patch14 \
-    --tune_mm_mlp_adapter True \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --bf16 False \
-    --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 4 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 24000 \
-    --save_total_limit 1 \
-    --learning_rate 2e-3 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 False \
-    --model_max_length 2048 \
-    --gradient_checkpointing True \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb
diff --git a/scripts/zero2.json b/scripts/zero2.json
deleted file mode 100644
index 7a01fda..0000000
--- a/scripts/zero2.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-    "fp16": {
-        "enabled": "auto",
-        "loss_scale": 0,
-        "loss_scale_window": 1000,
-        "initial_scale_power": 16,
-        "hysteresis": 2,
-        "min_loss_scale": 1
-    },
-    "bf16": {
-        "enabled": "auto"
-    },
-    "train_micro_batch_size_per_gpu": "auto",
-    "train_batch_size": "auto",
-    "gradient_accumulation_steps": "auto",
-    "zero_optimization": {
-        "stage": 2,
-        "overlap_comm": true,
-        "contiguous_gradients": true,
-        "sub_group_size": 1e9,
-        "reduce_bucket_size": "auto"
-    }
-}
\ No newline at end of file
diff --git a/scripts/zero3.json b/scripts/zero3.json
deleted file mode 100644
index 8ff461f..0000000
--- a/scripts/zero3.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "fp16": {
-        "enabled": "auto",
-        "loss_scale": 0,
-        "loss_scale_window": 1000,
-        "initial_scale_power": 16,
-        "hysteresis": 2,
-        "min_loss_scale": 1
-    },
-    "bf16": {
-        "enabled": "auto"
-    },
-    "train_micro_batch_size_per_gpu": "auto",
-    "train_batch_size": "auto",
-    "gradient_accumulation_steps": "auto",
-    "zero_optimization": {
-        "stage": 3,
-        "overlap_comm": true,
-        "contiguous_gradients": true,
-        "sub_group_size": 1e9,
-        "reduce_bucket_size": "auto",
-        "stage3_prefetch_bucket_size": "auto",
-        "stage3_param_persistence_threshold": "auto",
-        "stage3_max_live_parameters": 1e9,
-        "stage3_max_reuse_distance": 1e9,
-        "stage3_gather_16bit_weights_on_model_save": true
-    }
-}
\ No newline at end of file
diff --git a/scripts/zero3_offload.json b/scripts/zero3_offload.json
deleted file mode 100644
index 2dcde84..0000000
--- a/scripts/zero3_offload.json
+++ /dev/null
@@ -1,56 +0,0 @@
-{
-    "fp16": {
-        "enabled": "auto",
-        "loss_scale": 0,
-        "loss_scale_window": 1000,
-        "initial_scale_power": 16,
-        "hysteresis": 2,
-        "min_loss_scale": 1
-    },
-    "bf16": {
-        "enabled": "auto"
-    },
-    "optimizer": {
-        "type": "AdamW",
-        "params": {
-            "lr": "auto",
-            "betas": "auto",
-            "eps": "auto",
-            "weight_decay": "auto"
-        }
-    },
-    "scheduler": {
-        "type": "WarmupLR",
-        "params": {
-            "warmup_min_lr": "auto",
-            "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
-        }
-    },
-    "zero_optimization": {
-        "stage": 3,
-        "offload_optimizer": {
-            "device": "cpu",
-            "pin_memory": true
-        },
-        "offload_param": {
-            "device": "cpu",
-            "pin_memory": true
-        },
-        "overlap_comm": true,
-        "contiguous_gradients": true,
-        "sub_group_size": 1e9,
-        "reduce_bucket_size": "auto",
-        "stage3_prefetch_bucket_size": "auto",
-        "stage3_param_persistence_threshold": "auto",
-        "stage3_max_live_parameters": 1e9,
-        "stage3_max_reuse_distance": 1e9,
-        "gather_16bit_weights_on_model_save": true
-    },
-    "gradient_accumulation_steps": "auto",
-    "gradient_clipping": "auto",
-    "train_batch_size": "auto",
-    "train_micro_batch_size_per_gpu": "auto",
-    "steps_per_print": 1e5,
-    "wall_clock_breakdown": false
-}