diff --git a/examples/llava/README-minicpmv-dev.md b/examples/llava/README-minicpmv-dev.md
deleted file mode 100644
index f1d76a48c8433..0000000000000
--- a/examples/llava/README-minicpmv-dev.md
+++ /dev/null
@@ -1,45 +0,0 @@
-## MiniCPM-V dev
-
-### Prepare models and code
-
-Clone llama.cpp:
-```bash
-git clone git@github.com:OpenBMB/llama.cpp.git
-cd llama.cpp
-git checkout minicpmv-main-dev
-```
-
-### Usage of MiniCPM-V 2.6
-
-Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-dev-gguf) by us)
-
-```bash
-python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-V-dev
-python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-dev --minicpmv-projector ../MiniCPM-V-dev/minicpmv.projector --output-dir ../MiniCPM-V-dev/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
-```
-
-add 'res = "llama-bpe"' in convert_hf_to_gguf.py 514 line
-```bash
-python ./convert_hf_to_gguf.py ../MiniCPM-V-dev/model
-
-# quantize int4 version
-./llama-quantize ../MiniCPM-V-dev/model/ggml-model-f16.gguf ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf Q4_K_M
-```
-
-Build for Linux or Mac
-
-```bash
-make
-```
-
-Inference on Linux or Mac
-```
-# run f16 version
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
-
-# run quantized int4 version
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
-
-# or run in interactive mode
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
-```
diff --git a/examples/llava/README-minicpmv-pj-l.md b/examples/llava/README-minicpmv-pj-l.md
new file mode 100644
index 0000000000000..8a9b1680e8e20
--- /dev/null
+++ b/examples/llava/README-minicpmv-pj-l.md
@@ -0,0 +1,45 @@
+## tmp_project_l
+
+### Prepare models and code
+
+Clone llama.cpp:
+```bash
+git clone git@github.com:OpenBMB/llama.cpp.git
+cd llama.cpp
+
+```
+
+### Usage of tmp_project_l
+
+Replace `['tmp_project_l']` with the actual model location:
+
+```bash
+git checkout tmp_project_l
+python ./examples/llava/layer_skip.py -m ['tmp_project_l']
+git checkout minicpmv-main
+python ./examples/llava/minicpmv-surgery.py -m ['tmp_project_l']
+python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ['tmp_project_l'] --minicpmv-projector ['tmp_project_l']/minicpmv.projector --output-dir ['tmp_project_l']/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
+```
+
+Add `res = "llama-bpe"` at line 514 of convert_hf_to_gguf.py, then run:
+```bash
+python ./convert_hf_to_gguf.py ['tmp_project_l']/model
+```
+
+Delete the code at line 470 of convert_hf_to_gguf.py, then run:
+```bash
+python ./convert_hf_to_gguf.py ['tmp_project_l']/model_skip
+```
+
+Build for Linux or Mac
+
+```bash
+git checkout tmp_project_l
+make
+```
+
+Inference on Linux or Mac
+```
+# run f16 version
+./minicpmv-cli -m ['tmp_project_l']/model/ggml-model-f16.gguf --mmproj ['tmp_project_l']/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
+```
\ No newline at end of file
diff --git a/examples/llava/layer_skip.py b/examples/llava/layer_skip.py
new file mode 100644
index 0000000000000..1cf24955c3933
--- /dev/null
+++ b/examples/llava/layer_skip.py
@@ -0,0 +1,60 @@
+import os
+import torch
+import struct
+import argparse
+from transformers import AutoModel, AutoTokenizer
+
+def save_last_n_layers(model, n, save_path):
+    state_dict = model.state_dict()
+    layer_keys = [key for key in state_dict.keys() if "layer" in key]
+    sorted_layer_keys = sorted(layer_keys, key=lambda x: int(x.split('.')[2]))
+    last_n_layer_keys = sorted_layer_keys[-(n*9):]
+    new_state_dict = {}
+    for key in last_n_layer_keys:
+        new_key = key.split('.')
+        id = new_key[2]
+        id = int(id)+n-32
+        new_key[2] = str(id)
+        new_key = '.'.join(new_key)
+        print(key, new_key)
+        new_state_dict[new_key] = state_dict[key]
+
+    for key in state_dict.keys():
+        if "layer" not in key:
+            if "model.embed_tokens.weight" in key or "lm_head.weight" in key:
+                new_state_dict[key] = torch.zeros([1,4096])
+            else:
+                new_state_dict[key] = state_dict[key]
+            print(key, state_dict[key].shape, new_state_dict[key].shape)
+    torch.save(new_state_dict, save_path)
+
+    embedding_layer = model.model.embed_tokens
+    indexs = [128010, 128011, 128020, 128021]
+    with open(f"{model_path}/model_skip/sp.raw", "wb") as f:
+        for index in indexs:
+            indices = torch.tensor([index])
+            embedding_vector = embedding_layer(indices)
+            tensor_list = embedding_vector.squeeze()
+            print(tensor_list[:3])
+            for res in tensor_list:
+                res = struct.pack('f', res)
+                f.write(res)
+
+if __name__ == '__main__':
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-m", "--model", help="Path to MiniCPM-V model")
+    args = ap.parse_args()
+
+    model_path = args.model
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True, local_files_only=True, torch_dtype=torch.float16).llm
+    config = model.config
+    config.auto_map = {
+        "AutoConfig": "configuration_minicpm.MiniCPMConfig",
+        "AutoModel": "modeling_minicpm.MiniCPMModel",
+        "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
+        "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
+        "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+    }
+    tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
+    tok.save_pretrained(f"{args.model}/model_skip")
+    save_last_n_layers(model, 8, f'{args.model}/model_skip/pytorch_model.bin')
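
Note on `layer_skip.py`: below is a minimal sketch of the layer-key renumbering performed by `save_last_n_layers`, assuming a 32-layer base LLM (implied by the hardcoded `-32` offset); the key name in the example is hypothetical and used only for illustration.

```python
# Sketch of the index remapping in save_last_n_layers: keep the last n layers
# and renumber them from 0. The "-32" offset assumes a 32-layer base model,
# as hardcoded in layer_skip.py.
def remap_layer_key(key: str, n: int, total_layers: int = 32) -> str:
    parts = key.split('.')
    # parts[2] holds the layer index in keys shaped like "model.layers.<i>.<...>.weight"
    parts[2] = str(int(parts[2]) + n - total_layers)
    return '.'.join(parts)

# With n=8, original layers 24..31 become layers 0..7 in the saved state dict
# (hypothetical key name for illustration):
print(remap_layer_key("model.layers.24.self_attn.q_proj.weight", n=8))
# -> model.layers.0.self_attn.q_proj.weight
```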
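
`layer_skip.py` also writes the embeddings of four special-token ids (128010, 128011, 128020, 128021) to `model_skip/sp.raw` as consecutive native-endian float32 values. A sketch of reading that file back, assuming a hidden size of 4096 (inferred from the `torch.zeros([1,4096])` placeholders; adjust to the actual model):

```python
# Sketch: read back model_skip/sp.raw written by layer_skip.py.
# Layout: 4 vectors (one per special-token id), each HIDDEN_SIZE float32 values.
# HIDDEN_SIZE = 4096 is an assumption; use the model's real hidden size.
import numpy as np

HIDDEN_SIZE = 4096
NUM_SPECIAL_TOKENS = 4  # ids 128010, 128011, 128020, 128021

data = np.fromfile("model_skip/sp.raw", dtype=np.float32)
embeddings = data.reshape(NUM_SPECIAL_TOKENS, HIDDEN_SIZE)
print(embeddings.shape)   # (4, 4096)
print(embeddings[0][:3])  # should match the first values printed by layer_skip.py
```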