diff --git a/examples/llava/README-minicpmv-dev.md b/examples/llava/README-minicpmv-dev.md
deleted file mode 100644
index f1d76a48c8433..0000000000000
--- a/examples/llava/README-minicpmv-dev.md
+++ /dev/null
@@ -1,45 +0,0 @@
-## MiniCPM-V dev
-
-### Prepare models and code
-
-Clone llama.cpp:
-```bash
-git clone git@github.com:OpenBMB/llama.cpp.git
-cd llama.cpp
-git checkout minicpmv-main-dev
-```
-
-### Usage of MiniCPM-V 2.6
-
-Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-dev-gguf) by us)
-
-```bash
-python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-V-dev
-python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-dev --minicpmv-projector ../MiniCPM-V-dev/minicpmv.projector --output-dir ../MiniCPM-V-dev/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
-```
-
-add 'res = "llama-bpe"' in convert_hf_to_gguf.py 514 line
-```bash
-python ./convert_hf_to_gguf.py ../MiniCPM-V-dev/model
-
-# quantize int4 version
-./llama-quantize ../MiniCPM-V-dev/model/ggml-model-f16.gguf ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf Q4_K_M
-```
-
-Build for Linux or Mac
-
-```bash
-make
-```
-
-Inference on Linux or Mac
-```
-# run f16 version
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
-
-# run quantized int4 version
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
-
-# or run in interactive mode
-./llama-minicpmv-cli -m ../MiniCPM-V-dev/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-dev/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
-```
diff --git a/examples/llava/README-minicpmv-pj-l.md b/examples/llava/README-minicpmv-pj-l.md
new file mode 100644
index 0000000000000..8a9b1680e8e20
--- /dev/null
+++ b/examples/llava/README-minicpmv-pj-l.md
@@ -0,0 +1,45 @@
+## tmp_project_l
+
+### Prepare models and code
+
+Clone llama.cpp:
+```bash
+git clone git@github.com:OpenBMB/llama.cpp.git
+cd llama.cpp
+
+```
+
+### Usage of tmp_project_l
+
+Replace `['tmp_project_l']` with the actual model location:
+
+```bash
+git checkout tmp_project_l
+python ./examples/llava/layer_skip.py -m ['tmp_project_l']
+git checkout minicpmv-main
+python ./examples/llava/minicpmv-surgery.py -m ['tmp_project_l']
+python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ['tmp_project_l'] --minicpmv-projector ['tmp_project_l']/minicpmv.projector --output-dir ['tmp_project_l']/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
+```
+
+Add `res = "llama-bpe"` at line 514 of convert_hf_to_gguf.py, then run:
+```bash
+python ./convert_hf_to_gguf.py ['tmp_project_l']/model
+```
+
+Delete the code at line 470 of convert_hf_to_gguf.py, then run:
+```bash
+python ./convert_hf_to_gguf.py ['tmp_project_l']/model_skip
+```
+
+Build for Linux or Mac
+
+```bash
+git checkout tmp_project_l
+make
+```
+
+Inference on Linux or Mac
+```
+# run f16 version
+./minicpmv-cli -m ['tmp_project_l']/model/ggml-model-f16.gguf --mmproj ['tmp_project_l']/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
+```
\ No newline at end of file
diff --git a/examples/llava/layer_skip.py b/examples/llava/layer_skip.py
new file mode 100644
index 0000000000000..1cf24955c3933
--- /dev/null
+++ b/examples/llava/layer_skip.py
@@ -0,0 +1,60 @@
+import os
+import torch
+import struct
+import argparse
+from transformers import AutoModel, AutoTokenizer
+
+def save_last_n_layers(model, n, save_path):
+    state_dict = model.state_dict()
+    layer_keys = [key for key in state_dict.keys() if "layer" in key]
+    sorted_layer_keys = sorted(layer_keys, key=lambda x: int(x.split('.')[2]))
+    last_n_layer_keys = sorted_layer_keys[-(n*9):]
+    new_state_dict = {}
+    for key in last_n_layer_keys:
+        new_key = key.split('.')
+        id = new_key[2]
+        id = int(id)+n-32
+        new_key[2] = str(id)
+        new_key = '.'.join(new_key)
+        print(key, new_key)
+        new_state_dict[new_key] = state_dict[key]
+
+    for key in state_dict.keys():
+        if "layer" not in key:
+            if "model.embed_tokens.weight" in key or "lm_head.weight" in key:
+                new_state_dict[key] = torch.zeros([1,4096])
+            else:
+                new_state_dict[key] = state_dict[key]
+            print(key, state_dict[key].shape, new_state_dict[key].shape)
+    torch.save(new_state_dict, save_path)
+
+    embedding_layer = model.model.embed_tokens
+    indexs = [128010, 128011, 128020, 128021]
+    with open(f"{model_path}/model_skip/sp.raw", "wb") as f:
+        for index in indexs:
+            indices = torch.tensor([index])
+            embedding_vector = embedding_layer(indices)
+            tensor_list = embedding_vector.squeeze()
+            print(tensor_list[:3])
+            for res in tensor_list:
+                res = struct.pack('f', res)
+                f.write(res)
+
+if __name__ == '__main__':
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-m", "--model", help="Path to MiniCPM-V model")
+    args = ap.parse_args()
+
+    model_path = args.model
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True, local_files_only=True, torch_dtype=torch.float16).llm
+    config = model.config
+    config.auto_map = {
+        "AutoConfig": "configuration_minicpm.MiniCPMConfig",
+        "AutoModel": "modeling_minicpm.MiniCPMModel",
+        "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
+        "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
+        "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+    }
+    tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
+    tok.save_pretrained(f"{args.model}/model_skip")
+    save_last_n_layers(model, 8, f'{args.model}/model_skip/pytorch_model.bin')
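
Note on `layer_skip.py`: below is a minimal sketch of the layer-key renumbering performed by `save_last_n_layers`, assuming a 32-layer base LLM (implied by the hardcoded `-32` offset); the key name in the example is hypothetical and used only for illustration.

```python
# Sketch of the index remapping in save_last_n_layers: keep the last n layers
# and renumber them from 0. The "-32" offset assumes a 32-layer base model,
# as hardcoded in layer_skip.py.
def remap_layer_key(key: str, n: int, total_layers: int = 32) -> str:
    parts = key.split('.')
    # parts[2] holds the layer index in keys shaped like "model.layers.<i>.<...>.weight"
    parts[2] = str(int(parts[2]) + n - total_layers)
    return '.'.join(parts)

# With n=8, original layers 24..31 become layers 0..7 in the saved state dict
# (hypothetical key name for illustration):
print(remap_layer_key("model.layers.24.self_attn.q_proj.weight", n=8))
# -> model.layers.0.self_attn.q_proj.weight
```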
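
`layer_skip.py` also writes the embeddings of four special-token ids (128010, 128011, 128020, 128021) to `model_skip/sp.raw` as consecutive native-endian float32 values. A sketch of reading that file back, assuming a hidden size of 4096 (inferred from the `torch.zeros([1,4096])` placeholders; adjust to the actual model):

```python
# Sketch: read back model_skip/sp.raw written by layer_skip.py.
# Layout: 4 vectors (one per special-token id), each HIDDEN_SIZE float32 values.
# HIDDEN_SIZE = 4096 is an assumption; use the model's real hidden size.
import numpy as np

HIDDEN_SIZE = 4096
NUM_SPECIAL_TOKENS = 4  # ids 128010, 128011, 128020, 128021

data = np.fromfile("model_skip/sp.raw", dtype=np.float32)
embeddings = data.reshape(NUM_SPECIAL_TOKENS, HIDDEN_SIZE)
print(embeddings.shape)   # (4, 4096)
print(embeddings[0][:3])  # should match the first values printed by layer_skip.py
```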