Skip to content

Commit

Permalink
skip 7 layers
Browse files Browse the repository at this point in the history
  • Loading branch information
zkh2016 committed Sep 19, 2024
1 parent 9d7c65f commit 721405f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 28 deletions.
55 changes: 38 additions & 17 deletions examples/llava/minicpmv-cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,22 @@ static struct llama_model * llava_init(gpt_params * params) {
return model;
}

// Load the optional second ("skip") model from a separate GGUF file and attach
// it to the llama context, so that the last `skip_layers` layers of the primary
// model can be served from this second model instead.
//
// @param ctx     llava context whose ctx_llama receives the second model
// @param params  CLI parameters; only skip_model (path) and skip_layers are read
//
// No-op unless both --skip-model and --skip-layers were provided.
static void load_model2(struct llava_context *ctx, gpt_params * params){
    if (params->skip_model.size() == 0 || params->skip_layers <= 0) {
        return; // skip-model feature not requested
    }
    // load the tail model
    llama_model_params model_params = llama_model_params_from_gpt_params(*params);
    // NOTE(review): init_time=true presumably (re)initializes load timing for
    // this second load — confirm against llama_model_params semantics
    model_params.init_time = true;
    // the tail model carries weights only, no vocabulary
    model_params.has_vocab = false;
    llama_model * model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
    if (model2 == nullptr) {
        return; // load failed; leave the context without a second model
    }
    llama_set_model_skip_layers(model2, params->skip_layers);
    llama_set_model2(ctx->ctx_llama, model2);
}

static struct llava_context * llava_init_context(gpt_params * params) {
auto model = llava_init(params);
if (model == NULL) {
Expand All @@ -76,18 +92,18 @@ static struct llava_context * llava_init_context(gpt_params * params) {
ctx_params.n_ctx = params->n_ctx;
}

llama_model * model2 = nullptr;
if(params->skip_model.size() > 0 && params->skip_layers > 0) {
//load last model
llama_model_params model_params = llama_model_params_from_gpt_params(*params);
model_params.init_time = false;
model_params.has_vocab = false;
//llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
//llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
llama_set_model_skip_layers(model2, params->skip_layers);
//llama_add_model_load_times(model, model2);
}
// llama_model * model2 = nullptr;
// if(params->skip_model.size() > 0 && params->skip_layers > 0) {
// //load last model
// llama_model_params model_params = llama_model_params_from_gpt_params(*params);
// model_params.init_time = false;
// model_params.has_vocab = false;
// //llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
// //llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
// model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
// llama_set_model_skip_layers(model2, params->skip_layers);
// //llama_add_model_load_times(model, model2);
// }

llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);

Expand All @@ -96,9 +112,9 @@ static struct llava_context * llava_init_context(gpt_params * params) {
return NULL;
}

if(params->skip_model.size() > 0 && params->skip_layers > 0) {
llama_set_model2(ctx_llama, model2);
}
// if(params->skip_model.size() > 0 && params->skip_layers > 0) {
// llama_set_model2(ctx_llama, model2);
// }

for (unsigned int i = 0; i < params->lora_adapter.size(); ++i) {
const std::string & lora_adapter = std::get<0>(params->lora_adapter[i]);
Expand Down Expand Up @@ -347,9 +363,10 @@ int main(int argc, char ** argv) {

if (params.image.size() > 0) {
auto image = params.image;
ctx_llava = minicpmv_init(&params, image, n_past);
ctx_llava = minicpmv_init(&params, image, n_past);
//release vit memory
clip_free(ctx_llava->ctx_clip);
//clip_free(ctx_llava->ctx_clip);
load_model2(ctx_llava, &params);
if (!params.prompt.empty()) {
LOG_TEE("<user>%s\n", params.prompt.c_str());
LOG_TEE("<assistant>");
Expand Down Expand Up @@ -398,7 +415,11 @@ int main(int argc, char ** argv) {
llama_print_timings(ctx_llava->ctx_llama);

ctx_llava->model = NULL;
auto free_start = ggml_time_us();
llava_free(ctx_llava);
auto free_time = ggml_time_us() - free_start;
printf("free times : %.4fms\n", free_time * 1e-3);

// }

return 0;
Expand Down
22 changes: 11 additions & 11 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6767,18 +6767,18 @@ struct llm_build_context {

const llama_model *m = &model;
int local_il = il;
if(il >= n_layer - skip_layers && model2 != nullptr){//TODO: && is_vit
m = model2;
local_il = skip_idx;
skip_idx += 1;
}
// if(model2 != nullptr){
// auto it = find(skip_list.begin(), skip_list.end(), il);
// if(it != skip_list.end()){
// local_il = it - skip_list.begin();
// m = model2;
// }
// if(il >= n_layer - skip_layers && model2 != nullptr){//TODO: && is_vit
// m = model2;
// local_il = skip_idx;
// skip_idx += 1;
// }
if(model2 != nullptr){
auto it = find(skip_list.begin(), skip_list.end(), il);
if(it != skip_list.end()){
local_il = it - skip_list.begin();
m = model2;
}
}

// norm
cur = llm_build_norm(ctx0, inpL, hparams,
Expand Down

0 comments on commit 721405f

Please sign in to comment.