From 79c9fe9556ffffebdd973837e92691d384f778fc Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 25 Aug 2024 13:37:37 +0800 Subject: [PATCH] feat: do not convert some tensors --- model.cpp | 68 +++++++++++++++++++++++++++++++++++-------------------- model.h | 1 + 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/model.cpp b/model.cpp index e693eec7..f6995b94 100644 --- a/model.cpp +++ b/model.cpp @@ -1397,10 +1397,11 @@ ggml_type ModelLoader::get_sd_wtype() { continue; } - if (tensor_storage.name.find(".weight") != std::string::npos && - (tensor_storage.name.find("time_embed") != std::string::npos || - tensor_storage.name.find("context_embedder") != std::string::npos || - tensor_storage.name.find("time_in") != std::string::npos)) { + if (ggml_is_quantized(tensor_storage.type)) { + return tensor_storage.type; + } + + if (tensor_should_be_converted(tensor_storage, GGML_TYPE_Q4_K)) { return tensor_storage.type; } } @@ -1420,7 +1421,11 @@ ggml_type ModelLoader::get_conditioner_wtype() { continue; } - if (tensor_storage.name.find(".weight") != std::string::npos) { + if (ggml_is_quantized(tensor_storage.type)) { + return tensor_storage.type; + } + + if (tensor_should_be_converted(tensor_storage, GGML_TYPE_Q4_K)) { return tensor_storage.type; } } @@ -1437,10 +1442,11 @@ ggml_type ModelLoader::get_diffusion_model_wtype() { continue; } - if (tensor_storage.name.find(".weight") != std::string::npos && - (tensor_storage.name.find("time_embed") != std::string::npos || - tensor_storage.name.find("context_embedder") != std::string::npos || - tensor_storage.name.find("time_in") != std::string::npos)) { + if (ggml_is_quantized(tensor_storage.type)) { + return tensor_storage.type; + } + + if (tensor_should_be_converted(tensor_storage, GGML_TYPE_Q4_K)) { return tensor_storage.type; } } @@ -1458,7 +1464,11 @@ ggml_type ModelLoader::get_vae_wtype() { continue; } - if (tensor_storage.name.find(".weight")) { + if (ggml_is_quantized(tensor_storage.type)) { + return tensor_storage.type; + } + + if (tensor_should_be_converted(tensor_storage, GGML_TYPE_Q4_K)) { return tensor_storage.type; } } @@ -1723,6 +1733,26 @@ bool ModelLoader::load_tensors(std::map& tenso return true; } +bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) { + const std::string& name = tensor_storage.name; + if (type != GGML_TYPE_COUNT) { + if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) { + // Pass, do not convert + } else if (ends_with(name, ".bias")) { + // Pass, do not convert + } else if (contains(name, "img_in.") || contains(name, "time_in.in_layer.") || contains(name, "vector_in.in_layer.") || contains(name, "guidance_in.in_layer.") || contains(name, "final_layer.linear.")) { + // Pass, do not convert. For FLUX + } else if (contains(name, "x_embedder.") || contains(name, "t_embedder.") || contains(name, "y_embedder.") || contains(name, "context_embedder.")) { + // Pass, do not convert. For MMDiT + } else if (contains(name, "time_embed.") || contains(name, "label_emb.")) { + // Pass, do not convert. For Unet + } else { + return true; + } + } + return false; +} + bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) { auto backend = ggml_backend_cpu_init(); size_t mem_size = 1 * 1024 * 1024; // for padding @@ -1737,12 +1767,8 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type const std::string& name = tensor_storage.name; ggml_type tensor_type = tensor_storage.type; - if (type != GGML_TYPE_COUNT) { - if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) { - tensor_type = GGML_TYPE_F16; - } else { - tensor_type = type; - } + if (tensor_should_be_converted(tensor_storage, type)) { + tensor_type = type; } ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne); @@ -1792,15 +1818,9 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) } for (auto& tensor_storage : processed_tensor_storages) { - ggml_type tensor_type = tensor_storage.type; - if (type != GGML_TYPE_COUNT) { - if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) { - tensor_type = GGML_TYPE_F16; - } else { - tensor_type = type; - } + if (tensor_should_be_converted(tensor_storage, type)) { + tensor_storage.type = type; } - tensor_storage.type = tensor_type; mem_size += tensor_storage.nbytes() + alignment; } diff --git a/model.h b/model.h index 5cd63141..33f3fbcd 100644 --- a/model.h +++ b/model.h @@ -157,6 +157,7 @@ class ModelLoader { ggml_backend_t backend, std::set ignore_tensors = {}); bool save_to_gguf_file(const std::string& file_path, ggml_type type); + bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type); int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT); ~ModelLoader() = default;