feat: add convert api (leejet#142)
leejet authored Jan 14, 2024
1 parent 2b6ec97 commit 5c614e4
Showing 5 changed files with 167 additions and 25 deletions.
21 changes: 16 additions & 5 deletions README.md
@@ -126,7 +126,7 @@ cmake .. -DSD_METAL=ON
cmake --build . --config Release
```
- ### Using Flash Attention
+ ##### Using Flash Attention
Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
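For reference, flash attention is toggled at build time. A minimal sketch of such a build, assuming the `SD_FLASH_ATTN` CMake option (not shown in this diff):

```sh
# Assumed flag: SD_FLASH_ATTN enables the flash attention kernels at compile time
cmake .. -DSD_FLASH_ATTN=ON
cmake --build . --config Release
```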
@@ -142,7 +142,7 @@ usage: ./bin/sd [arguments]

arguments:
-h, --help show this help message and exit
- -M, --mode [txt2img or img2img] generation mode (default: txt2img)
+ -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)
-t, --threads N number of threads to use during computation (default: -1).
If threads <= 0, then threads will be set to the number of CPU physical cores
-m, --model [MODEL] path to model
@@ -168,7 +168,8 @@ arguments:
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
-b, --batch-count COUNT number of images to generate.
--schedule {discrete, karras} Denoiser sigma schedule (default: discrete)
- --clip-skip N number of layers to skip of clip model (default: 0)
+ --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
+ <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
--vae-tiling process vae in tiles to reduce memory usage
-v, --verbose print extra info
```
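As an illustration of the new `--clip-skip` semantics, a sketch (per the help text above, `2` ignores the last CLIP layer, the default behavior for SD2.x models):

```sh
# Force SD2.x-style CLIP skipping; flag semantics taken from the help text above
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --clip-skip 2
```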
@@ -183,6 +184,16 @@ You can specify the model weight type using the `--type` parameter. The weights
- `q5_0` or `q5_1` for 5-bit integer quantization
- `q4_0` or `q4_1` for 4-bit integer quantization
+ #### Convert to GGUF
+ You can also convert weights in the formats `ckpt/safetensors/diffusers` to gguf and perform quantization in advance, avoiding the need for quantization every time you load them.
+ For example:
+ ```sh
+ ./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
+ ```
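The converted file can then be passed straight to `-m`, so quantization happens once at conversion rather than on every load. A sketch reusing the paths from the example above:

```sh
# Convert once, quantizing to q8_0, then generate from the pre-quantized gguf
./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf --type q8_0
./bin/sd -m ../models/v1-5-pruned-emaonly.q8_0.gguf -p "a lovely cat"
```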

#### txt2img example

```sh
@@ -240,7 +251,7 @@ Here's a simple example:
| ---- |---- |
| ![](./assets/without_lcm.png) |![](./assets/with_lcm.png) |

- ## Using TAESD to faster decoding
+ #### Using TAESD to faster decoding

You can use TAESD to accelerate the decoding of latent images by following these steps:

@@ -258,7 +269,7 @@ curl -L -O https://huggingface.co/madebyollin/taesd/blob/main/diffusion_pytorch_
sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
```

- ## Using ESRGAN to upscale results
+ #### Using ESRGAN to upscale results

You can use ESRGAN to upscale the generated images. At the moment, only the [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth) model is supported. Support for more models of this architecture will be added soon.
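A sketch of a typical invocation, assuming the `--upscale-model` flag this CLI uses for ESRGAN weights (not shown in this diff):

```sh
# Assumed flag: --upscale-model points at the RealESRGAN checkpoint
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --upscale-model ../models/RealESRGAN_x4plus_anime_6B.pth
```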

30 changes: 28 additions & 2 deletions examples/cli/main.cpp
@@ -42,11 +42,13 @@ const char* schedule_str[] = {
const char* modes_str[] = {
"txt2img",
"img2img",
"convert",
};

enum SDMode {
TXT2IMG,
IMG2IMG,
+ CONVERT,
MODE_COUNT
};

@@ -125,7 +127,7 @@ void print_usage(int argc, const char* argv[]) {
printf("\n");
printf("arguments:\n");
printf(" -h, --help show this help message and exit\n");
printf(" -M, --mode [txt2img or img2img] generation mode (default: txt2img)\n");
printf(" -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)\n");
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
printf(" -m, --model [MODEL] path to model\n");
@@ -384,7 +386,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
params.n_threads = get_num_physical_cores();
}

- if (params.prompt.length() == 0) {
+ if (params.mode != CONVERT && params.prompt.length() == 0) {
fprintf(stderr, "error: the following arguments are required: prompt\n");
print_usage(argc, argv);
exit(1);
@@ -432,6 +434,12 @@
srand((int)time(NULL));
params.seed = rand();
}

+ if (params.mode == CONVERT) {
+ if (params.output_path == "output.png") {
+ params.output_path = "output.gguf";
+ }
+ }
}

std::string get_image_params(SDParams params, int64_t seed) {
@@ -479,6 +487,24 @@ int main(int argc, const char* argv[]) {
printf("%s", sd_get_system_info());
}

+ if (params.mode == CONVERT) {
+ bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
+ if (!success) {
+ fprintf(stderr,
+ "convert '%s'/'%s' to '%s' failed\n",
+ params.model_path.c_str(),
+ params.vae_path.c_str(),
+ params.output_path.c_str());
+ return 1;
+ } else {
+ printf("convert '%s'/'%s' to '%s' success\n",
+ params.model_path.c_str(),
+ params.vae_path.c_str(),
+ params.output_path.c_str());
+ return 0;
+ }
+ }

bool vae_decode_only = true;
uint8_t* input_image_buffer = NULL;
if (params.mode == IMG2IMG) {
132 changes: 117 additions & 15 deletions model.cpp
@@ -15,6 +15,8 @@
#include "ggml/ggml-backend.h"
#include "ggml/ggml.h"

#include "stable-diffusion.h"

#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif
@@ -609,7 +611,7 @@ bool is_safetensors_file(const std::string& file_path) {
}

size_t header_size_ = read_u64(header_size_buf);
- if (header_size_ >= file_size_) {
+ if (header_size_ >= file_size_ || header_size_ <= 2) {
return false;
}

@@ -1181,6 +1183,9 @@ SDVersion ModelLoader::get_sd_version() {
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos) {
return VERSION_XL;
}
if (tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
return VERSION_XL;
}
if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
@@ -1218,7 +1223,35 @@ std::string ModelLoader::load_merges() {
return merges_utf8_str;
}

+ void remove_duplicates(std::vector<TensorStorage>& vec) {
+ std::unordered_map<std::string, size_t> name_to_index_map;
+
+ for (size_t i = 0; i < vec.size(); ++i) {
+ const std::string& current_name = vec[i].name;
+ auto it = name_to_index_map.find(current_name);
+
+ if (it != name_to_index_map.end()) {
+ vec[it->second] = vec[i];
+ } else {
+ name_to_index_map[current_name] = i;
+ }
+ }
+
+ vec.resize(name_to_index_map.size());
+ }
+
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
+ std::vector<TensorStorage> processed_tensor_storages;
+ for (auto& tensor_storage : tensor_storages) {
+ // LOG_DEBUG("%s", name.c_str());
+
+ if (is_unused_tensor(tensor_storage.name)) {
+ continue;
+ }
+
+ preprocess_tensor(tensor_storage, processed_tensor_storages);
+ }
+ remove_duplicates(processed_tensor_storages);
bool success = true;
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
std::string file_path = file_paths_[file_index];
@@ -1276,22 +1309,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
return true;
};

- std::vector<TensorStorage> processed_tensor_storages;
- for (auto& tensor_storage : tensor_storages) {
+ for (auto& tensor_storage : processed_tensor_storages) {
if (tensor_storage.file_index != file_index) {
continue;
}
-
- // LOG_DEBUG("%s", name.c_str());
-
- if (is_unused_tensor(tensor_storage.name)) {
- continue;
- }
-
- preprocess_tensor(tensor_storage, processed_tensor_storages);
- }
-
- for (auto& tensor_storage : processed_tensor_storages) {
// LOG_DEBUG("%s", tensor_storage.name.c_str());

ggml_tensor* dst_tensor = NULL;
@@ -1437,7 +1458,61 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
return true;
}

- int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
+ auto backend = ggml_backend_cpu_init();
+ size_t mem_size = 1 * 1024 * 1024; // for padding
+ mem_size += tensor_storages.size() * ggml_tensor_overhead();
+ mem_size += cal_mem_size(backend, type);
+ LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
+ ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
+
+ gguf_context* gguf_ctx = gguf_init_empty();
+
+ auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
+ const std::string& name = tensor_storage.name;
+
+ ggml_type tensor_type = tensor_storage.type;
+ if (type != GGML_TYPE_COUNT) {
+ if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+ tensor_type = GGML_TYPE_F16;
+ } else {
+ tensor_type = type;
+ }
+ }
+
+ ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
+ if (tensor == NULL) {
+ LOG_ERROR("ggml_new_tensor failed");
+ return false;
+ }
+ ggml_set_name(tensor, name.c_str());
+
+ // LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
+ // ggml_nbytes(tensor), ggml_type_name(tensor_type),
+ // tensor_storage.n_dims,
+ // tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
+ // tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
+ *dst_tensor = tensor;
+
+ gguf_add_tensor(gguf_ctx, tensor);
+
+ return true;
+ };
+
+ bool success = load_tensors(on_new_tensor_cb, backend);
+ ggml_backend_free(backend);
+ LOG_INFO("load tensors done");
+ LOG_INFO("trying to save tensors to %s", file_path.c_str());
+ if (success) {
+ gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+ }
+ ggml_free(ggml_ctx);
+ gguf_free(gguf_ctx);
+ return success;
+ }
+
+ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend, ggml_type type) {
size_t alignment = 128;
if (backend != NULL) {
alignment = ggml_backend_get_alignment(backend);
@@ -1452,8 +1527,35 @@ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
}

for (auto& tensor_storage : processed_tensor_storages) {
+ ggml_type tensor_type = tensor_storage.type;
+ if (type != GGML_TYPE_COUNT) {
+ if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+ tensor_type = GGML_TYPE_F16;
+ } else {
+ tensor_type = type;
+ }
+ }
+ tensor_storage.type = tensor_type;
mem_size += tensor_storage.nbytes() + alignment;
}

return mem_size;
}

+ bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
+ ModelLoader model_loader;
+
+ if (!model_loader.init_from_file(input_path)) {
+ LOG_ERROR("init model loader from file failed: '%s'", input_path);
+ return false;
+ }
+
+ if (vae_path != NULL && strlen(vae_path) > 0) {
+ if (!model_loader.init_from_file(vae_path, "vae.")) {
+ LOG_ERROR("init model loader from file failed: '%s'", vae_path);
+ return false;
+ }
+ }
+ bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
+ return success;
+ }
5 changes: 3 additions & 2 deletions model.h
@@ -4,9 +4,9 @@
#include <functional>
#include <map>
#include <memory>
+ #include <set>
#include <string>
#include <vector>
- #include <set>

#include "ggml/ggml-backend.h"
#include "ggml/ggml.h"
@@ -121,7 +121,8 @@ class ModelLoader {
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
ggml_backend_t backend,
std::set<std::string> ignore_tensors = {});
- int64_t cal_mem_size(ggml_backend_t backend);
+ bool save_to_gguf_file(const std::string& file_path, ggml_type type);
+ int64_t cal_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
~ModelLoader() = default;
};
#endif // __MODEL_H__
4 changes: 3 additions & 1 deletion stable-diffusion.h
@@ -148,7 +148,9 @@ SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
enum sd_type_t wtype);
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);

- SD_API sd_image_t upscale(upscaler_ctx_t*, sd_image_t input_image, uint32_t upscale_factor);
+ SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
+
+ SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type);

#ifdef __cplusplus
}
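For reference, a minimal sketch of driving the new `convert` API from C++ (paths are hypothetical; the empty VAE path relies on the `strlen(vae_path) > 0` check in `model.cpp` above, and `SD_TYPE_Q8_0` is the assumed `sd_type_t` spelling of `q8_0`):

```cpp
#include <cstdio>

#include "stable-diffusion.h"

int main() {
    // Convert a safetensors checkpoint to a q8_0-quantized gguf file.
    // An empty vae_path skips merging an external VAE (see convert() in model.cpp).
    bool ok = convert("../models/v1-5-pruned-emaonly.safetensors",  // input_path (hypothetical)
                      "",                                           // vae_path: none
                      "../models/v1-5-pruned-emaonly.q8_0.gguf",    // output_path (hypothetical)
                      SD_TYPE_Q8_0);                                // output_type (assumed enum name)
    fprintf(stderr, "convert %s\n", ok ? "succeeded" : "failed");
    return ok ? 0 : 1;
}
```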
