From b5bcda2cb6ca9a8ed8b1acb9732f7bc634e1d788 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 29 Nov 2024 18:02:08 +0100
Subject: [PATCH] feat(backend): add stablediffusion-ggml

Signed-off-by: Ettore Di Giacinto
---
 Makefile                                      |  45 ++-
 .../go/image/stablediffusion-ggml/Makefile    |  21 ++
 .../go/image/stablediffusion-ggml/gosd.cpp    | 325 ++++++++++++++++++
 backend/go/image/stablediffusion-ggml/gosd.go |  62 ++++
 backend/go/image/stablediffusion-ggml/gosd.h  |   9 +
 backend/go/image/stablediffusion-ggml/main.go |  20 ++
 6 files changed, 475 insertions(+), 7 deletions(-)
 create mode 100644 backend/go/image/stablediffusion-ggml/Makefile
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.cpp
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.go
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.h
 create mode 100644 backend/go/image/stablediffusion-ggml/main.go

diff --git a/Makefile b/Makefile
index c6e80552f118..83db1725c73c 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,10 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0
 
+# stablediffusion.cpp (ggml)
+STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
+STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
+
 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
 ONNX_OS?=linux
@@ -209,6 +213,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ifeq ($(ONNX_OS),linux)
 ifeq ($(ONNX_ARCH),x64)
 	ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
+	ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
 endif
 endif
 
@@ -244,15 +249,19 @@ sources/go-llama.cpp:
 	git checkout $(GOLLAMA_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
+sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
+	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+## bark.cpp
 sources/bark.cpp:
-	git clone --recursive https://github.com/PABannier/bark.cpp.git sources/bark.cpp && \
+	git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
 	cd sources/bark.cpp && \
 	git checkout $(BARKCPP_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/bark.cpp/build/libbark.a: sources/bark.cpp
 	cd sources/bark.cpp && \
-	mkdir build && \
+	mkdir -p build && \
 	cd build && \
 	cmake $(CMAKE_ARGS) .. && \
 	cmake --build . --config Release
@@ -260,9 +269,6 @@ sources/bark.cpp/build/libbark.a: sources/bark.cpp
 backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
 	$(MAKE) -C backend/go/bark libbark.a
 
-sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
-	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
 ## go-piper
 sources/go-piper:
 	mkdir -p sources/go-piper
@@ -276,7 +282,7 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
 
-## stable diffusion
+## stable diffusion (onnx)
 sources/go-stable-diffusion:
 	mkdir -p sources/go-stable-diffusion
 	cd sources/go-stable-diffusion && \
@@ -289,6 +295,30 @@ sources/go-stable-diffusion:
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 
+## stablediffusion (ggml)
+sources/stablediffusion-ggml.cpp:
+	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
+	cd sources/stablediffusion-ggml.cpp && \
+	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
+	cd sources/stablediffusion-ggml.cpp && \
+	mkdir -p build && \
+	cd build && \
+	cmake $(CMAKE_ARGS) .. && \
+	cmake --build . --config Release
+
+backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
+	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
+
+backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion-ggml
+endif
+
 sources/onnxruntime:
 	mkdir -p sources/onnxruntime
 	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
@@ -329,7 +359,7 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/go-piper sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
@@ -372,6 +402,7 @@ clean: ## Remove build related file
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/go/bark clean
 	$(MAKE) -C backend/cpp/llama clean
+	$(MAKE) -C backend/go/image/stablediffusion-ggml clean
 	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile
new file mode 100644
index 000000000000..cca9bf6e89dd
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/Makefile
@@ -0,0 +1,21 @@
+INCLUDE_PATH := $(abspath ./)
+LIBRARY_PATH := $(abspath ./)
+
+AR?=ar
+
+BUILD_TYPE?=
+# the wrapper is compiled as C++17 (see -std below)
+CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
+
+# warnings
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+gosd.o: gosd.cpp gosd.h
+	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
+
+libsd.a: gosd.o
+	cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
+	$(AR) rcs libsd.a gosd.o
+
+clean:
+	rm -f gosd.o libsd.a
\ No newline at end of file
diff --git a/backend/go/image/stablediffusion-ggml/gosd.cpp b/backend/go/image/stablediffusion-ggml/gosd.cpp
new file mode 100644
index 000000000000..1c6d2821142b
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.cpp
@@ -0,0 +1,325 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+#include "gosd.h"
+
+// #include "preprocessing.hpp"
+#include "flux.hpp"
+#include "stable-diffusion.h"
+
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_STATIC
+#include "stb_image.h"
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#include "stb_image_write.h"
+
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
+#define STB_IMAGE_RESIZE_STATIC
+#include "stb_image_resize.h"
+
+// Names of the RNG types, indexed by rng_type_t.
+const char* rng_type_to_str[] = {
+    "std_default",
+    "cuda",
+};
+
+// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
+const char* sample_method_str[] = {
+    "euler_a",
+    "euler",
+    "heun",
+    "dpm2",
+    "dpm++2s_a",
+    "dpm++2m",
+    "dpm++2mv2",
+    "ipndm",
+    "ipndm_v",
+    "lcm",
+};
+
+// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
+const char* schedule_str[] = {
+    "default",
+    "discrete",
+    "karras",
+    "exponential",
+    "ays",
+    "gits",
+};
+
+// Names of the generation modes, same order as enum SDMode below.
+const char* modes_str[] = {
+    "txt2img",
+    "img2img",
+    "img2vid",
+    "convert",
+};
+
+enum SDMode {
+    TXT2IMG,
+    IMG2IMG,
+    IMG2VID,
+    CONVERT,
+    MODE_COUNT
+};
+
+// Generation settings with their defaults; in this file only print_params reads them.
+struct SDParams {
+    int n_threads = -1;
+    SDMode mode = TXT2IMG;
+    std::string model_path;
+    std::string clip_l_path;
+    std::string clip_g_path;
+    std::string t5xxl_path;
+    std::string diffusion_model_path;
+    std::string vae_path;
+    std::string taesd_path;
+    std::string esrgan_path;
+    std::string controlnet_path;
+    std::string embeddings_path;
+    std::string stacked_id_embeddings_path;
+    std::string input_id_images_path;
+    sd_type_t wtype = SD_TYPE_COUNT;
+    std::string lora_model_dir;
+    std::string output_path = "output.png";
+    std::string input_path;
+    std::string control_image_path;
+
+    std::string prompt;
+    std::string negative_prompt;
+    float min_cfg = 1.0f;
+    float cfg_scale = 7.0f;
+    float guidance = 3.5f;
+    float style_ratio = 20.f;
+    int clip_skip = -1;  // <= 0 represents unspecified
+    int width = 512;
+    int height = 512;
+    int batch_count = 1;
+
+    int video_frames = 6;
+    int motion_bucket_id = 127;
+    int fps = 6;
+    float augmentation_level = 0.f;
+
+    sample_method_t sample_method = EULER_A;
+    schedule_t schedule = DEFAULT;
+    int sample_steps = 20;
+    float strength = 0.75f;
+    float control_strength = 0.9f;
+    rng_type_t rng_type = CUDA_RNG;
+    int64_t seed = 42;
+    bool verbose = false;
+    bool vae_tiling = false;
+    bool control_net_cpu = false;
+    bool normalize_input = false;
+    bool clip_on_cpu = false;
+    bool vae_on_cpu = false;
+    bool diffusion_flash_attn = false;
+    bool canny_preprocess = false;
+    bool color = false;
+    int upscale_repeats = 1;
+
+    std::vector<int> skip_layers = {7, 8, 9};
+    float slg_scale = 0.;
+    float skip_layer_start = 0.01;
+    float skip_layer_end = 0.2;
+};
+
+// Prints a human-readable summary of params to stdout.
+void print_params(SDParams params) {
+    printf("Option: \n");
+    printf(" n_threads: %d\n", params.n_threads);
+    printf(" mode: %s\n", modes_str[params.mode]);
+    printf(" model_path: %s\n", params.model_path.c_str());
+    printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
+    printf(" clip_l_path: %s\n", params.clip_l_path.c_str());
+    printf(" clip_g_path: %s\n", params.clip_g_path.c_str());
+    printf(" t5xxl_path: %s\n", params.t5xxl_path.c_str());
+    printf(" diffusion_model_path: %s\n", params.diffusion_model_path.c_str());
+    printf(" vae_path: %s\n", params.vae_path.c_str());
+    printf(" taesd_path: %s\n", params.taesd_path.c_str());
+    printf(" esrgan_path: %s\n", params.esrgan_path.c_str());
+    printf(" controlnet_path: %s\n", params.controlnet_path.c_str());
+    printf(" embeddings_path: %s\n", params.embeddings_path.c_str());
+    printf(" stacked_id_embeddings_path: %s\n", params.stacked_id_embeddings_path.c_str());
+    printf(" input_id_images_path: %s\n", params.input_id_images_path.c_str());
+    printf(" style ratio: %.2f\n", params.style_ratio);
+    printf(" normalize input image : %s\n", params.normalize_input ? "true" : "false");
+    printf(" output_path: %s\n", params.output_path.c_str());
+    printf(" init_img: %s\n", params.input_path.c_str());
+    printf(" control_image: %s\n", params.control_image_path.c_str());
+    printf(" clip on cpu: %s\n", params.clip_on_cpu ? "true" : "false");
+    printf(" controlnet cpu: %s\n", params.control_net_cpu ? "true" : "false");
+    printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
+    printf(" diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
+    printf(" strength(control): %.2f\n", params.control_strength);
+    printf(" prompt: %s\n", params.prompt.c_str());
+    printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
+    printf(" min_cfg: %.2f\n", params.min_cfg);
+    printf(" cfg_scale: %.2f\n", params.cfg_scale);
+    printf(" slg_scale: %.2f\n", params.slg_scale);
+    printf(" guidance: %.2f\n", params.guidance);
+    printf(" clip_skip: %d\n", params.clip_skip);
+    printf(" width: %d\n", params.width);
+    printf(" height: %d\n", params.height);
+    printf(" sample_method: %s\n", sample_method_str[params.sample_method]);
+    printf(" schedule: %s\n", schedule_str[params.schedule]);
+    printf(" sample_steps: %d\n", params.sample_steps);
+    printf(" strength(img2img): %.2f\n", params.strength);
+    printf(" rng: %s\n", rng_type_to_str[params.rng_type]);
+    // int64_t is not `long` on every platform, so widen explicitly instead of using %ld.
+    printf(" seed: %lld\n", (long long)params.seed);
+    printf(" batch_count: %d\n", params.batch_count);
+    printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false");
+    printf(" upscale_repeats: %d\n", params.upscale_repeats);
+}
+
+// Context of the currently loaded model; NULL until load_model succeeds.
+sd_ctx_t* sd_c = NULL;
+
+// Returns the index of wanted in names[0..count), or -1 when it is absent.
+static int index_of(const char* wanted, const char** names, int count) {
+    for (int i = 0; i < count; i++) {
+        if (!strcmp(wanted, names[i])) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+// Creates the global stable-diffusion context for the checkpoint at `model`.
+// schedule_selected must be one of schedule_str; NULL or "" selects "default".
+// Returns 0 on success, 1 on an unknown scheduler or a context creation failure.
+int load_model(char *model, char *schedule_selected, int threads) {
+    // An unset scheduler falls back to the first entry ("default") instead of failing.
+    int schedule_found = 0;
+    if (schedule_selected != NULL && schedule_selected[0] != '\0') {
+        schedule_found = index_of(schedule_selected, schedule_str, N_SCHEDULES);
+    }
+    if (schedule_found == -1) {
+        printf("invalid scheduler\n");
+        return 1;
+    }
+    schedule_t schedule = (schedule_t)schedule_found;
+
+    sd_ctx_t* sd_ctx = new_sd_ctx(model,
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  false,
+                                  false,
+                                  true,
+                                  threads,
+                                  SD_TYPE_COUNT,
+                                  STD_DEFAULT_RNG,
+                                  schedule,
+                                  false,
+                                  false,
+                                  false,
+                                  false);
+
+    if (sd_ctx == NULL) {
+        printf("new_sd_ctx_t failed\n");
+        return 1;
+    }
+
+    // Release a previously loaded context so a reload does not leak it.
+    if (sd_c != NULL) {
+        free_sd_ctx(sd_c);
+    }
+    sd_c = sd_ctx;
+
+    return 0;
+}
+
+// Renders one txt2img image with the loaded model and writes it to `dst` as a PNG.
+// sample_method_selected must be one of sample_method_str; NULL or "" selects "euler_a".
+// Returns 0 on success and 1 on any failure.
+int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char* sample_method_selected, char *dst ) {
+    if (sd_c == NULL) {
+        printf("model not loaded\n");
+        return 1;
+    }
+
+    // An unset sampler falls back to the first entry ("euler_a") instead of failing.
+    int sample_method_found = 0;
+    if (sample_method_selected != NULL && sample_method_selected[0] != '\0') {
+        sample_method_found = index_of(sample_method_selected, sample_method_str, N_SAMPLE_METHODS);
+    }
+    if (sample_method_found == -1) {
+        printf("invalid sample method\n");
+        return 1;
+    }
+    sample_method_t sample_method = (sample_method_t)sample_method_found;
+    std::vector<int> skip_layers = {7, 8, 9};
+
+    sd_image_t* results = txt2img(sd_c,
+                                  text,
+                                  negativeText,
+                                  -1,     // clip_skip
+                                  7.0f,   // cfg_scale
+                                  3.5f,   // guidance
+                                  width,
+                                  height,
+                                  sample_method,
+                                  steps,
+                                  seed,
+                                  1,      // batch_count
+                                  NULL,   // control_cond
+                                  0.9f,   // control_strength
+                                  20.f,   // style_strength
+                                  false,  // normalize_input
+                                  "",     // input_id_images_path
+                                  skip_layers.data(),
+                                  skip_layers.size(),
+                                  0,      // slg_scale
+                                  0.01,   // skip_layer_start
+                                  0.2);   // skip_layer_end
+
+    if (results == NULL) {
+        printf("generate failed\n");
+        return 1;
+    }
+
+    if (results[0].data == NULL) {
+        printf("generate failed\n");
+        free(results);
+        return 1;
+    }
+
+    // stbi_write_png returns 0 on failure.
+    int written = stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
+                                 results[0].data, 0, "");
+    free(results[0].data);
+    results[0].data = NULL;
+    free(results);
+
+    if (!written) {
+        printf("failed to save result image to '%s'\n", dst);
+        return 1;
+    }
+    printf("save result image to '%s'\n", dst);
+    return 0;
+}
+
+// Frees the loaded model context, if any. Always returns 0.
+int unload() {
+    if (sd_c != NULL) {
+        free_sd_ctx(sd_c);
+        sd_c = NULL;
+    }
+    return 0;
+}
diff --git a/backend/go/image/stablediffusion-ggml/gosd.go b/backend/go/image/stablediffusion-ggml/gosd.go
new file mode 100644
index 000000000000..050ff80da5d9
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.go
@@ -0,0 +1,62 @@
+package main
+
+// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
+// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
+// #include <gosd.h>
+// #include <stdlib.h>
+import "C"
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+// SDGGML is the image generation backend backed by the C bindings in gosd.cpp.
+type SDGGML struct {
+	base.SingleThread
+	threads int
+}
+
+// Load creates the native model context for opts.ModelFile using
+// opts.SchedulerType and opts.Threads.
+func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
+	sd.threads = int(opts.Threads)
+
+	schedulerType := C.CString(opts.SchedulerType)
+	defer C.free(unsafe.Pointer(schedulerType))
+
+	modelFile := C.CString(opts.ModelFile)
+	defer C.free(unsafe.Pointer(modelFile))
+
+	if ret := C.load_model(modelFile, schedulerType, C.int(opts.Threads)); ret != 0 {
+		return fmt.Errorf("could not load model %q", opts.ModelFile)
+	}
+
+	return nil
+}
+
+// GenerateImage renders opts.PositivePrompt and writes the resulting PNG to opts.Dst.
+func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
+	t := C.CString(opts.PositivePrompt)
+	defer C.free(unsafe.Pointer(t))
+
+	dst := C.CString(opts.Dst)
+	defer C.free(unsafe.Pointer(dst))
+
+	negative := C.CString(opts.NegativePrompt)
+	defer C.free(unsafe.Pointer(negative))
+
+	// The sampler name is currently carried in EnableParameters.
+	sampleMethod := C.CString(opts.EnableParameters)
+	defer C.free(unsafe.Pointer(sampleMethod))
+
+	ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), sampleMethod, dst)
+	if ret != 0 {
+		return fmt.Errorf("inference failed")
+	}
+
+	return nil
+}
diff --git a/backend/go/image/stablediffusion-ggml/gosd.h b/backend/go/image/stablediffusion-ggml/gosd.h
new file mode 100644
index 000000000000..300bf0305a99
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.h
@@ -0,0 +1,9 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+int load_model(char *model, char *schedule_selected, int threads);
+int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char* sample_method_selected, char *dst );
+int unload(void);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/backend/go/image/stablediffusion-ggml/main.go b/backend/go/image/stablediffusion-ggml/main.go
new file mode 100644
index 000000000000..acee74fac0d4
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/main.go
@@ -0,0 +1,20 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+import (
+	"flag"
+
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
+		panic(err)
+	}
+}