From b5bcda2cb6ca9a8ed8b1acb9732f7bc634e1d788 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Fri, 29 Nov 2024 18:02:08 +0100
Subject: [PATCH] feat(backend): add stablediffusion-ggml

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 Makefile                                      |  45 ++-
 .../go/image/stablediffusion-ggml/Makefile    |  21 ++
 .../go/image/stablediffusion-ggml/gosd.cpp    | 292 ++++++++++++++++++
 backend/go/image/stablediffusion-ggml/gosd.go |  59 ++++
 backend/go/image/stablediffusion-ggml/gosd.h  |   8 +
 backend/go/image/stablediffusion-ggml/main.go |  20 ++
 6 files changed, 438 insertions(+), 7 deletions(-)
 create mode 100644 backend/go/image/stablediffusion-ggml/Makefile
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.cpp
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.go
 create mode 100644 backend/go/image/stablediffusion-ggml/gosd.h
 create mode 100644 backend/go/image/stablediffusion-ggml/main.go

diff --git a/Makefile b/Makefile
index c6e80552f118..83db1725c73c 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,10 @@ TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0
 
+# bark.cpp
+STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
+STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
+
 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
 ONNX_OS?=linux
@@ -209,6 +213,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ifeq ($(ONNX_OS),linux)
 ifeq ($(ONNX_ARCH),x64)
 	ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
+	ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
 endif
 endif
 
@@ -244,15 +249,19 @@ sources/go-llama.cpp:
 	git checkout $(GOLLAMA_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
+sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
+	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+## bark.cpp
 sources/bark.cpp:
-	git clone --recursive https://github.com/PABannier/bark.cpp.git sources/bark.cpp && \
+	git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
 	cd sources/bark.cpp && \
 	git checkout $(BARKCPP_VERSION) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/bark.cpp/build/libbark.a: sources/bark.cpp
 	cd sources/bark.cpp && \
-	mkdir build && \
+	mkdir -p build && \
 	cd build && \
 	cmake $(CMAKE_ARGS) .. && \
 	cmake --build . --config Release
@@ -260,9 +269,6 @@ sources/bark.cpp/build/libbark.a: sources/bark.cpp
 backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
 	$(MAKE) -C backend/go/bark libbark.a
 
-sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
-	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
 ## go-piper
 sources/go-piper:
 	mkdir -p sources/go-piper
@@ -276,7 +282,7 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
 
-## stable diffusion
+## stable diffusion (onnx)
 sources/go-stable-diffusion:
 	mkdir -p sources/go-stable-diffusion
 	cd sources/go-stable-diffusion && \
@@ -289,6 +295,30 @@ sources/go-stable-diffusion:
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 
+## stablediffusion (ggml)
+sources/stablediffusion-ggml.cpp:
+	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
+	cd sources/stablediffusion-ggml.cpp && \
+	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
+	cd sources/stablediffusion-ggml.cpp && \
+	mkdir -p build && \
+	cd build && \
+	cmake $(CMAKE_ARGS) .. && \
+	cmake --build . --config Release
+
+backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
+	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
+
+backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion-ggml
+endif
+
 sources/onnxruntime:
 	mkdir -p sources/onnxruntime
 	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
@@ -329,7 +359,7 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/go-piper sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
@@ -372,6 +402,7 @@ clean: ## Remove build related file
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/go/bark clean
 	$(MAKE) -C backend/cpp/llama clean
+	$(MAKE) -C backend/go/image/stablediffusion-ggml clean
 	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
diff --git a/backend/go/image/stablediffusion-ggml/Makefile b/backend/go/image/stablediffusion-ggml/Makefile
new file mode 100644
index 000000000000..cca9bf6e89dd
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/Makefile
@@ -0,0 +1,21 @@
+INCLUDE_PATH := $(abspath ./)
+LIBRARY_PATH := $(abspath ./)
+
+AR?=ar
+
+BUILD_TYPE?=
+# keep standard at C11 and C++11
+CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
+
+# warnings
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+
+gosd.o:
+	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
+
+libsd.a: gosd.o
+	cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
+	$(AR) rcs libsd.a gosd.o
+
+clean:
+	rm -f gosd.o libsd.a
\ No newline at end of file
diff --git a/backend/go/image/stablediffusion-ggml/gosd.cpp b/backend/go/image/stablediffusion-ggml/gosd.cpp
new file mode 100644
index 000000000000..1c6d2821142b
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.cpp
@@ -0,0 +1,292 @@
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+#include "gosd.h"
+
+// #include "preprocessing.hpp"
+#include "flux.hpp"
+#include "stable-diffusion.h"
+
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_STATIC
+#include "stb_image.h"
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#include "stb_image_write.h"
+
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
+#define STB_IMAGE_RESIZE_STATIC
+#include "stb_image_resize.h"
+
+
+
+const char* rng_type_to_str[] = {
+    "std_default",
+    "cuda",
+};
+
+// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
+const char* sample_method_str[] = {
+    "euler_a",
+    "euler",
+    "heun",
+    "dpm2",
+    "dpm++2s_a",
+    "dpm++2m",
+    "dpm++2mv2",
+    "ipndm",
+    "ipndm_v",
+    "lcm",
+};
+
+// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
+const char* schedule_str[] = {
+    "default",
+    "discrete",
+    "karras",
+    "exponential",
+    "ays",
+    "gits",
+};
+
+const char* modes_str[] = {
+    "txt2img",
+    "img2img",
+    "img2vid",
+    "convert",
+};
+
+enum SDMode {
+    TXT2IMG,
+    IMG2IMG,
+    IMG2VID,
+    CONVERT,
+    MODE_COUNT
+};
+
+struct SDParams {
+    int n_threads = -1;
+    SDMode mode   = TXT2IMG;
+    std::string model_path;
+    std::string clip_l_path;
+    std::string clip_g_path;
+    std::string t5xxl_path;
+    std::string diffusion_model_path;
+    std::string vae_path;
+    std::string taesd_path;
+    std::string esrgan_path;
+    std::string controlnet_path;
+    std::string embeddings_path;
+    std::string stacked_id_embeddings_path;
+    std::string input_id_images_path;
+    sd_type_t wtype = SD_TYPE_COUNT;
+    std::string lora_model_dir;
+    std::string output_path = "output.png";
+    std::string input_path;
+    std::string control_image_path;
+
+    std::string prompt;
+    std::string negative_prompt;
+    float min_cfg     = 1.0f;
+    float cfg_scale   = 7.0f;
+    float guidance    = 3.5f;
+    float style_ratio = 20.f;
+    int clip_skip     = -1;  // <= 0 represents unspecified
+    int width         = 512;
+    int height        = 512;
+    int batch_count   = 1;
+
+    int video_frames         = 6;
+    int motion_bucket_id     = 127;
+    int fps                  = 6;
+    float augmentation_level = 0.f;
+
+    sample_method_t sample_method = EULER_A;
+    schedule_t schedule           = DEFAULT;
+    int sample_steps              = 20;
+    float strength                = 0.75f;
+    float control_strength        = 0.9f;
+    rng_type_t rng_type           = CUDA_RNG;
+    int64_t seed                  = 42;
+    bool verbose                  = false;
+    bool vae_tiling               = false;
+    bool control_net_cpu          = false;
+    bool normalize_input          = false;
+    bool clip_on_cpu              = false;
+    bool vae_on_cpu               = false;
+    bool diffusion_flash_attn     = false;
+    bool canny_preprocess         = false;
+    bool color                    = false;
+    int upscale_repeats           = 1;
+
+    std::vector<int> skip_layers = {7, 8, 9};
+    float slg_scale              = 0.;
+    float skip_layer_start       = 0.01;
+    float skip_layer_end         = 0.2;
+};
+
+void print_params(SDParams params) {
+    printf("Option: \n");
+    printf("    n_threads:         %d\n", params.n_threads);
+    printf("    mode:              %s\n", modes_str[params.mode]);
+    printf("    model_path:        %s\n", params.model_path.c_str());
+    printf("    wtype:             %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
+    printf("    clip_l_path:       %s\n", params.clip_l_path.c_str());
+    printf("    clip_g_path:       %s\n", params.clip_g_path.c_str());
+    printf("    t5xxl_path:        %s\n", params.t5xxl_path.c_str());
+    printf("    diffusion_model_path:   %s\n", params.diffusion_model_path.c_str());
+    printf("    vae_path:          %s\n", params.vae_path.c_str());
+    printf("    taesd_path:        %s\n", params.taesd_path.c_str());
+    printf("    esrgan_path:       %s\n", params.esrgan_path.c_str());
+    printf("    controlnet_path:   %s\n", params.controlnet_path.c_str());
+    printf("    embeddings_path:   %s\n", params.embeddings_path.c_str());
+    printf("    stacked_id_embeddings_path:   %s\n", params.stacked_id_embeddings_path.c_str());
+    printf("    input_id_images_path:   %s\n", params.input_id_images_path.c_str());
+    printf("    style ratio:       %.2f\n", params.style_ratio);
+    printf("    normalize input image :  %s\n", params.normalize_input ? "true" : "false");
+    printf("    output_path:       %s\n", params.output_path.c_str());
+    printf("    init_img:          %s\n", params.input_path.c_str());
+    printf("    control_image:     %s\n", params.control_image_path.c_str());
+    printf("    clip on cpu:       %s\n", params.clip_on_cpu ? "true" : "false");
+    printf("    controlnet cpu:    %s\n", params.control_net_cpu ? "true" : "false");
+    printf("    vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
+    printf("    diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
+    printf("    strength(control): %.2f\n", params.control_strength);
+    printf("    prompt:            %s\n", params.prompt.c_str());
+    printf("    negative_prompt:   %s\n", params.negative_prompt.c_str());
+    printf("    min_cfg:           %.2f\n", params.min_cfg);
+    printf("    cfg_scale:         %.2f\n", params.cfg_scale);
+    printf("    slg_scale:         %.2f\n", params.slg_scale);
+    printf("    guidance:          %.2f\n", params.guidance);
+    printf("    clip_skip:         %d\n", params.clip_skip);
+    printf("    width:             %d\n", params.width);
+    printf("    height:            %d\n", params.height);
+    printf("    sample_method:     %s\n", sample_method_str[params.sample_method]);
+    printf("    schedule:          %s\n", schedule_str[params.schedule]);
+    printf("    sample_steps:      %d\n", params.sample_steps);
+    printf("    strength(img2img): %.2f\n", params.strength);
+    printf("    rng:               %s\n", rng_type_to_str[params.rng_type]);
+    printf("    seed:              %ld\n", params.seed);
+    printf("    batch_count:       %d\n", params.batch_count);
+    printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
+    printf("    upscale_repeats:   %d\n", params.upscale_repeats);
+}
+
+
+ sd_ctx_t* sd_c;
+
+int load_model(char *model, char *schedule_selected, int threads) {
+
+    int schedule_found            = -1;
+    for (int d = 0; d < N_SCHEDULES; d++) {
+        if (!strcmp(schedule_selected, schedule_str[d])) {
+            schedule_found = d;
+        }
+    }
+    if (schedule_found == -1) {
+        printf("invalid scheduler\n");
+        return 1;
+    }
+    schedule_t schedule = (schedule_t)schedule_found;
+
+    sd_ctx_t* sd_ctx = new_sd_ctx(model,
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  "",
+                                  false,
+                                  false,
+                                  true,
+                                  threads,
+                                  SD_TYPE_COUNT,
+                                  STD_DEFAULT_RNG,
+                                  schedule,
+                                  false,
+                                  false,
+                                  false,
+                                  false);
+
+    if (sd_ctx == NULL) {
+        printf("new_sd_ctx_t failed\n");
+        return 1;
+    }
+
+    sd_c = sd_ctx;
+
+    return 0;
+}
+
+int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char* sample_method_selected, char *dst ) {
+
+    sd_image_t* results;
+
+
+    int sample_method_found            = -1;
+    for (int m = 0; m < N_SAMPLE_METHODS; m++) {
+        if (!strcmp(sample_method_selected, sample_method_str[m])) {
+            sample_method_found = m;
+        }
+    }
+    if (sample_method_found == -1) {
+        printf("generate failed\n");
+        return 1;
+    }
+    sample_method_t sample_method = (sample_method_t)sample_method_found;
+    std::vector<int> skip_layers = {7, 8, 9};
+    results = txt2img(sd_c,
+                            text,
+                            negativeText,
+                            -1, //clip_skip
+                            7.0f, // sfg_scale
+                            3.5f,
+                            width,
+                            height,
+                            sample_method, 
+                            steps,
+                            seed,
+                            1,
+                            NULL,
+                            0.9f,
+                            20.f,
+                            false,
+                            "",
+                            skip_layers.data(),
+                            skip_layers.size(),
+                            0,
+                            0.01,
+                            0.2);
+
+    if (results == NULL) {
+        printf("generate failed\n");
+        return 1;
+    }
+
+    if (results[0].data == NULL) {
+        printf("generate failed\n");
+        return 1;
+    }
+
+  stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
+                       results[0].data, 0, "");
+    printf("save result image to '%s'\n", dst);
+    free(results[0].data);
+    results[0].data = NULL;
+
+    free(results);
+}
+
+int unload() {
+    free_sd_ctx(sd_c);
+}
+
diff --git a/backend/go/image/stablediffusion-ggml/gosd.go b/backend/go/image/stablediffusion-ggml/gosd.go
new file mode 100644
index 000000000000..050ff80da5d9
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.go
@@ -0,0 +1,59 @@
+package main
+
+// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
+// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
+// #include <gosd.h>
+// #include <stdlib.h>
+import "C"
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+type SDGGML struct {
+	base.SingleThread
+	threads int
+}
+
+func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
+
+	sd.threads = int(opts.Threads)
+
+	schedulerType := C.CString(opts.SchedulerType)
+	defer C.free(unsafe.Pointer(schedulerType))
+
+	modelFile := C.CString(opts.ModelFile)
+	defer C.free(unsafe.Pointer(modelFile))
+
+	ret := C.load_model(modelFile, schedulerType, C.int(opts.Threads))
+	if ret != 0 {
+		return fmt.Errorf("inference failed")
+	}
+
+	return nil
+}
+
+func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
+	t := C.CString(opts.PositivePrompt)
+	defer C.free(unsafe.Pointer(t))
+
+	dst := C.CString(opts.Dst)
+	defer C.free(unsafe.Pointer(dst))
+
+	negative := C.CString(opts.NegativePrompt)
+	defer C.free(unsafe.Pointer(negative))
+
+	sampleMethod := C.CString(opts.EnableParameters)
+	defer C.free(unsafe.Pointer(sampleMethod))
+
+	ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), sampleMethod, dst)
+	if ret != 0 {
+		return fmt.Errorf("inference failed")
+	}
+
+	return nil
+}
diff --git a/backend/go/image/stablediffusion-ggml/gosd.h b/backend/go/image/stablediffusion-ggml/gosd.h
new file mode 100644
index 000000000000..300bf0305a99
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/gosd.h
@@ -0,0 +1,8 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+int load_model(char *model, char *schedule_selected, int threads);
+int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char* sample_method_selected, char *dst );
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/backend/go/image/stablediffusion-ggml/main.go b/backend/go/image/stablediffusion-ggml/main.go
new file mode 100644
index 000000000000..acee74fac0d4
--- /dev/null
+++ b/backend/go/image/stablediffusion-ggml/main.go
@@ -0,0 +1,20 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+import (
+	"flag"
+
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
+		panic(err)
+	}
+}