diff --git a/Makefile b/Makefile index 9d8d3e957db4..b5d471565c76 100644 --- a/Makefile +++ b/Makefile @@ -30,15 +30,9 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d # go-piper version PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7 -# go-bloomz version -BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f - # stablediffusion version STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632 -# Go-ggllm -GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b - export BUILD_TYPE?= export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export CMAKE_ARGS?= @@ -129,7 +123,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -146,14 +140,6 @@ gpt4all: git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 -## go-ggllm -go-ggllm: - git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm - cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1 - -go-ggllm/libggllm.a: go-ggllm - $(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a - ## go-piper go-piper: git clone --recurse-submodules https://github.com/mudler/go-piper go-piper @@ -180,14 +166,6 @@ go-rwkv: go-rwkv/librwkv.a: go-rwkv cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. -## bloomz -bloomz: - git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz - cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1 - -bloomz/libbloomz.a: bloomz - cd bloomz && make libbloomz.a - go-bert/libgobert.a: go-bert $(MAKE) -C go-bert libgobert.a @@ -241,7 +219,7 @@ go-llama-stable/libbinding.a: go-llama-stable go-piper/libpiper_binding.a: go-piper $(MAKE) -C go-piper libpiper_binding.a example/main -get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion +get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion touch $@ replace: @@ -250,10 +228,8 @@ replace: $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert - $(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper - $(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm prepare-sources: get-sources replace $(GOCMD) mod download @@ -269,9 +245,7 @@ rebuild: ## Rebuilds the project $(MAKE) -C whisper.cpp clean $(MAKE) -C go-stable-diffusion clean $(MAKE) -C go-bert clean - $(MAKE) -C bloomz clean $(MAKE) -C go-piper clean - $(MAKE) -C go-ggllm clean $(MAKE) build prepare: prepare-sources $(OPTIONAL_TARGETS) @@ -289,10 +263,8 @@ clean: ## Remove build related file rm -rf ./backend-assets rm -rf ./go-rwkv rm -rf ./go-bert - rm -rf ./bloomz rm -rf ./whisper.cpp rm -rf ./go-piper - rm -rf ./go-ggllm rm -rf $(BINARY_NAME) rm -rf release/ $(MAKE) -C backend/cpp/llama clean @@ -418,10 +390,6 @@ protogen-python: backend-assets/grpc: mkdir -p backend-assets/grpc -backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/ - backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \ @@ -486,10 +454,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/ -backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/ - backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/ diff --git a/cmd/grpc/bloomz/main.go b/cmd/grpc/bloomz/main.go deleted file mode 100644 index 8d6303ba3959..000000000000 --- a/cmd/grpc/bloomz/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil { - panic(err) - } -} diff --git a/cmd/grpc/falcon/main.go b/cmd/grpc/falcon/main.go deleted file mode 100644 index 8ddf6236af22..000000000000 --- a/cmd/grpc/falcon/main.go +++ /dev/null @@ -1,25 +0,0 @@ -package main - -// GRPC Falcon server - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil { - panic(err) - } -} diff --git a/pkg/backend/llm/bloomz/bloomz.go b/pkg/backend/llm/bloomz/bloomz.go deleted file mode 100644 index 0775c77d153b..000000000000 --- a/pkg/backend/llm/bloomz/bloomz.go +++ /dev/null @@ -1,59 +0,0 @@ -package bloomz - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - "github.com/go-skynet/bloomz.cpp" -) - -type LLM struct { - base.SingleThread - - bloomz *bloomz.Bloomz -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - model, err := bloomz.New(opts.ModelFile) - llm.bloomz = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption { - predictOptions := []bloomz.PredictOption{ - bloomz.SetTemperature(float64(opts.Temperature)), - bloomz.SetTopP(float64(opts.TopP)), - bloomz.SetTopK(int(opts.TopK)), - bloomz.SetTokens(int(opts.Tokens)), - bloomz.SetThreads(int(opts.Threads)), - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed))) - } - - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/pkg/backend/llm/falcon/falcon.go b/pkg/backend/llm/falcon/falcon.go deleted file mode 100644 index 4b96b71f3333..000000000000 --- a/pkg/backend/llm/falcon/falcon.go +++ /dev/null @@ -1,145 +0,0 @@ -package falcon - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - ggllm "github.com/mudler/go-ggllm.cpp" -) - -type LLM struct { - base.SingleThread - - falcon *ggllm.Falcon -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - ggllmOpts := []ggllm.ModelOption{} - if opts.ContextSize != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize))) - } - // F16 doesn't seem to produce good output at all! - //if c.F16 { - // llamaOpts = append(llamaOpts, llama.EnableF16Memory) - //} - - if opts.NGPULayers != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers))) - } - - ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap)) - ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU)) - ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit)) - if opts.NBatch != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch))) - } else { - ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512)) - } - - model, err := ggllm.New(opts.ModelFile, ggllmOpts...) - llm.falcon = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption { - predictOptions := []ggllm.PredictOption{ - ggllm.SetTemperature(float64(opts.Temperature)), - ggllm.SetTopP(float64(opts.TopP)), - ggllm.SetTopK(int(opts.TopK)), - ggllm.SetTokens(int(opts.Tokens)), - ggllm.SetThreads(int(opts.Threads)), - } - - if opts.PromptCacheAll { - predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll) - } - - if opts.PromptCacheRO { - predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO) - } - - // Expected absolute path - if opts.PromptCachePath != "" { - predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath)) - } - - if opts.Mirostat != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat))) - } - - if opts.MirostatETA != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA))) - } - - if opts.MirostatTAU != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU))) - } - - if opts.Debug { - predictOptions = append(predictOptions, ggllm.Debug) - } - - predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...)) - - if opts.PresencePenalty != 0 { - predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty))) - } - - if opts.NKeep != 0 { - predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep))) - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch))) - } - - if opts.IgnoreEOS { - predictOptions = append(predictOptions, ggllm.IgnoreEOS) - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed))) - } - - //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) - - predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty))) - predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock)) - predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap)) - predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU)) - predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit)) - predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ))) - predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP))) - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - - predictOptions := buildPredictOptions(opts) - - predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool { - if token == "<|endoftext|>" { - return true - } - results <- token - return true - })) - - go func() { - _, err := llm.falcon.Predict(opts.Prompt, predictOptions...) - if err != nil { - fmt.Println("err: ", err) - } - close(results) - }() - - return nil -} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5ad9500ba148..fbc4746b6dea 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -18,7 +18,6 @@ const ( LlamaBackend = "llama" LlamaStableBackend = "llama-stable" LLamaCPP = "llama-cpp" - BloomzBackend = "bloomz" StarcoderBackend = "starcoder" GPTJBackend = "gptj" DollyBackend = "dolly" @@ -30,7 +29,6 @@ const ( Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" Gpt4All = "gpt4all" - FalconBackend = "falcon" FalconGGMLBackend = "falcon-ggml" BertEmbeddingsBackend = "bert-embeddings" @@ -46,7 +44,6 @@ var AutoLoadBackends []string = []string{ LlamaStableBackend, LlamaBackend, Gpt4All, - FalconBackend, GPTNeoXBackend, BertEmbeddingsBackend, FalconGGMLBackend, @@ -56,7 +53,6 @@ var AutoLoadBackends []string = []string{ MPTBackend, ReplitBackend, StarcoderBackend, - BloomzBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend,