diff --git a/Makefile b/Makefile index a95abd48c93e..5d0a51caf8e8 100644 --- a/Makefile +++ b/Makefile @@ -145,7 +145,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -529,10 +529,6 @@ backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/lib CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ -backend-assets/grpc/falcon-ggml: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./backend/go/llm/falcon-ggml/ - backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/ diff --git a/README.md b/README.md index dbed541d1a68..45512a45e952 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/ggerganov/whisper.cpp - https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper -- https://github.com/cmp-nct/ggllm.cpp ## 🤗 Contributors diff --git a/backend/go/llm/falcon-ggml/main.go b/backend/go/llm/falcon-ggml/main.go deleted file mode 100644 index 426ae8233e1c..000000000000 --- a/backend/go/llm/falcon-ggml/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.Falcon{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/transformers/falcon.go b/backend/go/llm/transformers/falcon.go deleted file mode 100644 index 5299fb02f5ee..000000000000 --- a/backend/go/llm/transformers/falcon.go +++ /dev/null @@ -1,43 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Falcon struct { - base.SingleThread - - falcon *transformers.Falcon -} - -func (llm *Falcon) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewFalcon(opts.ModelFile) - llm.falcon = model - return err -} - -func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) { - return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 0792de517890..c123296995e6 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -104,7 +104,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/ggerganov/whisper.cpp - https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper -- https://github.com/cmp-nct/ggllm.cpp ## 🤗 Contributors diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 3f46dadd3330..98446e1dfd0f 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -16,7 +16,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | +| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A | @@ -35,7 +35,6 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | N/A | | [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A | | [piper](https://github.com/rhasspy/piper) ([binding](https://github.com/mudler/go-piper)) | Any piper onnx model | no | Text to voice | no | no | N/A | -| [falcon](https://github.com/cmp-nct/ggllm.cpp/tree/c12b2d65f732a0d8846db2244e070f0f3e73505c) ([binding](https://github.com/mudler/go-ggllm.cpp)) | Falcon *** | yes | GPT | no | yes | CUDA | | [sentencetransformers](https://github.com/UKPLab/sentence-transformers) | BERT | no | Embeddings only | yes | no | N/A | | `bark` | bark | no | Audio generation | no | no | yes | | `autogptq` | GPTQ | yes | GPT | yes | no | N/A | diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index e293669a7e14..8c5abdcc0f7b 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -34,7 +34,6 @@ const ( Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" Gpt4All = "gpt4all" - FalconGGMLBackend = "falcon-ggml" BertEmbeddingsBackend = "bert-embeddings" RwkvBackend = "rwkv" @@ -55,7 +54,6 @@ var AutoLoadBackends []string = []string{ Gpt4All, GPTNeoXBackend, BertEmbeddingsBackend, - FalconGGMLBackend, GPTJBackend, Gpt2Backend, DollyBackend,