diff --git a/.gitignore b/.gitignore index 65eb92570f6f..9f31131f6602 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,6 @@ prepare-sources go-ggml-transformers go-gpt2 -go-rwkv whisper.cpp /bloomz go-bert diff --git a/Makefile b/Makefile index bc54f319b924..8a7c6143f8c2 100644 --- a/Makefile +++ b/Makefile @@ -10,10 +10,6 @@ GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445 -# go-rwkv version -RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp -RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 - # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d @@ -209,7 +205,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server -ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad @@ -272,20 +267,6 @@ sources/go-piper: sources/go-piper/libpiper_binding.a: sources/go-piper $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o - -## RWKV -sources/go-rwkv.cpp: - mkdir -p sources/go-rwkv.cpp - cd sources/go-rwkv.cpp && \ - git init && \ - git remote add origin $(RWKV_REPO) && \ - git fetch origin && \ - git checkout $(RWKV_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp - cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. - ## stable diffusion sources/go-stable-diffusion: mkdir -p sources/go-stable-diffusion @@ -339,10 +320,9 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp +get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp replace: - $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp @@ -352,7 +332,6 @@ replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: - $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp @@ -368,7 +347,6 @@ prepare-sources: get-sources replace rebuild: ## Rebuilds the project $(GOCMD) clean -cache $(MAKE) -C sources/go-llama.cpp clean - $(MAKE) -C sources/go-rwkv.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-bert.cpp clean @@ -477,8 +455,6 @@ test-models/testmodel.ggml: wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav - wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv - wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json cp tests/models_fixtures/* test-models prepare-test: grpcs @@ -855,13 +831,6 @@ ifneq ($(UPX),) $(UPX) backend-assets/grpc/piper endif -backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/rwkv -endif - backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion diff --git a/README.md b/README.md index 1e4b0c38fbe8..2fd89863ef89 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,6 @@ LocalAI couldn't have been built without the help of great software already avai - https://github.com/antimatter15/alpaca.cpp - https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/ggerganov/whisper.cpp -- https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper ## 🤗 Contributors diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 7b75de5bd0cf..0fde74cbd3a6 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2299,6 +2299,7 @@ static void params_parse(const backend::ModelOptions* request, params.use_mmap = request->mmap(); params.flash_attn = request->flashattention(); params.no_kv_offload = request->nokvoffload(); + params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops) params.embedding = request->embeddings(); diff --git a/backend/go/llm/rwkv/main.go b/backend/go/llm/rwkv/main.go deleted file mode 100644 index acf4408799e1..000000000000 --- a/backend/go/llm/rwkv/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/rwkv/rwkv.go b/backend/go/llm/rwkv/rwkv.go deleted file mode 100644 index fe9cd8156af5..000000000000 --- a/backend/go/llm/rwkv/rwkv.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - "path/filepath" - - "github.com/donomii/go-rwkv.cpp" - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -const tokenizerSuffix = ".tokenizer.json" - -type LLM struct { - base.SingleThread - - rwkv *rwkv.RwkvState -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - tokenizerFile := opts.Tokenizer - if tokenizerFile == "" { - modelFile := filepath.Base(opts.ModelFile) - tokenizerFile = modelFile + tokenizerSuffix - } - modelPath := filepath.Dir(opts.ModelFile) - tokenizerPath := filepath.Join(modelPath, tokenizerFile) - - model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) - - if model == nil { - return fmt.Errorf("rwkv could not load model") - } - llm.rwkv = model - return nil -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - return "", err - } - - response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil) - - return response, nil -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - fmt.Println("Error processing input: ", err) - return - } - - llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool { - results <- s - return true - }) - close(results) - }() - - return nil -} - -func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) { - tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt) - if err != nil { - return pb.TokenizationResponse{}, err - } - - l := len(tokens) - i32Tokens := make([]int32, l) - - for i, t := range tokens { - i32Tokens[i] = int32(t.ID) - } - - return pb.TokenizationResponse{ - Length: int32(l), - Tokens: i32Tokens, - }, nil -} diff --git a/go.mod b/go.mod index 109cd906754c..3bc625acaf69 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,6 @@ require ( github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b github.com/containerd/containerd v1.7.19 github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 - github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44 github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index a5bedf79a7a6..dc8e84f7cf6e 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -46,7 +46,6 @@ const ( LLamaCPPGRPC = "llama-cpp-grpc" BertEmbeddingsBackend = "bert-embeddings" - RwkvBackend = "rwkv" WhisperBackend = "whisper" StableDiffusionBackend = "stablediffusion" TinyDreamBackend = "tinydream" diff --git a/tests/models_fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml index 3b47fa0a9a37..bf54394fd771 100644 --- a/tests/models_fixtures/rwkv.yaml +++ b/tests/models_fixtures/rwkv.yaml @@ -1,18 +1,23 @@ name: rwkv_test parameters: - model: rwkv + model: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf top_k: 80 temperature: 0.9 - max_tokens: 100 + max_tokens: 4098 top_p: 0.8 -context_size: 1024 -backend: "rwkv" -cutwords: -- "Bob:.*" +context_size: 4098 + roles: - user: "Bob:" - system: "Alice:" - assistant: "Alice:" + user: "User: " + system: "System: " + assistant: "Assistant: " + +stopwords: +- 'Assistant:' + template: - completion: rwkv_completion - chat: rwkv_chat \ No newline at end of file + chat: | + {{.Input}} + Assistant: + completion: | + {{.Input}} \ No newline at end of file diff --git a/tests/models_fixtures/rwkv_chat.tmpl b/tests/models_fixtures/rwkv_chat.tmpl deleted file mode 100644 index d2c0511eef26..000000000000 --- a/tests/models_fixtures/rwkv_chat.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob. - -Bob: Hello Alice, how are you doing? - -Alice: Hi Bob! Thanks, I'm fine. What about you? - -Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while? - -Alice: Not at all! I'm listening. - -{{.Input}} - -Alice: \ No newline at end of file diff --git a/tests/models_fixtures/rwkv_completion.tmpl b/tests/models_fixtures/rwkv_completion.tmpl deleted file mode 100644 index 8450377fd7df..000000000000 --- a/tests/models_fixtures/rwkv_completion.tmpl +++ /dev/null @@ -1 +0,0 @@ -Complete the following sentence: {{.Input}} \ No newline at end of file