From 9b46dcf00641c129df6a30853c5ada163adbf76e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:49:05 +0100 Subject: [PATCH 1/9] chore(model-gallery): :arrow_up: update checksum (#4261) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index c7e82259ee21..b2008fdd86c7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -678,8 +678,8 @@ model: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf files: - filename: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf - sha256: 0b1c10da004ffd61b860c9058265e9bdb7f53c7be8e87feece8896d680f5b8be uri: huggingface://QuantFactory/Llama-Sentient-3.2-3B-Instruct-GGUF/Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf + sha256: 3f855ce0522bfdc39fc826162ba6d89f15cc3740c5207da10e70baa3348b7812 - &qwen25 ## Qwen2.5 name: "qwen2.5-14b-instruct" @@ -3496,7 +3496,7 @@ - https://huggingface.co/AIDC-AI/Marco-o1 - https://huggingface.co/QuantFactory/Marco-o1-GGUF description: | - Marco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. We aim to address the question: "Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?" + Marco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. We aim to address the question: "Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?" overrides: parameters: model: Marco-o1.Q4_K_M.gguf From eeb22317b519ac47cb8847d9ca706c723e416482 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:49:29 +0100 Subject: [PATCH 2/9] chore(deps): Bump dcarbone/install-yq-action from 1.3.0 to 1.3.1 (#4253) Bumps [dcarbone/install-yq-action](https://github.com/dcarbone/install-yq-action) from 1.3.0 to 1.3.1. - [Release notes](https://github.com/dcarbone/install-yq-action/releases) - [Commits](https://github.com/dcarbone/install-yq-action/compare/v1.3.0...v1.3.1) --- updated-dependencies: - dependency-name: dcarbone/install-yq-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/checksum_checker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml index 27b0a8a9deeb..132443341edf 100644 --- a/.github/workflows/checksum_checker.yaml +++ b/.github/workflows/checksum_checker.yaml @@ -23,7 +23,7 @@ jobs: sudo pip install --upgrade pip pip install huggingface_hub - name: 'Setup yq' - uses: dcarbone/install-yq-action@v1.3.0 + uses: dcarbone/install-yq-action@v1.3.1 with: version: 'v4.44.2' download-compressed: true From 7492179c6711ce4ce44d6013216ac5e836a4ab64 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 09:50:24 +0100 Subject: [PATCH 3/9] chore(model): add llama-3.1_openscholar-8b to the gallery (#4262) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b2008fdd86c7..4f95cf4ed19d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2165,6 +2165,20 @@ - filename: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf sha256: 9eaa08a4872a26f56fe34b27a99f7bd0d22ee2b2d1c84cfcde2091b5f61af5fa uri: huggingface://mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF/Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1_openscholar-8b" + urls: + - https://huggingface.co/OpenScholar/Llama-3.1_OpenScholar-8B + - https://huggingface.co/bartowski/Llama-3.1_OpenScholar-8B-GGUF + description: | + Llama-3.1_OpenScholar-8B is a fine-tuned 8B model for scientific literature synthesis. Llama-3.1_OpenScholar-8B is trained on the os-data dataset. Developed by: University of Washington, Allen Institute for AI (AI2) + overrides: + parameters: + model: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf + files: + - filename: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf + sha256: 54865fc86451959b495c494a51bb1806c8b62bf1415600f0da2966a8a1fe6c7d + uri: huggingface://bartowski/Llama-3.1_OpenScholar-8B-GGUF/Llama-3.1_OpenScholar-8B-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "humanish-roleplay-llama-3.1-8b-i1" From 404ca3cc2327758ab846a0f45087f906c823defc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 11:12:57 +0100 Subject: [PATCH 4/9] chore(deps): bump llama.cpp to `47f931c8f9a26c072d71224bc8013cc66ea9e445` (#4263) chore(deps): bump llama.cpp to '47f931c8f9a26c072d71224bc8013cc66ea9e445' Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index be4971d621dc..bc54f319b924 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=cce5a9007572c6e9fa522296b77571d2e5071357 +CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index d21735daa396..7b75de5bd0cf 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -203,7 +203,7 @@ struct llama_client_slot std::string stopping_word; // sampling - struct common_sampler_params sparams; + struct common_params_sampling sparams; common_sampler *ctx_sampling =
nullptr; int32_t ga_i = 0; // group-attention state @@ -662,7 +662,7 @@ struct llama_server_context bool launch_slot_with_data(llama_client_slot* &slot, json data) { slot_params default_params; - common_sampler_params default_sparams; + common_params_sampling default_sparams; slot->params.stream = json_value(data, "stream", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false); From f1b86d6e7ff32ac844d484be2e52746fd2f003de Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 14:22:03 +0100 Subject: [PATCH 5/9] Revert "chore(deps): Bump whisper-timestamped from 1.14.2 to 1.15.8 in /backend/python/openvoice" (#4267) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "chore(deps): Bump whisper-timestamped from 1.14.2 to 1.15.8 in /backe…" This reverts commit 0f8f249465082cbcd61e7f985fd4e44a384817a5. --- backend/python/openvoice/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index b246285c0cf5..729c1a28f166 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -12,7 +12,7 @@ numpy==1.22.0 eng_to_ipa==0.0.2 inflect==7.0.0 unidecode==1.3.7 -whisper-timestamped==1.15.8 +whisper-timestamped==1.14.2 openai python-dotenv pypinyin==0.50.0 From 03800cccebe8f125801ad9dae1d2a2940ffb53ca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 14:22:10 +0100 Subject: [PATCH 6/9] =?UTF-8?q?Revert=20"chore(deps):=20Bump=20faster-whis?= =?UTF-8?q?per=20from=200.9.0=20to=201.1.0=20in=20/back=E2=80=A6=20(#4268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "chore(deps): Bump faster-whisper from 0.9.0 to 1.1.0 in /backend/pyth…" This reverts commit 6c8e870812724db2f8c62fc1cc5fbf8cbd5da51c. 
--- backend/python/openvoice/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index 729c1a28f166..d38351b1ed87 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -5,7 +5,7 @@ optimum[openvino] grpcio==1.68.0 protobuf librosa==0.9.1 -faster-whisper==1.1.0 +faster-whisper==0.9.0 pydub==0.25.1 wavmark==0.0.3 numpy==1.22.0 From 2b62260b6dcdcc19e3bf403255e17aa3bb6e1ff9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 14:22:55 +0100 Subject: [PATCH 7/9] feat(models): use rwkv from llama.cpp (#4264) feat(rwkv): use rwkv from llama.cpp Signed-off-by: Ettore Di Giacinto --- .gitignore | 1 - Makefile | 33 +------- README.md | 1 - backend/cpp/llama/grpc-server.cpp | 1 + backend/go/llm/rwkv/main.go | 21 ----- backend/go/llm/rwkv/rwkv.go | 95 ---------------------- go.mod | 1 - pkg/model/initializers.go | 1 - tests/models_fixtures/rwkv.yaml | 27 +++--- tests/models_fixtures/rwkv_chat.tmpl | 13 --- tests/models_fixtures/rwkv_completion.tmpl | 1 - 11 files changed, 18 insertions(+), 177 deletions(-) delete mode 100644 backend/go/llm/rwkv/main.go delete mode 100644 backend/go/llm/rwkv/rwkv.go delete mode 100644 tests/models_fixtures/rwkv_chat.tmpl delete mode 100644 tests/models_fixtures/rwkv_completion.tmpl diff --git a/.gitignore b/.gitignore index 65eb92570f6f..9f31131f6602 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,6 @@ prepare-sources go-ggml-transformers go-gpt2 -go-rwkv whisper.cpp /bloomz go-bert diff --git a/Makefile b/Makefile index bc54f319b924..8a7c6143f8c2 100644 --- a/Makefile +++ b/Makefile @@ -10,10 +10,6 @@ GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445 -# go-rwkv version -RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp -RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 - # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d @@ -209,7 +205,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server -ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad @@ -272,20 +267,6 @@ sources/go-piper: sources/go-piper/libpiper_binding.a: sources/go-piper $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o - -## RWKV -sources/go-rwkv.cpp: - mkdir -p sources/go-rwkv.cpp - cd sources/go-rwkv.cpp && \ - git init && \ - git remote add origin $(RWKV_REPO) && \ - git fetch origin && \ - git checkout $(RWKV_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp - cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. 
- ## stable diffusion sources/go-stable-diffusion: mkdir -p sources/go-stable-diffusion @@ -339,10 +320,9 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp +get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp replace: - $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp @@ -352,7 +332,6 @@ replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: - $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp @@ -368,7 +347,6 @@ prepare-sources: get-sources replace rebuild: ## Rebuilds the project $(GOCMD) clean -cache $(MAKE) -C sources/go-llama.cpp clean - $(MAKE) -C sources/go-rwkv.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-bert.cpp clean @@ -477,8 +455,6 @@ test-models/testmodel.ggml: wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav - wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv - wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json cp tests/models_fixtures/* test-models prepare-test: grpcs @@ -855,13 +831,6 @@ ifneq ($(UPX),) $(UPX) backend-assets/grpc/piper endif -backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/rwkv -endif - backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion diff --git a/README.md b/README.md index 1e4b0c38fbe8..2fd89863ef89 100644 --- a/README.md +++ b/README.md @@ -241,7 +241,6 @@ LocalAI couldn't have been built without the help of great software already avai - 
https://github.com/antimatter15/alpaca.cpp - https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/ggerganov/whisper.cpp -- https://github.com/saharNooby/rwkv.cpp - https://github.com/rhasspy/piper ## 🤗 Contributors diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 7b75de5bd0cf..0fde74cbd3a6 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2299,6 +2299,7 @@ static void params_parse(const backend::ModelOptions* request, params.use_mmap = request->mmap(); params.flash_attn = request->flashattention(); params.no_kv_offload = request->nokvoffload(); + params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops) params.embedding = request->embeddings(); diff --git a/backend/go/llm/rwkv/main.go b/backend/go/llm/rwkv/main.go deleted file mode 100644 index acf4408799e1..000000000000 --- a/backend/go/llm/rwkv/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/rwkv/rwkv.go b/backend/go/llm/rwkv/rwkv.go deleted file mode 100644 index fe9cd8156af5..000000000000 --- a/backend/go/llm/rwkv/rwkv.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - "path/filepath" - - "github.com/donomii/go-rwkv.cpp" - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -const tokenizerSuffix = ".tokenizer.json" - -type LLM struct { - base.SingleThread - - rwkv *rwkv.RwkvState -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - tokenizerFile := opts.Tokenizer - if tokenizerFile == "" { - modelFile := filepath.Base(opts.ModelFile) - tokenizerFile = modelFile + tokenizerSuffix - } - modelPath := filepath.Dir(opts.ModelFile) - tokenizerPath := filepath.Join(modelPath, tokenizerFile) - - model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) - - if model == nil { - return fmt.Errorf("rwkv could not load model") - } - llm.rwkv = model - return nil -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - return "", err - } - - response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil) - - return response, nil -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - - stopWord := "\n" - if len(opts.StopPrompts) > 0 { - stopWord = opts.StopPrompts[0] - } - - if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil { - fmt.Println("Error processing input: ", err) - return - } - - llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool { - results <- s - return true - }) - close(results) - }() - - return nil -} - -func (llm *LLM) TokenizeString(opts *pb.PredictOptions) 
(pb.TokenizationResponse, error) { - tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt) - if err != nil { - return pb.TokenizationResponse{}, err - } - - l := len(tokens) - i32Tokens := make([]int32, l) - - for i, t := range tokens { - i32Tokens[i] = int32(t.ID) - } - - return pb.TokenizationResponse{ - Length: int32(l), - Tokens: i32Tokens, - }, nil -} diff --git a/go.mod b/go.mod index 109cd906754c..3bc625acaf69 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,6 @@ require ( github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b github.com/containerd/containerd v1.7.19 github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 - github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44 github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index a5bedf79a7a6..dc8e84f7cf6e 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -46,7 +46,6 @@ const ( LLamaCPPGRPC = "llama-cpp-grpc" BertEmbeddingsBackend = "bert-embeddings" - RwkvBackend = "rwkv" WhisperBackend = "whisper" StableDiffusionBackend = "stablediffusion" TinyDreamBackend = "tinydream" diff --git a/tests/models_fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml index 3b47fa0a9a37..bf54394fd771 100644 --- a/tests/models_fixtures/rwkv.yaml +++ b/tests/models_fixtures/rwkv.yaml @@ -1,18 +1,23 @@ name: rwkv_test parameters: - model: rwkv + model: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf top_k: 80 temperature: 0.9 - max_tokens: 100 + max_tokens: 4098 top_p: 0.8 -context_size: 1024 -backend: "rwkv" -cutwords: -- "Bob:.*" +context_size: 4098 + roles: - user: "Bob:" - system: "Alice:" - assistant: "Alice:" + user: "User: " + system: "System: " + assistant: "Assistant: " + +stopwords: +- 'Assistant:' + template: - completion: rwkv_completion - chat: rwkv_chat \ No newline at end of file + chat: | + {{.Input}} + Assistant: + completion: | + {{.Input}} \ No newline at end of file diff --git a/tests/models_fixtures/rwkv_chat.tmpl b/tests/models_fixtures/rwkv_chat.tmpl deleted file mode 100644 index d2c0511eef26..000000000000 --- a/tests/models_fixtures/rwkv_chat.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob. - -Bob: Hello Alice, how are you doing? - -Alice: Hi Bob! Thanks, I'm fine. What about you? - -Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while? - -Alice: Not at all! I'm listening. 
- -{{.Input}} - -Alice: \ No newline at end of file diff --git a/tests/models_fixtures/rwkv_completion.tmpl b/tests/models_fixtures/rwkv_completion.tmpl deleted file mode 100644 index 8450377fd7df..000000000000 --- a/tests/models_fixtures/rwkv_completion.tmpl +++ /dev/null @@ -1 +0,0 @@ -Complete the following sentence: {{.Input}} \ No newline at end of file From 369110e6bfe6cceb8e2ffee2883c8fdd0bb5723d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 14:51:37 +0100 Subject: [PATCH 8/9] chore(model): add rwkv-6-world-7b to the gallery (#4270) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ gallery/rwkv.yaml | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 gallery/rwkv.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index 4f95cf4ed19d..bdd952c82750 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,27 @@ --- +- &rwkv + url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" + name: "rwkv-6-world-7b" + license: apache-2.0 + urls: + - https://huggingface.co/RWKV/rwkv-6-world-7b + - https://huggingface.co/bartowski/rwkv-6-world-7b-GGUF + tags: + - llm + - rwkv + - cpu + - gpu + - rnn + description: | + RWKV (pronounced RwaKuv) is an RNN with GPT-level LLM performance, and can also be directly trained like a GPT transformer (parallelizable). We are at RWKV-7. + So it's combining the best of RNN and transformer - great performance, fast inference, fast training, saves VRAM, "infinite" ctxlen, and free text embedding. Moreover it's 100% attention-free, and a Linux Foundation AI project. + overrides: + parameters: + model: rwkv-6-world-7b-Q4_K_M.gguf + files: + - filename: rwkv-6-world-7b-Q4_K_M.gguf + sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273 + uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf - &qwen25coder name: "qwen2.5-coder-14b" url: "github:mudler/LocalAI/gallery/chatml.yaml@master" diff --git a/gallery/rwkv.yaml b/gallery/rwkv.yaml new file mode 100644 index 000000000000..41dfcfad4071 --- /dev/null +++ b/gallery/rwkv.yaml @@ -0,0 +1,23 @@ +--- +name: "rwkv" + +config_file: | + parameters: + top_k: 80 + temperature: 0.9 + max_tokens: 4098 + top_p: 0.8 + context_size: 4098 + + roles: + user: "User: " + system: "System: " + assistant: "Assistant: " + + stopwords: + - 'Assistant:' + + template: + chat: "{{.Input}}\nAssistant: " + completion: | + {{.Input}} From e8128a339a4f5534d3bd0efcf2b1202c3e927fa2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 26 Nov 2024 14:51:55 +0100 Subject: [PATCH 9/9] chore(scripts): handle summarization errors (#4271) Signed-off-by: Ettore Di Giacinto --- scripts/model_gallery_info.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/model_gallery_info.py b/scripts/model_gallery_info.py index 7df537017f66..ee2580377688 100644 --- a/scripts/model_gallery_info.py +++ b/scripts/model_gallery_info.py @@ -103,7 +103,10 @@ def format_description(description): if readmeFile: # If there is a README file, read it readme = fs.read_text(readmeFile) - summarized_readme = summarize(readme) + try: + summarized_readme = summarize(readme) + except Exception as e: + print(f"Error summarizing the README: {str(e)}", file=sys.stderr) summarized_readme = format_description(summarized_readme) print("Model correctly processed")
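Note on PATCH 9/9: as diffed, `summarized_readme` is bound only inside the `try` block, so if `summarize(readme)` raises, the unconditional `summarized_readme = format_description(summarized_readme)` that follows fails with a NameError instead of degrading gracefully. Below is a minimal defensive sketch, not the committed change: it reuses the `summarize`, `format_description`, and `fs.read_text` helpers visible in the diff, and it assumes that falling back to the raw README text is an acceptable substitute when summarization fails.

    if readmeFile:
        # If there is a README file, read it
        readme = fs.read_text(readmeFile)
        # Assumed fallback: keep the raw README text so summarized_readme
        # is always bound, even when summarize() raises below.
        summarized_readme = readme
        try:
            summarized_readme = summarize(readme)
        except Exception as e:
            print(f"Error summarizing the README: {str(e)}", file=sys.stderr)
        summarized_readme = format_description(summarized_readme)

With the fallback bound before the try, a summarization failure still yields a formatted description from the raw README rather than aborting the script.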