From 4dd9ac39b079b3b22a8b37f89e3a1780ba4a2616 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 23 Dec 2024 12:34:39 +0100
Subject: [PATCH 01/28] chore(ci): comment arm64 job until we find a native CI
 runner (#4452)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/workflows/image.yml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 400bcdc743dd..e806f123f143 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -362,16 +362,16 @@ jobs:
             base-image: "ubuntu:22.04"
             skip-drivers: 'false'
             makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            tag-latest: 'false'
-            tag-suffix: '-nvidia-l4t-arm64-core'
-            latest-image: 'latest-nvidia-l4t-arm64-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'true'
\ No newline at end of file
+          # - build-type: 'cublas'
+          #   cuda-major-version: "12"
+          #   cuda-minor-version: "0"
+          #   platforms: 'linux/arm64'
+          #   tag-latest: 'false'
+          #   tag-suffix: '-nvidia-l4t-arm64-core'
+          #   latest-image: 'latest-nvidia-l4t-arm64-core'
+          #   ffmpeg: 'true'
+          #   image-type: 'core'
+          #   base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          #   runs-on: 'arc-runner-set'
+          #   makeflags: "--jobs=4 --output-sync=target"
+          #   skip-drivers: 'true'
\ No newline at end of file

From 138cd97ce7f599cab64331f0d1a743b38b32f587 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 23 Dec 2024 15:37:57 +0100
Subject: [PATCH 02/28] chore(ci): try to add CirrusCI to build arm64 images
 natively

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .cirrus.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .cirrus.yml

diff --git a/.cirrus.yml b/.cirrus.yml
new file mode 100644
index 000000000000..595bd38671ec
--- /dev/null
+++ b/.cirrus.yml
@@ -0,0 +1,24 @@
+## This builds the docker image for L4T with the arm64 architecture
+docker_builder:
+  only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != ''
+  env:
+    CIRRUS_ARCH: arm64
+    IMAGE_NAME: localai/localai
+    DOCKER_USERNAME: ENCRYPTED[!944ba150d3a7a27f3a9b7d3cc0f481989907ddeea9391a345ec751b70de1620f064cce952491a2fe2b434b1b9e0270b2!]
+    DOCKER_PASSWORD: ENCRYPTED[!2c9b4746de765859c11df043adafe1247876c7d535edc439eac66d8c1e88029b96e67a8023c36dfec30ee78ca5109089!]
+  build_script: |
+    BUILD_ARGS="--build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core"
+    if [ -n "$CIRRUS_TAG" ]; then
+      docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core .
+      docker tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core $IMAGE_NAME:latest-nvidia-l4t-arm64-core
+    else
+      docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core .
+    fi
+  login_script: echo $DOCKER_PASSWORD | docker login -u $DOCKER_USERNAME --password-stdin
+  push_script: |
+    if [ -n "$CIRRUS_TAG" ]; then
+      docker push $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core
+      docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core
+    else
+      docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core
+    fi
\ No newline at end of file

From 6477913e8f7d9e3c83184bcd80a32dc1ce2bd8e4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 23 Dec 2024 16:43:32 +0100
Subject: [PATCH 03/28] chore(ci): increase task timeout

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 .cirrus.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.cirrus.yml b/.cirrus.yml
index 595bd38671ec..e9fe2db2f78c 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,6 +1,7 @@
 ## This builds the docker image for L4T with the arm64 architecture
 docker_builder:
   only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != ''
+  timeout_in: 120m
   env:
     CIRRUS_ARCH: arm64
     IMAGE_NAME: localai/localai
@@ -21,4 +22,4 @@ docker_builder:
       docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core
     else
       docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core
-    fi
\ No newline at end of file
+    fi

From a3b675b09ed47ec2dd0f3f373e6dab9d03f9ab99 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 23 Dec 2024 18:31:50 +0100
Subject: [PATCH 04/28] Delete .cirrus.yml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 .cirrus.yml | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 .cirrus.yml

diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index e9fe2db2f78c..000000000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-## This builds the docker image for L4T with the arm64 architecture
-docker_builder:
-  only_if: $CIRRUS_TAG != '' || $CIRRUS_BRANCH != ''
-  timeout_in: 120m
-  env:
-    CIRRUS_ARCH: arm64
-    IMAGE_NAME: localai/localai
-    DOCKER_USERNAME: ENCRYPTED[!944ba150d3a7a27f3a9b7d3cc0f481989907ddeea9391a345ec751b70de1620f064cce952491a2fe2b434b1b9e0270b2!]
-    DOCKER_PASSWORD: ENCRYPTED[!2c9b4746de765859c11df043adafe1247876c7d535edc439eac66d8c1e88029b96e67a8023c36dfec30ee78ca5109089!]
-  build_script: |
-    BUILD_ARGS="--build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core"
-    if [ -n "$CIRRUS_TAG" ]; then
-      docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core .
-      docker tag $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core $IMAGE_NAME:latest-nvidia-l4t-arm64-core
-    else
-      docker build $BUILD_ARGS --tag $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core .
-    fi
-  login_script: echo $DOCKER_PASSWORD | docker login -u $DOCKER_USERNAME --password-stdin
-  push_script: |
-    if [ -n "$CIRRUS_TAG" ]; then
-      docker push $IMAGE_NAME:$CIRRUS_TAG-nvidia-l4t-arm64-core
-      docker push $IMAGE_NAME:latest-nvidia-l4t-arm64-core
-    else
-      docker push $IMAGE_NAME:$CIRRUS_BRANCH-nvidia-l4t-arm64-core
-    fi

From cab9f88ca4acf42be989fd808ace6fdec12c820d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 23 Dec 2024 18:59:33 +0100
Subject: [PATCH 05/28] chore(docs): add nvidia l4t instructions (#4454)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 docs/content/docs/reference/nvidia-l4t.md | 35 +++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 docs/content/docs/reference/nvidia-l4t.md

diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/docs/reference/nvidia-l4t.md
new file mode 100644
index 000000000000..028ee5318fef
--- /dev/null
+++ b/docs/content/docs/reference/nvidia-l4t.md
@@ -0,0 +1,35 @@
+
++++
+disableToc = false
+title = "Running on Nvidia ARM64"
+weight = 27
++++
+
+LocalAI can be run on Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. The following instructions will guide you through building the LocalAI container for Nvidia ARM64 devices.
+
+## Prerequisites
+
+- Docker engine installed (https://docs.docker.com/engine/install/ubuntu/)
+- Nvidia container toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt)
+
+## Build the container
+
+Build the LocalAI container for Nvidia ARM64 devices using the following command:
+
+```bash
+git clone https://github.com/mudler/LocalAI
+
+cd LocalAI
+
+docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin .
+```
+
+## Usage
+
+Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
+
+```bash
+docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models  -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin
+```
+
+Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.

From 0eb2911aad47d5e5c2c68c02f12abfe7456f3027 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 23 Dec 2024 19:11:31 +0100
Subject: [PATCH 06/28] chore(llava): update clip.patch (#4453)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/cpp/llama/patches/01-llava.patch | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/backend/cpp/llama/patches/01-llava.patch b/backend/cpp/llama/patches/01-llava.patch
index fa122da257cd..77124628c83b 100644
--- a/backend/cpp/llama/patches/01-llava.patch
+++ b/backend/cpp/llama/patches/01-llava.patch
@@ -1,13 +1,13 @@
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 342042ff..224db9b5 100644
+index 3cd0d2fa..6c5e811a 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
-@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
-             struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
-             int* patches_data = (int*)malloc(ggml_nbytes(patches));
-             for (int i = 0; i < num_patches; i++) {
--                patches_data[i] = i + 1;
-+                patches_data[i] = i;
-             }
-             ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
-             free(patches_data);
\ No newline at end of file
+@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+                 struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+                 int* patches_data = (int*)malloc(ggml_nbytes(patches));
+                 for (int i = 0; i < num_patches; i++) {
+-                    patches_data[i] = i + 1;
++                    patches_data[i] = i;
+                 }
+                 ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
+                 free(patches_data);
\ No newline at end of file

From 159a7f6df2140305261dccb499e0e87690ae4d18 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Dec 2024 22:15:38 +0000
Subject: [PATCH 07/28] chore(deps): Bump docs/themes/hugo-theme-relearn from
 `bd1f3d3` to `ec88e24` (#4460)

chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `bd1f3d3` to `ec88e24`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/bd1f3d3432632c61bb12e7ec0f7673fed0289f19...ec88e24f46955bcf1aa3f38ac143982eff08d8a6)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docs/themes/hugo-theme-relearn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
index bd1f3d343263..ec88e24f4695 160000
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
@@ -1 +1 @@
-Subproject commit bd1f3d3432632c61bb12e7ec0f7673fed0289f19
+Subproject commit ec88e24f46955bcf1aa3f38ac143982eff08d8a6

From d0adbee75d4df4b67b2208c7024fc94d6a194d4c Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 24 Dec 2024 10:55:30 +0100
Subject: [PATCH 08/28] chore: :arrow_up: Update ggerganov/llama.cpp to
 `32d6ee6385b3fc908b283f509b845f757a6e7206` (#4486)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c4e7b892d40e..682d7e63ce68 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=ebdee9478ca7ba65497b9b96f7457698c6ee5115
+CPPLLAMA_VERSION?=32d6ee6385b3fc908b283f509b845f757a6e7206
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From c7c275c7c83aecaefc91754627e8aedf29e9814f Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 24 Dec 2024 10:56:02 +0100
Subject: [PATCH 09/28] chore(model-gallery): :arrow_up: update checksum
 (#4487)

:arrow_up: Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 0855ae7aa6b3..4984f783d9f1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -15,8 +15,8 @@
       model: Falcon3-1B-Instruct-Q4_K_M.gguf
   files:
     - filename: Falcon3-1B-Instruct-Q4_K_M.gguf
-      sha256: d351a6506b7d21221f3858b04d98c8b1b7b108b85acde2b13b69d9cb06e2a7e9
       uri: huggingface://bartowski/Falcon3-1B-Instruct-GGUF/Falcon3-1B-Instruct-Q4_K_M.gguf
+      sha256: 1c92013dac1ab6e703e787f3e0829ca03cc95311e4c113a77950d15ff6dea7b3
   tags:
     - llm
     - gguf
@@ -34,8 +34,8 @@
       model: Falcon3-3B-Instruct-Q4_K_M.gguf
   files:
     - filename: Falcon3-3B-Instruct-Q4_K_M.gguf
-      sha256: e6d81653ee28c6944e4f9ab626882faabb69db8019ddcf87f2732d05f3d9158a
       uri: huggingface://bartowski/Falcon3-3B-Instruct-GGUF/Falcon3-3B-Instruct-Q4_K_M.gguf
+      sha256: 6ea6cecba144fe5b711ca07ae4263ccdf6ee6419807a46220419189da8446557
 - !!merge <<: *falcon3
   name: "falcon3-10b-instruct"
   urls:
@@ -46,8 +46,8 @@
       model: Falcon3-10B-Instruct-Q4_K_M.gguf
   files:
     - filename: Falcon3-10B-Instruct-Q4_K_M.gguf
-      sha256: 6d54a35d740a616061d6c7d7740d64f4339410e58aaba985aa9e1ea79c7e882a
       uri: huggingface://bartowski/Falcon3-10B-Instruct-GGUF/Falcon3-10B-Instruct-Q4_K_M.gguf
+      sha256: 0a33327bd71e1788a8e9f17889824a17a65efd3f96a4b2a5e2bc6ff2f39b8241
 - &intellect1
   name: "intellect-1-instruct"
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
@@ -915,8 +915,8 @@
       model: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf
   files:
     - filename: Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf
-      sha256: 1a0f0aef16e5be46be827c430cbe5ce4b2915b2f4a1dd60b98792004d39b9f52
       uri: huggingface://bartowski/Llama-Song-Stream-3B-Instruct-GGUF/Llama-Song-Stream-3B-Instruct-Q4_K_M.gguf
+      sha256: 62e4a79eb7a0f80184dc37ab01a5490708e600dad5f074de8bcda6ec5a77cca8
 - !!merge <<: *llama32
   name: "llama-chat-summary-3.2-3b"
   urls:
@@ -1932,19 +1932,7 @@
   urls:
     - https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerCreative-Mix
     - https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerCreative-Mix-GGUF
-  description: |
-    ZeroXClem/Qwen2.5-7B-HomerCreative-Mix is an advanced language model meticulously crafted by merging four pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, and the foundational conversational strengths of Homer-v0.5-Qwen2.5-7B. The resulting model excels in creative text generation, contextual understanding, and dynamic conversational interactions.
-    🚀 Merged Models
-
-    This model merge incorporates the following:
-
-        bunnycore/Qandora-2.5-7B-Creative: Specializes in creative text generation, enhancing the model's ability to produce imaginative and diverse content.
-
-        bunnycore/Qwen2.5-7B-Instruct-Fusion: Focuses on instruction-following capabilities, improving the model's performance in understanding and executing user commands.
-
-        allknowingroger/HomerSlerp1-7B: Utilizes spherical linear interpolation (SLERP) to blend model weights smoothly, ensuring a harmonious integration of different model attributes.
-
-        newsbang/Homer-v0.5-Qwen2.5-7B: Acts as the foundational conversational model, providing robust language comprehension and generation capabilities.
+  description: "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix is an advanced language model meticulously crafted by merging four pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, and the foundational conversational strengths of Homer-v0.5-Qwen2.5-7B. The resulting model excels in creative text generation, contextual understanding, and dynamic conversational interactions.\n\U0001F680 Merged Models\n\nThis model merge incorporates the following:\n\n    bunnycore/Qandora-2.5-7B-Creative: Specializes in creative text generation, enhancing the model's ability to produce imaginative and diverse content.\n\n    bunnycore/Qwen2.5-7B-Instruct-Fusion: Focuses on instruction-following capabilities, improving the model's performance in understanding and executing user commands.\n\n    allknowingroger/HomerSlerp1-7B: Utilizes spherical linear interpolation (SLERP) to blend model weights smoothly, ensuring a harmonious integration of different model attributes.\n\n    newsbang/Homer-v0.5-Qwen2.5-7B: Acts as the foundational conversational model, providing robust language comprehension and generation capabilities.\n"
   overrides:
     parameters:
       model: Qwen2.5-7B-HomerCreative-Mix.Q4_K_M.gguf
@@ -9960,7 +9948,7 @@
 - !!merge <<: *llama32
   name: "bert-embeddings"
   description: |
-      llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
+    llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
   tags:
     - embeddings
   overrides:

From 6c716982996fcccb5f7f12aa72ecbf72cfea4a46 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 24 Dec 2024 10:59:36 +0100
Subject: [PATCH 10/28] chore(model gallery): add l3.3-ms-evalebis-70b (#4488)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 4984f783d9f1..7c8b1ca30cf6 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -160,6 +160,21 @@
     - filename: Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf
       sha256: 090b2288810c5f6f680ff5cb4bc97665393d115c011fcd54dca6aec02e74a983
       uri: huggingface://bartowski/Llama-3.3-70B-Instruct-ablated-GGUF/Llama-3.3-70B-Instruct-ablated-Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "l3.3-ms-evalebis-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/e49ykknqXee3Ihr-3BIl_.png
+  urls:
+    - https://huggingface.co/Steelskull/L3.3-MS-Evalebis-70b
+    - https://huggingface.co/bartowski/L3.3-MS-Evalebis-70b-GGUF
+  description: |
+    This model was created as I liked the storytelling of EVA, the prose and details of scenes from EURYALE and Anubis, my goal is to merge the robust storytelling of all three models while attempting to maintain the positives of the models.
+  overrides:
+    parameters:
+      model: L3.3-MS-Evalebis-70b-Q4_K_M.gguf
+  files:
+    - filename: L3.3-MS-Evalebis-70b-Q4_K_M.gguf
+      sha256: 5515110ab6a583f6eb360533e3c5b3dda6d402af407c0b0f2b34a2a57b5224d5
+      uri: huggingface://bartowski/L3.3-MS-Evalebis-70b-GGUF/L3.3-MS-Evalebis-70b-Q4_K_M.gguf
 - &rwkv
   url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
   name: "rwkv-6-world-7b"

From d65c10cee7120a3706502c6990b287273cf31240 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 24 Dec 2024 11:04:19 +0100
Subject: [PATCH 11/28] chore(model gallery): add tqwendo-36b (#4489)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 7c8b1ca30cf6..8191fd27758a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2313,6 +2313,23 @@
     - filename: QwQ-LCoT-7B-Instruct-Q4_K_M.gguf
       sha256: 1df2e4ff0093a9632687b73969153442776b0ffc1c3c68e7f559472f9cea1945
       uri: huggingface://bartowski/QwQ-LCoT-7B-Instruct-GGUF/QwQ-LCoT-7B-Instruct-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "tqwendo-36b"
+  icon: "https://cdn-uploads.huggingface.co/production/uploads/6379683a81c1783a4a2ddba8/DI7Yw8Fs8eukluzKTHjEH.png"
+  urls:
+    - https://huggingface.co/nisten/tqwendo-36b
+    - https://huggingface.co/bartowski/tqwendo-36b-GGUF
+  description: |
+    There is a draft model to go with this one for speculative decoding and chain of thought reasoning: https://huggingface.co/nisten/qwen2.5-coder-7b-abliterated-128k-AWQ
+
+    Using the above 4bit 7b in conjunction with the 36b is meant to set up a chain-of-thought reasoner, evaluator similar to what O1-O3 is probably doing. This way the 7b 4bit only uses up an extra 4-6Gb on the gpu, but greatly speeds up both speculative decoding AND chain-of-thought evals.
+  overrides:
+    parameters:
+      model: tqwendo-36b-Q4_K_M.gguf
+  files:
+    - filename: tqwendo-36b-Q4_K_M.gguf
+      sha256: 890ff05fb717c67848d5c02ad62b2c26fdcdd20f7cc94ade8095869784c0cc82
+      uri: huggingface://bartowski/tqwendo-36b-GGUF/tqwendo-36b-Q4_K_M.gguf
 - &smollm
   ## SmolLM
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From 60bf7c9dd7c15c8b9083cec6cf96a31187e8f8c8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 24 Dec 2024 11:04:31 +0100
Subject: [PATCH 12/28] chore(model gallery): add rombos-llm-70b-llama-3.3
 (#4490)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 8191fd27758a..c0a48f127ee0 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -198,6 +198,24 @@
     - filename: rwkv-6-world-7b-Q4_K_M.gguf
       sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273
       uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "rombos-llm-70b-llama-3.3"
+  icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg"
+  urls:
+    - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3
+    - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF
+    - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
+  description: |
+    You know the drill by now.
+    Here is the paper. Have fun.
+    https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
+  overrides:
+    parameters:
+      model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
+  files:
+    - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
+      sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577
+      uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
 - &qwen25coder
   name: "qwen2.5-coder-14b"
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From b9690537010c150e19a711e176c98feda656102a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 24 Dec 2024 11:10:56 +0100
Subject: [PATCH 13/28] chore(gallery): re-order

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 gallery/index.yaml | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index c0a48f127ee0..60333a704a04 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -175,6 +175,24 @@
     - filename: L3.3-MS-Evalebis-70b-Q4_K_M.gguf
       sha256: 5515110ab6a583f6eb360533e3c5b3dda6d402af407c0b0f2b34a2a57b5224d5
       uri: huggingface://bartowski/L3.3-MS-Evalebis-70b-GGUF/L3.3-MS-Evalebis-70b-Q4_K_M.gguf
+- !!merge <<: *llama33
+  name: "rombos-llm-70b-llama-3.3"
+  icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg"
+  urls:
+    - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3
+    - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF
+    - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
+  description: |
+    You know the drill by now.
+    Here is the paper. Have fun.
+    https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
+  overrides:
+    parameters:
+      model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
+  files:
+    - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
+      sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577
+      uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
 - &rwkv
   url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
   name: "rwkv-6-world-7b"
@@ -198,24 +216,6 @@
     - filename: rwkv-6-world-7b-Q4_K_M.gguf
       sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273
       uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
-- !!merge <<: *llama33
-  name: "rombos-llm-70b-llama-3.3"
-  icon: "https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg"
-  urls:
-    - https://huggingface.co/rombodawg/Rombos-LLM-70b-Llama-3.3
-    - https://huggingface.co/bartowski/Rombos-LLM-70b-Llama-3.3-GGUF
-    - https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
-  description: |
-    You know the drill by now.
-    Here is the paper. Have fun.
-    https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
-  overrides:
-    parameters:
-      model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
-  files:
-    - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
-      sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577
-      uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
 - &qwen25coder
   name: "qwen2.5-coder-14b"
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From bc8dd3ad14a64464567d8a4fc1e766fdfb1cbd99 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 24 Dec 2024 22:44:11 +0100
Subject: [PATCH 14/28] chore: :arrow_up: Update ggerganov/llama.cpp to
 `2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d` (#4491)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 682d7e63ce68..142280949dbf 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=32d6ee6385b3fc908b283f509b845f757a6e7206
+CPPLLAMA_VERSION?=2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From 1d630e41859f392a9d98149ec16b5d153bd3b9da Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 24 Dec 2024 23:04:50 +0100
Subject: [PATCH 15/28] chore(model-gallery): :arrow_up: update checksum
 (#4492)

:arrow_up: Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 60333a704a04..d1a055b36db2 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -191,8 +191,8 @@
       model: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
   files:
     - filename: Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
-      sha256: 5f735b704cccf4db030b8b0f118eae6e7718bd6b50c53272dabcf48c89a37577
       uri: huggingface://bartowski/Rombos-LLM-70b-Llama-3.3-GGUF/Rombos-LLM-70b-Llama-3.3-Q4_K_M.gguf
+      sha256: 613008b960f6fff346b5dec71a87cd7ecdaff205bfea6332bd8fe2bb46177352
 - &rwkv
   url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
   name: "rwkv-6-world-7b"

From 816ae7a53a00b881c9d8fc68efe2b60d5dd8b82d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 25 Dec 2024 11:47:11 +0100
Subject: [PATCH 16/28] chore(model gallery): add fastllama-3.2-1b-instruct
 (#4493)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index d1a055b36db2..78df4904a862 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -964,6 +964,21 @@
     - filename: Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf
       sha256: ed1be20d2374aa6db9940923f41fa229bd7ebe13d41b1ff1ff18a6f87e99df79
       uri: huggingface://bartowski/Llama-Chat-Summary-3.2-3B-GGUF/Llama-Chat-Summary-3.2-3B-Q4_K_M.gguf
+- !!merge <<: *llama32
+  name: "fastllama-3.2-1b-instruct"
+  icon: https://huggingface.co/suayptalha/FastLlama-3.2-1B-Instruct/resolve/main/FastLlama.png
+  urls:
+    - https://huggingface.co/suayptalha/FastLlama-3.2-1B-Instruct
+    - https://huggingface.co/bartowski/FastLlama-3.2-1B-Instruct-GGUF
+  description: |
+    FastLlama is a highly optimized version of the Llama-3.2-1B-Instruct model. Designed for superior performance in constrained environments, it combines speed, compactness, and high accuracy. This version has been fine-tuned using the MetaMathQA-50k section of the HuggingFaceTB/smoltalk dataset to enhance its mathematical reasoning and problem-solving abilities.
+  overrides:
+    parameters:
+      model: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf
+  files:
+    - filename: FastLlama-3.2-1B-Instruct-Q4_K_M.gguf
+      sha256: 3c0303e9560c441a9abdcd0e4c04c47e7f6b21277c1e8c00eed94fc656da0be9
+      uri: huggingface://bartowski/FastLlama-3.2-1B-Instruct-GGUF/FastLlama-3.2-1B-Instruct-Q4_K_M.gguf
 - &qwen25
   ## Qwen2.5
   name: "qwen2.5-14b-instruct"

From 8f6332ab23472b4591248a2717c263ea5b389482 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 25 Dec 2024 11:47:22 +0100
Subject: [PATCH 17/28] chore(model gallery): add
 dans-personalityengine-v1.1.0-12b (#4494)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 78df4904a862..0eb95b7b96cd 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5083,6 +5083,21 @@
     - filename: NaturalLM-7B-Instruct-Q4_K_M.gguf
       sha256: 15b2f34116f690fea35790a9392b8a2190fe25827e370d426e88a2a543f4dcee
       uri: huggingface://bartowski/NaturalLM-7B-Instruct-GGUF/NaturalLM-7B-Instruct-Q4_K_M.gguf
+- !!merge <<: *mistral03
+  name: "dans-personalityengine-v1.1.0-12b"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  urls:
+    - https://huggingface.co/PocketDoc/Dans-PersonalityEngine-V1.1.0-12b
+    - https://huggingface.co/bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF
+  description: |
+    This model series is intended to be multifarious in its capabilities and should be quite capable at both co-writing and roleplay as well as find itself quite at home performing sentiment analysis or summarization as part of a pipeline. It has been trained on a wide array of one shot instructions, multi turn instructions, tool use, role playing scenarios, text adventure games, co-writing, and much more.
+  overrides:
+    parameters:
+      model: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf
+  files:
+    - filename: Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf
+      sha256: a1afb9fddfa3f2847ed710cc374b4f17e63a75f7e10d8871cf83983c2f5415ab
+      uri: huggingface://bartowski/Dans-PersonalityEngine-V1.1.0-12b-GGUF/Dans-PersonalityEngine-V1.1.0-12b-Q4_K_M.gguf
 - &mudler
   ### START mudler's LocalAI specific-models
   url: "github:mudler/LocalAI/gallery/mudler.yaml@master"

From 1a74af1492709173ed347cbde9ad53698b9922ac Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 25 Dec 2024 11:47:33 +0100
Subject: [PATCH 18/28] chore(model gallery): add llama-3.1-8b-open-sft (#4495)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 0eb95b7b96cd..2a765c37a7f5 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4193,6 +4193,20 @@
     - filename: orca_mini_v8_1_70b-Q4_K_M.gguf
       sha256: 97627730b028d4d7a349ae0b8e219207163ec425e4e1c057e445b2a66b61fdfa
       uri: huggingface://bartowski/orca_mini_v8_1_70b-GGUF/orca_mini_v8_1_70b-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama-3.1-8b-open-sft"
+  urls:
+    - https://huggingface.co/prithivMLmods/Llama-3.1-8B-Open-SFT
+    - https://huggingface.co/bartowski/Llama-3.1-8B-Open-SFT-GGUF
+  description: |
+    The Llama-3.1-8B-Open-SFT model is a fine-tuned version of meta-llama/Llama-3.1-8B-Instruct, designed for advanced text generation tasks, including conversational interactions, question answering, and chain-of-thought reasoning. This model leverages Supervised Fine-Tuning (SFT) using the O1-OPEN/OpenO1-SFT dataset to provide enhanced performance in context-sensitive and instruction-following tasks.
+  overrides:
+    parameters:
+      model: Llama-3.1-8B-Open-SFT-Q4_K_M.gguf
+  files:
+    - filename: Llama-3.1-8B-Open-SFT-Q4_K_M.gguf
+      sha256: ce75152763c48c5386fe59652cc921aae456da36ab82af3d9e2080f603f45132
+      uri: huggingface://bartowski/Llama-3.1-8B-Open-SFT-GGUF/Llama-3.1-8B-Open-SFT-Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"

From 5c29e0cd4d1c79da4b6e0416f16384478c1f5f44 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 25 Dec 2024 22:43:06 +0100
Subject: [PATCH 19/28] chore: :arrow_up: Update ggerganov/llama.cpp to
 `9ba399dfa7f115effc63d48e6860a94c9faa31b2` (#4496)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 142280949dbf..16f7a6b08a64 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2cd43f4900ba0e34124fdcbf02a7f9df25a10a3d
+CPPLLAMA_VERSION?=9ba399dfa7f115effc63d48e6860a94c9faa31b2
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From 1a14c7d45a770ea1a4a0dc9af37a61ae4f48e373 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Thu, 26 Dec 2024 10:47:54 +0100
Subject: [PATCH 20/28] chore(model gallery): add qvq-72b-preview (#4498)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 2a765c37a7f5..df4f03668084 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2363,6 +2363,25 @@
     - filename: tqwendo-36b-Q4_K_M.gguf
       sha256: 890ff05fb717c67848d5c02ad62b2c26fdcdd20f7cc94ade8095869784c0cc82
       uri: huggingface://bartowski/tqwendo-36b-GGUF/tqwendo-36b-Q4_K_M.gguf
+- !!merge <<: *qwen25
+  name: "qvq-72b-preview"
+  urls:
+    - https://huggingface.co/Qwen/QVQ-72B-Preview
+    - https://huggingface.co/bartowski/QVQ-72B-Preview-GGUF
+  description: |
+    QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.
+    QVQ-72B-Preview has achieved remarkable performance on various benchmarks. It scored a remarkable 70.3% on the Multimodal Massive Multi-task Understanding (MMMU) benchmark, showcasing QVQ's powerful ability in multidisciplinary understanding and reasoning. Furthermore, the significant improvements on MathVision highlight the model's progress in mathematical reasoning tasks. OlympiadBench also demonstrates the model's enhanced ability to tackle challenging problems.
+  overrides:
+    mmproj: mmproj-QVQ-72B-Preview-f16.gguf
+    parameters:
+      model: QVQ-72B-Preview-Q4_K_M.gguf
+  files:
+    - filename: QVQ-72B-Preview-Q4_K_M.gguf
+      sha256: 0fab6809995614c19e4b4c23e3191824944a04999f742486278f0d9929dc82ae
+      uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/QVQ-72B-Preview-Q4_K_M.gguf
+    - filename: mmproj-QVQ-72B-Preview-f16.gguf
+      sha256: 85110223f39aa1aad887052d269074afbd52a49ae02c53b66753b033662cc8e6
+      uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/mmproj-QVQ-72B-Preview-f16.gguf
 - &smollm
   ## SmolLM
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From 9572f0577b8f12166dd82d54d5a839661b8a3928 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Thu, 26 Dec 2024 10:52:57 +0100
Subject: [PATCH 21/28] chore(model gallery): add teleut-7b-rp (#4499)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index df4f03668084..b636abedfb3d 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -2382,6 +2382,22 @@
     - filename: mmproj-QVQ-72B-Preview-f16.gguf
       sha256: 85110223f39aa1aad887052d269074afbd52a49ae02c53b66753b033662cc8e6
       uri: huggingface://bartowski/QVQ-72B-Preview-GGUF/mmproj-QVQ-72B-Preview-f16.gguf
+- !!merge <<: *qwen25
+  name: "teleut-7b-rp"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/2y6PHgWe4ewoMFlgn-p3d.png
+  urls:
+    - https://huggingface.co/allura-org/Teleut-7b-RP
+    - https://huggingface.co/bartowski/Teleut-7b-RP-GGUF
+  description: |
+    A roleplay-focused LoRA finetune of Teleut 7b. Methodology and hyperparams inspired by SorcererLM and Slush.
+    Dataset: The worst mix of data you've ever seen. Like, seriously, you do not want to see the things that went into this model. It's bad.
+  overrides:
+    parameters:
+      model: Teleut-7b-RP-Q4_K_M.gguf
+  files:
+    - filename: Teleut-7b-RP-Q4_K_M.gguf
+      sha256: 74d9a0974c48f16677da8891ac76ed89ed04f246275b9ca8316d25e1e86ce89f
+      uri: huggingface://bartowski/Teleut-7b-RP-GGUF/Teleut-7b-RP-Q4_K_M.gguf
 - &smollm
   ## SmolLM
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"

From 9cbf168dc0a00b178b45c2047987811bd698f531 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 26 Dec 2024 22:44:36 +0100
Subject: [PATCH 22/28] chore: :arrow_up: Update ggerganov/llama.cpp to
 `d79d8f39b4da6deca4aea8bf130c6034c482b320` (#4500)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 16f7a6b08a64..2aa34b7ce35f 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=9ba399dfa7f115effc63d48e6860a94c9faa31b2
+CPPLLAMA_VERSION?=d79d8f39b4da6deca4aea8bf130c6034c482b320
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From f3bbdef77dbbd92dba5fd4ff3ae6617ae8e5831c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 11:15:56 +0100
Subject: [PATCH 23/28] chore(model gallery): add
 falcon3-1b-instruct-abliterated (#4501)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index b636abedfb3d..1446b3430284 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -48,6 +48,21 @@
     - filename: Falcon3-10B-Instruct-Q4_K_M.gguf
       uri: huggingface://bartowski/Falcon3-10B-Instruct-GGUF/Falcon3-10B-Instruct-Q4_K_M.gguf
       sha256: 0a33327bd71e1788a8e9f17889824a17a65efd3f96a4b2a5e2bc6ff2f39b8241
+- !!merge <<: *falcon3
+  name: "falcon3-1b-instruct-abliterated"
+  urls:
+    - https://huggingface.co/huihui-ai/Falcon3-1B-Instruct-abliterated
+    - https://huggingface.co/bartowski/Falcon3-1B-Instruct-abliterated-GGUF
+  description: |
+    This is an uncensored version of tiiuae/Falcon3-1B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it).
+    This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens.
+  overrides:
+    parameters:
+      model: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf
+  files:
+    - filename: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf
+      sha256: 416d15ce58334b7956818befb088d46c1e3e7153ebf2da2fb9769a5b1ff934a1
+      uri: huggingface://bartowski/Falcon3-1B-Instruct-abliterated-GGUF/Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf
 - &intellect1
   name: "intellect-1-instruct"
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"

From 7f51e2dddf1e0347d9d5e4719bbca95262190677 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 11:20:10 +0100
Subject: [PATCH 24/28] chore(model gallery): add
 falcon3-3b-instruct-abliterated (#4502)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 1446b3430284..d93cb1e153fa 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -63,6 +63,21 @@
     - filename: Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf
       sha256: 416d15ce58334b7956818befb088d46c1e3e7153ebf2da2fb9769a5b1ff934a1
       uri: huggingface://bartowski/Falcon3-1B-Instruct-abliterated-GGUF/Falcon3-1B-Instruct-abliterated-Q4_K_M.gguf
+- !!merge <<: *falcon3
+  name: "falcon3-3b-instruct-abliterated"
+  urls:
+    - https://huggingface.co/huihui-ai/Falcon3-3B-Instruct-abliterated
+    - https://huggingface.co/bartowski/Falcon3-3B-Instruct-abliterated-GGUF
+  description: |
+    This is an uncensored version of tiiuae/Falcon3-3B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it).
+    This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens.
+  overrides:
+    parameters:
+      model: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf
+  files:
+    - filename: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf
+      sha256: 83773b77b0e34ef115f8a6508192e9f1d3426a61456744493f65cfe1e7f90aa9
+      uri: huggingface://bartowski/Falcon3-3B-Instruct-abliterated-GGUF/Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf
 - &intellect1
   name: "intellect-1-instruct"
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"

From 0093985e7c09acf71d3eb012a2e42eac380b9cea Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 11:24:13 +0100
Subject: [PATCH 25/28] chore(model gallery): add
 falcon3-10b-instruct-abliterated (#4503)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index d93cb1e153fa..4f00f89c4d29 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -78,6 +78,21 @@
     - filename: Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf
       sha256: 83773b77b0e34ef115f8a6508192e9f1d3426a61456744493f65cfe1e7f90aa9
       uri: huggingface://bartowski/Falcon3-3B-Instruct-abliterated-GGUF/Falcon3-3B-Instruct-abliterated-Q4_K_M.gguf
+- !!merge <<: *falcon3
+  name: "falcon3-10b-instruct-abliterated"
+  urls:
+    - https://huggingface.co/huihui-ai/Falcon3-10B-Instruct-abliterated
+    - https://huggingface.co/bartowski/Falcon3-10B-Instruct-abliterated-GGUF
+  description: |
+    This is an uncensored version of tiiuae/Falcon3-10B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it).
+    This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens.
+  overrides:
+    parameters:
+      model: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf
+  files:
+    - filename: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf
+      sha256: 5940df2ff88e5be93dbe0766b2a9683d7e73c204a69a1348a37f835cf2b5f767
+      uri: huggingface://bartowski/Falcon3-10B-Instruct-abliterated-GGUF/Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf
 - &intellect1
   name: "intellect-1-instruct"
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"

From 2a7222c6aac7b962582a98bc60746e81393b045b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 11:29:34 +0100
Subject: [PATCH 26/28] chore(model gallery): add
 falcon3-7b-instruct-abliterated (#4504)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 gallery/index.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 4f00f89c4d29..121b363ed55b 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -93,6 +93,21 @@
     - filename: Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf
       sha256: 5940df2ff88e5be93dbe0766b2a9683d7e73c204a69a1348a37f835cf2b5f767
       uri: huggingface://bartowski/Falcon3-10B-Instruct-abliterated-GGUF/Falcon3-10B-Instruct-abliterated-Q4_K_M.gguf
+- !!merge <<: *falcon3
+  name: "falcon3-7b-instruct-abliterated"
+  urls:
+    - https://huggingface.co/huihui-ai/Falcon3-7B-Instruct-abliterated
+    - https://huggingface.co/bartowski/Falcon3-7B-Instruct-abliterated-GGUF
+  description: |
+    This is an uncensored version of tiiuae/Falcon3-7B-Instruct created with abliteration (see remove-refusals-with-transformers to know more about it).
+    This is a crude, proof-of-concept implementation to remove refusals from an LLM model without using TransformerLens.
+  overrides:
+    parameters:
+      model: Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf
+  files:
+    - filename: Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf
+      sha256: 68e10e638668acaa49fb7919224c7d8bcf1798126c7a499c4d9ec3b81313f8c8
+      uri: huggingface://bartowski/Falcon3-7B-Instruct-abliterated-GGUF/Falcon3-7B-Instruct-abliterated-Q4_K_M.gguf
 - &intellect1
   name: "intellect-1-instruct"
   url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"

From 58524d40c9067aabc613e7c064490dd2149bd21a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 15:13:06 +0100
Subject: [PATCH 27/28] Update README.md

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ef950bf1298e..8fd0f4e7d7f3 100644
--- a/README.md
+++ b/README.md
@@ -126,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 
 ## 🚀 [Features](https://localai.io/features/)
 
-- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
+- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
-- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
+- 🎨 [Image generation](https://localai.io/features/image-generation)
 - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) 
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)

From afd0af987d6c6af79fd38b3220320b9901092ce9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Fri, 27 Dec 2024 15:17:02 +0100
Subject: [PATCH 28/28] Update README.md

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 8fd0f4e7d7f3..9cf758f85209 100644
--- a/README.md
+++ b/README.md
@@ -137,6 +137,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
+- 🔊 Voice activity detection (Silero-VAD support)
 - 🌍 Integrated WebUI!
 
 ## 💻 Usage