From e7a962c70a6c86e77f1f2b64b5bebcfb9f74fa81 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 2 May 2024 10:57:54 +0800
Subject: [PATCH 1/9] update readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 427cd9822dacc..363f94bcbce63 100644
--- a/README.md
+++ b/README.md
@@ -32,11 +32,11 @@ For more information, be sure to run the program with the `--help` flag, or [che
 ## OSX and Linux
 
 ### Linux Usage (Precompiled Binary, Recommended)
-On Linux, we provide a `koboldcpp-linux-x64` PyInstaller prebuilt binary on the **[releases](https://github.com/LostRuins/koboldcpp/releases/latest)** page for modern systems. Simply download and run the binary.
+On Linux, we provide a `koboldcpp-linux-x64-cuda1150` PyInstaller prebuilt binary on the **[releases](https://github.com/LostRuins/koboldcpp/releases/latest)** page for modern systems. Simply download and run the binary.
 
 Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
 ```
-curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64 && chmod +x koboldcpp
+curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/download/koboldcpp-linux-x64-cuda1150 && chmod +x koboldcpp
 ```
 After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`).

From fb7e72352ec26c78e4956ec46f9db729d7788b25 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 2 May 2024 14:17:48 +0800
Subject: [PATCH 2/9] benchmark includes ver

---
 koboldcpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index a5ca38809a8f2..043354d546e51 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -3197,7 +3197,7 @@ def onready_subprocess():
         s_pp = float(benchmaxctx-benchlen)/t_pp
         s_gen = float(benchlen)/t_gen
         datetimestamp = datetime.now(timezone.utc)
-        print(f"\nBenchmark Completed - Results:\n======")
+        print(f"\nBenchmark Completed - v{KcppVersion} Results:\n======")
         print(f"Timestamp: {datetimestamp}")
         print(f"Backend: {libname}")
         print(f"Layers: {args.gpulayers}")

From 0d8c4a9b73eda0c20a8cf191431288f6ede78d25 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 2 May 2024 14:21:44 +0800
Subject: [PATCH 3/9] remove quick lowvram option

---
 koboldcpp.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 043354d546e51..5f696008fdd1f 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1987,7 +1987,6 @@ def changerunmode(a,b,c):
         if index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
-            quick_lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
             mmq_box.grid(row=4, column=1, padx=8, pady=1, stick="nw")
             quick_mmq_box.grid(row=4, column=1, padx=8, pady=1, stick="nw")
             splitmode_box.grid(row=5, column=1, padx=8, pady=1, stick="nw")
 
@@ -1995,7 +1994,6 @@ def changerunmode(a,b,c):
             tensor_split_entry.grid(row=8, column=1, padx=8, pady=1, stick="nw")
         else:
             lowvram_box.grid_forget()
-            quick_lowvram_box.grid_forget()
             mmq_box.grid_forget()
             quick_mmq_box.grid_forget()
             tensor_split_label.grid_forget()
@@ -2033,7 +2031,6 @@ def changerunmode(a,b,c):
     quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
     quick_gpuname_label.configure(text_color="#ffff00")
     quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 6, 50,"How many layers to offload onto the GPU.\nVRAM intensive, usage increases with model and context size.\nRequires some trial and error to find the best fit value.")
-    quick_lowvram_box = makecheckbox(quick_tab, "Low VRAM (No KV offload)", lowvram_var, 4,0,tooltiptxt="Avoid offloading KV Cache or scratch buffers to VRAM.\nAllows more layers to fit, but may result in a speed loss.")
     quick_mmq_box = makecheckbox(quick_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1,tooltiptxt="Enable MMQ mode instead of CuBLAS for prompt processing. Read the wiki. Speed may vary.")
 
 

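The GUI shows or hides backend-specific widgets from `changerunmode` by calling `grid()` and `grid_forget()`; this patch simply drops the duplicated quick-tab "Low VRAM" checkbox from that toggle list. A minimal sketch of the show/hide pattern, using plain tkinter rather than the customtkinter wrappers in koboldcpp.py, with illustrative widget names:

```python
# Sketch of the grid()/grid_forget() toggle pattern used by changerunmode.
# Plain tkinter stand-in for the customtkinter GUI; names are illustrative.
import tkinter as tk

root = tk.Tk()
backend_var = tk.StringVar(value="Use CuBLAS")

lowvram_box = tk.Checkbutton(root, text="Low VRAM (No KV offload)")
mmq_box = tk.Checkbutton(root, text="Use QuantMatMul (mmq)")

def changerunmode(*_):
    # GPU-only options are only placed on the grid for CUDA/ROCm backends;
    # grid_forget() hides them again without destroying the widgets.
    if backend_var.get() in ("Use CuBLAS", "Use hipBLAS (ROCm)"):
        lowvram_box.grid(row=4, column=0, padx=8, pady=1, sticky="nw")
        mmq_box.grid(row=4, column=1, padx=8, pady=1, sticky="nw")
    else:
        lowvram_box.grid_forget()
        mmq_box.grid_forget()

backend_var.trace_add("write", changerunmode)
tk.OptionMenu(root, backend_var, "Use CuBLAS", "Use hipBLAS (ROCm)", "Use OpenBLAS").grid(row=0, column=0)
changerunmode()
root.mainloop()
```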
From 4c5d307f597291355f129c0e085a504f2665344e Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 2 May 2024 23:11:24 +0800
Subject: [PATCH 4/9] fixed benchmark interrupt (+2 squashed commit)

Squashed commit:

[6e334c8b] require enter key to be pressed

[d50d49b6] fixed bench script
---
 koboldcpp.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 5f696008fdd1f..fa419100eac1f 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1502,9 +1502,6 @@ def show_new_gui():
     from tkinter.filedialog import askopenfilename
     from tkinter.filedialog import asksaveasfile
 
-    global using_gui_launcher
-    using_gui_launcher = True
-
     # if args received, launch
     if len(sys.argv) != 1:
         import tkinter as tk
@@ -1568,6 +1565,8 @@ def on_resize(event):
             ctk.set_widget_scaling(smallratio)
 
     root.bind("<Configure>", on_resize)
+    global using_gui_launcher
+    using_gui_launcher = True
 
     # trigger empty tooltip then remove it
     def show_tooltip(event, tooltip_text=None):
@@ -3217,7 +3216,11 @@ def onready_subprocess():
                 file.write(f"\n{datetimestamp},{libname},{args.gpulayers},{benchmodel},{benchmaxctx},{benchlen},{t_pp:.2f},{s_pp:.2f},{t_gen:.2f},{s_gen:.2f},{(t_pp+t_gen):.2f},{resultok},{result}")
             except Exception as e:
                 print(f"Error writing benchmark to file: {e}")
-
+        global using_gui_launcher
+        if using_gui_launcher and not save_to_file:
+            print("===")
+            print("Press ENTER key to exit.", flush=True)
+            input()
 
     if start_server:
         if args.remotetunnel:
@@ -3229,11 +3232,6 @@ def onready_subprocess():
     else:
         # Flush stdout for previous win32 issue so the client can see output.
         print(f"Server was not started, main function complete. Idling.", flush=True)
-        global using_gui_launcher
-        if using_gui_launcher:
-            print("===")
-            print("Press a key to exit", flush=True)
-            input()
 
 def run_in_queue(launch_args, input_queue, output_queue):
     main(launch_args, start_server=False)

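The benchmark path now pauses only when the program was started from the GUI launcher and the results were not already saved to a file, so a double-clicked console window does not vanish before the numbers can be read. A simplified sketch of that guard, with the surrounding benchmark code reduced to a stub and the flags set by hand:

```python
# Sketch of the "pause before exit" guard the patch moves into the benchmark
# path. Names mirror the diff, but the surrounding program flow is hypothetical.
using_gui_launcher = True   # set by the GUI code path in the real program
save_to_file = False        # True when the benchmark was given an output csv

def finish_benchmark():
    print("Benchmark Completed - Results:\n======")
    if using_gui_launcher and not save_to_file:
        print("===")
        print("Press ENTER key to exit.", flush=True)
        input()  # keep the console window open until the user confirms

finish_benchmark()
```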
From a34a09d19698c85ab9ef95ada54d83c55519c3fa Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 3 May 2024 15:57:13 +0800
Subject: [PATCH 5/9] replace destroy with quit for tk

---
 koboldcpp.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index fa419100eac1f..c64fee035c51a 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1508,7 +1508,8 @@ def show_new_gui():
         root = tk.Tk() #we dont want the useless window to be visible, but we want it in taskbar
         root.attributes("-alpha", 0)
         args.model_param = askopenfilename(title="Select ggml model .bin or .gguf file or .kcpps config")
-        root.destroy()
+        root.withdraw()
+        root.quit()
         if args.model_param and args.model_param!="" and args.model_param.lower().endswith('.kcpps'):
             loadconfigfile(args.model_param)
         if not args.model_param and not args.sdconfig:
@@ -2190,7 +2191,8 @@ def guilaunch():
             model_var.set(tmp)
         nonlocal nextstate
         nextstate = 1
-        root.destroy()
+        root.withdraw()
+        root.quit()
         pass
 
     def export_vars():
@@ -2496,7 +2498,8 @@ def show_gui_msgbox(title,message):
         root = tk.Tk()
         root.attributes("-alpha", 0)
         messagebox.showerror(title=title, message=message)
-        root.destroy()
+        root.withdraw()
+        root.quit()
     except Exception as ex2:
         pass
 
@@ -3181,8 +3184,8 @@ def onready_subprocess():
     else:
         print(f"\nRunning benchmark (Not Saved)...")
 
-    benchprompt = "11111111"
-    for i in range(0,10): #generate massive prompt
+    benchprompt = "1111111111111111"
+    for i in range(0,12): #generate massive prompt
         benchprompt += benchprompt
     genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,use_default_badwordsids=True)
     result = genout['text']

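The patch above replaces `root.destroy()` with `root.withdraw()` followed by `root.quit()` on the temporary Tk roots that exist only to host a file dialog or a message box. A minimal sketch of that pattern, assuming a plain tkinter environment; presumably the intent is to hide the helper window and stop its event loop without tearing the interpreter down mid-flow:

```python
# Minimal sketch of the withdraw()+quit() pattern the patch switches to,
# simplified from the diff; not the full koboldcpp launch flow.
import tkinter as tk
from tkinter.filedialog import askopenfilename

root = tk.Tk()
root.attributes("-alpha", 0)   # keep the useless helper window invisible
path = askopenfilename(title="Select ggml model .bin or .gguf file")
root.withdraw()                # hide the helper window
root.quit()                    # stop its event loop instead of destroy()
print(f"Selected: {path}")
```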
From b6bfab128f3b39c117f3e65aedd06a7475d5566d Mon Sep 17 00:00:00 2001
From: henk717
Date: Fri, 3 May 2024 11:12:57 +0200
Subject: [PATCH 6/9] CUDA 12 CI (#815)

* Allow KCPP_CUDA to specify CUDA version

* CUDA 12 CI Linux

* CUDA 12 CI

* Fix KCPP_CUDA indent

* KCPP_CUDA ENV Fix

StackOverflow is bad for advice sometimes....

* Lowcase cuda on output filename

* Strip . from filename output
---
 .../kcpp-build-release-linux-cuda12.yaml  | 33 ++++++++++++++++++
 .../kcpp-build-release-win-cuda12.yaml    | 34 +++++++++++++++++++
 koboldcpp.sh                              | 21 +++++++++---
 3 files changed, 83 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/kcpp-build-release-linux-cuda12.yaml
 create mode 100644 .github/workflows/kcpp-build-release-win-cuda12.yaml

diff --git a/.github/workflows/kcpp-build-release-linux-cuda12.yaml b/.github/workflows/kcpp-build-release-linux-cuda12.yaml
new file mode 100644
index 0000000000000..0cbeaf9a2b942
--- /dev/null
+++ b/.github/workflows/kcpp-build-release-linux-cuda12.yaml
@@ -0,0 +1,33 @@
+name: Koboldcpp Builder Linux CUDA12
+
+on: workflow_dispatch
+env:
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  KCPP_CUDA: 12.1.0
+
+jobs:
+  linux:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          ref: concedo_experimental
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install git curl bzip2
+
+      - name: Build
+        id: make_build
+        run: |
+          ./koboldcpp.sh dist
+
+      - name: Save artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: kcpp_linux_binary
+          path: dist/
diff --git a/.github/workflows/kcpp-build-release-win-cuda12.yaml b/.github/workflows/kcpp-build-release-win-cuda12.yaml
new file mode 100644
index 0000000000000..ed9065d939728
--- /dev/null
+++ b/.github/workflows/kcpp-build-release-win-cuda12.yaml
@@ -0,0 +1,34 @@
+name: Koboldcpp Builder Windows CUDA12
+
+on: workflow_dispatch
+env:
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+
+jobs:
+  windows:
+    runs-on: windows-2019
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          ref: concedo_experimental
+
+      - uses: Jimver/cuda-toolkit@v0.2.11
+        id: cuda-toolkit
+        with:
+          cuda: '12.1.0'
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+
+      - name: Save artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: kcpp_windows_cuda_binary
+          path: build/bin/Release/
diff --git a/koboldcpp.sh b/koboldcpp.sh
index b2923ec4ff7bd..587270d2add22 100755
--- a/koboldcpp.sh
+++ b/koboldcpp.sh
@@ -1,13 +1,24 @@
 #!/bin/bash
+
 if [ ! -f "bin/micromamba" ]; then
   curl -Ls https://anaconda.org/conda-forge/micromamba/1.5.3/download/linux-64/micromamba-1.5.3-0.tar.bz2 | tar -xvj bin/micromamba
 fi
 if [[ ! -f "conda/envs/linux/bin/python" || $1 == "rebuild" ]]; then
-  bin/micromamba create --no-shortcuts -r conda -n linux -f environment.yaml -y
-  bin/micromamba create --no-shortcuts -r conda -n linux -f environment.yaml -y
+  cp environment.yaml environment.tmp.yaml
+  if [ -n "$KCPP_CUDA" ]; then
+    sed -i -e "s/nvidia\/label\/cuda-11.5.0/nvidia\/label\/cuda-$KCPP_CUDA/g" environment.tmp.yaml
+  else
+    KCPP_CUDA=11.5.0
+  fi
+  bin/micromamba create --no-shortcuts -r conda -n linux -f environment.tmp.yaml -y
+  bin/micromamba create --no-shortcuts -r conda -n linux -f environment.tmp.yaml -y
   bin/micromamba run -r conda -n linux make clean
+  echo $KCPP_CUDA > conda/envs/linux/cudaver
+  echo rm environment.tmp.yaml
 fi
+KCPP_CUDA=$(

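The `KCPP_CUDA` value chosen here also ends up in the release filename (the README in PATCH 1 points at `koboldcpp-linux-x64-cuda1150`), per the "Lowcase cuda" and "Strip ." notes in the commit message. The exact shell handling is truncated in this patch, so the following is only an inferred Python illustration of that naming scheme, not the actual build-script logic:

```python
# Inferred illustration of the artifact-naming convention: the CUDA version
# selected via KCPP_CUDA ("11.5.0", "12.1.0", ...) is lowercased and has its
# dots stripped to form the binary suffix, e.g. koboldcpp-linux-x64-cuda1150.
import os

def linux_binary_name(kcpp_cuda):
    version = (kcpp_cuda or "11.5.0").lower()   # 11.5.0 is the fallback in koboldcpp.sh
    suffix = "cuda" + version.replace(".", "")  # "12.1.0" -> "cuda1210"
    return "koboldcpp-linux-x64-" + suffix

print(linux_binary_name(os.environ.get("KCPP_CUDA")))  # e.g. koboldcpp-linux-x64-cuda1150
```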
Date: Fri, 3 May 2024 18:13:39 +0800
Subject: [PATCH 7/9] add kobble tiny to readme

---
 README.md   | 4 ++--
 colab.ipynb | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 363f94bcbce63..cc85b3ce98dd8 100644
--- a/README.md
+++ b/README.md
@@ -90,8 +90,8 @@ There are some community made AUR packages (Maintained by @AlpinDale) available:
 - Clone the repo `git clone https://github.com/LostRuins/koboldcpp.git`
 - Navigate to the koboldcpp folder `cd koboldcpp`
 - Build the project `make`
-- Grab a small GGUF model, such as `wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf`
-- Start the python server `python koboldcpp.py --model phi-2.Q2_K.gguf`
+- Grab a small GGUF model, such as `wget https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf`
+- Start the python server `python koboldcpp.py --model KobbleTiny-Q4_K.gguf`
 - Connect to `http://localhost:5001` on your mobile browser
 - If you encounter any errors, make sure your packages are up-to-date with `pkg up`
 - GPU acceleration for Termux may be possible but I have not explored it. If you find a good cross-device solution, do share or PR it.
diff --git a/colab.ipynb b/colab.ipynb
index ca791c6859e34..606069712b94b 100644
--- a/colab.ipynb
+++ b/colab.ipynb
@@ -48,7 +48,7 @@
    "source": [
     "#@title v-- Enter your model below and then click this to start Koboldcpp\r\n",
     "\r\n",
-    "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/afrideva/phi-2-uncensored-GGUF/resolve/main/phi-2-uncensored.q3_k_m.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\"]{allow-input: true}\r\n",
+    "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\"]{allow-input: true}\r\n",
     "Layers = 99 #@param [99]{allow-input: true}\r\n",
     "ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
     "ForceRebuild = False #@param {type:\"boolean\"}\r\n",

From 89db8afded10625a75dbabb6e04656832158f159 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 4 May 2024 10:07:54 +0800
Subject: [PATCH 8/9] revert moondream to try and fix llava

---
 examples/llava/clip.cpp | 71 +++++++----------------------------------
 1 file changed, 11 insertions(+), 60 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e3c9bcd4364aa..f568f470c8f5c 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -104,7 +104,6 @@ static std::string format(const char * fmt, ...) {
 #define TN_POS_EMBD "%s.position_embd.weight"
 #define TN_CLASS_EMBD "v.class_embd"
 #define TN_PATCH_EMBD "v.patch_embd.weight"
-#define TN_PATCH_BIAS "v.patch_embd.bias"
 #define TN_ATTN_K "%s.blk.%d.attn_k.%s"
 #define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
 #define TN_ATTN_V "%s.blk.%d.attn_v.%s"
@@ -426,7 +425,6 @@ struct clip_vision_model {
     // embeddings
     struct ggml_tensor * class_embedding;
     struct ggml_tensor * patch_embeddings;
-    struct ggml_tensor * patch_bias;
     struct ggml_tensor * position_embeddings;
 
     struct ggml_tensor * pre_ln_w;
@@ -503,11 +501,6 @@ struct clip_ctx {
     bool use_gelu = false;
     int32_t ftype = 1;
 
-    bool has_class_embedding = true;
-    bool has_pre_norm = true;
-    bool has_post_norm = false;
-    bool has_patch_bias = false;
-
     struct gguf_context * ctx_gguf;
     struct ggml_context * ctx_data;
 
@@ -533,7 +526,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     const int patch_size = hparams.patch_size;
     const int num_patches = ((image_size / patch_size) * (image_size / patch_size));
     const int num_patches_per_side = image_size / patch_size; GGML_UNUSED(num_patches_per_side);
-    const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0);
+    const int num_positions = num_patches + 1;
     const int hidden_size = hparams.hidden_size;
     const int n_head = hparams.n_head;
     const int d_head = hidden_size / n_head;
@@ -564,23 +557,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
     inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
 
-    if (ctx->has_patch_bias) {
-        // inp = ggml_add(ctx0, inp, ggml_repeat(ctx0, model.patch_bias, inp));
-        inp = ggml_add(ctx0, inp, model.patch_bias);
-    }
-
     // concat class_embeddings and patch_embeddings
-    struct ggml_tensor * embeddings = inp;
-    if (ctx->has_class_embedding) {
-        embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
-        embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
-            embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
-        embeddings = ggml_acc(ctx0, embeddings, inp,
-            embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
-    }
+    struct ggml_tensor * embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
 
     ggml_set_name(embeddings, "embeddings");
     ggml_set_input(embeddings);
+    embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
+        embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
+
+    embeddings = ggml_acc(ctx0, embeddings, inp,
+        embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
 
     struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
     ggml_set_name(positions, "positions");
@@ -590,7 +576,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         ggml_add(ctx0, embeddings, ggml_get_rows(ctx0, model.position_embeddings, positions));
 
     // pre-layernorm
-    if (ctx->has_pre_norm) {
+    {
         embeddings = ggml_norm(ctx0, embeddings, eps);
         ggml_set_name(embeddings, "pre_ln");
 
@@ -678,14 +664,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         embeddings = cur;
     }
 
-    // post-layernorm
-    if (ctx->has_post_norm) {
-        embeddings = ggml_norm(ctx0, embeddings, eps);
-        ggml_set_name(embeddings, "post_ln");
-
-        embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.post_ln_w), model.post_ln_b);
-    }
-
     // llava projector
     {
         embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);
@@ -1170,39 +1148,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
 
         }
 
-        try {
-            vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
-            new_clip->has_class_embedding = true;
-        } catch (const std::exception& e) {
-            new_clip->has_class_embedding = false;
-        }
-
-        try {
-            vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
-            vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
-            new_clip->has_pre_norm = true;
-        } catch (std::exception & e) {
-            new_clip->has_pre_norm = false;
-        }
-
-        try {
-            vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
-            vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
-            new_clip->has_post_norm = true;
-        } catch (std::exception & e) {
-            new_clip->has_post_norm = false;
-        }
-
-        try {
-            vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
-            new_clip->has_patch_bias = true;
-        } catch (std::exception & e) {
-            new_clip->has_patch_bias = false;
-        }
-
         try {
             vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
+            vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
             vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
+            vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
+            vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
         } catch(const std::exception& e) {
             LOG_TEE("%s: failed to load vision model tensors\n", __func__);
         }

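The revert swaps per-tensor `try`/`catch` probing (which set `has_class_embedding`, `has_patch_bias`, and friends) back to a single all-or-nothing `try` block. The real code is C++ in `examples/llava/clip.cpp`; the sketch below only contrasts the two loading strategies, using a hypothetical in-memory tensor table in place of `get_tensor()` and only tensor names visible in the hunk above:

```python
# Hypothetical stand-in for the GGUF tensor table of a plain LLaVA CLIP model
# (no patch bias tensor present, as TN_PATCH_BIAS was removed above).
tensors = {
    "v.patch_embd.weight": "...",
    "v.class_embd": "...",
    "v.position_embd.weight": "...",
}

def load_strict(t):
    # Reverted behaviour: the listed tensors are simply required; any miss
    # lands in the single catch block ("failed to load vision model tensors").
    return {name: t[name] for name in
            ("v.patch_embd.weight", "v.class_embd", "v.position_embd.weight")}

def load_optional(t):
    # Moondream-era behaviour: probe optional tensors one by one and record
    # has_* flags instead of failing outright.
    model, flags = {}, {}
    for flag, name in [("has_class_embedding", "v.class_embd"),
                       ("has_patch_bias", "v.patch_embd.bias")]:
        try:
            model[name] = t[name]
            flags[flag] = True
        except KeyError:
            flags[flag] = False
    return model, flags

print(sorted(load_strict(tensors)))
print(load_optional(tensors)[1])  # {'has_class_embedding': True, 'has_patch_bias': False}
```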
From a3718c6354dd0459154fa5297f1f29587845bc76 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 4 May 2024 10:38:20 +0800
Subject: [PATCH 9/9] 1.64.1 to fix llava issues

---
 koboldcpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index c64fee035c51a..edf732bc3eb78 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -636,7 +636,7 @@ def string_contains_sequence_substring(inputstr,sequences):
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.64"
+KcppVersion = "1.64.1"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True