Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/concedo'
Browse files Browse the repository at this point in the history
  • Loading branch information
YellowRoseCx committed Oct 1, 2023
2 parents 354fa72 + bc841ec commit 1c4e4a6
Show file tree
Hide file tree
Showing 80 changed files with 11,937 additions and 4,300 deletions.
37 changes: 25 additions & 12 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,44 @@
.vs/
.vscode/

lcov-report/
gcovr-report/

build*/
out/
tmp/

models/*
models-mnt

/main
/quantize
/quantize-stats
/result
/perplexity
/embedding
/train-text-from-scratch
/convert-llama2c-to-ggml
/simple
/benchmark-matmult
/vdot
/server
/Pipfile
/baby-llama
/beam-search
/benchmark-matmult
/convert-llama2c-to-ggml
/embd-input-test
/embedding
/gguf
/gguf-llama-simple
/libllama.so
/llama-bench
/main
/metal
/perplexity
/q8dot
/quantize
/quantize-stats
/result
/save-load-state
/server
/simple
/batched
/export-lora
/finetune
/speculative
/parallel
/train-text-from-scratch
/vdot
build-info.h
arm_neon.h
compile_commands.json
Expand Down
51 changes: 26 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -251,37 +251,38 @@ endif()

if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
set(c_flags
-Wall
-Wextra
-Wpedantic
-Wcast-qual
-Wdouble-promotion
-Wshadow
-Wstrict-prototypes
-Wpointer-arith
-Wmissing-prototypes
-Werror=implicit-int
-Wno-unused-function
)
set(cxx_flags
-Wall
-Wextra
-Wpedantic
-Wcast-qual
-Wmissing-declarations
-Wno-unused-function
-Wno-multichar
)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# g++ only
set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
-Werror=implicit-function-declaration)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)

if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)

if (
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
)
set(c_flags ${c_flags} -Wdouble-promotion)
endif()
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
set(c_flags ${c_flags} -Wdouble-promotion)
set(cxx_flags ${cxx_flags} -Wno-array-bounds)

if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
endif()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
set(cxx_flags ${cxx_flags} -Wextra-semi)
endif()
endif()
else()
# todo : msvc
endif()

add_compile_options(
${warning_flags}
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
)
Expand Down
89 changes: 69 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ endif
#

# keep standard at C11 and C++11
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
LDFLAGS =

ifndef LLAMA_NO_K_QUANTS
CFLAGS += -DGGML_USE_K_QUANTS
CXXFLAGS += -DGGML_USE_K_QUANTS
endif

# these are used on windows, to build some libraries with extra old device compatibility
SIMPLECFLAGS =
FULLCFLAGS =
Expand Down Expand Up @@ -285,19 +290,17 @@ ifeq ($(OS),Windows_NT)
endif
else
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS)
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS)

ifdef LLAMA_OPENBLAS
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
endif
ifdef LLAMA_CLBLAST
ifeq ($(UNAME_S),Darwin)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
else
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o [email protected] $(LDFLAGS)
endif
endif

ifdef LLAMA_CUBLAS
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o [email protected] $(CUBLASLD_FLAGS) $(LDFLAGS)
endif
Expand Down Expand Up @@ -351,12 +354,20 @@ ggml_cublas.o: ggml.c ggml.h ggml-cuda.h k_quants.h
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@

#quants K
KQ1 =
KQ2 =
KQ3 =
ifndef LLAMA_NO_K_QUANTS
KQ1 = k_quants.o
KQ2 = k_quants_noavx2.o
KQ3 = k_quants_failsafe.o
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
endif # LLAMA_NO_K_QUANTS

#there's no intrinsics or special gpu ops used here, so we can have a universal object
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
Expand Down Expand Up @@ -416,7 +427,7 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
clean:
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so koboldcpp_hipblas.so

main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
main: examples/main/main.cpp build-info.h ggml.o $(KQ1) ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@echo
@echo '==== Run ./main -h for help. ===='
Expand All @@ -425,31 +436,69 @@ main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)


#generated libraries
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS)
$(DEFAULT_BUILD)
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)

ifdef OPENBLAS_BUILD
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS)
$(OPENBLAS_BUILD)
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o grammar-parser.o $(OBJS)
else
koboldcpp_openblas:
$(DONOTHING)
endif

ifdef FAILSAFE_BUILD
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o $(KQ3) ggml-alloc.o grammar-parser.o $(OBJS)
$(FAILSAFE_BUILD)
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o grammar-parser.o $(OBJS)
else
koboldcpp_failsafe:
$(DONOTHING)
endif

ifdef NOAVX2_BUILD
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o $(KQ2) ggml-alloc.o grammar-parser.o $(OBJS)
$(NOAVX2_BUILD)
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
else
koboldcpp_noavx2:
$(DONOTHING)
endif

ifdef CLBLAST_BUILD
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS)
$(CLBLAST_BUILD)
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS)
else
koboldcpp_clblast:
$(DONOTHING)
endif

ifdef CUBLAS_BUILD
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o $(KQ1) ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS)
$(CUBLAS_BUILD)
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS)
else
koboldcpp_cublas:
$(DONOTHING)
endif

ifdef HIPBLAS_BUILD
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o $(KQ1) ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS)
$(HIPBLAS_BUILD)
else
koboldcpp_hipblas:
$(DONOTHING)
endif

quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
# tools
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o $(KQ1) ggml-alloc.o
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
quantize_gptj: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_gpt2: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
quantize_gpt2: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_neox: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
quantize_neox: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_mpt: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
quantize_mpt: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(HIPLDFLAGS)
Expand Down
6 changes: 4 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ let platforms: [SupportedPlatform]? = [
.tvOS(.v14)
]
let exclude: [String] = []
let additionalSources: [String] = ["ggml-metal.m"]
let additionalSources: [String] = ["ggml-metal.m", "ggml-metal.metal"]
let additionalSettings: [CSetting] = [
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_SWIFT"),
Expand Down Expand Up @@ -44,7 +44,9 @@ let package = Package(
cSettings: [
.unsafeFlags(["-Wno-shorten-64-to-32"]),
.define("GGML_USE_K_QUANTS"),
.define("GGML_USE_ACCELERATE")
.define("GGML_USE_ACCELERATE"),
.define("ACCELERATE_NEW_LAPACK"),
.define("ACCELERATE_LAPACK_ILP64")
] + additionalSettings,
linkerSettings: [
.linkedFramework("Accelerate")
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ For more information, be sure to run the program with the `--help` flag.
- If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag, make sure you select the correct .exe with CUDA support.
- You can attempt a CuBLAS build with `LLAMA_CUBLAS=1` or using the provided CMake file (best for visual studio users). If you use the CMake file to build, copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC.

## AMD
- Please check out https://github.com/YellowRoseCx/koboldcpp-rocm

## Questions and Help
- **First, please check out [The KoboldCpp FAQ and Knowledgebase](https://github.com/LostRuins/koboldcpp/wiki) which may already have answers to your questions! Also please search through past issues and discussions.**
- If you cannot find an answer, open an issue on this github, or find us on the [KoboldAI Discord](https://koboldai.org/discord).
Expand All @@ -94,7 +97,7 @@ For more information, be sure to run the program with the `--help` flag.
- The other files are also under the AGPL v3.0 License unless otherwise stated

## Notes
- Generation delay scales linearly with original prompt length. If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux. CLBlast speeds this up even further, and `--gpulayers` + `--useclblast` more so.
- Generation delay scales linearly with original prompt length. If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux. CLBlast speeds this up even further, and `--gpulayers` + `--useclblast` or `--usecublas` more so.
- I have heard of someone claiming a false AV positive report. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`
- Supported GGML models (Includes backward compatibility for older versions/legacy GGML models, though some newer features might be unavailable):
- LLAMA and LLAMA2 (LLaMA / Alpaca / GPT4All / Vicuna / Koala / Pygmalion 7B / Metharme 7B / WizardLM and many more)
Expand All @@ -103,4 +106,5 @@ For more information, be sure to run the program with the `--help` flag.
- RWKV
- GPT-NeoX / Pythia / StableLM / Dolly / RedPajama
- MPT models
- Falcon (GGUF only)

20 changes: 18 additions & 2 deletions Remote-Link.cmd
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe -o cloudflared.exe
cloudflared.exe tunnel --url localhost:5001
: # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet
: # It should work out of the box on both linux and windows
: # ======
: # WINDOWS PORTION
:<<BATCH
@echo off
echo Starting Cloudflare Tunnel for Windows
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe -o cloudflared.exe
cloudflared.exe tunnel --url localhost:5001
GOTO ENDING
BATCH
: # LINUX PORTION
echo 'Starting Cloudflare Tunnel for Linux'
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o 'cloudflared-linux-amd64' #
chmod +x 'cloudflared-linux-amd64' #
./cloudflared-linux-amd64 tunnel --url http://localhost:5001 #
exit #
:ENDING
28 changes: 15 additions & 13 deletions class.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"extra_classes": "",
'children': [{'text': 'False', 'value': False}, {'text': 'True', 'value': True}],
})
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "GPU ID",
"id": "kcpp_tensor_split_str",
"default": "1",
"check": {"value": "", 'check': "!="},
"tooltip": "Which GPU's do we use? For example:1 2",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": ""
})
requested_parameters.append({
"uitype": "dropdown",
"unit": "int",
Expand All @@ -202,18 +214,6 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"extra_classes": "",
'children': [{'text': 'False', 'value': 0}, {'text': 'True', 'value': 1}],
})
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "Tensor Split",
"id": "kcpp_tensor_split_str",
"default": self.kcpp_tensor_split_str,
"check": {"value": "", 'check': "!="},
"tooltip": "Tensor Split, values are space separated",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": ""
})
return requested_parameters

def set_input_parameters(self, parameters):
Expand All @@ -232,6 +232,7 @@ def set_input_parameters(self, parameters):
self.kcpp_tensor_split = []
for s in splits:
self.kcpp_tensor_split.append(int(s))
print(self.kcpp_tensor_split)

accel = parameters["kcpp_accelerator"]
if accel==0:
Expand Down Expand Up @@ -271,7 +272,8 @@ def _load(self, save_model: bool, initial_load: bool) -> None:
blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None, onready='', multiuser=False)
useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
onready='', multiuser=False, foreground=False)

koboldcpp.main(kcppargs,False) #initialize library without enabling Lite http server
kcpp_backend_loaded = True
Expand Down
Loading

0 comments on commit 1c4e4a6

Please sign in to comment.