forked from LostRuins/koboldcpp
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/concedo'
- Loading branch information
Showing
80 changed files
with
11,937 additions
and
4,300 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,10 +39,15 @@ endif | |
# | ||
|
||
# keep standard at C11 and C++11 | ||
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE | ||
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE | ||
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE | ||
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE | ||
LDFLAGS = | ||
|
||
ifndef LLAMA_NO_K_QUANTS | ||
CFLAGS += -DGGML_USE_K_QUANTS | ||
CXXFLAGS += -DGGML_USE_K_QUANTS | ||
endif | ||
|
||
# these are used on windows, to build some libraries with extra old device compatibility | ||
SIMPLECFLAGS = | ||
FULLCFLAGS = | ||
|
@@ -285,19 +290,17 @@ ifeq ($(OS),Windows_NT) | |
endif | ||
else | ||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) | ||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) | ||
|
||
ifdef LLAMA_OPENBLAS | ||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
endif | ||
ifdef LLAMA_CLBLAST | ||
ifeq ($(UNAME_S),Darwin) | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
else | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
endif | ||
endif | ||
|
||
ifdef LLAMA_CUBLAS | ||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS) | ||
endif | ||
|
@@ -351,12 +354,20 @@ ggml_cublas.o: ggml.c ggml.h ggml-cuda.h k_quants.h | |
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ | ||
|
||
#quants K | ||
KQ1 = | ||
KQ2 = | ||
KQ3 = | ||
ifndef LLAMA_NO_K_QUANTS | ||
KQ1 = k_quants.o | ||
KQ2 = k_quants_noavx2.o | ||
KQ3 = k_quants_failsafe.o | ||
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h | ||
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ | ||
k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h | ||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ | ||
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h | ||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ | ||
endif # LLAMA_NO_K_QUANTS | ||
|
||
#there's no intrinsics or special gpu ops used here, so we can have a universal object | ||
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h | ||
|
@@ -416,7 +427,7 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER) | |
clean: | ||
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so koboldcpp_hipblas.so | ||
|
||
main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS) | ||
main: examples/main/main.cpp build-info.h ggml.o $(KQ1) ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS) | ||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) | ||
@echo | ||
@echo '==== Run ./main -h for help. ====' | ||
|
@@ -425,31 +436,69 @@ main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o | |
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS) | ||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) | ||
|
||
|
||
#generated libraries | ||
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS) | ||
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS) | ||
$(DEFAULT_BUILD) | ||
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS) | ||
|
||
ifdef OPENBLAS_BUILD | ||
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS) | ||
$(OPENBLAS_BUILD) | ||
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o grammar-parser.o $(OBJS) | ||
else | ||
koboldcpp_openblas: | ||
$(DONOTHING) | ||
endif | ||
|
||
ifdef FAILSAFE_BUILD | ||
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o $(KQ3) ggml-alloc.o grammar-parser.o $(OBJS) | ||
$(FAILSAFE_BUILD) | ||
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o grammar-parser.o $(OBJS) | ||
else | ||
koboldcpp_failsafe: | ||
$(DONOTHING) | ||
endif | ||
|
||
ifdef NOAVX2_BUILD | ||
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o $(KQ2) ggml-alloc.o grammar-parser.o $(OBJS) | ||
$(NOAVX2_BUILD) | ||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS) | ||
else | ||
koboldcpp_noavx2: | ||
$(DONOTHING) | ||
endif | ||
|
||
ifdef CLBLAST_BUILD | ||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o $(KQ1) ggml-alloc.o grammar-parser.o $(OBJS) | ||
$(CLBLAST_BUILD) | ||
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS) | ||
else | ||
koboldcpp_clblast: | ||
$(DONOTHING) | ||
endif | ||
|
||
ifdef CUBLAS_BUILD | ||
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o $(KQ1) ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS) | ||
$(CUBLAS_BUILD) | ||
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS) | ||
else | ||
koboldcpp_cublas: | ||
$(DONOTHING) | ||
endif | ||
|
||
ifdef HIPBLAS_BUILD | ||
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o $(KQ1) ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS) | ||
$(HIPBLAS_BUILD) | ||
else | ||
koboldcpp_hipblas: | ||
$(DONOTHING) | ||
endif | ||
|
||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o | ||
# tools | ||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o $(KQ1) ggml-alloc.o | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp | ||
quantize_gptj: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
quantize_gpt2: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp | ||
quantize_gpt2: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
quantize_neox: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp | ||
quantize_neox: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
quantize_mpt: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp | ||
quantize_mpt: ggml.o llama.o $(KQ1) ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS) | ||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(HIPLDFLAGS) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,18 @@ | ||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe -o cloudflared.exe | ||
cloudflared.exe tunnel --url localhost:5001 | ||
: # This script will help setup a cloudflared tunnel for accessing KoboldCpp over the internet | ||
: # It should work out of the box on both linux and windows | ||
: # ====== | ||
: # WINDOWS PORTION | ||
:<<BATCH | ||
@echo off | ||
echo Starting Cloudflare Tunnel for Windows | ||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-windows-amd64.exe -o cloudflared.exe | ||
cloudflared.exe tunnel --url localhost:5001 | ||
GOTO ENDING | ||
BATCH | ||
: # LINUX PORTION | ||
echo 'Starting Cloudflare Tunnel for Linux' | ||
curl -L https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o 'cloudflared-linux-amd64' # | ||
chmod +x 'cloudflared-linux-amd64' # | ||
./cloudflared-linux-amd64 tunnel --url http://localhost:5001 # | ||
exit # | ||
:ENDING |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.