diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml index 67b57522fb0a..ea1de749487a 100644 --- a/.github/workflows/test-gpu.yml +++ b/.github/workflows/test-gpu.yml @@ -40,6 +40,8 @@ jobs: if [ ! -e /run/systemd/system ]; then sudo mkdir /run/systemd/system fi + sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }} + sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }} make \ TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \ BUILD_TYPE=cublas \ @@ -57,4 +59,5 @@ jobs: make \ TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \ teardown-e2e || true + sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true docker system prune -f -a --volumes || true diff --git a/Makefile b/Makefile index affba0ecf2b3..b5d471565c76 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7 -CPPLLAMA_VERSION?=465219b9143ac01db0990bbcb0a081ef72ec2008 +CPPLLAMA_VERSION?=9d02956443e5c1ded29b7b5ed8a21bc01ba6f563 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all @@ -30,15 +30,9 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d # go-piper version PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7 -# go-bloomz version -BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f - # stablediffusion version STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632 -# Go-ggllm -GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b - export BUILD_TYPE?= export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export CMAKE_ARGS?= @@ -129,7 +123,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp 
backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -146,14 +140,6 @@ gpt4all: git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 -## go-ggllm -go-ggllm: - git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm - cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1 - -go-ggllm/libggllm.a: go-ggllm - $(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a - ## go-piper go-piper: git clone --recurse-submodules https://github.com/mudler/go-piper go-piper @@ -180,14 +166,6 @@ go-rwkv: go-rwkv/librwkv.a: go-rwkv cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. 
-## bloomz -bloomz: - git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz - cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1 - -bloomz/libbloomz.a: bloomz - cd bloomz && make libbloomz.a - go-bert/libgobert.a: go-bert $(MAKE) -C go-bert libgobert.a @@ -241,7 +219,7 @@ go-llama-stable/libbinding.a: go-llama-stable go-piper/libpiper_binding.a: go-piper $(MAKE) -C go-piper libpiper_binding.a example/main -get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion +get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion touch $@ replace: @@ -250,10 +228,8 @@ replace: $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert - $(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper - $(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm prepare-sources: get-sources replace $(GOCMD) mod download @@ -269,9 +245,7 @@ rebuild: ## Rebuilds the project $(MAKE) -C whisper.cpp clean $(MAKE) -C go-stable-diffusion clean $(MAKE) -C go-bert clean - $(MAKE) -C bloomz clean $(MAKE) -C go-piper clean - $(MAKE) -C go-ggllm clean $(MAKE) build prepare: prepare-sources $(OPTIONAL_TARGETS) @@ -289,10 +263,8 @@ clean: ## Remove build related file rm -rf ./backend-assets rm -rf ./go-rwkv rm -rf ./go-bert - rm -rf ./bloomz rm -rf ./whisper.cpp rm -rf ./go-piper - rm -rf ./go-ggllm rm -rf $(BINARY_NAME) rm -rf release/ $(MAKE) -C backend/cpp/llama clean @@ 
-418,10 +390,6 @@ protogen-python: backend-assets/grpc: mkdir -p backend-assets/grpc -backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/ - backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \ @@ -486,10 +454,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/ -backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/ - backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/ diff --git a/backend/cpp/llama/CMakeLists.txt b/backend/cpp/llama/CMakeLists.txt index 116283fbbb6a..2d804d440d2d 100644 --- a/backend/cpp/llama/CMakeLists.txt +++ b/backend/cpp/llama/CMakeLists.txt @@ -4,6 +4,11 @@ set(TARGET grpc-server) set(_PROTOBUF_LIBPROTOBUF libprotobuf) set(_REFLECTION grpc++_reflection) +if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + link_directories("/opt/homebrew/lib") + include_directories("/opt/homebrew/include") +endif() + find_package(absl CONFIG REQUIRED) 
find_package(Protobuf CONFIG REQUIRED) find_package(gRPC CONFIG REQUIRED) @@ -15,8 +20,7 @@ find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${Protobuf_INCLUDE_DIRS}) -message(STATUS "Using protobuf ${Protobuf_VERSION} ${Protobuf_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}") - +message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}") # Proto file get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 50d59ab1f34e..c177fa73b3cf 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -275,11 +275,11 @@ struct llama_server_context if (suff_rm_leading_spc && suffix_tokens[0] == space_token) { suffix_tokens.erase(suffix_tokens.begin()); } - prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx)); - prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS - prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx)); + prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model)); + prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS + prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); - prefix_tokens.push_back(llama_token_middle(ctx)); + prefix_tokens.push_back(llama_token_middle(model)); auto prompt_tokens = prefix_tokens; @@ -419,7 +419,7 @@ struct llama_server_context if (params.n_predict == 0) { has_next_token = false; - result.tok = llama_token_eos(ctx); + result.tok = llama_token_eos(model); return result; } @@ -453,7 +453,7 @@ struct llama_server_context // decrement remaining sampling budget --n_remain; - if (!embd.empty() && 
embd.back() == llama_token_eos(ctx)) + if (!embd.empty() && embd.back() == llama_token_eos(model)) { // stopping_word = llama_token_to_piece(ctx, embd.back()); has_next_token = false; @@ -594,7 +594,7 @@ static void parse_options_completion(bool streaming,const backend::PredictOption if (predict->ignoreeos()) { - llama.params.sparams.logit_bias[llama_token_eos(llama.ctx)] = -INFINITY; + llama.params.sparams.logit_bias[llama_token_eos(llama.model)] = -INFINITY; } // const auto &logit_bias = body.find("logit_bias"); @@ -676,7 +676,7 @@ static void params_parse(const backend::ModelOptions* request, } static bool is_at_eob(llama_server_context &server_context, const llama_token *tokens, const size_t n_tokens) { - return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.ctx); + return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.model); } // Function matching type llama_beam_search_callback_fn_t. diff --git a/cmd/grpc/bloomz/main.go b/cmd/grpc/bloomz/main.go deleted file mode 100644 index 8d6303ba3959..000000000000 --- a/cmd/grpc/bloomz/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil { - panic(err) - } -} diff --git a/cmd/grpc/falcon/main.go b/cmd/grpc/falcon/main.go deleted file mode 100644 index 8ddf6236af22..000000000000 --- a/cmd/grpc/falcon/main.go +++ /dev/null @@ -1,25 +0,0 @@ -package main - -// GRPC Falcon server - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon" - - grpc 
"github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil { - panic(err) - } -} diff --git a/examples/configurations/README.md b/examples/configurations/README.md new file mode 100644 index 000000000000..2709f39e27f9 --- /dev/null +++ b/examples/configurations/README.md @@ -0,0 +1,42 @@ +## Advanced configuration + +This section contains examples of how to install models manually with config files. + +### Prerequisites + +First clone LocalAI: + +```bash +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI +``` + +Set up the model you prefer from the examples below, then start LocalAI: + +```bash +docker compose up -d --pull always +``` + +If LocalAI is already started, you can restart it with: + +```bash +docker compose restart +``` + +See also the getting started guide: https://localai.io/basics/getting_started/ + +### Mistral + +To set up Mistral, copy the files inside `mistral` into the `models` folder: + +```bash +cp -r examples/configurations/mistral/* models/ +``` + +Now download the model: + +```bash +wget https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf -O models/mistral-7b-openorca.Q6_K.gguf +``` + diff --git a/examples/configurations/mistral/chatml-block.tmpl b/examples/configurations/mistral/chatml-block.tmpl new file mode 100644 index 000000000000..cc86392a9e9e --- /dev/null +++ b/examples/configurations/mistral/chatml-block.tmpl @@ -0,0 +1,3 @@ +{{.Input}} +<|im_start|>assistant + diff --git a/examples/configurations/mistral/chatml.tmpl b/examples/configurations/mistral/chatml.tmpl new file mode 100644 index 000000000000..09e25322d40c --- /dev/null +++ b/examples/configurations/mistral/chatml.tmpl @@ -0,0 +1,3 @@ +<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName 
"user"}}user{{end}} +{{if .Content}}{{.Content}}{{end}} +<|im_end|> diff --git a/examples/configurations/mistral/completion.tmpl b/examples/configurations/mistral/completion.tmpl new file mode 100644 index 000000000000..9867cfcd3430 --- /dev/null +++ b/examples/configurations/mistral/completion.tmpl @@ -0,0 +1 @@ +{{.Input}} \ No newline at end of file diff --git a/examples/configurations/mistral/mistral.yaml b/examples/configurations/mistral/mistral.yaml new file mode 100644 index 000000000000..d2927f06fae5 --- /dev/null +++ b/examples/configurations/mistral/mistral.yaml @@ -0,0 +1,16 @@ +name: mistral +mmap: true +parameters: + model: mistral-7b-openorca.Q6_K.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 +template: + chat_message: chatml + chat: chatml-block + completion: completion +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +threads: 4 diff --git a/go.mod b/go.mod index d83ec16a6ebc..3262a68b6187 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 github.com/onsi/ginkgo/v2 v2.13.0 - github.com/onsi/gomega v1.28.0 + github.com/onsi/gomega v1.28.1 github.com/otiai10/openaigo v1.6.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/prometheus/client_golang v1.17.0 @@ -89,7 +89,7 @@ require ( github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect - github.com/google/go-cmp v0.5.9 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/klauspost/compress v1.16.7 // indirect diff --git a/go.sum b/go.sum index 5041f8324f3f..239bb85d4615 100644 --- a/go.sum +++ b/go.sum @@ -73,6 +73,8 @@ github.com/google/go-cmp v0.5.5/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= @@ -153,6 +155,8 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c= github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8= +github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA= +github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8= diff --git a/pkg/backend/llm/bloomz/bloomz.go b/pkg/backend/llm/bloomz/bloomz.go deleted file mode 100644 index 0775c77d153b..000000000000 --- a/pkg/backend/llm/bloomz/bloomz.go +++ /dev/null @@ -1,59 +0,0 @@ -package bloomz - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - 
"github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - "github.com/go-skynet/bloomz.cpp" -) - -type LLM struct { - base.SingleThread - - bloomz *bloomz.Bloomz -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - model, err := bloomz.New(opts.ModelFile) - llm.bloomz = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption { - predictOptions := []bloomz.PredictOption{ - bloomz.SetTemperature(float64(opts.Temperature)), - bloomz.SetTopP(float64(opts.TopP)), - bloomz.SetTopK(int(opts.TopK)), - bloomz.SetTokens(int(opts.Tokens)), - bloomz.SetThreads(int(opts.Threads)), - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed))) - } - - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/pkg/backend/llm/falcon/falcon.go b/pkg/backend/llm/falcon/falcon.go deleted file mode 100644 index 4b96b71f3333..000000000000 --- a/pkg/backend/llm/falcon/falcon.go +++ /dev/null @@ -1,145 +0,0 @@ -package falcon - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - ggllm "github.com/mudler/go-ggllm.cpp" -) - -type LLM struct { - base.SingleThread - - falcon *ggllm.Falcon -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - ggllmOpts := []ggllm.ModelOption{} - if opts.ContextSize != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize))) - } - // F16 doesn't seem to produce good output at all! - //if c.F16 { - // llamaOpts = append(llamaOpts, llama.EnableF16Memory) - //} - - if opts.NGPULayers != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers))) - } - - ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap)) - ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU)) - ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit)) - if opts.NBatch != 0 { - ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch))) - } else { - ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512)) - } - - model, err := ggllm.New(opts.ModelFile, ggllmOpts...) 
- llm.falcon = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption { - predictOptions := []ggllm.PredictOption{ - ggllm.SetTemperature(float64(opts.Temperature)), - ggllm.SetTopP(float64(opts.TopP)), - ggllm.SetTopK(int(opts.TopK)), - ggllm.SetTokens(int(opts.Tokens)), - ggllm.SetThreads(int(opts.Threads)), - } - - if opts.PromptCacheAll { - predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll) - } - - if opts.PromptCacheRO { - predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO) - } - - // Expected absolute path - if opts.PromptCachePath != "" { - predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath)) - } - - if opts.Mirostat != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat))) - } - - if opts.MirostatETA != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA))) - } - - if opts.MirostatTAU != 0 { - predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU))) - } - - if opts.Debug { - predictOptions = append(predictOptions, ggllm.Debug) - } - - predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...)) - - if opts.PresencePenalty != 0 { - predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty))) - } - - if opts.NKeep != 0 { - predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep))) - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch))) - } - - if opts.IgnoreEOS { - predictOptions = append(predictOptions, ggllm.IgnoreEOS) - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed))) - } - - //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) - - predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty))) - predictOptions = append(predictOptions, 
ggllm.SetMlock(opts.MLock)) - predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap)) - predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU)) - predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit)) - predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ))) - predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP))) - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - - predictOptions := buildPredictOptions(opts) - - predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool { - if token == "<|endoftext|>" { - return true - } - results <- token - return true - })) - - go func() { - _, err := llm.falcon.Predict(opts.Prompt, predictOptions...) 
- if err != nil { - fmt.Println("err: ", err) - } - close(results) - }() - - return nil -} diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go index 1fd961725606..7957ed59d638 100644 --- a/pkg/gallery/gallery.go +++ b/pkg/gallery/gallery.go @@ -8,6 +8,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/utils" "github.com/imdario/mergo" + "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" ) @@ -166,7 +167,9 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) return yaml.Unmarshal(d, &models) }) if err != nil { - + if yamlErr, ok := err.(*yaml.TypeError); ok { + log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models) + } return models, err } diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 5ad9500ba148..fbc4746b6dea 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -18,7 +18,6 @@ const ( LlamaBackend = "llama" LlamaStableBackend = "llama-stable" LLamaCPP = "llama-cpp" - BloomzBackend = "bloomz" StarcoderBackend = "starcoder" GPTJBackend = "gptj" DollyBackend = "dolly" @@ -30,7 +29,6 @@ const ( Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" Gpt4All = "gpt4all" - FalconBackend = "falcon" FalconGGMLBackend = "falcon-ggml" BertEmbeddingsBackend = "bert-embeddings" @@ -46,7 +44,6 @@ var AutoLoadBackends []string = []string{ LlamaStableBackend, LlamaBackend, Gpt4All, - FalconBackend, GPTNeoXBackend, BertEmbeddingsBackend, FalconGGMLBackend, @@ -56,7 +53,6 @@ var AutoLoadBackends []string = []string{ MPTBackend, ReplitBackend, StarcoderBackend, - BloomzBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend,