fix: avoid llama.cpp's racing (#923)
wsxiaoys authored Nov 30, 2023
1 parent b1dcea0 commit ffd5ef3
Showing 1 changed file with 10 additions and 0 deletions.
crates/llama-cpp-bindings/src/engine.cc
@@ -4,6 +4,7 @@
 #include <vector>
 #include <deque>
 #include <unordered_set>
+#include <mutex>
 
 #include <ggml.h>
 #include <llama.h>
@@ -126,6 +127,8 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
   }
 
   rust::Vec<StepOutput> step() override {
+    std::lock_guard<std::mutex> guard(g_mutex_);
+
     auto* ctx = ctx_.get();
     auto n_vocab = llama_n_vocab(llama_get_model(ctx));
 
@@ -275,8 +278,15 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
   std::unordered_set<uint32_t> stopped_requests_;
 
   uint32_t parallelism_;
+
+  // llama.cpp is not thread-safe.
+  // FIXME(meng): remove the mutex once https://github.com/ggerganov/llama.cpp/issues/3960 is fixed
+  // and integrated into tabby's fork.
+  static std::mutex g_mutex_;
 };
 
+std::mutex TextInferenceEngineImpl::g_mutex_;
+
 static int g_llama_cpp_log_level = 0;
 static void llama_log_callback(ggml_log_level level, const char * text, void * user_data) {
   (void)user_data;
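
For context, here is a minimal, self-contained sketch of the locking pattern this commit applies: one static std::mutex shared by every engine instance, acquired via std::lock_guard at the top of the method that drives llama.cpp. The Engine class, steps_ counter, and main driver below are hypothetical stand-ins, not Tabby's real API; only the static-mutex-plus-guard usage mirrors the diff.

#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class Engine {
 public:
  void step() {
    // The guard is released automatically when step() returns, including
    // on early returns or exceptions (RAII), so no unlock call is needed.
    std::lock_guard<std::mutex> guard(g_mutex_);
    ++steps_;  // stand-in for the non-thread-safe llama.cpp calls
  }

  int steps() const { return steps_; }

 private:
  int steps_ = 0;
  // static: shared by ALL Engine instances, so concurrent step() calls
  // are serialized process-wide, not merely per instance.
  static std::mutex g_mutex_;
};

// A non-inline static data member needs exactly one out-of-class
// definition, which is why the commit adds the matching line after the
// class body.
std::mutex Engine::g_mutex_;

int main() {
  Engine a, b;
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back([&] {
      for (int j = 0; j < 1000; ++j) {
        a.step();
        b.step();
      }
    });
  }
  for (auto& t : threads) t.join();
  std::printf("a=%d b=%d\n", a.steps(), b.steps());  // prints a=4000 b=4000
  return 0;
}

Making the mutex static rather than a per-instance member is the key choice: it serializes step() across all TextInferenceEngineImpl objects in the process, which matches the in-code comment that llama.cpp itself, not just a single context, is not thread-safe.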
