Merge branch 'nomic-ai:main' into main
abdulrahman305 authored Feb 2, 2024
2 parents 5ccb2bf + 10e3f7b commit 3fc5977
Showing 79 changed files with 2,236 additions and 1,162 deletions.
17 changes: 7 additions & 10 deletions gpt4all-backend/bert.cpp
@@ -381,10 +381,9 @@ void bert_eval(

struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q);
// KQ = soft_max(KQ / sqrt(head width))
-    KQ = ggml_soft_max(ctx0,
-                       ggml_scale(ctx0,
-                                  KQ,
-                                  ggml_new_f32(ctx0, 1.0f / sqrt((float)d_head))));
+    KQ = ggml_soft_max(
+        ctx0, ggml_scale(ctx0, KQ, 1.0f / sqrt((float)d_head))
+    );

V = ggml_cont(ctx0, ggml_transpose(ctx0, V));
struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ);
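Aside: both this hunk and the matching one in gptj.cpp below compute the standard scaled dot-product attention step, soft_max(KQ / sqrt(d_head)); the diff itself only tracks an upstream ggml API change in which ggml_scale now takes a plain float instead of a one-element tensor built with ggml_new_f32. As a sketch of the math only — toy data and illustrative names, not gpt4all or ggml code:

// Plain C++ illustration of soft_max(KQ / sqrt(head width)) for one head.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const int n_tokens = 3, d_head = 4;
    std::vector<float> Q(n_tokens * d_head, 0.5f);  // query rows, row-major
    std::vector<float> K(n_tokens * d_head, 0.25f); // key rows, row-major

    for (int i = 0; i < n_tokens; ++i) {
        std::vector<float> row(n_tokens);
        float maxv = -1e30f;
        for (int j = 0; j < n_tokens; ++j) {
            float dot = 0.0f;
            for (int k = 0; k < d_head; ++k)
                dot += Q[i * d_head + k] * K[j * d_head + k];
            row[j] = dot / std::sqrt((float)d_head); // KQ / sqrt(head width)
            maxv = std::max(maxv, row[j]);
        }
        float sum = 0.0f; // subtract the max for a numerically stable softmax
        for (float &v : row) { v = std::exp(v - maxv); sum += v; }
        for (float v : row) std::printf("%.3f ", v / sum);
        std::printf("\n");
    }
    return 0;
}

With uniform toy inputs every score ties, so each row prints a uniform 0.333 distribution.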
@@ -490,10 +489,6 @@ struct bert_ctx * bert_load_from_file(const char *fname)
#endif

bert_ctx * new_bert = new bert_ctx;
-#if defined(GGML_USE_KOMPUTE)
-    new_bert->buf_compute.force_cpu = true;
-    new_bert->work_buf.force_cpu = true;
-#endif

bert_model & model = new_bert->model;
bert_vocab & vocab = new_bert->vocab;
@@ -714,9 +709,10 @@ Bert::~Bert() {
bert_free(d_ptr->ctx);
}

-bool Bert::loadModel(const std::string &modelPath, int n_ctx)
+bool Bert::loadModel(const std::string &modelPath, int n_ctx, int ngl)
{
(void)n_ctx;
+    (void)ngl;
d_ptr->ctx = bert_load_from_file(modelPath.c_str());
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
d_ptr->modelLoaded = d_ptr->ctx != nullptr;
@@ -729,10 +725,11 @@ bool Bert::isModelLoaded() const
return d_ptr->modelLoaded;
}

-size_t Bert::requiredMem(const std::string &modelPath, int n_ctx)
+size_t Bert::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
{
(void)modelPath;
(void)n_ctx;
+    (void)ngl;
return 0;
}

4 changes: 2 additions & 2 deletions gpt4all-backend/bert_impl.h
@@ -18,9 +18,9 @@ class Bert : public LLModel {

bool supportsEmbedding() const override { return true; }
bool supportsCompletion() const override { return true; }
-    bool loadModel(const std::string &modelPath, int n_ctx) override;
+    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
bool isModelLoaded() const override;
-    size_t requiredMem(const std::string &modelPath, int n_ctx) override;
+    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
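Aside: the header change mirrors the .cpp above — every backend now accepts an ngl (number of GPU layers to offload) argument on loadModel and requiredMem, and CPU-only backends such as Bert simply discard it. A minimal sketch of the pattern, using a cut-down stand-in base class rather than the real LLModel interface:

// MiniLLModel/MiniBackend are illustrative stand-ins, not the real
// gpt4all LLModel hierarchy; they show only the widened signatures.
#include <cstddef>
#include <string>

struct MiniLLModel {
    virtual ~MiniLLModel() = default;
    virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
    virtual std::size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0;
};

struct MiniBackend : MiniLLModel {
    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override {
        (void)n_ctx;
        (void)ngl; // CPU-only: the GPU layer count is accepted but unused
        return !modelPath.empty();
    }
    std::size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override {
        (void)modelPath; (void)n_ctx; (void)ngl;
        return 0; // like Bert: no up-front estimate is reported
    }
};

Keeping ngl in every override, even where unused, lets the dispatch layer pass the same argument list to all backends.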
12 changes: 5 additions & 7 deletions gpt4all-backend/gptj.cpp
@@ -414,11 +414,7 @@ bool gptj_eval(
struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);

// KQ_scaled = KQ / sqrt(n_embd/n_head)
-    struct ggml_tensor * KQ_scaled =
-        ggml_scale(ctx0,
-            KQ,
-            ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head))
-        );
+    struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrt(float(n_embd)/n_head));

// KQ_masked = mask_past(KQ_scaled)
struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past);
@@ -676,17 +672,19 @@ GPTJ::GPTJ()
d_ptr->modelLoaded = false;
}

-size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx) {
+size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) {
(void)n_ctx;
+    (void)ngl;
gptj_model dummy_model;
gpt_vocab dummy_vocab;
size_t mem_req;
gptj_model_load(modelPath, dummy_model, dummy_vocab, &mem_req);
return mem_req;
}

-bool GPTJ::loadModel(const std::string &modelPath, int n_ctx) {
+bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) {
(void)n_ctx;
+    (void)ngl;
std::mt19937 rng(time(NULL));
d_ptr->rng = rng;
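
Aside: requiredMem here works as a dry run — gptj_model_load fills mem_req from throwaway model/vocab objects so a caller can budget memory before a real load. A caller-side sketch, assuming gptj_impl.h is reachable on the include path; availableRamBytes() is a hypothetical stub:

// Hypothetical caller: compare the dry-run estimate against a RAM
// budget before committing to the full load; numbers are made up.
#include <cstddef>
#include <cstdio>
#include <string>
#include "gptj_impl.h"

static std::size_t availableRamBytes() {
    return 8ull * 1024 * 1024 * 1024; // stand-in for a real RAM query
}

int main() {
    GPTJ model;
    const std::string path = "/path/to/gptj-model.bin";
    const std::size_t need = model.requiredMem(path, /*n_ctx=*/2048, /*ngl=*/0);
    if (need > availableRamBytes()) {
        std::fprintf(stderr, "model needs %zu bytes, not loading\n", need);
        return 1;
    }
    return model.loadModel(path, /*n_ctx=*/2048, /*ngl=*/0) ? 0 : 1;
}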

4 changes: 2 additions & 2 deletions gpt4all-backend/gptj_impl.h
@@ -17,9 +17,9 @@ class GPTJ : public LLModel {

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
-    bool loadModel(const std::string &modelPath, int n_ctx) override;
+    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
bool isModelLoaded() const override;
-    size_t requiredMem(const std::string &modelPath, int n_ctx) override;
+    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
2 changes: 1 addition & 1 deletion gpt4all-backend/llama.cpp-mainline
3 changes: 1 addition & 2 deletions gpt4all-backend/llama.cpp.cmake
@@ -175,6 +175,7 @@ if (LLAMA_KOMPUTE)
DEPENDS ${LLAMA_DIR}/${source}
${LLAMA_DIR}/kompute-shaders/common.comp
${LLAMA_DIR}/kompute-shaders/op_getrows.comp
+            ${LLAMA_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
${LLAMA_DIR}/kompute-shaders/op_mul_mv_q_n.comp
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${LLAMA_DIR}/${source}
COMMENT "Compiling ${source} to ${source}.spv"
@@ -231,7 +232,6 @@ if (LLAMA_KOMPUTE)
kompute-shaders/op_add.comp
kompute-shaders/op_addrow.comp
kompute-shaders/op_mul.comp
-        kompute-shaders/op_mulrow.comp
kompute-shaders/op_silu.comp
kompute-shaders/op_relu.comp
kompute-shaders/op_gelu.comp
@@ -264,7 +264,6 @@ if (LLAMA_KOMPUTE)
shaderop_add.h
shaderop_addrow.h
shaderop_mul.h
-        shaderop_mulrow.h
shaderop_silu.h
shaderop_relu.h
shaderop_gelu.h