Commit

Merge branch 'nomic-ai:main' into main

abdulrahman305 authored Feb 26, 2024
2 parents 1d07ae2 + f8b1069 commit 8e5a234
Showing 44 changed files with 1,735 additions and 1,005 deletions.
18 changes: 12 additions & 6 deletions .circleci/continue_config.yml
@@ -611,6 +611,7 @@ jobs:
$Env:Path += ";$MinGwBin"
$Env:Path += ";C:\Program Files\CMake\bin"
$Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
$Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
cd gpt4all-backend
mkdir runtimes/win-x64
cd runtimes/win-x64
@@ -651,6 +652,7 @@ jobs:
command: |
$Env:Path += ";C:\Program Files\CMake\bin"
$Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
$Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
cd gpt4all-backend
mkdir runtimes/win-x64_msvc
cd runtimes/win-x64_msvc
@@ -1107,8 +1109,12 @@ workflows:
jobs:
- hold:
type: approval
- csharp-hold:
type: approval
- nuget-hold:
type: approval
- nodejs-hold:
type: approval
- npm-hold:
type: approval
- build-bindings-backend-linux:
@@ -1151,21 +1157,21 @@
branches:
only:
requires:
- npm-hold
- nodejs-hold
- build-bindings-backend-linux
- build-nodejs-windows:
filters:
branches:
only:
requires:
- npm-hold
- nodejs-hold
- build-bindings-backend-windows-msvc
- build-nodejs-macos:
filters:
branches:
only:
requires:
- npm-hold
- nodejs-hold
- build-bindings-backend-macos


@@ -1175,21 +1181,21 @@
branches:
only:
requires:
- nuget-hold
- csharp-hold
- build-bindings-backend-linux
- build-csharp-windows:
filters:
branches:
only:
requires:
- nuget-hold
- csharp-hold
- build-bindings-backend-windows
- build-csharp-macos:
filters:
branches:
only:
requires:
- nuget-hold
- csharp-hold
- build-bindings-backend-macos
- store-and-upload-nupkgs:
filters:
2 changes: 1 addition & 1 deletion gpt4all-backend/llama.cpp-mainline
Submodule llama.cpp-mainline updated 1 file
+35 −11 llama.cpp
21 changes: 14 additions & 7 deletions gpt4all-backend/llamamodel.cpp
@@ -64,6 +64,7 @@ static int llama_sample_top_p_top_k(
int last_n_tokens_size,
int top_k,
float top_p,
float min_p,
float temp,
float repeat_penalty,
int32_t pos) {
@@ -83,6 +84,7 @@ static int llama_sample_top_p_top_k(
llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
llama_sample_min_p(ctx, &candidates_p, min_p, 1);
llama_sample_temp(ctx, &candidates_p, temp);
return llama_sample_token(ctx, &candidates_p);
}
@@ -260,19 +262,23 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
d_ptr->model_params.progress_callback = &LLModel::staticProgressCallback;
d_ptr->model_params.progress_callback_user_data = this;

#ifdef GGML_USE_METAL
#ifdef GGML_USE_KOMPUTE
if (d_ptr->device != -1) {
d_ptr->model_params.main_gpu = d_ptr->device;
d_ptr->model_params.n_gpu_layers = ngl;
}
#elif defined(GGML_USE_METAL)
(void)ngl;

if (llama_verbose()) {
std::cerr << "llama.cpp: using Metal" << std::endl;
}

// always fully offload on Metal
// TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
d_ptr->model_params.n_gpu_layers = 100;
#elif defined(GGML_USE_KOMPUTE)
if (d_ptr->device != -1) {
d_ptr->model_params.main_gpu = d_ptr->device;
d_ptr->model_params.n_gpu_layers = ngl;
}
#else
(void)ngl;
#endif

d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
@@ -388,7 +394,7 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
return llama_sample_top_p_top_k(d_ptr->ctx,
promptCtx.tokens.data() + promptCtx.tokens.size() - n_prev_toks,
n_prev_toks, promptCtx.top_k, promptCtx.top_p, promptCtx.temp,
n_prev_toks, promptCtx.top_k, promptCtx.top_p, promptCtx.min_p, promptCtx.temp,
promptCtx.repeat_penalty, promptCtx.n_last_batch_tokens - 1);
}

@@ -469,6 +475,7 @@ std::vector<LLModel::GPUDevice> LLamaModel::availableGPUDevices(size_t memoryReq
return devices;
}
#else
(void)memoryRequired;
std::cerr << __func__ << ": built without Kompute\n";
#endif

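A note on the new parameter for readers of this file's hunks: min-p sampling discards candidate tokens whose probability is below min_p times the probability of the most likely token, so the cutoff adapts to how peaked the distribution is, and min_p = 0 leaves the candidates untouched. The changes above simply insert llama_sample_min_p into the existing sampler chain and thread promptCtx.min_p through sampleToken. A rough conceptual sketch of the filtering step (not the llama.cpp implementation) could look like this:

#include <algorithm>
#include <vector>

struct Candidate { int token; float prob; };

// Keep only candidates whose probability is at least min_p * (highest probability),
// then renormalize what remains. min_p == 0 keeps everything.
std::vector<Candidate> min_p_filter(std::vector<Candidate> cands, float min_p) {
    if (cands.empty() || min_p <= 0.0f)
        return cands;
    const float max_prob = std::max_element(cands.begin(), cands.end(),
        [](const Candidate &a, const Candidate &b) { return a.prob < b.prob; })->prob;
    const float cutoff = min_p * max_prob;
    cands.erase(std::remove_if(cands.begin(), cands.end(),
        [cutoff](const Candidate &c) { return c.prob < cutoff; }), cands.end());
    float total = 0.0f;
    for (const Candidate &c : cands) total += c.prob;
    for (Candidate &c : cands) c.prob /= total;
    return cands;
}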
1 change: 1 addition & 0 deletions gpt4all-backend/llmodel.h
@@ -66,6 +66,7 @@ class LLModel {
int32_t n_predict = 200;
int32_t top_k = 40;
float top_p = 0.9f;
float min_p = 0.0f;
float temp = 0.9f;
int32_t n_batch = 9;
float repeat_penalty = 1.10f;
2 changes: 2 additions & 0 deletions gpt4all-backend/llmodel_c.cpp
@@ -134,6 +134,7 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
wrapper->promptContext.n_predict = ctx->n_predict;
wrapper->promptContext.top_k = ctx->top_k;
wrapper->promptContext.top_p = ctx->top_p;
wrapper->promptContext.min_p = ctx->min_p;
wrapper->promptContext.temp = ctx->temp;
wrapper->promptContext.n_batch = ctx->n_batch;
wrapper->promptContext.repeat_penalty = ctx->repeat_penalty;
@@ -156,6 +157,7 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
ctx->n_predict = wrapper->promptContext.n_predict;
ctx->top_k = wrapper->promptContext.top_k;
ctx->top_p = wrapper->promptContext.top_p;
ctx->min_p = wrapper->promptContext.min_p;
ctx->temp = wrapper->promptContext.temp;
ctx->n_batch = wrapper->promptContext.n_batch;
ctx->repeat_penalty = wrapper->promptContext.repeat_penalty;
1 change: 1 addition & 0 deletions gpt4all-backend/llmodel_c.h
@@ -39,6 +39,7 @@ struct llmodel_prompt_context {
int32_t n_predict; // number of tokens to predict
int32_t top_k; // top k logits to sample from
float top_p; // nucleus sampling probability threshold
float min_p; // Min P sampling
float temp; // temperature to adjust model's output distribution
int32_t n_batch; // number of predictions to generate in parallel
float repeat_penalty; // penalty factor for repeated tokens
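Because llmodel_prompt_context is part of the C ABI shared by every binding, inserting min_p between top_p and temp shifts the offsets of the later fields, so out-of-tree callers have to rebuild against the updated header. A minimal sketch of setting the new field from C or C++, using the default values shown in the llmodel.h hunk above (the remaining members of the struct are omitted for brevity):

#include "llmodel_c.h"

// Sketch: populate the sampling fields visible in this diff. A real caller
// also fills the remaining members of llmodel_prompt_context before passing
// it to llmodel_prompt().
static void configure_sampling(llmodel_prompt_context *ctx) {
    ctx->n_predict      = 200;
    ctx->top_k          = 40;
    ctx->top_p          = 0.9f;
    ctx->min_p          = 0.0f;   // 0.0f disables min-p, matching the new default
    ctx->temp           = 0.9f;
    ctx->n_batch        = 9;
    ctx->repeat_penalty = 1.1f;
}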
2 changes: 2 additions & 0 deletions gpt4all-bindings/cli/app.py
@@ -120,6 +120,7 @@ def _old_loop(gpt4all_instance):
n_predict=200,
top_k=40,
top_p=0.9,
min_p=0.0,
temp=0.9,
n_batch=9,
repeat_penalty=1.1,
@@ -156,6 +157,7 @@ def _new_loop(gpt4all_instance):
temp=0.9,
top_k=40,
top_p=0.9,
min_p=0.0,
repeat_penalty=1.1,
repeat_last_n=64,
n_batch=9,
9 changes: 9 additions & 0 deletions gpt4all-bindings/csharp/Gpt4All/Bindings/LLPromptContext.cs
@@ -64,6 +64,15 @@ public float TopP
set => _ctx.top_p = value;
}

/// <summary>
/// min p sampling probability threshold
/// </summary>
public float MinP
{
get => _ctx.min_p;
set => _ctx.min_p = value;
}

/// <summary>
/// temperature to adjust model's output distribution
/// </summary>
2 changes: 2 additions & 0 deletions gpt4all-bindings/csharp/Gpt4All/Bindings/NativeMethods.cs
@@ -29,6 +29,8 @@ public unsafe partial struct llmodel_prompt_context

public float top_p;

public float min_p;

public float temp;

[NativeTypeName("int32_t")]
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ public static string Dump(this LLModelPromptContext context)
n_predict = {ctx.n_predict}
top_k = {ctx.top_k}
top_p = {ctx.top_p}
min_p = {ctx.min_p}
temp = {ctx.temp}
n_batch = {ctx.n_batch}
repeat_penalty = {ctx.repeat_penalty}
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ public static LLModelPromptContext ToPromptContext(this PredictRequestOptions op
TokensSize = opts.TokensSize,
TopK = opts.TopK,
TopP = opts.TopP,
MinP = opts.MinP,
PastNum = opts.PastConversationTokensNum,
RepeatPenalty = opts.RepeatPenalty,
Temperature = opts.Temperature,
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@ public record PredictRequestOptions

public float TopP { get; init; } = 0.9f;

public float MinP { get; init; } = 0.0f;

public float Temperature { get; init; } = 0.1f;

public int Batches { get; init; } = 8;
4 changes: 3 additions & 1 deletion gpt4all-bindings/golang/binding.cpp
@@ -36,7 +36,7 @@ std::string res = "";
void * mm;

void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
float top_p, float temp, int n_batch,float ctx_erase)
float top_p, float min_p, float temp, int n_batch,float ctx_erase)
{
llmodel_model* model = (llmodel_model*) m;

@@ -69,6 +69,7 @@ void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n,
.n_predict = 50,
.top_k = 10,
.top_p = 0.9,
.min_p = 0.0,
.temp = 1.0,
.n_batch = 1,
.repeat_penalty = 1.2,
@@ -83,6 +84,7 @@ void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n,
prompt_context->top_k = top_k;
prompt_context->context_erase = ctx_erase;
prompt_context->top_p = top_p;
prompt_context->min_p = min_p;
prompt_context->temp = temp;
prompt_context->n_batch = n_batch;

4 changes: 2 additions & 2 deletions gpt4all-bindings/golang/binding.h
@@ -7,12 +7,12 @@ extern "C" {
void* load_model(const char *fname, int n_threads);

void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
float top_p, float temp, int n_batch,float ctx_erase);
float top_p, float min_p, float temp, int n_batch,float ctx_erase);

void free_model(void *state_ptr);

extern unsigned char getTokenCallback(void *, char *);

#ifdef __cplusplus
}
#endif
#endif
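Since min_p was inserted between top_p and temp in model_prompt's signature in both binding.cpp and binding.h, any code calling this shim directly must pass the extra argument. A hypothetical updated call site (argument values are illustrative; the model pointer comes from load_model and the result buffer from the caller):

#include "binding.h"

// Hypothetical call site updated for the widened signature; the values below
// are illustrative, not defaults taken from the bindings.
void run_prompt(void *model, char *result) {
    model_prompt("Hello, world", model, result,
                 /*repeat_last_n=*/64, /*repeat_penalty=*/1.1f,
                 /*n_ctx=*/2048, /*tokens=*/200, /*top_k=*/40,
                 /*top_p=*/0.9f, /*min_p=*/0.0f, /*temp=*/0.9f,
                 /*n_batch=*/9, /*ctx_erase=*/0.55f);
}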
4 changes: 2 additions & 2 deletions gpt4all-bindings/golang/gpt4all.go
@@ -7,7 +7,7 @@ package gpt4all
// #cgo LDFLAGS: -lgpt4all -lm -lstdc++ -ldl
// void* load_model(const char *fname, int n_threads);
// void model_prompt( const char *prompt, void *m, char* result, int repeat_last_n, float repeat_penalty, int n_ctx, int tokens, int top_k,
// float top_p, float temp, int n_batch,float ctx_erase);
// float top_p, float min_p, float temp, int n_batch,float ctx_erase);
// void free_model(void *state_ptr);
// extern unsigned char getTokenCallback(void *, char *);
// void llmodel_set_implementation_search_path(const char *path);
@@ -58,7 +58,7 @@ func (l *Model) Predict(text string, opts ...PredictOption) (string, error) {
out := make([]byte, po.Tokens)

C.model_prompt(input, l.state, (*C.char)(unsafe.Pointer(&out[0])), C.int(po.RepeatLastN), C.float(po.RepeatPenalty), C.int(po.ContextSize),
C.int(po.Tokens), C.int(po.TopK), C.float(po.TopP), C.float(po.Temperature), C.int(po.Batch), C.float(po.ContextErase))
C.int(po.Tokens), C.int(po.TopK), C.float(po.TopP), C.float(po.MinP), C.float(po.Temperature), C.int(po.Batch), C.float(po.ContextErase))

res := C.GoString((*C.char)(unsafe.Pointer(&out[0])))
res = strings.TrimPrefix(res, " ")
10 changes: 9 additions & 1 deletion gpt4all-bindings/golang/options.go
@@ -2,7 +2,7 @@ package gpt4all

type PredictOptions struct {
ContextSize, RepeatLastN, Tokens, TopK, Batch int
TopP, Temperature, ContextErase, RepeatPenalty float64
TopP, MinP, Temperature, ContextErase, RepeatPenalty float64
}

type PredictOption func(p *PredictOptions)
@@ -11,6 +11,7 @@ var DefaultOptions PredictOptions = PredictOptions{
Tokens: 200,
TopK: 10,
TopP: 0.90,
MinP: 0.0,
Temperature: 0.96,
Batch: 1,
ContextErase: 0.55,
@@ -50,6 +51,13 @@ func SetTopP(topp float64) PredictOption {
}
}

// SetMinP sets the value for min p sampling
func SetMinP(minp float64) PredictOption {
return func(p *PredictOptions) {
p.MinP = minp
}
}

// SetRepeatPenalty sets the repeat penalty.
func SetRepeatPenalty(ce float64) PredictOption {
return func(p *PredictOptions) {
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@ private GenerationConfig() {
n_predict.set(128);
top_k.set(40);
top_p.set(0.95);
min_p.set(0.0);
temp.set(0.28);
n_batch.set(8);
repeat_penalty.set(1.1);
@@ -71,6 +72,11 @@ public Builder withTopP(float top_p) {
return this;
}

public Builder withMinP(float min_p) {
configToBuild.min_p.set(min_p);
return this;
}

public Builder withTemp(float temp) {
configToBuild.temp.set(temp);
return this;
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@ class LLModelPromptContext extends Struct {
public final int32_t n_predict = new int32_t();
public final int32_t top_k = new int32_t();
public final Float top_p = new Float();
public final Float min_p = new Float();
public final Float temp = new Float();
public final int32_t n_batch = new int32_t();
public final Float repeat_penalty = new Float();