From fc54ef0d1c138133a01933296d50a36a1ab64735 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 21 Aug 2024 11:04:34 +0200 Subject: [PATCH 01/47] server : support reading arguments from environment variables (#9105) * server : support reading arguments from environment variables * add -fa and -dt * readme : specify non-arg env var --- common/common.cpp | 64 +++++++++++++++++++++++++++++++++----- common/common.h | 2 +- examples/server/README.md | 19 +++++++++++ examples/server/server.cpp | 3 ++ 4 files changed, 80 insertions(+), 8 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 382d585a5e6f9..59e8296604c9c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -77,6 +77,41 @@ using json = nlohmann::ordered_json; +// +// Environment variable utils +// + +template +static typename std::enable_if::value, void>::type +get_env(std::string name, T & target) { + char * value = std::getenv(name.c_str()); + target = value ? std::string(value) : target; +} + +template +static typename std::enable_if::value && std::is_integral::value, void>::type +get_env(std::string name, T & target) { + char * value = std::getenv(name.c_str()); + target = value ? std::stoi(value) : target; +} + +template +static typename std::enable_if::value, void>::type +get_env(std::string name, T & target) { + char * value = std::getenv(name.c_str()); + target = value ? std::stof(value) : target; +} + +template +static typename std::enable_if::value, void>::type +get_env(std::string name, T & target) { + char * value = std::getenv(name.c_str()); + if (value) { + std::string val(value); + target = val == "1" || val == "true"; + } +} + // // CPU utils // @@ -220,12 +255,6 @@ int32_t cpu_get_num_math() { // CLI argument parsing // -void gpt_params_handle_hf_token(gpt_params & params) { - if (params.hf_token.empty() && std::getenv("HF_TOKEN")) { - params.hf_token = std::getenv("HF_TOKEN"); - } -} - void gpt_params_handle_model_default(gpt_params & params) { if (!params.hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model @@ -273,7 +302,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { gpt_params_handle_model_default(params); - gpt_params_handle_hf_token(params); + if (params.hf_token.empty()) { + get_env("HF_TOKEN", params.hf_token); + } if (params.escape) { string_process_escapes(params.prompt); @@ -293,6 +324,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { return true; } +void gpt_params_parse_from_env(gpt_params & params) { + // we only care about server-related params for now + get_env("LLAMA_ARG_MODEL", params.model); + get_env("LLAMA_ARG_THREADS", params.n_threads); + get_env("LLAMA_ARG_CTX_SIZE", params.n_ctx); + get_env("LLAMA_ARG_N_PARALLEL", params.n_parallel); + get_env("LLAMA_ARG_BATCH", params.n_batch); + get_env("LLAMA_ARG_UBATCH", params.n_ubatch); + get_env("LLAMA_ARG_N_GPU_LAYERS", params.n_gpu_layers); + get_env("LLAMA_ARG_THREADS_HTTP", params.n_threads_http); + get_env("LLAMA_ARG_CHAT_TEMPLATE", params.chat_template); + get_env("LLAMA_ARG_N_PREDICT", params.n_predict); + get_env("LLAMA_ARG_ENDPOINT_METRICS", params.endpoint_metrics); + get_env("LLAMA_ARG_ENDPOINT_SLOTS", params.endpoint_slots); + get_env("LLAMA_ARG_EMBEDDINGS", params.embedding); + get_env("LLAMA_ARG_FLASH_ATTN", params.flash_attn); + get_env("LLAMA_ARG_DEFRAG_THOLD", params.defrag_thold); +} + bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { const auto params_org = params; // the example can modify the default params diff --git a/common/common.h b/common/common.h index df23460a50fe0..f603ba2be1d35 100644 --- a/common/common.h +++ b/common/common.h @@ -267,7 +267,7 @@ struct gpt_params { std::string lora_outfile = "ggml-lora-merged-f16.gguf"; }; -void gpt_params_handle_hf_token(gpt_params & params); +void gpt_params_parse_from_env(gpt_params & params); void gpt_params_handle_model_default(gpt_params & params); bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params); diff --git a/examples/server/README.md b/examples/server/README.md index 930ae15f64d8b..abe245271195b 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -247,6 +247,25 @@ logging: --log-append Don't truncate the old log file. ``` +Available environment variables (if specified, these variables will override parameters specified in arguments): + +- `LLAMA_CACHE` (cache directory, used by `--hf-repo`) +- `HF_TOKEN` (Hugging Face access token, used when accessing a gated model with `--hf-repo`) +- `LLAMA_ARG_MODEL` +- `LLAMA_ARG_THREADS` +- `LLAMA_ARG_CTX_SIZE` +- `LLAMA_ARG_N_PARALLEL` +- `LLAMA_ARG_BATCH` +- `LLAMA_ARG_UBATCH` +- `LLAMA_ARG_N_GPU_LAYERS` +- `LLAMA_ARG_THREADS_HTTP` +- `LLAMA_ARG_CHAT_TEMPLATE` +- `LLAMA_ARG_N_PREDICT` +- `LLAMA_ARG_ENDPOINT_METRICS` +- `LLAMA_ARG_ENDPOINT_SLOTS` +- `LLAMA_ARG_EMBEDDINGS` +- `LLAMA_ARG_FLASH_ATTN` +- `LLAMA_ARG_DEFRAG_THOLD` ## Build diff --git a/examples/server/server.cpp b/examples/server/server.cpp index ce711eadd29ac..e79e7aa2cb846 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2507,6 +2507,9 @@ int main(int argc, char ** argv) { return 1; } + // parse arguments from environment variables + gpt_params_parse_from_env(params); + // TODO: not great to use extern vars server_log_json = params.log_json; server_verbose = params.verbosity > 0; From 5bf527a6aec241249793be17e4e3b7a0dbed59b2 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 21 Aug 2024 23:57:15 +0800 Subject: [PATCH 02/47] added xtc sampler --- common/common.h | 2 ++ expose.h | 2 ++ gpttype_adapter.cpp | 54 ++++++++++++++++++++++++++++++++++++-- klite.embd | 63 +++++++++++++++++++++++++++++++++++++++++---- koboldcpp.py | 8 +++++- 5 files changed, 121 insertions(+), 8 deletions(-) diff --git a/common/common.h b/common/common.h index c9b7da077ec58..0cbe5eeec6596 100644 --- a/common/common.h +++ b/common/common.h @@ -127,6 +127,8 @@ struct gpt_params { int32_t dry_allowed_length = 2; // repeated sequences longer than this are penalized int32_t dry_penalty_last_n = 0; // how many tokens to scan for repetitions (0 = entire context) std::vector dry_sequence_breakers; // DRY sequence breakers + float xtc_threshold = 0; + float xtc_probability = 0; // DynaTemp! float dynatemp_range = 0.0f; // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range diff --git a/expose.h b/expose.h index 42d0ff31e2d35..aa23b57a288d9 100644 --- a/expose.h +++ b/expose.h @@ -89,6 +89,8 @@ struct generation_inputs const int dry_allowed_length = 0; const int dry_penalty_last_n = 0; const char * dry_sequence_breakers[dry_seq_break_max] = {}; + const float xtc_threshold = 0.0f; + const float xtc_probability = 0.0f; const samplers sampler_order[KCPP_SAMPLER_MAX] = {}; const int sampler_len = 0; const bool allow_eos_token = false; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 4ad320826ad96..0c4276d1e264b 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -501,6 +501,50 @@ void sample_top_a(llama_token_data_array * candidates, float a, size_t min_keep) candidates->size = last_idx; } +void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float xtc_probability, std::mt19937 & rng, size_t min_keep) +{ + if (xtc_threshold <= 0.0f || xtc_probability <= 0.0f || candidates->size <= 1) { + return; + } + + std::uniform_real_distribution dist(0.0f, 1.0f); + float roll = dist(rng); + if(roll>=xtc_probability) //if dice roll fails, skip xtc + { + return; + } + + llama_sample_softmax(nullptr, candidates); + + //calculate how many tokens cross the xtc threshold + size_t last_idx = candidates->size; + for (size_t i = 0; i < candidates->size; ++i) { + // Go until we reach a value under the threshold + float checkprob = candidates->data[i].p; + if (checkprob < xtc_threshold && i >= min_keep) { + last_idx = i; + break; + } + } + + if(last_idx>1) //if there are 2 or more viable candidates + { + // drop all tokens except those above threshold + candidates->size = last_idx; + + // then remove all other tokens EXCEPT the least likely one + for (size_t i = 0; i < candidates->size - 1; ++i) { + candidates->data[i].logit = -999.0f; //infinity gets wonky results downstream, this hack works well enough + } + candidates->sorted = false; + + } //otherwise xtc does not do anything + + // printf("\n\nCandidates: %d, Threshold: %f, LastIdx: %d",candidates->size,xtc_threshold,last_idx); + // printf("\nCandidates: %f %f %f %f\n",candidates->data[0].p,candidates->data[1].p,candidates->data[2].p,candidates->data[3].p); + +} + void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float penalty_base, int allowed_length, const std::unordered_multimap>& restart_sequences, llama_token_data_array * candidates) { if (penalty_multiplier <= 0.0f || penalty_base <= 0.0f) { return; @@ -822,7 +866,8 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar } int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float rep_pen_slope, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng, -int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, float dry_base, int dry_allowed_length, int dry_penalty_last_n, const std::vector & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor) +int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, float dry_base, int dry_allowed_length, int dry_penalty_last_n, float xtc_threshold, float xtc_probability, +const std::vector & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor) { int id = 0; std::vector candidates; @@ -843,6 +888,7 @@ int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, floa sample_grammar(file_format, n_vocab, &candidates_p, grammar); } + //dry always first as logits cannot be resorted sample_dry(n_ctx, dry_penalty_last_n, dry_multiplier, dry_base, dry_allowed_length, dry_sequence_breakers, &candidates_p); //prefilter to top 5k tokens for improved speed @@ -909,6 +955,8 @@ int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, floa break; } } + //xtc always last + sample_xtc(&candidates_p, xtc_threshold, xtc_probability, rng, 1); id = sample_token(&candidates_p, rng); } @@ -2088,6 +2136,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs) kcpp_params->dry_base = inputs.dry_base; kcpp_params->dry_allowed_length = inputs.dry_allowed_length; kcpp_params->dry_penalty_last_n = inputs.dry_penalty_last_n; + kcpp_params->xtc_threshold = inputs.xtc_threshold; + kcpp_params->xtc_probability = inputs.xtc_probability; kcpp_params->dynatemp_range = inputs.dynatemp_range; kcpp_params->dynatemp_exponent = inputs.dynatemp_exponent; kcpp_params->n_ctx = inputs.max_context_length; @@ -2662,7 +2712,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng, kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, kcpp_params->dry_multiplier, kcpp_params->dry_base, - kcpp_params->dry_allowed_length, kcpp_params->dry_penalty_last_n, + kcpp_params->dry_allowed_length, kcpp_params->dry_penalty_last_n, kcpp_params->xtc_threshold, kcpp_params->xtc_probability, sampler_order, grammar, dynatemp_range, dynatemp_exponent, smoothing_factor); if (grammar != nullptr) { diff --git a/klite.embd b/klite.embd index 151bc2e077456..fd1055a196a02 100644 --- a/klite.embd +++ b/klite.embd @@ -12,7 +12,7 @@ Current version indicated by LITEVER below. --> @@ -4332,7 +4332,7 @@ Current version indicated by LITEVER below. auto_ctxlen: true, auto_genamt: true, rep_pen: 1.07, - rep_pen_range: 320, + rep_pen_range: 360, rep_pen_slope: 0.7, temperature: 0.7, dynatemp_range: 0.0, @@ -4382,7 +4382,7 @@ Current version indicated by LITEVER below. rep_pen_slope: defaultsettings.rep_pen_slope, sampler_order: defaultsettings.sampler_order }, - {"preset":"Simple Logical","description":"A very predictable preset with low randomness.","temp":0.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.6,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.01,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Balanced","description":"A good balanced preset with medium randomness.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.92,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.07,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Creative","description":"A wild and unpredictable preset with higher randomness.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.15,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Basic Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic DynaTemp","description":"A good default for DynaTemp, only works on backends with it.","temp":1.25,"dynatemp_range":0.75,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SmoothSample","description":"A good default for Smooth Sampling, only works on backends with it.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.25,"genamt":200,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SillyTavern","description":"Similar to default preset used in SillyTavern.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":40,"top_p":0.6,"min_p":0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1.0,"rep_pen":1.18,"rep_pen_range":1024,"rep_pen_slope":0.8,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"CoherentCreativity (Legacy)","description":"Legacy preset. A good balance between coherence, creativity, and quality of prose.","genamt":200,"rep_pen":1.2,"rep_pen_range":320,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.5,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Godlike (Legacy)","description":"Legacy preset. Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"LiminalDrift (Legacy)","description":"Legacy preset. Sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]} + {"preset":"Simple Logical","description":"A very predictable preset with low randomness.","temp":0.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.6,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.01,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Balanced","description":"A good balanced preset with medium randomness.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.92,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.07,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Creative","description":"A wild and unpredictable preset with higher randomness.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":100,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.15,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Basic Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic DynaTemp","description":"A good default for DynaTemp, only works on backends with it.","temp":1.25,"dynatemp_range":0.75,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SmoothSample","description":"A good default for Smooth Sampling, only works on backends with it.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.25,"genamt":200,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SillyTavern","description":"Similar to default preset used in SillyTavern.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":40,"top_p":0.6,"min_p":0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1.0,"rep_pen":1.18,"rep_pen_range":1024,"rep_pen_slope":0.8,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"CoherentCreativity (Legacy)","description":"Legacy preset. A good balance between coherence, creativity, and quality of prose.","genamt":200,"rep_pen":1.2,"rep_pen_range":360,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.5,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Godlike (Legacy)","description":"Legacy preset. Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"LiminalDrift (Legacy)","description":"Legacy preset. Sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"genamt":200,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]} ]; const instructpresets = [ @@ -4547,6 +4547,23 @@ Current version indicated by LITEVER below. } } + function restore_endpoint_dropdowns() + { + var cep = document.getElementById("customapidropdown"); + var cephide = document.getElementById("unusedcustomapidropdown"); + + //remove all unwanted options from the endpoint dropdown in case it is used + for (var i = cep.options.length - 1; i >= 0; i--) { + cep.remove(i); + } + for (var i = 0; i < cephide.options.length; ++i) { + var newOption = document.createElement("option"); + newOption.value = cephide.options[i].value; + newOption.text = cephide.options[i].text; + cep.add(newOption); + } + } + //attempt to load settings function init() { @@ -4569,6 +4586,17 @@ Current version indicated by LITEVER below. //disable debug log if not local let dbgmode = urlParams.get('dbg'); + //duplicate endpoint dropdown array for backup + var cep = document.getElementById("customapidropdown"); + var cephide = document.getElementById("unusedcustomapidropdown"); + var cepoptions = cep.options; + for (var i = 0; i < cepoptions.length; ++i) { + var newOption = document.createElement("option"); + newOption.value = cepoptions[i].value; + newOption.text = cepoptions[i].text; + cephide.add(newOption); + } + if (localflag) { let inputport = urlParams.get('port'); @@ -4617,8 +4645,6 @@ Current version indicated by LITEVER below. } //remove all unwanted options from the endpoint dropdown in case it is used - var cep = document.getElementById("customapidropdown"); - var cepoptions = cep.options; for (var i = cepoptions.length - 1; i >= 0; i--) { if (cepoptions[i].value !== "1" && cepoptions[i].value !== "2") { cep.remove(i); @@ -8624,7 +8650,15 @@ Current version indicated by LITEVER below. } else if(localflag && no_txt_model && !has_txt2img && !koboldcpp_has_vision && !koboldcpp_has_whisper) { - msgbox("This KoboldCpp instance has no models loaded. You can still use the WebUI to edit or view existing stories.

You can also connect to an external service instead","No Models Loaded",true); + msgboxYesNo("This KoboldCpp instance has no models loaded. You can still use the WebUI to edit or view existing stories.

Would you like to connect to an external API service?","No Models Loaded", + ()=>{ + localflag = false; + hide_popups(); + restore_endpoint_dropdowns(); + document.getElementById("customapidropdown").value = "1"; + render_gametext(false); + display_endpoint_container(); + },()=>{},true); } }else{ @@ -17404,6 +17438,7 @@ Current version indicated by LITEVER below.
Select your AI provider
+