diff --git a/mikupad.html b/mikupad.html
index 890b87b..374d387 100644
--- a/mikupad.html
+++ b/mikupad.html
@@ -999,6 +999,7 @@
 	margin-left: auto;
 	margin-right: auto;
 	padding: 2px 10px;
+	max-width: 215px;
 }
 
 button {
@@ -1473,7 +1474,7 @@
 		case API_LLAMA_CPP:
 			return await llamaCppTokenCount({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return await koboldCppTokenCount({ endpoint, signal, ...options });
+			return await koboldCppTokenCount({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			// These endpoints don't have a token count endpoint...
 			if (new URL(endpoint).host === 'api.openai.com' || new URL(endpoint).host === 'api.together.xyz')
@@ -1507,7 +1508,7 @@
 		case API_LLAMA_CPP:
 			return await llamaCppTokenize({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return await koboldCppTokenize({ endpoint, signal, ...options });
+			return await koboldCppTokenize({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			// These endpoints don't have a tokenenizer endpoint...
 			if (new URL(endpoint).host === 'api.openai.com' || new URL(endpoint).host === 'api.together.xyz')
@@ -1546,7 +1547,7 @@
 		case API_LLAMA_CPP:
 			return yield* await llamaCppCompletion({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return yield* await koboldCppCompletion({ endpoint, signal, ...options });
+			return yield* await koboldCppCompletion({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			return yield* await openaiCompletion({ endpoint, endpointAPIKey, signal, ...options });
 		case API_AI_HORDE:
@@ -1554,6 +1555,14 @@
 	}
 }
 
+export async function* chatCompletion({ endpoint, endpointAPI, endpointAPIKey, signal, ...options }) {
+	endpoint = normalizeEndpoint(endpoint, endpointAPI);
+	switch (endpointAPI) {
+		case API_OPENAI_COMPAT:
+			return yield* await openaiChatCompletion({ endpoint, endpointAPIKey, signal, ...options });
+	}
+}
+
 export async function abortCompletion({ endpoint, endpointAPI, ...options }) {
 	endpoint = normalizeEndpoint(endpoint, endpointAPI);
 	switch (endpointAPI) {
@@ -1695,31 +1704,46 @@
 		},
 		body: JSON.stringify({
 			...options,
-			stream: true,
 			cache_prompt: true,
 		}),
 		signal,
 	});
 	if (!res.ok)
 		throw new Error(`HTTP ${res.status}`);
-	for await (const chunk of parseEventStream(res.body)) {
-		const probs = chunk.completion_probabilities?.[0]?.probs ?? [];
-		const prob = probs.find(p => p.tok_str === chunk.content)?.prob;
-		yield {
-			content: chunk.content,
-			...(probs.length > 0 ? {
-				prob: prob ?? -1,
-				completion_probabilities: chunk.completion_probabilities
-			} : {})
-		};
+	if (options.stream) {
+		for await (const chunk of parseEventStream(res.body)) {
+			const probs = chunk.completion_probabilities?.[0]?.probs ?? [];
+			const prob = probs.find(p => p.tok_str === chunk.content)?.prob;
+			yield {
+				content: chunk.content,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: chunk.completion_probabilities
+				} : {})
+			};
+		}
+	} else {
+		const { completion_probabilities } = await res.json();
+		for (const chunk of completion_probabilities) {
+			const probs = chunk.probs ?? [];
+			const prob = probs.find(p => p.tok_str === chunk.content)?.prob;
+			yield {
+				content: chunk.content,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [chunk]
+				} : {})
+			};
+		}
 	}
 }
 
-async function koboldCppTokenCount({ endpoint, proxyEndpoint, signal, ...options }) {
+async function koboldCppTokenCount({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
 	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/tokencount`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
@@ -1733,11 +1757,12 @@
 	return value;
 }
 
-async function koboldCppTokenize({ endpoint, proxyEndpoint, signal, ...options }) {
+async function koboldCppTokenize({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
 	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/tokencount`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
@@ -1769,6 +1794,7 @@
 	}
 	swapOption("n_ctx", "max_context_length");
 	swapOption("n_predict", "max_length");
+	swapOption("n_probs", "logprobs");
 	swapOption("repeat_penalty", "rep_pen");
 	swapOption("repeat_last_n", "rep_pen_range");
 	swapOption("tfs_z", "tfs");
@@ -1779,23 +1805,48 @@
 	return options;
 }
 
-async function* koboldCppCompletion({ endpoint, proxyEndpoint, signal, ...options }) {
-	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/generate/stream`, {
+async function* koboldCppCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/${options.stream ? 'extra/generate/stream' : 'v1/generate'}`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
-			...koboldCppConvertOptions(options, endpoint),
-			stream: true,
+			...koboldCppConvertOptions(options, endpoint)
 		}),
 		signal,
 	});
 	if (!res.ok)
 		throw new Error(`HTTP ${res.status}`);
-	for await (const chunk of parseEventStream(res.body)) {
-		yield { content: chunk.token };
+	if (options.stream) {
+		for await (const chunk of parseEventStream(res.body)) {
+			yield { content: chunk.token };
+		}
+	} else {
+		const { results } = await res.json();
+		const chunks = results?.[0].logprobs?.content ?? [];
+		for (const chunk of chunks) {
+			const { token, top_logprobs } = chunk;
+
+			const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({
+				tok_str: token,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === token)?.prob;
+
+			yield {
+				content: token,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: token,
+						probs
+					}]
+				} : {})
+			};
+		}
 	}
 }
 
@@ -2017,7 +2068,7 @@
 	return data.map(item => item.id);
 }
 
-function openaiConvertOptions(options, endpoint){
+function openaiConvertOptions(options, endpoint, isChat) {
 	const isOpenAI = endpoint.toLowerCase().includes("openai.com");
 	const isTogetherAI = endpoint.toLowerCase().includes("together.xyz");
 	const isOpenRouter = endpoint.toLowerCase().includes("openrouter.ai");
@@ -2048,7 +2099,12 @@
 	}
 	swapOption("n_ctx", "max_context_length");
 	swapOption("n_predict", "max_tokens");
-	swapOption("n_probs", "logprobs");
+	if (isChat) {
+		options.logprobs = true;
+		swapOption("n_probs", "top_logprobs");
+	} else {
+		swapOption("n_probs", "logprobs");
+	}
 	swapOption("repeat_penalty", "repetition_penalty");
 	swapOption("repeat_last_n", "repetition_penalty_range");
 	swapOption("tfs_z", "tfs");
@@ -2067,37 +2123,168 @@
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
-			...openaiConvertOptions(options, endpoint),
-			stream: true,
+			...openaiConvertOptions(options, endpoint)
 		}),
 		signal,
 	});
-	if (!res.ok)
-		throw new Error(`HTTP ${res.status}`);
-	for await (const chunk of parseEventStream(res.body)) {
-		if (!chunk.choices || chunk.choices.length === 0) {
-			continue; // Skip if there are no choices (should never happen)
+
+	if (!res.ok) {
+		let json;
+		try {
+			json = await res.json();
+		} catch {}
+		if (json?.error?.message) {
+			throw new Error(json.error.message);
 		}
+		throw new Error(`HTTP ${res.status}`);
+	}
 
-		const { text, logprobs } = chunk.choices[0];
-		const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
-
-		const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
-			tok_str: tok,
-			prob: Math.exp(logprob)
-		}));
-		const prob = probs.find(p => p.tok_str === text)?.prob;
+	if (options.stream) {
+		for await (const chunk of parseEventStream(res.body)) {
+			if (!chunk.choices || chunk.choices.length === 0) {
+				if (chunk.content) {
+					yield { content: chunk.content };
+				}
+				continue;
+			}
+
+			const { text, logprobs } = chunk.choices[0];
+			const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
+
+			const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
+				tok_str: tok,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === text)?.prob;
+
+			yield {
+				content: text,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: text,
+						probs
+					}]
+				} : {})
+			};
+		}
+	} else {
+		const { choices } = await res.json();
+		if (choices?.[0].logprobs?.tokens) {
+			const logprobs = choices?.[0].logprobs;
+			const chunks = Object.values(logprobs.tokens).map((token, i) => {
+				return { text: token, logprobs: { top_logprobs: [ logprobs.top_logprobs[i] ] } };
+			});
+			for (const chunk of chunks) {
+				const { text, logprobs } = chunk;
+				const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
+
+				const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
+					tok_str: tok,
+					prob: Math.exp(logprob)
+				}));
+				const prob = probs.find(p => p.tok_str === text)?.prob;
 
-		yield {
-			content: text,
-			...(probs.length > 0 ? {
-				prob: prob ?? -1,
-				completion_probabilities: [{
+				yield {
 					content: text,
-					probs
-				}]
-			} : {})
-		};
+					...(probs.length > 0 ? {
+						prob: prob ?? -1,
+						completion_probabilities: [{
+							content: text,
+							probs
+						}]
+					} : {})
+				};
+			}
+		} else if (choices?.[0].text) {
+			yield { content: choices[0].text };
+		}
+	}
+}
+
+async function* openaiChatCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const res = await fetch(`${proxyEndpoint ?? endpoint}/v1/chat/completions`, {
+		method: 'POST',
+		headers: {
+			'Content-Type': 'application/json',
+			'Authorization': `Bearer ${endpointAPIKey}`,
+			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+		},
+		body: JSON.stringify({
+			...openaiConvertOptions(options, endpoint, true)
+		}),
+		signal,
+	});
+
+	if (!res.ok) {
+		let json;
+		try {
+			json = await res.json();
+		} catch {}
+		if (json?.error?.message) {
+			throw new Error(json.error.message);
+		}
+		throw new Error(`HTTP ${res.status}`);
+	}
+
+	if (options.stream) {
+		for await (const chunk of parseEventStream(res.body)) {
+			const token = chunk.choices[0].delta.content;
+			const top_logprobs = chunk.choices[0].logprobs?.content?.[0]?.top_logprobs ?? {};
+			if (!token) {
+				continue
+			}
+
+			const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({
+				tok_str: token,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === token)?.prob;
+
+			yield {
+				content: token,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: token,
+						probs
+					}]
+				} : {})
+			};
+		}
+	} else {
+		const { choices } = await res.json();
+		const chunks = choices?.[0].logprobs?.content ?? [];
+		if (chunks.length) {
+			for (const chunk of chunks) {
+				const { token, top_logprobs } = chunk;
+				if (!token) {
+					continue
+				}
+
+				const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({
+					tok_str: token,
+					prob: Math.exp(logprob)
+				}));
+				const prob = probs.find(p => p.tok_str === token)?.prob;
+
+				yield {
+					content: token,
+					...(probs.length > 0 ? {
+						prob: prob ?? -1,
+						completion_probabilities: [{
+							content: token,
+							probs
+						}]
+					} : {})
+				};
+			}
+		} else {
+			const content = choices?.[0].message?.content;
+			if (content) {
+				yield { content: content };
+			}
+		}
 	}
 }
 
@@ -2329,7 +2516,7 @@
 function Checkbox({ label, value, hidden, onValueChange, ...props }) {
 	return html`
-
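Not part of the patch: a minimal usage sketch of the new `chatCompletion` generator, assuming it runs inside mikupad's script module where `chatCompletion` and the `API_OPENAI_COMPAT` constant are in scope. The `model` and `messages` option names are assumptions based on the options being forwarded unchanged to `/v1/chat/completions`; the yielded chunk shape `{ content, prob?, completion_probabilities? }` mirrors the existing `completion()` generator.

```js
// Hypothetical consumer of the new chatCompletion() generator (illustrative only).
const controller = new AbortController();

for await (const chunk of chatCompletion({
	endpoint: 'https://api.openai.com',
	endpointAPI: API_OPENAI_COMPAT,
	endpointAPIKey: 'sk-...',                         // placeholder key
	model: 'gpt-4o-mini',                             // assumed option name, passed through to the API
	messages: [{ role: 'user', content: 'Hello!' }],  // assumed option name for chat turns
	n_probs: 10,                                      // converted to logprobs/top_logprobs when isChat is true
	stream: true,                                     // false would exercise the new non-streaming path
	signal: controller.signal,
})) {
	// Same chunk shape as completion(): content plus optional token probabilities.
	console.log(chunk.content, chunk.prob ?? '');
}
```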