Skip to content

Commit

Permalink
feat: 更新openai兼容格式API的图片输入能力
Browse files Browse the repository at this point in the history
  • Loading branch information
TBXark committed Dec 12, 2024
1 parent cad6e6e commit 11a0673
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 30 deletions.
2 changes: 1 addition & 1 deletion dist/buildinfo.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 19 additions & 12 deletions dist/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions packages/lib/core/src/agent/azure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type {
ImageAgent,
LLMChatParams,
} from './types';
import { renderOpenAIMessages } from './openai';
import { ImageSupportFormat, renderOpenAIMessages } from './openai';
import { requestChatCompletions } from './request';
import { convertStringToResponseMessages, loadModelsList } from './utils';

Expand Down Expand Up @@ -45,7 +45,7 @@ export class AzureChatAI extends AzureBase implements ChatAgent {
};
const body = {
...context.OPENAI_API_EXTRA_PARAMS,
messages: await renderOpenAIMessages(prompt, messages, true),
messages: await renderOpenAIMessages(prompt, messages, [ImageSupportFormat.URL, ImageSupportFormat.BASE64]),
stream: onStream != null,
};
return convertStringToResponseMessages(requestChatCompletions(url, header, body, onStream));
Expand Down
2 changes: 1 addition & 1 deletion packages/lib/core/src/agent/cohere.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export class Cohere implements ChatAgent {
'Accept': onStream !== null ? 'text/event-stream' : 'application/json',
};
const body = {
messages: await renderOpenAIMessages(prompt, messages),
messages: await renderOpenAIMessages(prompt, messages, null),
model: context.COHERE_CHAT_MODEL,
stream: onStream != null,
};
Expand Down
4 changes: 2 additions & 2 deletions packages/lib/core/src/agent/gemini.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { AgentUserConfig } from '#/config';
import type { ChatAgent, ChatAgentResponse, ChatStreamTextHandler, LLMChatParams } from './types';
import { renderOpenAIMessages } from './openai';
import { ImageSupportFormat, renderOpenAIMessages } from './openai';
import { requestChatCompletions } from './request';
import { convertStringToResponseMessages, loadModelsList } from './utils';

Expand All @@ -25,7 +25,7 @@ export class Gemini implements ChatAgent {
'Accept': onStream !== null ? 'text/event-stream' : 'application/json',
};
const body = {
messages: await renderOpenAIMessages(prompt, messages),
messages: await renderOpenAIMessages(prompt, messages, [ImageSupportFormat.BASE64]),
model: context.GOOGLE_COMPLETIONS_MODEL,
stream: onStream != null,
};
Expand Down
4 changes: 2 additions & 2 deletions packages/lib/core/src/agent/mistralai.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { AgentUserConfig } from '#/config';
import type { ChatAgent, ChatAgentResponse, ChatStreamTextHandler, LLMChatParams } from './types';
import { renderOpenAIMessages } from './openai';
import { ImageSupportFormat, renderOpenAIMessages } from './openai';
import { requestChatCompletions } from './request';
import { convertStringToResponseMessages, loadModelsList } from './utils';

Expand All @@ -26,7 +26,7 @@ export class Mistral implements ChatAgent {

const body = {
model: context.MISTRAL_CHAT_MODEL,
messages: await renderOpenAIMessages(prompt, messages),
messages: await renderOpenAIMessages(prompt, messages, [ImageSupportFormat.URL]),
stream: onStream != null,
};

Expand Down
22 changes: 15 additions & 7 deletions packages/lib/core/src/agent/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ import { imageToBase64String, renderBase64DataURI } from '#/utils/image';
import { requestChatCompletions } from './request';
import { convertStringToResponseMessages, extractImageContent, loadModelsList } from './utils';

async function renderOpenAIMessage(item: HistoryItem, supportImage?: boolean): Promise<any> {
/**
 * Ways an image can be delivered to an OpenAI-compatible chat API.
 * Callers pass an array of these to `renderOpenAIMessages` to declare which
 * transports the target provider accepts (e.g. Gemini: BASE64 only; Mistral:
 * URL only; OpenAI/Azure: both); `null` means the provider takes no images.
 */
export enum ImageSupportFormat {
    URL = 'url', // reference the image by its HTTP(S) URL
    BASE64 = 'base64', // inline the image bytes as a base64 data URI
}

async function
renderOpenAIMessage(item: HistoryItem, supportImage?: ImageSupportFormat[] | null): Promise<any> {
const res: any = {
role: item.role,
content: item.content,
Expand All @@ -26,17 +32,19 @@ async function renderOpenAIMessage(item: HistoryItem, supportImage?: boolean): P
break;
case 'image':
if (supportImage) {
const isSupportURL = supportImage.includes(ImageSupportFormat.URL);
const isSupportBase64 = supportImage.includes(ImageSupportFormat.BASE64);
const data = extractImageContent(content.image);
if (data.url) {
if (ENV.TELEGRAM_IMAGE_TRANSFER_MODE === 'base64') {
if (ENV.TELEGRAM_IMAGE_TRANSFER_MODE === 'base64' && isSupportBase64) {
contents.push(await imageToBase64String(data.url).then((data) => {
return { type: 'image_url', image_url: { url: renderBase64DataURI(data) } };
}));
} else {
} else if (isSupportURL) {
contents.push({ type: 'image_url', image_url: { url: data.url } });
}
} else if (data.base64) {
contents.push({ type: 'image_url', image_url: { url: data.base64 } });
} else if (data.base64 && isSupportBase64) {
contents.push({ type: 'image_base64', image_base64: { base64: data.base64 } });
}
}
break;
Expand All @@ -49,7 +57,7 @@ async function renderOpenAIMessage(item: HistoryItem, supportImage?: boolean): P
return res;
}

export async function renderOpenAIMessages(prompt: string | undefined, items: HistoryItem[], supportImage?: boolean): Promise<any[]> {
export async function renderOpenAIMessages(prompt: string | undefined, items: HistoryItem[], supportImage?: ImageSupportFormat[] | null): Promise<any[]> {
const messages = await Promise.all(items.map(r => renderOpenAIMessage(r, supportImage)));
if (prompt) {
if (messages.length > 0 && messages[0].role === 'system') {
Expand Down Expand Up @@ -93,7 +101,7 @@ export class OpenAI extends OpenAIBase implements ChatAgent {
const body = {
model: context.OPENAI_CHAT_MODEL,
...context.OPENAI_API_EXTRA_PARAMS,
messages: await renderOpenAIMessages(prompt, messages, true),
messages: await renderOpenAIMessages(prompt, messages, [ImageSupportFormat.URL, ImageSupportFormat.BASE64]),
stream: onStream != null,
};

Expand Down
2 changes: 1 addition & 1 deletion packages/lib/core/src/agent/workersai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export class WorkersChat extends WorkerBase implements ChatAgent {
Authorization: `Bearer ${token}`,
};
const body = {
messages: await renderOpenAIMessages(prompt, messages),
messages: await renderOpenAIMessages(prompt, messages, null),
stream: onStream !== null,
};

Expand Down
4 changes: 2 additions & 2 deletions packages/lib/core/src/config/version.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
export const BUILD_TIMESTAMP = 1733984023;
export const BUILD_VERSION = '84e3212';
export const BUILD_TIMESTAMP = 1733987738;
export const BUILD_VERSION = 'e312e62';
8 changes: 8 additions & 0 deletions packages/lib/core/src/utils/image/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,11 @@ export async function imageToBase64String(url: string): Promise<Base64DataWithFo
/**
 * Serialize base64 image data into an RFC 2397 data URI.
 * @param params - the MIME format (e.g. `image/png`) and raw base64 payload
 * @returns a string of the form `data:<format>;base64,<data>`
 */
export function renderBase64DataURI(params: Base64DataWithFormat): string {
    const { format, data } = params;
    return ['data:', format, ';base64,', data].join('');
}

/**
 * Parse an RFC 2397 base64 data URI back into its format and payload
 * (inverse of `renderBase64DataURI`).
 *
 * Fixes over the previous implementation: `split(';base64,')` returned
 * `data === undefined` for inputs without the marker — silently violating
 * the `Base64DataWithFormat` contract — and discarded any payload content
 * after a second occurrence of the marker. We now validate the input and
 * split exactly once at the first marker.
 *
 * @param dataURI - a string of the form `data:<format>;base64,<data>`
 * @returns the extracted MIME format and base64 payload
 * @throws Error if `dataURI` does not contain the `;base64,` marker
 */
export function extraBase64DataFromBase64URI(dataURI: string): Base64DataWithFormat {
    const marker = ';base64,';
    const markerIndex = dataURI.indexOf(marker);
    if (markerIndex === -1) {
        throw new Error(`Invalid base64 data URI: ${dataURI.slice(0, 32)}`);
    }
    return {
        format: dataURI.slice(0, markerIndex).replace('data:', ''),
        data: dataURI.slice(markerIndex + marker.length),
    };
}

0 comments on commit 11a0673

Please sign in to comment.