From 02d773bfd739dc9e6e368c309cfb21fb6b0abafe Mon Sep 17 00:00:00 2001 From: ZHallen122 Date: Sat, 2 Nov 2024 19:05:18 -0400 Subject: [PATCH 1/6] init basic system prompt --- llm-server/src/prompts/systemPrompt.ts | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 llm-server/src/prompts/systemPrompt.ts diff --git a/llm-server/src/prompts/systemPrompt.ts b/llm-server/src/prompts/systemPrompt.ts new file mode 100644 index 0000000..f1edd0e --- /dev/null +++ b/llm-server/src/prompts/systemPrompt.ts @@ -0,0 +1,7 @@ +// Define and export the system prompts object +export const systemPrompts = { + 'codefox-basic': { + systemPrompt: `You are CodeFox, an advanced and powerful AI specialized in code generation and software engineering. + Your purpose is to help developers build complete and efficient applications by providing well-structured, optimized, and maintainable code.`, + }, +}; From c22001dbf4cfff43bb1336d81ac8db4763781a3d Mon Sep 17 00:00:00 2001 From: ZHallen122 Date: Sat, 2 Nov 2024 19:06:06 -0400 Subject: [PATCH 2/6] make code directory look consistant --- llm-server/src/{prompts => prompt}/systemPrompt.ts | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llm-server/src/{prompts => prompt}/systemPrompt.ts (100%) diff --git a/llm-server/src/prompts/systemPrompt.ts b/llm-server/src/prompt/systemPrompt.ts similarity index 100% rename from llm-server/src/prompts/systemPrompt.ts rename to llm-server/src/prompt/systemPrompt.ts From 2baa14bb39bc0b1f9b7f43b171b83e37ac6b65e9 Mon Sep 17 00:00:00 2001 From: ZHallen122 Date: Sat, 2 Nov 2024 21:46:19 -0400 Subject: [PATCH 3/6] update pass model name --- backend/src/chat/chat.resolver.ts | 2 +- backend/src/chat/chat.service.ts | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/backend/src/chat/chat.resolver.ts b/backend/src/chat/chat.resolver.ts index 431c991..57fc8bf 100644 --- a/backend/src/chat/chat.resolver.ts +++ b/backend/src/chat/chat.resolver.ts @@ -53,7 +53,7 @@ export class ChatResolver { MessageRole.User, ); - const iterator = this.chatProxyService.streamChat(input.message); + const iterator = this.chatProxyService.streamChat(input); let accumulatedContent = ''; for await (const chunk of iterator) { diff --git a/backend/src/chat/chat.service.ts b/backend/src/chat/chat.service.ts index 0e5579b..e1b356a 100644 --- a/backend/src/chat/chat.service.ts +++ b/backend/src/chat/chat.service.ts @@ -5,7 +5,11 @@ import { Message, MessageRole } from 'src/chat/message.model'; import { InjectRepository } from '@nestjs/typeorm'; import { Repository } from 'typeorm'; import { User } from 'src/user/user.model'; -import { NewChatInput, UpdateChatTitleInput } from 'src/chat/dto/chat.input'; +import { + ChatInput, + NewChatInput, + UpdateChatTitleInput, +} from 'src/chat/dto/chat.input'; type CustomAsyncIterableIterator = AsyncIterator & { [Symbol.asyncIterator](): AsyncIterableIterator; @@ -17,8 +21,12 @@ export class ChatProxyService { constructor(private httpService: HttpService) {} - streamChat(input: string): CustomAsyncIterableIterator { - this.logger.debug('request chat input: ' + input); + streamChat( + input: ChatInput, + ): CustomAsyncIterableIterator { + this.logger.debug( + `Request chat input: ${input.message} with model: ${input.model}`, + ); let isDone = false; let responseSubscription: any; const chunkQueue: ChatCompletionChunk[] = []; @@ -60,7 +68,7 @@ export class ChatProxyService { responseSubscription = this.httpService .post( 'http://localhost:3001/chat/completion', - { content: input 
}, + { content: input.message, model: input.model }, { responseType: 'stream' }, ) .subscribe({ From c24518f2b9663f659c7aabdbef6be9c3a072b780 Mon Sep 17 00:00:00 2001 From: ZHallen122 Date: Sat, 2 Nov 2024 21:46:50 -0400 Subject: [PATCH 4/6] update to use system prompt --- llm-server/src/llm-provider.ts | 5 +++-- llm-server/src/main.ts | 14 +++++++++++-- llm-server/src/model/llama-model-provider.ts | 5 +++-- llm-server/src/model/model-provider.ts | 3 ++- llm-server/src/model/openai-model-provider.ts | 20 ++++++++++++++++--- llm-server/src/type/GenerateMessage.ts | 5 +++++ 6 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 llm-server/src/type/GenerateMessage.ts diff --git a/llm-server/src/llm-provider.ts b/llm-server/src/llm-provider.ts index 2286a9d..f47f999 100644 --- a/llm-server/src/llm-provider.ts +++ b/llm-server/src/llm-provider.ts @@ -3,6 +3,7 @@ import { ModelProvider } from './model/model-provider'; import { OpenAIModelProvider } from './model/openai-model-provider'; import { LlamaModelProvider } from './model/llama-model-provider'; import { Logger } from '@nestjs/common'; +import { GenerateMessageParams } from './type/GenerateMessage'; export interface ChatMessageInput { content: string; @@ -32,10 +33,10 @@ export class LLMProvider { } async generateStreamingResponse( - content: string, + params: GenerateMessageParams, res: Response, ): Promise { - await this.modelProvider.generateStreamingResponse(content, res); + await this.modelProvider.generateStreamingResponse(params, res); } async getModelTags(res: Response): Promise { diff --git a/llm-server/src/main.ts b/llm-server/src/main.ts index f062f05..fbc7391 100644 --- a/llm-server/src/main.ts +++ b/llm-server/src/main.ts @@ -1,6 +1,7 @@ import { Logger } from '@nestjs/common'; import { ChatMessageInput, LLMProvider } from './llm-provider'; import express, { Express, Request, Response } from 'express'; +import { GenerateMessageParams } from './type/GenerateMessage'; export class App { private readonly logger = new Logger(App.name); @@ -27,13 +28,22 @@ export class App { this.logger.log('Received chat request.'); try { this.logger.debug(JSON.stringify(req.body)); - const { content } = req.body as ChatMessageInput; + const { content, model } = req.body as ChatMessageInput & { + model: string; + }; + + const params: GenerateMessageParams = { + model: model || 'gpt-3.5-turbo', // Default to 'gpt-3.5-turbo' if model is not provided + message: content, + role: 'user', + }; + this.logger.debug(`Request content: "${content}"`); res.setHeader('Content-Type', 'text/event-stream'); res.setHeader('Cache-Control', 'no-cache'); res.setHeader('Connection', 'keep-alive'); this.logger.debug('Response headers set for streaming.'); - await this.llmProvider.generateStreamingResponse(content, res); + await this.llmProvider.generateStreamingResponse(params, res); } catch (error) { this.logger.error('Error in chat endpoint:', error); res.status(500).json({ error: 'Internal server error' }); diff --git a/llm-server/src/model/llama-model-provider.ts b/llm-server/src/model/llama-model-provider.ts index 07a24b7..b4274d2 100644 --- a/llm-server/src/model/llama-model-provider.ts +++ b/llm-server/src/model/llama-model-provider.ts @@ -8,6 +8,7 @@ import { } from 'node-llama-cpp'; import { ModelProvider } from './model-provider.js'; import { Logger } from '@nestjs/common'; +import { GenerateMessageParams } from '../type/GenerateMessage'; //TODO: using protocol class export class LlamaModelProvider extends ModelProvider { @@ -33,7 +34,7 @@ 
export class LlamaModelProvider extends ModelProvider { } async generateStreamingResponse( - content: string, + { model, message, role = 'user' }: GenerateMessageParams, res: Response, ): Promise { this.logger.log('Generating streaming response with Llama...'); @@ -44,7 +45,7 @@ export class LlamaModelProvider extends ModelProvider { let chunkCount = 0; const startTime = Date.now(); try { - await session.prompt(content, { + await session.prompt(message, { onTextChunk: chunk => { chunkCount++; this.logger.debug(`Sending chunk #${chunkCount}: "${chunk}"`); diff --git a/llm-server/src/model/model-provider.ts b/llm-server/src/model/model-provider.ts index 07f6a0b..4d82329 100644 --- a/llm-server/src/model/model-provider.ts +++ b/llm-server/src/model/model-provider.ts @@ -1,9 +1,10 @@ import { Response } from 'express'; +import { GenerateMessageParams } from '../type/GenerateMessage'; export abstract class ModelProvider { abstract initialize(): Promise; abstract generateStreamingResponse( - content: string, + params: GenerateMessageParams, res: Response, ): Promise; diff --git a/llm-server/src/model/openai-model-provider.ts b/llm-server/src/model/openai-model-provider.ts index f48c30f..b3e8c17 100644 --- a/llm-server/src/model/openai-model-provider.ts +++ b/llm-server/src/model/openai-model-provider.ts @@ -2,6 +2,10 @@ import { Response } from 'express'; import OpenAI from 'openai'; import { ModelProvider } from './model-provider'; import { Logger } from '@nestjs/common'; +import { systemPrompts } from '../prompt/systemPrompt'; +import { ChatCompletionMessageParam } from 'openai/resources/chat/completions'; +import { GenerateMessageParams } from '../type/GenerateMessage'; + export class OpenAIModelProvider extends ModelProvider { private readonly logger = new Logger(OpenAIModelProvider.name); private openai: OpenAI; @@ -15,21 +19,31 @@ export class OpenAIModelProvider extends ModelProvider { } async generateStreamingResponse( - content: string, + { model, message, role = 'user' }: GenerateMessageParams, res: Response, ): Promise { this.logger.log('Generating streaming response with OpenAI...'); const startTime = Date.now(); + // Set SSE headers res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', Connection: 'keep-alive', }); + + // Get the system prompt based on the model + const systemPrompt = systemPrompts['codefox-basic']?.systemPrompt || ''; + + // Prepare the messages array, including system prompt if available + const messages: ChatCompletionMessageParam[] = systemPrompt + ? 
[{ role: 'system', content: systemPrompt }] + : [{ role: role as 'user' | 'system' | 'assistant', content: message }]; + try { const stream = await this.openai.chat.completions.create({ - model: 'gpt-3.5-turbo', - messages: [{ role: 'user', content: content }], + model, + messages, stream: true, }); let chunkCount = 0; diff --git a/llm-server/src/type/GenerateMessage.ts b/llm-server/src/type/GenerateMessage.ts new file mode 100644 index 0000000..c7d8f6d --- /dev/null +++ b/llm-server/src/type/GenerateMessage.ts @@ -0,0 +1,5 @@ +export interface GenerateMessageParams { + model: string; // Model to use, e.g., 'gpt-3.5-turbo' + message: string; // User's message or query + role?: 'user' | 'system' | 'assistant' | 'tool' | 'function'; // Optional role +} From 445153440179593d9fba453f158a0788e586896c Mon Sep 17 00:00:00 2001 From: Jackson Chen <541898146chen@gmail.com> Date: Sun, 3 Nov 2024 18:11:29 -0600 Subject: [PATCH 5/6] feat: Refactor OpenAIModelProvider to improve streaming response This commit refactors the OpenAIModelProvider class to improve the streaming response when generating chat completions with OpenAI. It updates the messages array to include the system prompt and user message, and removes unnecessary comments. This change aims to enhance the overall performance and reliability of the streaming response feature. Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- llm-server/src/model/openai-model-provider.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llm-server/src/model/openai-model-provider.ts b/llm-server/src/model/openai-model-provider.ts index b3e8c17..93c990c 100644 --- a/llm-server/src/model/openai-model-provider.ts +++ b/llm-server/src/model/openai-model-provider.ts @@ -35,10 +35,10 @@ export class OpenAIModelProvider extends ModelProvider { // Get the system prompt based on the model const systemPrompt = systemPrompts['codefox-basic']?.systemPrompt || ''; - // Prepare the messages array, including system prompt if available - const messages: ChatCompletionMessageParam[] = systemPrompt - ? [{ role: 'system', content: systemPrompt }] - : [{ role: role as 'user' | 'system' | 'assistant', content: message }]; + const messages: ChatCompletionMessageParam[] = [ + { role: 'system', content: systemPrompt }, + { role: role as 'user' | 'system' | 'assistant', content: message }, + ]; try { const stream = await this.openai.chat.completions.create({ @@ -46,6 +46,7 @@ export class OpenAIModelProvider extends ModelProvider { messages, stream: true, }); + let chunkCount = 0; for await (const chunk of stream) { const content = chunk.choices[0]?.delta?.content || ''; @@ -55,6 +56,7 @@ export class OpenAIModelProvider extends ModelProvider { res.write(`data: ${JSON.stringify(chunk)}\n\n`); } } + const endTime = Date.now(); this.logger.log( `Response generation completed. Total chunks: ${chunkCount}`, @@ -73,20 +75,18 @@ export class OpenAIModelProvider extends ModelProvider { async getModelTagsResponse(res: Response): Promise { this.logger.log('Fetching available models from OpenAI...'); - // Set SSE headers res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', Connection: 'keep-alive', }); + try { const startTime = Date.now(); const models = await this.openai.models.list(); - const response = { - models: models, // Wrap the models in the required structure + models: models, }; - const endTime = Date.now(); this.logger.log( `Model fetching completed. 
Total models: ${models.data.length}`, From d6eee3c9b958601dd03f042f727147d69b09148b Mon Sep 17 00:00:00 2001 From: ZHallen122 Date: Sun, 3 Nov 2024 19:24:16 -0500 Subject: [PATCH 6/6] add system prompt to llama --- llm-server/src/model/llama-model-provider.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/llm-server/src/model/llama-model-provider.ts b/llm-server/src/model/llama-model-provider.ts index b4274d2..2b25159 100644 --- a/llm-server/src/model/llama-model-provider.ts +++ b/llm-server/src/model/llama-model-provider.ts @@ -8,6 +8,8 @@ import { } from 'node-llama-cpp'; import { ModelProvider } from './model-provider.js'; import { Logger } from '@nestjs/common'; +import { systemPrompts } from '../prompt/systemPrompt'; +import { ChatCompletionMessageParam } from 'openai/resources/chat/completions'; import { GenerateMessageParams } from '../type/GenerateMessage'; //TODO: using protocol class @@ -44,8 +46,22 @@ export class LlamaModelProvider extends ModelProvider { this.logger.log('LlamaChatSession created.'); let chunkCount = 0; const startTime = Date.now(); + + // Get the system prompt based on the model + const systemPrompt = systemPrompts['codefox-basic']?.systemPrompt || ''; + + const messages = [ + { role: 'system', content: systemPrompt }, + { role: role as 'user' | 'system' | 'assistant', content: message }, + ]; + + // Convert messages array to a single formatted string for Llama + const formattedPrompt = messages + .map(({ role, content }) => `${role}: ${content}`) + .join('\n'); + try { - await session.prompt(message, { + await session.prompt(formattedPrompt, { onTextChunk: chunk => { chunkCount++; this.logger.debug(`Sending chunk #${chunkCount}: "${chunk}"`);
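
Patch 6 approximates chat roles for node-llama-cpp by flattening the system prompt and the user message into a single plain-text prompt rather than applying a model-specific chat template. The standalone sketch below is not part of the diffs (the helper name is hypothetical); it simply mirrors the formatting added in llama-model-provider.ts so the shape of the prompt handed to the Llama session is easy to see.

type ChatRole = 'system' | 'user' | 'assistant';

interface ChatTurn {
  role: ChatRole;
  content: string;
}

// Mirrors the formatting added in llama-model-provider.ts: each turn becomes
// "<role>: <content>" and turns are joined with newlines. This is a plain-text
// approximation of a chat transcript, not a model-specific chat template.
function formatPromptForLlama(turns: ChatTurn[]): string {
  return turns.map(({ role, content }) => `${role}: ${content}`).join('\n');
}

// Example:
// formatPromptForLlama([
//   { role: 'system', content: 'You are CodeFox, an advanced and powerful AI...' },
//   { role: 'user', content: 'Generate a REST endpoint.' },
// ]);
// => "system: You are CodeFox, ...\nuser: Generate a REST endpoint."

Because the roles are serialized as literal text, any chat markers the underlying GGUF model may expect are not applied; that is a reasonable first pass, though it may be worth revisiting once a protocol class for the Llama provider exists (see the TODO in the file).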
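
Taken together, the series changes the llm-server's /chat/completion contract: the request body now carries both the user message and a model name, main.ts falls back to 'gpt-3.5-turbo' when no model is supplied, and the providers prepend the 'codefox-basic' system prompt before generating. The following is a minimal, hypothetical client sketch, not part of the patches: it assumes the llm-server from these diffs is listening on localhost:3001 (as hard-coded in chat.service.ts), that a global fetch with streaming bodies is available (Node 18+), and that any end-of-stream signaling the server may emit (not shown in the diffs) can simply be ignored.

interface ChatCompletionRequest {
  content: string; // becomes GenerateMessageParams.message on the server
  model: string;   // e.g. 'gpt-3.5-turbo'
}

async function streamCompletion(req: ChatCompletionRequest): Promise<void> {
  const response = await fetch('http://localhost:3001/chat/completion', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(req),
  });

  if (!response.ok || !response.body) {
    throw new Error(`chat/completion request failed: ${response.status}`);
  }

  const decoder = new TextDecoder();
  // The server writes SSE frames of the form `data: <JSON chunk>\n\n`, where
  // the JSON payload is the raw OpenAI streaming chunk object.
  // NOTE: this simplified parser assumes each frame arrives within one chunk.
  for await (const bytes of response.body as unknown as AsyncIterable<Uint8Array>) {
    for (const line of decoder.decode(bytes, { stream: true }).split('\n')) {
      if (!line.startsWith('data: ')) continue;
      try {
        const chunk = JSON.parse(line.slice('data: '.length));
        process.stdout.write(chunk?.choices?.[0]?.delta?.content ?? '');
      } catch {
        // Ignore non-JSON payloads (e.g. a possible end-of-stream marker).
      }
    }
  }
}

streamCompletion({ content: 'Hello, CodeFox!', model: 'gpt-3.5-turbo' }).catch(
  console.error,
);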