From 621b91409afaf658cb98b7be6af30a4566b613d6 Mon Sep 17 00:00:00 2001 From: SuZhou-Joe Date: Wed, 13 Dec 2023 17:13:57 +0800 Subject: [PATCH] feat: more robust parser to get suggested actions (#65) * feat: more robust parser to get suggested actions Signed-off-by: SuZhou-Joe * feat: optimize code and optimize performance Signed-off-by: SuZhou-Joe --------- Signed-off-by: SuZhou-Joe --- .../parsers/basic_input_output_parser.test.ts | 134 +++++++++++++++--- server/parsers/basic_input_output_parser.ts | 87 ++++++++++-- .../agent_framework_storage_service.ts | 5 +- server/types.ts | 6 +- server/utils/message_parser_runner.ts | 6 +- 5 files changed, 204 insertions(+), 34 deletions(-) diff --git a/server/parsers/basic_input_output_parser.test.ts b/server/parsers/basic_input_output_parser.test.ts index 0e6c7420..549cbacc 100644 --- a/server/parsers/basic_input_output_parser.test.ts +++ b/server/parsers/basic_input_output_parser.test.ts @@ -3,17 +3,20 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { BasicInputOutputParser } from './basic_input_output_parser'; +import { BasicInputOutputParser, parseSuggestedActions } from './basic_input_output_parser'; describe('BasicInputOutputParser', () => { it('return input and output', async () => { + const item = { + input: 'input', + response: 'response', + conversation_id: '', + interaction_id: 'interaction_id', + create_time: '', + }; expect( - await BasicInputOutputParser.parserProvider({ - input: 'input', - response: 'response', - conversation_id: '', - interaction_id: 'interaction_id', - create_time: '', + await BasicInputOutputParser.parserProvider(item, { + interactions: [item], }) ).toEqual([ { @@ -32,16 +35,19 @@ describe('BasicInputOutputParser', () => { }); it('return suggestions when additional_info has related info', async () => { + const item = { + input: 'input', + response: 'response', + conversation_id: '', + interaction_id: 'interaction_id', + create_time: '', + additional_info: { + 'QuestionSuggestor.output': '["Foo", "Bar"]', + }, + }; expect( - await BasicInputOutputParser.parserProvider({ - input: 'input', - response: 'response', - conversation_id: '', - interaction_id: 'interaction_id', - create_time: '', - additional_info: { - 'QuestionSuggestor.output': '["Foo", "Bar"]', - }, + await BasicInputOutputParser.parserProvider(item, { + interactions: [item], }) ).toEqual([ { @@ -68,14 +74,54 @@ describe('BasicInputOutputParser', () => { ]); }); + it("should only parse latest interaction's suggestions field", async () => { + const item = { + input: 'input', + response: 'response', + conversation_id: '', + interaction_id: 'interaction_id', + create_time: '', + additional_info: { + 'QuestionSuggestor.output': '["Foo", "Bar"]', + }, + }; + expect( + await BasicInputOutputParser.parserProvider(item, { + interactions: [ + item, + { + ...item, + interaction_id: 'foo', + }, + ], + }) + ).toEqual([ + { + type: 'input', + contentType: 'text', + content: 'input', + }, + { + type: 'output', + contentType: 'markdown', + content: 'response', + traceId: 'interaction_id', + suggestedActions: [], + }, + ]); + }); + it('sanitizes markdown outputs', async () => { - const outputs = await BasicInputOutputParser.parserProvider({ + const item = { input: 'test question', response: 'normal text image !!!!!!![](http://evil.com/) ![image](http://evil.com/) [good link](https://link)', conversation_id: 'test-session', interaction_id: 'interaction_id', create_time: '', + }; + const outputs = await BasicInputOutputParser.parserProvider(item, { + interactions: [item], }); expect(outputs).toEqual([ @@ -95,3 +141,57 @@ describe('BasicInputOutputParser', () => { ]); }); }); + +describe('parseSuggestedActions', () => { + it('should return parsed array when string matches valid json format', () => { + expect(parseSuggestedActions('["1", "2"]')).toEqual(['1', '2']); + }); + + it('should return parsed array when there is json inside the string', () => { + expect(parseSuggestedActions('Here are result { "response": ["1", "2"] }')).toEqual(['1', '2']); + }); + + it('should return parsed array when there is {} inside the string', () => { + expect(parseSuggestedActions('Here are result { "response": ["{1}", "{2}"] }')).toEqual([ + '{1}', + '{2}', + ]); + }); + + it('should return parsed array when there is additional field in response', () => { + expect( + parseSuggestedActions('Here are result { "response": ["{1}", "{2}"], "foo": "bar" }') + ).toEqual(['{1}', '{2}']); + }); + + it('should return empty array when value is not a string array', () => { + expect(parseSuggestedActions('[{ "a": 1 }, "{2}"]')).toEqual([]); + }); + + it('should return empty array when there is no json-like string inside', () => { + expect(parseSuggestedActions('Here are result "response": ["1", "2"], "foo": "bar" }')).toEqual( + [] + ); + }); + + it('should return empty array when the json-like string is invalid', () => { + expect( + parseSuggestedActions('Here are result { response": ["{1}", "{2}"], "foo": "bar" }') + ).toEqual([]); + }); + + it('should return empty array when the key is not response', () => { + expect( + parseSuggestedActions('Here are result { "result": ["{1}", "{2}"], "foo": "bar" }') + ).toEqual([]); + }); + + it('should return empty array when the json is invalid', () => { + expect(parseSuggestedActions('Here are result { "response": 1, "foo": "bar" }')).toEqual([]); + }); + + it('should return empty array when input is not valid', () => { + expect(parseSuggestedActions('')).toEqual([]); + expect(parseSuggestedActions((null as unknown) as string)).toEqual([]); + }); +}); diff --git a/server/parsers/basic_input_output_parser.ts b/server/parsers/basic_input_output_parser.ts index d71e26f3..a43d090d 100644 --- a/server/parsers/basic_input_output_parser.ts +++ b/server/parsers/basic_input_output_parser.ts @@ -5,7 +5,8 @@ import createDOMPurify from 'dompurify'; import { JSDOM } from 'jsdom'; -import { IInput, IOutput, Interaction } from '../../common/types/chat_saved_object_attributes'; +import { IInput, IOutput } from '../../common/types/chat_saved_object_attributes'; +import { MessageParser } from '../types'; const sanitize = (content: string) => { const window = new JSDOM('').window; @@ -13,19 +14,81 @@ const sanitize = (content: string) => { return DOMPurify.sanitize(content, { FORBID_TAGS: ['img'] }).replace(/!+\[/g, '['); }; -export const BasicInputOutputParser = { +const isStringArray = (array?: unknown): array is string[] => + Array.isArray(array) && array.every((item) => typeof item === 'string'); + +export const parseSuggestedActions = (value: string): string[] => { + if (!value) { + return []; + } + const suggestedOutputString = value; + let suggestedActions: string[] = []; + try { + suggestedActions = JSON.parse(suggestedOutputString); + } catch (e) { + suggestedActions = []; + } + + if (suggestedActions.length) { + if (isStringArray(suggestedActions)) { + return suggestedActions; + } + + return []; + } + + /** + * Get json-like substring from a string + * + * /\{ // Match an opening curly brace + * .* // Match any preleading spaces and letters + * response[^\n]*\: // Match "response" key because suggestion tool uses { response: [action1, action2] } + * // in its prompt so that the parsedResult may contain a "response" field. + * // If prompt changed, the logic here need to change accordingly. + * .* // Match any any string behind the "response:" + * \}/g // Match a closing curly brace, and 'g' flag for global search + * + */ + const jsonPattern = /\{.*response[^\n]*\:.*\}/g; + + /** + * Use the regular expression to find the JSON substring + */ + const match = value.match(jsonPattern); + + const matchedResult = match && match[0]; + + if (!matchedResult) { + return []; + } + + try { + const parsedResult = JSON.parse(matchedResult); + + if (parsedResult?.response && isStringArray(parsedResult.response)) { + return parsedResult.response; + } + } catch (e) { + return []; + } + + return []; +}; + +export const BasicInputOutputParser: MessageParser = { order: 0, id: 'output_message', - async parserProvider(interaction: Interaction) { - const suggestedOutputString = interaction.additional_info?.['QuestionSuggestor.output'] as - | string - | null; - let suggestedActions: string[] = []; - try { - suggestedActions = JSON.parse(suggestedOutputString || '[]'); - } catch (e) { - suggestedActions = []; - } + async parserProvider(interaction, options) { + /** + * From UX, only the last interaction need to parse suggestedActions. + */ + const isLatestInteraction = + options.interactions.reverse()[0]?.interaction_id === interaction.interaction_id; + const suggestedActions = isLatestInteraction + ? parseSuggestedActions( + (interaction.additional_info?.['QuestionSuggestor.output'] as string | null) || '' + ) + : []; const inputItem: IInput = { type: 'input', contentType: 'text', diff --git a/server/services/storage/agent_framework_storage_service.ts b/server/services/storage/agent_framework_storage_service.ts index c7a2c4be..942325c7 100644 --- a/server/services/storage/agent_framework_storage_service.ts +++ b/server/services/storage/agent_framework_storage_service.ts @@ -56,7 +56,10 @@ export class AgentFrameworkStorageService implements StorageService { let finalMessages: IMessage[] = []; for (const interaction of finalInteractions) { - finalMessages = [...finalMessages, ...(await messageParserRunner.run(interaction))]; + finalMessages = [ + ...finalMessages, + ...(await messageParserRunner.run(interaction, { interactions: finalInteractions })), + ]; } return { title: conversation.body.name, diff --git a/server/types.ts b/server/types.ts index 3e93ce8f..719d76c2 100644 --- a/server/types.ts +++ b/server/types.ts @@ -11,6 +11,10 @@ export interface AssistantPluginSetup {} // eslint-disable-next-line @typescript-eslint/no-empty-interface export interface AssistantPluginStart {} +export interface ProviderOptions { + interactions: Interaction[]; +} + export interface MessageParser { /** * The id of the parser, should be unique among the parsers. @@ -26,7 +30,7 @@ export interface MessageParser { /** * parserProvider is the callback that will be triggered in each message */ - parserProvider: (interaction: Interaction) => Promise; + parserProvider: (interaction: Interaction, options: ProviderOptions) => Promise; } export interface RoutesOptions { diff --git a/server/utils/message_parser_runner.ts b/server/utils/message_parser_runner.ts index 60534247..29b865ca 100644 --- a/server/utils/message_parser_runner.ts +++ b/server/utils/message_parser_runner.ts @@ -4,11 +4,11 @@ */ import { IMessage, Interaction } from '../../common/types/chat_saved_object_attributes'; -import { MessageParser } from '../types'; +import { MessageParser, ProviderOptions } from '../types'; export class MessageParserRunner { constructor(private readonly messageParsers: MessageParser[]) {} - async run(interaction: Interaction): Promise { + async run(interaction: Interaction, options: ProviderOptions): Promise { const sortedParsers = [...this.messageParsers]; sortedParsers.sort((parserA, parserB) => { const { order: orderA = 999 } = parserA; @@ -19,7 +19,7 @@ export class MessageParserRunner { for (const messageParser of sortedParsers) { let tempResult: IMessage[] = []; try { - tempResult = await messageParser.parserProvider(interaction); + tempResult = await messageParser.parserProvider(interaction, options); /** * Make sure the tempResult is an array. */