From b11d4055c11091eeb80f53b2c8fcf385f81bf555 Mon Sep 17 00:00:00 2001 From: Liqun Li Date: Mon, 23 Dec 2024 14:48:18 +0800 Subject: [PATCH] Optimize Planner's reasoning capability (#450) --- README.md | 3 +- .../planner_examples/example-planner-2.yaml | 19 +++++----- .../example-planner-echo.yaml | 9 +++-- .../planner_examples/example-planner.yaml | 35 ++++++++++--------- taskweaver/memory/attachment.py | 2 +- taskweaver/planner/planner_prompt.yaml | 17 +++++---- 6 files changed, 47 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index b7b8a1c3..9a36b040 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Unlike many agent frameworks that only track the chat history with LLMs in text, ## 🆕 News +- 📅2024-12-23: TaskWeaver has been integrated with the [AgentOps](https://microsoft.github.io/TaskWeaver/docs/observability) for better observability and monitoring.🔍 - 📅2024-09-13: We introduce the shared memory to store information that is shared between the roles in TaskWeaver. Please check the [memory](https://microsoft.github.io/TaskWeaver/docs/memory) for more details.🧠 - 📅2024-09-13: We have enhanced the experience feature by allowing static and dynamic experience selection. Please check the [experience](https://microsoft.github.io/TaskWeaver/blog/experience) for more details.📚 - 📅2024-07-02: We have optimized TaskWeaver to support not-that-large language models served locally. Please check this [post](https://microsoft.github.io/TaskWeaver/blog/local_llm) for more details.🔗 @@ -31,7 +32,7 @@ Unlike many agent frameworks that only track the chat history with LLMs in text, - 📅2024-03-27: TaskWeaver now switches to `container` mode by default for code execution. Please check the [code execution](https://microsoft.github.io/TaskWeaver/docs/code_execution) for more details.🐳 - 📅2024-03-07: TaskWeaver now supports configuration of different LLMs for various components, such as the Planner and CodeInterpreter. 
Please check the [multi-llm](https://microsoft.github.io/TaskWeaver/docs/llms/multi-llm) for more details.🔗 - 📅2024-03-04: TaskWeaver now supports a [container](https://microsoft.github.io/TaskWeaver/docs/code_execution) mode, which provides a more secure environment for code execution.🐳 -- 📅2024-02-28: TaskWeaver now offers a [CLI-only](https://microsoft.github.io/TaskWeaver/docs/advanced/cli_only) mode, enabling users to interact seamlessly with the Command Line Interface (CLI) using natural language.📟 + diff --git a/project/examples/planner_examples/example-planner-2.yaml b/project/examples/planner_examples/example-planner-2.yaml index 66e67e98..5bd4f173 100644 --- a/project/examples/planner_examples/example-planner-2.yaml +++ b/project/examples/planner_examples/example-planner-2.yaml @@ -11,11 +11,14 @@ rounds: send_from: Planner send_to: User attachment_list: - - type: init_plan - content: |- - 1. Respond to the user's greeting - - type: plan - content: |- - 1. Respond to the user's greeting - - type: current_plan_step - content: 1. Respond to the user's greeting \ No newline at end of file + - type: reasoning + content: |- + The user greets the Planner + - type: init_plan + content: |- + 1. Respond to the user's greeting + - type: plan + content: |- + 1. Respond to the user's greeting + - type: current_plan_step + content: 1. Respond to the user's greeting \ No newline at end of file diff --git a/project/examples/planner_examples/example-planner-echo.yaml b/project/examples/planner_examples/example-planner-echo.yaml index 0971d373..06ffd29b 100644 --- a/project/examples/planner_examples/example-planner-echo.yaml +++ b/project/examples/planner_examples/example-planner-echo.yaml @@ -11,6 +11,9 @@ rounds: send_from: Planner send_to: Echo attachment_list: + - type: reasoning + content: |- + The user wants to echo the input 'Hello World' - type: init_plan content: |- 1. 
Ask Echo to echo the user's input, 'Hello World' @@ -27,6 +30,9 @@ rounds: send_from: Planner send_to: User attachment_list: + - type: reasoning + content: |- + The user query is successfully answered - type: init_plan content: |- 1. Ask Echo to echo the user's input, 'Hello World' @@ -35,7 +41,4 @@ rounds: 1. Ask Echo to echo the user's input, 'Hello World' - type: current_plan_step content: 1. Ask Echo to echo the user's input, 'Hello World' - - type: review - content: |- - The user query is successfully answered diff --git a/project/examples/planner_examples/example-planner.yaml b/project/examples/planner_examples/example-planner.yaml index bf2ef921..68f2cc9d 100644 --- a/project/examples/planner_examples/example-planner.yaml +++ b/project/examples/planner_examples/example-planner.yaml @@ -11,17 +11,20 @@ rounds: send_from: Planner send_to: CodeInterpreter attachment_list: - - type: init_plan - content: |- - 1. Load the data file - 2. Count the rows of the loaded data - 3. Check the execution result and report the result to the user - - type: plan - content: |- - 1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data - 2. Check the execution result and report the result to the user if it is correct - - type: current_plan_step - content: 1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data + - type: reasoning + content: |- + The user wants to count the rows of the data file /home/data.csv. The first step is to load the data file and count the rows of the loaded data. + - type: init_plan + content: |- + 1. Load the data file + 2. Count the rows of the loaded data + 3. Check the execution result and report the result to the user + - type: plan + content: |- + 1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data + 2. Check the execution result and report the result to the user if it is correct + - type: current_plan_step + content: 1. 
Instruct CodeInterpreter to load the data file and count the rows of the loaded data - message: Load the data file /home/data.csv successfully and there are 100 rows in the data file send_from: CodeInterpreter send_to: Planner @@ -30,6 +33,11 @@ rounds: send_from: Planner send_to: User attachment_list: + - type: reasoning + content: |- + The data file /home/data.csv is loaded and there are 100 rows in the data file + The execution result is correct + The user query is successfully answered - type: init_plan content: |- 1. Load the data file @@ -41,8 +49,3 @@ rounds: 2. Check the execution result and report the result to the user if it is correct - type: current_plan_step content: 2. report the result to the user - - type: review - content: |- - The data file /home/data.csv is loaded and there are 100 rows in the data file - The execution result is correct - The user query is successfully answered \ No newline at end of file diff --git a/taskweaver/memory/attachment.py b/taskweaver/memory/attachment.py index c7b5bfd6..f95566bf 100644 --- a/taskweaver/memory/attachment.py +++ b/taskweaver/memory/attachment.py @@ -12,7 +12,7 @@ class AttachmentType(Enum): init_plan = "init_plan" plan = "plan" current_plan_step = "current_plan_step" - review = "review" + reasoning = "reasoning" # CodeInterpreter - generate code thought = "thought" diff --git a/taskweaver/planner/planner_prompt.yaml b/taskweaver/planner/planner_prompt.yaml index d6426f6d..b4703a74 100644 --- a/taskweaver/planner/planner_prompt.yaml +++ b/taskweaver/planner/planner_prompt.yaml @@ -21,15 +21,14 @@ instruction_template: |- ## Planner Character - Planner's main job is to make planning and to instruct Workers to resolve the request from the User. - - Planner can conduct basic analysis (e.g., comprehension, extraction, etc.) to solve simple problems after reading the messages from the User and the Workers. 
- Planner should first try to solve the task by itself before reaching out to the Workers for their special expertise. + - Planner should independently handle basic tasks such as information extraction from text files, using its reasoning and comprehension skills before considering the involvement of Workers. - Planner can assign different subtasks to different Workers, and each subtask should be assigned to only one Worker. - Planner must reject the User's request if it contains potential security risks or illegal activities. - Planner should ask the User to provide additional information critical for problem solving, but only after trying the best. - Planner can talk to the User and Workers by specifying the `send_to` field in the response, but MUST NOT talk to the Planner itself. - Planner should refine the plan according to its observations from the replies of the Workers or the new requests of User. - Planner needs to inform Workers on the User's request, the current step, and necessary information to complete the task. - - Planner must check the Worker's response and provide feedback to the Worker if the response is incorrect or incomplete. + - Planner must thoroughly review the Worker's response and provide feedback to the Worker if the response is incorrect or incomplete. - Planner can ignore the permission or file access issues since Workers are powerful and can handle them. ## Planner's planning process @@ -120,6 +119,10 @@ response_json_schema: |- "response": { "type": "object", "properties": { + "reasoning": { + "type": "string", + "description": "The reasoning behind the Planner's decision. It should include the analysis of the User's request, the Workers' responses, and the current environment context." + }, "init_plan": { "type": "string", "description": "The initial plan to decompose the User's task into subtasks and list them as the detailed subtask steps. The initial plan must contain dependency annotations for sequential and interactive dependencies." 
@@ -132,10 +135,6 @@ response_json_schema: |- "type": "string", "description": "The current step Planner is executing." }, - "review": { - "type": "string", - "description": "The review of the current step. If the Worker's response is incorrect or incomplete, Planner should provide feedback to the Worker." - }, "send_to": { "type": "string", "description": "The name of character (User or name of the Worker) that Planner wants to speak to." @@ -146,12 +145,12 @@ response_json_schema: |- } }, "required": [ + "reasoning", "init_plan", "plan", "current_plan_step", "send_to", - "message", - "review" + "message" ], "additionalProperties": false }