From cb28f7c256e1ae6fa0e452e366eaee80d2d29a06 Mon Sep 17 00:00:00 2001
From: Liqun Li <liqli@microsoft.com>
Date: Tue, 19 Dec 2023 13:41:23 +0800
Subject: [PATCH 1/5] refactor code generator's chat structure (#78)

Move the original chat content from "executor" to "user". The user is now
responsible for reporting feedback to the code generator (CG). One
consequence is that we need to add an extra post from the user if the
conversation ends with "ci->user", because otherwise there would be no
execution result.

This can only happen in examples or during chat history summarization.

misc:
- changed cg's prompt file name
- fixed typos in prompts
- fixed old unit tests and added new unit tests
---
 docs/example.md                               |  2 +-
 .../example1-codeinterpreter.yaml             |  4 +-
 .../example2-codeinterpreter.yaml             |  2 +-
 project/plugins/klarna_search.yaml            |  2 +
 project/plugins/sql_pull_data.py              |  3 +
 project/plugins/sql_pull_data.yaml            |  5 +-
 .../code_generator/code_generator.py          | 95 +++++++++++++++----
 ...prompt.yaml => code_generator_prompt.yaml} | 45 ++++-----
 .../code_generator/compression_prompt.yaml    |  6 +-
 taskweaver/memory/compression.py              |  8 +-
 taskweaver/memory/post.py                     |  4 +-
 taskweaver/planner/planner.py                 |  2 +-
 taskweaver/role/translator.py                 |  8 +-
 .../data/prompts/generator_prompt.yaml        | 47 +++++----
 tests/unit_tests/test_code_generator.py       | 68 ++++++++-----
 tests/unit_tests/test_embedding.py            |  4 +
 tests/unit_tests/test_round_compressor.py     | 83 ++++++++++++++++
 17 files changed, 272 insertions(+), 116 deletions(-)
 rename taskweaver/code_interpreter/code_generator/{code_generator_json_prompt.yaml => code_generator_prompt.yaml} (51%)
 create mode 100644 tests/unit_tests/test_round_compressor.py

diff --git a/docs/example.md b/docs/example.md
index 9aae4a71..461d390f 100644
--- a/docs/example.md
+++ b/docs/example.md
@@ -79,7 +79,7 @@ rounds:
 
 A code interpreter example tells LLMs how to generate code or orchestrate plugins to perform a specific task.
 The task is from the planner. Before constructing the code interpreter example, we strongly encourage you to
-read the [code generator prompt](../taskweaver/code_interpreter/code_generator/code_generator_json_prompt.yaml). 
+read the [code generator prompt](../taskweaver/code_interpreter/code_generator/code_generator_prompt.yaml). 
 
 The following is an example of a code interpreter example which contains 2 posts.
 Each post contains a message, a sender, a receiver, and a list of attachments.
diff --git a/project/codeinterpreter_examples/example1-codeinterpreter.yaml b/project/codeinterpreter_examples/example1-codeinterpreter.yaml
index 5011145e..97a7c717 100644
--- a/project/codeinterpreter_examples/example1-codeinterpreter.yaml
+++ b/project/codeinterpreter_examples/example1-codeinterpreter.yaml
@@ -8,12 +8,12 @@ rounds:
         send_from: Planner
         send_to: CodeInterpreter
         attachment_list: []
-      - message: Greetings! {ROLE_NAME} can understand the user request and generate syntactically correct python code to complete tasks and can utilize pre-defined plugins in the form of python functions to achieve tasks.
+      - message: Greetings! I can understand the user request and generate syntactically correct python code to complete tasks and can utilize pre-defined plugins in the form of python functions to achieve tasks.
         send_from: CodeInterpreter
         send_to: Planner
         attachment_list:
           - type: text
-            content: Greetings! {ROLE_NAME} can understand the user request and generate syntactically correct python code to complete tasks and can utilize pre-defined plugins in the form of python functions to achieve tasks.
+            content: Greetings! I can understand the user request and generate syntactically correct python code to complete tasks and can utilize pre-defined plugins in the form of python functions to achieve tasks.
           - type: verification
             content: NONE
           - type: code_error
diff --git a/project/codeinterpreter_examples/example2-codeinterpreter.yaml b/project/codeinterpreter_examples/example2-codeinterpreter.yaml
index d560e3f5..e17108c7 100644
--- a/project/codeinterpreter_examples/example2-codeinterpreter.yaml
+++ b/project/codeinterpreter_examples/example2-codeinterpreter.yaml
@@ -41,7 +41,7 @@ rounds:
         send_to: Planner
         attachment_list:
           - type: thought
-            content: "{ROLE_NAME} understands that the execution of the previous round has fell."
+            content: "{ROLE_NAME} understands that the execution of the previous round has failed."
           - type: thought
             content: "{ROLE_NAME} understands that the file /abc/def.txt does not exist and will not attempt to read it again."
           - type: text
diff --git a/project/plugins/klarna_search.yaml b/project/plugins/klarna_search.yaml
index 18907092..187e8c1e 100644
--- a/project/plugins/klarna_search.yaml
+++ b/project/plugins/klarna_search.yaml
@@ -3,6 +3,8 @@ enabled: true
 required: false
 description: >-
   Search and compare prices from thousands of online shops. Only available in the US.
+  This plugin only takes user requests when searching for merchandise.
+  If not clear, confirm with the user if they want to search for merchandise from Klarna.
 
 parameters:
   - name: query
diff --git a/project/plugins/sql_pull_data.py b/project/plugins/sql_pull_data.py
index 8f5d4490..78da603b 100644
--- a/project/plugins/sql_pull_data.py
+++ b/project/plugins/sql_pull_data.py
@@ -37,6 +37,9 @@ def __call__(self, query: str):
             {schema}
 
             Question: {question}
+            Please only write the sql query.
+            Do not add any comments or extra text.
+            Do not wrap the query in quotes or ```sql.
             SQL Query:"""
         prompt = ChatPromptTemplate.from_template(template)
 
diff --git a/project/plugins/sql_pull_data.yaml b/project/plugins/sql_pull_data.yaml
index 2dc3ff14..e2b343a3 100644
--- a/project/plugins/sql_pull_data.yaml
+++ b/project/plugins/sql_pull_data.yaml
@@ -2,8 +2,9 @@ name: sql_pull_data
 enabled: true
 required: false
 description: >-
-  Pull data from a SQL database. This plugin takes user requests when obtaining data from database is explicitly mentioned.
-  Otherwise, it is not sure if the user wants to pull data from database or not.
+  Pull data from a SQL database. 
+  This plugin takes user requests when obtaining data from database is explicitly mentioned.
+  Otherwise, confirm with the user if they want to pull data from this database.
   The data from this database can only used for anomaly detection.
 
 parameters:
diff --git a/taskweaver/code_interpreter/code_generator/code_generator.py b/taskweaver/code_interpreter/code_generator/code_generator.py
index bc531be1..178c8c07 100644
--- a/taskweaver/code_interpreter/code_generator/code_generator.py
+++ b/taskweaver/code_interpreter/code_generator/code_generator.py
@@ -27,7 +27,7 @@ def _configure(self) -> None:
             "prompt_file_path",
             os.path.join(
                 os.path.dirname(os.path.abspath(__file__)),
-                "code_generator_json_prompt.yaml",
+                "code_generator_prompt.yaml",
             ),
         )
         self.example_base_path = self._get_path(
@@ -147,7 +147,7 @@ def compose_prompt(
             self.examples = self.load_examples(plugin_only=self.plugin_only)
         for i, example in enumerate(self.examples):
             chat_history.extend(
-                self.compose_conversation(example.rounds, example.plugins),
+                self.compose_conversation(example.rounds, example.plugins, add_requirements=False),
             )
 
         summary = None
@@ -155,7 +155,7 @@ def compose_prompt(
             summary, rounds = self.round_compressor.compress_rounds(
                 rounds,
                 rounds_formatter=lambda _rounds: str(
-                    self.compose_conversation(_rounds, plugins),
+                    self.compose_conversation(_rounds, plugins, add_requirements=False),
                 ),
                 use_back_up_engine=True,
                 prompt_template=self.compression_template,
@@ -171,6 +171,12 @@ def compose_prompt(
         )
         return chat_history
 
+    def format_attachment(self, attachment: Attachment):  # content formatter handed to post_translator.post_to_raw_text
+        if attachment.type == AttachmentType.thought:  # only thought attachments are treated as templates
+            return attachment.content.format(ROLE_NAME=self.role_name)  # fills {ROLE_NAME}; NOTE(review): str.format may raise if the thought text contains other braces - confirm inputs
+        else:
+            return attachment.content  # every other attachment type is returned verbatim
+
     def compose_conversation(
         self,
         rounds: List[Round],
@@ -178,20 +184,23 @@ def compose_conversation(
         add_requirements: bool = False,
         summary: Optional[str] = None,
     ) -> List[ChatMessageType]:
-        def format_attachment(attachment: Attachment):
-            if attachment.type == AttachmentType.thought:
-                return attachment.content.format(ROLE_NAME=self.role_name)
-            else:
-                return attachment.content
-
         chat_history: List[ChatMessageType] = []
+        ignored_types = [
+            AttachmentType.revise_message,
+            AttachmentType.verification,
+            AttachmentType.code_error,
+            AttachmentType.execution_status,
+            AttachmentType.execution_result,
+        ]
+
         is_first_post = True
+        last_post: Post = None
         for round_index, conversation_round in enumerate(rounds):
             for post_index, post in enumerate(conversation_round.post_list):
                 # compose user query
                 user_message = ""
                 assistant_message = ""
-
+                is_final_post = round_index == len(rounds) - 1 and post_index == len(conversation_round.post_list) - 1
                 if is_first_post:
                     user_message = (
                         self.conversation_head_template.format(
@@ -209,37 +218,53 @@ def format_attachment(attachment: Attachment):
                     enrichment = ""
                     if plan is not None:
                         enrichment = (
-                            f"To complete this request:{user_query}\n\n"
+                            f"To complete this request: {user_query}\n\n"
                             f"I have drawn up a plan: \n{plan}\n\n"
                             f"Please proceed with this step of this plan:"
                         )
 
+                    user_feedback = "None"
+                    if last_post is not None and last_post.send_from == "CodeInterpreter":
+                        user_feedback = format_code_feedback(last_post)
+
                     user_message += self.user_message_head_template.format(
+                        FEEDBACK=user_feedback,
                         MESSAGE=f"{enrichment}{post.message}",
                     )
-                elif post.send_from == "CodeInterpreter" and post.send_to == "CodeInterpreter":
+                elif post.send_from == post.send_to == "CodeInterpreter":
                     # for code correction
                     user_message += self.user_message_head_template.format(
+                        FEEDBACK=format_code_feedback(post),
                         MESSAGE=f"{post.get_attachment(AttachmentType.revise_message)[0]}",
                     )
 
                     assistant_message = self.post_translator.post_to_raw_text(
                         post=post,
-                        content_formatter=format_attachment,
+                        content_formatter=self.format_attachment,
                         if_format_message=False,
                         if_format_send_to=False,
-                        ignore_types=[AttachmentType.revise_message],
+                        ignored_types=ignored_types,
                     )
                 elif post.send_from == "CodeInterpreter" and post.send_to == "Planner":
+                    if is_final_post:
+                        # This user message is added to make the conversation complete
+                        # It is used to make sure the last assistant message has a feedback
+                        # This is only used for examples or context summarization
+                        user_message += self.user_message_head_template.format(
+                            FEEDBACK=format_code_feedback(post),
+                            MESSAGE="This is the feedback.",
+                        )
+
                     assistant_message = self.post_translator.post_to_raw_text(
                         post=post,
-                        content_formatter=format_attachment,
+                        content_formatter=self.format_attachment,
                         if_format_message=False,
                         if_format_send_to=False,
-                        ignore_types=[AttachmentType.revise_message],
+                        ignored_types=ignored_types,
                     )
                 else:
                     raise ValueError(f"Invalid post: {post}")
+                last_post = post
 
                 if len(assistant_message) > 0:
                     chat_history.append(
@@ -250,11 +275,9 @@ def format_attachment(attachment: Attachment):
                     )
                 if len(user_message) > 0:
                     # add requirements to the last user message
-                    if add_requirements and post_index == len(conversation_round.post_list) - 1:
+                    if is_final_post and add_requirements:
                         user_message += "\n" + self.query_requirements_template.format(
-                            PLUGIN_ONLY_PROMPT=self.compose_verification_requirements(
-                                plugins,
-                            ),
+                            CODE_GENERATION_REQUIREMENTS=self.compose_verification_requirements(plugins),
                             ROLE_NAME=self.role_name,
                         )
                     chat_history.append(
@@ -300,7 +323,7 @@ def reply(
 
         prompt = self.compose_prompt(rounds, self.plugin_pool)
 
-        def early_stop(_type: AttachmentType, value: str):
+        def early_stop(_type: AttachmentType, value: str) -> bool:
             if _type in [AttachmentType.text, AttachmentType.python, AttachmentType.sample]:
                 return True
             else:
@@ -377,3 +400,33 @@ def format_output_revision_message() -> str:
         "Don't surround the JSON with ```json and ```, just send the JSON object directly.\n"
         "Please try again."
     )
+
+
+def format_code_feedback(post: Post) -> str:  # build the feedback text (verification + execution sections) from a post's attachments
+    feedback = ""  # accumulated feedback; stays "" when no attachment matches any branch below
+    verification_status = ""  # last verification verdict seen; "" until a verification attachment is met
+    execution_status = ""  # last execution status seen; "" until an execution_status attachment is met
+    for attachment in post.attachment_list:  # order matters: a status attachment only affects detail attachments that come after it
+        if attachment.type == AttachmentType.verification and attachment.content == "CORRECT":
+            feedback += "## Verification\nI have verified that your code is CORRECT.\n"
+            verification_status = "CORRECT"
+        elif attachment.type == AttachmentType.verification and attachment.content == "NONE":
+            feedback += "## Verification\nNo code verification.\n"
+            verification_status = "NONE"
+        elif attachment.type == AttachmentType.verification and attachment.content == "INCORRECT":
+            feedback += "## Verification\nYour code is INCORRECT with the following error:\n"
+            verification_status = "INCORRECT"
+        elif attachment.type == AttachmentType.code_error and verification_status == "INCORRECT":  # the error text is echoed only after an INCORRECT verdict
+            feedback += f"{attachment.content}\n"
+        elif attachment.type == AttachmentType.execution_status and attachment.content == "NONE":
+            feedback += "## Execution\nNo code execution.\n"
+            execution_status = "NONE"
+        elif attachment.type == AttachmentType.execution_status and attachment.content == "SUCCESS":
+            feedback += "## Execution\nYour code has been executed successfully with the following result:\n"
+            execution_status = "SUCCESS"
+        elif attachment.type == AttachmentType.execution_status and attachment.content == "FAILURE":
+            feedback += "## Execution\nYour code has failed to execute with the following error:\n"
+            execution_status = "FAILURE"
+        elif attachment.type == AttachmentType.execution_result and execution_status != "NONE":  # appended unless status was explicitly NONE (so also when no status was seen yet)
+            feedback += f"{attachment.content}\n"
+    return feedback  # attachments of any other type or content contribute nothing
diff --git a/taskweaver/code_interpreter/code_generator/code_generator_json_prompt.yaml b/taskweaver/code_interpreter/code_generator/code_generator_prompt.yaml
similarity index 51%
rename from taskweaver/code_interpreter/code_generator/code_generator_json_prompt.yaml
rename to taskweaver/code_interpreter/code_generator/code_generator_prompt.yaml
index bbc7d1a9..3403cef7 100644
--- a/taskweaver/code_interpreter/code_generator/code_generator_json_prompt.yaml
+++ b/taskweaver/code_interpreter/code_generator/code_generator_prompt.yaml
@@ -4,46 +4,38 @@ content: |-
     - Each conversation starts with "==============================\n## Conversation Start"
     - Each conversation has multiple rounds, each round starts with "-----------------------------"
     - Each conversation has a context summary and definitions of plugin functions, both could be none.
+    - Each conversation is between the {ROLE_NAME} and the User.
     
     ## On {ROLE_NAME}'s profile and general capabilities:
     - {ROLE_NAME} can understand the user request and generate syntactically correct python code to complete tasks.
-    - {ROLE_NAME} can utilize pre-defined plugins of python functions to achieve tasks.
+    - {ROLE_NAME} can utilize pre-defined python functions (a.k.a plugins) to achieve tasks.
     - {ROLE_NAME} is prohibited to define functions that have been defined as plugins.
     - {ROLE_NAME} is prohibited to use plugins defined in previous conversations.
     - {ROLE_NAME} can only refer to variables in the generated code from previous successful rounds in the current Conversation, but should not refer to any information from failed rounds, rounds that have not been executed, or previous Conversations.
-    - {ROLE_NAME} should import other libraries if needed; if the library is not pre-installed, {ROLE_NAME} should install it in {EXECUTOR_NAME} as long as the user does not forbid it.
-    - {ROLE_NAME} verifies the correctness of the generated code. If the code is incorrect, {ROLE_NAME} will generate a verification error message.
+    - {ROLE_NAME} should import other libraries if needed; if the library is not pre-installed, {ROLE_NAME} should install it (with !pip) as long as the user does not forbid it.
+    - {ROLE_NAME} must respond to the User's feedback with a new code that addresses the feedback.
     
-    ## On {EXECUTOR_NAME}'s profile and general capabilities:
-    - {EXECUTOR_NAME} executes the generated python code from {ROLE_NAME}.
-    - {EXECUTOR_NAME} is backed by a stateful python Jupyter kernel. 
-    - {EXECUTOR_NAME} has three possible status: SUCCESS, FAILURE, and NONE.
-      - SUCCESS means the code has been executed successfully.
-      - FAILURE means the code has been executed unsuccessfully due to exceptions or errors.
-      - NONE means no code has not been executed.
+    ## On User's profile and general capabilities:
+    - Upon receiving code from {ROLE_NAME}, the User will verify the correctness of the generated code by {ROLE_NAME} before executing it.
+    - User executes the generated python code from {ROLE_NAME} in a stateful Python Jupyter kernel. 
+    - If any error occurs during the verification or execution, the User will provide feedback to the {ROLE_NAME}.
 
-    ## On response format:
-    - The response is a JSON list of dictionaries, each dictionary represents a reply that has a key named 'type' and a key named 'content'.
-    - The JSON list contains replies from {ROLE_NAME} and {EXECUTOR_NAME}.
+    ## On {ROLE_NAME}'s response format:
+    - The response is a JSON array of dictionaries, each dictionary has a key named 'type' and a key named 'content', i.e., {{"response": [{{"type": "thought", "content": "..." }}, ...]}}
     - {ROLE_NAME} generates the reply to the user with 'type' that must be one of the following:
       - "thought": the thoughts on the intermediate steps
       - "sample": textual descriptions including the sample code 
       - "python": the code that can be executed by {EXECUTOR_NAME}; comments must be added calling functions from the pre-defined plugins, including the description of the function and the parameters.
       - "text": the direct response in text without code
-      - "verification": the verification status on correctness of the generated code that can be CORRECT, INCORRECT, or NONE
-      - "code_error": the verification error message if the generated code is INCORRECT
-    - The JSON list can include multiple thought replies, but it can have only one of the following: sample, python, or text, exclusively.
-    - {EXECUTOR_NAME} generates replies to the user with 'type' that must be one of the following:
-      - "execution_status": the execution status of the code generated by {ROLE_NAME}, could be SUCCESS, FAILURE, or NONE
-      - "execution_result": the code execution result by {EXECUTOR_NAME} including the output and the error message
-    - The value of 'content' is a string that contains the actual content of the reply in markdown syntax.
+    - The "response" array can include multiple thought replies, but it can have only one of sample, python, or text, exclusively.
+    - The value of "content" is a string that contains the actual content and {ROLE_NAME} must be very careful about escaping the special characters (e.g., '\', '/', and '"') in the string for JSON format.
     
 conversation_head: |-
     ==============================
     ## Conversation Start
     
     ### Context Summary
-    The context summary of the previous rounds and a list of variables that {ROLE_NAME} can refer to:
+    The context summary of previous rounds and the variables that {ROLE_NAME} can refer to:
     {SUMMARY}
     
     ### Plugin Functions
@@ -52,13 +44,18 @@ conversation_head: |-
 
 user_message_head: |-
     -----------------------------
-    - User: {MESSAGE}
+    # Feedback of the code in the last round (None if no feedback):
+    {FEEDBACK}
+    
+    # Request from the User in this round:
+    {MESSAGE}
+    
+    
 
 requirements: |-
     Please follow the instructions below to complete the task:
     - {ROLE_NAME} can refer to intermediate variables in the generated code from previous successful rounds and the context summary in the current Conversation, 
     - {ROLE_NAME} should not refer to any information from failed rounds, rounds that have not been executed, or previous Conversations.
     - {ROLE_NAME} put all the result variables in the last line of the code.
-    - {ROLE_NAME} should leave "verification", "code_error", "execution_status", and "execution_result" empty in the response. 
     - {ROLE_NAME} must not import the plugins and otherwise the code will be failed to execute.
-    {PLUGIN_ONLY_PROMPT}
+    {CODE_GENERATION_REQUIREMENTS}
diff --git a/taskweaver/code_interpreter/code_generator/compression_prompt.yaml b/taskweaver/code_interpreter/code_generator/compression_prompt.yaml
index 417740af..66e6517c 100644
--- a/taskweaver/code_interpreter/code_generator/compression_prompt.yaml
+++ b/taskweaver/code_interpreter/code_generator/compression_prompt.yaml
@@ -2,16 +2,16 @@ version: 0.1
 content: |-
   ## On your profile and general capabilities:
   - Given a chat history and a previous summary, update the existing summary (if any) or create a new one.
-  - The chat involves a human interacting with an assistant capable of generating and running code to fulfill specific requests.
+  - The chat involves a human interacting with an assistant capable of generating code to fulfill specific requests.
   - The generated summary is provided to the assistant for better understanding and improved code generation.
   - Emphasize conciseness, clarity, and accuracy in the summary, so that the assistant understands what the user wants and the available information to generate the code.
-  - Pay attention to the "verification" and "execution_status" in each round of the conversation. Avoid listing variables from incorrect code or failed executions.
+  - Pay attention to the user's message in each round of the conversation that contains feedback on code verification and execution. Ignore the variables from incorrect code or failed executions.
   
   ## Output format
   The summary is desired to be organized in the following format:
   ```json
   {{
-    "ConversationHistory": "This part summarizes all the conversation rounds between a human and assistant",
+    "ConversationSummary": "This part summarizes all the conversation rounds between a human and assistant",
     "Variables": [
       {{
         "name": "variable name",
diff --git a/taskweaver/memory/compression.py b/taskweaver/memory/compression.py
index 7bb3ecf2..df5d031b 100644
--- a/taskweaver/memory/compression.py
+++ b/taskweaver/memory/compression.py
@@ -47,9 +47,6 @@ def compress_rounds(
             if _round.id in self.processed_rounds:
                 remaining_rounds -= 1
                 continue
-            if _round.state == "failed":
-                remaining_rounds -= 1
-                continue
             break
 
         # not enough rounds to compress
@@ -64,6 +61,7 @@ def compress_rounds(
         )
 
         if len(chat_summary) > 0:  # if the compression is successful
+            self.previous_summary = chat_summary
             return chat_summary, rounds[-self.rounds_to_retain :]
         else:
             return self.previous_summary, rounds[-remaining_rounds:]
@@ -86,10 +84,8 @@ def _summarize(
                 format_chat_message("user", chat_history_str),
             ]
             new_summary = self.llm_api.chat_completion(prompt, use_backup_engine=use_back_up_engine)["content"]
-            self.previous_summary = new_summary
             self.processed_rounds.update([_round.id for _round in rounds])
+            return new_summary
         except Exception as e:
             self.logger.warning(f"Failed to compress rounds: {e}")
             return ""
-
-        return self.previous_summary
diff --git a/taskweaver/memory/post.py b/taskweaver/memory/post.py
index 7f6dc216..af582701 100644
--- a/taskweaver/memory/post.py
+++ b/taskweaver/memory/post.py
@@ -2,7 +2,7 @@
 
 import secrets
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from taskweaver.memory.attachment import Attachment, AttachmentType
 from taskweaver.memory.type_vars import RoleName
@@ -34,7 +34,7 @@ class Post:
 
     @staticmethod
     def create(
-        message: str,
+        message: Union[str | None],
         send_from: RoleName,
         send_to: RoleName,
         attachment_list: Optional[List[Attachment]] = None,
diff --git a/taskweaver/planner/planner.py b/taskweaver/planner/planner.py
index de0edf73..f09c26a4 100644
--- a/taskweaver/planner/planner.py
+++ b/taskweaver/planner/planner.py
@@ -141,7 +141,7 @@ def compose_conversation_for_prompt(
                         conversation.append(
                             format_chat_message(
                                 role="assistant",
-                                message=post.get_attachment(type="invalid_response")[0],
+                                message=post.get_attachment(type=AttachmentType.invalid_response)[0],
                             ),
                         )  # append the invalid response to chat history
                         conversation.append(
diff --git a/taskweaver/role/translator.py b/taskweaver/role/translator.py
index 04f61bcc..59751db7 100644
--- a/taskweaver/role/translator.py
+++ b/taskweaver/role/translator.py
@@ -28,7 +28,7 @@ def __init__(
     def raw_text_to_post(
         self,
         llm_output: str,
-        send_from: str,
+        send_from: Literal["User", "Planner", "CodeInterpreter"],
         event_handler: Callable[[str, str], None],
         early_stop: Optional[Callable[[Union[AttachmentType, Literal["message", "send_to"]], str], bool]] = None,
         validation_func: Optional[Callable[[Post], None]] = None,
@@ -88,7 +88,7 @@ def post_to_raw_text(
         content_formatter: Callable[[Attachment], str] = lambda x: x.content,
         if_format_message: bool = True,
         if_format_send_to: bool = True,
-        ignore_types: Optional[List[AttachmentType]] = None,
+        ignored_types: Optional[List[AttachmentType]] = None,
     ) -> str:
         """
         Convert a Post object to raw text in the format of LLM output.
@@ -96,13 +96,13 @@ def post_to_raw_text(
         :param content_formatter:
         :param if_format_message:
         :param if_format_send_to:
-        :param ignore_types:
+        :param ignored_types:
         :return: str
         """
         structured_llm: List[Dict[str, str]] = []
         for attachment in post.attachment_list:
             attachments_dict = {}
-            if ignore_types is not None and attachment.type in ignore_types:
+            if ignored_types is not None and attachment.type in ignored_types:
                 continue
             attachments_dict["type"] = attachment.type.value
             attachments_dict["content"] = content_formatter(attachment)
diff --git a/tests/unit_tests/data/prompts/generator_prompt.yaml b/tests/unit_tests/data/prompts/generator_prompt.yaml
index 2771d089..16818a27 100644
--- a/tests/unit_tests/data/prompts/generator_prompt.yaml
+++ b/tests/unit_tests/data/prompts/generator_prompt.yaml
@@ -4,59 +4,58 @@ content: |-
     - Each conversation starts with "==============================\n## Conversation Start"
     - Each conversation has multiple rounds, each round starts with "-----------------------------"
     - Each conversation has a context summary and definitions of plugin functions, both could be none.
+    - Each conversation is between the {ROLE_NAME} and the User.
     
     ## On {ROLE_NAME}'s profile and general capabilities:
     - {ROLE_NAME} can understand the user request and generate syntactically correct python code to complete tasks.
-    - {ROLE_NAME} can utilize pre-defined plugins of python functions to achieve tasks.
+    - {ROLE_NAME} can utilize pre-defined python functions (a.k.a plugins) to achieve tasks.
     - {ROLE_NAME} is prohibited to define functions that have been defined as plugins.
     - {ROLE_NAME} is prohibited to use plugins defined in previous conversations.
     - {ROLE_NAME} can only refer to variables in the generated code from previous successful rounds in the current Conversation, but should not refer to any information from failed rounds, rounds that have not been executed, or previous Conversations.
-    - {ROLE_NAME} should import other libraries if needed; if the library is not pre-installed, {ROLE_NAME} should install it in {EXECUTOR_NAME} as long as the user does not forbid it.
-    - {ROLE_NAME} verifies the correctness of the generated code. If the code is incorrect, {ROLE_NAME} will generate a verification error message.
+    - {ROLE_NAME} should import other libraries if needed; if the library is not pre-installed, {ROLE_NAME} should install it (with !pip) as long as the user does not forbid it.
+    - {ROLE_NAME} must respond to the User's feedback with a new code that addresses the feedback.
     
-    ## On {EXECUTOR_NAME}'s profile and general capabilities:
-    - {EXECUTOR_NAME} executes the generated python code from {ROLE_NAME}.
-    - {EXECUTOR_NAME} is backed by a stateful python Jupyter kernel. 
-    - {EXECUTOR_NAME} has three possible status: SUCCESS, FAILURE, and NONE.
-      - SUCCESS means the code has been executed successfully.
-      - FAILURE means the code has been executed unsuccessfully due to exceptions or errors.
-      - NONE means no code has not been executed.
+    ## On User's profile and general capabilities:
+    - Upon receiving code from {ROLE_NAME}, the User will verify the correctness of the generated code by {ROLE_NAME} before executing it.
+    - User executes the generated python code from {ROLE_NAME} in a stateful Python Jupyter kernel. 
+    - If any error occurs during the verification or execution, the User will provide feedback to the {ROLE_NAME}.
 
-    ## On response format:
-    - The response is a JSON list of dictionaries, each dictionary represents a reply that has a key named 'type' and a key named 'content'.
-    - The JSON list contains replies from {ROLE_NAME} and {EXECUTOR_NAME}.
+    ## On {ROLE_NAME}'s response format:
+    - The response is a JSON array of dictionaries, each dictionary has a key named 'type' and a key named 'content', i.e., {{"response": [{{"type": "thought", "content": "..." }}, ...]}}
     - {ROLE_NAME} generates the reply to the user with 'type' that must be one of the following:
       - "thought": the thoughts on the intermediate steps
       - "sample": textual descriptions including the sample code 
       - "python": the code that can be executed by {EXECUTOR_NAME}; comments must be added calling functions from the pre-defined plugins, including the description of the function and the parameters.
       - "text": the direct response in text without code
-      - "verification": the verification status on correctness of the generated code that can be CORRECT, INCORRECT, or NONE
-      - "code_error": the verification error message if the generated code is INCORRECT
-    - The JSON list can include multiple thought replies, but it can have only one of the following: sample, python, or text, exclusively.
-    - {EXECUTOR_NAME} generates replies to the user with 'type' that must be one of the following:
-      - "execution_status": the execution status of the code generated by {ROLE_NAME}, could be SUCCESS, FAILURE, or NONE
-      - "execution_result": the code execution result by {EXECUTOR_NAME} including the output and the error message
-    - The value of 'content' is a string that contains the actual content of the reply in markdown syntax.
+    - The "response" array can include multiple thought replies, but it can have only one of sample, python, or text, exclusively.
+    - The value of "content" is a string that contains the actual content and {ROLE_NAME} must be very careful about escaping the special characters (e.g., '\', '/', and '"') in the string for JSON format.
 
 conversation_head: |-
     ==============================
     ## Conversation Start
     
     ### Context Summary
-    The context summary of the previous rounds and a list of variables that {ROLE_NAME} can refer to:
+    The context summary of previous rounds and the variables that {ROLE_NAME} can refer to:
     {SUMMARY}
     
     ### Plugin Functions
+    The functions can be directly called without importing:
     {PLUGINS}
 
 user_message_head: |-
     -----------------------------
-    - User: {MESSAGE}
+    # Feedback of the code in the last round (None if no feedback):
+    {FEEDBACK}
+    
+    # Request from the User in this round:
+    {MESSAGE}
+    
+    
 
 requirements: |-
     Please follow the instructions below to complete the task:
     - {ROLE_NAME} can refer to intermediate variables in the generated code from previous successful rounds and the context summary in the current Conversation, 
     - {ROLE_NAME} should not refer to any information from failed rounds, rounds that have not been executed, or previous Conversations.
     - {ROLE_NAME} put all the result variables in the last line of the code.
-    - {ROLE_NAME} should leave "verification", "code_error", "execution_status", and "execution_result" empty in the response. 
-    {PLUGIN_ONLY_PROMPT}
+    - {ROLE_NAME} must not import the plugins and otherwise the code will be failed to execute.
+    {CODE_GENERATION_REQUIREMENTS}
diff --git a/tests/unit_tests/test_code_generator.py b/tests/unit_tests/test_code_generator.py
index 4c749503..9ed57806 100644
--- a/tests/unit_tests/test_code_generator.py
+++ b/tests/unit_tests/test_code_generator.py
@@ -151,14 +151,19 @@ def test_compose_prompt():
         "## Conversation Start\n"
         "\n"
         "### Context Summary\n"
-        "The context summary of the previous rounds and a list of variables that "
+        "The context summary of previous rounds and the variables that "
         "ProgramApe can refer to:\n"
         "None\n"
         "\n"
         "### Plugin Functions\n"
+        "The functions can be directly called without importing:\n"
         "None\n"
         "-----------------------------\n"
-        "- User: create a dataframe"
+        "# Feedback of the code in the last round (None if no feedback):\n"
+        "None\n"
+        "\n"
+        "# Request from the User in this round:\n"
+        "create a dataframe"
     )
     assert messages[2]["role"] == "assistant"
     assert messages[2]["content"] == (
@@ -168,16 +173,22 @@ def test_compose_prompt():
         'codes."}, {"type": "python", "content": "df = pd.DataFrame(np.random.rand(10, '
         "2), columns=['DATE', 'VALUE'])\\ndescriptions = "
         '[(\\"sample_code_description\\", \\"Sample code has been generated to get a '
-        "dataframe `df` \\nwith 10 rows and 2 columns: 'DATE' and 'VALUE'\\\")]\"}, "
-        '{"type": "execution_status", "content": "SUCCESS"}, {"type": '
-        '"execution_result", "content": "A dataframe `df` with 10 rows and 2 columns: '
-        "'DATE' and 'VALUE' has been generated.\"}]}"
+        "dataframe `df` \\nwith 10 rows and 2 columns: 'DATE' and "
+        "'VALUE'\\\")]\"}]}"
     )
 
     assert messages[5]["role"] == "user"
     assert messages[5]["content"] == (
         "-----------------------------\n"
-        "- User: what is the max value?\n"
+        "# Feedback of the code in the last round (None if no feedback):\n"
+        "## Execution\n"
+        "Your code has been executed successfully with the following result:\n"
+        "The minimum value in the 'VALUE' column is 0.05;The maximum value in the "
+        "'VALUE' column is 0.99;The data range for the 'VALUE' column is 0.94\n"
+        "\n"
+        "\n"
+        "# Request from the User in this round:\n"
+        "what is the max value?\n"
         "Please follow the instructions below to complete the task:\n"
         "- ProgramApe can refer to intermediate variables in the generated code from "
         "previous successful rounds and the context summary in the current "
@@ -185,8 +196,8 @@ def test_compose_prompt():
         "- ProgramApe should not refer to any information from failed rounds, rounds "
         "that have not been executed, or previous Conversations.\n"
         "- ProgramApe put all the result variables in the last line of the code.\n"
-        '- ProgramApe should leave "verification", "code_error", "execution_status", '
-        'and "execution_result" empty in the response. \n'
+        "- ProgramApe must not import the plugins and otherwise the code will be "
+        "failed to execute.\n"
     )
 
 
@@ -294,8 +305,6 @@ def test_compose_prompt_with_plugin_only():
                 os.path.dirname(os.path.abspath(__file__)),
                 "data/examples/codeinterpreter_examples",
             ),
-            "code_interpreter.plugin_only": True,
-            "code_interpreter.code_verification_on": True,
         },
     )
     app_injector.binder.bind(AppConfigSource, to=app_config)
@@ -310,6 +319,7 @@ def test_compose_prompt_with_plugin_only():
         plugin_only=True,
         allowed_modules=[],
     )
+    code_generator.configure_verification(code_verification_on=True, plugin_only=True)
 
     code1 = (
         "df = pd.DataFrame(np.random.rand(10, 2), columns=['DATE', 'VALUE'])\n"
@@ -363,11 +373,14 @@ def test_compose_prompt_with_plugin_only():
 
     assert "read_csv" in messages[1]["content"]
     assert "write_csv" in messages[1]["content"]
+    assert "This is the feedback" in messages[3]["content"]
+    assert "Execution" in messages[3]["content"]
+    assert "Verification" in messages[3]["content"]
 
-    assert "sql_pull_data" in messages[3]["content"]
-    assert "anomaly_detection" in messages[3]["content"]
-    assert "klarna_search" in messages[3]["content"]
-    assert "paper_summary" in messages[3]["content"]
+    assert "sql_pull_data" in messages[4]["content"]
+    assert "anomaly_detection" in messages[4]["content"]
+    assert "klarna_search" in messages[4]["content"]
+    assert "paper_summary" in messages[4]["content"]
 
 
 def test_compose_prompt_with_not_plugin_only():
@@ -391,8 +404,6 @@ def test_compose_prompt_with_not_plugin_only():
                 os.path.dirname(os.path.abspath(__file__)),
                 "data/examples/codeinterpreter_examples",
             ),
-            "code_verification.plugin_only": False,
-            "code_verification.code_verification_on": False,
         },
     )
     app_injector.binder.bind(AppConfigSource, to=app_config)
@@ -400,7 +411,7 @@ def test_compose_prompt_with_not_plugin_only():
     from taskweaver.code_interpreter.code_generator import CodeGenerator
     from taskweaver.memory import Attachment, Memory, Post, Round
 
-    code_generator = app_injector.create_object(CodeGenerator)
+    code_generator = app_injector.get(CodeGenerator)
 
     code1 = (
         "df = pd.DataFrame(np.random.rand(10, 2), columns=['DATE', 'VALUE'])\n"
@@ -459,10 +470,10 @@ def test_compose_prompt_with_not_plugin_only():
     assert "klarna_search" not in messages[1]["content"]
     assert "paper_summary" not in messages[1]["content"]
 
-    assert "sql_pull_data" in messages[11]["content"]
-    assert "anomaly_detection" in messages[11]["content"]
-    assert "klarna_search" in messages[11]["content"]
-    assert "paper_summary" in messages[11]["content"]
+    assert "sql_pull_data" in messages[13]["content"]
+    assert "anomaly_detection" in messages[13]["content"]
+    assert "klarna_search" in messages[13]["content"]
+    assert "paper_summary" in messages[13]["content"]
 
 
 def test_code_correction_prompt():
@@ -547,7 +558,14 @@ def test_code_correction_prompt():
     assert messages[3]["role"] == "user"
     assert messages[3]["content"] == (
         "-----------------------------\n"
-        "- User: Please check the code and try again.\n"
+        "# Feedback of the code in the last round (None if no feedback):\n"
+        "## Execution\n"
+        "Your code has failed to execute with the following error:\n"
+        "The code failed to execute. Please check the code and try again.\n"
+        "\n"
+        "\n"
+        "# Request from the User in this round:\n"
+        "Please check the code and try again.\n"
         "Please follow the instructions below to complete the task:\n"
         "- ProgramApe can refer to intermediate variables in the generated code from "
         "previous successful rounds and the context summary in the current "
@@ -555,6 +573,6 @@ def test_code_correction_prompt():
         "- ProgramApe should not refer to any information from failed rounds, rounds "
         "that have not been executed, or previous Conversations.\n"
         "- ProgramApe put all the result variables in the last line of the code.\n"
-        '- ProgramApe should leave "verification", "code_error", "execution_status", '
-        'and "execution_result" empty in the response. \n'
+        "- ProgramApe must not import the plugins and otherwise the code will be "
+        "failed to execute.\n"
     )
diff --git a/tests/unit_tests/test_embedding.py b/tests/unit_tests/test_embedding.py
index 6897c79d..dbdd6e73 100644
--- a/tests/unit_tests/test_embedding.py
+++ b/tests/unit_tests/test_embedding.py
@@ -1,3 +1,4 @@
+import pytest
 from injector import Injector
 
 from taskweaver.config.config_mgt import AppConfigSource
@@ -28,6 +29,7 @@ def test_sentence_transformer_embedding():
     assert len(embedding1[1]) == 768
 
 
+@pytest.mark.skip(reason="missing api key")
 def test_openai_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
@@ -49,6 +51,7 @@ def test_openai_embedding():
     assert len(embedding1[1]) == 1536
 
 
+@pytest.mark.skip(reason="missing api key")
 def test_ollama_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
@@ -68,6 +71,7 @@ def test_ollama_embedding():
     assert len(embedding1[1]) == 4096
 
 
+@pytest.mark.skip(reason="missing api key")
 def test_qwen_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
diff --git a/tests/unit_tests/test_round_compressor.py b/tests/unit_tests/test_round_compressor.py
new file mode 100644
index 00000000..d1c84bb2
--- /dev/null
+++ b/tests/unit_tests/test_round_compressor.py
@@ -0,0 +1,83 @@
+from injector import Injector
+
+from taskweaver.config.config_mgt import AppConfigSource
+from taskweaver.logging import LoggingModule
+from taskweaver.memory import RoundCompressor
+
+
+def test_round_compressor():  # checks compress_rounds results for 1 to 4 rounds
+    from taskweaver.memory import Post, Round
+    # Wire up an injector with logging and a config that sets both compressor thresholds to 2.
+    app_injector = Injector(
+        [LoggingModule],
+    )
+    app_config = AppConfigSource(
+        config={
+            "llm.api_key": "test_key",
+            "round_compressor.rounds_to_compress": 2,
+            "round_compressor.rounds_to_retain": 2,
+        },
+    )
+    app_injector.binder.bind(AppConfigSource, to=app_config)
+    compressor = app_injector.get(RoundCompressor)
+    # The configured thresholds are expected to be exposed as attributes on the compressor.
+    assert compressor.rounds_to_compress == 2
+    assert compressor.rounds_to_retain == 2
+    # A minimal round: the User says hello to the Planner and the Planner answers the User.
+    round1 = Round.create(user_query="hello", id="round-1")
+    post1 = Post.create(
+        message="hello",
+        send_from="User",
+        send_to="Planner",
+        attachment_list=[],
+    )
+    post2 = Post.create(
+        message="hello",
+        send_from="Planner",
+        send_to="User",
+        attachment_list=[],
+    )
+    round1.add_post(post1)
+    round1.add_post(post2)
+    # One round: the summary is expected to be "None" and the single round is returned.
+    summary, retained = compressor.compress_rounds(
+        [round1],
+        lambda x: x,
+        use_back_up_engine=False,
+    )
+    assert summary == "None"
+    assert len(retained) == 1
+    # The later rounds reuse post1/post2; only the round id changes.
+    round2 = Round.create(user_query="hello", id="round-2")
+    round2.add_post(post1)
+    round2.add_post(post2)
+    # Two rounds: still no summary, both rounds returned.
+    summary, retained = compressor.compress_rounds(
+        [round1, round2],
+        lambda x: x,
+        use_back_up_engine=False,
+    )
+    assert summary == "None"
+    assert len(retained) == 2
+    # Three rounds: still no summary, all three rounds returned.
+    round3 = Round.create(user_query="hello", id="round-3")
+    round3.add_post(post1)
+    round3.add_post(post2)
+    summary, retained = compressor.compress_rounds(
+        [round1, round2, round3],
+        lambda x: x,
+        use_back_up_engine=False,
+    )
+    assert summary == "None"
+    assert len(retained) == 3
+    # NOTE(review): 4 rounds equals rounds_to_compress + rounds_to_retain, yet the test expects no summary and all 4 rounds back; confirm this boundary is intended.
+    round4 = Round.create(user_query="hello", id="round-4")
+    round4.add_post(post1)
+    round4.add_post(post2)
+    summary, retained = compressor.compress_rounds(
+        [round1, round2, round3, round4],
+        lambda x: x,
+        use_back_up_engine=False,
+    )
+    assert summary == "None"
+    assert len(retained) == 4

From ce58d46270c2c555a2d7553ebf663297dddd006d Mon Sep 17 00:00:00 2001
From: Liqun Li <liqli@microsoft.com>
Date: Tue, 19 Dec 2023 17:19:18 +0800
Subject: [PATCH 2/5] A pytest flow. (#80)

A pytest flow.

---------

Co-authored-by: Jack Q <qiaobo@outlook.com>
---
 .github/workflows/pytest.yml             | 43 ++++++++++++++++++++++++
 requirements.txt                         |  3 +-
 tests/unit_tests/test_embedding.py       | 11 ++++--
 tests/unit_tests/test_plugin_selector.py |  4 +++
 4 files changed, 57 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/pytest.yml

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000..2846e23a
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,43 @@
+name: Python package
+# Run the unit tests (with coverage of the taskweaver package) on pushes and pull requests to main.
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  pytest:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+      - name: Install taskweaver
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          pip install -e .
+      - name: Test with pytest
+        run: |
+          pip install pytest pytest-cov
+          pytest tests/unit_tests --collect-only
+          pytest tests/unit_tests -v --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov=taskweaver --cov-report=xml --cov-report=html
+      - name: Upload pytest test results
+        uses: actions/upload-artifact@v3
+        with:
+          name: pytest-results-${{ matrix.python-version }}
+          path: junit/test-results-${{ matrix.python-version }}.xml
+        # Use always() to always run this step to publish test results when there are test failures
+        if: ${{ always() }}
+
diff --git a/requirements.txt b/requirements.txt
index f419c961..cc07fac1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,9 +8,10 @@ pyyaml>=6.0
 scikit-learn>=1.2.2
 click>=8.0.1
 urllib3>=1.26.17
-jsonschema==4.17.3
+jsonschema==4.20.0
 injector>=0.21.0
 ijson>=3.2.3
+requests>=2.31.0
 
 # Code Execution related
 ipykernel==6.26.0
diff --git a/tests/unit_tests/test_embedding.py b/tests/unit_tests/test_embedding.py
index dbdd6e73..a96af2c1 100644
--- a/tests/unit_tests/test_embedding.py
+++ b/tests/unit_tests/test_embedding.py
@@ -1,3 +1,5 @@
+import os
+
 import pytest
 from injector import Injector
 
@@ -7,7 +9,10 @@
 from taskweaver.llm.openai import OpenAIService
 from taskweaver.llm.sentence_transformer import SentenceTransformerService
 
+IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
+
 
+@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test doesn't work in Github Actions.")
 def test_sentence_transformer_embedding():
     app_injector = Injector([])
     app_config = AppConfigSource(
@@ -29,7 +34,7 @@ def test_sentence_transformer_embedding():
     assert len(embedding1[1]) == 768
 
 
-@pytest.mark.skip(reason="missing api key")
+@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test doesn't work in Github Actions.")
 def test_openai_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
@@ -51,7 +56,7 @@ def test_openai_embedding():
     assert len(embedding1[1]) == 1536
 
 
-@pytest.mark.skip(reason="missing api key")
+@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test doesn't work in Github Actions.")
 def test_ollama_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
@@ -71,7 +76,7 @@ def test_ollama_embedding():
     assert len(embedding1[1]) == 4096
 
 
-@pytest.mark.skip(reason="missing api key")
+@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test doesn't work in Github Actions.")
 def test_qwen_embedding():
     app_injector = Injector()
     app_config = AppConfigSource(
diff --git a/tests/unit_tests/test_plugin_selector.py b/tests/unit_tests/test_plugin_selector.py
index d0cc7250..fd41a758 100644
--- a/tests/unit_tests/test_plugin_selector.py
+++ b/tests/unit_tests/test_plugin_selector.py
@@ -1,12 +1,16 @@
 import os
 
+import pytest
 from injector import Injector
 
 from taskweaver.code_interpreter.code_generator.plugin_selection import PluginSelector
 from taskweaver.config.config_mgt import AppConfigSource
 from taskweaver.memory.plugin import PluginModule
 
+IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
 
+
+@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test doesn't work in Github Actions.")
 def test_plugin_selector():
     app_injector = Injector([PluginModule])
     app_config = AppConfigSource(

From ea4884dccc21ad16c0b222c0cb7bdde874e81e5e Mon Sep 17 00:00:00 2001
From: Shilin He <shilhe@microsoft.com>
Date: Wed, 20 Dec 2023 03:13:31 +0000
Subject: [PATCH 3/5] add github action

---
 .github/workflows/deploy-website.yaml |  47 +++++++++
 website/docs/Example/example.md       | 140 --------------------------
 website/sidebars.js                   |   1 +
 3 files changed, 48 insertions(+), 140 deletions(-)
 create mode 100644 .github/workflows/deploy-website.yaml
 delete mode 100644 website/docs/Example/example.md

diff --git a/.github/workflows/deploy-website.yaml b/.github/workflows/deploy-website.yaml
new file mode 100644
index 00000000..7623067f
--- /dev/null
+++ b/.github/workflows/deploy-website.yaml
@@ -0,0 +1,47 @@
+name: docs  # builds the website and publishes it to GitHub Pages
+
+on:
+  pull_request:
+    branches: [main]
+    path:
+      - 'website/*'
+      - '.github/workflows/deploy-website.yml'
+  push:
+    branches: [main]
+    path:
+      - 'website/*'
+      - '.github/workflows/deploy-website.yml'
+  workflow_dispatch:  # allows the workflow to be started manually
+  merge_group:
+    types: [checks_requested]
+
+jobs:
+  gh-release:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: website  # default directory for the `run` steps below
+    steps:
+      - uses: actions/checkout@v3
+      - name: Use Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: 20.x
+      - name: Build website  # picks yarn, `npm ci`, or `npm i` depending on which lockfile exists
+        run: |
+          if [ -e yarn.lock ]; then
+          yarn install --frozen-lockfile --ignore-engines
+          yarn build
+          elif [ -e package-lock.json ]; then
+          npm ci
+          npm run build
+          else
+          npm i --legacy-peer-deps
+          npm run build
+          fi
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          # Build output to publish to the `gh-pages` branch:
+          publish_dir: ./website/build
\ No newline at end of file
diff --git a/website/docs/Example/example.md b/website/docs/Example/example.md
deleted file mode 100644
index 0b2ee3cf..00000000
--- a/website/docs/Example/example.md
+++ /dev/null
@@ -1,140 +0,0 @@
-There are two types of examples: (1) planning examples and (2) code interpreter examples. 
-Planning examples are used to demonstrate how to use TaskWeaver to plan for a specific task. 
-Code generation examples are used to demonstrate how to generate code or orchestrate plugins to perform a specific task.
-
-#### Planning Examples
-
-A planning example tells LLMs how to plan for a specific query from the user; talk to the code interpreter; 
-receive the execution result from the code interpreter; and summarize the execution result.
-Before constructing the planning example, we strongly encourage you to go through the
-[planner prompt](https://github.com/microsoft/TaskWeaver/blob/main/taskweaver/planner/planner_prompt.yaml).
-
-The following is an example of a planning example which contains 4 posts. 
-Each post contains a message, a sender, a receiver, and a list of attachments.
-1. The first post is sent from the user to the planner.
-   The message is "count the rows of /home/data.csv", which is the same as the user query.
-2. The second post is sent from the planner to the code interpreter.
-   The message is "Please load the data file /home/data.csv and count the rows of the loaded data".
-   The attachment list contains 3 attachments:
-   1. The first attachment is the initial plan, which is a markdown string.
-   2. The second attachment is the plan, which is a markdown string.
-   3. The third attachment is the current plan step, which is a markdown string.
-3. The third post is sent from the code interpreter to the planner.
-   The message is "Load the data file /home/data.csv successfully and there are 100 rows in the data file".
-4. The fourth post is sent from the planner to the user.
-   The message is "The data file /home/data.csv is loaded and there are 100 rows in the data file".
-   The attachment list contains 3 attachments:
-   1. The first attachment is the initial plan, which is the same as the second attachment of the second post.
-   2. The second attachment is the plan, which is the same as the third attachment of the second post.
-   3. The third attachment is the current plan step, which is a markdown string.
-
-```yaml
-enabled: True
-rounds:
-  - user_query: count the rows of /home/data.csv
-    state: created
-    post_list:
-      - message: count the rows of /home/data.csv
-        send_from: User
-        send_to: Planner
-        attachment_list:
-      - message: Please load the data file /home/data.csv and count the rows of the loaded data
-        send_from: Planner
-        send_to: CodeInterpreter
-        attachment_list:
-        - type: init_plan
-          content: |-
-            1. load the data file
-            2. count the rows of the loaded data <sequentially depends on 1>
-            3. report the result to the user <interactively depends on 2>
-        - type: plan
-          content: |-
-            1. instruct CodeInterpreter to load the data file and count the rows of the loaded data
-            2. report the result to the user
-        - type: current_plan_step
-          content: 1. instruct CodeInterpreter to load the data file and count the rows of the loaded data
-      - message: Load the data file /home/data.csv successfully and there are 100 rows in the data file
-        send_from: CodeInterpreter
-        send_to: Planner
-        attachment_list:
-      - message: The data file /home/data.csv is loaded and there are 100 rows in the data file
-        send_from: Planner
-        send_to: User
-        attachment_list:
-          - type: init_plan
-            content: |-
-              1. load the data file
-              2. count the rows of the loaded data <sequentially depends on 1>
-              3. report the result to the user <interactively depends on 2>
-          - type: plan
-            content: |-
-              1. instruct CodeInterpreter to load the data file and count the rows of the loaded data
-              2. report the result to the user
-          - type: current_plan_step
-            content: 2. report the result to the user
-```
-
-#### Code Interpreter Examples
-
-A code interpreter example tells LLMs how to generate code or orchestrate plugins to perform a specific task.
-The task is from the planner. Before constructing the code interpreter example, we strongly encourage you to
-read the [code generator prompt](https://github.com/microsoft/TaskWeaver/blob/main/taskweaver/code_interpreter/code_generator/code_generator_prompt.yaml). 
-
-The following is an example of a code interpreter example which contains 2 posts.
-Each post contains a message, a sender, a receiver, and a list of attachments.
-
-1. The first post is sent from the planner to the code interpreter.
-   The message is "Please read file /abc/def.txt".
-2. The second post is sent from the code interpreter to the planner.
-   The message is "read file /abc/def.txt".
-   The attachment list contains 6 attachments:
-   1. The first attachment is the thought of the code interpreter, which is a markdown string.
-   2. The second attachment is the generated code, which is in python.
-   3. The third attachment is the verification status, which is CORRECT, INCORRECT, or NONE.
-   4. The fourth attachment is the verification error message, which is a markdown string.
-   5. The fifth attachment is the execution status, which is SUCCESS, FAILURE, or NONE.
-   6. The sixth attachment is the execution result, which is a markdown string.
-
-```yaml
-enabled: True
-rounds:
-  - user_query: read file /abc/def.txt
-    state: finished
-    post_list:
-      - message: read file /abc/def.txt
-        send_from: Planner
-        send_to: CodeInterpreter
-        attachment_list: []
-      - message: I'm sorry, I cannot find the file /abc/def.txt. An FileNotFoundException has been raised.
-        send_from: CodeInterpreter
-        send_to: Planner
-        attachment_list:
-          - type: thought
-            content: "{ROLE_NAME} will generate a code snippet to read the file /abc/def.txt and present the content to the user."
-          - type: python
-            content: |-
-              file_path = "/abc/def.txt"  
-
-              with open(file_path, "r") as file:  
-                  file_contents = file.read()  
-                  print(file_contents)
-          - type: verification
-            content: CORRECT
-          - type: code_error
-            content: No code error.
-          - type: execution_status
-            content: FAILURE
-          - type: execution_result
-            content: FileNotFoundException, the file /abc/def.txt does not exist.
-```
-
-In this example, `verification` is about whether the generated code is correct or not. 
-We implemented a module to verify the generated code. If the code is syntactically incorrect, 
-or the code violates the constraints, the verification status will be `INCORRECT` 
-and some error messages will be returned.
-A verification of NONE means that the code has not been verified, which means verification has been turned off.
-
-In this example, `execution_status` is about whether the generated code can be executed successfully or not.
-If the execution is successful, the execution status will be `SUCCESS` and the execution result will be returned.
-Otherwise, the execution status will be `FAILURE` and some error messages will be returned.
-A execution_status of `NONE` means that the code has not been executed.
\ No newline at end of file
diff --git a/website/sidebars.js b/website/sidebars.js
index 0f1795ab..154f8584 100644
--- a/website/sidebars.js
+++ b/website/sidebars.js
@@ -36,6 +36,7 @@ const sidebars = {
       collapsed: false,
       items: ['plugin/plugin_intro', 'plugin/plugin_selection', 'plugin/embedding'],
     },
+    'example/example',
     'compression',
     'configurations',
     'planner',

From f548f64f808ac3c2760693ce9e39a4ec84334f43 Mon Sep 17 00:00:00 2001
From: Shilin He <shilhe@microsoft.com>
Date: Wed, 20 Dec 2023 03:15:13 +0000
Subject: [PATCH 4/5] add github action

---
 .github/workflows/deploy-website.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy-website.yaml b/.github/workflows/deploy-website.yaml
index 7623067f..172573df 100644
--- a/.github/workflows/deploy-website.yaml
+++ b/.github/workflows/deploy-website.yaml
@@ -2,12 +2,12 @@ name: docs
 
 on:
   pull_request:
-    branches: [main]
-    path:
-      - 'website/*'
-      - '.github/workflows/deploy-website.yml'
+    branches: [docs]
+    paths:  # run only when the website sources or this workflow file change
+      - 'website/**'
+      - '.github/workflows/deploy-website.yaml'
   push:
-    branches: [main]
-    path:
-      - 'website/*'
-      - '.github/workflows/deploy-website.yml'
+    branches: [docs]
+    paths:  # run only when the website sources or this workflow file change
+      - 'website/**'
+      - '.github/workflows/deploy-website.yaml'

From 97df31b092f1c4b995c15254d18c50284de9c0b4 Mon Sep 17 00:00:00 2001
From: Shilin He <shilhe@microsoft.com>
Date: Wed, 20 Dec 2023 03:17:59 +0000
Subject: [PATCH 5/5] revise readme

---
 website/docs/overview.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/website/docs/overview.md b/website/docs/overview.md
index cbe385e6..7a923999 100644
--- a/website/docs/overview.md
+++ b/website/docs/overview.md
@@ -1 +1,3 @@
-# This is the Overview page
\ No newline at end of file
+# Overview
+
+TaskWeaver is a code-first agent framework
\ No newline at end of file