From 2e680337bf2e28c47b40f201ee548c386c8bc721 Mon Sep 17 00:00:00 2001
From: Artemy Belousov
Date: Fri, 30 Aug 2024 16:15:52 +0400
Subject: [PATCH] Support custom callbacks in Langchain agents + streaming demo (#73)

---
 docs/source/examples.rst                     |   1 +
 docs/source/examples/research_agent.nblink   |   2 +-
 .../examples/streaming_agent_output.nblink   |   3 +
 examples/Streaming agent output.ipynb        | 255 ++++++++++++++++++
 motleycrew/agents/langchain/langchain.py     |  17 +-
 motleycrew/agents/langchain/legacy_react.py  |   4 +
 .../agents/langchain/tool_calling_react.py   |   6 +-
 motleycrew/storage/kuzu_graph_store.py       |   2 +-
 8 files changed, 286 insertions(+), 4 deletions(-)
 create mode 100644 docs/source/examples/streaming_agent_output.nblink
 create mode 100644 examples/Streaming agent output.ipynb

diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index a6d38e54..d451a368 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -9,4 +9,5 @@ Examples
    examples/research_agent
    examples/validating_agent_output
    examples/advanced_output_handling
+   examples/streaming_agent_output
    autogen
diff --git a/docs/source/examples/research_agent.nblink b/docs/source/examples/research_agent.nblink
index 59a8f8da..0de45713 100644
--- a/docs/source/examples/research_agent.nblink
+++ b/docs/source/examples/research_agent.nblink
@@ -1,3 +1,3 @@
 {
     "path": "../../../examples/Multi-step research agent.ipynb"
-}
\ No newline at end of file
+}
diff --git a/docs/source/examples/streaming_agent_output.nblink b/docs/source/examples/streaming_agent_output.nblink
new file mode 100644
index 00000000..3efc0669
--- /dev/null
+++ b/docs/source/examples/streaming_agent_output.nblink
@@ -0,0 +1,3 @@
+{
+    "path": "../../../examples/Streaming agent output.ipynb"
+}
diff --git a/examples/Streaming agent output.ipynb b/examples/Streaming agent output.ipynb
new file mode 100644
index 00000000..72d813d1
--- /dev/null
+++ b/examples/Streaming agent output.ipynb
@@ -0,0 +1,255 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Streaming agent output\n",
+    "\n",
+    "In some applications, especially where humans are involved, output streaming can be useful for improving responsiveness. LLMs can take a while to generate output, and if there is a human on the other end, it is much better to let them watch the output appear in real time than to make them wait 30 seconds until the output is fully generated and displayed.\n",
+    "\n",
+    "We'll demonstrate how this can be achieved in motleycrew using Langchain callbacks. In the first example, we'll stream agent output to the console while the agent solves a task inside a crew. The second example will show a standalone agent that asynchronously streams its output to a web client using WebSockets.\n",
+    "\n",
+    "Please note that streaming in this fashion is currently only possible with Langchain-based agents. Please file a GitHub issue if you'd like to see other frameworks supported."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools import DuckDuckGoSearchRun\n", + "from langchain.callbacks.base import BaseCallbackHandler\n", + "\n", + "from motleycrew import MotleyCrew\n", + "from motleycrew.tasks import SimpleTask\n", + "from motleycrew.agents.langchain import ReActToolCallingMotleyAgent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For observing the output in real time, we'll use a callback that prints the output tokens as they come in from the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class CommandLineCallbackHandler(BaseCallbackHandler):\n", + " def on_llm_new_token(self, token: str, **kwargs) -> None:\n", + " print(token, end=\"\", flush=True)\n", + "\n", + " def on_llm_end(self, response, **kwargs) -> None:\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "agent = ReActToolCallingMotleyAgent(\n", + " tools=[DuckDuckGoSearchRun()],\n", + " runnable_config={\"callbacks\": [CommandLineCallbackHandler()]},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To provide a brief report on the latest AI advancements in 2024, I need to gather information on recent developments, breakthroughs, and trends in AI for this year. I will use DuckDuckGo Search to find relevant articles and reports.\n", + "\n", + "I will perform a search query to gather the latest information on AI advancements in 2024.\n", + "\n", + "\n", + "Thought: The search results provide some insights into the latest AI advancements in 2024, including trends and technologies. I will summarize the key points from the information gathered to create a brief report.\n", + "\n", + "Final Answer: \n", + "\n", + "### Brief Report on the Latest AI Advancements in 2024\n", + "\n", + "1. **Generative AI for Everyday Use**:\n", + " - Generative AI is becoming more accessible and useful for non-technical users. This year, there is a significant increase in people experimenting with various small AI models.\n", + "\n", + "2. **Top AI Trends**:\n", + " - **Multimodal AI**: This technology goes beyond traditional single-mode data processing, integrating multiple types of data (e.g., text, images, audio) to create more sophisticated AI models.\n", + " - **Ethics and Safety**: There is a growing emphasis on ethical AI development and deployment, with a focus on safety and compliance with evolving regulatory standards.\n", + "\n", + "3. **Breakthrough Technologies**:\n", + " - According to MIT Technology Review, AI continues to be a major area of innovation, with several technologies identified as having the potential to significantly impact our lives.\n", + "\n", + "4. **Industry Leaders**:\n", + " - Companies like Google, Meta, Microsoft, and OpenAI are at the forefront of AI advancements, continuously pushing the boundaries of what AI can achieve.\n", + "\n", + "5. 
**Integration into Daily Life**:\n",
+      "   - Since the release of OpenAI's ChatGPT, AI has been increasingly integrated into daily activities, making it a seamless part of everyday life.\n",
+      "\n",
+      "These advancements reflect a deepening sophistication in AI technologies and a cautious approach to their development and deployment, ensuring they are both innovative and responsible.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[TaskUnit(status=done)]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "crew = MotleyCrew()\n",
+    "task = SimpleTask(\n",
+    "    crew=crew,\n",
+    "    agent=agent,\n",
+    "    description=\"Conduct a brief report about the latest AI advancements in 2024.\",\n",
+    ")\n",
+    "crew.run()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Streaming agent output to a web client using WebSockets\n",
+    "\n",
+    "In real applications, it's more likely that you'll want to connect an agent to a client via some protocol. WebSockets are a common choice for tasks like this because they allow robust, low-latency communication between the client and the server.\n",
+    "\n",
+    "We'll create a simple web app with FastAPI that allows you to send messages to the agent and see the output in real time."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastapi import FastAPI, WebSocket\n",
+    "from fastapi.responses import HTMLResponse\n",
+    "import uvicorn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class WebSocketCallbackHandler(BaseCallbackHandler):\n",
+    "    def __init__(self, websocket):\n",
+    "        self.websocket = websocket\n",
+    "\n",
+    "    async def on_llm_new_token(self, token: str, **kwargs) -> None:\n",
+    "        await self.websocket.send_json({\"type\": \"token\", \"content\": token})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app = FastAPI()\n",
+    "\n",
+    "@app.get(\"/\")\n",
+    "async def get():\n",
+    "    # Minimal client page: sends the prompt over the WebSocket and renders the streamed tokens.\n",
+    "    html_content = \"\"\"\n",
+    "    <html>\n",
+    "    <head>\n",
+    "        <title>motleycrew streaming demo</title>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <h1>motleycrew streaming demo</h1>\n",
+    "        <input type=\"text\" id=\"message\" placeholder=\"Ask the agent something\">\n",
+    "        <button onclick=\"sendMessage()\">Send</button>\n",
+    "        <pre id=\"output\"></pre>\n",
+    "        <script>\n",
+    "            const ws = new WebSocket(\"ws://\" + location.host + \"/ws\");\n",
+    "            const output = document.getElementById(\"output\");\n",
+    "\n",
+    "            // The server sends plain text lines as well as JSON-encoded tokens.\n",
+    "            ws.onmessage = (event) => {\n",
+    "                try {\n",
+    "                    const data = JSON.parse(event.data);\n",
+    "                    if (data.type === \"token\") {\n",
+    "                        output.textContent += data.content;\n",
+    "                        return;\n",
+    "                    }\n",
+    "                } catch (e) { /* not JSON, fall through */ }\n",
+    "                output.textContent += event.data;\n",
+    "            };\n",
+    "\n",
+    "            function sendMessage() {\n",
+    "                const input = document.getElementById(\"message\");\n",
+    "                output.textContent = \"\";\n",
+    "                ws.send(input.value);\n",
+    "                input.value = \"\";\n",
+    "            }\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    return HTMLResponse(content=html_content)\n",
+    "\n",
+    "\n",
+    "@app.websocket(\"/ws\")\n",
+    "async def websocket_endpoint(websocket: WebSocket):\n",
+    "    await websocket.accept()\n",
+    "    while True:\n",
+    "        message = await websocket.receive_text()\n",
+    "\n",
+    "        agent = ReActToolCallingMotleyAgent(\n",
+    "            tools=[DuckDuckGoSearchRun()],\n",
+    "            runnable_config={\"callbacks\": [WebSocketCallbackHandler(websocket)]},\n",
+    "        )\n",
+    "\n",
+    "        await websocket.send_text(\"Agent response:\\n\")\n",
+    "        await agent.ainvoke({\"prompt\": message})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = uvicorn.Config(app)\n",
+    "server = uvicorn.Server(config)\n",
+    "await server.serve()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now you can open http://localhost:8000/ in your browser and send messages to the agent. You should see the output generated in real time."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/motleycrew/agents/langchain/langchain.py b/motleycrew/agents/langchain/langchain.py
index 08d37ccd..129812bd 100644
--- a/motleycrew/agents/langchain/langchain.py
+++ b/motleycrew/agents/langchain/langchain.py
@@ -5,6 +5,7 @@
 from langchain.agents import AgentExecutor
 from langchain_core.chat_history import InMemoryChatMessageHistory
 from langchain_core.runnables import RunnableConfig
+from langchain_core.runnables.config import merge_configs
 from langchain_core.runnables.history import RunnableWithMessageHistory, GetSessionHistoryCallable
 from langchain_core.prompts.chat import ChatPromptTemplate
 
@@ -28,6 +29,7 @@ def __init__(
         output_handler: MotleySupportedTool | None = None,
         chat_history: bool | GetSessionHistoryCallable = True,
         input_as_messages: bool = False,
+        runnable_config: RunnableConfig | None = None,
         verbose: bool = False,
     ):
         """
@@ -69,6 +71,9 @@ def __init__(
             input_as_messages: Whether the agent expects a list of messages
                 as input instead of a single string.
 
+            runnable_config: Default Langchain config to use when invoking the agent.
+                It can be used to add callbacks, metadata, etc.
+
             verbose: Whether to log verbose output.
""" super().__init__( @@ -90,6 +95,7 @@ def __init__( self.get_session_history_callable = chat_history self.input_as_messages = input_as_messages + self.runnable_config = runnable_config def materialize(self): """Materialize the agent and wrap it in RunnableWithMessageHistory if needed.""" @@ -147,6 +153,7 @@ def invoke( config: Optional[RunnableConfig] = None, **kwargs: Any, ) -> Any: + config = merge_configs(self.runnable_config, config) prompt = self.prepare_for_invocation(input=input, prompt_as_messages=self.input_as_messages) config = add_default_callbacks_to_langchain_config(config) @@ -166,6 +173,7 @@ def from_agent( description: str | None = None, prompt_prefix: str | None = None, tools: Sequence[MotleySupportedTool] | None = None, + runnable_config: RunnableConfig | None = None, verbose: bool = False, ) -> "LangchainMotleyAgent": """Create a LangchainMotleyAgent from a :class:`langchain.agents.AgentExecutor` instance. @@ -187,6 +195,9 @@ def from_agent( tools: Tools to add to the agent. + runnable_config: Default Langchain config to use when invoking the agent. + It can be used to add callbacks, metadata, etc. + verbose: Whether to log verbose output. """ # TODO: do we really need to unite the tools implicitly like this? @@ -197,7 +208,11 @@ def from_agent( tools = list(tools or []) + list(agent.tools or []) wrapped_agent = LangchainMotleyAgent( - prompt_prefix=prompt_prefix, description=description, tools=tools, verbose=verbose + prompt_prefix=prompt_prefix, + description=description, + tools=tools, + runnable_config=runnable_config, + verbose=verbose, ) wrapped_agent._agent = agent return wrapped_agent diff --git a/motleycrew/agents/langchain/legacy_react.py b/motleycrew/agents/langchain/legacy_react.py index 17f6e66c..abb12f8a 100644 --- a/motleycrew/agents/langchain/legacy_react.py +++ b/motleycrew/agents/langchain/legacy_react.py @@ -40,6 +40,7 @@ def __init__( handle_parsing_errors: bool = True, handle_tool_errors: bool = True, llm: BaseLanguageModel | None = None, + runnable_config: RunnableConfig | None = None, verbose: bool = False, ): """ @@ -54,6 +55,8 @@ def __init__( handle_parsing_errors: Whether to handle parsing errors. handle_tool_errors: Whether to handle tool errors. llm: Language model to use. + runnable_config: Default Langchain config to use when invoking the agent. + It can be used to add callbacks, metadata, etc. verbose: Whether to log verbose output. """ if prompt is None: @@ -96,5 +99,6 @@ def agent_factory( tools=tools, output_handler=output_handler, chat_history=chat_history, + runnable_config=runnable_config, verbose=verbose, ) diff --git a/motleycrew/agents/langchain/tool_calling_react.py b/motleycrew/agents/langchain/tool_calling_react.py index 02e5655f..a4358bbf 100644 --- a/motleycrew/agents/langchain/tool_calling_react.py +++ b/motleycrew/agents/langchain/tool_calling_react.py @@ -119,6 +119,7 @@ def __init__( llm: BaseChatModel | None = None, max_iterations: int | None = Defaults.DEFAULT_REACT_AGENT_MAX_ITERATIONS, intermediate_steps_processor: Callable | None = None, + runnable_config: RunnableConfig | None = None, verbose: bool = False, ): """ @@ -139,10 +140,12 @@ def __init__( handle_tool_errors: Whether to handle tool errors. If True, `handle_tool_error` and `handle_validation_error` in all tools are set to True. + llm: Language model to use. max_iterations: The maximum number of agent iterations. intermediate_steps_processor: Function that modifies the intermediate steps array in some way before each agent iteration. 
- llm: Language model to use. + runnable_config: Default Langchain config to use when invoking the agent. + It can be used to add callbacks, metadata, etc. verbose: Whether to log verbose output. Prompt: @@ -205,5 +208,6 @@ def agent_factory( output_handler=output_handler, chat_history=chat_history, input_as_messages=True, + runnable_config=runnable_config, verbose=verbose, ) diff --git a/motleycrew/storage/kuzu_graph_store.py b/motleycrew/storage/kuzu_graph_store.py index 2d8541ba..93662cd7 100644 --- a/motleycrew/storage/kuzu_graph_store.py +++ b/motleycrew/storage/kuzu_graph_store.py @@ -606,7 +606,7 @@ def _get_cypher_type_and_is_json_by_python_type_annotation( """ cypher_type = MotleyKuzuGraphStore.PYTHON_TO_CYPHER_TYPES_MAPPING.get(annotation) if not cypher_type: - logger.warning( + logger.info( "No known Cypher type matching annotation %s, will use JSON string", annotation, )
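
Note on usage: the runnable_config passed at agent construction is merged with any config passed at call time (the merge_configs call added in invoke above), so callbacks, tags, and metadata supplied per call are combined with the agent's defaults rather than replacing them. A minimal sketch of the intended usage, mirroring the notebook above; the TokenPrinter handler, the prompt, and the tag are illustrative and not part of the patch:

    from langchain.callbacks.base import BaseCallbackHandler
    from langchain.tools import DuckDuckGoSearchRun

    from motleycrew.agents.langchain import ReActToolCallingMotleyAgent


    class TokenPrinter(BaseCallbackHandler):
        """Print each token as soon as the LLM emits it."""

        def on_llm_new_token(self, token: str, **kwargs) -> None:
            print(token, end="", flush=True)


    # Default config attached to every invocation of this agent.
    agent = ReActToolCallingMotleyAgent(
        tools=[DuckDuckGoSearchRun()],
        runnable_config={"callbacks": [TokenPrinter()]},
    )

    # The call-time config is merged with the default one, so the TokenPrinter
    # callback still fires while the extra tag applies to this run only.
    agent.invoke(
        {"prompt": "Give a one-line summary of today's AI news"},
        config={"tags": ["streaming-demo"]},
    )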