diff --git a/examples/Advanced output handling.ipynb b/examples/Advanced output handling.ipynb index fd50b371..4bcc203a 100644 --- a/examples/Advanced output handling.ipynb +++ b/examples/Advanced output handling.ipynb @@ -104,7 +104,7 @@ "coder = ReActToolCallingAgent(\n", " name=\"coder\",\n", " tools=[PythonREPLTool()],\n", - " output_handler=CoderOutputHandler(),\n", + " output_handler=CoderOutputHandler(max_iterations=3),\n", " verbose=True,\n", ")\n", "\n", diff --git a/motleycrew/agents/output_handler.py b/motleycrew/agents/output_handler.py index 3a1d5c03..337f0e64 100644 --- a/motleycrew/agents/output_handler.py +++ b/motleycrew/agents/output_handler.py @@ -5,6 +5,7 @@ from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent from motleycrew.common.exceptions import InvalidOutput +from motleycrew.common import Defaults from motleycrew.tools import MotleyTool @@ -22,7 +23,16 @@ class MotleyOutputHandler(MotleyTool, ABC): _exceptions_to_handle: tuple[Exception] = (InvalidOutput,) """Exceptions that should be returned to the agent when raised in the `handle_output` method.""" - def __init__(self): + def __init__(self, max_iterations: int = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS): + """Initialize the output handler tool. + + Args: + max_iterations (int): Maximum number of iterations to run the output handler. + If an exception is raised in the `handle_output` method, the output handler will return + the exception to the agent unless the number of iterations exceeds `max_iterations`, + in which case the output handler will raise OutputHandlerMaxIterationsExceeded. + """ + self.max_iterations = max_iterations langchain_tool = self._create_langchain_tool() super().__init__(langchain_tool) diff --git a/motleycrew/agents/parent.py b/motleycrew/agents/parent.py index 4c95af35..48311567 100644 --- a/motleycrew/agents/parent.py +++ b/motleycrew/agents/parent.py @@ -12,11 +12,12 @@ from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent from motleycrew.common import MotleyAgentFactory, MotleySupportedTool -from motleycrew.common import logger +from motleycrew.common import logger, Defaults from motleycrew.common.exceptions import ( AgentNotMaterialized, CannotModifyMaterializedAgent, InvalidOutput, + OutputHandlerMaxIterationsExceeded, ) from motleycrew.tools import MotleyTool @@ -131,18 +132,32 @@ def _prepare_output_handler(self) -> Optional[MotleyTool]: if isinstance(self.output_handler, MotleyOutputHandler): exceptions_to_handle = self.output_handler.exceptions_to_handle description = self.output_handler.description + max_iterations = self.output_handler.max_iterations + else: exceptions_to_handle = (InvalidOutput,) description = self.output_handler.description or f"Output handler" assert isinstance(description, str) description += "\n ONLY RETURN THE FINAL RESULT USING THIS TOOL!" + max_iterations = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS + + iteration = 0 def handle_agent_output(*args, **kwargs): assert self.output_handler + nonlocal iteration + try: + iteration += 1 output = self.output_handler._run(*args, **kwargs) except exceptions_to_handle as exc: - return f"{exc.__class__.__name__}: {str(exc)}" + if iteration <= max_iterations: + return f"{exc.__class__.__name__}: {str(exc)}" + raise OutputHandlerMaxIterationsExceeded( + last_call_args=args, + last_call_kwargs=kwargs, + last_exception=exc, + ) raise DirectOutput(output) diff --git a/motleycrew/common/defaults.py b/motleycrew/common/defaults.py index e6795f16..d2e3fdb0 100644 --- a/motleycrew/common/defaults.py +++ b/motleycrew/common/defaults.py @@ -1,10 +1,11 @@ """ Module description """ + from motleycrew.common import LLMFamily from motleycrew.common import GraphStoreType class Defaults: - """ Description + """Description Attributes: DEFAULT_LLM_FAMILY (str): @@ -15,8 +16,10 @@ class Defaults: MODULE_INSTALL_COMMANDS (dict): DEFAULT_NUM_THREADS (int): DEFAULT_EVENT_LOOP_SLEEP (int): + DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS (int): """ + DEFAULT_LLM_FAMILY = LLMFamily.OPENAI DEFAULT_LLM_NAME = "gpt-4o" DEFAULT_LLM_TEMPERATURE = 0.0 @@ -35,3 +38,4 @@ class Defaults: DEFAULT_NUM_THREADS = 4 DEFAULT_EVENT_LOOP_SLEEP = 1 + DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS = 5 diff --git a/motleycrew/common/exceptions.py b/motleycrew/common/exceptions.py index 7eaefba1..eb101b42 100644 --- a/motleycrew/common/exceptions.py +++ b/motleycrew/common/exceptions.py @@ -1,6 +1,6 @@ """ Module description""" -from typing import Any, Optional +from typing import Any, Dict, Optional from motleycrew.common import Defaults @@ -142,3 +142,20 @@ class InvalidOutput(Exception): """Raised in output handlers when an agent's output is not accepted""" pass + + +class OutputHandlerMaxIterationsExceeded(BaseException): + """Raised when the output handlers iteration limit is exceeded""" + + def __init__( + self, + last_call_args: tuple, + last_call_kwargs: Dict[str, Any], + last_exception: Exception, + ): + self.last_call_args = last_call_args + self.last_call_kwargs = last_call_kwargs + self.last_exception = last_exception + + def __str__(self): + return "Maximum number of output handler iterations exceeded" diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0e875e7397179704e9c0f59a201287a3d0c56d7b1a692ad20f775c55b97a94f6.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0e875e7397179704e9c0f59a201287a3d0c56d7b1a692ad20f775c55b97a94f6.pkl deleted file mode 100644 index 84b065b9..00000000 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0e875e7397179704e9c0f59a201287a3d0c56d7b1a692ad20f775c55b97a94f6.pkl and /dev/null differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/142f514247583600c43a67e4328369554bc34f77f60fed42015d2bf2aa25bfa8.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/142f514247583600c43a67e4328369554bc34f77f60fed42015d2bf2aa25bfa8.pkl deleted file mode 100644 index d033d90a..00000000 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/142f514247583600c43a67e4328369554bc34f77f60fed42015d2bf2aa25bfa8.pkl and /dev/null differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl index 4d22d39b..2f872870 100644 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/38c587c596ba4a7320df077e92bb9a303734251dd6391f31a0b66c2dbeb0ce44.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/38c587c596ba4a7320df077e92bb9a303734251dd6391f31a0b66c2dbeb0ce44.pkl deleted file mode 100644 index 8dc4e201..00000000 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/38c587c596ba4a7320df077e92bb9a303734251dd6391f31a0b66c2dbeb0ce44.pkl and /dev/null differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/48ad067e4cacaf445989352111199526d44d510296a19e1d7244c43506147de3.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/48ad067e4cacaf445989352111199526d44d510296a19e1d7244c43506147de3.pkl new file mode 100644 index 00000000..072b0ea8 Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/48ad067e4cacaf445989352111199526d44d510296a19e1d7244c43506147de3.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl index 4dd4367d..d537618f 100644 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/4e48419bce78705da5cb3ab1ca474e97b76ebf2327c1c3a612b665abda772ddd.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/4e48419bce78705da5cb3ab1ca474e97b76ebf2327c1c3a612b665abda772ddd.pkl new file mode 100644 index 00000000..7fa604dc Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/4e48419bce78705da5cb3ab1ca474e97b76ebf2327c1c3a612b665abda772ddd.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/50a29bae1049bc8ceee0c057f6e4f24bfa07c6c2b35ab4fa5f74d04ca879e6d8.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/50a29bae1049bc8ceee0c057f6e4f24bfa07c6c2b35ab4fa5f74d04ca879e6d8.pkl new file mode 100644 index 00000000..ae029a6a Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/50a29bae1049bc8ceee0c057f6e4f24bfa07c6c2b35ab4fa5f74d04ca879e6d8.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5e047e70f7f9302ccb2769f571a821ea9a897de0ec73acf2bbe540c63ef5d262.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5e047e70f7f9302ccb2769f571a821ea9a897de0ec73acf2bbe540c63ef5d262.pkl new file mode 100644 index 00000000..d0408a34 Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5e047e70f7f9302ccb2769f571a821ea9a897de0ec73acf2bbe540c63ef5d262.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a0cc27bc02e7df8d60981c2aacf3d49dbf534a6f999d02431db8e8de8145e52b.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a0cc27bc02e7df8d60981c2aacf3d49dbf534a6f999d02431db8e8de8145e52b.pkl deleted file mode 100644 index da42c0c3..00000000 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a0cc27bc02e7df8d60981c2aacf3d49dbf534a6f999d02431db8e8de8145e52b.pkl and /dev/null differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b3c179a00e76ca4c0fcf3b0d09bd2be9dc164849323b77cb39acc6f39e9ebae2.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b3c179a00e76ca4c0fcf3b0d09bd2be9dc164849323b77cb39acc6f39e9ebae2.pkl deleted file mode 100644 index 9d533136..00000000 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b3c179a00e76ca4c0fcf3b0d09bd2be9dc164849323b77cb39acc6f39e9ebae2.pkl and /dev/null differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d522c1c5bf62810f60ab6905c9d6ee28c4ae2af7f318ed0d99052f68ae0bba56.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d522c1c5bf62810f60ab6905c9d6ee28c4ae2af7f318ed0d99052f68ae0bba56.pkl new file mode 100644 index 00000000..75fddaae Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d522c1c5bf62810f60ab6905c9d6ee28c4ae2af7f318ed0d99052f68ae0bba56.pkl differ diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/07720a3f1c589096bd9c165049cdd12711def9925eefe6d7163d44e2c052b2bf.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e8c8b5987e3502ba0c54804c5e22cd68029ac10a7a00045e8c336a82d12dc58a.pkl similarity index 61% rename from tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/07720a3f1c589096bd9c165049cdd12711def9925eefe6d7163d44e2c052b2bf.pkl rename to tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e8c8b5987e3502ba0c54804c5e22cd68029ac10a7a00045e8c336a82d12dc58a.pkl index aac74743..3b7c8aa0 100644 Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/07720a3f1c589096bd9c165049cdd12711def9925eefe6d7163d44e2c052b2bf.pkl and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e8c8b5987e3502ba0c54804c5e22cd68029ac10a7a00045e8c336a82d12dc58a.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/10b915da79b3af5c79f0fb31f8fbf8ced61cf74463499dd891514cb7e5325e59.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/10b915da79b3af5c79f0fb31f8fbf8ced61cf74463499dd891514cb7e5325e59.pkl new file mode 100644 index 00000000..81297cc6 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/10b915da79b3af5c79f0fb31f8fbf8ced61cf74463499dd891514cb7e5325e59.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/11b4c38f6b58f022f99691bfb7763a6a727df2b3b42cb6fc9a288a3e0ea4e8ae.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/11b4c38f6b58f022f99691bfb7763a6a727df2b3b42cb6fc9a288a3e0ea4e8ae.pkl new file mode 100644 index 00000000..95296636 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/11b4c38f6b58f022f99691bfb7763a6a727df2b3b42cb6fc9a288a3e0ea4e8ae.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/29b0d013feff29c37c8a2aa14aaa6b8863d0d3feec1899dfa8deb79d2c486df5.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/29b0d013feff29c37c8a2aa14aaa6b8863d0d3feec1899dfa8deb79d2c486df5.pkl new file mode 100644 index 00000000..51d26583 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/29b0d013feff29c37c8a2aa14aaa6b8863d0d3feec1899dfa8deb79d2c486df5.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2af6eae4f7a1596e28cf4bfdf6549c5eb275a3b83e3fac88d02aace6feec4cc9.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2af6eae4f7a1596e28cf4bfdf6549c5eb275a3b83e3fac88d02aace6feec4cc9.pkl new file mode 100644 index 00000000..16dee13f Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2af6eae4f7a1596e28cf4bfdf6549c5eb275a3b83e3fac88d02aace6feec4cc9.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2ff106841dfa3e905032e73975a6a34938bb2668d3d76ba7a5801760b4a6eb51.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2ff106841dfa3e905032e73975a6a34938bb2668d3d76ba7a5801760b4a6eb51.pkl new file mode 100644 index 00000000..6f7d2571 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/2ff106841dfa3e905032e73975a6a34938bb2668d3d76ba7a5801760b4a6eb51.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/3c6f8ea143fba69a0da636c750ccb0d4f9bb21acec233ffda049f5019996c013.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/3c6f8ea143fba69a0da636c750ccb0d4f9bb21acec233ffda049f5019996c013.pkl new file mode 100644 index 00000000..c0c2f9e6 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/3c6f8ea143fba69a0da636c750ccb0d4f9bb21acec233ffda049f5019996c013.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/44b802b8c97658a162e84c2a129d622340c7eac1df3843b586011c3e5676ae36.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/44b802b8c97658a162e84c2a129d622340c7eac1df3843b586011c3e5676ae36.pkl new file mode 100644 index 00000000..4ad32275 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/44b802b8c97658a162e84c2a129d622340c7eac1df3843b586011c3e5676ae36.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/584d9a8ef900f68c4a9296d9ed420b1fe415b9ee1f7eeb0df2a01ef07254926d.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/584d9a8ef900f68c4a9296d9ed420b1fe415b9ee1f7eeb0df2a01ef07254926d.pkl new file mode 100644 index 00000000..d735f39d Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/584d9a8ef900f68c4a9296d9ed420b1fe415b9ee1f7eeb0df2a01ef07254926d.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6b29fb659ef95585653a113e3b8f957a0778f471e34b2a5169f154f0b15c20c6.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6b29fb659ef95585653a113e3b8f957a0778f471e34b2a5169f154f0b15c20c6.pkl new file mode 100644 index 00000000..29d555b9 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6b29fb659ef95585653a113e3b8f957a0778f471e34b2a5169f154f0b15c20c6.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6d277e5cfca7e0db1e89ac67cd19d71bf7feb77558543cde88b6ee134971b5ad.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6d277e5cfca7e0db1e89ac67cd19d71bf7feb77558543cde88b6ee134971b5ad.pkl new file mode 100644 index 00000000..3f380738 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/6d277e5cfca7e0db1e89ac67cd19d71bf7feb77558543cde88b6ee134971b5ad.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/7a5fc4da7dfb19a4cd5529f827086d6c119802428bd4e10e2177079d42bc527f.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/7a5fc4da7dfb19a4cd5529f827086d6c119802428bd4e10e2177079d42bc527f.pkl new file mode 100644 index 00000000..9dd34bdd Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/7a5fc4da7dfb19a4cd5529f827086d6c119802428bd4e10e2177079d42bc527f.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/89aaf10840541c1f22dca15a2d3bb7af8d5562a929fdaddef8b17c20a427c41e.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/89aaf10840541c1f22dca15a2d3bb7af8d5562a929fdaddef8b17c20a427c41e.pkl deleted file mode 100644 index 7f325357..00000000 Binary files a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/89aaf10840541c1f22dca15a2d3bb7af8d5562a929fdaddef8b17c20a427c41e.pkl and /dev/null differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/951b765d6b645e5a18b04c9cda8bf73d8f8c409bd447374df6769a33652c72c3.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/951b765d6b645e5a18b04c9cda8bf73d8f8c409bd447374df6769a33652c72c3.pkl new file mode 100644 index 00000000..20f2ec40 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/951b765d6b645e5a18b04c9cda8bf73d8f8c409bd447374df6769a33652c72c3.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl index 40cb6308..ef236bc5 100644 Binary files a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/bd0a0eb768ecda41e7ae1fb6fd7d574c05c61fd143a9059573815c38d2c9b1e7.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/bd0a0eb768ecda41e7ae1fb6fd7d574c05c61fd143a9059573815c38d2c9b1e7.pkl new file mode 100644 index 00000000..09c1af96 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/bd0a0eb768ecda41e7ae1fb6fd7d574c05c61fd143a9059573815c38d2c9b1e7.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/ccb7751e1332467bf3d3b6a720d72a347b8777184ddce0c28e4a7afff04003cb.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/ccb7751e1332467bf3d3b6a720d72a347b8777184ddce0c28e4a7afff04003cb.pkl deleted file mode 100644 index 542ae9f8..00000000 Binary files a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/ccb7751e1332467bf3d3b6a720d72a347b8777184ddce0c28e4a7afff04003cb.pkl and /dev/null differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/e6390e10ab4a8252870620496b378e00862205c5ee5a29acebfdc33db34a988e.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/e6390e10ab4a8252870620496b378e00862205c5ee5a29acebfdc33db34a988e.pkl new file mode 100644 index 00000000..5799c396 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/e6390e10ab4a8252870620496b378e00862205c5ee5a29acebfdc33db34a988e.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f03da7ec0c7359e684e8a9fc9c46d335c0d11100ea9402706ab0bf75bfca3eb0.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f03da7ec0c7359e684e8a9fc9c46d335c0d11100ea9402706ab0bf75bfca3eb0.pkl new file mode 100644 index 00000000..7dbca292 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f03da7ec0c7359e684e8a9fc9c46d335c0d11100ea9402706ab0bf75bfca3eb0.pkl differ diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f14562827ad1e3aa08d2cf536c24c17e25412399554b0555d3aa0b35f08eab3d.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f14562827ad1e3aa08d2cf536c24c17e25412399554b0555d3aa0b35f08eab3d.pkl new file mode 100644 index 00000000..82ffd6b5 Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/f14562827ad1e3aa08d2cf536c24c17e25412399554b0555d3aa0b35f08eab3d.pkl differ diff --git a/tests/itest_golden_data/advanced_output_handling_ipynb.json b/tests/itest_golden_data/advanced_output_handling_ipynb.json index 5a9b75f6..5bf89bf1 100644 --- a/tests/itest_golden_data/advanced_output_handling_ipynb.json +++ b/tests/itest_golden_data/advanced_output_handling_ipynb.json @@ -1 +1 @@ -"def bubble_sort(arr):\n n = len(arr)\n for i in range(n):\n swapped = False\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n swapped = True\n if not swapped:\n break\n return arr\n\n# Test the bubble sort function\nsample_array = [64, 34, 25, 12, 22, 11, 90]\nsorted_array = bubble_sort(sample_array)\nprint(sorted_array)\n\nThe `bubble_sort` function sorts an array using the bubble sort algorithm. It works by repeatedly stepping through the list, comparing adjacent elements and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The outer loop runs `n` times, where `n` is the length of the array, and the inner loop runs `n-i-1` times to avoid re-checking the already sorted elements. An optimization is added by using a `swapped` flag to detect if any swaps were made during an iteration. If no swaps were made, the array is already sorted, and the algorithm can terminate early. The test case demonstrates the function by sorting a sample array." \ No newline at end of file +"def bubble_sort(arr):\n n = len(arr)\n for i in range(n):\n swapped = False\n for j in range(0, n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n swapped = True\n if not swapped:\n break\n return arr\n\n# Test the bubble sort function\nsample_array = [64, 34, 25, 12, 22, 11, 90]\nsorted_array = bubble_sort(sample_array)\nprint(sorted_array)\n\nThe `bubble_sort` function sorts an array using the bubble sort algorithm. It works by repeatedly stepping through the list, comparing adjacent elements and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The outer loop runs `n` times, where `n` is the length of the array. The inner loop runs `n-i-1` times to avoid re-checking the already sorted elements. An optimization is added by using a `swapped` flag to detect if any swaps were made during an iteration. If no swaps were made, the list is already sorted, and the algorithm can terminate early. The test case demonstrates the function by sorting a sample array." \ No newline at end of file diff --git a/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json b/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json index 144d9fb6..3ad8a85f 100644 --- a/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json +++ b/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json @@ -1 +1 @@ -"\\[\n\\begin{aligned}\nx &= \\frac{367}{71} \\\\\ny &= -\\frac{25}{49} \\\\\nx - y &= 2\n\\end{aligned}\n\\]" \ No newline at end of file +"Agent stopped due to iteration limit or time limit." \ No newline at end of file diff --git a/tests/test_agents/__init__.py b/tests/test_agents/__init__.py index e69de29b..313c43ae 100644 --- a/tests/test_agents/__init__.py +++ b/tests/test_agents/__init__.py @@ -0,0 +1,21 @@ +from typing import Type +from langchain_core.tools import BaseTool +from langchain_core.pydantic_v1 import BaseModel, Field + + +class MockToolInput(BaseModel): + """Input for the MockTool tool.""" + + tool_input: str = Field(description="tool_input") + + +class MockTool(BaseTool): + """Mock tool for run agent tests""" + + name: str = "mock tool" + description: str = "Mock tool for tests" + + args_schema: Type[BaseModel] = MockToolInput + + def _run(self, tool_input: str, *args, **kwargs): + return tool_input diff --git a/tests/test_agents/test_agents.py b/tests/test_agents/test_agents.py index b5d0ef0a..7c00a0e2 100644 --- a/tests/test_agents/test_agents.py +++ b/tests/test_agents/test_agents.py @@ -1,7 +1,6 @@ import os import pytest -from langchain_community.tools import DuckDuckGoSearchRun from langchain_core.prompts.chat import ChatPromptTemplate from motleycrew.agents.crewai.crewai_agent import CrewAIMotleyAgent from motleycrew.agents.langchain.tool_calling_react import ReActToolCallingAgent @@ -9,6 +8,7 @@ from motleycrew.common.exceptions import AgentNotMaterialized, CannotModifyMaterializedAgent from motleycrew.tools.python_repl import create_repl_tool from motleycrew.tools.tool import MotleyTool +from tests.test_agents import MockTool os.environ["OPENAI_API_KEY"] = "YOUR OPENAI API KEY" @@ -28,7 +28,7 @@ def crewai_agent(self): backstory="", verbose=True, delegation=False, - tools=[DuckDuckGoSearchRun()], + tools=[MockTool()], ) return agent @@ -38,7 +38,7 @@ def langchain_agent(self): name="AI writer agent", prompt_prefix="Generate AI-generated content", description="AI-generated content", - tools=[DuckDuckGoSearchRun()], + tools=[MockTool()], verbose=True, ) return agent @@ -48,7 +48,7 @@ def llama_index_agent(self): agent = ReActLlamaIndexMotleyAgent( prompt_prefix="Uncover cutting-edge developments in AI and data science", description="AI researcher", - tools=[DuckDuckGoSearchRun()], + tools=[MockTool()], verbose=True, ) return agent @@ -65,7 +65,7 @@ def agent(self, request, crewai_agent, langchain_agent, llama_index_agent): @pytest.mark.parametrize("agent", test_agents_names, indirect=True) def test_add_tools(self, agent): assert len(agent.tools) == 1 - tools = [DuckDuckGoSearchRun()] + tools = [MockTool()] agent.add_tools(tools) assert len(agent.tools) == 1 diff --git a/tests/test_agents/test_langchain_output_handler.py b/tests/test_agents/test_langchain_output_handler.py index 9253b1a4..cfdfb884 100644 --- a/tests/test_agents/test_langchain_output_handler.py +++ b/tests/test_agents/test_langchain_output_handler.py @@ -1,11 +1,11 @@ import pytest -from langchain_community.tools import DuckDuckGoSearchRun from langchain_core.agents import AgentFinish, AgentAction from motleycrew.agents import MotleyOutputHandler from motleycrew.agents.langchain.tool_calling_react import ReActToolCallingAgent from motleycrew.agents.parent import DirectOutput -from motleycrew.common.exceptions import InvalidOutput +from motleycrew.common.exceptions import InvalidOutput, OutputHandlerMaxIterationsExceeded +from tests.test_agents import MockTool invalid_output = "Add more information about AI applications in medicine." @@ -38,10 +38,10 @@ def fake_agent_take_next_step( @pytest.fixture def agent(): agent = ReActToolCallingAgent( - tools=[DuckDuckGoSearchRun()], + tools=[MockTool()], verbose=True, chat_history=True, - output_handler=ReportOutputHandler(), + output_handler=ReportOutputHandler(max_iterations=5), ) agent.materialize() object.__setattr__(agent._agent, "plan", fake_agent_plan) @@ -56,6 +56,19 @@ def agent(): return agent +@pytest.fixture +def run_kwargs(agent): + agent_executor = agent.agent.bound.bound.steps[1].bound + + run_kwargs = { + "name_to_tool_map": {tool.name: tool for tool in agent_executor.tools}, + "color_mapping": {}, + "inputs": {}, + "intermediate_steps": [], + } + return run_kwargs + + def test_agent_plan(agent): agent_executor = agent.agent agent_action = AgentAction("tool", "tool_input", "tool_log") @@ -71,15 +84,7 @@ def test_agent_plan(agent): assert step.tool_input == "test_output" -def test_agent_take_next_step(agent): - agent_executor = agent.agent.bound.bound.steps[1].bound - - run_kwargs = { - "name_to_tool_map": {tool.name: tool for tool in agent_executor.tools}, - "color_mapping": {}, - "inputs": {}, - "intermediate_steps": [], - } +def test_agent_take_next_step(agent, run_kwargs): # test wrong output input_data = "Latest advancements in AI in 2024." @@ -95,3 +100,14 @@ def test_agent_take_next_step(agent): assert isinstance(step_result.return_values, dict) output_result = step_result.return_values.get("output") assert output_result == {"checked_output": input_data} + + +def test_output_handler_max_iteration(agent, run_kwargs): + input_data = "Latest advancements in AI in 2024." + run_kwargs["inputs"] = input_data + + with pytest.raises(OutputHandlerMaxIterationsExceeded): + for iteration in range(agent.output_handler.max_iterations + 1): + agent.agent._take_next_step(**run_kwargs) + + assert iteration == agent.output_handler.max_iterations diff --git a/tests/test_agents/test_llama_index_output_handler.py b/tests/test_agents/test_llama_index_output_handler.py index 4bd8c89c..fd451617 100644 --- a/tests/test_agents/test_llama_index_output_handler.py +++ b/tests/test_agents/test_llama_index_output_handler.py @@ -2,7 +2,7 @@ from collections import deque import pytest -from langchain_community.tools import DuckDuckGoSearchRun +from langchain_core.tools import StructuredTool try: from llama_index.core.agent.types import Task, TaskStep, TaskStepOutput @@ -17,7 +17,11 @@ from motleycrew.agents.llama_index import ReActLlamaIndexMotleyAgent from motleycrew.agents import MotleyOutputHandler -from motleycrew.common.exceptions import InvalidOutput, ModuleNotInstalled +from motleycrew.common.exceptions import ( + InvalidOutput, + OutputHandlerMaxIterationsExceeded, +) +from tests.test_agents import MockTool invalid_output = "Add more information about AI applications in medicine." @@ -44,31 +48,27 @@ def fake_run_step(*args, **kwargs): @pytest.fixture def agent(): - try: - search_tool = DuckDuckGoSearchRun() - agent = ReActLlamaIndexMotleyAgent( - description="Your goal is to uncover cutting-edge developments in AI and data science", - tools=[search_tool], - output_handler=ReportOutputHandler(), - verbose=True, - ) - agent.materialize() - agent._agent._run_step = fake_run_step - agent._agent._run_step = agent.run_step_decorator()(agent._agent._run_step) - - except ModuleNotInstalled: - return + + agent = ReActLlamaIndexMotleyAgent( + description="Your goal is to uncover cutting-edge developments in AI and data science", + tools=[MockTool()], + output_handler=ReportOutputHandler(max_iterations=5), + verbose=True, + ) + agent.materialize() + agent._agent._run_step = fake_run_step + agent._agent._run_step = agent.run_step_decorator()(agent._agent._run_step) + return agent -def test_run_step(agent): +@pytest.fixture +def task_data(agent): if agent is None: return task = Task(input="User input", memory=agent._agent.memory) - task_step = TaskStep( - task_id=task.task_id, step_id=str(uuid.uuid4()), input="Test input" - ) + task_step = TaskStep(task_id=task.task_id, step_id=str(uuid.uuid4()), input="Test input") task_state = TaskState( task=task, @@ -82,6 +82,24 @@ def test_run_step(agent): output=AgentChatResponse(response="Test response"), next_steps=[], ) + return task, task_step_output + + +def find_output_handler(agent: ReActLlamaIndexMotleyAgent) -> StructuredTool: + agent_worker = agent.agent.agent_worker + output_handler = None + for tool in agent_worker._get_tools(""): + if tool.metadata.name == "output_handler": + output_handler = tool.to_langchain_tool() + break + return output_handler + + +def test_run_step(agent, task_data): + if agent is None: + return + + task, task_step_output = task_data # test not last output cur_step_output = agent._agent._run_step("", task_step_output=task_step_output) @@ -100,23 +118,12 @@ def test_run_step(agent): _task_step = step_queue.pop() assert _task_step.task_id == task.task_id - assert ( - _task_step.input - == "You must call the `{}` tool to return the output.".format( - agent.output_handler.name - ) + assert _task_step.input == "You must call the `{}` tool to return the output.".format( + agent.output_handler.name ) # test direct output - - # find output handler - agent_worker = agent.agent.agent_worker - output_handler = None - for tool in agent_worker._get_tools(""): - if tool.metadata.name == "output_handler": - output_handler = tool.to_langchain_tool() - break - + output_handler = find_output_handler(agent) if output_handler is None: return @@ -144,3 +151,26 @@ def test_run_step(agent): ) assert hasattr(agent, "direct_output") assert agent.direct_output.output == {"checked_output": output_handler_input} + + +def test_output_handler_max_iteration(agent, task_data): + if agent is None: + return + + task, task_step_output = task_data + + output_handler = find_output_handler(agent) + if output_handler is None: + return + + output_handler_input = "Latest advancements in AI in 2024." + with pytest.raises(OutputHandlerMaxIterationsExceeded): + for iteration in range(agent.output_handler.max_iterations + 1): + + agent._agent._run_step( + "", + task_step_output=task_step_output, + output_handler=output_handler, + output_handler_input=output_handler_input, + ) + assert iteration == agent.output_handler.max_iterations