support internet meme #128
base: develop/v0.2.1
Changes from all commits
@@ -0,0 +1,45 @@
```python
from pathlib import Path

from omagent_core.utils.registry import registry
from omagent_core.utils.general import read_image
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.logger import logging

CURRENT_PATH = Path(__file__).parents[0]


@registry.register_worker()
class InputInterface(BaseWorker):
    """Input interface processor that handles user instructions and image input.

    This processor:
    1. Reads user input containing a question and an image via the input interface
    2. Extracts the text instruction and image path from the input
    3. Loads and caches the image in workflow storage
    4. Returns the user instruction for the next steps
    """

    def _run(self, *args, **kwargs):
        # Read user input through the configured input interface
        user_input = self.input.read_input(
            workflow_instance_id=self.workflow_instance_id,
            input_prompt='Please tell me a question and an image.',
        )

        image_path = None
        # Extract text and image content from the input message
        content = user_input['messages'][-1]['content']
        for content_item in content:
            if content_item['type'] == 'text':
                user_instruction = content_item['data']
            elif content_item['type'] == 'image_url':
                image_path = content_item['data']

        logging.info(f'user_instruction: {user_instruction}\nImage_path: {image_path}')
        self.stm(self.workflow_instance_id)['user_instruction'] = user_instruction
        # Load the image from the file system
        if image_path:
            img = read_image(input_source=image_path)

            # Store the image in workflow shared memory under a standard key
            image_cache = {'<image_0>': img}
            self.stm(self.workflow_instance_id)['image_cache'] = image_cache
```
Comment on lines +38 to +43

🛠️ Refactor suggestion: Enhance image handling and caching

The image loading and caching logic needs improvement. Consider implementing these safeguards:

```diff
 if image_path:
+    if not Path(image_path).exists():
+        raise FileNotFoundError(f"Image file not found: {image_path}")
+
     img = read_image(input_source=image_path)
+
+    # Validate image size and format
+    if img.size > MAX_IMAGE_SIZE:  # Define MAX_IMAGE_SIZE constant
+        raise ValueError(f"Image size exceeds limit of {MAX_IMAGE_SIZE} bytes")

     # Store image in workflow shared memory with standard key
     image_cache = {'<image_0>': img}
+    # Implement cache size management
+    self._manage_cache_size(image_cache)
     self.stm(self.workflow_instance_id)['image_cache'] = image_cache
```

```python
        return {'user_instruction': user_instruction}
```
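The path-existence and size safeguards the reviewer suggests can be sketched as a self-contained helper. The `validate_image_path` name and the `MAX_IMAGE_BYTES` cap are illustrative assumptions, not part of the PR:

```python
from pathlib import Path

# Illustrative cap; the PR defines no MAX_IMAGE_SIZE constant.
MAX_IMAGE_BYTES = 10 * 1024 * 1024  # 10 MB

def validate_image_path(image_path: str, max_bytes: int = MAX_IMAGE_BYTES) -> Path:
    """Return the path if it points to an existing file within the size limit."""
    path = Path(image_path)
    if not path.exists():
        raise FileNotFoundError(f"Image file not found: {image_path}")
    if path.stat().st_size > max_bytes:
        raise ValueError(f"Image exceeds the {max_bytes}-byte limit")
    return path
```

Calling this before `read_image` would turn a missing or oversized file into an immediate, descriptive error rather than a failure deeper in the pipeline.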
@@ -0,0 +1,59 @@
```python
from pathlib import Path
from typing import List

from omagent_core.models.llms.base import BaseLLMBackend
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.registry import registry
from omagent_core.models.llms.prompt.prompt import PromptTemplate
from omagent_core.models.llms.openai_gpt import OpenaiGPTLLM

from pydantic import Field


CURRENT_PATH = Path(__file__).parents[0]


@registry.register_worker()
class MemeExplain(BaseWorker, BaseLLMBackend):
    llm: OpenaiGPTLLM

    prompts: List[PromptTemplate] = Field(
        default=[
            PromptTemplate.from_file(
                CURRENT_PATH.joinpath("sys_prompt.prompt"), role="system"
            ),
            PromptTemplate.from_file(
                CURRENT_PATH.joinpath("user_prompt.prompt"), role="user"
            ),
        ]
    )
```
Comment on lines +16 to +29

Add validation for prompt file existence

The code loads prompt files without checking that they exist first, which could lead to runtime errors if the files are missing. Consider adding validation:

```diff
 prompts: List[PromptTemplate] = Field(
     default=[
         PromptTemplate.from_file(
-            CURRENT_PATH.joinpath("sys_prompt.prompt"), role="system"
+            sys_prompt_path := CURRENT_PATH.joinpath("sys_prompt.prompt"),
+            role="system"
+        ) if sys_prompt_path.exists() else None,
         PromptTemplate.from_file(
-            CURRENT_PATH.joinpath("user_prompt.prompt"), role="user"
+            user_prompt_path := CURRENT_PATH.joinpath("user_prompt.prompt"),
+            role="user"
+        ) if user_prompt_path.exists() else None,
     ]
 )
```

Also, consider adding input validation for the prompt content to prevent potential security issues.
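A simpler way to get the fail-fast behaviour the reviewer is after, sketched as a hypothetical `load_prompt` helper (not part of the PR). Note that in the reviewer's snippet the `if sys_prompt_path.exists()` condition is evaluated before the `:=` binding inside the call expression, so binding the path on its own line avoids a `NameError`:

```python
from pathlib import Path

def load_prompt(path: Path) -> str:
    """Read a prompt file, failing fast with a clear error if it is missing."""
    if not path.exists():
        raise FileNotFoundError(f"Prompt file not found: {path}")
    return path.read_text(encoding="utf-8")
```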
```python
    def _run(self, *args, **kwargs):
        """Process user input and generate outfit recommendations.

        Retrieves user instruction and weather information from workflow context,
        generates outfit recommendations using the LLM model, and returns the
        recommendations while also sending them via callback.

        Args:
            *args: Variable length argument list
            **kwargs: Arbitrary keyword arguments

        Returns:
            str: Generated outfit recommendations
        """
```
Comment on lines +31 to +44

Update docstring to reflect meme explanation functionality

The current docstring refers to "outfit recommendations", which appears to be copied from another module. It should be updated to reflect the actual meme explanation functionality:

```diff
-    """Process user input and generate outfit recommendations.
+    """Process user input and generate meme explanations.

-    Retrieves user instruction and weather information from workflow context,
-    generates outfit recommendations using the LLM model, and returns the
-    recommendations while also sending them via callback.
+    Retrieves user instruction and search information from workflow context,
+    generates meme explanations using the LLM model, and returns the
+    explanation while also sending it via callback.

     Args:
         *args: Variable length argument list
         **kwargs: Arbitrary keyword arguments

     Returns:
-        str: Generated outfit recommendations
+        str: Generated meme explanation
     """
```
```python
        # Retrieve user instruction and optional weather info from workflow context
        user_instruct = self.stm(self.workflow_instance_id).get("user_instruction")
        search_info = self.stm(self.workflow_instance_id)["search_info"] if "search_info" in self.stm(self.workflow_instance_id) else None
        # Generate outfit recommendations using LLM with weather and user input
        chat_complete_res = self.simple_infer(info=str(search_info), name=user_instruct)

        # Extract recommendations from LLM response
        outfit_recommendation = chat_complete_res["choices"][0]["message"]["content"]

        # Send recommendations via callback and return
        self.callback.send_answer(agent_id=self.workflow_instance_id, msg=outfit_recommendation)

        self.stm(self.workflow_instance_id).clear()
        return outfit_recommendation
```
Comment on lines +45 to +59

🛠️ Refactor suggestion: Add error handling and improve code robustness

Several improvements can be made to make the code more robust:

```diff
+    def _run(self, *args, **kwargs) -> str:
         # Retrieve user instruction and optional weather info from workflow context
         user_instruct = self.stm(self.workflow_instance_id).get("user_instruction")
+        if not user_instruct:
+            raise ValueError("User instruction is required")
+
-        search_info = self.stm(self.workflow_instance_id)["search_info"] if "search_info" in self.stm(self.workflow_instance_id) else None
+        search_info = self.stm(self.workflow_instance_id).get("search_info")

-        chat_complete_res = self.simple_infer(info=str(search_info), name=user_instruct)
+        try:
+            chat_complete_res = self.simple_infer(info=str(search_info), name=user_instruct)
+        except Exception as e:
+            self.stm(self.workflow_instance_id).clear()
+            raise RuntimeError(f"Failed to generate meme explanation: {str(e)}")

-        outfit_recommendation = chat_complete_res["choices"][0]["message"]["content"]
+        try:
+            meme_explanation = chat_complete_res["choices"][0]["message"]["content"]
+        except (KeyError, IndexError) as e:
+            self.stm(self.workflow_instance_id).clear()
+            raise RuntimeError(f"Invalid response format from LLM: {str(e)}")

-        self.callback.send_answer(agent_id=self.workflow_instance_id, msg=outfit_recommendation)
+        self.callback.send_answer(agent_id=self.workflow_instance_id, msg=meme_explanation)

         self.stm(self.workflow_instance_id).clear()
-        return outfit_recommendation
+        return meme_explanation
```

🧰 Tools: 🪛 Ruff (0.8.2) — 47-47: Use `.get("search_info")` instead of an `if` block (SIM401)
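The Ruff SIM401 hint flagged above boils down to the following equivalence; the sample dictionary here is illustrative, standing in for the workflow's short-term memory:

```python
# Sample workflow-context data for illustration
stm = {"user_instruction": "explain this meme"}

# Membership-test form flagged by Ruff SIM401
search_info_verbose = stm["search_info"] if "search_info" in stm else None

# Equivalent idiomatic form
search_info = stm.get("search_info")

assert search_info_verbose is None and search_info is None
assert stm.get("user_instruction") == "explain this meme"
```

Both forms return `None` for a missing key; `.get()` also avoids evaluating the `self.stm(...)` lookup twice.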
@@ -0,0 +1,21 @@
```text
你是一个互联网网络梗百科专家。我会提供一些在网络上搜索到的关于某个梗的解释以及一些相关的使用例子,你的任务是根据网络的信息生成这个网络梗的百科页面。需要包含的信息为:

1. 网络梗的介绍,解释出处
2. 关于这个梗的3个使用案例,包括来源和使用例子的内容。如果搜到的信息没有例子,则创造三个例子,这种情况不需要输出来源。
```
Comment on lines +1 to +4

🛠️ Refactor suggestion: Consider adding guidelines for handling sensitive content and incomplete information

The prompt should include guidelines for handling sensitive or offensive meme content and for cases where the searched information is incomplete. Would you like me to propose additional prompt text addressing these concerns?
```text
输出使用如下格式:
### XXX定义
XXXX

### 使用案例
1. **例子一**:
  - **来源**:XXX
  - **使用例子**:XXX

2. **例子一**:
  - **来源**:XXX
  - **使用例子**:XXX

3. **例子一**:
  - **来源**:XXX
  - **使用例子**:XXX
```
Comment on lines +15 to +21

Fix example numbering in the prompt template

The example numbering shows "例子一" (Example One) three times; it should be incremented for each example. Apply this diff to fix the numbering:

```diff
-2. **例子一**:
+2. **例子二**:
   - **来源**:XXX
   - **使用例子**:XXX

-3. **例子一**:
+3. **例子三**:
   - **来源**:XXX
   - **使用例子**:XXX
```
@@ -0,0 +1,6 @@
```text
Now, it's your turn to complete the task.
Give anwer using the language according to the user's answer.
```
Comment on lines +1 to +2

Fix grammar and spelling issues in the English instructions

The instructions contain spelling and grammar errors that should be corrected:

```diff
 Now, it's your turn to complete the task.
-Give anwer using the language according to the user's answer.
+Give an answer using language that matches the user's response.
```
```text

Input Information:
- 搜到的信息: {{info}}
- 网络梗的名称: {{name}}
```
@@ -0,0 +1,37 @@
```python
from pathlib import Path
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.registry import registry
from omagent_core.tool_system.manager import ToolManager
from omagent_core.utils.logger import logging

CURRENT_PATH = root_path = Path(__file__).parents[0]


@registry.register_worker()
class MemeSearcher(BaseWorker):

    tool_manager: ToolManager

    def _run(self, user_instruction: str, *args, **kwargs):
        # Construct the search query for the meme search tool
        # search_query = "Please consider the user instruction and generate a search query for the internet memo search tool to search for the explaination according to user requirements. You MUST choose the web search tool in the tool_call to excute. When generating the search query, please include how this memo comes from how to use this memo. User Instruction: {}".format(user_instruction)

        # search_query = "Please consider the user instruction and generate a search query for the internet memo search. You MUST choose the web search tool in the tool_call to excute. User Instruction: 搜索{}, 并提供相关的3个例子,需要获得三个query results".format(user_instruction)

        search_query = "Please consider the user instruction and generate a search query for the internet meme search. You MUST choose the web search tool in the tool_call to excute. User Instruction: search {} meme, and provide three examples of {} usage in context,need to gie out three query results".format(user_instruction, user_instruction)
```
Comment on lines +16 to +21

🛠️ Refactor suggestion: Remove commented code and improve search query construction

The file contains commented-out code and a hardcoded search query format:

```diff
-        # search_query = "Please consider the user instruction and generate a search query for the internet memo search tool to search for the explaination according to user requirements. You MUST choose the web search tool in the tool_call to excute. When generating the search query, please include how this memo comes from how to use this memo. User Instruction: {}".format(user_instruction)
-
-        # search_query = "Please consider the user instruction and generate a search query for the internet memo search. You MUST choose the web search tool in the tool_call to excute. User Instruction: 搜索{}, 并提供相关的3个例子,需要获得三个query results".format(user_instruction)
-
-        search_query = "Please consider the user instruction and generate a search query for the internet meme search. You MUST choose the web search tool in the tool_call to excute. User Instruction: search {} meme, and provide three examples of {} usage in context,need to gie out three query results".format(user_instruction, user_instruction)
+        if not user_instruction or not isinstance(user_instruction, str):
+            raise ValueError("Invalid user instruction")
+
+        search_query = self.config.get_template("meme_search").format(
+            user_instruction=user_instruction
+        )
```
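The validation-plus-templating idea above can be sketched as a standalone function; `build_search_query` is an illustrative name, and the query text is taken from the PR (with its typos cleaned up), not a fixed API:

```python
def build_search_query(user_instruction: str) -> str:
    """Validate the instruction, then build the meme search query."""
    if not isinstance(user_instruction, str) or not user_instruction.strip():
        raise ValueError("Invalid user instruction")
    topic = user_instruction.strip()
    return (
        f"search {topic} meme, and provide three examples "
        f"of {topic} usage in context"
    )
```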
```python

        logging.info(search_query)
        # Execute meme search via tool manager and notify user
        execution_status, execution_results = self.tool_manager.execute_task(
            task=search_query
        )
        self.callback.send_block(agent_id=self.workflow_instance_id, msg='Using web search tool to search for meme information')
        logging.info(execution_results)
```
Comment on lines +25 to +30

🛠️ Refactor suggestion: Improve error handling and logging for tool execution

The tool execution could benefit from better error handling and logging:

```diff
 execution_status, execution_results = self.tool_manager.execute_task(
-    task=search_query
+    task=search_query,
+    timeout=self.config.get("search_timeout", 30),
+    retries=self.config.get("max_retries", 3)
 )
 self.callback.send_block(agent_id=self.workflow_instance_id, msg='Using web search tool to search for meme information')
-logging.info(execution_results)
+if execution_status == "success":
+    logging.info("Search completed successfully: %s", execution_results)
+else:
+    logging.error("Search failed: %s", execution_results)
```
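The `retries` parameter the reviewer proposes is not shown to exist on `ToolManager.execute_task`; a generic retry wrapper is one way to get the same effect without assuming that API. `execute_with_retries` is a hypothetical helper:

```python
import time

def execute_with_retries(task_fn, retries: int = 3, delay: float = 0.0):
    """Call task_fn, retrying on failure up to `retries` attempts."""
    last_exc = None
    for _ in range(retries):
        try:
            return task_fn()
        except Exception as exc:  # broad on purpose: any failure triggers a retry
            last_exc = exc
            time.sleep(delay)
    raise RuntimeError(f"Task failed after {retries} attempts") from last_exc
```

A transient web-search failure would then be retried transparently, while a persistent one still surfaces as an error.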
```python

        # Store successful results in workflow context or raise error
        if execution_status == "success":
            self.stm(self.workflow_instance_id)["search_info"] = execution_results
        else:
            raise ValueError("Web search tool execution failed.")

        return
```
@@ -0,0 +1,53 @@
```python
from pathlib import Path
from typing import List

from omagent_core.models.llms.base import BaseLLMBackend
from omagent_core.utils.registry import registry
from omagent_core.models.llms.schemas import Message, Content
from omagent_core.utils.general import encode_image
from omagent_core.models.llms.prompt.parser import StrParser
from omagent_core.models.llms.openai_gpt import OpenaiGPTLLM
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.container import container


@registry.register_worker()
class SimpleVQA(BaseWorker, BaseLLMBackend):
    """Simple Visual Question Answering processor that handles image-based questions.

    This processor:
    1. Takes the user instruction and cached image from the workflow context
    2. Creates chat messages containing the text question and the base64-encoded image
    3. Sends the messages to the LLM to generate a response
    4. Returns the response and sends it via callback
    """
    llm: OpenaiGPTLLM

    def _run(self, user_instruction: str, *args, **kwargs):
        # Initialize an empty list for chat messages
        chat_message = []

        # Add the text question as the first message
        chat_message.append(Message(role="user", message_type='text', content=user_instruction))

        # Retrieve the cached image from workflow shared memory
        if self.stm(self.workflow_instance_id).get('image_cache', None):
            img = self.stm(self.workflow_instance_id)['image_cache']['<image_0>']
```
Comment on lines +34 to +35

🛠️ Refactor suggestion: Add error handling for missing image key in cache

When accessing the `'<image_0>'` key directly, a missing entry raises a `KeyError`. Apply this diff to prevent it:

```diff
 if self.stm(self.workflow_instance_id).get('image_cache', None):
-    img = self.stm(self.workflow_instance_id)['image_cache']['<image_0>']
+    image_cache = self.stm(self.workflow_instance_id)['image_cache']
+    img = image_cache.get('<image_0>')
+    if img:
+        # Add base64 encoded image as second message
+        chat_message.append(Message(
+            role="user",
+            message_type='image',
+            content=[Content(
+                type="image_url",
+                image_url={
+                    "url": f"data:image/jpeg;base64,{encode_image(img)}"
+                },
+            )]
+        ))
+    else:
+        # Handle the case where the image is missing
+        pass  # Optionally log a warning or take alternative action
```
```python

            # Add base64-encoded image as the second message
            chat_message.append(Message(role="user", message_type='image', content=[Content(
                type="image_url",
                image_url={
                    "url": f"data:image/jpeg;base64,{encode_image(img)}"
                },
            )]))

        # Get the response from the LLM model
        chat_complete_res = self.llm.generate(records=chat_message)

        # Extract the answer text from the response
        answer = chat_complete_res["choices"][0]["message"]["content"]
```
🛠️ Refactor suggestion: Add error handling for unexpected LLM response structure

Accessing the nested fields of the LLM response directly can raise exceptions if the response has an unexpected structure. Apply this diff to handle potential exceptions:

```diff
+try:
     answer = chat_complete_res["choices"][0]["message"]["content"]
+except (KeyError, IndexError, TypeError) as e:
+    # Handle the error, e.g., log it or provide a default response
+    self.logger.error(f"Failed to parse LLM response: {e}")
+    answer = "I'm sorry, I couldn't process your request."
```
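The defensive extraction pattern above can be isolated into a small helper, which also makes it easy to exercise against malformed responses; `extract_answer` is an illustrative name, and the dict shape follows the OpenAI-style response used in the PR:

```python
def extract_answer(chat_complete_res, default="I'm sorry, I couldn't process your request."):
    """Safely pull the message text out of an OpenAI-style response dict."""
    try:
        return chat_complete_res["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return default
```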
```python

        # Send the answer via callback and return
        self.callback.send_answer(self.workflow_instance_id, msg=answer)
        return answer
```
@@ -0,0 +1,20 @@
```python
from omagent_core.utils.container import container
from pathlib import Path
from omagent_core.utils.registry import registry
```
Comment on lines +1 to +4

Remove duplicate Path import

`Path` is imported twice in this module. Apply this diff to fix the duplicate import:

```diff
 from omagent_core.utils.container import container
 from pathlib import Path
 from omagent_core.utils.registry import registry

-# Configure import path for agent modules
-from pathlib import Path
 CURRENT_PATH = Path(__file__).parents[0]
```

Also applies to lines 10-11.
```python

# Load all registered workflow components
registry.import_module()

# Configure import path for agent modules
from pathlib import Path
CURRENT_PATH = Path(__file__).parents[0]

# Register core workflow components for state management, callbacks and input handling
container.register_stm(stm='RedisSTM')
container.register_callback(callback='AppCallback')
container.register_input(input='AppInput')
```
Comment on lines +14 to +16

🛠️ Refactor suggestion: Add error handling for component registration

The component registration calls lack error handling, which could lead to silent failures. Consider wrapping the registration calls in try-except blocks:

```diff
-container.register_stm(stm='RedisSTM')
-container.register_callback(callback='AppCallback')
-container.register_input(input='AppInput')
+try:
+    container.register_stm(stm='RedisSTM')
+    container.register_callback(callback='AppCallback')
+    container.register_input(input='AppInput')
+except Exception as e:
+    logging.error(f"Failed to register components: {e}")
+    raise
```
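The wrap-and-re-raise pattern the reviewer suggests can be demonstrated without the real container; `StubContainer` and `register_components` below are illustrative stand-ins, not part of omagent_core's API:

```python
import logging

class StubContainer:
    """Minimal stand-in for the framework container, for illustration only."""
    def __init__(self):
        self.registered = {}

    def register(self, kind, name):
        if not name:
            raise ValueError(f"Empty component name for {kind}")
        self.registered[kind] = name

def register_components(container):
    """Register core components, surfacing any failure instead of swallowing it."""
    try:
        container.register("stm", "RedisSTM")
        container.register("callback", "AppCallback")
        container.register("input", "AppInput")
    except Exception:
        logging.exception("Failed to register components")
        raise
```

Logging before re-raising keeps the stack trace in the logs while still aborting startup on a bad configuration.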
```python


# Compile container config
container.compile_config(CURRENT_PATH)
```
@@ -0,0 +1,6 @@
```yaml
name: OpenaiGPTLLM
model_id: gpt-4o
api_key: ${env| custom_openai_key, openai_api_key}
endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1}
temperature: 0
vision: true
```
@@ -0,0 +1,6 @@
```yaml
name: OpenaiGPTLLM
model_id: gpt-4o
api_key: ${env| custom_openai_key, openai_api_key}
endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1}
temperature: 0
vision: false
```
@@ -0,0 +1,5 @@
```yaml
llm: ${sub| text_res}
tools:
  - name: WebSearch
    bing_api_key: ${env| bing_api_key, null}
    llm: ${sub|text_res}
```
@@ -0,0 +1,2 @@
```yaml
name: MemeExplain
llm: ${sub| gpt}
```
@@ -0,0 +1,3 @@
```yaml
name: MemeSearcher
```

🛠️ Refactor suggestion: Correct the filename

The filename of this config file should be corrected.

```yaml
llm: ${sub| text_res}
tool_manager: ${sub|websearch}
```
@@ -0,0 +1,2 @@
```yaml
name: SimpleVQA
llm: ${sub|gpt}
```
@@ -0,0 +1,84 @@
```yaml
conductor_config:
  name: Configuration
  base_url:
    value: http://10.8.25.26:8080
    description: The Conductor Server API endpoint
    env_var: CONDUCTOR_SERVER_URL
```
Comment on lines +3 to +6

Security: Consider using HTTPS for the Conductor Server endpoint

The base URL uses HTTP, which is insecure for API communication. Consider switching to HTTPS to ensure secure communication:

```diff
-    value: http://10.8.25.26:8080
+    value: https://10.8.25.26:8080
```
||||||||||||||||||
  auth_key:
    value: null
    description: The authorization key
    env_var: AUTH_KEY
  auth_secret:
    value: null
    description: The authorization secret
    env_var: CONDUCTOR_AUTH_SECRET
  auth_token_ttl_min:
    value: 45
    description: The authorization token refresh interval in minutes.
    env_var: AUTH_TOKEN_TTL_MIN
  debug:
    value: false
    description: Debug mode
    env_var: DEBUG
connectors:
  redis_stream_client:
    name: RedisConnector
    host:
      value: localhost
      env_var: HOST
    port:
      value: 6379
      env_var: PORT
    password:
      value: null
      env_var: PASSWORD
    username:
      value: null
      env_var: USERNAME
    db:
      value: 0
      env_var: DB
  redis_stm_client:
    name: RedisConnector
    host:
      value: localhost
      env_var: HOST
    port:
      value: 6379
      env_var: PORT
    password:
      value: null
      env_var: PASSWORD
    username:
      value: null
      env_var: USERNAME
    db:
      value: 0
      env_var: DB
Comment on lines +24 to +57

🛠️ Refactor suggestion

Refactor: Consolidate duplicate Redis configurations

The Redis configurations for redis_stream_client and redis_stm_client are identical. Consider restructuring using YAML anchors and aliases:

redis_base: &redis_base
  name: RedisConnector
  host:
    value: localhost
    env_var: HOST
  port:
    value: 6379
    env_var: PORT
  password:
    value: null
    env_var: PASSWORD
  username:
    value: null
    env_var: USERNAME
  db:
    value: 0
    env_var: DB

connectors:
  redis_stream_client:
    <<: *redis_base
  redis_stm_client:
    <<: *redis_base
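The anchor/alias rewrite can be sanity-checked with PyYAML, which resolves `&`/`*` anchors and `<<:` merge keys at load time. A minimal sketch, assuming PyYAML is installed; the keys are abbreviated from the config under review:

```python
import yaml

CONFIG = """
redis_base: &redis_base
  name: RedisConnector
  host:
    value: localhost
    env_var: HOST
  port:
    value: 6379
    env_var: PORT

connectors:
  redis_stream_client:
    <<: *redis_base
  redis_stm_client:
    <<: *redis_base
"""

data = yaml.safe_load(CONFIG)
clients = data["connectors"]
# After anchor resolution, both clients carry identical settings,
# so the duplication lives only in one place in the source file.
assert clients["redis_stream_client"] == clients["redis_stm_client"]
```

Anything reading the loaded config (environment-variable substitution included) sees the same mapping under both keys, so the consolidation is purely a source-level change.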
components:
  DefaultCallback:
    name: DefaultCallback
    bot_id:
      value: ''
      env_var: BOT_ID
    start_time:
      value: 2024-12-03_18:51:46
      env_var: START_TIME
    folder_name:
      value: ./running_logs/2024-12-03_18:51:46
      env_var: FOLDER_NAME
Comment on lines +64 to +69

Remove hardcoded timestamps and paths

The configuration contains hardcoded timestamps and log paths. These should be dynamically generated at runtime rather than stored in configuration.

These values should be removed from the configuration and set programmatically in the application code:

Also applies to: 77-82
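The runtime alternative could be sketched as follows; the timestamp format and `running_logs` prefix mirror the hardcoded values above, while the helper name is illustrative and not part of the PR:

```python
from datetime import datetime
from pathlib import Path


def make_run_context(log_root="./running_logs"):
    """Generate start_time and folder_name at startup instead of
    hardcoding them in the YAML configuration."""
    start_time = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    folder = Path(log_root) / start_time
    folder.mkdir(parents=True, exist_ok=True)
    return {"start_time": start_time, "folder_name": str(folder)}


ctx = make_run_context()
```

Note that the colons in the folder name follow the original config; they are not portable to Windows filesystems, which may be worth addressing in the same change.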
  AppInput:
    name: AppInput
  AppCallback:
    name: AppCallback
    bot_id:
      value: ''
      env_var: BOT_ID
    start_time:
      value: 2024-12-03_18:51:46
      env_var: START_TIME
    folder_name:
      value: ./running_logs/2024-12-03_18:51:46
      env_var: FOLDER_NAME
  RedisSTM:
    name: RedisSTM
Add input validation and error handling

The content extraction lacks proper validation and error handling:

Consider implementing robust validation:
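Based on the worker in this PR, which reads `user_input['messages'][-1]['content']` and iterates over `text` / `image_url` items, the validation might be sketched like this — the helper name and error messages are illustrative, not taken from the PR:

```python
def extract_instruction_and_image(user_input):
    """Validate the payload shape before extracting the text
    instruction and optional image path."""
    messages = (user_input or {}).get("messages")
    if not messages:
        raise ValueError("Input contains no messages.")

    content = messages[-1].get("content") or []
    user_instruction, image_path = None, None
    for item in content:
        if item.get("type") == "text":
            user_instruction = item.get("data")
        elif item.get("type") == "image_url":
            image_path = item.get("data")

    if not user_instruction:
        raise ValueError("No text instruction found in the last message.")
    return user_instruction, image_path
```

In the worker's `_run`, this guards against a malformed payload failing with an `UnboundLocalError` when `user_instruction` is referenced after the unguarded loop.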