@registry.register_worker()
class InputInterface(BaseWorker):
    """Input interface worker that collects a user question and an optional image.

    Workflow:
        1. Reads a message from the configured input interface.
        2. Extracts the text instruction and (optional) image path from it.
        3. Loads the image and caches it in workflow short-term memory (STM).
        4. Returns the text instruction for downstream tasks.
    """

    def _run(self, *args, **kwargs):
        # Prompt the user through the configured input interface.
        user_input = self.input.read_input(
            workflow_instance_id=self.workflow_instance_id,
            input_prompt='Please tell me a question and an image.',
        )

        # Initialize both fields up front: the incoming message may contain
        # only an image (or only text), and the original code raised
        # UnboundLocalError when no 'text' item was present.
        user_instruction = None
        image_path = None

        # The last message can hold multiple content items (text / image_url).
        content = user_input['messages'][-1]['content']
        for content_item in content:
            if content_item['type'] == 'text':
                user_instruction = content_item['data']
            elif content_item['type'] == 'image_url':
                image_path = content_item['data']

        logging.info(f'user_instruction: {user_instruction}\nImage_path: {image_path}')
        self.stm(self.workflow_instance_id)['user_instruction'] = user_instruction

        if image_path:
            # Load the image from disk/URL and store it in STM under the
            # empty-string key — downstream workers (e.g. SimpleVQA) read
            # image_cache[''] by convention.
            img = read_image(input_source=image_path)
            image_cache = {'': img}
            self.stm(self.workflow_instance_id)['image_cache'] = image_cache

        return {'user_instruction': user_instruction}
@registry.register_worker()
class MemeExplain(BaseWorker, BaseLLMBackend):
    """Generate an encyclopedia-style explanation page for an internet meme.

    Combines the meme name supplied by the user with the web-search results
    stored in workflow short-term memory (by MemeSearcher), asks the LLM to
    produce the explanation, sends it to the user via callback, and finally
    clears the workflow STM.
    """

    llm: OpenaiGPTLLM

    # System prompt defines the encyclopedia page format; user prompt injects
    # the search results ({{info}}) and the meme name ({{name}}).
    prompts: List[PromptTemplate] = Field(
        default=[
            PromptTemplate.from_file(
                CURRENT_PATH.joinpath("sys_prompt.prompt"), role="system"
            ),
            PromptTemplate.from_file(
                CURRENT_PATH.joinpath("user_prompt.prompt"), role="user"
            ),
        ]
    )

    def _run(self, *args, **kwargs):
        """Produce and return the meme explanation.

        Reads the user instruction (meme name) and the web-search results
        from workflow context, runs the LLM, delivers the answer via the
        callback, and clears shared workflow state.

        Args:
            *args: Variable length argument list (unused).
            **kwargs: Arbitrary keyword arguments (unused).

        Returns:
            str: The LLM-generated meme explanation text.
        """
        # Meme name captured by the input interface worker.
        user_instruct = self.stm(self.workflow_instance_id).get("user_instruction")
        # Search results stored by MemeSearcher; None if the search step
        # did not run or stored nothing (same semantics as the previous
        # `in`-check, without the double lookup).
        search_info = self.stm(self.workflow_instance_id).get("search_info")

        # Fill the prompt templates and query the LLM.
        chat_complete_res = self.simple_infer(info=str(search_info), name=user_instruct)

        # Extract the generated explanation from the completion response.
        explanation = chat_complete_res["choices"][0]["message"]["content"]

        # Deliver the answer to the user and reset shared workflow state.
        self.callback.send_answer(agent_id=self.workflow_instance_id, msg=explanation)
        self.stm(self.workflow_instance_id).clear()
        return explanation
@registry.register_worker()
class MemeSearcher(BaseWorker):
    """Search the web for information about an internet meme.

    Builds a search instruction from the user-supplied meme name, executes it
    through the tool manager (which is expected to pick the web search tool),
    and stores the results in workflow short-term memory under the
    ``search_info`` key for the MemeExplain worker.
    """

    tool_manager: ToolManager

    def _run(self, user_instruction: str, *args, **kwargs):
        """Execute the web search for the given meme name.

        Args:
            user_instruction (str): The meme name/phrase to look up.

        Raises:
            ValueError: If the web search tool execution does not succeed.
        """
        # Instruction for the tool-calling LLM: force the web search tool and
        # request the meme's meaning plus three usage examples.
        # (Typos in the original prompt string — "excute", "gie out" — fixed.)
        search_query = (
            "Please consider the user instruction and generate a search query "
            "for the internet meme search. You MUST choose the web search tool "
            "in the tool_call to execute. User Instruction: search {} meme, "
            "and provide three examples of {} usage in context, need to give "
            "out three query results"
        ).format(user_instruction, user_instruction)

        logging.info(search_query)
        # Run the search via the tool manager and tell the user what is happening.
        execution_status, execution_results = self.tool_manager.execute_task(
            task=search_query
        )
        self.callback.send_block(
            agent_id=self.workflow_instance_id,
            msg='Using web search tool to search for meme information',
        )
        logging.info(execution_results)

        # Persist results for the downstream explanation step, or fail loudly.
        if execution_status == "success":
            self.stm(self.workflow_instance_id)["search_info"] = execution_results
        else:
            raise ValueError("Web search tool execution failed.")

        return
Returns response and sends it via callback + """ + llm: OpenaiGPTLLM + + def _run(self, user_instruction:str, *args, **kwargs): + # Initialize empty list for chat messages + chat_message = [] + + # Add text question as first message + chat_message.append(Message(role="user", message_type='text', content=user_instruction)) + + # Retrieve cached image from workflow shared memory + if self.stm(self.workflow_instance_id).get('image_cache', None): + img = self.stm(self.workflow_instance_id)['image_cache'][''] + + # Add base64 encoded image as second message + chat_message.append(Message(role="user", message_type='image', content=[Content( + type="image_url", + image_url={ + "url": f"data:image/jpeg;base64,{encode_image(img)}" + }, + )])) + + # Get response from LLM model + chat_complete_res = self.llm.generate(records=chat_message) + + # Extract answer text from response + answer = chat_complete_res["choices"][0]["message"]["content"] + + # Send answer via callback and return + self.callback.send_answer(self.workflow_instance_id, msg=answer) + return answer diff --git a/examples/internet_meme/compile_container.py b/examples/internet_meme/compile_container.py new file mode 100644 index 0000000..d35c8ba --- /dev/null +++ b/examples/internet_meme/compile_container.py @@ -0,0 +1,20 @@ +from omagent_core.utils.container import container +from pathlib import Path +from omagent_core.utils.registry import registry + + +# Load all registered workflow components +registry.import_module() + +# Configure import path for agent modules +from pathlib import Path +CURRENT_PATH = Path(__file__).parents[0] + +# Register core workflow components for state management, callbacks and input handling +container.register_stm(stm='RedisSTM') +container.register_callback(callback='AppCallback') +container.register_input(input='AppInput') + + +# Compile container config +container.compile_config(CURRENT_PATH) \ No newline at end of file diff --git a/examples/internet_meme/configs/llms/gpt.yml 
b/examples/internet_meme/configs/llms/gpt.yml new file mode 100644 index 0000000..06efefc --- /dev/null +++ b/examples/internet_meme/configs/llms/gpt.yml @@ -0,0 +1,6 @@ +name: OpenaiGPTLLM +model_id: gpt-4o +api_key: ${env| custom_openai_key, openai_api_key} +endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1} +temperature: 0 +vision: true \ No newline at end of file diff --git a/examples/internet_meme/configs/llms/text_res.yml b/examples/internet_meme/configs/llms/text_res.yml new file mode 100644 index 0000000..50aef99 --- /dev/null +++ b/examples/internet_meme/configs/llms/text_res.yml @@ -0,0 +1,6 @@ +name: OpenaiGPTLLM +model_id: gpt-4o +api_key: ${env| custom_openai_key, openai_api_key} +endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1} +temperature: 0 +vision: false \ No newline at end of file diff --git a/examples/internet_meme/configs/tools/websearch.yml b/examples/internet_meme/configs/tools/websearch.yml new file mode 100644 index 0000000..5544864 --- /dev/null +++ b/examples/internet_meme/configs/tools/websearch.yml @@ -0,0 +1,5 @@ +llm: ${sub| text_res} +tools: + - name: WebSearch + bing_api_key: ${env| bing_api_key, null} + llm: ${sub|text_res} \ No newline at end of file diff --git a/examples/internet_meme/configs/workers/meme_explain.yaml b/examples/internet_meme/configs/workers/meme_explain.yaml new file mode 100644 index 0000000..3251c94 --- /dev/null +++ b/examples/internet_meme/configs/workers/meme_explain.yaml @@ -0,0 +1,2 @@ +name: MemeExplain +llm: ${sub| gpt} \ No newline at end of file diff --git a/examples/internet_meme/configs/workers/meme_seacher.yaml b/examples/internet_meme/configs/workers/meme_seacher.yaml new file mode 100644 index 0000000..1905cc2 --- /dev/null +++ b/examples/internet_meme/configs/workers/meme_seacher.yaml @@ -0,0 +1,3 @@ +name: MemeSearcher +llm: ${sub| text_res} +tool_manager: ${sub|websearch} \ No newline at end of file diff --git 
a/examples/internet_meme/configs/workers/simple_vqa.yaml b/examples/internet_meme/configs/workers/simple_vqa.yaml new file mode 100644 index 0000000..5438d9c --- /dev/null +++ b/examples/internet_meme/configs/workers/simple_vqa.yaml @@ -0,0 +1,2 @@ +name: SimpleVQA +llm: ${sub|gpt} \ No newline at end of file diff --git a/examples/internet_meme/container.yaml b/examples/internet_meme/container.yaml new file mode 100644 index 0000000..126d50e --- /dev/null +++ b/examples/internet_meme/container.yaml @@ -0,0 +1,84 @@ +conductor_config: + name: Configuration + base_url: + value: http://10.8.25.26:8080 + description: The Conductor Server API endpoint + env_var: CONDUCTOR_SERVER_URL + auth_key: + value: null + description: The authorization key + env_var: AUTH_KEY + auth_secret: + value: null + description: The authorization secret + env_var: CONDUCTOR_AUTH_SECRET + auth_token_ttl_min: + value: 45 + description: The authorization token refresh interval in minutes. + env_var: AUTH_TOKEN_TTL_MIN + debug: + value: false + description: Debug mode + env_var: DEBUG +connectors: + redis_stream_client: + name: RedisConnector + host: + value: localhost + env_var: HOST + port: + value: 6379 + env_var: PORT + password: + value: null + env_var: PASSWORD + username: + value: null + env_var: USERNAME + db: + value: 0 + env_var: DB + redis_stm_client: + name: RedisConnector + host: + value: localhost + env_var: HOST + port: + value: 6379 + env_var: PORT + password: + value: null + env_var: PASSWORD + username: + value: null + env_var: USERNAME + db: + value: 0 + env_var: DB +components: + DefaultCallback: + name: DefaultCallback + bot_id: + value: '' + env_var: BOT_ID + start_time: + value: 2024-12-03_18:51:46 + env_var: START_TIME + folder_name: + value: ./running_logs/2024-12-03_18:51:46 + env_var: FOLDER_NAME + AppInput: + name: AppInput + AppCallback: + name: AppCallback + bot_id: + value: '' + env_var: BOT_ID + start_time: + value: 2024-12-03_18:51:46 + env_var: START_TIME + 
folder_name: + value: ./running_logs/2024-12-03_18:51:46 + env_var: FOLDER_NAME + RedisSTM: + name: RedisSTM diff --git a/examples/internet_meme/run_cli.py b/examples/internet_meme/run_cli.py new file mode 100644 index 0000000..7fc0bcc --- /dev/null +++ b/examples/internet_meme/run_cli.py @@ -0,0 +1,48 @@ +# Import required modules and components +from omagent_core.utils.container import container +from omagent_core.engine.workflow.conductor_workflow import ConductorWorkflow +from omagent_core.engine.workflow.task.simple_task import simple_task +from pathlib import Path +from omagent_core.utils.registry import registry +from omagent_core.clients.devices.cli.client import DefaultClient +from omagent_core.utils.logger import logging + +from agent.input_interface.input_interface import InputInterface + +# Initialize logging +logging.init_logger("omagent", "omagent", level="INFO") +# Set current working directory path +CURRENT_PATH = Path(__file__).parents[0] + +# Import registered modules +registry.import_module(project_path=CURRENT_PATH.joinpath('agent')) + +container.register_stm("RedisSTM") +# Load container configuration from YAML file +container.from_config(CURRENT_PATH.joinpath('container.yaml')) + + + +# Initialize simple VQA workflow +workflow = ConductorWorkflow(name='Internet_Meme') + +# Configure workflow tasks: +# 1. Input interface for user interaction +task1 = simple_task(task_def_name='InputInterface', task_reference_name='input_task') +# # 2. 
# 2. Web search for the meme named in the user instruction
task2 = simple_task(
    task_def_name='MemeSearcher',
    task_reference_name='meme_searcher',
    inputs={'user_instruction': task1.output('user_instruction')},
)

# 3. Generate the meme explanation page from the search results
task3 = simple_task(task_def_name='MemeExplain', task_reference_name='meme_explain')

# Configure workflow execution flow: Input -> MemeSearcher -> MemeExplain
# (the original comment said "Input -> VQA", which no longer matches the tasks)
workflow >> task1 >> task2 >> task3

# Register the workflow definition with the Conductor server
workflow.register(True)

# Initialize and start the CLI client with the workflow configuration
config_path = CURRENT_PATH.joinpath('configs')
cli_client = DefaultClient(interactor=workflow, config_path=config_path, workers=[InputInterface()])
cli_client.start_interactor()