Added an agent for legal_assistant example #122

Status: Open. Wants to merge 3 commits into base branch `feature/v0.2.1/examples`.
85 changes: 85 additions & 0 deletions examples/legal_assistant/README.md
@@ -0,0 +1,85 @@
# Legal Assistant Example

This example demonstrates how to use the framework to build a multi-turn legal consultation assistant. The example code can be found in the `examples/legal_assistant` directory.

```bash
cd examples/legal_assistant
```

## Overview

This example implements a legal assistant workflow that consists of two main components:

1. **Input Interface**
   - Handles user input containing legal questions and optional images
   - Supports a special "清除历史" (clear history) command to start a new conversation
   - Extracts the user's questions/instructions

2. **Legal Assistant Processing**
   - Takes the user input and any cached image
   - Maintains a bounded multi-turn conversation history
   - Generates legal analysis and advice with the configured LLM

The workflow runs these two steps in a loop: Input Interface -> Legal Assistant, repeated until the user exits.
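The two-step loop can be illustrated with a self-contained simulation. The function bodies below are simplified stand-ins for the real `InputInterface` and `LegalAssistant` workers, not the framework's actual API:

```python
def input_interface(raw_message):
    """Extract the user's instruction from a message payload (stand-in)."""
    text_items = [item['data'] for item in raw_message if item['type'] == 'text']
    return {'user_instruction': text_items[0] if text_items else ''}

def legal_assistant(user_instruction, history):
    """Placeholder response generator standing in for the LLM call."""
    answer = f"[legal analysis of: {user_instruction}]"
    # Each round appends two entries: the user turn and the assistant turn
    history.append({'role': 'user', 'content': user_instruction})
    history.append({'role': 'assistant', 'content': answer})
    return answer

# One iteration of the conversation loop: Input Interface -> Legal Assistant
history = []
message = [{'type': 'text', 'data': 'Is a verbal contract binding?'}]
step1 = input_interface(message)
answer = legal_assistant(step1['user_instruction'], history)
```

In the real example this loop is driven by a Conductor `DoWhileTask` rather than plain Python control flow.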

## Prerequisites

- Python 3.10+
- Required packages installed (see requirements.txt)
- Access to OpenAI API or compatible endpoint (see configs/llms/gpt.yml)
- Redis server running locally or remotely
- Conductor server running locally or remotely

## Configuration

The `container.yaml` file manages dependencies and settings for the different components of the system, including Conductor connections, Redis connections, and other service configurations. To set up your configuration:

1. Generate the container.yaml file:
```bash
python compile_container.py
```
This will create a container.yaml file with default settings under `examples/legal_assistant`.


2. Configure your LLM settings in `configs/llms/gpt.yml`:
- Set your OpenAI API key or compatible endpoint through environment variable or by directly modifying the yml file
```bash
export custom_openai_key="your_openai_api_key"
export custom_openai_endpoint="your_openai_endpoint"
```
- Configure other model settings like temperature as needed through environment variable or by directly modifying the yml file

3. Update settings in the generated `container.yaml`:
- Modify Redis connection settings:
- Set the host, port and credentials for your Redis instance
- Configure both `redis_stream_client` and `redis_stm_client` sections
- Update the Conductor server URL under conductor_config section
- Adjust any other component settings as needed
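For reference, the Redis sections of the generated container.yaml look like the fragment below (default values shown; adjust the host, port, and credentials to match your deployment):

```yaml
connectors:
  redis_stream_client:
    name: RedisConnector
    host:
      value: localhost
    port:
      value: 6379
  redis_stm_client:
    name: RedisConnector
    host:
      value: localhost
    port:
      value: 6379
```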

## Running the Example

Run the legal assistant example:

For terminal/CLI usage:
```bash
python run_cli.py
```

For app/GUI usage:
```bash
python run_app.py
```

## Troubleshooting

If you encounter issues:
- Verify Redis is running and accessible
- Check your OpenAI API key is valid
- Ensure all dependencies are installed correctly
- Review logs for any error messages


## Building the Example

Coming soon! This section will provide detailed instructions for building and packaging the legal_assistant example step by step.

45 changes: 45 additions & 0 deletions examples/legal_assistant/agent/input_interface/input_interface.py
@@ -0,0 +1,45 @@
from pathlib import Path

from omagent_core.utils.registry import registry
from omagent_core.utils.general import read_image
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.logger import logging

CURRENT_PATH = Path(__file__).parents[0]


@registry.register_worker()
class InputInterface(BaseWorker):
    """Enhanced input interface with conversation control."""

    def _run(self, *args, **kwargs):
        # Read user input
        user_input = self.input.read_input(
            workflow_instance_id=self.workflow_instance_id,
            input_prompt='请输入您的法律问题(输入"清除历史"可以开始新的对话):'
        )

        user_instruction = ''
        image_path = None
        content = user_input['messages'][-1]['content']

        # Extract text and image items from the message content
        for content_item in content:
            if content_item['type'] == 'text':
                user_instruction = content_item['data']
            elif content_item['type'] == 'image_url':
                image_path = content_item['data']

        # Handle the special "clear history" command
        if user_instruction.strip() == "清除历史":
            self.stm(self.workflow_instance_id)['conversation_history'] = []
            return {'user_instruction': '对话历史已清除,请输入新的问题。'}

        logging.info(f'User_instruction: {user_instruction}\nImage_path: {image_path}')

        # Cache the image in short-term memory if one was provided
        if image_path:
            img = read_image(input_source=image_path)
            image_cache = {'<image_0>': img}
            self.stm(self.workflow_instance_id)['image_cache'] = image_cache

        return {'user_instruction': user_instruction}
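The content-extraction loop above can be exercised standalone on a sample payload shaped like `user_input['messages'][-1]['content']` (plain Python, no framework dependencies):

```python
# Sample payload: one text item and one image item, mirroring the
# structure the input interface iterates over
content = [
    {'type': 'text', 'data': '合同违约怎么办?'},
    {'type': 'image_url', 'data': '/tmp/contract.jpg'},
]

user_instruction = ''
image_path = None
for content_item in content:
    if content_item['type'] == 'text':
        user_instruction = content_item['data']
    elif content_item['type'] == 'image_url':
        image_path = content_item['data']
```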
80 changes: 80 additions & 0 deletions examples/legal_assistant/agent/legal_assistant/legal_assistant.py
@@ -0,0 +1,80 @@
from pathlib import Path
from typing import List, Dict

from omagent_core.models.llms.base import BaseLLMBackend
from omagent_core.utils.registry import registry
from omagent_core.models.llms.schemas import Message, Content
from omagent_core.utils.general import encode_image
from omagent_core.models.llms.prompt.parser import StrParser
from omagent_core.models.llms.openai_gpt import OpenaiGPTLLM
from omagent_core.engine.worker.base import BaseWorker
from omagent_core.utils.container import container
from pydantic import Field

@registry.register_worker()
class LegalAssistant(BaseWorker, BaseLLMBackend):
    """Legal assistant with multi-turn conversation capability."""
    llm: OpenaiGPTLLM
    max_history: int = Field(default=5)  # Number of recent dialogue rounds to keep

    def _run(self, user_instruction: str, *args, **kwargs):
        # Fetch or initialize the conversation history
        conversation_history = self.stm(self.workflow_instance_id).get('conversation_history', [])

        # Build the message list for the current turn
        chat_message = []

        # System prompt (in Chinese: "You are a professional legal advisor...")
        system_prompt = """你是一名专业的法律顾问。请基于之前的对话历史和当前问题,从法律角度提供专业的分析和建议。
在回答时请:
1. 引用相关法律条款
2. 分析可能的法律风险
3. 提供具体可行的建议
4. 如果需要更多信息,请明确指出
"""
        chat_message.append(Message(role="system", message_type='text', content=system_prompt))

        # Append recent history; each round stores two messages (user + assistant),
        # so keep the last max_history * 2 entries
        for msg in conversation_history[-self.max_history * 2:]:
            chat_message.append(Message(
                role=msg["role"],
                message_type='text',
                content=msg["content"]
            ))

        # Append the current user question
        chat_message.append(Message(role="user", message_type='text', content=user_instruction))

        # Attach the cached image, if one was provided
        if self.stm(self.workflow_instance_id).get('image_cache', None):
            img = self.stm(self.workflow_instance_id)['image_cache']['<image_0>']
            chat_message.append(Message(
                role="user",
                message_type='image',
                content=[Content(
                    type="image_url",
                    image_url={"url": f"data:image/jpeg;base64,{encode_image(img)}"}
                )]
            ))

        # Query the LLM
        chat_complete_res = self.llm.generate(records=chat_message)
        answer = chat_complete_res["choices"][0]["message"]["content"]

        # Update the conversation history
        conversation_history.append({"role": "user", "content": user_instruction})
        conversation_history.append({"role": "assistant", "content": answer})
        self.stm(self.workflow_instance_id)['conversation_history'] = conversation_history

        # Send the answer back to the client
        self.callback.send_answer(self.workflow_instance_id, msg=answer)

        return {
            'answer': answer,
            'conversation_history': conversation_history
        }

    def clear_history(self):
        """Clear the stored conversation history."""
        self.stm(self.workflow_instance_id)['conversation_history'] = []
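The history window is worth checking in isolation. The comment on `max_history` says it counts dialogue rounds, and since each round stores two entries (one user message, one assistant message), keeping `max_history` rounds corresponds to slicing the last `max_history * 2` entries:

```python
max_history = 5

# Build 8 rounds of fake history (16 messages total)
conversation_history = []
for i in range(8):
    conversation_history.append({'role': 'user', 'content': f'question {i}'})
    conversation_history.append({'role': 'assistant', 'content': f'answer {i}'})

# Keep only the most recent max_history rounds (10 messages)
recent = conversation_history[-max_history * 2:]
```

Note that slicing with `[-max_history:]` instead would keep only 5 messages, i.e. two and a half rounds, silently halving the intended context window.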
20 changes: 20 additions & 0 deletions examples/legal_assistant/compile_container.py
@@ -0,0 +1,20 @@
# Import core modules and components
from omagent_core.utils.container import container

# Import workflow related modules
from pathlib import Path
from omagent_core.utils.registry import registry

# Set up path and import modules
CURRENT_PATH = root_path = Path(__file__).parents[0]
registry.import_module()

# Register required components
container.register_callback(callback='AppCallback')
container.register_input(input='AppInput')
container.register_stm("RedisSTM")
# Compile container config
container.compile_config(CURRENT_PATH)



6 changes: 6 additions & 0 deletions examples/legal_assistant/configs/llms/gpt.yml
@@ -0,0 +1,6 @@
name: OpenaiGPTLLM
model_id: gpt-4o
api_key: ${env| custom_openai_key, openai_api_key}
endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1}
temperature: 0
vision: true
3 changes: 3 additions & 0 deletions examples/legal_assistant/configs/workers/legal_assistant.yaml
@@ -0,0 +1,3 @@
name: LegalAssistant
llm: ${sub|gpt}
max_history: 5 # Number of recent dialogue rounds to keep
84 changes: 84 additions & 0 deletions examples/legal_assistant/container.yaml
@@ -0,0 +1,84 @@
conductor_config:
name: Configuration
base_url:
value: http://10.8.21.200:8080
description: The Conductor Server API endpoint
env_var: CONDUCTOR_SERVER_URL
auth_key:
value: null
description: The authorization key
env_var: AUTH_KEY
auth_secret:
value: null
description: The authorization secret
env_var: CONDUCTOR_AUTH_SECRET
auth_token_ttl_min:
value: 45
description: The authorization token refresh interval in minutes.
env_var: AUTH_TOKEN_TTL_MIN
debug:
value: false
description: Debug mode
env_var: DEBUG
connectors:
redis_stream_client:
name: RedisConnector
host:
value: localhost
env_var: HOST
port:
value: 6379
env_var: PORT
password:
value: null
env_var: PASSWORD
username:
value: null
env_var: USERNAME
db:
value: 0
env_var: DB
redis_stm_client:
name: RedisConnector
host:
value: localhost
env_var: HOST
port:
value: 6379
env_var: PORT
password:
value: null
env_var: PASSWORD
username:
value: null
env_var: USERNAME
db:
value: 0
env_var: DB
components:
AppCallback:
name: AppCallback
bot_id:
value: ''
env_var: BOT_ID
start_time:
value: 2024-12-10_01:43:04
env_var: START_TIME
folder_name:
value: ./running_logs/2024-12-10_01:43:04
env_var: FOLDER_NAME
AppInput:
name: AppInput
DefaultCallback:
name: DefaultCallback
bot_id:
value: ''
env_var: BOT_ID
start_time:
value: 2024-12-10_01:43:04
env_var: START_TIME
folder_name:
value: ./running_logs/2024-12-10_01:43:04
env_var: FOLDER_NAME
RedisSTM:
name: RedisSTM
52 changes: 52 additions & 0 deletions examples/legal_assistant/run_app.py
@@ -0,0 +1,52 @@
# Import required modules and components
from omagent_core.utils.container import container
from omagent_core.engine.workflow.conductor_workflow import ConductorWorkflow
from omagent_core.engine.workflow.task.simple_task import simple_task
from pathlib import Path
from omagent_core.utils.registry import registry
from omagent_core.clients.devices.app.client import AppClient
from omagent_core.utils.logger import logging
logging.init_logger("omagent", "omagent", level="INFO")

# Import agent-specific components
from agent.input_interface.input_interface import InputInterface
from omagent_core.engine.workflow.task.do_while_task import DoWhileTask

# Set current working directory path
CURRENT_PATH = root_path = Path(__file__).parents[0]

# Import registered modules
registry.import_module(project_path=CURRENT_PATH.joinpath('agent'))

container.register_stm("RedisSTM")
# Load container configuration from YAML file
container.from_config(CURRENT_PATH.joinpath('container.yaml'))



# Initialize the legal assistant workflow
workflow = ConductorWorkflow(name='legal_assistant')

# Configure workflow tasks:
# 1. Input interface for user interaction
task1 = simple_task(task_def_name='InputInterface', task_reference_name='input_task')
# 2. Legal assistant processing based on user input (the worker registered
#    in configs/workers/legal_assistant.yaml is LegalAssistant, not SimpleVQA)
task2 = simple_task(task_def_name='LegalAssistant', task_reference_name='legal_assistant', inputs={'user_instruction': task1.output('user_instruction')})

# Create the conversation loop task
conversation_loop = DoWhileTask(
    task_ref_name='conversation_loop',
    tasks=[task1, task2],
    condition="true"  # Loop indefinitely until the user exits
)

# Configure workflow execution flow: Input -> Legal Assistant, repeated
workflow >> conversation_loop

# Register workflow
workflow.register(True)

# Initialize and start app client with workflow configuration
config_path = CURRENT_PATH.joinpath('configs')
agent_client = AppClient(interactor=workflow, config_path=config_path, workers=[InputInterface()])
agent_client.start_interactor()