diff --git a/examples/car_assistant/README.md b/examples/car_assistant/README.md new file mode 100644 index 0000000..93ca107 --- /dev/null +++ b/examples/car_assistant/README.md @@ -0,0 +1,85 @@ +# Car Assistant: A Simple Visual Question Answering Example + +This example demonstrates how to use the framework for visual question answering (VQA) tasks. The example code can be found in the `examples/car_assistant` directory. + +```bash + cd examples/car_assistant +``` + +## Overview + +This example implements a simple Visual Question Answering (VQA) workflow that consists of two main components: + +1. **Input Interface** + - Handles user input containing questions about images + - Processes and manages image data + - Extracts the user's questions/instructions + +2. **Simple VQA Processing** + - Takes the user input and image + - Analyzes the image based on the user's question + - Generates appropriate responses to visual queries + +The workflow follows a straightforward sequence: Input Interface -> Simple VQA Processing. + +## Prerequisites + +- Python 3.10+ +- Required packages installed (see requirements.txt) +- Access to OpenAI API or compatible endpoint (see configs/llms/gpt.yml) +- Redis server running locally or remotely +- Conductor server running locally or remotely + +## Configuration + +The container.yaml file is a configuration file that manages dependencies and settings for different components of the system, including Conductor connections, Redis connections, and other service configurations. To set up your configuration: + +1. Generate the container.yaml file: + ```bash + python compile_container.py + ``` + This will create a container.yaml file with default settings under `examples/car_assistant`. + + +2. 
Configure your LLM settings in `configs/llms/gpt.yml`: + - Set your OpenAI API key or compatible endpoint through environment variable or by directly modifying the yml file + ```bash + export custom_openai_key="your_openai_api_key" + export custom_openai_endpoint="your_openai_endpoint" + ``` + - Configure other model settings like temperature as needed through environment variable or by directly modifying the yml file + +3. Update settings in the generated `container.yaml`: + - Modify Redis connection settings: + - Set the host, port and credentials for your Redis instance + - Configure both `redis_stream_client` and `redis_stm_client` sections + - Update the Conductor server URL under conductor_config section + - Adjust any other component settings as needed + +## Running the Example + +1. Run the car assistant VQA example: + + For terminal/CLI usage: + ```bash + python run_cli.py + ``` + + For app/GUI usage: + ```bash + python run_app.py + ``` + +## Troubleshooting + +If you encounter issues: +- Verify Redis is running and accessible +- Check your OpenAI API key is valid +- Ensure all dependencies are installed correctly +- Review logs for any error messages + + +## Building the Example + +Coming soon! This section will provide detailed instructions for building and packaging the car_assistant example step by step. 
+ diff --git a/examples/car_assistant/__init__.py b/examples/car_assistant/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/car_assistant/compile_container.py b/examples/car_assistant/compile_container.py new file mode 100644 index 0000000..d47b398 --- /dev/null +++ b/examples/car_assistant/compile_container.py @@ -0,0 +1,20 @@ +# Import core modules and components +from omagent_core.utils.container import container + +# Import workflow related modules +from pathlib import Path +from omagent_core.utils.registry import registry + +# Set up path and import modules +CURRENT_PATH = root_path = Path(__file__).parents[0] +registry.import_module() + +# Register required components +container.register_callback(callback='AppCallback') +container.register_input(input='AppInput') +container.register_stm("RedisSTM") +# Compile container config +container.compile_config(CURRENT_PATH) + + + diff --git a/examples/car_assistant/configs/llms/gpt.yml b/examples/car_assistant/configs/llms/gpt.yml new file mode 100644 index 0000000..2f5df7d --- /dev/null +++ b/examples/car_assistant/configs/llms/gpt.yml @@ -0,0 +1,6 @@ +name: OpenaiGPTLLM +model_id: gpt-4o +api_key: ${env| custom_openai_key, your_openai_api_key} +endpoint: ${env| custom_openai_endpoint, https://api.openai.com/v1} +temperature: 0 +vision: true \ No newline at end of file diff --git a/examples/car_assistant/configs/workers/simple_vqa.yaml b/examples/car_assistant/configs/workers/simple_vqa.yaml new file mode 100644 index 0000000..5438d9c --- /dev/null +++ b/examples/car_assistant/configs/workers/simple_vqa.yaml @@ -0,0 +1,2 @@ +name: SimpleVQA +llm: ${sub|gpt} \ No newline at end of file diff --git a/examples/car_assistant/input_interface/__init__.py b/examples/car_assistant/input_interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/car_assistant/input_interface/input_interface.py 
b/examples/car_assistant/input_interface/input_interface.py new file mode 100644 index 0000000..e9331a0 --- /dev/null +++ b/examples/car_assistant/input_interface/input_interface.py @@ -0,0 +1,45 @@ +from pathlib import Path + +from omagent_core.utils.registry import registry +from omagent_core.utils.general import read_image +from omagent_core.engine.worker.base import BaseWorker +from omagent_core.utils.logger import logging + +CURRENT_PATH = Path(__file__).parents[0] + + +@registry.register_worker() +class InputInterface(BaseWorker): + """Input interface processor that handles user instructions and image input. + + This processor: + 1. Reads user input containing question and image via input interface + 2. Extracts text instruction and image path from the input + 3. Loads and caches the image in workflow storage + 4. Returns the user instruction for next steps + """ + + def _run(self, *args, **kwargs): + # Read user input through configured input interface + user_input = self.input.read_input(workflow_instance_id=self.workflow_instance_id, input_prompt='Please tell me a question about car and a image.') + + image_path = None + # Extract text and image content from input message + content = user_input['messages'][-1]['content'] + for content_item in content: + if content_item['type'] == 'text': + user_instruction = content_item['data'] + elif content_item['type'] == 'image_url': + image_path = content_item['data'] + + logging.info(f'User_instruction: {user_instruction}\nImage_path: {image_path}') + + # Load image from file system + if image_path: + img = read_image(input_source=image_path) + + # Store image in workflow shared memory with standard key + image_cache = {'' : img} + self.stm(self.workflow_instance_id)['image_cache'] = image_cache + + return {'user_instruction': user_instruction} diff --git a/examples/car_assistant/run_app.py b/examples/car_assistant/run_app.py new file mode 100644 index 0000000..23def41 --- /dev/null +++ 
b/examples/car_assistant/run_app.py @@ -0,0 +1,44 @@ +# Import required modules and components +from omagent_core.utils.container import container +from omagent_core.engine.workflow.conductor_workflow import ConductorWorkflow +from omagent_core.engine.workflow.task.simple_task import simple_task +from pathlib import Path +from omagent_core.utils.registry import registry +from omagent_core.clients.devices.app.client import AppClient +from omagent_core.utils.logger import logging +logging.init_logger("omagent", "omagent", level="INFO") + +# Import agent-specific components +from agent.input_interface.input_interface import InputInterface + +# Set current working directory path +CURRENT_PATH = root_path = Path(__file__).parents[0] + +# Import registered modules +registry.import_module(project_path=CURRENT_PATH.joinpath('agent')) + +container.register_stm("RedisSTM") +# Load container configuration from YAML file +container.from_config(CURRENT_PATH.joinpath('container.yaml')) + + + +# Initialize simple VQA workflow +workflow = ConductorWorkflow(name='step1_simpleVQA') + +# Configure workflow tasks: +# 1. Input interface for user interaction +task1 = simple_task(task_def_name='InputInterface', task_reference_name='input_task') +# 2. 
Simple VQA processing based on user input +task2 = simple_task(task_def_name='SimpleVQA', task_reference_name='simple_vqa', inputs={'user_instruction': task1.output('user_instruction')}) + +# Configure workflow execution flow: Input -> VQA +workflow >> task1 >> task2 + +# Register workflow +workflow.register(True) + +# Initialize and start app client with workflow configuration +config_path = CURRENT_PATH.joinpath('configs') +agent_client = AppClient(interactor=workflow, config_path=config_path, workers=[InputInterface()]) +agent_client.start_interactor() diff --git a/examples/car_assistant/run_cli.py b/examples/car_assistant/run_cli.py new file mode 100644 index 0000000..9c5e0c4 --- /dev/null +++ b/examples/car_assistant/run_cli.py @@ -0,0 +1,45 @@ +# Import required modules and components +from omagent_core.utils.container import container +from omagent_core.engine.workflow.conductor_workflow import ConductorWorkflow +from omagent_core.engine.workflow.task.simple_task import simple_task +from pathlib import Path +from omagent_core.utils.registry import registry +from omagent_core.clients.devices.cli.client import DefaultClient +from omagent_core.utils.logger import logging + +from agent.input_interface.input_interface import InputInterface + +# Initialize logging +logging.init_logger("omagent", "omagent", level="INFO") + +# Set current working directory path +CURRENT_PATH = Path(__file__).parents[0] + +# Import registered modules +registry.import_module(project_path=CURRENT_PATH.joinpath('agent')) + +container.register_stm("RedisSTM") +# Load container configuration from YAML file +container.from_config(CURRENT_PATH.joinpath('container.yaml')) + + + +# Initialize simple VQA workflow +workflow = ConductorWorkflow(name='car_assistant') + +# Configure workflow tasks: +# 1. Input interface for user interaction +task1 = simple_task(task_def_name='InputInterface', task_reference_name='input_task') +# 2. 
Simple VQA processing based on user input +task2 = simple_task(task_def_name='SimpleVQA', task_reference_name='simple_vqa', inputs={'user_instruction': task1.output('user_instruction')}) + +# Configure workflow execution flow: Input -> VQA +workflow >> task1 >> task2 + +# Register workflow +workflow.register(True) + +# Initialize and start CLI client with workflow configuration +config_path = CURRENT_PATH.joinpath('configs') +cli_client = DefaultClient(interactor=workflow, config_path=config_path, workers=[InputInterface()]) +cli_client.start_interactor()