add image_gen_lite tool

mushenL · May 27, 2024 · 5167333 · 5167333
1 parent 0993bb6
commit 5167333
Show file tree

Hide file tree

Showing 7 changed files with 88 additions and 191 deletions.
diff --git a/modelscope_agent/tools/__init__.py b/modelscope_agent/tools/__init__.py
@@ -7,7 +7,7 @@
     'code_interpreter': ['CodeInterpreter'],
     'contrib': ['AliyunRenewInstanceTool'],
     'dashscope_tools': [
-        'ImageEnhancement', 'TextToImageTool', 'TextToImageLoraTool',
+        'ImageEnhancement', 'TextToImageTool', 'TextToImageLiteTool',
         'ParaformerAsrTool', 'QWenVL', 'SambertTtsTool', 'StyleRepaint',
         'WordArtTexture'
     ],

diff --git a/modelscope_agent/tools/base.py b/modelscope_agent/tools/base.py
@@ -18,8 +18,8 @@
     'WebSearch',
     'image_gen':
     'TextToImageTool',
-    'image_gen_lora':
-    'TextToImageLoraTool',
+    'image_gen_lite':
+    'TextToImageLiteTool',
     'image_enhancement':
     'ImageEnhancement',
     'qwen_vl':

diff --git a/modelscope_agent/tools/dashscope_tools/__init__.py b/modelscope_agent/tools/dashscope_tools/__init__.py
@@ -5,7 +5,7 @@
 _import_structure = {
     'image_enhancement': ['ImageEnhancement'],
     'image_generation': ['TextToImageTool'],
-    'image_generation_lora': ['TextToImageLoraTool'],
+    'image_generation_lite': ['TextToImageLiteTool'],
     'qwen_vl': ['QWenVL'],
     'style_repaint': ['StyleRepaint'],
     'wordart_tool': ['WordArtTexture'],

diff --git a/modelscope_agent/tools/dashscope_tools/image_generation.py b/modelscope_agent/tools/dashscope_tools/image_generation.py
@@ -22,12 +22,6 @@ class TextToImageTool(BaseTool):
         '格式是 数字*数字，表示希望生成的图像的分辨率大小，选项有[1024*1024, 720*1280, 1280*720]',
         'required': True,
         'type': 'string'
-    }, {
-        'name': 'lora_index',
-        'description':
-        '如果用户指定使用lora则使用该参数，通过选择的lora层来决定生成的图像的风格，如果用户没有制定，则默认为wanxlite1.4.5_lora_huibenlite1_20240519',
-        'required': False,
-        'type': 'string'
     }]
 
     def call(self, params: str, **kwargs) -> str:
@@ -44,12 +38,7 @@ def call(self, params: str, **kwargs) -> str:
         if prompt is None:
             return None
         seed = kwargs.get('seed', None)
-        model = kwargs.get('model', 'wanx-lora-lite')
-        lora_index = params.get('lora_index', None)
-        if lora_index:
-            extra_input = {'lora_index': lora_index}
-        else:
-            extra_input = None
+        model = kwargs.get('model', 'wanx-v1')
         try:
             dashscope.api_key = get_api_key(ApiNames.dashscope_api_key,
                                             **kwargs)
@@ -62,7 +51,6 @@ def call(self, params: str, **kwargs) -> str:
             n=1,
             size=resolution,
             steps=10,
-            seed=seed,
-            extra_input=extra_input)
+            seed=seed)
         image_url = response.output['results'][0]['url']
         return f'![IMAGEGEN]({image_url})'
diff --git a/modelscope_agent/tools/dashscope_tools/image_generation_lite.py b/modelscope_agent/tools/dashscope_tools/image_generation_lite.py
@@ -0,0 +1,71 @@
+import os
+
+import dashscope
+from dashscope import ImageSynthesis
+from modelscope_agent.constants import ApiNames
+from modelscope_agent.tools.base import BaseTool, register_tool
+from modelscope_agent.utils.utils import get_api_key
+
+MAX_RETRY_TIMES = 3
+
+
+@register_tool('image_gen_lite')
+class TextToImageLiteTool(BaseTool):
+    """Text-to-image tool backed by ``dashscope.ImageSynthesis``.
+
+    Registered under the tool name ``image_gen_lite``. Accepts a text
+    prompt, a resolution and an optional ``lora_index``; when a
+    ``lora_index`` is supplied it is forwarded to the service through
+    ``extra_input`` and a different default model is selected.
+    """
+    description = 'AI绘画（图像生成）服务，输入文本描述和图像分辨率，返回根据文本信息绘制的图片URL，同时允许用户通过添加lora层来选择风格化的图片'
+    name = 'image_gen_lite'
+    parameters: list = [{
+        'name': 'text',
+        'description': '详细描述了希望生成的图像具有什么内容，例如人物、环境、动作等细节描述',
+        'required': True,
+        'type': 'string'
+    }, {
+        'name': 'resolution',
+        'description':
+        '格式是 数字*数字，表示希望生成的图像的分辨率大小，选项有[1024*1024, 720*1280, 1280*720]',
+        'required': True,
+        'type': 'string'
+    }, {
+        'name': 'lora_index',
+        'description':
+        '如果用户指定使用lora则使用该参数，通过选择的lora层来决定生成的图像的风格，如果用户没有制定，则默认为wanxlite1.4.5_lora_huibenlite1_20240519',
+        'required': False,
+        'type': 'string'
+    }]
+
+    def call(self, params: str, **kwargs) -> str:
+        """Generate one image and return it as a markdown image link.
+
+        Args:
+            params: Raw tool arguments; parsed by ``self._verify_args``.
+                The parsed result is expected to provide ``text`` and
+                ``resolution`` and may provide ``lora_index``.
+            **kwargs: Optional ``seed`` and ``model`` overrides; the
+                remaining keyword arguments are passed to
+                ``get_api_key``.
+
+        Returns:
+            ``'![IMAGEGEN](<url>)'`` on success, ``'Parameter Error'``
+            when ``_verify_args`` returns a string, or ``None`` when the
+            parsed ``text`` value is ``None``.
+
+        Raises:
+            ValueError: If ``get_api_key`` fails with ``AssertionError``.
+            RuntimeError: If the service response does not contain a
+                result URL.
+        """
+        params = self._verify_args(params)
+        if isinstance(params, str):
+            return 'Parameter Error'
+
+        # Unsupported resolutions silently fall back to landscape.
+        if params['resolution'] in ['1024*1024', '720*1280', '1280*720']:
+            resolution = params['resolution']
+        else:
+            resolution = '1280*720'
+
+        prompt = params['text']
+        if prompt is None:
+            return None
+        seed = kwargs.get('seed', None)
+        lora_index = params.get('lora_index', None)
+        # The default model depends on whether a lora layer was requested;
+        # an explicit ``model`` keyword argument wins in both cases.
+        if lora_index:
+            extra_input = {'lora_index': lora_index}
+            model = kwargs.get('model', 'wanx-lora-lite')
+        else:
+            extra_input = None
+            model = kwargs.get('model', 'wanx-lite-v1')
+        try:
+            dashscope.api_key = get_api_key(ApiNames.dashscope_api_key,
+                                            **kwargs)
+        except AssertionError:
+            raise ValueError('Please set valid DASHSCOPE_API_KEY!')
+
+        response = ImageSynthesis.call(
+            model=model,
+            prompt=prompt,
+            n=1,
+            size=resolution,
+            steps=10,
+            seed=seed,
+            extra_input=extra_input)
+        # A failed or empty reply has no usable ``output['results'][0]``;
+        # report the service-provided code/message instead of letting an
+        # opaque TypeError/KeyError/IndexError escape.
+        try:
+            image_url = response.output['results'][0]['url']
+        except (TypeError, KeyError, IndexError) as e:
+            code = getattr(response, 'code', None)
+            message = getattr(response, 'message', None)
+            raise RuntimeError(
+                f'Image generation failed: code={code}, message={message}'
+            ) from e
+        return f'![IMAGEGEN]({image_url})'
diff --git a/modelscope_agent/tools/dashscope_tools/image_generation_lora.py b/modelscope_agent/tools/dashscope_tools/image_generation_lora.py
diff --git a/tests/tools/test_image_gen_lora.py → tests/tools/test_image_gen_lite.py b/tests/tools/test_image_gen_lora.py → tests/tools/test_image_gen_lite.py
@@ -2,42 +2,39 @@
 
 import pytest
 from modelscope_agent.agents.role_play import RolePlay
-from modelscope_agent.tools.dashscope_tools.image_generation_lora import \
-    TextToImageLoraTool
+from modelscope_agent.tools.dashscope_tools.image_generation_lite import \
+    TextToImageLiteTool
 
 IS_FORKED_PR = os.getenv('IS_FORKED_PR', 'false') == 'true'
 
 
 @pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
-def test_image_gen_lora():
+def test_image_gen_lite():
     params = """
-    {'input.prompt': '一只可爱的小兔子正在花园里努力地拔一个大萝卜，周围是绿油油的草地和鲜艳的花朵，天空是清澈的蓝色，太阳公公笑眯眯地看着。',
-    'input.lora_index': 'wanxlite1.4.5_lora_huibenlite1_20240519',
-    'parameters.size': '1024*1024',
-    'parameters.n': 1
+    {'text': '一只可爱的小兔子正在花园里努力地拔一个大萝卜，周围是绿油油的草地和鲜艳的花朵，天空是清澈的蓝色，太阳公公笑眯眯地看着。',
+    'lora_index': 'wanxlite1.4.5_lora_huibenlite1_20240519',
+    'resolution': '1024*1024'
     }
     """
-    t2i = TextToImageLoraTool()
+    t2i = TextToImageLiteTool()
     res = t2i.call(params)
-    print(res)
-    assert (res.startswith('https://'))
+    assert (res.startswith('![IMAGEGEN]('))
 
 
 @pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
-def test_image_gen_lora_role():
+def test_image_gen_lite_role():
     role_template = '扮演一个绘本小助手，可以利用工具来创建符合儿童的童话绘本图片'
 
     llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'}
 
     # input tool args
-    function_list = ['image_gen_lora']
+    function_list = ['image_gen_lite']
 
     bot = RolePlay(
         function_list=function_list, llm=llm_config, instruction=role_template)
 
-    response = bot.run('绘制一个小兔子拔萝卜的场景')
+    response = bot.run('绘制一个小兔子拔萝卜的场景，使用lora来控制风格')
     text = ''
     for chunk in response:
         text += chunk
-    print(text)
     assert isinstance(text, str)