修改reference类型保留字段；组件标准化单测框架更新: 更新系统变量，增加tool_eval参数和manifests匹配性检查 (#680)

* 优化组件标准化单测框架: 更新系统变量，增加tool_eval参数和manifests匹配性检查
* 组件标准化manifests更改回滚
* 修改references类型的保留字段
* 修改manifests改动对应的单测
* 修改manifests改动对应的单测

---------

Co-authored-by: yepeiwen01 <[email protected]>
baidubce · Dec 23, 2024 · a746f08 · a746f08
1 parent d55e6fc
commit a746f08
Show file tree

Hide file tree

Showing 12 changed files with 204 additions and 241 deletions.
diff --git a/python/core/component.py b/python/core/component.py
@@ -83,17 +83,11 @@ class OralText(BaseModel, extra='allow'):
 
 class References(BaseModel, extra='allow'):
     type: str = Field(default="", description="类型")
-    resource_type: str = Field(default="", description="资源类型")
-    icon: str = Field(default="", description="站点图标")
-    site_name: str = Field(default="", description="站点名")
     source: str = Field(default="", description="来源")
     doc_id: str = Field(default="", description="文档id")
     title: str = Field(default="", description="标题")
     content: str = Field(default="", description="内容")
-    image_content: str = Field(default="", description="图片内容")
-    mock_id: Optional[str] = Field(default="", description="模拟数据id")
-    image_url: str = Field(default="", description="图片url")
-    video_url: str = Field(default="", description="视频url")
+    extra: Optional[dict] = Field(default={}, description="其他信息")
 
 
 class Image(BaseModel, extra='allow'):
@@ -548,8 +542,7 @@ def create_output(cls, type, text, role="tool", name="", visible_scope="all", ra
             elif type == "files":
                 key_list = ["filename", "url"]
             elif type == "references":
-                key_list = ["type", "resource_type", "icon", "site_name", "source",
-                            "doc_id", "title", "content", "image_content", "image_url", "video_url"]
+                key_list = ["type", "source", "doc_id", "title", "content"]
             elif type == "image":
                 key_list = ["filename", "url"]
             elif type == "chart":
@@ -562,7 +555,7 @@ def create_output(cls, type, text, role="tool", name="", visible_scope="all", ra
                 key_list = ["thought", "name", "arguments"]
             else:
                 raise ValueError("Unknown type: {}".format(type))
-            # assert all(key in text for key in key_list), "all keys:{} must be included in the text field".format(key_list)
+            assert all(key in text for key in key_list), "all keys:{} must be included in the text field".format(key_list)
         else:
             raise ValueError("text must be str or dict")
 

diff --git a/python/core/components/v2/handwrite_ocr/component.py b/python/core/components/v2/handwrite_ocr/component.py
@@ -59,13 +59,6 @@ class HandwriteOCR(Component):
                             "type": "string"
                         },
                         "description": "待识别文件的文件名"
-                    },
-                    "file_urls": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        },
-                        "description": "待识别文件的url下载地址"
                     }
                 },
                 "required": ["file_names"]
@@ -114,13 +107,11 @@ def run(self, message: Message, timeout: float = None, retry: int = 0) -> Messag
     @components_run_stream_trace
     def tool_eval(self,
                   file_names: Optional[list] = [],
-                  file_urls: Optional[dict] = {},
                   **kwargs):
         """
         工具评估函数
         Args:
             file_names (Optional[list]): 待识别文件的文件名列表
-            file_urls (Optional[dict]): 待识别文件的url下载地址字典
             **kwargs: 其他参数
         
         Raises:
@@ -133,12 +124,10 @@ def tool_eval(self,
         result = ""
 
         sys_file_names = file_names
-        sys_file_urls = file_urls
-
         if not sys_file_names:
             sys_file_names = kwargs.get('_sys_file_names', [])
-        if not sys_file_urls:
-            sys_file_urls = kwargs.get('_sys_file_urls', {})
+
+        sys_file_urls = kwargs.get('_sys_file_urls', {})
 
         for file_name in sys_file_names:
             if utils.is_url(file_name):

diff --git a/python/core/components/v2/mix_card_ocr/component.py b/python/core/components/v2/mix_card_ocr/component.py
@@ -63,20 +63,14 @@ class MixCardOCR(Component):
                             "type": "string"
                         },
                         "description": "待识别文件的文件名"
-                    },
-                    "file_urls": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        },
-                        "description": "待识别文件的下载URL"
                     }
                 },
                 "required": ["file_names"]
             }
         }
     ]
 
+
     @HTTPClient.check_param
     @components_run_trace
     def run(self, message: Message, timeout: float = None, retry: int = 0) -> Message:
@@ -168,22 +162,16 @@ def _check_service_error(request_id: str, data: dict):
     @components_run_stream_trace
     def tool_eval(self,
                   file_names: Optional[list] = [],
-                  file_urls: Optional[dict] = {},
                   **kwargs):
         """
         对指定文件进行OCR识别。
 
         Args:
-            name (str): API名称。
-            streaming (bool): 是否流式输出。如果为True，则逐个返回识别结果；如果为False，则一次性返回所有识别结果。
+            file_names (Optional[List], optional): 要识别的文件名列表。 
             **kwargs: 其他参数。
 
         Returns:
-            如果streaming为False，则返回包含所有识别结果的JSON字符串。
-            如果streaming为True，则逐个返回包含识别结果的字典，每个字典包含以下字段：
-                type (str): 消息类型，固定为"text"。
-                text (str): 识别结果的JSON字符串。
-                visible_scope (str): 消息可见范围，可以是"llm"或"user"。
+            ComponentOutput: 识别结果。
 
         Raises:
             InvalidRequestArgumentError: 如果请求格式错误，即文件URL不存在时抛出。
@@ -194,12 +182,10 @@ def tool_eval(self,
         traceid = kwargs.get("_sys_traceid", "")
 
         sys_file_names = file_names
-        sys_file_urls = file_urls
-
         if not sys_file_names:
             sys_file_names = kwargs.get("_sys_file_names", [])
-        if not sys_file_urls:
-            sys_file_urls = kwargs.get("_sys_file_urls", {})
+
+        sys_file_urls = kwargs.get("_sys_file_urls", {})
 
         for file_name in sys_file_names:
             if utils.is_url(file_name):

diff --git a/python/core/components/v2/qrcode_ocr/component.py b/python/core/components/v2/qrcode_ocr/component.py
@@ -66,13 +66,6 @@ class QRcodeOCR(Component):
                     "location": {
                         "type": "string",
                         "description": "是否输出二维码/条形码位置信息"
-                    },
-                    "file_urls": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        },
-                        "description": "待识别文件的URL下载地址"
                     }
                 },
                 "required": ["file_names"]
@@ -164,14 +157,13 @@ def _check_service_error(request_id: str, data: dict):
             )
 
     @components_run_stream_trace
-    def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false", file_urls:Optional[dict]={}, **kwargs):
+    def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false",  **kwargs):
         """
         ToolEval方法，用于执行二维码识别操作。
         
         Args:
             file_names (list, 可选): 待识别文件的文件名列表。
             location (str, 可选): 是否需要返回二维码位置信息，默认为 "false"。
-            file_urls (dict, 可选): 待识别文件的URL下载地址字典，格式为 {"filename": "url"}。
             
         Yields:
             ComponentOutput: 识别结果，包含识别到的二维码信息。
@@ -180,13 +172,10 @@ def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false
         traceid = kwargs.get("_sys_traceid", "")
         # file_name
         sys_file_names = file_names
-        sys_file_urls = file_urls
-
         if not sys_file_names:
             sys_file_names = kwargs.get("_sys_file_names", [])
-        if not sys_file_urls:
-            sys_file_urls = kwargs.get("_sys_file_urls", {})
 
+        sys_file_urls = kwargs.get("_sys_file_urls", {})
 
         for file_name in sys_file_names:
             if utils.is_url(file_name):

diff --git a/python/core/components/v2/tree_mind/component.py b/python/core/components/v2/tree_mind/component.py
@@ -15,6 +15,7 @@
 r"""树图工具"""
 
 import json
+from urllib.parse import urlparse, unquote
 from typing import Dict, List, Optional, Any
 from appbuilder.core.message import Message
 from appbuilder.core._client import HTTPClient
@@ -83,6 +84,17 @@ def _post(self, query, **kwargs):
         img_link = treemind_response.info.downloadInfo.fileInfo.pic
         return img_link, jump_link
 
+    @staticmethod
+    def get_filename_from_url(url):
+        """Extract the file name from the given URL."""
+        parsed_url = urlparse(url)
+        # Take only the path component of the URL
+        path = parsed_url.path
+        # The file name is the last "/"-separated segment of the path
+        filename = path.split('/')[-1]
+        # Decode percent-encoded characters in the file name
+        return unquote(filename)
+
     @components_run_stream_trace
     def tool_eval(
             self,
@@ -115,6 +127,7 @@ def tool_eval(
         img_link_result = self.create_output(
             type="image",
             text={
+                "filename": self.get_filename_from_url(img_link),
                 "url": img_link
             },
             visible_scope='all',