Skip to content

Commit

Permalink
修改reference类型保留字段;组件标准化单测框架更新: 更新系统变量,增加tool_eval参数和manifests匹配性检查 (#680)
Browse files Browse the repository at this point in the history

* 优化组件标准化单测框架:更新系统变量,增加tool_eval参数和manifests匹配性检查

* 组件标准化manifests更改回滚

* 修改references类型的保留字段

* 修改manifests改动对应的单测

* 修改manifests改动对应的单测

---------

Co-authored-by: yepeiwen01 <[email protected]>
  • Loading branch information
peiwenYe and yepeiwen01 authored Dec 23, 2024
1 parent d55e6fc commit a746f08
Show file tree
Hide file tree
Showing 12 changed files with 204 additions and 241 deletions.
13 changes: 3 additions & 10 deletions python/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,11 @@ class OralText(BaseModel, extra='allow'):

class References(BaseModel, extra='allow'):
type: str = Field(default="", description="类型")
resource_type: str = Field(default="", description="资源类型")
icon: str = Field(default="", description="站点图标")
site_name: str = Field(default="", description="站点名")
source: str = Field(default="", description="来源")
doc_id: str = Field(default="", description="文档id")
title: str = Field(default="", description="标题")
content: str = Field(default="", description="内容")
image_content: str = Field(default="", description="图片内容")
mock_id: Optional[str] = Field(default="", description="模拟数据id")
image_url: str = Field(default="", description="图片url")
video_url: str = Field(default="", description="视频url")
extra: Optional[dict] = Field(default={}, description="其他信息")


class Image(BaseModel, extra='allow'):
Expand Down Expand Up @@ -548,8 +542,7 @@ def create_output(cls, type, text, role="tool", name="", visible_scope="all", ra
elif type == "files":
key_list = ["filename", "url"]
elif type == "references":
key_list = ["type", "resource_type", "icon", "site_name", "source",
"doc_id", "title", "content", "image_content", "image_url", "video_url"]
key_list = ["type", "source", "doc_id", "title", "content"]
elif type == "image":
key_list = ["filename", "url"]
elif type == "chart":
Expand All @@ -562,7 +555,7 @@ def create_output(cls, type, text, role="tool", name="", visible_scope="all", ra
key_list = ["thought", "name", "arguments"]
else:
raise ValueError("Unknown type: {}".format(type))
# assert all(key in text for key in key_list), "all keys:{} must be included in the text field".format(key_list)
assert all(key in text for key in key_list), "all keys:{} must be included in the text field".format(key_list)
else:
raise ValueError("text must be str or dict")

Expand Down
15 changes: 2 additions & 13 deletions python/core/components/v2/handwrite_ocr/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ class HandwriteOCR(Component):
"type": "string"
},
"description": "待识别文件的文件名"
},
"file_urls": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "待识别文件的url下载地址"
}
},
"required": ["file_names"]
Expand Down Expand Up @@ -114,13 +107,11 @@ def run(self, message: Message, timeout: float = None, retry: int = 0) -> Messag
@components_run_stream_trace
def tool_eval(self,
file_names: Optional[list] = [],
file_urls: Optional[dict] = {},
**kwargs):
"""
工具评估函数
Args:
file_names (Optional[list]): 待识别文件的文件名列表
file_urls (Optional[dict]): 待识别文件的url下载地址字典
**kwargs: 其他参数
Raises:
Expand All @@ -133,12 +124,10 @@ def tool_eval(self,
result = ""

sys_file_names = file_names
sys_file_urls = file_urls

if not sys_file_names:
sys_file_names = kwargs.get('_sys_file_names', [])
if not sys_file_urls:
sys_file_urls = kwargs.get('_sys_file_urls', {})

sys_file_urls = kwargs.get('_sys_file_urls', {})

for file_name in sys_file_names:
if utils.is_url(file_name):
Expand Down
24 changes: 5 additions & 19 deletions python/core/components/v2/mix_card_ocr/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,14 @@ class MixCardOCR(Component):
"type": "string"
},
"description": "待识别文件的文件名"
},
"file_urls": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "待识别文件的下载URL"
}
},
"required": ["file_names"]
}
}
]


@HTTPClient.check_param
@components_run_trace
def run(self, message: Message, timeout: float = None, retry: int = 0) -> Message:
Expand Down Expand Up @@ -168,22 +162,16 @@ def _check_service_error(request_id: str, data: dict):
@components_run_stream_trace
def tool_eval(self,
file_names: Optional[list] = [],
file_urls: Optional[dict] = {},
**kwargs):
"""
对指定文件进行OCR识别。
Args:
name (str): API名称。
streaming (bool): 是否流式输出。如果为True,则逐个返回识别结果;如果为False,则一次性返回所有识别结果。
file_names (Optional[List], optional): 要识别的文件名列表。
**kwargs: 其他参数。
Returns:
如果streaming为False,则返回包含所有识别结果的JSON字符串。
如果streaming为True,则逐个返回包含识别结果的字典,每个字典包含以下字段:
type (str): 消息类型,固定为"text"。
text (str): 识别结果的JSON字符串。
visible_scope (str): 消息可见范围,可以是"llm"或"user"。
ComponentOutput: 识别结果。
Raises:
InvalidRequestArgumentError: 如果请求格式错误,即文件URL不存在时抛出。
Expand All @@ -194,12 +182,10 @@ def tool_eval(self,
traceid = kwargs.get("_sys_traceid", "")

sys_file_names = file_names
sys_file_urls = file_urls

if not sys_file_names:
sys_file_names = kwargs.get("_sys_file_names", [])
if not sys_file_urls:
sys_file_urls = kwargs.get("_sys_file_urls", {})

sys_file_urls = kwargs.get("_sys_file_urls", {})

for file_name in sys_file_names:
if utils.is_url(file_name):
Expand Down
15 changes: 2 additions & 13 deletions python/core/components/v2/qrcode_ocr/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,6 @@ class QRcodeOCR(Component):
"location": {
"type": "string",
"description": "是否输出二维码/条形码位置信息"
},
"file_urls": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "待识别文件的URL下载地址"
}
},
"required": ["file_names"]
Expand Down Expand Up @@ -164,14 +157,13 @@ def _check_service_error(request_id: str, data: dict):
)

@components_run_stream_trace
def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false", file_urls:Optional[dict]={}, **kwargs):
def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false", **kwargs):
"""
ToolEval方法,用于执行二维码识别操作。
Args:
file_names (list, 可选): 待识别文件的文件名列表。
location (str, 可选): 是否需要返回二维码位置信息,默认为 "false"。
file_urls (dict, 可选): 待识别文件的URL下载地址字典,格式为 {"filename": "url"}。
Yields:
ComponentOutput: 识别结果,包含识别到的二维码信息。
Expand All @@ -180,13 +172,10 @@ def tool_eval(self, file_names:Optional[list]=[], location: Optional[str]="false
traceid = kwargs.get("_sys_traceid", "")
# file_name
sys_file_names = file_names
sys_file_urls = file_urls

if not sys_file_names:
sys_file_names = kwargs.get("_sys_file_names", [])
if not sys_file_urls:
sys_file_urls = kwargs.get("_sys_file_urls", {})

sys_file_urls = kwargs.get("_sys_file_urls", {})

for file_name in sys_file_names:
if utils.is_url(file_name):
Expand Down
13 changes: 13 additions & 0 deletions python/core/components/v2/tree_mind/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
r"""树图工具"""

import json
from urllib.parse import urlparse, unquote
from typing import Dict, List, Optional, Any
from appbuilder.core.message import Message
from appbuilder.core._client import HTTPClient
Expand Down Expand Up @@ -83,6 +84,17 @@ def _post(self, query, **kwargs):
img_link = treemind_response.info.downloadInfo.fileInfo.pic
return img_link, jump_link

@staticmethod
def get_filename_from_url(url):
    """Return the URL-decoded final path segment of ``url``.

    Args:
        url: URL string to take the file name from.

    Returns:
        The text after the last ``/`` of the URL's path component, with
        percent-escapes decoded. Only the path is consulted, so the query
        string and fragment are not part of the result. Empty when the
        path is empty or ends in ``/``.
    """
    # Split on "/" before decoding so an escaped "%2F" stays inside the name.
    last_segment = urlparse(url).path.rpartition('/')[2]
    return unquote(last_segment)

@components_run_stream_trace
def tool_eval(
self,
Expand Down Expand Up @@ -115,6 +127,7 @@ def tool_eval(
img_link_result = self.create_output(
type="image",
text={
"filename": self.get_filename_from_url(img_link),
"url": img_link
},
visible_scope='all',
Expand Down
Loading

0 comments on commit a746f08

Please sign in to comment.