-
Notifications
You must be signed in to change notification settings - Fork 360
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
--------- Co-authored-by: DavdGao <[email protected]>
- Loading branch information
1 parent
c266df4
commit 01530ee
Showing
5 changed files
with
282 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,6 +138,7 @@ AgentScope支持使用以下库快速部署本地模型服务。 | |
- 文件操作 | ||
- 文本处理 | ||
- 多模态生成 | ||
- 维基百科搜索 | ||
|
||
**样例应用** | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Search content from Wikipedia | ||
""" | ||
import requests | ||
|
||
from ..service_response import ( | ||
ServiceResponse, | ||
ServiceExecStatus, | ||
) | ||
|
||
|
||
def wikipedia_search_categories(
    query: str,
    max_members: int = 1000,
) -> ServiceResponse:
    """Retrieve the members of a Wikipedia category page.

    The category is queried through the MediaWiki ``categorymembers`` list
    on English Wikipedia, following continuation tokens until
    ``max_members`` entries are collected or the category is exhausted.

    Args:
        query (str):
            The category name to look up. It is sent to the API as
            ``Category:<query>``.
        max_members (int):
            The maximum number of members to output

    Returns:
        `ServiceResponse`: A response that contains the execution status and
        returned content. On success the content is a list of members, and
        the meanings of the keys in each member are:

            - "pageid": unique page ID for the member
            - "ns": namespace for the member
            - "title": title of the member

        The status is ``ERROR`` when the request fails (content is the
        error message) or when no member is found (content is an empty
        list).

    Example:

        .. code-block:: python

            members = wikipedia_search_categories(
                "Machine_learning",
                max_members=10,
            )
            print(members)

        It returns contents:

        .. code-block:: python

            {
                'status': <ServiceExecStatus.SUCCESS: 1>,
                'content': [
                    {
                        'pageid': 67911196,
                        'ns': 0,
                        'title': 'Bayesian learning mechanisms'
                    },
                    {
                        'pageid': 233488,
                        'ns': 0,
                        'title': 'Machine learning'
                    },
                    # ...
                ]
            }
    """
    url = "https://en.wikipedia.org/w/api.php"
    limit_per_request: int = 500  # Maximum number of results per request
    base_params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": f"Category:{query}",
        "format": "json",
    }

    members: list = []
    continuation = None

    try:
        while len(members) < max_members:
            # Build a fresh dict per request instead of mutating a shared
            # one, and never ask for more rows than are still needed.
            params = dict(
                base_params,
                cmlimit=min(limit_per_request, max_members - len(members)),
            )
            if continuation is not None:
                params["cmcontinue"] = continuation

            response = requests.get(url, params=params, timeout=20)
            response.raise_for_status()
            data = response.json()

            # Surface an API-level error payload directly rather than
            # failing below with an opaque KeyError on "query".
            api_error = data.get("error")
            if api_error:
                return ServiceResponse(
                    status=ServiceExecStatus.ERROR,
                    content=str(api_error),
                )

            members.extend(data["query"]["categorymembers"])

            # Without a continuation token there is nothing left to fetch
            continuation = data.get("continue", {}).get("cmcontinue")
            if continuation is None:
                break

    except Exception as e:
        return ServiceResponse(
            status=ServiceExecStatus.ERROR,
            content=str(e),
        )

    # Guard against a server returning more rows than requested
    members = members[:max_members]

    if members:
        return ServiceResponse(ServiceExecStatus.SUCCESS, members)

    return ServiceResponse(ServiceExecStatus.ERROR, members)
|
||
|
||
def wikipedia_search(
    query: str,
) -> ServiceResponse:
    """Search the given query in Wikipedia.

    Note the returned text may be about related entities, which means you
    should adjust your query as needed and search again. The returned text
    may also be too long for some LLMs, so it's recommended to summarize
    the returned text first.

    Args:
        query (`str`):
            The searched query in wikipedia.

    Returns:
        `ServiceResponse`: A response that contains the execution status and
        returned content.
    """
    endpoint = "https://en.wikipedia.org/w/api.php"
    request_params = {
        "action": "query",
        "titles": query,
        "prop": "extracts",
        "explaintext": True,
        "format": "json",
    }

    try:
        reply = requests.get(endpoint, params=request_params, timeout=20)
        reply.raise_for_status()
        pages = reply.json()["query"]["pages"]

        # Collect the plain-text extract of every page; a single page
        # without an extract makes the whole search fail.
        extracts = []
        for page in pages.values():
            if "extract" not in page:
                return ServiceResponse(
                    status=ServiceExecStatus.ERROR,
                    content="No content found",
                )
            extracts.append(page["extract"])

        return ServiceResponse(
            status=ServiceExecStatus.SUCCESS,
            content="\n".join(extracts),
        )

    except Exception as error:
        return ServiceResponse(
            status=ServiceExecStatus.ERROR,
            content=str(error),
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Wiki retriever test.""" | ||
import unittest | ||
from unittest.mock import Mock, patch, MagicMock | ||
|
||
from agentscope.service import ( | ||
wikipedia_search, | ||
wikipedia_search_categories, | ||
ServiceResponse, | ||
ServiceExecStatus, | ||
) | ||
|
||
|
||
class TestWikipedia(unittest.TestCase):
    """Unit tests for the Wikipedia service functions.

    `requests.get` is mocked in every test, so no network access happens.
    """

    # The service module does `import requests` and calls `requests.get`,
    # so the attribute is patched on the `requests` module itself instead
    # of being reached through an unrelated module that imports it.
    @patch("requests.get")
    def test_wikipedia_search_categories(
        self,
        mock_get: MagicMock,
    ) -> None:
        """Test wikipedia_search_categories with a single-page result."""
        mock_response = Mock()
        mock_dict = {
            "query": {
                "categorymembers": [
                    {
                        "pageid": 20,
                        "ns": 0,
                        "title": "This is a test",
                    },
                ],
            },
        }

        expected_result = ServiceResponse(
            status=ServiceExecStatus.SUCCESS,
            content=[
                {
                    "pageid": 20,
                    "ns": 0,
                    "title": "This is a test",
                },
            ],
        )

        mock_response.json.return_value = mock_dict
        mock_get.return_value = mock_response

        test_entity = "Test"
        limit_per_request = 500
        params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": f"Category:{test_entity}",
            "cmlimit": limit_per_request,
            "format": "json",
        }

        results = wikipedia_search_categories(query=test_entity)

        # No continuation token in the mocked reply: exactly one request
        mock_get.assert_called_once_with(
            "https://en.wikipedia.org/w/api.php",
            params=params,
            timeout=20,
        )

        self.assertEqual(
            results,
            expected_result,
        )

    @patch("requests.get")
    def test_wikipedia_search(
        self,
        mock_get: MagicMock,
    ) -> None:
        """Test wikipedia_search joins the extracts of all pages."""

        # Mock responses for extract query
        mock_response = Mock()
        mock_dict = {
            "query": {
                "pages": {
                    "20": {
                        "pageid": 20,
                        "title": "Test",
                        "extract": "This is the first paragraph.",
                    },
                    "21": {
                        "pageid": 21,
                        "title": "Test",
                        "extract": "This is the second paragraph.",
                    },
                },
            },
        }

        mock_response.json.return_value = mock_dict
        mock_get.return_value = mock_response

        expected_response = ServiceResponse(
            status=ServiceExecStatus.SUCCESS,
            content=(
                "This is the first paragraph.\n"
                "This is the second paragraph."
            ),
        )

        response = wikipedia_search("Test")

        # The query must be sent as the page title of an extracts request
        mock_get.assert_called_once_with(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query",
                "titles": "Test",
                "prop": "extracts",
                "explaintext": True,
                "format": "json",
            },
            timeout=20,
        )

        self.assertEqual(expected_response, response)