Skip to content

Commit

Permalink
fix the search parameters (#348)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShilinHe authored May 16, 2024
2 parents 9c9cc17 + d051de4 commit 76f067d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
3 changes: 2 additions & 1 deletion auto_eval/cases/web_search/web_search.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ version: 0.1
config_var:
execution_service.kernel_mode: "local"
session.roles: ["planner", "web_search"]
+ web_search.chunk_size: 2000
app_dir: ../project/
task_description: |-
The task is to find the authors of the paper "TaskWeaver: A Code-First Agent Framework" and their affiliations.
Expand All @@ -11,5 +12,5 @@ scoring_points:
weight: 1
- score_point: The affiliation list should include Microsoft
weight: 1
- - score_point: The home page of the first author should be found at Microsoft Research (https://www.microsoft.com/en-us/research/people/boqiao/) or Google Scholar (https://scholar.google.com/citations?user=_6ugrdYAAAAJ)
+ - score_point: The home page of the first author should be found at Microsoft Research (https://www.microsoft.com/en-us/research/people/boqiao/) or Google Scholar (https://scholar.google.com/citations?user=_6ugrdYAAAAJ). Either one is acceptable.
weight: 1
14 changes: 12 additions & 2 deletions taskweaver/ext_role/web_search/web_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def browse_page(
query: str,
urls: List[str],
top_k: int = 3,
- chunk_size: int = 1000,
+ chunk_size: int = 2000,
chunk_overlap: int = 250,
post_proxy: PostEventProxy = None,
) -> list[dict[str, Any]]:
Expand Down Expand Up @@ -133,6 +133,8 @@ def _configure(self):
self.google_api_key = self._get_str("google_api_key", "")
self.google_search_engine_id = self._get_str("google_search_engine_id", "")
self.bing_api_key = self._get_str("bing_api_key", "")
+ self.chunk_size = self._get_int("chunk_size", 2000)
+ self.chunk_overlap = self._get_int("chunk_overlap", 500)


class WebSearch(Role):
Expand All @@ -152,6 +154,8 @@ def __init__(
self.google_api_key = config.google_api_key
self.google_search_engine_id = config.google_search_engine_id
self.bing_api_key = config.bing_api_key
+ self.chunk_size = config.chunk_size
+ self.chunk_overlap = config.chunk_overlap

def close(self) -> None:
super().close()
Expand Down Expand Up @@ -192,7 +196,13 @@ def reply(self, memory: Memory, **kwargs) -> Post:
+ PromptUtil.wrap_text_with_delimiter(
"\n```json\n"
+ json.dumps(
- browse_page(",".join(queries), list(query_urls), post_proxy=post_proxy),
+ browse_page(
+ ",".join(queries),
+ list(query_urls),
+ post_proxy=post_proxy,
+ chunk_size=self.chunk_size,
+ chunk_overlap=self.chunk_overlap,
+ ),
indent=4,
)
+ "```\n",
Expand Down

0 comments on commit 76f067d

Please sign in to comment.