update doc
Yunnglin committed Dec 31, 2024
1 parent c10595f commit 7c572d0
Showing 4 changed files with 9 additions and 9 deletions.
docs/en/advanced_guides/add_benchmark.md (2 additions, 2 deletions)

@@ -79,13 +79,13 @@ The sample code is as follows:
 ```python
 from evalscope.benchmarks import Benchmark, DataAdapter
 from evalscope.metrics import WeightedAverageAccuracy
-from evalscope.models import MultiChoiceModelAdapter
+from evalscope.models import ChatGenerationModelAdapter


 @Benchmark.register(
     name='mmlu_pro',
     dataset_id='modelscope/mmlu-pro',
-    model_adapter=MultiChoiceModelAdapter,
+    model_adapter=ChatGenerationModelAdapter,
     subset_list=['default'],
     metric_list=[WeightedAverageAccuracy],
     few_shot_num=0,
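Written out as a standalone file, the registration pattern documented above might look roughly like the sketch below. Only the imports and the `@Benchmark.register(...)` arguments come from the diff; the adapter class name, its method name, and its signature are illustrative assumptions.

```python
# Hypothetical sketch of a benchmark registration using ChatGenerationModelAdapter.
# Only the decorator arguments and imports mirror the diff above; the adapter class,
# its method name, and its signature are assumptions made for illustration.
from evalscope.benchmarks import Benchmark, DataAdapter
from evalscope.metrics import WeightedAverageAccuracy
from evalscope.models import ChatGenerationModelAdapter


@Benchmark.register(
    name='mmlu_pro',
    dataset_id='modelscope/mmlu-pro',
    model_adapter=ChatGenerationModelAdapter,  # answers come from chat generation, not per-choice logit scoring
    subset_list=['default'],
    metric_list=[WeightedAverageAccuracy],
    few_shot_num=0,
)
class MMLUProAdapter(DataAdapter):
    """Illustrative stub: converts one dataset record into a plain-text prompt."""

    def gen_prompt(self, input_d: dict, few_shot_list: list, **kwargs) -> dict:
        # Assumed hook name; returns the prompt payload consumed by the model adapter.
        question = input_d.get('question', '')
        options = '\n'.join(input_d.get('options', []))
        return {'data': [f'{question}\n{options}']}
```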
docs/zh/advanced_guides/add_benchmark.md (2 additions, 2 deletions)

@@ -78,13 +78,13 @@ evalscope/benchmarks/
 ```python
 from evalscope.benchmarks import Benchmark, DataAdapter
 from evalscope.metrics import WeightedAverageAccuracy
-from evalscope.models import MultiChoiceModelAdapter
+from evalscope.models import ChatGenerationModelAdapter


 @Benchmark.register(
     name='mmlu_pro',
     dataset_id='modelscope/mmlu-pro',
-    model_adapter=MultiChoiceModelAdapter,
+    model_adapter=ChatGenerationModelAdapter,
     subset_list=['default'],
     metric_list=[WeightedAverageAccuracy],
     few_shot_num=0,
evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py (2 additions, 2 deletions)

@@ -103,9 +103,9 @@ def get_sys_prompt(inp: dict) -> str:
         few_shot_prompts = [self._generate_prompt(input_d=sample, include_answer=True) for sample in few_shot_list]
         context: str = '\n'.join(few_shot_prompts) + '\n'
         context += self._generate_prompt(input_d=input_d, include_answer=False)
-        full_prompt = prompt + context
+        full_prompt = context

-        return {'data': [full_prompt]}
+        return {'data': [full_prompt], 'system_prompt': prompt}

     def get_gold_answer(self, input_d: dict) -> list:
         # Get the gold choice
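The effect of the change above is that the instruction text (`prompt`) is no longer prepended to the few-shot context; it is returned under a separate `system_prompt` key, presumably so a chat-style model adapter can pass it as the system message. A minimal sketch of how such a payload could be turned into chat messages is below; the `build_messages` helper, the message layout, and the example strings are assumptions, not evalscope API.

```python
# Hypothetical illustration of consuming the new return shape; not evalscope's actual API.
def build_messages(payload: dict) -> list:
    """Turn {'data': [...], 'system_prompt': ...} into chat-completion messages."""
    messages = []
    system_prompt = payload.get('system_prompt')
    if system_prompt:
        # The instruction goes into the system role instead of being prepended to the user prompt.
        messages.append({'role': 'system', 'content': system_prompt})
    for prompt in payload['data']:
        messages.append({'role': 'user', 'content': prompt})
    return messages


# Example payload (illustrative values only).
payload = {
    'data': ['Q: In which year did the Apollo 11 mission land on the Moon?\nA:'],
    'system_prompt': 'Answer the question as concisely as possible.',
}
print(build_messages(payload))
```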
tests/cli/test_run.py (3 additions, 3 deletions)

@@ -73,16 +73,16 @@ def test_run_eval_with_args(self):
     def test_run_task(self):
         task_cfg = {'model': 'qwen/Qwen2-0.5B-Instruct',
                     'datasets': [
-                        'mmlu_pro',
+                        # 'mmlu_pro',
                         # 'bbh',
                         # 'hellaswag',
                         # 'gsm8k',
                         # 'arc'
                         # 'race',
                         # 'truthful_qa',
-                        # 'trivia_qa',
+                        'trivia_qa',
                     ],
-                    'limit': 2,
+                    'limit': 20,
                     'debug': True}
         run_task(task_cfg=task_cfg)

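Run standalone, the updated test configuration would look roughly like the sketch below. The model ID, dataset name, and option values are taken from the diff; the import path of `run_task` is an assumption, since the test file's imports are not part of this hunk.

```python
# Sketch of running the same evaluation outside the test suite.
# The import path below is assumed; it is not shown in this diff.
from evalscope.run import run_task

task_cfg = {
    'model': 'qwen/Qwen2-0.5B-Instruct',   # model evaluated by the test
    'datasets': ['trivia_qa'],             # the benchmark touched by this commit
    'limit': 20,                           # evaluate 20 samples per subset
    'debug': True,
}

if __name__ == '__main__':
    run_task(task_cfg=task_cfg)
```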
