Skip to content

Commit

Permalink
Merge branch 'main' into oz-eval
Browse files Browse the repository at this point in the history
  • Loading branch information
Stopwolf authored Oct 10, 2024
2 parents 3bf98da + 78cda93 commit cd859c1
Show file tree
Hide file tree
Showing 6 changed files with 322 additions and 181 deletions.
1 change: 1 addition & 0 deletions src/lighteval/models/vllm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def tokenizer(self):
def cleanup(self):
destroy_model_parallel()
del self.model.llm_engine.model_executor.driver_worker
self.model = None
gc.collect()
ray.shutdown()
destroy_distributed_environment()
Expand Down
25 changes: 10 additions & 15 deletions src/lighteval/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from lighteval.models.model_loader import load_model
from lighteval.models.model_output import ModelResponse
from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks
from lighteval.tasks.registry import Registry, get_custom_tasks, taskinfo_selector
from lighteval.tasks.registry import Registry, taskinfo_selector
from lighteval.tasks.requests import SampleUid
from lighteval.utils.imports import (
NO_ACCELERATE_ERROR_MSG,
Expand Down Expand Up @@ -166,23 +166,18 @@ def _init_model(self, model_config, model):
return load_model(config=model_config, env_config=self.pipeline_parameters.env_config)
return model

def _init_tasks_and_requests(self, tasks):
def _init_tasks_and_requests(self, tasks: str):
with htrack_block("Tasks loading"):
with local_ranks_zero_first() if self.launcher_type == ParallelismManager.NANOTRON else nullcontext():
# If some tasks are provided as task groups, we load them separately
custom_tasks = self.pipeline_parameters.custom_tasks_directory
tasks_groups_dict = None
if custom_tasks:
_, tasks_groups_dict = get_custom_tasks(custom_tasks)
if tasks_groups_dict and tasks in tasks_groups_dict:
tasks = tasks_groups_dict[tasks]

# Loading all tasks
task_names_list, fewshots_dict = taskinfo_selector(tasks)
task_dict = Registry(cache_dir=self.pipeline_parameters.env_config.cache_dir).get_task_dict(
task_names_list, custom_tasks=custom_tasks
registry = Registry(
cache_dir=self.pipeline_parameters.env_config.cache_dir,
custom_tasks=self.pipeline_parameters.custom_tasks_directory,
)
task_names_list, fewshots_dict = taskinfo_selector(tasks, registry)
task_dict = registry.get_task_dict(task_names_list)
LightevalTask.load_datasets(
list(task_dict.values()), self.pipeline_parameters.dataset_loading_processes
)
LightevalTask.load_datasets(task_dict.values(), self.pipeline_parameters.dataset_loading_processes)

self.evaluation_tracker.task_config_logger.log(task_dict)

Expand Down
71 changes: 1 addition & 70 deletions src/lighteval/tasks/default_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,34 +402,6 @@
trust_dataset=True,
version=0,
)
# Aggregate ANLI task configuration covering all three adversarial rounds at once.
# Evaluation runs over the three round-specific test splits (test_r1..r3); no
# dedicated few-shot split or selection strategy is configured.
anli_lighteval = LightevalTaskConfig(
    name="anli",
    suite=["lighteval", "anli"],
    prompt_function=prompt.anli,
    hf_repo="anli",
    hf_subset="plain_text",
    # All per-round train/dev/test splits exposed by the dataset.
    hf_avail_splits=[
        "train_r1",
        "dev_r1",
        "train_r2",
        "dev_r2",
        "train_r3",
        "dev_r3",
        "test_r1",
        "test_r2",
        "test_r3",
    ],
    evaluation_splits=["test_r1", "test_r2", "test_r3"],
    few_shots_split=None,
    few_shots_select=None,
    # Single-token generation: scored via single-token loglikelihood accuracy.
    generation_size=1,
    metric=[Metrics.loglikelihood_acc_single_token],
    stop_sequence=["\n"],
    output_regex=None,
    frozen=False,
    # NOTE(review): presumably forwarded to HF datasets' trust_remote_code — confirm.
    trust_dataset=True,
    version=0,
)
anli_r1_lighteval = LightevalTaskConfig(
name="anli:r1",
suite=["lighteval", "anli"],
Expand Down Expand Up @@ -2295,7 +2267,7 @@
version=0,
)
bbq_Nationality_helm = LightevalTaskConfig(
name="bbq=Nationality",
name="bbq:Nationality",
suite=["helm"],
prompt_function=prompt.bbq,
hf_repo="lighteval/bbq_helm",
Expand Down Expand Up @@ -11368,47 +11340,6 @@
trust_dataset=True,
version=0,
)
# MMLU under the HELM suites: generative evaluation on the combined "all" subset,
# scored with the four HELM exact-match variants rather than loglikelihood.
mmlu_helm = LightevalTaskConfig(
    name="mmlu",
    suite=["helm", "helm_general"],
    prompt_function=prompt.mmlu_helm,
    hf_repo="lighteval/mmlu",
    hf_subset="all",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    # Few-shot examples are drawn from the dev split (no explicit selection strategy).
    few_shots_split="dev",
    few_shots_select=None,
    generation_size=5,
    # Exact-match family: strict, whitespace/punctuation-normalized ("quasi"),
    # and the prefix-only variants of each.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    output_regex=None,
    frozen=False,
    # NOTE(review): presumably forwarded to HF datasets' trust_remote_code — confirm.
    trust_dataset=True,
    version=0,
)
# MMLU under the "original" suite: same repo/prompt as the HELM variant above, but
# scored with single-token loglikelihood accuracy and sequential few-shot selection.
mmlu_original = LightevalTaskConfig(
    name="mmlu",
    suite=["original"],
    prompt_function=prompt.mmlu_helm,
    hf_repo="lighteval/mmlu",
    hf_subset="all",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    # "sequential": few-shot examples are taken in split order rather than sampled.
    few_shots_select="sequential",
    generation_size=5,
    metric=[Metrics.loglikelihood_acc_single_token],
    stop_sequence=["\n"],
    output_regex=None,
    frozen=False,
    # NOTE(review): presumably forwarded to HF datasets' trust_remote_code — confirm.
    trust_dataset=True,
    version=0,
)
mmlu_abstract_algebra_original = LightevalTaskConfig(
name="mmlu:abstract_algebra",
suite=["original", "mmlu"],
Expand Down
Loading

0 comments on commit cd859c1

Please sign in to comment.