Skip to content

Commit

Permalink
fixes after merge
Browse files Browse the repository at this point in the history
  • Loading branch information
NathanHB committed Jul 11, 2024
1 parent bff354e commit ebb5611
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 120 deletions.
2 changes: 1 addition & 1 deletion examples/model_configs/endpoint_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ model:
endpoint_type: "protected"
namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
env_vars:
env_vars:
null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
generation:
add_special_tokens: true
2 changes: 1 addition & 1 deletion examples/model_configs/tgi_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ model:
instance:
inference_server_address: ""
inference_server_auth: null
model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
4 changes: 2 additions & 2 deletions src/lighteval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import argparse

from lighteval.parsers import parser_accelerate, parser_nanotron
from lighteval.utils import list_tasks_command
from lighteval.tasks.registry import Registry


def cli_evaluate():
Expand Down Expand Up @@ -57,7 +57,7 @@ def cli_evaluate():
return

if args.list_tasks:
list_tasks_command()
Registry(cache_dir="").print_all_tasks()
return


Expand Down
9 changes: 1 addition & 8 deletions src/lighteval/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,6 @@ def parser_accelerate(parser=None):
help="Model arguments to pass to the model class, e.g. 'pretrained=gpt2,dtype=float16'",
)

# Accelerate parameters
parser.add_argument(
"--num_processes",
type=int,
default=1,
help="Number of processes for accelerate to use, this directly translates to the data parallelism level",
)

# Debug
parser.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to evaluate on")
parser.add_argument("--override_batch_size", type=int, default=-1)
Expand All @@ -70,6 +62,7 @@ def parser_accelerate(parser=None):
parser.add_argument(
"--push_details_to_hub", default=False, action="store_true", help="Set to push the details to the hub"
)
parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
parser.add_argument(
"--public_run", default=False, action="store_true", help="Push results and details to a public repo"
)
Expand Down
14 changes: 14 additions & 0 deletions src/lighteval/tasks/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import collections
import importlib
import os
from itertools import groupby
from pathlib import Path
from pprint import pformat
from types import ModuleType
Expand Down Expand Up @@ -152,6 +153,19 @@ def get_task_dict(

return tasks_dict

def print_all_tasks(self):
    """
    Print all the tasks in the task registry, grouped by suite.

    The suite of a task is the part of its registry key that precedes the
    first "|". Suites are printed in alphabetical order, each followed by
    its task names, also in alphabetical order.
    """
    # groupby only merges *adjacent* items, so the names must be sorted first.
    # Sorting the full keys also leaves every group already sorted, which is
    # why no per-group sort is needed.
    sorted_names = sorted(self.TASK_REGISTRY.keys())
    for suite, suite_tasks in groupby(sorted_names, lambda name: name.split("|")[0]):
        print(f"\n- {suite}:")
        for task_name in suite_tasks:
            print(f" - {task_name}")


def create_custom_tasks_module(custom_tasks: Union[str, Path, ModuleType]) -> ModuleType:
"""Creates a custom task module to load tasks defined by the user in their own file.
Expand Down
108 changes: 0 additions & 108 deletions src/lighteval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import json
import os
from dataclasses import asdict, is_dataclass
from typing import Any, Union

import numpy as np
import pkg_resources


def flatten_dict(nested: dict, sep="/") -> dict:
Expand Down Expand Up @@ -152,111 +149,6 @@ def is_accelerate_available() -> bool:
return importlib.util.find_spec("accelerate") is not None


def load_tasks_table_extended(module_name: str) -> list:
    """
    Import an extended-task module and return its TASKS_TABLE.

    Args:
        module_name: name of the sub-package under ``lighteval.tasks.extended``
            whose ``main`` module should be imported.

    Returns:
        The module's ``TASKS_TABLE``, or an empty list when the module does not
        define it (the error is printed) or defines it as ``None``.

    Raises:
        ImportError: propagated from ``importlib.import_module`` when the
            module cannot be imported.
    """
    module_path = f"lighteval.tasks.extended.{module_name}.main"
    module_loaded = importlib.import_module(module_path)
    try:
        tasks_list = module_loaded.TASKS_TABLE
    except AttributeError as e:
        # A module without TASKS_TABLE is reported but not fatal.
        print(e)
        return []
    return tasks_list if tasks_list is not None else []


def get_tasks_table_json() -> list:
    """
    Fetch tasks/tasks_table.jsonl.

    Returns:
        A list with one decoded JSON value per non-blank line of the
        ``tasks/tasks_table.jsonl`` resource of the ``lighteval`` package.
    """
    # Resolve the on-disk path of the packaged resource file.
    tasks_table_path = pkg_resources.resource_filename("lighteval", "tasks/tasks_table.jsonl")
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(tasks_table_path, encoding="utf-8") as jsonl_tasks_table:
        # Blank lines (e.g. a trailing newline) are skipped instead of being
        # handed to json.loads, which would raise on them.
        return [json.loads(jline) for jline in jsonl_tasks_table if jline.strip()]


def get_extended_tasks() -> list:
    """
    Collect the tasks of every extended-task package.

    Walks the ``tasks/extended`` resource directory of the ``lighteval``
    package; for each directory that contains a ``main.py`` file, the
    directory name is passed to ``load_tasks_table_extended`` and the
    returned tasks are accumulated.

    Returns:
        A list of all the extended tasks.
    """
    extended_root = pkg_resources.resource_filename("lighteval", "tasks/extended")
    collected = []
    for current_dir, _subdirs, filenames in os.walk(extended_root):
        # File names within one directory are unique, so a membership test is
        # equivalent to scanning for the single "main.py" entry.
        if "main.py" in filenames:
            collected.extend(load_tasks_table_extended(os.path.basename(current_dir)))
    return collected


def group_by_suite(tasks: list, tasks_extended: list) -> dict:
    """
    Group tasks by suite and sort them alphabetically.

    Args:
        tasks: task dicts, each with a "name" and a "suite" entry; "suite" is
            an iterable of suite names the task belongs to.
        tasks_extended: extended task dicts, each with a "name" entry.

    Returns:
        A dict mapping each suite name to the sorted list of its task names.
        The "extended" key is always present and holds only the names from
        ``tasks_extended``.
    """
    grouped_by_suite = {}
    for task in tasks:
        for suite in task["suite"]:
            grouped_by_suite.setdefault(suite, []).append(task["name"])

    # The extended suite is rebuilt from tasks_extended alone; any "extended"
    # entries gathered from `tasks` above are intentionally replaced.
    grouped_by_suite["extended"] = [task["name"] for task in tasks_extended]

    # Sort each suite once at the end rather than after every insertion.
    for names in grouped_by_suite.values():
        names.sort()
    return grouped_by_suite


def list_tasks_command():
    """
    Print every available task, grouped by suite.

    Tasks come from tasks_table.jsonl and from the extended directory; each
    extended task package in tasks/extended is expected to expose a
    TASKS_TABLE in its main.py file. Any error raised while collecting or
    printing is caught and printed rather than propagated.
    """
    try:
        # tasks_table.jsonl is read first, then the extended tasks, and both
        # are merged into one suite -> task names mapping.
        suites = group_by_suite(get_tasks_table_json(), get_extended_tasks())

        print("Available tasks: (Grouped by suite)\n")
        for suite_name in suites:
            print("- " + suite_name)
            for task_name in suites[suite_name]:
                print("\t - " + task_name)
    except Exception as err:
        print("Error: ", err)


# Error message text for the case where accelerate was requested but is not installed.
NO_ACCELERATE_ERROR_MSG = "You requested the use of accelerate for this evaluation, but it is not available in your current environement. Please install it using pip."


Expand Down

0 comments on commit ebb5611

Please sign in to comment.