From ebb56118e0d20b7184a25ab6d6f3f08e0fee6d0b Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 11 Jul 2024 13:09:18 +0200 Subject: [PATCH] fixes after merge --- examples/model_configs/endpoint_model.yaml | 2 +- examples/model_configs/tgi_model.yaml | 2 +- src/lighteval/__main__.py | 4 +- src/lighteval/parsers.py | 9 +- src/lighteval/tasks/registry.py | 14 +++ src/lighteval/utils.py | 108 --------------------- 6 files changed, 19 insertions(+), 120 deletions(-) diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml index 9e0db4374..2834cdd28 100644 --- a/examples/model_configs/endpoint_model.yaml +++ b/examples/model_configs/endpoint_model.yaml @@ -16,7 +16,7 @@ model: endpoint_type: "protected" namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models. - env_vars: + env_vars: null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048` generation: add_special_tokens: true diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml index 5e45641f9..82ac50a79 100644 --- a/examples/model_configs/tgi_model.yaml +++ b/examples/model_configs/tgi_model.yaml @@ -3,4 +3,4 @@ model: instance: inference_server_address: "" inference_server_auth: null - model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory \ No newline at end of file + model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py index ef16c17f6..9deb09251 100644 --- a/src/lighteval/__main__.py +++ b/src/lighteval/__main__.py @@ -25,7 +25,7 @@ import argparse from lighteval.parsers import parser_accelerate, parser_nanotron -from lighteval.utils import list_tasks_command +from lighteval.tasks.registry import Registry def cli_evaluate(): @@ -57,7 +57,7 @@ def cli_evaluate(): return if args.list_tasks: - list_tasks_command() + Registry(cache_dir="").print_all_tasks() return diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py index a584a3301..d05ba312f 100644 --- a/src/lighteval/parsers.py +++ b/src/lighteval/parsers.py @@ -48,14 +48,6 @@ def parser_accelerate(parser=None): help="Model arguments to pass to the model class, e.g. 'pretrained=gpt2,dtype=float16'", ) - # Accelerate parameters - parser.add_argument( - "--num_processes", - type=int, - default=1, - help="Number of processes for accelerate to use, this directly translates to the data parallelism level", - ) - # Debug parser.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to evaluate on") parser.add_argument("--override_batch_size", type=int, default=-1) @@ -70,6 +62,7 @@ def parser_accelerate(parser=None): parser.add_argument( "--push_details_to_hub", default=False, action="store_true", help="Set to push the details to the hub" ) + parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true") parser.add_argument( "--public_run", default=False, action="store_true", help="Push results and details to a public repo" ) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index ef575b7e1..28597763f 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -23,6 +23,7 @@ import collections import importlib import os +from itertools import groupby from pathlib import Path from pprint import pformat from types import ModuleType @@ -152,6 +153,19 @@ def get_task_dict( return tasks_dict + def print_all_tasks(self): + """ + Print all the tasks in the task registry. + """ + tasks_names = list(self.TASK_REGISTRY.keys()) + tasks_names.sort() + for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]): + tasks_names = list(g) + tasks_names.sort() + print(f"\n- {suite}:") + for task_name in tasks_names: + print(f" - {task_name}") + def create_custom_tasks_module(custom_tasks: Union[str, Path, ModuleType]) -> ModuleType: """Creates a custom task module to load tasks defined by the user in their own file. diff --git a/src/lighteval/utils.py b/src/lighteval/utils.py index 0a617a6a0..3e032d1f4 100644 --- a/src/lighteval/utils.py +++ b/src/lighteval/utils.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import importlib -import json -import os from dataclasses import asdict, is_dataclass from typing import Any, Union import numpy as np -import pkg_resources def flatten_dict(nested: dict, sep="/") -> dict: @@ -152,111 +149,6 @@ def is_accelerate_available() -> bool: return importlib.util.find_spec("accelerate") is not None -def load_tasks_table_extended(module_name: any) -> list: - """ - load the module module_name - - Args: - - module_name the name of the module we want to load - Returns: - - TASKS_TABLE: a list of the task in the module - """ - module_path = f"lighteval.tasks.extended.{module_name}.main" - module_loaded = importlib.import_module(module_path) - tasks_list = None - try: - tasks_list = module_loaded.TASKS_TABLE - except Exception as e: - print(e) - return tasks_list if tasks_list is not None else [] - - -def get_tasks_table_json() -> list: - """ - Fetch tasks/tasks_table.jsonl - Returns - - a list of all the tasks in tasks/tasks_table.jsonl - """ - tasks = [] - # Handling tasks_table.jsonl - # Get the path to the resource file - tasks_table_path = pkg_resources.resource_filename("lighteval", "tasks/tasks_table.jsonl") - with open(tasks_table_path) as jsonl_tasks_table: - jsonl_tasks_table_content = jsonl_tasks_table.read() - for jline in jsonl_tasks_table_content.splitlines(): - tasks.append(json.loads(jline)) - return tasks - - -def get_extended_tasks() -> list: - """ - Fetch all the tasks in the extended suite - Returns - - a list of all the extended tasks - """ - tasks_extended = [] - extended_tasks_dir = pkg_resources.resource_filename("lighteval", "tasks/extended") - for root, dirs, files in os.walk(extended_tasks_dir): - for file in files: - if file == "main.py": - module_name = os.path.basename(root) - tasks_table = load_tasks_table_extended(module_name) - tasks_extended += tasks_table - return tasks_extended - - -def group_by_suite(tasks: list, tasks_extended: list) -> dict: - """ - Group tasks by suite and sort them alphabetically - Args: - - tasks: list of tasks in tasks/tasks_table.jsonl - - tasks_extended: list of extended tasks - Returns: - - a dict of tasks grouped by suite - """ - grouped_by_suite = {} - for task in tasks: - for suite in task["suite"]: - if suite not in grouped_by_suite.keys(): - grouped_by_suite[suite] = [task["name"]] - else: - grouped_by_suite[suite].append(task["name"]) - grouped_by_suite[suite].sort() - - grouped_by_suite["extended"] = [] - # Adding extended suite - for task in tasks_extended: - grouped_by_suite["extended"].append(task["name"]) - grouped_by_suite["extended"].sort() - return grouped_by_suite - - -def list_tasks_command(): - """ - List all the available tasks in tasks_table.jsonl and the extended directory - Assumes the existence of TASKS_TABLE in the main.py file for each extended - tasks in tasks/extended - """ - try: - # Handling tasks_table.jsonl - tasks = get_tasks_table_json() - - # Handling extended tasks - tasks_extended = get_extended_tasks() - - # Grouping by suite the tasks - grouped_by_suite = group_by_suite(tasks, tasks_extended) - - # Print tasks - print("Available tasks: (Grouped by suite)\n") - for suite, task_list in grouped_by_suite.items(): - print("- " + suite) - for task in task_list: - print("\t - " + task) - except Exception as e: - print("Error: ", e) - - NO_ACCELERATE_ERROR_MSG = "You requested the use of accelerate for this evaluation, but it is not available in your current environement. Please install it using pip."