fixes after merge

huggingface · Jul 11, 2024 · ebb5611 · ebb5611
1 parent bff354e
commit ebb5611
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 120 deletions.
diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml
@@ -16,7 +16,7 @@ model:
     endpoint_type: "protected"
     namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
     image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
-    env_vars: 
+    env_vars:
       null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
   generation:
     add_special_tokens: true
diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml
@@ -3,4 +3,4 @@ model:
   instance:
     inference_server_address: ""
     inference_server_auth: null
-    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
+    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py
@@ -25,7 +25,7 @@
 import argparse
 
 from lighteval.parsers import parser_accelerate, parser_nanotron
-from lighteval.utils import list_tasks_command
+from lighteval.tasks.registry import Registry
 
 
 def cli_evaluate():
@@ -57,7 +57,7 @@ def cli_evaluate():
         return
 
     if args.list_tasks:
-        list_tasks_command()
+        Registry(cache_dir="").print_all_tasks()
         return
 
 

diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py
@@ -48,14 +48,6 @@ def parser_accelerate(parser=None):
         help="Model arguments to pass to the model class, e.g. 'pretrained=gpt2,dtype=float16'",
     )
 
-    # Accelerate parameters
-    parser.add_argument(
-        "--num_processes",
-        type=int,
-        default=1,
-        help="Number of processes for accelerate to use, this directly translates to the data parallelism level",
-    )
-
     # Debug
     parser.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to evaluate on")
     parser.add_argument("--override_batch_size", type=int, default=-1)
@@ -70,6 +62,7 @@ def parser_accelerate(parser=None):
     parser.add_argument(
         "--push_details_to_hub", default=False, action="store_true", help="Set to push the details to the hub"
     )
+    parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
     parser.add_argument(
         "--public_run", default=False, action="store_true", help="Push results and details to a public repo"
     )

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
@@ -23,6 +23,7 @@
 import collections
 import importlib
 import os
+from itertools import groupby
 from pathlib import Path
 from pprint import pformat
 from types import ModuleType
@@ -152,6 +153,19 @@ def get_task_dict(
 
         return tasks_dict
 
+    def print_all_tasks(self):
+        """
+        Print every key of `self.TASK_REGISTRY` to stdout, sorted and grouped by suite (the text before the first `|`).
+        """
+        tasks_names = list(self.TASK_REGISTRY.keys())
+        tasks_names.sort()  # groupby only merges adjacent items, so sort before grouping
+        for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]):  # key is the whole name when it has no "|"
+            tasks_names = list(g)  # rebinds the name only; groupby keeps iterating the original list object
+            tasks_names.sort()
+            print(f"\n- {suite}:")  # blank line, then the suite header
+            for task_name in tasks_names:
+                print(f"  - {task_name}")
+
 
 def create_custom_tasks_module(custom_tasks: Union[str, Path, ModuleType]) -> ModuleType:
     """Creates a custom task module to load tasks defined by the user in their own file.

diff --git a/src/lighteval/utils.py b/src/lighteval/utils.py
@@ -12,13 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import importlib
-import json
-import os
 from dataclasses import asdict, is_dataclass
 from typing import Any, Union
 
 import numpy as np
-import pkg_resources
 
 
 def flatten_dict(nested: dict, sep="/") -> dict:
@@ -152,111 +149,6 @@ def is_accelerate_available() -> bool:
     return importlib.util.find_spec("accelerate") is not None
 
 
-def load_tasks_table_extended(module_name: any) -> list:
-    """
-    load the module module_name
-
-    Args:
-    - module_name the name of the module we want to load
-    Returns:
-    - TASKS_TABLE: a list of the task in the module
-    """
-    module_path = f"lighteval.tasks.extended.{module_name}.main"
-    module_loaded = importlib.import_module(module_path)
-    tasks_list = None
-    try:
-        tasks_list = module_loaded.TASKS_TABLE
-    except Exception as e:
-        print(e)
-    return tasks_list if tasks_list is not None else []
-
-
-def get_tasks_table_json() -> list:
-    """
-    Fetch tasks/tasks_table.jsonl
-    Returns
-    - a list of all the tasks in tasks/tasks_table.jsonl
-    """
-    tasks = []
-    # Handling tasks_table.jsonl
-    # Get the path to the resource file
-    tasks_table_path = pkg_resources.resource_filename("lighteval", "tasks/tasks_table.jsonl")
-    with open(tasks_table_path) as jsonl_tasks_table:
-        jsonl_tasks_table_content = jsonl_tasks_table.read()
-        for jline in jsonl_tasks_table_content.splitlines():
-            tasks.append(json.loads(jline))
-    return tasks
-
-
-def get_extended_tasks() -> list:
-    """
-    Fetch all the tasks in the extended suite
-    Returns
-    - a list of all the extended tasks
-    """
-    tasks_extended = []
-    extended_tasks_dir = pkg_resources.resource_filename("lighteval", "tasks/extended")
-    for root, dirs, files in os.walk(extended_tasks_dir):
-        for file in files:
-            if file == "main.py":
-                module_name = os.path.basename(root)
-                tasks_table = load_tasks_table_extended(module_name)
-                tasks_extended += tasks_table
-    return tasks_extended
-
-
-def group_by_suite(tasks: list, tasks_extended: list) -> dict:
-    """
-    Group tasks by suite and sort them alphabetically
-    Args:
-    - tasks: list of tasks in tasks/tasks_table.jsonl
-    - tasks_extended: list of extended tasks
-    Returns:
-    - a dict of tasks grouped by suite
-    """
-    grouped_by_suite = {}
-    for task in tasks:
-        for suite in task["suite"]:
-            if suite not in grouped_by_suite.keys():
-                grouped_by_suite[suite] = [task["name"]]
-            else:
-                grouped_by_suite[suite].append(task["name"])
-                grouped_by_suite[suite].sort()
-
-    grouped_by_suite["extended"] = []
-    # Adding extended suite
-    for task in tasks_extended:
-        grouped_by_suite["extended"].append(task["name"])
-    grouped_by_suite["extended"].sort()
-    return grouped_by_suite
-
-
-def list_tasks_command():
-    """
-    List all the available tasks in tasks_table.jsonl and the extended directory
-    Assumes the existence of TASKS_TABLE in the main.py file for each extended
-    tasks in tasks/extended
-    """
-    try:
-        # Handling tasks_table.jsonl
-        tasks = get_tasks_table_json()
-
-        # Handling extended tasks
-        tasks_extended = get_extended_tasks()
-
-        # Grouping by suite the tasks
-        grouped_by_suite = group_by_suite(tasks, tasks_extended)
-
-        # Print tasks
-        print("Available tasks: (Grouped by suite)\n")
-        for suite, task_list in grouped_by_suite.items():
-            print("- " + suite)
-            for task in task_list:
-                print("\t - " + task)
-    except Exception as e:
-        print("Error: ", e)
-
-
 NO_ACCELERATE_ERROR_MSG = "You requested the use of accelerate for this evaluation, but it is not available in your current environement. Please install it using pip."