From 4550cb7e119971c204e000d9c176740235855c9a Mon Sep 17 00:00:00 2001
From: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
Date: Wed, 17 Jul 2024 11:16:13 +0200
Subject: [PATCH 1/2] launch lighteval using `lighteval --args` (#152)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---------

Co-authored-by: Nathan Habib
Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
---
 README.md                                  |  35 +++--
 examples/model_configs/endpoint_model.yaml |   2 +-
 examples/model_configs/tgi_model.yaml      |   2 +-
 pyproject.toml                             |   2 +-
 run_evals_accelerate.py                    |  89 ------------
 run_evals_nanotron.py                      |  55 --------
 src/lighteval/__main__.py                  |  65 +++++++++
 src/lighteval/commands/lighteval_cli.py    | 150 ---------------------
 src/lighteval/parsers.py                   | 128 ++++++++++++++++++
 src/lighteval/tasks/registry.py            |  14 ++
 tests/test_main.py                         |   6 +-
 11 files changed, 234 insertions(+), 314 deletions(-)
 delete mode 100644 run_evals_accelerate.py
 delete mode 100644 run_evals_nanotron.py
 create mode 100644 src/lighteval/__main__.py
 delete mode 100644 src/lighteval/commands/lighteval_cli.py
 create mode 100644 src/lighteval/parsers.py

diff --git a/README.md b/README.md
index 10364fe4..a6dfbc48 100644
--- a/README.md
+++ b/README.md
@@ -78,8 +78,8 @@ pre-commit install

 We provide two main entry points to evaluate models:

-* `run_evals_accelerate.py`: evaluate models on CPU or one or more GPUs using [🤗 Accelerate](https://github.com/huggingface/accelerate).
-* `run_evals_nanotron.py`: evaluate models in distributed settings using [⚡️ Nanotron](https://github.com/huggingface/nanotron).
+* `lighteval accelerate`: evaluate models on CPU or one or more GPUs using [🤗 Accelerate](https://github.com/huggingface/accelerate).
+* `lighteval nanotron`: evaluate models in distributed settings using [⚡️ Nanotron](https://github.com/huggingface/nanotron).

 For most users, we recommend using the 🤗 Accelerate backend - see below for specific commands.

@@ -94,7 +94,8 @@ accelerate config
 You can then evaluate a model using data parallelism as follows:

 ```shell
-accelerate launch --multi_gpu --num_processes= run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes= -m \
+    lighteval accelerate \
     --model_args="pretrained=" \
     --tasks \
     --output_dir output_dir
@@ -109,7 +110,8 @@ suite|task|num_few_shot|{0 or 1 to automatically reduce `num_few_shot` if prompt
 or a file path like [`examples/tasks/recommended_set.txt`](./examples/tasks/recommended_set.txt) which specifies multiple task configurations. For example, to evaluate GPT-2 on the Truthful QA benchmark run:

 ```shell
-accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes=8 -m \
+    lighteval accelerate \
     --model_args "pretrained=gpt2" \
     --tasks "lighteval|truthfulqa:mc|0|0" \
     --override_batch_size 1 \
@@ -119,7 +121,8 @@ accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
 Here, `--override_batch_size` defines the _batch size per device_, so the effective batch size will be `override_batch_size x num_gpus`. To evaluate on multiple benchmarks, separate each task configuration with a comma, e.g.
 ```shell
-accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes=8 -m \
+    lighteval accelerate \
     --model_args "pretrained=gpt2" \
     --tasks "leaderboard|truthfulqa:mc|0|0,leaderboard|gsm8k|0|0" \
     --override_batch_size 1 \
@@ -133,7 +136,8 @@ See the [`examples/tasks/recommended_set.txt`](./examples/tasks/recommended_set.
 If you want to evaluate a model by spinning up inference endpoints, use adapter/delta weights, or more complex configuration options, you can load models using a configuration file. This is done as follows:

 ```shell
-accelerate launch --multi_gpu --num_processes= run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes= -m \
+    lighteval accelerate \
     --model_config_path="" \
     --tasks \
     --output_dir output_dir
@@ -147,13 +151,15 @@ To evaluate models larger than ~40B parameters in 16-bit precision, you will nee

 ```shell
 # PP=2, DP=4 - good for models < 70B params
-accelerate launch --multi_gpu --num_processes=4 run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes=4 -m \
+    lighteval accelerate \
     --model_args="pretrained=,model_parallel=True" \
     --tasks \
     --output_dir output_dir

 # PP=4, DP=2 - good for huge models >= 70B params
-accelerate launch --multi_gpu --num_processes=2 run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes=2 -m \
+    lighteval accelerate \
     --model_args="pretrained=,model_parallel=True" \
     --tasks \
     --output_dir output_dir
@@ -164,7 +170,8 @@ accelerate launch --multi_gpu --num_processes=2 run_evals_accelerate.py \
 To evaluate a model on all the benchmarks of the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) using a single node of 8 GPUs, run:

 ```shell
-accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
+accelerate launch --multi_gpu --num_processes=8 -m \
+    lighteval accelerate \
     --model_args "pretrained=" \
     --tasks examples/tasks/open_llm_leaderboard_tasks.txt \
     --override_batch_size 1 \
@@ -176,7 +183,7 @@ accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
 You can also use `lighteval` to evaluate models on CPU, although note this will typically be very slow for large models.
 To do so, run:

 ```shell
-python run_evals_accelerate.py \
+lighteval accelerate \
     --model_args="pretrained="\
     --tasks \
     --output_dir output_dir
@@ -211,7 +218,7 @@ Independently of the default tasks provided in `lighteval` that you will find in
 For example, to run an extended task like `ifeval`, you can run:

 ```shell
-python run_evals_accelerate.py \
+lighteval accelerate \
     --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \
     --use_chat_template \ # optional, if you want to run the evaluation with the chat template
     --tasks "extended|ifeval|0|0" \
@@ -221,7 +228,7 @@
 To run a community or custom task, you can use (note the custom_tasks flag):

 ```shell
-python run_evals_accelerate.py \
+lighteval accelerate \
     --model_args="pretrained="\
     --tasks \
     --custom_tasks \
@@ -231,7 +238,7 @@
 For example, to launch `lighteval` on `arabic_mmlu:abstract_algebra` for `HuggingFaceH4/zephyr-7b-beta`, run:

 ```shell
-python run_evals_accelerate.py \
+lighteval accelerate \
     --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \
     --use_chat_template \ # optional, if you want to run the evaluation with the chat template
     --tasks "community|arabic_mmlu:abstract_algebra|5|1" \
@@ -464,7 +471,7 @@
 source /activate #or conda activate yourenv
 cd /lighteval
 export CUDA_LAUNCH_BLOCKING=1
-srun accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py --model_args "pretrained=your model name" --tasks examples/tasks/open_llm_leaderboard_tasks.txt --override_batch_size 1 --save_details --output_dir=your output dir
+srun accelerate launch --multi_gpu --num_processes=8 -m lighteval accelerate --model_args "pretrained=your model name" --tasks examples/tasks/open_llm_leaderboard_tasks.txt --override_batch_size 1 --save_details --output_dir=your output dir
 ```

 ## Releases

diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml
index 9e0db437..2834cdd2 100644
--- a/examples/model_configs/endpoint_model.yaml
+++ b/examples/model_configs/endpoint_model.yaml
@@ -16,7 +16,7 @@ model:
   endpoint_type: "protected"
   namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
   image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
-  env_vars:
+  env_vars: null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
 generation:
   add_special_tokens: true

diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml
index 5e45641f..82ac50a7 100644
--- a/examples/model_configs/tgi_model.yaml
+++ b/examples/model_configs/tgi_model.yaml
@@ -3,4 +3,4 @@ model:
   instance:
     inference_server_address: ""
     inference_server_auth: null
-    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
\ No newline at end of file
+    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory

diff --git a/pyproject.toml b/pyproject.toml
index b771942d..95f74147 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -102,4 +102,4 @@ Issues = "https://github.com/huggingface/lighteval/issues"
 # Changelog = "https://github.com/huggingface/lighteval/blob/master/CHANGELOG.md"

 [project.scripts]
-lighteval = "lighteval.commands.lighteval_cli:main"
+lighteval = "lighteval.__main__:cli_evaluate"

diff --git a/run_evals_accelerate.py b/run_evals_accelerate.py
deleted file mode 100644
index d623de25..00000000
--- a/run_evals_accelerate.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# MIT License

-# Copyright (c) 2024 The HuggingFace Team

-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:

-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.

-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-"""Example run command:
-accelerate config
-accelerate launch run_evals_accelerate.py --tasks="leaderboard|hellaswag|5|1" --output_dir "/scratch/evals" --model_args "pretrained=gpt2"
-"""
-
-import argparse
-
-from lighteval.main_accelerate import CACHE_DIR, main
-
-
-def get_parser():
-    parser = argparse.ArgumentParser()
-    group = parser.add_mutually_exclusive_group(required=True)
-    task_type_group = parser.add_mutually_exclusive_group(required=True)
-
-    # Model type: either use a config file or simply the model name
-    task_type_group.add_argument("--model_config_path")
-    task_type_group.add_argument("--model_args")
-
-    # Debug
-    parser.add_argument("--max_samples", type=int, default=None)
-    parser.add_argument("--override_batch_size", type=int, default=-1)
-    parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="")
-    # Saving
-    parser.add_argument("--output_dir", required=True)
-    parser.add_argument("--push_results_to_hub", default=False, action="store_true")
-    parser.add_argument("--save_details", action="store_true")
-    parser.add_argument("--push_details_to_hub", default=False, action="store_true")
-    parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
-    parser.add_argument(
-        "--public_run", default=False, action="store_true", help="Push results and details to a public repo"
-    )
-    parser.add_argument(
-        "--cache_dir",
-        type=str,
-        default=CACHE_DIR,
-        help="Cache directory for downloaded datasets & model, defaults to `HF_HOME` environment variable",
-    )
-    parser.add_argument(
-        "--results_org",
-        type=str,
-        help="Hub organisation where you want to store the results. Your current token must have write access to it",
-    )
-    # Common parameters
-    parser.add_argument("--use_chat_template", default=False, action="store_true")
-    parser.add_argument("--system_prompt", type=str, default=None)
-    parser.add_argument("--dataset_loading_processes", type=int, default=1)
-    parser.add_argument(
-        "--custom_tasks",
-        type=str,
-        default=None,
-        help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formatting functions)",
-    )
-    group.add_argument(
-        "--tasks",
-        type=str,
-        default=None,
-        help="Comma-separated ids of tasks, e.g. 'original|mmlu:abstract_algebra|5' or path to a text file with a list of tasks",
-    )
-    parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of trials the few shots")
-    return parser
-
-
-if __name__ == "__main__":
-    parser = get_parser()
-    args, unknowns = parser.parse_known_args()
-    main(args)

diff --git a/run_evals_nanotron.py b/run_evals_nanotron.py
deleted file mode 100644
index 3a4a2a42..00000000
--- a/run_evals_nanotron.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# MIT License

-# Copyright (c) 2024 The HuggingFace Team

-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:

-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-# flake8: noqa: C901
-import argparse
-
-from lighteval.main_nanotron import main
-
-
-def get_parser():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--checkpoint-config-path",
-        type=str,
-        required=True,
-        help="Path to the brr checkpoint YAML or python config file, potentially on S3",
-    )
-    parser.add_argument(
-        "--lighteval-override",
-        type=str,
-        help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
-    )
-    parser.add_argument(
-        "--cache-dir",
-        type=str,
-        default=None,
-        help="Cache directory",
-    )
-
-    return parser
-
-
-if __name__ == "__main__":
-    parser = get_parser()
-    args, unknowns = parser.parse_known_args()
-    main(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)

diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py
new file mode 100644
index 00000000..9deb0925
--- /dev/null
+++ b/src/lighteval/__main__.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python

+# MIT License

+# Copyright (c) 2024 Taratra D. RAHARISON and The HuggingFace Team

+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:

+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import argparse
+
+from lighteval.parsers import parser_accelerate, parser_nanotron
+from lighteval.tasks.registry import Registry
+
+
+def cli_evaluate():
+    parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation")
+    subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand")
+
+    # create the parser for the "accelerate" command
+    parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.")
+    parser_accelerate(parser_a)
+
+    # create the parser for the "nanotron" command
+    parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.")
+    parser_nanotron(parser_b)
+
+    parser.add_argument("--list-tasks", action="store_true", help="List available tasks")
+
+    args = parser.parse_args()
+
+    if args.subcommand == "accelerate":
+        from lighteval.main_accelerate import main as main_accelerate
+
+        main_accelerate(args)
+        return
+
+    if args.subcommand == "nanotron":
+        from lighteval.main_nanotron import main as main_nanotron
+
+        main_nanotron(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)
+        return
+
+    if args.list_tasks:
+        Registry(cache_dir="").print_all_tasks()
+        return
+
+
+if __name__ == "__main__":
+    cli_evaluate()

diff --git a/src/lighteval/commands/lighteval_cli.py b/src/lighteval/commands/lighteval_cli.py
deleted file mode 100644
index 61866334..00000000
--- a/src/lighteval/commands/lighteval_cli.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/usr/bin/env python

-# MIT License

-# Copyright (c) 2024 Taratra D. RAHARISON and The HuggingFace Team

-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:

-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.

-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import argparse
-import importlib
-import json
-import os
-
-import pkg_resources
-
-
-def load_tasks_table_extended(module_name: any) -> list:
-    """
-    load the module module_name
-
-    Args:
-    - module_name the name of the module we want to load
-    Returns:
-    - TASKS_TABLE: a list of the task in the module
-    """
-    module_path = f"lighteval.tasks.extended.{module_name}.main"
-    module_loaded = importlib.import_module(module_path)
-    tasks_list = None
-    try:
-        tasks_list = module_loaded.TASKS_TABLE
-    except Exception as e:
-        print(e)
-    return tasks_list if tasks_list is not None else []
-
-
-def get_tasks_table_json() -> list:
-    """
-    Fetch tasks/tasks_table.jsonl
-    Returns
-    - a list of all the tasks in tasks/tasks_table.jsonl
-    """
-    tasks = []
-    # Handling tasks_table.jsonl
-    # Get the path to the resource file
-    tasks_table_path = pkg_resources.resource_filename("lighteval", "tasks/tasks_table.jsonl")
-    with open(tasks_table_path) as jsonl_tasks_table:
-        jsonl_tasks_table_content = jsonl_tasks_table.read()
-        for jline in jsonl_tasks_table_content.splitlines():
-            tasks.append(json.loads(jline))
-    return tasks
-
-
-def get_extended_tasks() -> list:
-    """
-    Fetch all the tasks in the extended suite
-    Returns
-    - a list of all the extended tasks
-    """
-    tasks_extended = []
-    extended_tasks_dir = pkg_resources.resource_filename("lighteval", "tasks/extended")
-    for root, dirs, files in os.walk(extended_tasks_dir):
-        for file in files:
-            if file == "main.py":
-                module_name = os.path.basename(root)
-                tasks_table = load_tasks_table_extended(module_name)
-                tasks_extended += tasks_table
-    return tasks_extended
-
-
-def group_by_suite(tasks: list, tasks_extended: list) -> dict:
-    """
-    Group tasks by suite and sort them alphabetically
-    Args:
-    - tasks: list of tasks in tasks/tasks_table.jsonl
-    - tasks_extended: list of extended tasks
-    Returns:
-    - a dict of tasks grouped by suite
-    """
-    grouped_by_suite = {}
-    for task in tasks:
-        for suite in task["suite"]:
-            if suite not in grouped_by_suite.keys():
-                grouped_by_suite[suite] = [task["name"]]
-            else:
-                grouped_by_suite[suite].append(task["name"])
-                grouped_by_suite[suite].sort()
-
-    grouped_by_suite["extended"] = []
-    # Adding extended suite
-    for task in tasks_extended:
-        grouped_by_suite["extended"].append(task["name"])
-    grouped_by_suite["extended"].sort()
-    return grouped_by_suite
-
-
-def list_tasks_command():
-    """
-    List all the available tasks in tasks_table.jsonl and the extended directory
-    Assumes the existence of TASKS_TABLE in the main.py file for each extended
-    tasks in tasks/extended
-    """
-    try:
-        # Handling tasks_table.jsonl
-        tasks = get_tasks_table_json()
-
-        # Handling extended tasks
-        tasks_extended = get_extended_tasks()
-
-        # Grouping by suite the tasks
-        grouped_by_suite = group_by_suite(tasks, tasks_extended)
-
-        # Print tasks
-        print("Available tasks: (Grouped by suite)\n")
-        for suite, task_list in grouped_by_suite.items():
-            print("- " + suite)
-            for task in task_list:
-                print("\t - " + task)
-    except Exception as e:
-        print("Error: ", e)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation")
-    parser.add_argument("--list-tasks", action="store_true", help="List available tasks")
-    args = parser.parse_args()
-
-    if args.list_tasks:
-        list_tasks_command()
-    else:
-        parser.print_help()
-
-
-if __name__ == "__main__":
-    main()

diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py
new file mode 100644
index 00000000..d05ba312
--- /dev/null
+++ b/src/lighteval/parsers.py
@@ -0,0 +1,128 @@
+# MIT License

+# Copyright (c) 2024 The HuggingFace Team

+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:

+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.

+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import argparse
+import os
+
+
+TOKEN = os.getenv("HF_TOKEN")
+CACHE_DIR = os.getenv("HF_HOME")
+
+
+def parser_accelerate(parser=None):
+    if parser is None:
+        parser = argparse.ArgumentParser(
+            description="CLI tool for lighteval, a lightweight framework for LLM evaluation"
+        )
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    task_type_group = parser.add_mutually_exclusive_group(required=True)
+
+    # Model type: either use a config file or simply the model name
+    task_type_group.add_argument(
+        "--model_config_path",
+        type=str,
+        help="Path to the model config file, e.g. 'examples/model_configs/base_model.yaml'",
+    )
+    task_type_group.add_argument(
+        "--model_args",
+        type=str,
+        help="Model arguments to pass to the model class, e.g. 'pretrained=gpt2,dtype=float16'",
+    )
+
+    # Debug
+    parser.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to evaluate on")
+    parser.add_argument("--override_batch_size", type=int, default=-1)
+    parser.add_argument("--job_id", type=str, help="Optional Job ID for future reference", default="")
+
+    # Saving
+    parser.add_argument("--output_dir", required=True, type=str, help="Directory to save the results")
+    parser.add_argument(
+        "--push_results_to_hub", default=False, action="store_true", help="Set to push the results to the hub"
+    )
+    parser.add_argument("--save_details", action="store_true", help="Save the details of the run in the output_dir")
+    parser.add_argument(
+        "--push_details_to_hub", default=False, action="store_true", help="Set to push the details to the hub"
+    )
+    parser.add_argument("--push_results_to_tensorboard", default=False, action="store_true")
+    parser.add_argument(
+        "--public_run", default=False, action="store_true", help="Push results and details to a public repo"
+    )
+    parser.add_argument(
+        "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
+    )
+    parser.add_argument(
+        "--results_org",
+        type=str,
+        help="Hub organisation where you want to store the results. Your current token must have write access to it",
+    )
+    # Common parameters
+    parser.add_argument(
+        "--use_chat_template",
+        default=False,
+        action="store_true",
+        help="Use the chat template (from the model's tokenizer) for the prompt",
+    )
+    parser.add_argument(
+        "--system_prompt", type=str, default=None, help="System prompt to use, e.g. 'You are a helpful assistant.'"
+    )
+    parser.add_argument(
+        "--dataset_loading_processes", type=int, default=1, help="Number of processes to use for loading the datasets"
+    )
+    parser.add_argument(
+        "--custom_tasks",
+        type=str,
+        default=None,
+        help="Path to a file with custom tasks (a TASK list of dict and potentially prompt formatting functions)",
+    )
+    group.add_argument(
+        "--tasks",
+        type=str,
+        default=None,
+        help="Id of a task, e.g. 'original|mmlu:abstract_algebra|5' or path to a text file with a list of tasks",
+    )
+    parser.add_argument("--num_fewshot_seeds", type=int, default=1, help="Number of seeds to use for few-shot sampling")
+    return parser
+
+
+def parser_nanotron(parser=None):
+    if parser is None:
+        parser = argparse.ArgumentParser(
+            description="CLI tool for lighteval, a lightweight framework for LLM evaluation"
+        )
+
+    parser.add_argument(
+        "--checkpoint-config-path",
+        type=str,
+        required=True,
+        help="Path to the brr checkpoint YAML or python config file, potentially on S3",
+    )
+    parser.add_argument(
+        "--lighteval-override",
+        type=str,
+        help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
+    )
+    parser.add_argument(
+        "--cache-dir",
+        type=str,
+        default=None,
+        help="Cache directory",
+    )
+    return parser

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index ef575b7e..28597763 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -23,6 +23,7 @@
 import collections
 import importlib
 import os
+from itertools import groupby
 from pathlib import Path
 from pprint import pformat
 from types import ModuleType
@@ -152,6 +153,19 @@ def get_task_dict(

         return tasks_dict

+    def print_all_tasks(self):
+        """
+        Print all the tasks in the task registry.
+        """
+        tasks_names = list(self.TASK_REGISTRY.keys())
+        tasks_names.sort()
+        for suite, g in groupby(tasks_names, lambda x: x.split("|")[0]):
+            suite_tasks = list(g)
+            suite_tasks.sort()
+            print(f"\n- {suite}:")
+            for task_name in suite_tasks:
+                print(f"  - {task_name}")
+

 def create_custom_tasks_module(custom_tasks: Union[str, Path, ModuleType]) -> ModuleType:
     """Creates a custom task module to load tasks defined by the user in their own file.
diff --git a/tests/test_main.py b/tests/test_main.py
index 00798cb4..27816c8b 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -27,7 +27,7 @@ from pytest import approx

 from lighteval.main_accelerate import main  # noqa: E402
-from run_evals_accelerate import get_parser
+from lighteval.parsers import parser_accelerate
 from tests.reference_scores.reference_task_scores import RESULTS_FULL, RESULTS_LITE  # noqa: E402
 from tests.reference_scores.reference_tasks import ALL_SUBSETS

@@ -58,7 +58,7 @@ def run_model_predictions_full(model: str, tasks: list):
         "1",
         "--save_details",
     ]
-    parser = get_parser()
+    parser = parser_accelerate()
     args = parser.parse_args(lighteval_args)
     results = main(args)
     return results
@@ -77,7 +77,7 @@ def run_model_predictions_lite(model: str, tasks: list):
         "--save_details",
     ]
     lighteval_args += ["--max_samples", "10"]
-    parser = get_parser()
+    parser = parser_accelerate()
     args = parser.parse_args(lighteval_args)
     results = main(args)
     return results

From d43c9a324c81fbe1d85811ded08e56f37abac2d8 Mon Sep 17 00:00:00 2001
From: Atsuki Yamaguchi <30075338+gucci-j@users.noreply.github.com>
Date: Wed, 17 Jul 2024 13:34:40 +0100
Subject: [PATCH 2/2] Fix _init_max_length in base_model.py (#185)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update base_model.py

* Update base_model.py

* Removed try-except in base_model.py

* Update src/lighteval/models/base_model.py

* Revert "Update base_model.py"

This reverts commit 003d3896a85ac4d34e8b48a86cbd50ccb9a394c0.

---------

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
---
 src/lighteval/models/base_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/lighteval/models/base_model.py b/src/lighteval/models/base_model.py
index 3e483d44..e5e63db9 100644
--- a/src/lighteval/models/base_model.py
+++ b/src/lighteval/models/base_model.py
@@ -267,8 +267,6 @@ def _init_max_length(self, max_length) -> int:
             if hasattr(self._config, attr):
                 return getattr(self._config, attr)

-        if hasattr(self.tokenizer, "model_max_length"):
-            return self.tokenizer.model_max_length
         # Default max sequence length setting for when no `max_length` is provided
         # or no max length config setting is found in the model or tokenizer.
         return 2048
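A note on trying out PATCH 1/2: the `[project.scripts]` change points the `lighteval` console command at `lighteval.__main__:cli_evaluate`, so the CLI can also be driven programmatically. Below is a minimal sketch (not part of the patches; it only assumes the package is installed with the entry point defined above):

```python
# Programmatic equivalent of running `lighteval --list-tasks` in a shell,
# using the cli_evaluate() entry point introduced in PATCH 1/2.
import sys

from lighteval.__main__ import cli_evaluate

# cli_evaluate() reads sys.argv via argparse, so we fake a command line here.
sys.argv = ["lighteval", "--list-tasks"]
cli_evaluate()  # prints every registered task, grouped by suite
```

Passing `accelerate` or `nanotron` as the first argv element after the program name dispatches to the corresponding backend instead, exactly as in the shell examples from the README diff.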
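Context for PATCH 2/2: `tokenizer.model_max_length` is a poor fallback because `transformers` substitutes the sentinel `VERY_LARGE_INTEGER` (`int(1e30)`) whenever a tokenizer config leaves the field unset, so the removed branch could return an absurd "maximum" length. A small illustrative sketch of the failure mode follows — `usable_tokenizer_max_length` is a hypothetical helper written for this note, not part of the patch:

```python
from typing import Optional

from transformers import AutoTokenizer
from transformers.tokenization_utils_base import VERY_LARGE_INTEGER  # int(1e30)


def usable_tokenizer_max_length(model_name: str) -> Optional[int]:
    """Return tokenizer.model_max_length only when it is a real limit."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # When the tokenizer config omits model_max_length, transformers fills
    # in VERY_LARGE_INTEGER as a placeholder -- which is why _init_max_length
    # now falls back to a fixed default (2048) instead of this attribute.
    if tokenizer.model_max_length >= VERY_LARGE_INTEGER:
        return None
    return tokenizer.model_max_length
```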