added nice printing for the config and more options for task display

huggingface · Jul 17, 2024 · f892248 · f892248
1 parent 3a28895
commit f892248
Show file tree

Hide file tree

Showing 4 changed files with 81 additions and 31 deletions.
diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py
@@ -25,7 +25,7 @@
 import argparse
 import os
 
-from lighteval.parsers import parser_accelerate, parser_nanotron
+from lighteval.parsers import parser_accelerate, parser_nanotron, parser_utils_tasks
 from lighteval.tasks.registry import Registry, taskinfo_selector
 
 
@@ -36,25 +36,18 @@ def cli_evaluate():
     parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation")
     subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand")
 
-    # create the parser for the "accelerate" command
+    # Subparser for the "accelerate" command
     parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.")
     parser_accelerate(parser_a)
 
-    # create the parser for the "nanotron" command
+    # Subparser for the "nanotron" command
     parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.")
     parser_nanotron(parser_b)
 
-    # utils functions
-    parser.add_argument("--list-tasks", action="store_true", help="List available tasks")
-    parser.add_argument(
-        "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
-    )
-    parser.add_argument(
-        "--tasks-examples",
-        type=str,
-        default=None,
-        help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.",
-    )
+    # Subparser for task utils functions
+    parser_c = subparsers.add_parser("tasks", help="list the available tasks or inspect their samples and config.")
+    parser_utils_tasks(parser_c)
+
     args = parser.parse_args()
 
     if args.subcommand == "accelerate":
@@ -67,20 +60,27 @@ def cli_evaluate():
 
         main_nanotron(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)
 
-    elif args.list_tasks:
-        Registry(cache_dir="").print_all_tasks()
-
-    elif args.tasks_examples:
-        print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
-        print(
-            "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer."
-        )
-        task_names_list, _ = taskinfo_selector(args.tasks_examples)
-        task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list)
-        for name, task in task_dict.items():
-            print("-" * 10, name, "-" * 10)
-            for sample in task.eval_docs()[:10]:
-                print(sample)
+    elif args.subcommand == "tasks":
+        if args.list:
+            Registry(cache_dir="").print_all_tasks()
+
+        if args.inspect:
+            print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
+            print(
+                "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer."
+            )
+            # Loading task
+            task_names_list, _ = taskinfo_selector(args.inspect)
+            task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list)
+            for name, task in task_dict.items():
+                print("-" * 10, name, "-" * 10)
+                if args.show_config:
+                    print("-" * 10, "CONFIG")
+                    task.print_config()
+                for ix, sample in enumerate(task.eval_docs()[: int(args.num_samples)]):
+                    if ix == 0:
+                        print("-" * 10, "SAMPLES")
+                    print(sample)
 
     else:
         print("You did not provide any argument. Exiting")

diff --git a/src/lighteval/metrics/utils.py b/src/lighteval/metrics/utils.py
@@ -24,7 +24,7 @@
 from enum import Enum, auto
 
 
-class MetricCategory(Enum):
+class MetricCategory(str, Enum):
     TARGET_PERPLEXITY = auto()
     PERPLEXITY = auto()
     GENERATIVE = auto()
@@ -37,7 +37,7 @@ class MetricCategory(Enum):
     IGNORED = auto()
 
 
-class MetricUseCase(Enum):
+class MetricUseCase(str, Enum):
     # General
     ACCURACY = auto()
     PERPLEXITY = auto()

diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py
@@ -123,3 +123,25 @@ def parser_nanotron(parser=None):
     parser.add_argument(
         "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
     )
+
+
+def parser_utils_tasks(parser=None):
+    """Add the task-utility options to ``parser`` (a new parser is created when it is None) and return it."""
+    if parser is None:
+        parser = argparse.ArgumentParser(
+            description="CLI tool for lighteval, a lightweight framework for LLM evaluation"
+        )
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--list", action="store_true", help="List available tasks")
+    group.add_argument(
+        "--inspect",
+        type=str,
+        help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.",
+    )
+    parser.add_argument("--num_samples", type=int, default=10, help="Number of samples to display")
+    parser.add_argument("--show_config", default=False, action="store_true", help="Will display the full task config")
+    parser.add_argument(
+        "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
+    )
+    return parser
diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py
@@ -21,14 +21,16 @@
 # SOFTWARE.
 
 import collections
+import inspect
 import os
 import random
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from multiprocessing import Pool
 from pathlib import Path
 from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union
 
 from datasets import load_dataset
+from pytablewriter import MarkdownTableWriter
 
 from lighteval.few_shot_manager import FewShotSampler
 from lighteval.logging.hierarchical_logger import hlog, hlog_warn
@@ -232,6 +234,32 @@ def __init__(  # noqa: C901
     def cfg(self):
         return self._cfg
 
+    def print_config(self):
+        """Print the task config as a two-column (Key / Value) markdown table.
+
+        Each entry of the "metric" field is flattened into one row per attribute,
+        labelled "metric <index>: <attribute>".
+        Bound methods among the metric attributes are shown by qualified name,
+        other callable config values by ``__name__`` (falling back to ``repr``
+        when they have none); everything else is shown through ``repr``.
+        """
+        rows = []
+        for k, v in asdict(self.cfg).items():
+            if k != "metric":
+                # partials and callable instances have no __name__, so fall back to repr for them
+                rows.append([k, getattr(v, "__name__", repr(v)) if callable(v) else repr(v)])
+                continue
+            # presumably asdict() has turned every metric into a plain dict -- TODO confirm
+            for ix, metrics in enumerate(v):
+                for metric_k, metric_v in metrics.items():
+                    shown = metric_v.__qualname__ if inspect.ismethod(metric_v) else repr(metric_v)
+                    rows.append([f"{k} {ix}: {metric_k}", shown])
+
+        md_writer = MarkdownTableWriter()
+        md_writer.headers = ["Key", "Value"]
+        md_writer.value_matrix = rows
+        print(md_writer.dumps())
+
     def doc_to_text_without_instructions(self, doc: Doc) -> str:
         """
         Returns the query of the document without the instructions. If the