From f892248b09e40af61bbd9f92ee43334902f8f237 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Wed, 17 Jul 2024 13:23:16 +0000 Subject: [PATCH] added nice printing for the config and more options for task display --- src/lighteval/__main__.py | 56 +++++++++++++-------------- src/lighteval/metrics/utils.py | 4 +- src/lighteval/parsers.py | 22 +++++++++++ src/lighteval/tasks/lighteval_task.py | 30 +++++++++++++- 4 files changed, 81 insertions(+), 31 deletions(-) diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py index b909d9dd8..f507f151a 100644 --- a/src/lighteval/__main__.py +++ b/src/lighteval/__main__.py @@ -25,7 +25,7 @@ import argparse import os -from lighteval.parsers import parser_accelerate, parser_nanotron +from lighteval.parsers import parser_accelerate, parser_nanotron, parser_utils_tasks from lighteval.tasks.registry import Registry, taskinfo_selector @@ -36,25 +36,18 @@ def cli_evaluate(): parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation") subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand") - # create the parser for the "accelerate" command + # Subparser for the "accelerate" command parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.") parser_accelerate(parser_a) - # create the parser for the "nanotron" command + # Subparser for the "nanotron" command parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.") parser_nanotron(parser_b) - # utils functions - parser.add_argument("--list-tasks", action="store_true", help="List available tasks") - parser.add_argument( - "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models" - ) - parser.add_argument( - "--tasks-examples", - type=str, - default=None, - help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.", - ) + # Subparser for task utils functions + parser_c = subparsers.add_parser("tasks", help="use nanotron as backend for evaluation.") + parser_utils_tasks(parser_c) + args = parser.parse_args() if args.subcommand == "accelerate": @@ -67,20 +60,27 @@ def cli_evaluate(): main_nanotron(args.checkpoint_config_path, args.lighteval_override, args.cache_dir) - elif args.list_tasks: - Registry(cache_dir="").print_all_tasks() - - elif args.tasks_examples: - print(f"Loading the tasks dataset to cache folder: {args.cache_dir}") - print( - "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer." - ) - task_names_list, _ = taskinfo_selector(args.tasks_examples) - task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list) - for name, task in task_dict.items(): - print("-" * 10, name, "-" * 10) - for sample in task.eval_docs()[:10]: - print(sample) + elif args.subcommand == "tasks": + if args.list: + Registry(cache_dir="").print_all_tasks() + + if args.inspect: + print(f"Loading the tasks dataset to cache folder: {args.cache_dir}") + print( + "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer." + ) + # Loading task + task_names_list, _ = taskinfo_selector(args.inspect) + task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list) + for name, task in task_dict.items(): + print("-" * 10, name, "-" * 10) + if args.show_config: + print("-" * 10, "CONFIG") + task.print_config() + for ix, sample in enumerate(task.eval_docs()[: int(args.num_samples)]): + if ix == 0: + print("-" * 10, "SAMPLES") + print(sample) else: print("You did not provide any argument. Exiting") diff --git a/src/lighteval/metrics/utils.py b/src/lighteval/metrics/utils.py index 52e8e0665..0310b5f4b 100644 --- a/src/lighteval/metrics/utils.py +++ b/src/lighteval/metrics/utils.py @@ -24,7 +24,7 @@ from enum import Enum, auto -class MetricCategory(Enum): +class MetricCategory(str, Enum): TARGET_PERPLEXITY = auto() PERPLEXITY = auto() GENERATIVE = auto() @@ -37,7 +37,7 @@ class MetricCategory(Enum): IGNORED = auto() -class MetricUseCase(Enum): +class MetricUseCase(str, Enum): # General ACCURACY = auto() PERPLEXITY = auto() diff --git a/src/lighteval/parsers.py b/src/lighteval/parsers.py index 37671b05b..499d945ec 100644 --- a/src/lighteval/parsers.py +++ b/src/lighteval/parsers.py @@ -123,3 +123,25 @@ def parser_nanotron(parser=None): parser.add_argument( "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models" ) + + +def parser_utils_tasks(parser=None): + if parser is None: + parser = argparse.ArgumentParser( + description="CLI tool for lighteval, a lightweight framework for LLM evaluation" + ) + + group = parser.add_mutually_exclusive_group(required=True) + + group.add_argument("--list", action="store_true", help="List available tasks") + group.add_argument( + "--inspect", + type=str, + default=None, + help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.", + ) + parser.add_argument("--num_samples", type=int, default=10, help="Number of samples to display") + parser.add_argument("--show_config", default=False, action="store_true", help="Will display the full task config") + parser.add_argument( + "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models" + ) diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index 6595571fe..3e9838abf 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -21,14 +21,16 @@ # SOFTWARE. import collections +import inspect import os import random -from dataclasses import dataclass +from dataclasses import asdict, dataclass from multiprocessing import Pool from pathlib import Path from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union from datasets import load_dataset +from pytablewriter import MarkdownTableWriter from lighteval.few_shot_manager import FewShotSampler from lighteval.logging.hierarchical_logger import hlog, hlog_warn @@ -232,6 +234,32 @@ def __init__( # noqa: C901 def cfg(self): return self._cfg + def print_config(self): + md_writer = MarkdownTableWriter() + md_writer.headers = ["Key", "Value"] + + values = [] + + for k, v in asdict(self.cfg).items(): + if k == "metric": + for ix, metrics in enumerate(v): + for metric_k, metric_v in metrics.items(): + if inspect.ismethod(metric_v): + values.append([f"{k} {ix}: {metric_k}", metric_v.__qualname__]) + else: + values.append([f"{k} {ix}: {metric_k}", repr(metric_v)]) + + else: + if isinstance(v, Callable): + values.append([k, v.__name__]) + else: + values.append([k, repr(v)]) + # print(k, ":", repr(v)) + + md_writer.value_matrix = values + + print(md_writer.dumps()) + def doc_to_text_without_instructions(self, doc: Doc) -> str: """ Returns the query of the document without the instructions. If the