Skip to content

Commit

Permalink
added nice printing for the config and more options for task display
Browse files Browse the repository at this point in the history
  • Loading branch information
clefourrier committed Jul 17, 2024
1 parent 3a28895 commit f892248
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 31 deletions.
56 changes: 28 additions & 28 deletions src/lighteval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import argparse
import os

from lighteval.parsers import parser_accelerate, parser_nanotron
from lighteval.parsers import parser_accelerate, parser_nanotron, parser_utils_tasks
from lighteval.tasks.registry import Registry, taskinfo_selector


Expand All @@ -36,25 +36,18 @@ def cli_evaluate():
parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation")
subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand")

# create the parser for the "accelerate" command
# Subparser for the "accelerate" command
parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.")
parser_accelerate(parser_a)

# create the parser for the "nanotron" command
# Subparser for the "nanotron" command
parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.")
parser_nanotron(parser_b)

# utils functions
parser.add_argument("--list-tasks", action="store_true", help="List available tasks")
parser.add_argument(
"--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
)
parser.add_argument(
"--tasks-examples",
type=str,
default=None,
help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.",
)
# Subparser for task utils functions (listing tasks, inspecting task samples/config).
# The help text describes this subcommand itself rather than repeating the nanotron one.
parser_c = subparsers.add_parser("tasks", help="list available tasks or inspect samples of selected tasks.")
parser_utils_tasks(parser_c)

args = parser.parse_args()

if args.subcommand == "accelerate":
Expand All @@ -67,20 +60,27 @@ def cli_evaluate():

main_nanotron(args.checkpoint_config_path, args.lighteval_override, args.cache_dir)

elif args.list_tasks:
Registry(cache_dir="").print_all_tasks()

elif args.tasks_examples:
print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
print(
"All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer."
)
task_names_list, _ = taskinfo_selector(args.tasks_examples)
task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list)
for name, task in task_dict.items():
print("-" * 10, name, "-" * 10)
for sample in task.eval_docs()[:10]:
print(sample)
elif args.subcommand == "tasks":
if args.list:
Registry(cache_dir="").print_all_tasks()

if args.inspect:
print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
print(
"All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer."
)
# Loading task
task_names_list, _ = taskinfo_selector(args.inspect)
task_dict = Registry(cache_dir=args.cache_dir).get_task_dict(task_names_list)
for name, task in task_dict.items():
print("-" * 10, name, "-" * 10)
if args.show_config:
print("-" * 10, "CONFIG")
task.print_config()
for ix, sample in enumerate(task.eval_docs()[: int(args.num_samples)]):
if ix == 0:
print("-" * 10, "SAMPLES")
print(sample)

else:
print("You did not provide any argument. Exiting")
Expand Down
4 changes: 2 additions & 2 deletions src/lighteval/metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from enum import Enum, auto


class MetricCategory(Enum):
class MetricCategory(str, Enum):
TARGET_PERPLEXITY = auto()
PERPLEXITY = auto()
GENERATIVE = auto()
Expand All @@ -37,7 +37,7 @@ class MetricCategory(Enum):
IGNORED = auto()


class MetricUseCase(Enum):
class MetricUseCase(str, Enum):
# General
ACCURACY = auto()
PERPLEXITY = auto()
Expand Down
22 changes: 22 additions & 0 deletions src/lighteval/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,25 @@ def parser_nanotron(parser=None):
parser.add_argument(
"--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
)


def parser_utils_tasks(parser=None):
    """Register the arguments of the ``tasks`` utility command on ``parser``.

    Args:
        parser: Parser (or subparser) to populate. When ``None``, a new
            ``argparse.ArgumentParser`` is created.

    Returns:
        The populated parser. Returning it makes the ``parser=None`` path usable:
        otherwise the freshly created parser would be lost to the caller.
    """
    if parser is None:
        parser = argparse.ArgumentParser(
            description="CLI tool for lighteval, a lightweight framework for LLM evaluation"
        )

    # Exactly one of --list / --inspect must be given.
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument("--list", action="store_true", help="List available tasks")
    group.add_argument(
        "--inspect",
        type=str,
        default=None,
        help="Id of tasks or path to a text file with a list of tasks (e.g. 'original|mmlu:abstract_algebra|5') for which you want to manually inspect samples.",
    )
    parser.add_argument("--num_samples", type=int, default=10, help="Number of samples to display")
    parser.add_argument("--show_config", default=False, action="store_true", help="Will display the full task config")
    parser.add_argument(
        "--cache_dir", type=str, default=CACHE_DIR, help="Cache directory used to store datasets and models"
    )

    return parser
30 changes: 29 additions & 1 deletion src/lighteval/tasks/lighteval_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@
# SOFTWARE.

import collections
import inspect
import os
import random
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from multiprocessing import Pool
from pathlib import Path
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union

from datasets import load_dataset
from pytablewriter import MarkdownTableWriter

from lighteval.few_shot_manager import FewShotSampler
from lighteval.logging.hierarchical_logger import hlog, hlog_warn
Expand Down Expand Up @@ -232,6 +234,32 @@ def __init__( # noqa: C901
def cfg(self):
return self._cfg

def print_config(self):
    """Print the task configuration as a two-column (Key / Value) Markdown table.

    Every field of ``self.cfg`` (converted with ``dataclasses.asdict``) yields one
    row. The ``metric`` field is expanded so that each entry of each metric gets
    its own row, labelled ``metric <index>: <entry key>``.
    """
    md_writer = MarkdownTableWriter()
    md_writer.headers = ["Key", "Value"]

    rows = []

    for k, v in asdict(self.cfg).items():
        # Only expand `metric` when it really is a sequence; a missing/None
        # value falls through to the generic row below instead of crashing.
        if k == "metric" and isinstance(v, (list, tuple)):
            for ix, metrics in enumerate(v):
                if not isinstance(metrics, dict):
                    # Entry was not turned into a dict by asdict (e.g. a plain
                    # string identifier): show it as a single row.
                    rows.append([f"{k} {ix}", repr(metrics)])
                    continue
                for metric_k, metric_v in metrics.items():
                    if inspect.ismethod(metric_v):
                        rows.append([f"{k} {ix}: {metric_k}", metric_v.__qualname__])
                    else:
                        rows.append([f"{k} {ix}: {metric_k}", repr(metric_v)])
        elif callable(v):
            # functools.partial objects and callable instances have no
            # __name__; fall back to repr rather than raising AttributeError.
            rows.append([k, getattr(v, "__name__", repr(v))])
        else:
            rows.append([k, repr(v)])

    md_writer.value_matrix = rows

    print(md_writer.dumps())

def doc_to_text_without_instructions(self, doc: Doc) -> str:
"""
Returns the query of the document without the instructions. If the
Expand Down

0 comments on commit f892248

Please sign in to comment.