From 0135c2e6dc7ab273a8a5e2e33c3209541ddba8b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com>
Date: Thu, 12 Dec 2024 13:07:54 +0100
Subject: [PATCH] Fix custom arabic tasks (#440)

* removed unused params

* fix issue with task function
---
 community_tasks/_template.py                 |  2 --
 community_tasks/arabic_evals.py              | 18 +-----------------
 docs/source/adding-a-custom-task.mdx         |  9 ---------
 docs/source/saving-and-reading-results.mdx   |  2 --
 examples/nanotron/custom_evaluation_tasks.py | 16 ----------------
 examples/nanotron/custom_task.py             |  4 ----
 6 files changed, 1 insertion(+), 50 deletions(-)

diff --git a/community_tasks/_template.py b/community_tasks/_template.py
index 345aebe4b..d0099ba26 100644
--- a/community_tasks/_template.py
+++ b/community_tasks/_template.py
@@ -99,8 +99,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
         )

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 382a780d3..07a096eca 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -109,8 +109,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -174,8 +172,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -241,8 +237,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -299,8 +293,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -361,8 +353,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=[],
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -423,9 +413,7 @@ def arabic_exams_pfn(line, task_name: str = None):
 def alghafa_pfn(line, task_name: str = None):
     question = line["query"]
     answer_index = int(line["label"])
-    # Dynamically determining the choices by excluding '__few_shots', 'query' and 'label'
-    choices_keys = [key for key in line.keys() if key not in ["query", "label", "__few_shots"]]
-    choices = [line[key] for key in choices_keys]
+    choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
 
     instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
     query = f"{instruction}السؤال: {question}\n"
@@ -461,8 +449,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )
@@ -839,8 +825,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
             trust_dataset=True,
             version=0,
         )

diff --git a/docs/source/adding-a-custom-task.mdx b/docs/source/adding-a-custom-task.mdx
index 2fbff5524..e1823b7b9 100644
--- a/docs/source/adding-a-custom-task.mdx
+++ b/docs/source/adding-a-custom-task.mdx
@@ -107,8 +107,6 @@ class CustomSubsetTask(LightevalTaskConfig):
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
         )
 SUBSET_TASKS = [CustomSubsetTask(name=f"mytask:{subset}", hf_subset=subset) for subset in SAMPLE_SUBSETS]
 ```
@@ -154,13 +152,6 @@ Here is a list of the parameters and their meaning:
   for your generation
 - `metric` (list), the metrics you want to use for your evaluation
 (see next section for a detailed explanation)
-- `output_regex` (str), A regex string that will be used to filter your
-  generation. (Generative metrics will only select tokens that are between the
-  first and the second sequence matched by the regex. For example, for a regex
-  matching `\n` and a generation `\nModel generation output\nSome other text`
-  the metric will only be fed with `Model generation output`)
-- `frozen` (bool), for now, is set to False, but we will steadily pass all
-  stable tasks to True.
 - `trust_dataset` (bool), set to True if you trust the dataset.

diff --git a/docs/source/saving-and-reading-results.mdx b/docs/source/saving-and-reading-results.mdx
index 8c347cee2..993d7577b 100644
--- a/docs/source/saving-and-reading-results.mdx
+++ b/docs/source/saving-and-reading-results.mdx
@@ -170,9 +170,7 @@ The detail file contains the following columns:
     "stop_sequence": [
         "Question="
     ],
-    "output_regex": null,
     "num_samples": null,
-    "frozen": false,
     "suite": [
         "lighteval"
     ],

diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py
index 9ae066715..78c354916 100644
--- a/examples/nanotron/custom_evaluation_tasks.py
+++ b/examples/nanotron/custom_evaluation_tasks.py
@@ -267,8 +267,6 @@ def __init__(
         generation_size=40,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -282,8 +280,6 @@ def __init__(
             few_shots_select=few_shots_select,
             suite=suite,
             generation_size=generation_size,
-            output_regex=output_regex,
-            frozen=frozen,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
         )
@@ -370,8 +366,6 @@ def __init__(
         generation_size=-1,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -387,8 +381,6 @@ def __init__(
             few_shots_select=few_shots_select,
             suite=suite,
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )
@@ -487,8 +479,6 @@ def __init__(
         generation_size=4,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -504,8 +494,6 @@ def __init__(
             few_shots_select=few_shots_select,
             suite=suite,
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )
@@ -623,8 +611,6 @@ def __init__(
         generation_size=-1,
         trust_dataset=True,
         stop_sequence=None,
-        output_regex=None,
-        frozen=False,
     ):
         super().__init__(
             name=name,
@@ -640,8 +626,6 @@ def __init__(
             few_shots_select=few_shots_select,
             suite=suite,
             generation_size=generation_size,
             trust_dataset=trust_dataset,
             stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]),
-            output_regex=output_regex,
-            frozen=frozen,
         )

diff --git a/examples/nanotron/custom_task.py b/examples/nanotron/custom_task.py
index 05cea969f..feaa849ba 100644
--- a/examples/nanotron/custom_task.py
+++ b/examples/nanotron/custom_task.py
@@ -82,8 +82,6 @@ def mmlu_anatomy(line):
         generation_size=5,
         metric=[Metrics.loglikelihood_acc_single_token],
         stop_sequence=["\n"],
-        output_regex=None,
-        frozen=False,
     ),
     LightevalTaskConfig(
         name="mmlu:anatomy_signs",
@@ -98,7 +96,5 @@ def mmlu_anatomy(line):
         generation_size=5,
         metric=[Metrics.loglikelihood_acc_single_token],
         stop_sequence=["\n"],
-        output_regex=None,
-        frozen=False,
     ),
 ]
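
For anyone maintaining custom tasks on top of these files, the patch has two practical consequences: `output_regex` and `frozen` are no longer passed when building a `LightevalTaskConfig`, so they should simply be dropped from task definitions, and `alghafa_pfn` now reads its answer choices from the fixed `sol1`, `sol2`, `sol3`, `sol4` columns rather than inferring them from the row's keys. The snippet below is a minimal, standalone sketch of that second change; it does not use lighteval's actual `Doc` API, and the helper name `build_alghafa_prompt` and the sample row are illustrative assumptions, not code from the repository.

```python
# Standalone sketch of the choice-building logic that the patched alghafa_pfn uses.
# Assumptions: build_alghafa_prompt and the sample row are illustrative only; lighteval's
# real prompt function wraps this information in its Doc objects instead.

def build_alghafa_prompt(line: dict) -> tuple[str, list[str], int]:
    """Return (query, choices, answer_index) for one AlGhafa-style row."""
    question = line["query"]
    answer_index = int(line["label"])
    # The fix: read the four answer columns explicitly, mirroring the patched line in
    # community_tasks/arabic_evals.py, instead of filtering line.keys() dynamically.
    choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]

    # Arabic instruction, roughly: "The following are multiple-choice questions with the correct answer".
    instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
    query = f"{instruction}السؤال: {question}\n"
    return query, choices, answer_index


if __name__ == "__main__":
    # Illustrative row only; real rows come from the AlGhafa datasets on the Hub.
    sample = {
        "query": "Sample question?",
        "sol1": "choice 1",
        "sol2": "choice 2",
        "sol3": "choice 3",
        "sol4": "choice 4",
        "label": "0",
        "__few_shots": False,  # metadata key that must never end up in `choices`
    }
    query, choices, gold = build_alghafa_prompt(sample)
    print(query)
    print(choices, "-> gold index:", gold)
```

Pinning the columns explicitly also means that a metadata column added to the dataset later cannot silently become an extra answer choice, and the order of the choices is guaranteed to be stable.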