From 02486b3ff7e29b191cf5e124e0131d7a7f2af1fd Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 08:04:40 +0300 Subject: [PATCH 1/7] Configure MBZUAI_ArabicMMLU Arabic Task --- community_tasks/arabic_evals.py | 60 +++++++++++++++++++++++++++++ examples/tasks/all_arabic_tasks.txt | 1 + 2 files changed, 61 insertions(+) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 9e65bade..1287f3bb 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -105,6 +105,65 @@ def mmlu_arabic(line, task_name: str = None): target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], ) +# mbzuai_arabic_mmlu # + +# fmt: off +MBZUAI_ArabicMMLU_SUBSETS = ["test"] +# fmt: on + +class CustomMBZUAIArabicMMLU(LightevalTaskConfig): + def __init__( + self, + name, + hf_subset, + ): + super().__init__( + name=name, + hf_subset=hf_subset, + prompt_function="mbzuai_arabic_mmlu", + hf_repo="MBZUAI/ArabicMMLU", + metric=["loglikelihood_acc_norm"], + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + suite=["community"], + generation_size=-1, + stop_sequence=None, + output_regex=None, + frozen=False, + trust_dataset=True, + version=0, + ) + +MBZUAI_ArabicMMLU_TASKS = [ + CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS +] + +def mbzuai_mmlu_arabic(line, task_name: str = None): + topic = line["Subject"] + instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" + choices = [line["Option 1"], line["Option 2"], + line["Option 3"], line["Option 4"], + line["Option 5"]] + + # Answers are provided with roman letters - we look for the correct index in LETTER_INDICES, + # it will then be applied to arabic letters + gold_ix = LETTER_INDICES.index(line["Answer Key"]) + + query = f"{instruction}{line['Question']}\n" + query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES_AR[:5], choices)]) + query += "الإجابة:" + + return Doc( + task_name=task_name, + query=query, + choices=LETTER_INDICES_AR[:5], + gold_index=gold_ix, + instruction=instruction, + target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], + ) + # ACVA ## # fmt: off @@ -593,6 +652,7 @@ def sciq_prompt_arabic(line, task_name: str = None): + [hellaswag_okapi_ar_task] + [toxigen_ar_task] + [sciq_ar_task] + + MBZUAI_ArabicMMLU_TASKS ) # Convert to dict for lighteval diff --git a/examples/tasks/all_arabic_tasks.txt b/examples/tasks/all_arabic_tasks.txt index fa430ed1..7d5b0aa4 100644 --- a/examples/tasks/all_arabic_tasks.txt +++ b/examples/tasks/all_arabic_tasks.txt @@ -135,3 +135,4 @@ community|copa_ext_ar|5|1 community|hellaswag_okapi_ar|5|1 community|toxigen_ar|5|1 community|sciq_ar|5|1 +community|mbzuai_arabic_mmlu|5|1 From 5686f30a2398745fdb2a19ef6a92b8a3f365a81b Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 09:34:46 +0300 Subject: [PATCH 2/7] push community|mbzuai_arabic_mmlu to OALL_tasks --- examples/tasks/OALL_tasks.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/tasks/OALL_tasks.txt b/examples/tasks/OALL_tasks.txt index 346d062c..7d3a8248 100644 --- a/examples/tasks/OALL_tasks.txt +++ b/examples/tasks/OALL_tasks.txt @@ -134,3 +134,4 @@ community|copa_ext_ar|5|1 community|hellaswag_okapi_ar|5|1 community|toxigen_ar|5|1 community|sciq_ar|5|1 +community|mbzuai_arabic_mmlu|5|1 From 7c989ad72dfb3601d8280aa7476286f6bd883e6f Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 09:37:15 +0300 Subject: [PATCH 3/7] fix function name --- community_tasks/arabic_evals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 1287f3bb..23a8cb9c 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -140,7 +140,7 @@ def __init__( CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS ] -def mbzuai_mmlu_arabic(line, task_name: str = None): +def mbzuai_arabic_mmlu(line, task_name: str = None): topic = line["Subject"] instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" choices = [line["Option 1"], line["Option 2"], From 311d170ca500e189368361e6f8e67fd5612fb40e Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 09:47:42 +0300 Subject: [PATCH 4/7] change mbzuai_arabic_mmlu definition --- community_tasks/arabic_evals.py | 51 +++++++++++---------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 23a8cb9c..a751f9b9 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -105,40 +105,21 @@ def mmlu_arabic(line, task_name: str = None): target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], ) -# mbzuai_arabic_mmlu # - -# fmt: off -MBZUAI_ArabicMMLU_SUBSETS = ["test"] -# fmt: on - -class CustomMBZUAIArabicMMLU(LightevalTaskConfig): - def __init__( - self, - name, - hf_subset, - ): - super().__init__( - name=name, - hf_subset=hf_subset, - prompt_function="mbzuai_arabic_mmlu", - hf_repo="MBZUAI/ArabicMMLU", - metric=["loglikelihood_acc_norm"], - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - suite=["community"], - generation_size=-1, - stop_sequence=None, - output_regex=None, - frozen=False, - trust_dataset=True, - version=0, - ) - -MBZUAI_ArabicMMLU_TASKS = [ - CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS -] +# mbzuai_arabic_mmlu +mbzuai_arabic_mmlu_task = LightevalTaskConfig( + name="mbzuai_arabic_mmlu", + prompt_function="mbzuai_arabic_mmlu", + suite=["community"], + hf_repo="MBZUAI/ArabicMMLU", + hf_subset="test", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="validation", + few_shots_select="sequential", + metric=["loglikelihood_acc_norm"], + trust_dataset=True, + version=0, +) def mbzuai_arabic_mmlu(line, task_name: str = None): topic = line["Subject"] @@ -652,7 +633,7 @@ def sciq_prompt_arabic(line, task_name: str = None): + [hellaswag_okapi_ar_task] + [toxigen_ar_task] + [sciq_ar_task] - + MBZUAI_ArabicMMLU_TASKS + + [mbzuai_arabic_mmlu_task] ) # Convert to dict for lighteval From 0a3e338ab15a4212517d39c79e590ab9fc048436 Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 09:49:20 +0300 Subject: [PATCH 5/7] fix few_shots_split for mbzuai_arabic_mmlu_task --- community_tasks/arabic_evals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index a751f9b9..e487d0e5 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -114,7 +114,7 @@ def mmlu_arabic(line, task_name: str = None): hf_subset="test", hf_avail_splits=["test"], evaluation_splits=["test"], - few_shots_split="validation", + few_shots_split="test", few_shots_select="sequential", metric=["loglikelihood_acc_norm"], trust_dataset=True, From 86a662b9054ad73825b0d649b10f000836873150 Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Wed, 3 Jul 2024 09:51:08 +0300 Subject: [PATCH 6/7] update hf_subset for mbzuai_arabic_mmlu_task --- community_tasks/arabic_evals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index e487d0e5..e48db4f6 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -111,7 +111,7 @@ def mmlu_arabic(line, task_name: str = None): prompt_function="mbzuai_arabic_mmlu", suite=["community"], hf_repo="MBZUAI/ArabicMMLU", - hf_subset="test", + hf_subset="default", hf_avail_splits=["test"], evaluation_splits=["test"], few_shots_split="test", From 89045744cecac01bf4054889874b5dd271f7f9c9 Mon Sep 17 00:00:00 2001 From: Abu Bakr Soliman Date: Thu, 11 Jul 2024 12:23:21 +0300 Subject: [PATCH 7/7] make mbzuai_arabic_mmlu callable for the prompt_function arg --- community_tasks/arabic_evals.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 5d798c3f..f5d81aa6 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -74,22 +74,6 @@ def mmlu_arabic(line, task_name: str = None): target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], ) -# mbzuai_arabic_mmlu -mbzuai_arabic_mmlu_task = LightevalTaskConfig( - name="mbzuai_arabic_mmlu", - prompt_function="mbzuai_arabic_mmlu", - suite=["community"], - hf_repo="MBZUAI/ArabicMMLU", - hf_subset="default", - hf_avail_splits=["test"], - evaluation_splits=["test"], - few_shots_split="test", - few_shots_select="sequential", - metric=["loglikelihood_acc_norm"], - trust_dataset=True, - version=0, -) - def mbzuai_arabic_mmlu(line, task_name: str = None): topic = line["Subject"] instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n" @@ -114,6 +98,21 @@ def mbzuai_arabic_mmlu(line, task_name: str = None): target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix], ) +# mbzuai_arabic_mmlu +mbzuai_arabic_mmlu_task = LightevalTaskConfig( + name="mbzuai_arabic_mmlu", + prompt_function=mbzuai_arabic_mmlu, + suite=["community"], + hf_repo="MBZUAI/ArabicMMLU", + hf_subset="default", + hf_avail_splits=["test"], + evaluation_splits=["test"], + few_shots_split="test", + few_shots_select="sequential", + metric=["loglikelihood_acc_norm"], + trust_dataset=True, + version=0, +) class CustomArabicMMLUTask(LightevalTaskConfig): def __init__(