From 02486b3ff7e29b191cf5e124e0131d7a7f2af1fd Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 08:04:40 +0300
Subject: [PATCH 1/7] Configure MBZUAI_ArabicMMLU Arabic Task

---
 community_tasks/arabic_evals.py     | 60 +++++++++++++++++++++++++++++
 examples/tasks/all_arabic_tasks.txt |  1 +
 2 files changed, 61 insertions(+)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 9e65bade..1287f3bb 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -105,6 +105,65 @@ def mmlu_arabic(line, task_name: str = None):
         target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
     )
 
+# mbzuai_arabic_mmlu #
+
+# fmt: off
+MBZUAI_ArabicMMLU_SUBSETS = ["test"]
+# fmt: on
+
+class CustomMBZUAIArabicMMLU(LightevalTaskConfig):
+    def __init__(
+        self,
+        name,
+        hf_subset,
+    ):
+        super().__init__(
+            name=name,
+            hf_subset=hf_subset,
+            prompt_function="mbzuai_arabic_mmlu",
+            hf_repo="MBZUAI/ArabicMMLU",
+            metric=["loglikelihood_acc_norm"],
+            hf_avail_splits=["test"],
+            evaluation_splits=["test"],
+            few_shots_split="test",
+            few_shots_select="sequential",
+            suite=["community"],
+            generation_size=-1,
+            stop_sequence=None,
+            output_regex=None,
+            frozen=False,
+            trust_dataset=True,
+            version=0,
+        )
+
+MBZUAI_ArabicMMLU_TASKS = [
+    CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS
+]
+
+def mbzuai_mmlu_arabic(line, task_name: str = None):
+    topic = line["Subject"]
+    instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n"
+    choices = [line["Option 1"], line["Option 2"],
+               line["Option 3"], line["Option 4"],
+               line["Option 5"]]
+    
+    # Answers are provided with roman letters - we look for the correct index in LETTER_INDICES,
+    # it will then be applied to arabic letters
+    gold_ix = LETTER_INDICES.index(line["Answer Key"])
+
+    query = f"{instruction}{line['Question']}\n"
+    query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES_AR[:5], choices)])
+    query += "الإجابة:"
+
+    return Doc(
+        task_name=task_name,
+        query=query,
+        choices=LETTER_INDICES_AR[:5],
+        gold_index=gold_ix,
+        instruction=instruction,
+        target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
+    )
+
 
 # ACVA ##
 # fmt: off
@@ -593,6 +652,7 @@ def sciq_prompt_arabic(line, task_name: str = None):
     + [hellaswag_okapi_ar_task]
     + [toxigen_ar_task]
     + [sciq_ar_task]
+    + MBZUAI_ArabicMMLU_TASKS
 )
 
 # Convert to dict for lighteval
diff --git a/examples/tasks/all_arabic_tasks.txt b/examples/tasks/all_arabic_tasks.txt
index fa430ed1..7d5b0aa4 100644
--- a/examples/tasks/all_arabic_tasks.txt
+++ b/examples/tasks/all_arabic_tasks.txt
@@ -135,3 +135,4 @@ community|copa_ext_ar|5|1
 community|hellaswag_okapi_ar|5|1
 community|toxigen_ar|5|1
 community|sciq_ar|5|1
+community|mbzuai_arabic_mmlu|5|1

From 5686f30a2398745fdb2a19ef6a92b8a3f365a81b Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 09:34:46 +0300
Subject: [PATCH 2/7] add community|mbzuai_arabic_mmlu to OALL_tasks.txt

---
 examples/tasks/OALL_tasks.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/tasks/OALL_tasks.txt b/examples/tasks/OALL_tasks.txt
index 346d062c..7d3a8248 100644
--- a/examples/tasks/OALL_tasks.txt
+++ b/examples/tasks/OALL_tasks.txt
@@ -134,3 +134,4 @@ community|copa_ext_ar|5|1
 community|hellaswag_okapi_ar|5|1
 community|toxigen_ar|5|1
 community|sciq_ar|5|1
+community|mbzuai_arabic_mmlu|5|1

From 7c989ad72dfb3601d8280aa7476286f6bd883e6f Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 09:37:15 +0300
Subject: [PATCH 3/7] fix prompt function name to mbzuai_arabic_mmlu

---
 community_tasks/arabic_evals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 1287f3bb..23a8cb9c 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -140,7 +140,7 @@ def __init__(
     CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS
 ]
 
-def mbzuai_mmlu_arabic(line, task_name: str = None):
+def mbzuai_arabic_mmlu(line, task_name: str = None):
     topic = line["Subject"]
     instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n"
     choices = [line["Option 1"], line["Option 2"],

From 311d170ca500e189368361e6f8e67fd5612fb40e Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 09:47:42 +0300
Subject: [PATCH 4/7] define mbzuai_arabic_mmlu as a single LightevalTaskConfig

---
 community_tasks/arabic_evals.py | 51 +++++++++++----------------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 23a8cb9c..a751f9b9 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -105,40 +105,21 @@ def mmlu_arabic(line, task_name: str = None):
         target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
     )
 
-# mbzuai_arabic_mmlu #
-
-# fmt: off
-MBZUAI_ArabicMMLU_SUBSETS = ["test"]
-# fmt: on
-
-class CustomMBZUAIArabicMMLU(LightevalTaskConfig):
-    def __init__(
-        self,
-        name,
-        hf_subset,
-    ):
-        super().__init__(
-            name=name,
-            hf_subset=hf_subset,
-            prompt_function="mbzuai_arabic_mmlu",
-            hf_repo="MBZUAI/ArabicMMLU",
-            metric=["loglikelihood_acc_norm"],
-            hf_avail_splits=["test"],
-            evaluation_splits=["test"],
-            few_shots_split="test",
-            few_shots_select="sequential",
-            suite=["community"],
-            generation_size=-1,
-            stop_sequence=None,
-            output_regex=None,
-            frozen=False,
-            trust_dataset=True,
-            version=0,
-        )
-
-MBZUAI_ArabicMMLU_TASKS = [
-    CustomMBZUAIArabicMMLU(name=f"mbzuai_arabic_mmlu:{subset}", hf_subset=subset) for subset in MBZUAI_ArabicMMLU_SUBSETS
-]
+# mbzuai_arabic_mmlu
+mbzuai_arabic_mmlu_task = LightevalTaskConfig(
+    name="mbzuai_arabic_mmlu",
+    prompt_function="mbzuai_arabic_mmlu",
+    suite=["community"],
+    hf_repo="MBZUAI/ArabicMMLU",
+    hf_subset="test",
+    hf_avail_splits=["test"],
+    evaluation_splits=["test"],
+    few_shots_split="validation",
+    few_shots_select="sequential",
+    metric=["loglikelihood_acc_norm"],
+    trust_dataset=True,
+    version=0,
+)
 
 def mbzuai_arabic_mmlu(line, task_name: str = None):
     topic = line["Subject"]
@@ -652,7 +633,7 @@ def sciq_prompt_arabic(line, task_name: str = None):
     + [hellaswag_okapi_ar_task]
     + [toxigen_ar_task]
     + [sciq_ar_task]
-    + MBZUAI_ArabicMMLU_TASKS
+    + [mbzuai_arabic_mmlu_task]
 )
 
 # Convert to dict for lighteval

From 0a3e338ab15a4212517d39c79e590ab9fc048436 Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 09:49:20 +0300
Subject: [PATCH 5/7] set few_shots_split to test for mbzuai_arabic_mmlu_task

---
 community_tasks/arabic_evals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index a751f9b9..e487d0e5 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -114,7 +114,7 @@ def mmlu_arabic(line, task_name: str = None):
     hf_subset="test",
     hf_avail_splits=["test"],
     evaluation_splits=["test"],
-    few_shots_split="validation",
+    few_shots_split="test",
     few_shots_select="sequential",
     metric=["loglikelihood_acc_norm"],
     trust_dataset=True,

From 86a662b9054ad73825b0d649b10f000836873150 Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Wed, 3 Jul 2024 09:51:08 +0300
Subject: [PATCH 6/7] set hf_subset to default for mbzuai_arabic_mmlu_task

---
 community_tasks/arabic_evals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index e487d0e5..e48db4f6 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -111,7 +111,7 @@ def mmlu_arabic(line, task_name: str = None):
     prompt_function="mbzuai_arabic_mmlu",
     suite=["community"],
     hf_repo="MBZUAI/ArabicMMLU",
-    hf_subset="test",
+    hf_subset="default",
     hf_avail_splits=["test"],
     evaluation_splits=["test"],
     few_shots_split="test",

From 89045744cecac01bf4054889874b5dd271f7f9c9 Mon Sep 17 00:00:00 2001
From: Abu Bakr Soliman <bakrianoo@gmail.com>
Date: Thu, 11 Jul 2024 12:23:21 +0300
Subject: [PATCH 7/7] pass mbzuai_arabic_mmlu as a callable to prompt_function

---
 community_tasks/arabic_evals.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 5d798c3f..f5d81aa6 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -74,22 +74,6 @@ def mmlu_arabic(line, task_name: str = None):
         target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
     )
 
-# mbzuai_arabic_mmlu
-mbzuai_arabic_mmlu_task = LightevalTaskConfig(
-    name="mbzuai_arabic_mmlu",
-    prompt_function="mbzuai_arabic_mmlu",
-    suite=["community"],
-    hf_repo="MBZUAI/ArabicMMLU",
-    hf_subset="default",
-    hf_avail_splits=["test"],
-    evaluation_splits=["test"],
-    few_shots_split="test",
-    few_shots_select="sequential",
-    metric=["loglikelihood_acc_norm"],
-    trust_dataset=True,
-    version=0,
-)
-
 def mbzuai_arabic_mmlu(line, task_name: str = None):
     topic = line["Subject"]
     instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n"
@@ -114,6 +98,21 @@ def mbzuai_arabic_mmlu(line, task_name: str = None):
         target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
     )
 
+# mbzuai_arabic_mmlu
+mbzuai_arabic_mmlu_task = LightevalTaskConfig(
+    name="mbzuai_arabic_mmlu",
+    prompt_function=mbzuai_arabic_mmlu,
+    suite=["community"],
+    hf_repo="MBZUAI/ArabicMMLU",
+    hf_subset="default",
+    hf_avail_splits=["test"],
+    evaluation_splits=["test"],
+    few_shots_split="test",
+    few_shots_select="sequential",
+    metric=["loglikelihood_acc_norm"],
+    trust_dataset=True,
+    version=0,
+)
 
 class CustomArabicMMLUTask(LightevalTaskConfig):
     def __init__(