From fbca143616c37f4336f80768cc4bdddb97bf3b06 Mon Sep 17 00:00:00 2001
From: 3 a l i <58257628+alielfilali01@users.noreply.github.com>
Date: Fri, 20 Dec 2024 22:12:34 +0400
Subject: [PATCH] Update arabic_evals.py: Fix custom arabic tasks [2nd attempt]
 (#444)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix alghafa prompt function by building the list of choices from whichever of the sol1..sol5 columns are present in each row, instead of assuming exactly four.
(Not all subsets of AlGhafa Native share the same columns.)

---------

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
---
 .github/workflows/trufflehog.yml                     |  1 -
 community_tasks/arabic_evals.py                      |  7 ++-----
 docs/source/adding-a-new-metric.mdx                  |  1 -
 .../contributing-to-multilingual-evaluations.mdx     | 12 ++++++------
 docs/source/using-the-python-api.mdx                 |  2 +-
 5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml
index 8ac08ad65..ecdca01de 100644
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@@ -16,4 +16,3 @@ jobs:
         fetch-depth: 0
     - name: Secret Scanning
       uses: trufflesecurity/trufflehog@main
-
diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 4408f22fa..86ab69e28 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
         choices=valid_keys_arabic,  # Return only valid choices (Arabic keys)
         gold_index=answer_index,  # Correct index in the valid Arabic keys
         instruction=instruction,
-        target_for_fewshot_sorting=valid_keys_arabic[answer_index],  # Correct answer in Arabic form
     )
 
 
@@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
         choices=[str(i) for i in range(1, len(choices) + 1)],  # List of strings instead of ints
         gold_index=answer_index,
         instruction=instruction,
-        target_for_fewshot_sorting=str(answer_index),  # Assuming it's sorted based on the number
     )
 
 
@@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
         choices=LETTER_INDICES_AR[:3],
         gold_index=answer_index,
         instruction=instruction,
-        target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
     )
 
 
@@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
 def alghafa_pfn(line, task_name: str = None):
     question = line["query"]
     answer_index = int(line["label"])
-    choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
+    allowed_keys = [f"sol{i}" for i in range(1, 6)]
+    choices = [line[key] for key in allowed_keys if key in line]
 
     instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
     query = f"{instruction}السؤال: {question}\n"
@@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
         choices=choices,
         gold_index=answer_index,  # Correct index in the valid keys
         instruction=instruction,
-        target_for_fewshot_sorting=valid_keys_latin[answer_index],  # Correct answer in Latin form
     )
 
 
diff --git a/docs/source/adding-a-new-metric.mdx b/docs/source/adding-a-new-metric.mdx
index 35fc975f8..6433d5883 100644
--- a/docs/source/adding-a-new-metric.mdx
+++ b/docs/source/adding-a-new-metric.mdx
@@ -92,4 +92,3 @@ if __name__ == "__main__":
 
 You can then give your custom metric to lighteval by using `--custom-tasks
 path_to_your_file` when launching it.
-
diff --git a/docs/source/contributing-to-multilingual-evaluations.mdx b/docs/source/contributing-to-multilingual-evaluations.mdx
index 0d0855d75..4db1c935b 100644
--- a/docs/source/contributing-to-multilingual-evaluations.mdx
+++ b/docs/source/contributing-to-multilingual-evaluations.mdx
@@ -8,7 +8,7 @@ We welcome translations in your language!
 
 To contribute, you'll need to
 1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
-2. Edit the file to add or expand the literal for your language of interest. 
+2. Edit the file to add or expand the literal for your language of interest.
 
 ```python
     Language.ENGLISH: TranslationLiterals(
@@ -42,7 +42,7 @@ To contribute, you'll need to
 
 ## Contributing a new multilingual task
 
-You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use. 
+You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
 
 Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations the `prompt_function` should be implemented by language-adapted template. The template will take care of correct formatting, correct and consistent usage of language adjusted prompt anchors (e.g Question/Answer) and punctuation.
 
@@ -58,7 +58,7 @@ your_tasks = [
     LightevalTaskConfig(
         # Name of your evaluation
         name=f"evalname_{language.value}_{formulation.name.lower()}",
-        # The evaluation is community contributed 
+        # The evaluation is community contributed
         suite=["community"],
         # This will automatically get the correct metrics for your chosen formulation
         metric=get_metrics_for_formulation(
@@ -72,7 +72,7 @@ your_tasks = [
         # In this function, you choose which template to follow and for which language and formulation
         prompt_function=get_template_prompt_function(
             language=language,
-            # then use the adapter to define the mapping between the 
+            # then use the adapter to define the mapping between the
             # keys of the template (left), and the keys of your dataset
             # (right)
             # To know which template keys are required and available,
@@ -83,9 +83,9 @@ your_tasks = [
             },
             formulation=formulation,
         ),
-        # You can also add specific filters to remove irrelevant samples 
+        # You can also add specific filters to remove irrelevant samples
         hf_filter=lambda line: line["label"] in <condition>,
-        # You then select your huggingface dataset as well as 
+        # You then select your huggingface dataset as well as
         # the splits available for evaluation
         hf_repo=<dataset>,
         hf_subset=<subset>,
diff --git a/docs/source/using-the-python-api.mdx b/docs/source/using-the-python-api.mdx
index 8c44050f4..583da5f54 100644
--- a/docs/source/using-the-python-api.mdx
+++ b/docs/source/using-the-python-api.mdx
@@ -35,7 +35,7 @@ def main():
         env_config=EnvConfig(cache_dir="tmp/"),
         # Remove the 2 parameters below once your configuration is tested
         override_batch_size=1,
-        max_samples=10 
+        max_samples=10
     )
 
     model_config = VLLMModelConfig(