From fbca143616c37f4336f80768cc4bdddb97bf3b06 Mon Sep 17 00:00:00 2001 From: 3 a l i <58257628+alielfilali01@users.noreply.github.com> Date: Fri, 20 Dec 2024 22:12:34 +0400 Subject: [PATCH] Update arabic_evals.py: Fix custom arabic tasks [2nd attempt] (#444) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix alghafa prompt function by explicitly determining the list of choices based on task_name. (Not all subsets of AlGhafa Native share same columns) --------- Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- .github/workflows/trufflehog.yml | 1 - community_tasks/arabic_evals.py | 7 ++----- docs/source/adding-a-new-metric.mdx | 1 - .../contributing-to-multilingual-evaluations.mdx | 12 ++++++------ docs/source/using-the-python-api.mdx | 2 +- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml index 8ac08ad65..ecdca01de 100644 --- a/.github/workflows/trufflehog.yml +++ b/.github/workflows/trufflehog.yml @@ -16,4 +16,3 @@ jobs: fetch-depth: 0 - name: Secret Scanning uses: trufflesecurity/trufflehog@main - diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 4408f22fa..86ab69e28 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None): choices=valid_keys_arabic, # Return only valid choices (Arabic keys) gold_index=answer_index, # Correct index in the valid Arabic keys instruction=instruction, - target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form ) @@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None): choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints gold_index=answer_index, instruction=instruction, - target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number ) @@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None): choices=LETTER_INDICES_AR[:3], gold_index=answer_index, instruction=instruction, - target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index], ) @@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None): def alghafa_pfn(line, task_name: str = None): question = line["query"] answer_index = int(line["label"]) - choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]] + allowed_keys = [f"sol{i}" for i in range(1, 6)] + choices = [line[key] for key in allowed_keys if key in line] instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n" query = f"{instruction}السؤال: {question}\n" @@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None): choices=choices, gold_index=answer_index, # Correct index in the valid keys instruction=instruction, - target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form ) diff --git a/docs/source/adding-a-new-metric.mdx b/docs/source/adding-a-new-metric.mdx index 35fc975f8..6433d5883 100644 --- a/docs/source/adding-a-new-metric.mdx +++ b/docs/source/adding-a-new-metric.mdx @@ -92,4 +92,3 @@ if __name__ == "__main__": You can then give your custom metric to lighteval by using `--custom-tasks path_to_your_file` when launching it. - diff --git a/docs/source/contributing-to-multilingual-evaluations.mdx b/docs/source/contributing-to-multilingual-evaluations.mdx index 0d0855d75..4db1c935b 100644 --- a/docs/source/contributing-to-multilingual-evaluations.mdx +++ b/docs/source/contributing-to-multilingual-evaluations.mdx @@ -8,7 +8,7 @@ We welcome translations in your language! To contribute, you'll need to 1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file -2. Edit the file to add or expand the literal for your language of interest. +2. Edit the file to add or expand the literal for your language of interest. ```python Language.ENGLISH: TranslationLiterals( @@ -42,7 +42,7 @@ To contribute, you'll need to ## Contributing a new multilingual task -You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use. +You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use. Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations the `prompt_function` should be implemented by language-adapted template. The template will take care of correct formatting, correct and consistent usage of language adjusted prompt anchors (e.g Question/Answer) and punctuation. @@ -58,7 +58,7 @@ your_tasks = [ LightevalTaskConfig( # Name of your evaluation name=f"evalname_{language.value}_{formulation.name.lower()}", - # The evaluation is community contributed + # The evaluation is community contributed suite=["community"], # This will automatically get the correct metrics for your chosen formulation metric=get_metrics_for_formulation( @@ -72,7 +72,7 @@ your_tasks = [ # In this function, you choose which template to follow and for which language and formulation prompt_function=get_template_prompt_function( language=language, - # then use the adapter to define the mapping between the + # then use the adapter to define the mapping between the # keys of the template (left), and the keys of your dataset # (right) # To know which template keys are required and available, @@ -83,9 +83,9 @@ your_tasks = [ }, formulation=formulation, ), - # You can also add specific filters to remove irrelevant samples + # You can also add specific filters to remove irrelevant samples hf_filter=lambda line: line["label"] in , - # You then select your huggingface dataset as well as + # You then select your huggingface dataset as well as # the splits available for evaluation hf_repo=, hf_subset=, diff --git a/docs/source/using-the-python-api.mdx b/docs/source/using-the-python-api.mdx index 8c44050f4..583da5f54 100644 --- a/docs/source/using-the-python-api.mdx +++ b/docs/source/using-the-python-api.mdx @@ -35,7 +35,7 @@ def main(): env_config=EnvConfig(cache_dir="tmp/"), # Remove the 2 parameters below once your configuration is tested override_batch_size=1, - max_samples=10 + max_samples=10 ) model_config = VLLMModelConfig(