Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ar support for MBZUAI-arabic-mmlu #209

Closed
wants to merge 11 commits into from
40 changes: 40 additions & 0 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,45 @@ def mmlu_arabic(line, task_name: str = None):
target_for_fewshot_sorting=LETTER_INDICES_AR[gold_ix],
)

def mbzuai_arabic_mmlu(line, task_name: str = None):
    """Build the multiple-choice prompt for one MBZUAI/ArabicMMLU row.

    The row is read through the keys ``Subject``, ``Question``,
    ``Option 1`` .. ``Option 5`` and ``Answer Key``. Options whose value is
    ``None`` or blank are left out of the prompt and of the candidate
    answers, so a question with fewer than five answers never shows a literal
    "None" choice. Every option that is kept retains the Arabic letter of its
    original position.

    Args:
        line: Dataset row, indexed by the column names listed above.
        task_name: Name of the task, forwarded unchanged to the ``Doc``.

    Returns:
        A ``Doc`` whose ``choices`` are the Arabic letters of the options that
        are present and whose ``gold_index`` designates the correct one.

    Raises:
        ValueError: If ``Answer Key`` does not designate one of the options
            that are present.
    """
    topic = line["Subject"]
    instruction = f"الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح حول {topic.replace('_', ' ')}. \n\n"

    # Collect only the options that actually carry text, remembering both the
    # roman letter (used by the answer key) and the Arabic letter (shown to
    # the model) attached to each option's original position.
    roman_keys = []
    arabic_keys = []
    options = []
    letter_pairs = zip(LETTER_INDICES[:5], LETTER_INDICES_AR[:5])
    for position, (roman_key, arabic_key) in enumerate(letter_pairs, start=1):
        option = line[f"Option {position}"]
        if option is None or not str(option).strip():
            continue
        roman_keys.append(roman_key)
        arabic_keys.append(arabic_key)
        options.append(option)

    # Answers are provided with roman letters - the gold index is the position
    # of that letter among the options that were kept, and is then applied to
    # the Arabic letters.
    gold_ix = roman_keys.index(line["Answer Key"])

    query = f"{instruction}{line['Question']}\n"
    query += "".join([f"{key}. {choice}\n" for key, choice in zip(arabic_keys, options)])
    query += "الإجابة:"

    return Doc(
        task_name=task_name,
        query=query,
        choices=arabic_keys,
        gold_index=gold_ix,
        instruction=instruction,
        target_for_fewshot_sorting=arabic_keys[gold_ix],
    )

# Task configuration for the MBZUAI/ArabicMMLU dataset, prompted through
# mbzuai_arabic_mmlu and registered in the "community" suite.
mbzuai_arabic_mmlu_task = LightevalTaskConfig(
    # Identity and prompt construction.
    name="mbzuai_arabic_mmlu",
    suite=["community"],
    prompt_function=mbzuai_arabic_mmlu,
    # Data source.
    hf_repo="MBZUAI/ArabicMMLU",
    # Review note from the pull-request thread: "default is the same as test subset".
    hf_subset="default",
    trust_dataset=True,
    # Splits: only "test" is declared available, and it is the one evaluated.
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # NOTE(review): few-shot examples are taken from the same "test" split
    # that is evaluated - confirm this overlap is intended.
    few_shots_split="test",
    few_shots_select="sequential",
    # Scoring and bookkeeping.
    metric=["loglikelihood_acc_norm"],
    version=0,
)

class CustomArabicMMLUTask(LightevalTaskConfig):
def __init__(
Expand Down Expand Up @@ -594,6 +633,7 @@ def sciq_prompt_arabic(line, task_name: str = None):
+ [hellaswag_okapi_ar_task]
+ [toxigen_ar_task]
+ [sciq_ar_task]
+ [mbzuai_arabic_mmlu_task]
)

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions examples/tasks/OALL_tasks.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ community|copa_ext_ar|5|1
community|hellaswag_okapi_ar|5|1
community|toxigen_ar|5|1
community|sciq_ar|5|1
community|mbzuai_arabic_mmlu|5|1
1 change: 1 addition & 0 deletions examples/tasks/all_arabic_tasks.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,4 @@ community|copa_ext_ar|5|1
community|hellaswag_okapi_ar|5|1
community|toxigen_ar|5|1
community|sciq_ar|5|1
community|mbzuai_arabic_mmlu|5|1
Loading