From e27aa0e6c07bce65320bc31509968e50c6020565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sini=C5=A1a=20Stanivuk?= <51213388+Stopwolf@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:48:30 +0200 Subject: [PATCH] Adding OZ Eval task (#225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- community_tasks/oz_evals.py | 94 +++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 community_tasks/oz_evals.py diff --git a/community_tasks/oz_evals.py b/community_tasks/oz_evals.py new file mode 100644 index 000000000..6252a20a0 --- /dev/null +++ b/community_tasks/oz_evals.py @@ -0,0 +1,94 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
# ruff: noqa: F405, F403, F401
"""
Custom evaluation tasks for lighteval.

This file creates a TASKS_TABLE, which is then imported by LightEval.

OZ Eval (sr. Opšte Znanje Evaluacija) dataset was created for the purposes of evaluating the General Knowledge of LLMs in the Serbian language.
Data consists of 1k+ high-quality questions and answers which were used as part of entry exams at the Faculty of Philosophy and Faculty of Organizational Sciences, University of Belgrade.
The exams test the General Knowledge of students and were used in the enrollment periods from 2003 to 2024.
For more details and results see: https://huggingface.co/datasets/DjMel/oz-eval

In order to have comparable results to ours, please do not forget to run with --use_chat_template
"""

from lighteval.metrics.metrics import Metrics
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def prompt_fn_oz_eval_task(line, task_name: str = None):
    """Build the multiple-choice prompt for one OZ Eval example.

    Args:
        line: One dataset record. The function reads ``line["questions"]``
            (the question text), ``line["options"]`` (indexed 0 through 4 for
            the answers A-E) and ``line["answer"]`` (one of the letters
            "A".."E").
        task_name: Task name, forwarded unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose ``query`` is the formatted prompt, whose ``choices``
        are the letters "A".."E", and whose ``gold_index`` is the position of
        ``line["answer"]`` within those letters.

    Raises:
        ValueError: If ``line["answer"]`` is not one of "A".."E"
            (raised by ``list.index``).
    """
    # NOTE(review): the leading whitespace on the continuation lines is part of
    # the prompt text the model sees. It is kept (4 spaces assumed) so that
    # scores remain comparable with previously published results -- confirm the
    # exact width against the upstream file before changing it.
    query_template = """Pitanje: {question}\n
    Ponuđeni odgovori:
    A. {choice_a}
    B. {choice_b}
    C. {choice_c}
    D. {choice_d}
    E. {choice_e}

    Krajnji odgovor:"""

    options = line["options"]

    query = query_template.format(
        question=line["questions"],
        choice_a=options[0],
        choice_b=options[1],
        choice_c=options[2],
        choice_d=options[3],
        choice_e=options[4],
    )

    # The model is scored on the answer letter, not on the option text.
    choices = ["A", "B", "C", "D", "E"]
    return Doc(
        task_name=task_name,
        query=query,
        choices=choices,
        gold_index=choices.index(line["answer"]),
    )


# Task definition: evaluated on the "test" split only, with no few-shot split
# configured, and scored with log-likelihood accuracy.
oz_eval_task = LightevalTaskConfig(
    name="serbian_evals:oz_task",
    prompt_function=prompt_fn_oz_eval_task,
    suite=["community"],
    hf_repo="DjMel/oz-eval",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    version=0,
)


# STORE YOUR EVALS
TASKS_TABLE = [oz_eval_task]


if __name__ == "__main__":
    # Materialize the names: passing a bare generator expression to print()
    # would only show "<generator object ...>".
    # NOTE(review): assumes LightevalTaskConfig exposes its `name` argument as
    # an attribute -- confirm against the installed lighteval version.
    print([task.name for task in TASKS_TABLE])
    print(len(TASKS_TABLE))