# Moves the nine human_eval* task entries from the end of `icl_tasks` to the
# start of the list; the entries' field values are unchanged by this patch.
# NOTE(review): line breaks and the two-space key indentation were reconstructed
# from a whitespace-collapsed copy -- confirm against the target file before applying.
diff --git a/scripts/eval/yamls/tasks.yaml b/scripts/eval/yamls/tasks.yaml
index e1f65f74c0..2d2a8caae0 100644
--- a/scripts/eval/yamls/tasks.yaml
+++ b/scripts/eval/yamls/tasks.yaml
@@ -1,4 +1,77 @@
 icl_tasks:
+-
+  label: human_eval_execution_prediction
+  dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [3]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_execution_prediction
+-
+  label: human_eval
+  dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_cpp
+  dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_js
+  dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_return_simple
+  dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_return_complex
+  dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_25
+  dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_50
+  dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+-
+  label: human_eval_75
+  dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI
+  num_fewshot: [0]
+  pass_at_k: 1
+  num_beams: 10
+  batch_size: 1
+  icl_task_type: code_evaluation
+
 -
   label: jeopardy
   dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl # ADD YOUR OWN DATASET URI
@@ -264,75 +337,3 @@ icl_tasks:
   num_fewshot: [3]
   icl_task_type: multiple_choice
   has_categories: true
--
-  label: human_eval_execution_prediction
-  dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [3]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_execution_prediction
--
-  label: human_eval
-  dataset_uri: eval/local_data/programming/human_eval.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_cpp
-  dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_js
-  dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_return_simple
-  dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_return_complex
-  dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_25
-  dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_50
-  dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation
--
-  label: human_eval_75
-  dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl # ADD YOUR OWN DATASET URI
-  num_fewshot: [0]
-  pass_at_k: 1
-  num_beams: 10
-  batch_size: 1
-  icl_task_type: code_evaluation