Set batch_size: 1 on every code_evaluation task in the eval YAMLs.

Also adds the icl_task_type: code_evaluation entry that the
human_eval_return_simple task in tasks.yaml did not have, and keeps the
trailing newline at the end of tasks.yaml.

NOTE(review): the post-image blob id on the tasks.yaml "index" line predates
the trailing-newline fix; regenerate the patch if the exact id matters.

diff --git a/scripts/eval/yamls/coding_tasks.yaml b/scripts/eval/yamls/coding_tasks.yaml
index 0382b48d5b..524c260bbb 100644
--- a/scripts/eval/yamls/coding_tasks.yaml
+++ b/scripts/eval/yamls/coding_tasks.yaml
@@ -5,6 +5,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_cpp
@@ -12,6 +13,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_js
@@ -19,6 +21,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_simple
@@ -26,6 +29,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_complex
@@ -33,6 +37,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_25
@@ -40,6 +45,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_50
@@ -47,6 +53,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_75
@@ -54,4 +61,5 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
diff --git a/scripts/eval/yamls/tasks.yaml b/scripts/eval/yamls/tasks.yaml
index 7481ada872..b48b30d796 100644
--- a/scripts/eval/yamls/tasks.yaml
+++ b/scripts/eval/yamls/tasks.yaml
@@ -179,6 +179,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_cpp
@@ -186,6 +187,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_js
@@ -193,6 +195,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_return_simple
@@ -200,12 +203,15 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
+  icl_task_type: code_evaluation
 -
   label: human_eval_return_complex
   dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl # ADD YOUR OWN DATASET URI
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_25
@@ -213,6 +219,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_50
@@ -220,6 +227,7 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation
 -
   label: human_eval_75
@@ -227,4 +235,5 @@ icl_tasks:
   num_fewshot: [0]
   pass_at_k: 1
   num_beams: 20
+  batch_size: 1
   icl_task_type: code_evaluation