Rename the ICL task type used by every long-context eval task from
`question_answering` to `generation_task_with_answers`.

Review additions folded into the same hunks (hunk line counts are unchanged):
  * wikiqa_4k / wikiqa_8k loaded `context_length: 2048`, which made them
    duplicates of wikiqa_2k; they now load 4096 / 8192 to match their labels.
  * kv_pairs_*_16k / _32k / _64k loaded `name: hotpotqa`, which made them
    duplicates of the hotpotqa tasks; they now load `name: kv_pairs`.
    NOTE(review): assumes the dataset publishes kv_pairs at these context
    lengths - confirm against hf://mosaicml/long_context_eval.

NOTE(review): the post-image blob id on the "index" header line below was
recorded before the two corrections above were added, so it no longer matches
the resulting file. Plain `git apply` / `patch` do not check it.

diff --git a/scripts/eval/yamls/long_context_tasks.yaml b/scripts/eval/yamls/long_context_tasks.yaml
index daf958a340..153e3b9df6 100644
--- a/scripts/eval/yamls/long_context_tasks.yaml
+++ b/scripts/eval/yamls/long_context_tasks.yaml
@@ -3,7 +3,7 @@ icl_tasks:
   label: kv_pairs_beginning_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 2048
@@ -13,7 +13,7 @@ icl_tasks:
   label: kv_pairs_middle_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 2048
@@ -23,7 +23,7 @@ icl_tasks:
   label: kv_pairs_end_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 2048
@@ -33,7 +33,7 @@ icl_tasks:
   label: kv_pairs_beginning_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 4096
@@ -43,7 +43,7 @@ icl_tasks:
   label: kv_pairs_middle_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 4096
@@ -53,7 +53,7 @@ icl_tasks:
   label: kv_pairs_end_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 4096
@@ -63,7 +63,7 @@ icl_tasks:
   label: kv_pairs_beginning_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 8192
@@ -73,7 +73,7 @@ icl_tasks:
   label: kv_pairs_middle_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 8192
@@ -83,7 +83,7 @@ icl_tasks:
   label: kv_pairs_end_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: kv_pairs
     context_length: 8192
@@ -93,7 +93,7 @@ icl_tasks:
   label: wikiqa_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: wikiqa
     context_length: 2048
@@ -102,7 +102,7 @@ icl_tasks:
   label: wikiqa_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: wikiqa
-    context_length: 2048
+    context_length: 4096
@@ -111,7 +111,7 @@ icl_tasks:
   label: wikiqa_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: wikiqa
-    context_length: 2048
+    context_length: 8192
@@ -120,7 +120,7 @@ icl_tasks:
   label: hotpotqa_beginning_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 2048
@@ -130,7 +130,7 @@ icl_tasks:
   label: hotpotqa_middle_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 2048
@@ -140,7 +140,7 @@ icl_tasks:
   label: hotpotqa_end_2k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 2048
@@ -150,7 +150,7 @@ icl_tasks:
   label: hotpotqa_beginning_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 4096
@@ -160,7 +160,7 @@ icl_tasks:
   label: hotpotqa_middle_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 4096
@@ -170,7 +170,7 @@ icl_tasks:
   label: hotpotqa_end_4k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 4096
@@ -180,7 +180,7 @@ icl_tasks:
   label: hotpotqa_beginning_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 8192
@@ -190,7 +190,7 @@ icl_tasks:
   label: hotpotqa_middle_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 8192
@@ -200,7 +200,7 @@ icl_tasks:
   label: hotpotqa_end_8k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 8192
@@ -210,7 +210,7 @@ icl_tasks:
   label: hotpotqa_beginning_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 16384
@@ -220,7 +220,7 @@ icl_tasks:
   label: hotpotqa_beginning_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 32768
@@ -230,7 +230,7 @@ icl_tasks:
   label: hotpotqa_beginning_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 65536
@@ -240,7 +240,7 @@ icl_tasks:
   label: hotpotqa_middle_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 16384
@@ -250,7 +250,7 @@ icl_tasks:
   label: hotpotqa_middle_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 32768
@@ -260,7 +260,7 @@ icl_tasks:
   label: hotpotqa_middle_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 65536
@@ -270,7 +270,7 @@ icl_tasks:
   label: hotpotqa_end_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 16384
@@ -280,7 +280,7 @@ icl_tasks:
   label: hotpotqa_end_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 32768
@@ -290,7 +290,7 @@ icl_tasks:
   label: hotpotqa_end_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
     name: hotpotqa
     context_length: 65536
@@ -300,7 +300,7 @@ icl_tasks:
   label: kv_pairs_beginning_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 16384
@@ -310,7 +310,7 @@ icl_tasks:
   label: kv_pairs_beginning_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 32768
@@ -320,7 +320,7 @@ icl_tasks:
   label: kv_pairs_beginning_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 65536
@@ -330,7 +330,7 @@ icl_tasks:
   label: kv_pairs_middle_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 16384
@@ -340,7 +340,7 @@ icl_tasks:
   label: kv_pairs_middle_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 32768
@@ -350,7 +350,7 @@ icl_tasks:
   label: kv_pairs_middle_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 65536
@@ -360,7 +360,7 @@ icl_tasks:
   label: kv_pairs_end_16k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 16384
@@ -370,7 +370,7 @@ icl_tasks:
   label: kv_pairs_end_32k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 32768
@@ -380,7 +380,7 @@ icl_tasks:
   label: kv_pairs_end_64k
   dataset_uri: hf://mosaicml/long_context_eval
   num_fewshot: [0]
-  icl_task_type: question_answering
+  icl_task_type: generation_task_with_answers
   hf_loading_vars:
-    name: hotpotqa
+    name: kv_pairs
     context_length: 65536