Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replacing icl_task_type question_answering with generation_task_with_answers in long context eval yamls. #1250

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 39 additions & 39 deletions scripts/eval/yamls/long_context_tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ icl_tasks:
label: kv_pairs_beginning_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 2048
Expand All @@ -13,7 +13,7 @@ icl_tasks:
label: kv_pairs_middle_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 2048
Expand All @@ -23,7 +23,7 @@ icl_tasks:
label: kv_pairs_end_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 2048
Expand All @@ -33,7 +33,7 @@ icl_tasks:
label: kv_pairs_beginning_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 4096
Expand All @@ -43,7 +43,7 @@ icl_tasks:
label: kv_pairs_middle_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 4096
Expand All @@ -53,7 +53,7 @@ icl_tasks:
label: kv_pairs_end_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 4096
Expand All @@ -63,7 +63,7 @@ icl_tasks:
label: kv_pairs_beginning_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 8192
Expand All @@ -73,7 +73,7 @@ icl_tasks:
label: kv_pairs_middle_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 8192
Expand All @@ -83,7 +83,7 @@ icl_tasks:
label: kv_pairs_end_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 8192
Expand All @@ -93,7 +93,7 @@ icl_tasks:
label: wikiqa_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: wikiqa
context_length: 2048
Expand All @@ -102,7 +102,7 @@ icl_tasks:
label: wikiqa_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: wikiqa
context_length: 4096
Expand All @@ -111,7 +111,7 @@ icl_tasks:
label: wikiqa_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: wikiqa
context_length: 8192
Expand All @@ -120,7 +120,7 @@ icl_tasks:
label: hotpotqa_beginning_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 2048
Expand All @@ -130,7 +130,7 @@ icl_tasks:
label: hotpotqa_middle_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 2048
Expand All @@ -140,7 +140,7 @@ icl_tasks:
label: hotpotqa_end_2k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 2048
Expand All @@ -150,7 +150,7 @@ icl_tasks:
label: hotpotqa_beginning_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 4096
Expand All @@ -160,7 +160,7 @@ icl_tasks:
label: hotpotqa_middle_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 4096
Expand All @@ -170,7 +170,7 @@ icl_tasks:
label: hotpotqa_end_4k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 4096
Expand All @@ -180,7 +180,7 @@ icl_tasks:
label: hotpotqa_beginning_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 8192
Expand All @@ -190,7 +190,7 @@ icl_tasks:
label: hotpotqa_middle_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 8192
Expand All @@ -200,7 +200,7 @@ icl_tasks:
label: hotpotqa_end_8k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 8192
Expand All @@ -210,7 +210,7 @@ icl_tasks:
label: hotpotqa_beginning_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 16384
Expand All @@ -220,7 +220,7 @@ icl_tasks:
label: hotpotqa_beginning_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 32768
Expand All @@ -230,7 +230,7 @@ icl_tasks:
label: hotpotqa_beginning_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 65536
Expand All @@ -240,7 +240,7 @@ icl_tasks:
label: hotpotqa_middle_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 16384
Expand All @@ -250,7 +250,7 @@ icl_tasks:
label: hotpotqa_middle_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 32768
Expand All @@ -260,7 +260,7 @@ icl_tasks:
label: hotpotqa_middle_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 65536
Expand All @@ -270,7 +270,7 @@ icl_tasks:
label: hotpotqa_end_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 16384
Expand All @@ -280,7 +280,7 @@ icl_tasks:
label: hotpotqa_end_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 32768
Expand All @@ -290,7 +290,7 @@ icl_tasks:
label: hotpotqa_end_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: hotpotqa
context_length: 65536
Expand All @@ -300,7 +300,7 @@ icl_tasks:
label: kv_pairs_beginning_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 16384
Expand All @@ -310,7 +310,7 @@ icl_tasks:
label: kv_pairs_beginning_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 32768
Expand All @@ -320,7 +320,7 @@ icl_tasks:
label: kv_pairs_beginning_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 65536
Expand All @@ -330,7 +330,7 @@ icl_tasks:
label: kv_pairs_middle_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 16384
Expand All @@ -340,7 +340,7 @@ icl_tasks:
label: kv_pairs_middle_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 32768
Expand All @@ -350,7 +350,7 @@ icl_tasks:
label: kv_pairs_middle_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 65536
Expand All @@ -360,7 +360,7 @@ icl_tasks:
label: kv_pairs_end_16k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 16384
Expand All @@ -370,7 +370,7 @@ icl_tasks:
label: kv_pairs_end_32k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 32768
Expand All @@ -380,7 +380,7 @@ icl_tasks:
label: kv_pairs_end_64k
dataset_uri: hf://mosaicml/long_context_eval
num_fewshot: [0]
icl_task_type: question_answering
icl_task_type: generation_task_with_answers
hf_loading_vars:
name: kv_pairs
context_length: 65536
Expand Down
Loading