Skip to content

Commit

Permalink
add medical train
Browse files: browse the repository at this point in the history
  • Loading branch information
qbc2016 committed Apr 11, 2024
1 parent 7f08694 commit 84673cc
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions federatedscope/llm/dataloader/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,19 @@ def load_llm_dataset(config=None, **kwargs):
list_data_dict[i]['output'] = \
list_data_dict[i]['output'].replace('####', 'The answer is')
dataset = LLMDataset(list_data_dict, tokenizer)
elif dataset_name.lower() == "medical_tc":
fp = os.path.join(config.data.root, 'medical_tc_train.jsonl')
if not os.path.exists(fp):
download_url(
'https://federatedscope.oss-cn-beijing.aliyuncs.com/FS-LLM'
'/medical_tc_train.jsonl', config.data.root)
os.rename(os.path.join(config.data.root, 'train.jsonl'), fp)
list_data_dict = load_jsonl(fp,
instruction='question',
input='input',
output='output',
category='output')
dataset = LLMDataset(list_data_dict, tokenizer)
elif dataset_name.lower() == 'code_search_net':
from tqdm import tqdm
from federatedscope.llm.dataset.code_search_net import \
Expand Down

0 comments on commit 84673cc

Please sign in to comment.