Skip to content

Commit

Permalink
add finance train dataset
Browse files — browse the repository at this point in the history
  • Loading branch information
qbc2016 committed Apr 11, 2024
1 parent 5a048f1 commit 997e2df
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion federatedscope/llm/dataloader/dataloader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -287,11 +287,24 @@ def load_llm_dataset(config=None, **kwargs):
'/medical_tc_train.jsonl', config.data.root)
os.rename(os.path.join(config.data.root, 'train.jsonl'), fp)
list_data_dict = load_jsonl(fp,
instruction='question',
instruction='instruction',
input='input',
output='output',
category='output')
dataset = LLMDataset(list_data_dict, tokenizer)
elif dataset_name.lower() == "finance":
fp = os.path.join(config.data.root, 'finance_train_data.jsonl')
if not os.path.exists(fp):
download_url(
'https://federatedscope.oss-cn-beijing.aliyuncs.com/FS-LLM'
'/finance_train_data.jsonl', config.data.root)
os.rename(os.path.join(config.data.root, 'train.jsonl'), fp)
list_data_dict = load_jsonl(fp,
instruction='instruction',
input='input',
output='output',
category='category')
dataset = LLMDataset(list_data_dict, tokenizer)
elif dataset_name.lower() == 'code_search_net':
from tqdm import tqdm
from federatedscope.llm.dataset.code_search_net import \
Expand Down

0 comments on commit 997e2df

Please sign in to comment.