-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathload.py
44 lines (38 loc) · 1.17 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import sys
import datasets
import os
import glob
from typing import Union
from importlib import import_module
def list_datasets():
    """Return the names of the sub-directories that sit next to this file.

    Every non-hidden entry in this file's directory is inspected; an entry
    is reported when it is a directory and its name is not one of the
    excluded names (``__pycache__``, ``utils``, ``raw_data``).

    Returns:
        list[str]: Directory base names, in the order ``glob`` yields them.
    """
    package_dir = os.path.dirname(os.path.abspath(__file__))
    # Directories in this folder that should not be reported.
    skipped = ('__pycache__', 'utils', 'raw_data')
    return [
        os.path.basename(entry)
        for entry in glob.glob(os.path.join(package_dir, '*'))
        if os.path.isdir(entry) and os.path.basename(entry) not in skipped
    ]
def load_dataset(name, **kargs) -> Union[
        datasets.DatasetDict,
        datasets.Dataset,
        datasets.IterableDatasetDict,
        datasets.IterableDataset]:
    """Load a dataset by its name.

    The directory containing this file is placed at the front of
    ``sys.path`` and ``name`` is imported as a module; that module's
    ``load_dataset`` function is then called with ``**kargs``. If no module
    called ``name`` can be found, the request is forwarded to
    ``datasets.load_dataset`` (Hugging Face) instead.

    Args:
        name: Module name of a local dataset, or a Hugging Face dataset name.
        **kargs: Keyword arguments passed to whichever loader ends up
            handling the request.

    Returns:
        Whatever the selected loader returns.

    Raises:
        ImportError: If a module called ``name`` exists but one of its own
            imports fails. The error is re-raised rather than hidden behind
            the Hugging Face fallback.
        AttributeError: If the imported module has no ``load_dataset``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    # Keep this directory first on sys.path so local dataset modules win,
    # but do not add a new duplicate entry on every call.
    if here in sys.path:
        sys.path.remove(here)
    sys.path.insert(0, here)

    try:
        module = import_module(name)
    except ImportError as err:
        missing = err.name
        # Fall back only when `name` itself (or one of its parent packages)
        # could not be found. A failure importing something else means the
        # local dataset module exists but is broken, so surface that error.
        if (missing is not None and missing != name
                and not name.startswith(missing + '.')):
            raise
        print(f'can not found {name} in fs_datasets, try to load from huggingface')
        # Forward the caller's options instead of silently discarding them.
        return datasets.load_dataset(name, **kargs)
    return module.load_dataset(**kargs)
if __name__ == '__main__':
    # Script entry point: load the 'afqmc' dataset and print the result.
    afqmc = load_dataset('afqmc')
    print(afqmc)