-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_loader.py
32 lines (25 loc) · 896 Bytes
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python
"""
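This script loads the raw data into a single CSV file for easier reading.
input: aclImdb/ directory (resolved relative to the current working directory)
output: data.csv (written to the current working directory)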
"""
import numpy as np
import pandas as pd
import os, sys
def load_records(root="aclImdb", splits=("train", "test"), sentiments=("neg", "pos")):
    """Collect one record per file found under ``root/<split>/<sentiment>``.

    Args:
        root: Directory that contains the split sub-directories.
        splits: Names of the split sub-directories to visit, in order.
        sentiments: Names of the sentiment sub-directories to visit, in order.

    Returns:
        A list of ``(file_name, split, sentiment, text)`` tuples, where
        ``file_name`` is the part of the file's name before the first ``.``
        and ``text`` is the first line of the file (``""`` for an empty file).

    Raises:
        OSError: If one of the expected directories or files cannot be read.
    """
    # Rows are accumulated in a list instead of a pre-sized array so that any
    # number of input files is handled: no IndexError past a fixed capacity
    # and no leftover all-None rows when there are fewer files.
    records = []
    for split in splits:
        for sentiment in sentiments:
            folder = os.path.join(root, split, sentiment)
            # os.listdir returns entries in arbitrary order; sort them so the
            # resulting CSV is reproducible from run to run.
            for entry in sorted(os.listdir(folder)):
                # The context manager closes every file handle; an explicit
                # encoding keeps decoding independent of the platform locale.
                with open(os.path.join(folder, entry), "r", encoding="utf-8") as text_file:
                    # readline() yields the same first line as readlines()[0]
                    # but returns "" for an empty file instead of raising.
                    first_line = text_file.readline()
                records.append((entry.split('.')[0], split, sentiment, first_line))
    return records


def main():
    """Read the ``aclImdb`` directory tree and write its records to ``data.csv``."""
    df = pd.DataFrame(
        data=load_records(),
        columns=['file_name', 'split', 'sentiment', 'text'],
    )
    df.to_csv('data.csv')


if __name__ == "__main__":
    main()