-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocessing.py
58 lines (44 loc) · 1.1 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
import sys
import sqlparse
# Parsed rows of every table, keyed by table name. Each row is a list of ints.
# Filled in place by create_db().
tables_data = {}
# For every table name, the stripped lines that follow the name inside its
# <begin_table>/<end_table> section of the metadata file (presumably the
# table's column names). Filled in place by create_db().
tables_info = {}


def _read_schema(lines):
    """Parse metadata lines into a ``{table_name: [entry, ...]}`` dict.

    A table section starts at a line beginning with ``<begin_table>`` and
    ends at a line beginning with ``<end_table>``. The first non-blank line
    of a section is the table name; the remaining non-blank lines are its
    entries. Lines outside a section are ignored, as are empty sections.
    A final section that is missing its ``<end_table>`` is still recorded.
    """
    schema = {}
    section = None  # lines collected for the open section; None when outside

    def _flush():
        # An empty section has no name line, so there is nothing to record.
        if section:
            schema[section[0]] = section[1:]

    for raw in lines:
        if raw.startswith("<begin_table>"):
            section = []
        elif raw.startswith("<end_table>"):
            _flush()
            section = None
        elif section is not None:
            text = raw.strip()
            if text:
                section.append(text)
    _flush()  # tolerate a truncated file whose last <end_table> is missing
    return schema


def _parse_row(line):
    """Split one comma-separated line into a list of ints.

    Single and double quote characters are removed from every cell before
    conversion; surrounding whitespace is handled by ``int`` itself.

    Raises:
        ValueError: if a cell is not an integer literal once quotes are
            removed.
    """
    return [
        int(cell.replace("'", "").replace('"', ""))
        for cell in line.strip().split(",")
    ]


def create_db():
    """Load the table metadata and table contents from the ``files`` directory.

    Reads ``files/metadata.txt`` and stores each table's entries in the
    module-level ``tables_info`` dict, then reads ``files/<table>.csv`` for
    every table in ``tables_info`` and stores its rows (lists of ints) in the
    module-level ``tables_data`` dict. Both dicts are updated in place, so
    references held by other modules stay valid. Blank CSV lines are skipped.

    Raises:
        OSError: if the metadata file or one of the table files cannot be
            opened (e.g. ``FileNotFoundError``).
        ValueError: if a CSV cell is not an integer.
    """
    with open("files/metadata.txt", "r") as meta_file:
        tables_info.update(_read_schema(meta_file))

    for table in tables_info:
        with open("files/" + table + ".csv", "r") as table_file:
            # Skip blank lines (typically a trailing newline) instead of
            # failing on int('').
            tables_data[table] = [
                _parse_row(line) for line in table_file if line.strip()
            ]