-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathloading.py
58 lines (42 loc) · 1.47 KB
/
loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import json
import os
import smiles_parser
# Directory that contains this module, kept with a trailing "/" so that file
# names can be appended by plain string concatenation.
dir_path = os.path.dirname(os.path.realpath(__file__)) + "/"

# Load the property tables stored next to this module. The context manager
# closes the file handle deterministically instead of leaving it to the
# garbage collector, and the explicit encoding keeps the read independent of
# the platform's locale (JSON interchange files are UTF-8).
with open(dir_path + "atomic-properties.json", encoding="utf-8") as _properties_file:
    properties = json.load(_properties_file)

# Mapping stored under the "Reduction enumeration" key of the JSON file.
reduction_enum = properties["Reduction enumeration"]

# Distinct values of that mapping in sorted order, so that each value has a
# stable position that can be looked up with ``unique_reductions.index(...)``.
unique_reductions = sorted(set(reduction_enum.values()))
def get_data(
    smiles,
    apply_paths=False,
    parse_cis_trans=False,
    only_biggest=False,
    split_ions=False,
    unknown_atom_is_dummy=False
):
    """Parse ``smiles`` by delegating to ``smiles_parser.parse_smiles``.

    Every option is forwarded unchanged, as a keyword argument of the same
    name, and the parser's return value is handed back as-is.

    Args:
        smiles: The input passed as the parser's first positional argument.
        apply_paths: Forwarded to the parser.
        parse_cis_trans: Forwarded to the parser.
        only_biggest: Forwarded to the parser.
        split_ions: Forwarded to the parser.
        unknown_atom_is_dummy: Forwarded to the parser.

    Returns:
        Whatever ``smiles_parser.parse_smiles`` returns.
    """
    parser_options = {
        "apply_paths": apply_paths,
        "parse_cis_trans": parse_cis_trans,
        "only_biggest": only_biggest,
        "split_ions": split_ions,
        "unknown_atom_is_dummy": unknown_atom_is_dummy,
    }
    return smiles_parser.parse_smiles(smiles, **parser_options)
def convert(x, a, e, path=None, bonds=None):
    """One-hot encode the atom and bond types of a parsed structure.

    Args:
        x: Iterable of atom records. Each record is indexed with ``"type"``
            and the resulting object's ``.name`` is looked up in the
            module-level ``reduction_enum`` mapping.
        a: Returned unchanged as the second element of the result.
        e: Iterable of bond records. Each record is indexed with ``"type"``
            and that value must be one of the entries of ``bonds``.
        path: Unused by this function; kept so existing callers that pass
            it keep working.
        bonds: Iterable of every bond type that may occur in ``e``. The
            position of a bond type in this iterable is the position of the
            1 in its one-hot vector. Required despite the ``None`` default.

    Returns:
        A tuple ``(converted_x, a, converted_e)`` where ``converted_x`` holds
        one one-hot list of length ``len(unique_reductions)`` per atom and
        ``converted_e`` holds one one-hot list of length ``len(bonds)`` per
        bond.

    Raises:
        TypeError: If ``bonds`` is not supplied.
        KeyError: If an atom type name is missing from ``reduction_enum`` or
            a bond type is not listed in ``bonds``.
    """
    # Fail early with an explicit message; previously a missing ``bonds``
    # surfaced as "'NoneType' object is not iterable" from enumerate().
    if bonds is None:
        raise TypeError(
            "convert() requires 'bonds': an iterable of every bond type "
            "that may appear in e"
        )

    def create_one_hot(size, index):
        """Return a list of ``size`` zeros with a single 1 at ``index``."""
        vector = [0] * size
        vector[index] = 1
        return vector

    # One-hot atom type. The module-level tables are only consulted per
    # atom, so an empty ``x`` never touches them.
    converted_x = [
        create_one_hot(
            len(unique_reductions),
            unique_reductions.index(reduction_enum[atom["type"].name]),
        )
        for atom in x
    ]

    # Materialise once so one-shot iterables (e.g. generators) can be both
    # enumerated and measured.
    bond_enum = list(bonds)
    bond_types = {bond: i for i, bond in enumerate(bond_enum)}

    # One-hot bond type
    converted_e = [
        create_one_hot(len(bond_enum), bond_types[edge["type"]])
        for edge in e
    ]

    return converted_x, a, converted_e