forked from giotto-ai/the-shape-of-chemical-functions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
molecules.py
81 lines (67 loc) · 2.5 KB
/
molecules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import networkx as nx
from giotto.graphs.create_clique_complex import CreateLaplacianMatrices, \
CreateCliqueComplex
from giotto.graphs.heat_diffusion import HeatDiffusion
from giotto.graphs.graph_entropy import GraphEntropy
def mol_to_nx(mol):
    """Build an undirected networkx graph from a molecule object.

    Every atom yielded by ``mol.GetAtoms()`` becomes a node keyed by
    ``atom.GetIdx()``, and every bond yielded by ``mol.GetBonds()`` becomes
    an edge between its begin and end atom indices.

    Parameters
    ----------
    mol : object
        Molecule exposing ``GetAtoms()`` and ``GetBonds()``.
        NOTE(review): this looks like the RDKit ``Mol`` API -- confirm
        against the callers.

    Returns
    -------
    networkx.Graph
        Nodes carry the attributes ``atomic_num``, ``formal_charge``,
        ``chiral_tag``, ``hybridization``, ``num_explicit_hs`` and
        ``is_aromatic``; edges carry ``bond_type``.
    """
    graph = nx.Graph()

    # One (node, attribute-dict) pair per atom, inserted in atom order.
    graph.add_nodes_from(
        (
            atom.GetIdx(),
            {
                'atomic_num': atom.GetAtomicNum(),
                'formal_charge': atom.GetFormalCharge(),
                'chiral_tag': atom.GetChiralTag(),
                'hybridization': atom.GetHybridization(),
                'num_explicit_hs': atom.GetNumExplicitHs(),
                'is_aromatic': atom.GetIsAromatic(),
            },
        )
        for atom in mol.GetAtoms()
    )

    # One (u, v, attribute-dict) triple per bond.
    graph.add_edges_from(
        (
            bond.GetBeginAtomIdx(),
            bond.GetEndAtomIdx(),
            {'bond_type': bond.GetBondType()},
        )
        for bond in mol.GetBonds()
    )
    return graph
def compute_node_edge_entropy(g, i, taus_n, taus_e):
    """Compute heat-diffusion entropy features for a single graph.

    The graph is turned into a clique complex, Laplacian matrices are
    requested for ``(0, 1)``, a heat diffusion is run on each of the two
    Laplacians, and the graph entropy of each diffusion is returned
    transposed.

    Parameters
    ----------
    g : graph
        Graph handed to ``CreateCliqueComplex``.
    i : int
        Running index of the graph; only used to print a progress message
        whenever it is a multiple of 10000 (including 0).
    taus_n : object
        Second argument of the heat diffusion run on the first Laplacian.
        NOTE(review): presumably diffusion times for nodes -- confirm.
    taus_e : object
        Second argument of the heat diffusion run on the second Laplacian.
        NOTE(review): presumably diffusion times for edges -- confirm.

    Returns
    -------
    list
        Two-element list ``[mh_n, mh_e]``: the transposed entropy of the
        first diffusion followed by that of the second.
    """
    clique_complex = CreateCliqueComplex(graph=g).create_complex_from_graph()
    laplacians = CreateLaplacianMatrices().fit_transform(clique_complex,
                                                         (0, 1))

    # Run both diffusions first, then both entropies, keeping the original
    # evaluation order of the four transformer calls.
    diffusions = [
        HeatDiffusion().fit_transform(laplacian, taus)
        for laplacian, taus in ((laplacians[0], taus_n),
                                (laplacians[1], taus_e))
    ]
    entropies = [GraphEntropy().fit_transform(diffusion).T
                 for diffusion in diffusions]

    # Lightweight progress report for long batch runs.
    if i % 10000 == 0:
        print("Atoms and Bonds of {} molecules have been embedded...".
              format(i))
    return entropies
def bonds_type_to_edge(g_mol):
    """Attach a one-hot bond-type vector to every edge of every graph.

    For each edge, the integer value of its ``bond_type`` attribute is
    mapped to a slot of a length-4 vector, which is stored on the edge
    under the attribute name ``bonds_one_hot``. Codes 1, 2 and 3 map to
    slots 0, 1 and 2; code 12 maps to slot 3.
    NOTE(review): these codes look like RDKit's SINGLE / DOUBLE / TRIPLE /
    AROMATIC bond types -- confirm against the data source.

    The graphs are modified in place; nothing is returned.

    Parameters
    ----------
    g_mol : iterable of networkx.Graph
        Graphs whose edges all carry a ``bond_type`` attribute that can be
        converted with ``int``.

    Raises
    ------
    ValueError
        If a bond-type code is not one of 1, 2, 3 or 12. Previously code 0
        wrapped around to the last slot (negative index), code 4 collided
        with the slot reserved for code 12, and larger codes failed with a
        bare ``IndexError``.
    """
    # Explicit table instead of ``code - 1`` arithmetic, so that unsupported
    # codes can never alias a valid slot.
    slot_by_code = {1: 0, 2: 1, 3: 2, 12: 3}

    for g in g_mol:
        one_hot_by_edge = {}
        for e in g.edges():
            code = int(g.get_edge_data(e[0], e[1])['bond_type'])
            try:
                slot = slot_by_code[code]
            except KeyError:
                raise ValueError(
                    "Unsupported bond type code {} on edge {}; expected one "
                    "of {}.".format(code, e, sorted(slot_by_code))
                ) from None
            one_hot = np.zeros(4)
            one_hot[slot] = 1
            one_hot_by_edge[e] = one_hot
        # Keyword arguments keep the call independent of the positional
        # order of ``name`` and ``values``.
        nx.set_edge_attributes(g, name='bonds_one_hot',
                               values=one_hot_by_edge)
def bonds_type(g_mol):
    """Attach per-node bond-type counts to every node of every graph.

    For each node, the edges to its neighbors are visited and the integer
    value of each edge's ``bond_type`` attribute selects a slot of a
    length-4 vector that is incremented by one. Codes 1, 2 and 3 map to
    slots 0, 1 and 2; code 12 maps to slot 3. The vector is stored on the
    node under the attribute name ``bonds_one_hot``; despite that name it
    holds counts, so entries can exceed 1.
    NOTE(review): these codes look like RDKit's SINGLE / DOUBLE / TRIPLE /
    AROMATIC bond types -- confirm against the data source.

    The graphs are modified in place; nothing is returned.

    Parameters
    ----------
    g_mol : iterable of networkx.Graph
        Graphs whose edges all carry a ``bond_type`` attribute that can be
        converted with ``int``.

    Raises
    ------
    ValueError
        If a bond-type code is not one of 1, 2, 3 or 12. Previously code 0
        and negative codes wrapped around through negative indexing, code 4
        collided with the slot reserved for code 12, and larger codes
        failed with a bare ``IndexError``.
    """
    # Explicit table instead of ``code - 1`` arithmetic, so that unsupported
    # codes can never alias a valid slot.
    slot_by_code = {1: 0, 2: 1, 3: 2, 12: 3}

    for g in g_mol:
        counts_by_node = {}
        for node in g.nodes():
            counts = np.zeros(4)
            for neighbor in g.neighbors(node):
                code = int(g.get_edge_data(node, neighbor)['bond_type'])
                try:
                    slot = slot_by_code[code]
                except KeyError:
                    raise ValueError(
                        "Unsupported bond type code {} on edge ({}, {}); "
                        "expected one of {}.".format(
                            code, node, neighbor, sorted(slot_by_code))
                    ) from None
                counts[slot] += 1
            counts_by_node[node] = counts
        # Keyword arguments keep the call independent of the positional
        # order of ``name`` and ``values``.
        nx.set_node_attributes(g, name='bonds_one_hot',
                               values=counts_by_node)
def graph_to_points(g_mol, n):
    """Assign each graph a contiguous block of global point indices.

    The graphs are walked in order and each one receives the next run of
    consecutive integers, as many as it has nodes (``n == 0``) or edges
    (any other ``n``).

    Parameters
    ----------
    g_mol : iterable
        Graphs exposing ``number_of_nodes()`` and ``number_of_edges()``.
        Any iterable is accepted, not only indexable sequences.
    n : int
        ``0`` to index nodes; any other value to index edges.

    Returns
    -------
    list of numpy.ndarray
        One ``np.arange`` per graph. The first starts at 0 and each one
        starts where the previous one ended. Empty input gives ``[]``.
    """
    # The node/edge choice does not depend on the graph, so decide it once.
    size_method = 'number_of_nodes' if n == 0 else 'number_of_edges'

    offset = 0
    index_ranges = []
    for graph in g_mol:
        size = getattr(graph, size_method)()
        index_ranges.append(np.arange(offset, offset + size))
        offset += size
    return index_ranges