-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess.py
92 lines (75 loc) · 3.03 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
import scipy.sparse as sparse
import torch
from utils import sparse_mx_to_torch_sparse_tensor
def get_T(W, train_idx):
"""
Calculate the number of iterations. The stopping criterion is max(|p_t-p_inf|) <= 1/n.
return T (integer)
"""
D_inv = sparse.spdiags(W.sum(axis=1).A1 ** -1, 0, W.shape)
W = sparse_mx_to_torch_sparse_tensor(W, cuda=True)
D_inv = sparse_mx_to_torch_sparse_tensor(D_inv, cuda=True)
RW = torch.sparse.mm(W, D_inv)
p = torch.zeros(W.shape[0], 1, device='cuda')
p[train_idx] = 1 / len(train_idx)
p_inf = W.sum(dim=1, keepdim=True) / W.sum()
T = 0
while (T < 100 or torch.max(torch.abs(p - p_inf)) > 1 / W.shape[0]) and (T < 1000):
p = torch.sparse.mm(RW, p)
T = T + 1
return T
def get_interface_idx(train_idx, all_idx, W, k):
"""
Remove k hop index from all index and return the remaining indices.
return interface_idx, type = numpy array
"""
if k == -1:
return all_idx
else:
khop_idx = train_idx
for _ in range(k):
neighbor_idx = W[khop_idx].nonzero()[1]
khop_idx = np.append(khop_idx, neighbor_idx)
khop_idx = np.unique(khop_idx)
interface_idx = np.setdiff1d(all_idx, khop_idx)
return interface_idx
def get_A(W, train_idx, interface_idx, T, subtract_mean=True):
"""
Calculate iteration matrix A.
If subtract_mean = False, A = sum_{i=0}^{T-1} (D^-1W)^i D^-1
If subtract_mean = True, A = sum_{i=0}^{T-1} J (D^-1WJ)^i D^-1, where J = I - 11^T/n is projection matrix
For any matrix M, J@M extracts column mean from M, M@J extracts row mean from M
return A, shape = [m, I], type = torch tensor
m = len(train_idx), I = len(interface_idx)
"""
D_inv = sparse.spdiags(W.sum(axis=1).A1 ** -1, 0, W.shape)
DW = D_inv * W
DW_n = DW[train_idx].todense()
# J(D^-1WJ), notice that extracting row sum is not necessary because DW all rows sum 1
JDW_n = DW[train_idx].todense() - DW.mean(axis=0)
# Move to GPU for faster computation
D_inv = sparse_mx_to_torch_sparse_tensor(D_inv, cuda=True)
DW = sparse_mx_to_torch_sparse_tensor(DW, cuda=True)
DW_n = torch.from_numpy(DW_n).cuda()
JDW_n = torch.from_numpy(JDW_n).cuda()
if subtract_mean:
# J = I - 11^T/n
J = torch.ones([len(train_idx), W.shape[1]], device='cuda') * -1 / W.shape[0]
for i, idx in enumerate(train_idx):
J[i, idx] += 1
A = J
for _ in range(T - 1):
A = A + JDW_n
JDW_n = torch.sparse.mm(JDW_n, DW)
JDW_n = JDW_n - torch.mean(JDW_n, dim=1, keepdim=True)
else:
A = torch.zeros([len(train_idx), W.shape[1]], device='cuda')
for i, idx in enumerate(train_idx):
A[i, idx] = 1
for _ in range(T - 1):
A = A + DW_n
DW_n = torch.sparse.mm(DW_n, DW)
A = torch.sparse.mm(A, D_inv)
A = A[:, interface_idx]
return A