-
Notifications
You must be signed in to change notification settings - Fork 4
/
graph_utils.py
116 lines (100 loc) · 3.58 KB
/
graph_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import numpy as np
import networkx as nx
import random
import pdb
import numpy as np
from scipy.io import loadmat
from scipy.sparse import csr_matrix
from scipy.sparse import lil_matrix
'''
This script was downloaded from:
https://github.com/vinhsuhi/GAlign
and is used for the implementation of the allmovie-IMDB dataset.
'''
def print_graph_stats(G):
    """Print the number of nodes and edges of graph ``G``."""
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    print('# of nodes: %d, # of edges: %d' % (num_nodes, num_edges))
def construct_adjacency(G, id2idx, sparse=False, file_path=None):
    """Build the adjacency matrix of ``G``, rows/columns ordered by ``id2idx``.

    Args:
        G: a networkx graph.
        id2idx: dict mapping node id -> matrix index (0..n-1).
        sparse: if True, return a scipy ``lil_matrix``; otherwise a dense
            numpy matrix.
        file_path: if given, the edge index array is also saved there
            via ``np.save``.

    Returns:
        The adjacency matrix (sparse lil_matrix or dense numpy matrix).
    """
    idx2id = {v: k for k, v in id2idx.items()}
    # Node order follows the index mapping so that row/column i
    # corresponds to the node whose id2idx value is i.
    nodes_list = [idx2id[i] for i in range(len(id2idx))]
    edges = np.array([[id2idx[u], id2idx[v]] for u, v in G.edges()])
    if file_path:
        np.save(file_path, edges)
    if sparse:
        try:
            adj = nx.to_scipy_sparse_matrix(G, nodes_list).tolil()
        except AttributeError:
            # networkx >= 3.0 removed to_scipy_sparse_matrix.
            adj = lil_matrix(nx.to_scipy_sparse_array(G, nodelist=nodes_list))
    else:
        try:
            adj = nx.to_numpy_matrix(G, nodes_list)
        except AttributeError:
            # networkx >= 3.0 removed to_numpy_matrix; preserve the
            # np.matrix return type for existing callers.
            adj = np.asmatrix(nx.to_numpy_array(G, nodelist=nodes_list))
    return adj
def build_degrees(G, id2idx):
    """Return node degrees as a numpy array aligned with ``id2idx`` indices."""
    out = np.zeros(len(G.nodes()))
    for n in G.nodes():
        # Place each node's degree at its mapped index position.
        out[id2idx[n]] = G.degree(n)
    return out
def build_clustering(G, id2idx):
    """Return clustering coefficients as a list aligned with ``id2idx`` indices."""
    coeffs = nx.clustering(G)
    # Re-key the {node_id: coefficient} dict into an index-ordered list.
    out = [0] * len(G.nodes())
    for node_id, coeff in coeffs.items():
        out[id2idx[node_id]] = coeff
    return out
def get_H(path, source_dataset, target_dataset, train_dict=None):
    """Build the prior alignment matrix H, shape (target_nodes, source_nodes).

    Priority of sources:
    1. ``train_dict`` (non-empty dict of source_idx -> target_idx anchor
       links): a near-certain prior (0.98) on the supervised pairs.
    2. ``path`` is None: a uniform prior 1/n_source everywhere.
    3. Otherwise: load H from a .mat file at ``path``.

    Args:
        path: path to a .mat file containing key 'H', or None.
        source_dataset / target_dataset: objects exposing ``.G.nodes()``.
        train_dict: optional dict of known source->target index pairs.

    Returns:
        numpy array H of shape (n_target, n_source).

    Raises:
        Exception: if ``path`` is given but does not exist.
    """
    # NOTE: the original default was train_dict="" which crashed with
    # AttributeError on "".items(); an empty/None train_dict now falls
    # through to the path-based priors.
    n_trg = len(target_dataset.G.nodes())
    n_src = len(source_dataset.G.nodes())
    if train_dict:
        H = np.zeros((n_trg, n_src))
        for src_idx, trg_idx in train_dict.items():
            H[trg_idx, src_idx] = 0.98
        return H
    if path is None:
        # No prior information: uniform distribution over source nodes.
        return np.ones((n_trg, n_src)) * (1 / n_src)
    if not os.path.exists(path):
        raise Exception("Path '{}' does not exist".format(path))
    dict_H = loadmat(path)
    return dict_H['H']
def get_edges(G, id2idx):
    """Return edge index pairs in both directions as an ndarray.

    Forward pairs come first, followed by all reversed pairs, matching
    the undirected-graph convention used elsewhere in this script.
    """
    forward = []
    backward = []
    for u, v in G.edges():
        forward.append((id2idx[u], id2idx[v]))
        backward.append((id2idx[v], id2idx[u]))
    return np.array(forward + backward)
def load_gt(path, id2idx_src=None, id2idx_trg=None, format='matrix'):
    """Load a ground-truth alignment file of whitespace-separated 'src trg' pairs.

    Args:
        path: text file, one 'source_id target_id' pair per line.
        id2idx_src / id2idx_trg: optional dicts mapping node id -> index.
            File ids are read as strings and cast to the key type of
            these maps before lookup.
        format: 'matrix' for a sparse 0/1 alignment matrix of shape
            (n_source, n_target); anything else for a dict.

    Returns:
        scipy ``csr_matrix`` when format == 'matrix', else a dict of
        src index -> trg index (or src id string -> trg id string when
        no index maps are given).

    Raises:
        ValueError: if format == 'matrix' but the index maps are missing.
    """
    if id2idx_src:
        # File ids are strings; cast them to the key type of the index maps.
        conversion_src = type(list(id2idx_src.keys())[0])
        conversion_trg = type(list(id2idx_trg.keys())[0])
    if format == 'matrix':
        if not id2idx_src:
            # Previously this fell through to an opaque NameError on
            # conversion_src; fail fast with a clear message instead.
            raise ValueError("format='matrix' requires id2idx_src and id2idx_trg")
        row = []
        col = []
        val = []
        with open(path) as file:
            for line in file:
                src, trg = line.strip().split()
                row.append(id2idx_src[conversion_src(src)])
                col.append(id2idx_trg[conversion_trg(trg)])
                val.append(1)
        gt = csr_matrix((val, (row, col)), shape=(len(id2idx_src), len(id2idx_trg)))
    else:
        gt = {}
        with open(path) as file:
            for line in file:
                src, trg = line.strip().split()
                if id2idx_src:
                    gt[id2idx_src[conversion_src(src)]] = id2idx_trg[conversion_trg(trg)]
                else:
                    gt[str(src)] = str(trg)
    return gt