"""
- Defines all functions needed to compute the Greedy, RIS and RIS-Exact solutions to the influence maximization (IM) problem.
- These functions are called from the 2.run_simulations.py file.
"""
from numba import cuda, boolean
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
import numpy as np
import pandas as pd
import math
import time
import itertools
import sys
from numpy import float32
from os import listdir
from collections import Counter
# ---------------
# Define Algorithms to construct R - the collection of RRR sets
# ---------------
# Function creates a large number of RRR sets (to compile the "R" collection).
# node_num is needed to size the rrs matrix and to sample the source nodes.
def get_rrs_gpu(graph, node_num, p, theta):
    # Randomly choose theta nodes, with replacement, from the network to act as
    # sources (theta is the number of RRR sets, as in the paper)
    sources = np.random.choice(node_num, size=theta)
    # Initialize a theta x n boolean matrix to represent R
    rrs = np.full((theta, node_num), False, dtype=bool)
    # Create theta random number generator states, one per thread
    rng_states = create_xoroshiro128p_states(theta, seed=np.random.randint(theta))
    # Number of blocks (each with 128 threads) needed to construct theta RRR sets
    threads_per_block = 128
    blocks = math.ceil(theta / threads_per_block)
    # Fill in the theta rows of the rrs array on the GPU
    get_node_flow_gpu[blocks, threads_per_block](graph, sources, p, rrs, theta, rng_states)
    return rrs
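# Example usage (illustrative; the parameter values are hypothetical, not taken
# from the paper). "edges" is assumed to be a (num_edges x 2) integer array in
# which column 0 holds the source node and column 1 the target node of each edge:
#
#   edges = np.array([[0, 1], [1, 2], [2, 0]])
#   R = get_rrs_gpu(edges, node_num=3, p=0.1, theta=10000)
#   # R is a 10000 x 3 boolean matrix; row i marks the nodes in RRR set i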
# The jitted kernel below returns nothing; it modifies the rrs array in place
"""
Things to know:
- the below function is a "kernel"
- kernels cannot explicitly return values. all result data must be written to an array passed to the function
- kernels explicitly declare their thread hierarchy when called (see the square brackets above)
- The product of the two numbers in the square brackets gives the total number of threads launched (mc)
- block size is crucial:
- On the software side, it determines how many threads share a given area of shared memory
- On the hardware side, it must be large enough for full occupation of execution units
- the @cuda.jit thing is known as a "decorator"
- it marks a function for optimization by numba's JIT compiler
- the code is executed by every thread once, so it has to know which thread it's in to know which array element(s) of rrs it is responsible for
- this can be done manually, but easier to do automatically using cuda.grid(1), which returns the absolute position of the current thread in the entire grid of blocks
"""
@cuda.jit
def get_node_flow_gpu(graph, sources, p, rrs, theta, rng_states):
    # Get absolute position of current thread
    thread_id = cuda.grid(1)
    # Because of the fixed block size, some thread indices will be greater than
    # or equal to theta; those surplus threads are not needed
if thread_id >= theta:
return
# For the row in the matrix corresponding to this thread, mark the source node for the rrs set
rrs[thread_id][sources[thread_id]] = True
    # Allocate a scratch array for this thread to track the nodes newly found in
    # each iteration/traverse of the RRR set construction. Local array sizes must
    # be compile-time constants, so 100 is a hard-coded upper bound on node_num!
    new_nodes = cuda.local.array(100, boolean)
    # CUDA local memory is not zero-initialized, so clear it before use
    for i in range(new_nodes.shape[0]):
        new_nodes[i] = False
    new_nodes[sources[thread_id]] = True
# Construct the RRS set by traversing to find new nodes. Stop when no new nodes have been found.
done = False
while not done:
done = True
# Iterate through the new_nodes, looking for nodes that flow into them
for source in range(new_nodes.shape[0]):
# Only evaluate a node if it is new
if new_nodes[source]:
# Iterate through every edge in the graph to find the nodes that can influence the current node
for edge in range(graph.shape[0]):
if graph[edge][1] == source:
neighbor = graph[edge][0]
                        # If the neighbor node is not already in the RRR set and the
                        # edge "exists" in this Monte Carlo draw (probability p), there
                        # is a live path from neighbor to source, so add the neighbor
                        # to both the new_nodes and rrs arrays. Finding at least one
                        # new node keeps the while loop going.
                        if (not rrs[thread_id][neighbor]) and (p > xoroshiro128p_uniform_float32(rng_states, thread_id)):
                            new_nodes[neighbor], rrs[thread_id][neighbor], done = True, True, False
# Once all the neighbors of a node have been discovered, it will no longer be evaluated
new_nodes[source] = False
# ---------------
# Define the RIS algorithm: obtain a solution with the greedy maximum coverage algorithm
# ---------------
def ris(R, k):
    # Extract the indices of the nodes in each RRR set
    R_copy = [np.where(r)[0] for r in R]
    # Repeatedly choose the node that appears most often in R (greedy maximum
    # coverage algorithm)
    seeds, count = [], 0
    for _ in range(k):
        # Create one large list of all occurrences of nodes in R
        flat_list = [item for sublist in R_copy for item in sublist]
        if flat_list:
            # Find the node that occurs most often and the number of RRR sets it covers
            seed, seed_count = Counter(flat_list).most_common(1)[0]
            count += seed_count
            # Remove the RRR sets containing the chosen seed
            R_copy = [item for item in R_copy if seed not in item]
            # Add to outputs
            seeds.append(seed)
    return seeds, float(count) / R.shape[0]
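# Example usage (illustrative, continuing the sketch above):
#
#   seeds, spread = ris(R, k=2)
#   # seeds is a list of k node indices; spread is the fraction of RRR sets
#   # covered, which estimates the expected influence as a share of the network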
# ---------------
# Define algorithms to get RIS-Exact solution
# ---------------
def get_spread(R, seeds):
    # Extract the indices of the nodes in each RRR set
    R_copy = [np.where(r)[0] for r in R]
    # Create a list of the RRR sets covered by the seed set
    coverage = [item for item in R_copy if set(item) & set(seeds)]
    return len(coverage) / R.shape[0]
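# Example usage (illustrative):
#
#   spread = get_spread(R, [0, 4])  # fraction of RRR sets hit by seed set {0, 4}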
def ris_complete_gpu(R, k):
    # Construct all possible seed sets of size k
    seed_sets = np.array(list(itertools.combinations(range(R.shape[1]), k)))
    # Initialize an array indexed by each possible seed set
    max_val = np.zeros(seed_sets.shape[0])
    # Number of blocks (each with 128 threads) needed to cover all candidate seed sets
    threads_per_block = 128
    blocks = math.ceil(seed_sets.shape[0] / threads_per_block)
    # Update each cell of the max_val array on the GPU
    get_max_val_gpu[blocks, threads_per_block](R, seed_sets, max_val)
    # Get the optimal seed set
    opt_solution = seed_sets[np.argmax(max_val)].tolist()
    # Get its coverage
    spread = get_spread(R, opt_solution)
    return opt_solution, spread
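# Example usage (illustrative). The number of candidate seed sets is C(n, k),
# so this exact method is only feasible for small networks and small k:
#
#   opt_seeds, opt_spread = ris_complete_gpu(R, k=2)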
@cuda.jit
def get_max_val_gpu(R, seed_sets, max_val):
# Get absolute position of current thread
thread_id = cuda.grid(1)
# Because of fixed block sizes, some of the threads won't be needed
if thread_id >= seed_sets.shape[0]:
return
    # For each RRR set in R, check whether it contains a node from this thread's candidate set
    for i in range(R.shape[0]):
        increment = False
        # Loop over each of the k nodes in the candidate set and check whether it is in the RRR set
        for j in range(seed_sets.shape[1]):
            seed = seed_sets[thread_id][j]  # TODO: could this lookup be hoisted out of the loop?
            if R[i][seed]:  # TODO: would a set-based membership test be faster?
                increment = True
                break
        if increment:  # TODO: could the increment flag be removed?
            max_val[thread_id] += 1
# ---------------
# Define algorithms to get Greedy solution
# ---------------
# Function to create an adjacency matrix from a csv edge list
def load_graph(file_name, size):
    # Load the graph file
    df = pd.read_csv(file_name)
    # Initialize the adjacency matrix
    graph = np.full((size, size), False, dtype=bool)
    # Fill the adjacency matrix based on the csv file
    for _, row in df.iterrows():
        graph[row['source']][row['target']] = True
    return graph
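# Example usage (illustrative). The csv file is assumed to contain integer
# 'source' and 'target' columns with one row per directed edge, e.g.:
#
#   source,target
#   0,1
#   1,2
#
#   adj = load_graph("graph.csv", size=3)  # 3 x 3 boolean adjacency matrix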
def greedy_gpu(graph, k, p, mc):
    # Initialize a boolean array with an entry for each node
    S = np.full(graph.shape[0], False, dtype=bool)
    S_nodes = []
    for _ in range(k):
        node, count = 0, 0
        for j in range(graph.shape[0]):
            # Skip nodes that are already in the seed set
            if S[j]:
                continue
            # Add this node to a copy of the seed set and compute the spread
            S_copy = S.copy()
            S_copy[j] = True
            s = IC_gpu(graph, S_copy, p, mc)
            # If the spread is greater than any previous one, keep this node
            if s > count:
                count, node = s, j
        # Add the best node to the seed set and record it
        S[node] = True
        S_nodes.append(node)
    return S_nodes
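# Example usage (illustrative; mc is the number of Monte Carlo cascades per
# candidate node, so the runtime grows roughly as k * n * mc):
#
#   seeds = greedy_gpu(adj, k=2, p=0.1, mc=1000)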
def IC_gpu(graph, S, p, mc):
    # Number of blocks (each with 128 threads) needed to perform all mc iterations
    threads_per_block = 128
    blocks = math.ceil(mc / threads_per_block)
    # Create mc random number generator states, one per thread
    rng_states = create_xoroshiro128p_states(mc, seed=np.random.randint(mc))
    # Create an mc x n array in which each row is a copy of the seed vector S
    active = np.tile(S, (mc, 1))
    new_active = active.copy()
    # An mc x n array of False's used as scratch space by the kernel
    new_ones = np.full((mc, graph.shape[0]), False, dtype=bool)
    # Update the active array on the GPU
    find_spread_gpu[blocks, threads_per_block](graph, active, new_active, new_ones, mc, p, rng_states)
    # Average number of active nodes across the mc simulated cascades
    return np.count_nonzero(active) / mc
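# Example usage (illustrative): estimate the spread of seeding node 0 alone.
#
#   S = np.full(adj.shape[0], False, dtype=bool)
#   S[0] = True
#   spread = IC_gpu(adj, S, p=0.1, mc=1000)  # expected number of activated nodes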
@cuda.jit
def find_spread_gpu(graph, active, new_active, new_ones, mc, p, rng_states):
    # Get absolute position of current thread
    thread_id = cuda.grid(1)
    # Because of the fixed block size, some of the threads won't be needed
    if thread_id >= mc:
        return
    # Simulate one independent cascade; stop when no new nodes are activated
    done = False
    while not done:
        done = True
        # Each newly active node attempts to activate each of its out-neighbors
        # with probability p
        for j in range(new_active[thread_id].shape[0]):
            if new_active[thread_id][j]:
                for k in range(graph.shape[0]):
                    if graph[j][k] and p > xoroshiro128p_uniform_float32(rng_states, thread_id):
                        new_ones[thread_id][k] = True
        # Nodes activated this round become the new frontier; previously active
        # nodes drop out of it, and the scratch array is reset as we go
        for j in range(new_active[thread_id].shape[0]):
            if new_ones[thread_id][j] and (not active[thread_id][j]):
                active[thread_id][j] = True
                new_active[thread_id][j] = True
                done = False
            else:
                new_active[thread_id][j] = False
                new_ones[thread_id][j] = False
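# ---------------
# Minimal end-to-end sketch (illustrative only; runs when this file is executed
# directly on a CUDA-capable machine). The random graph and all parameter
# values below are hypothetical, chosen only to exercise the functions above.
# ---------------
if __name__ == "__main__":
    node_num, k, p, theta, mc = 20, 2, 0.2, 5000, 1000
    # Random directed edge list: column 0 is the source, column 1 is the target
    edges = np.random.randint(0, node_num, size=(100, 2))
    # Equivalent boolean adjacency matrix for the Greedy algorithm
    adj = np.full((node_num, node_num), False, dtype=bool)
    for s, t in edges:
        adj[s][t] = True
    R = get_rrs_gpu(edges, node_num, p, theta)
    print("RIS:", ris(R, k))
    print("RIS-Exact:", ris_complete_gpu(R, k))
    print("Greedy:", greedy_gpu(adj, k, p, mc))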