-
Notifications
You must be signed in to change notification settings - Fork 0
/
random_walk_pagerank.py
64 lines (59 loc) · 2.32 KB
/
random_walk_pagerank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import csv
import networkx as nx
import random
# Create a directed graph
G = nx.DiGraph()
# Define a function for random walk
def random_walk(steps):
# Initialize a dictionary to store ratings for each node
rating = dict.fromkeys(list(G.nodes), 0)
# Choose a random starting node
current_node = random.choice(list(G.nodes))
# Perform random walk for the specified number of steps
for i in range(steps):
# With probability 0.15, randomly jump to any node
if random.random() < 0.15:
current_node = random.choice(list(G.nodes))
else:
# Select a neighbor of the current node to move to
neighbors = list(G.neighbors(current_node))
if neighbors:
current_node = random.choice(neighbors)
else:
# If the current node has no neighbors, randomly jump to any node
current_node = random.choice(list(G.nodes))
# Increment the rating of the current node
rating[current_node] += 1
return rating
# Open the dataset file
with open("./dataset.csv") as f:
reader = csv.reader(f)
# Iterate over each row in the dataset
for row in reader:
# Skip the header row
if reader.line_num == 1:
continue
# Get the parent node (source) from the first column
parent = row[1].lower().split('@')[0]
# Iterate over the child nodes (targets) in the row
for entry in row[2:]:
# Skip empty entries
if entry == '':
continue
# Get the child node (target) from the last word in the entry
child = entry.lower().split()[-1]
# Add an edge from the parent to the child in the graph
G.add_edge(parent, child)
# Define the total number of steps for the random walk
total = 10000000
# Perform random walk and get the ratings for each node
rating = random_walk(total)
# Sort nodes by their ratings in descending order
ranking = sorted(rating, key=rating.get, reverse=True)
# Print the top 10 leaders based on the random walk algorithm
print("The top 10 leaders -:")
for i in range(10):
name = ranking[i]
rate = rating[name] / total # Normalize rating by total steps
print(i+1, name, rate)
print("The top leader is", ranking[0],"with",rating[ranking[0]]/total, "points.")