-
Notifications
You must be signed in to change notification settings - Fork 0
/
wikipath.py
executable file
·75 lines (61 loc) · 1.99 KB
/
wikipath.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python3 -OO
import heapq
import json
from urllib.parse import quote
from urllib.request import urlopen
class Node:
    """A page in the link graph, identified by its title.

    Equality and hashing look only at ``name``; ``cost`` and ``parent`` are
    bookkeeping for the search and do not affect identity.

    Attributes:
        name: Page title; also what ``str(node)`` returns.
        cost: Cost associated with reaching this node (defaults to 1).
        parent: Node this one was reached from, or None for a root.
    """

    def __init__(self, name, cost=1, parent=None):
        self.name = name
        self.cost = cost
        self.parent = parent

    def __eq__(self, other):
        # Stay duck-typed (anything exposing ``.name`` compares by title), but
        # let Python fall back to its default for objects without a name
        # instead of raising AttributeError.
        try:
            return self.name == other.name
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same field equality uses so nodes work in sets and as dict keys.
        return hash(self.name)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "%s(%r, cost=%r)" % (type(self).__name__, self.name, self.cost)
def get_links(node, plcontinue=False):
    """Fetch the titles linked from a Wikipedia page via the MediaWiki API.

    The query asks for ``prop=links`` restricted to namespace 0, up to 500
    links per request, and keeps requesting further batches for as long as
    the response carries continuation data, so pages with more than 500
    links are returned in full.

    Args:
        node: Page to expand; ``str(node)`` is used as the page title.
        plcontinue: Optional continuation token to resume from. When given,
            only links from that point onward are fetched.

    Returns:
        When ``plcontinue`` is falsy, a list of ``Node`` objects, one per
        link, each with ``cost`` set to ``node.cost + 1`` and ``parent`` set
        to ``node``. When ``plcontinue`` is given, a plain list of link
        titles. An empty list if the page reports no links.

    Raises:
        KeyError: If the response has no ``query``/``pages`` section.
    """
    print(node)
    base_url = 'http://en.wikipedia.org/w/api.php?action=query&format=json&prop=links&pllimit=500&plnamespace=0&titles=%s' % quote(str(node))
    # Extra query parameters that tell the API where to resume.
    continuation = {'plcontinue': plcontinue} if plcontinue else {}
    titles = []
    while True:
        url = base_url + ''.join(
            '&%s=%s' % (quote(str(key)), quote(str(value)))
            for key, value in continuation.items()
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(url) as response:
            data = json.loads(response.read().decode())
        # We don't know the page id, so walk whatever pages came back
        # (a single-title query yields a single entry).
        for page in data['query']['pages'].values():
            titles.extend(link['title'] for link in page.get('links', []))
        # Current API responses put continuation parameters under 'continue';
        # the legacy format nests them under 'query-continue' -> 'links'.
        next_params = data.get('continue') or data.get('query-continue', {}).get('links')
        if not next_params:
            break
        continuation = next_params
    if plcontinue:
        return titles
    return [Node(title, node.cost + 1, node) for title in titles]
def get_path(node):
    """Render the chain of ancestors leading to ``node`` as a string.

    Follows ``parent`` references upward from ``node`` until one is falsy,
    then joins the string forms root-first with " -> ".
    """
    names = [str(node)]
    ancestor = node.parent
    while ancestor:
        names.append(str(ancestor))
        ancestor = ancestor.parent
    # Collected leaf-to-root; flip so the output reads root-to-leaf.
    names.reverse()
    return " -> ".join(names)
def find_path(start, end):
    """Search outward from ``start`` for a chain of links reaching ``end``.

    Nodes are expanded in order of lowest ``cost`` first, with ties resolved
    in discovery order. Each title is queued at most once.

    Args:
        start: Node to begin the search from.
        end: Node to look for; compared with ``==``.

    Returns:
        The path string produced by ``get_path`` for the first node equal to
        ``end`` that is taken off the queue, or None if the reachable pages
        are exhausted without finding it.
    """
    # Heap entries are (cost, insertion order, node). The insertion counter
    # keeps ties in discovery order and means nodes themselves are never
    # compared by the heap.
    frontier = [(start.cost, 0, start)]
    pushed = 1
    # Titles that have ever been queued; replaces linear scans of the
    # open and closed lists with O(1) membership tests.
    seen = {start.name}
    while frontier:
        _, _, current = heapq.heappop(frontier)
        if current == end:
            return get_path(current)
        for link in get_links(current):
            if link.name not in seen:
                seen.add(link.name)
                heapq.heappush(frontier, (link.cost, pushed, link))
                pushed += 1
    return None
def main(start_title="Fox", end_title="Cat"):
    """Search for a link path between two articles and print the result.

    Args:
        start_title: Title of the article to start from (default "Fox").
        end_title: Title of the article to reach (default "Cat").
    """
    start = Node(start_title)
    end = Node(end_title)
    path = find_path(start, end)
    print(path)
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()