forked from Morail/wiki-network
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpywc_dic_check.py
executable file
·33 lines (30 loc) · 1.01 KB
/
pywc_dic_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from sys import argv, exit
import urllib
import csv
import simplejson
def _parse_keywords(dic_text):
    """Return the keywords listed in the last section of a dic file.

    The text is split on "%" and must yield exactly three parts.  The third
    part holds one entry per line, with the keyword in the first
    tab-separated field.  Empty lines and lines starting with "//" are
    skipped.

    Args:
        dic_text: full content of the dic file as a string.

    Returns:
        A list of keyword strings, in file order.

    Raises:
        ValueError: if the text does not split into exactly three parts.
    """
    sections = dic_text.split("%")
    if len(sections) != 3:
        raise ValueError("Invalid dic file!")
    keywords = []
    for line in sections[2].split("\n"):
        if not line or line.startswith("//"):
            continue
        # strip() removes the "\r" left over from CRLF line endings, which
        # would otherwise end up inside the search query.
        keyword = line.split("\t")[0].strip()
        if keyword:
            keywords.append(keyword)
    return keywords


def _search_url(keyword, ns):
    """Build the search API URL for ``keyword`` restricted to namespace ``ns``.

    Both values are percent-encoded so that spaces, "&", "#", "+" and
    non-ASCII characters cannot corrupt the query string.
    """
    try:  # Python 2
        from urllib import quote
    except ImportError:  # Python 3
        from urllib.parse import quote
    return ("http://en.wikipedia.org/w/api.php?action=query&list=search"
            "&srsearch=%s&srnamespace=%s&srprop=&srwhat=text&format=json"
            % (quote(keyword, safe=""), quote(ns, safe="")))


def _total_hits(url):
    """Fetch ``url`` and return ``query.searchinfo.totalhits`` from the JSON.

    Returns 0 when the response does not contain that key path (for
    example when the API answers with an error object).
    """
    from contextlib import closing
    try:  # Python 2
        from urllib import urlopen
    except ImportError:  # Python 3
        from urllib.request import urlopen
    # closing() is used because the Python 2 response object is not a
    # context manager; it guarantees the connection is released.
    with closing(urlopen(url)) as response:
        result = simplejson.load(response)
    try:
        return result["query"]["searchinfo"]["totalhits"]
    except KeyError:
        return 0


def main():
    """Count search hits for every keyword of a dic file and write a CSV.

    Command line: ``<namespace> <input dic file> <output csv file>``.

    For each keyword one row ``keyword, namespace, hits`` is written to the
    output file; progress, the queried URL and the hit count are printed to
    standard output.

    Raises:
        ValueError: if the dic file does not have exactly three
            "%"-separated parts.
        SystemExit: with status 1 if fewer than three arguments are given.
    """
    if len(argv) < 4:
        print("Error: Wrong parameters")
        # Non-zero status so calling scripts can detect the failure.
        exit(1)
    ns = argv[1]

    # Read and validate the input before touching the output file, so an
    # invalid dic file does not truncate an existing result.
    with open(argv[2]) as inp:
        keywords = _parse_keywords(inp.read())

    with open(argv[3], "w") as out:
        csv_writer = csv.writer(out)
        for k in keywords:
            print("Processing keyword: %s" % k)
            url = _search_url(k, ns)
            occurrencies = _total_hits(url)
            print(url)
            print(occurrencies)
            csv_writer.writerow([k, ns, occurrencies])
# Script entry point: run the check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()