This repository has been archived by the owner on Apr 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinitialize_queue.py
95 lines (84 loc) · 2.51 KB
/
initialize_queue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import csv
import getopt
import json
import os
import pika
import sys
from pika.adapters import BlockingConnection
# Settings filled in from the command line further down.
# The first four are required; None means "not supplied yet".
RUN_NUMBER = None  # -r/--run: run id, used in the routing key and message body
FANOUT = None      # -f/--fanout: forwarded verbatim in each message
DEPTH = None       # -d/--depth: forwarded verbatim in each message
NUM_SITES = None   # -n/--num: how many CSV rows to enqueue
# Optional settings and their defaults.
DEBUG = False      # --debug: selects the alternate RabbitMQ host
VERBOSE = False    # -v/--verbose: set by the option parser; given a default
                   # here so reading it without -v cannot raise NameError
OFFSET = 0         # -o/--offset: position in the CSV at which to start
def usage():
    """Print the command-line synopsis for this script to standard output."""
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("python initialize_queue.py -r <run> -n <num_sites> -f <fanout> -d <depth> -o <offset> --debug")
def _int_option(name, text):
    """Return ``text`` as an int, or report the bad value for option ``name`` and exit with status 2."""
    try:
        return int(text)
    except ValueError:
        print("Option %s expects an integer, got %r" % (name, text))
        usage()
        sys.exit(2)


# Parse the command line.  In the short spec a trailing ':' (and in the long
# spec a trailing '=') marks an option that takes a value.
try:
    opts, args = getopt.getopt(
        sys.argv[1:],
        "hr:n:f:d:vo:",
        ["help", "run=", "num=", "fanout=", "depth=", "verbose", "debug", "offset="])
except getopt.GetoptError as err:
    # Print the parser's message (something like "option -a not recognized"),
    # then the synopsis, and exit with a usage-error status.
    print(str(err))
    usage()
    sys.exit(2)

for o, a in opts:
    if o in ("-v", "--verbose"):
        VERBOSE = True
    elif o in ("-h", "--help"):
        usage()
        sys.exit()
    elif o in ("-r", "--run"):
        RUN_NUMBER = _int_option(o, a)
    elif o in ("-f", "--fanout"):
        FANOUT = _int_option(o, a)
    elif o in ("-n", "--num"):
        NUM_SITES = _int_option(o, a)
    elif o in ("-d", "--depth"):
        DEPTH = _int_option(o, a)
    elif o == "--debug":
        # Compared with ==: ("--debug") is just a string, so `in` would be a
        # substring test rather than tuple membership.
        DEBUG = True
    elif o in ("-o", "--offset"):
        OFFSET = _int_option(o, a)
    else:
        # getopt only yields declared options, so this indicates a spec/branch
        # mismatch.  Raised explicitly so it is not stripped under -O.
        raise AssertionError("unhandled option")

# Report every missing required option before giving up, so the user can fix
# them all in one go.
missing_required = False
for value, message in (
        (RUN_NUMBER, "Run number (-r) must be specified"),
        (FANOUT, "Fanout (-f) must be specified"),
        (DEPTH, "Depth (-d) must be specified"),
        (NUM_SITES, "Num sites (-n) must be specified")):
    if value is None:
        print(message)
        missing_required = True
if missing_required:
    # sys.exit with a non-zero status: the site-provided exit() is meant for
    # interactive use and would have reported success (status 0).
    sys.exit(2)
# Connect to RabbitMQ.  --debug selects the alternate broker host.
TARGET_RMQ_SERVER = "ldr.myvnc.com" if DEBUG else "noddy.cs.berkeley.edu"
parameters = pika.ConnectionParameters(TARGET_RMQ_SERVER)
rmq_connection = BlockingConnection(parameters)
try:
    rmq_channel = rmq_connection.channel()
    # NOTE(review): the queue declared here is "pages", but every message
    # below is published through the default exchange with routing key
    # "run<RUN_NUMBER>".  Presumably a consumer declares that queue; if not,
    # the broker will drop these messages.  Confirm against the worker code.
    rmq_channel.queue_declare(queue="pages", durable=True,
                              exclusive=False, auto_delete=False)

    csv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'top-1m.csv')

    # Enqueue exactly NUM_SITES rows after skipping the first OFFSET rows.
    # Ranks are 1-based positions in the CSV.  (The previous counter logic
    # skipped OFFSET-1 rows and published NUM_SITES+1 messages whenever
    # OFFSET >= 1, and published one message even when NUM_SITES was 0.)
    first_rank = OFFSET + 1
    last_rank = OFFSET + NUM_SITES

    # 'rb' is the mode the Python 2 csv module expects.  The with-block
    # guarantees the file handle is released.
    with open(csv_path, 'rb') as csv_file:
        reader = csv.reader(csv_file)
        for n, row in enumerate(reader, 1):
            if n < first_rank:
                continue
            if n > last_rank:
                break

            # The second CSV column is used as the host name.
            url = "http://%s" % row[1]
            command_data = {
                'run': RUN_NUMBER,
                'url': url,
                'fanout': FANOUT,
                'depth': DEPTH
            }

            # Progress line is completed by "Delivered" once publish returns.
            sys.stdout.write("[%2d] Adding page %s... " % (n, url))
            # NOTE(review): delivery_mode=1 is the AMQP "transient" mode, so
            # these messages are not persisted by the broker -- confirm this
            # is intended.
            rmq_channel.basic_publish(exchange='',
                                      routing_key="run%d" % RUN_NUMBER,
                                      body=json.dumps(command_data),
                                      properties=pika.BasicProperties(
                                          content_type="text/plain",
                                          delivery_mode=1))
            print("Delivered")
finally:
    # Close the connection even if reading the CSV or publishing fails.
    rmq_connection.close()