-
Notifications
You must be signed in to change notification settings - Fork 0
/
Mapper.py
125 lines (83 loc) · 3.25 KB
/
Mapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import socket
import sys
from time import sleep

import Connection
class Mapper(object):
    """Word-count mapper that takes work orders from a client over sockets.

    Each work order names a file plus an (offset, size) segment of it. The
    mapper counts the words in that segment, writes the counts to an output
    file, and reports the output file name back to the client.
    """

    # Deletes the punctuation characters that are stripped from every word.
    _PUNCTUATION = str.maketrans("", "", ".,?!'\":;-/")

    def __init__(self, ID, port):
        """Store the mapper's identity and port; no sockets are opened yet.

        Args:
            ID: Identifier used to tell mappers apart; it is embedded in the
                output file name.
            port: Port on which this mapper accepts its incoming connection.
        """
        self.ID = ID  # Used to differentiate between multiple mappers
        self.port = port  # Each mapper listens on its own port
        self.wordCounts = {}  # Dictionary of word -> count
        self.socketToCLI = None  # Socket to send messages back to the client
        self.incomeStream = None  # Connection to receive messages from the client

    def map(self, fileName, offset, size):
        """Count the words in one segment of a file and write them out.

        The file's lines are joined with one extra space appended after every
        line, and characters ``offset`` through ``offset + size`` (inclusive)
        of that joined text form the segment. Words are split on whitespace,
        stripped of punctuation and lower-cased before being counted into
        ``self.wordCounts``. The counts are written to
        ``<root>_I_<ID><extension>`` and that name is sent to the client.

        Args:
            fileName: Path of the input text file.
            offset: Index of the first character of the segment.
            size: Segment length; one character beyond ``offset + size`` is
                still included.
        """
        with open(fileName, "r") as f:
            # Offsets refer to this joined text (a space follows every line),
            # not to raw positions in the file.
            totalString = "".join(line + " " for line in f)

        segment = totalString[offset:offset + size + 1]

        # NOTE(review): wordCounts is never reset, so counts from earlier
        # map() calls carry over into later output files - confirm intended.
        for rawWord in segment.split():
            word = rawWord.translate(self._PUNCTUATION).lower()
            if not word:
                # Token was punctuation only (e.g. "-"); counting it would
                # produce an empty key and a malformed " N" output line.
                continue
            self.wordCounts[word] = self.wordCounts.get(word, 0) + 1

        # Split off only the final extension so paths containing other dots
        # (e.g. "./data/in.txt") still yield a sensible output name.
        root, extension = os.path.splitext(fileName)
        outputFileName = root + "_I_" + str(self.ID) + extension

        self.writeToFile(outputFileName)
        self.socketToCLI.sendall(("Finished Writing to file:" + outputFileName).encode())

    def writeToFile(self, outputFileName):
        """Write every ``word count`` pair to ``outputFileName``, one per line.

        Args:
            outputFileName: Path of the file to create or overwrite.
        """
        # The context manager closes the file even if a write fails.
        with open(outputFileName, "w") as f:
            for key, value in self.wordCounts.items():
                f.write(str(key) + " " + str(value) + "\n")

    def makeConnections(self):
        """Open the listening socket, connect to the client, accept its stream.

        Sets ``self.socketToCLI`` (connection to 127.0.0.1:5001) and
        ``self.incomeStream`` (connection accepted on ``self.port``).
        """
        incomeSock = Connection.createAcceptSocket("127.0.0.1", self.port)
        # NOTE(review): the pauses presumably give the other processes time to
        # start listening/connecting - confirm against the launcher.
        sleep(5)
        self.socketToCLI = Connection.createConnectSocket("127.0.0.1", 5001)
        sleep(5)
        self.incomeStream = Connection.openConnection(incomeSock)

    def closeConnections(self):
        """Close the socket used to talk back to the client."""
        # NOTE(review): incomeStream and the listening socket are not closed
        # here - confirm whether Connection or process exit handles them.
        Connection.closeSocket(self.socketToCLI)

    def receiveMessages(self):
        """Process client messages until told to close or the peer disconnects.

        Messages are separated by ``%``. The message ``close`` ends the loop;
        any other non-empty message must be ``"<fileName> <offset> <size>"``
        (single spaces) and triggers ``map``. Empty messages are ignored.
        """
        print("Mapper is receiving messages.")
        # Poll with a short timeout instead of blocking forever on recv().
        self.incomeStream.settimeout(1)
        while True:
            try:
                data = self.incomeStream.recv(1024).decode()
            except socket.timeout:
                continue
            if not data:
                # recv() returning nothing means the peer closed the
                # connection; without this check the loop would spin forever.
                return
            if data[-1] == "%":
                data = data[:-1]
            for message in data.split("%"):
                if message == "close":
                    return
                if not message:
                    # Consecutive separators produce empty pieces; skip them.
                    continue
                fields = message.split(" ")
                self.map(fields[0], int(fields[1]), int(fields[2]))
############################ END MAPPER CLASS ##############################
def main():
    """Run a single mapper from the command line.

    The first command-line argument is used as the mapper ID and the second
    as its port; both are handed to Mapper exactly as received. The mapper
    then connects, processes messages until it is told to stop, and closes
    its client connection.
    """
    mapperId, port = sys.argv[1], sys.argv[2]
    worker = Mapper(mapperId, port)
    worker.makeConnections()
    worker.receiveMessages()
    worker.closeConnections()


# Only start the mapper when this file is executed as a script.
if __name__ == "__main__":
    main()