-
Notifications
You must be signed in to change notification settings - Fork 0
/
p059.py
189 lines (157 loc) · 6.41 KB
/
p059.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#! /usr/bin/env python
# https://projecteuler.net/problem=59
#
# The message in p059_cipher.txt is an English text that has been
# XOR-encrypted using a key that consists of three lowercase characters
# and repeated throughout the message. Determine the key, decrypt
# the message, and print the sum of the ASCII values in the text.
#
# ====================
#
# Strategy:
#
# This is principally a brute force decryption task. We do not know
# exactly how to identify the plaintext -- only that it will include
# the key in its output, and it will be written in English. We identify
# successful output if more than half the nontrivial words in the
# plaintext can be found in the system's local dictionary.
#
# A naive implementation that uses Python's native bitwise xor to
# decrypt each byte in the message one at a time is very inefficient.
# Profiling shows that this implementation spends about 95% of its
# time in the XOR function. By packing the message and key into
# 64-bit integer arrays and using numpy.bitwise_xor(), run time is
# improved by a factor of 3x-5x.
#
# hitchcock:twp% python p059.py --slow
# 107359
# 3.221932
#
# hitchcock:twp% python p059.py
# 107359
# 0.745225
import argparse
import csv
import re
import time
import numpy
def read_csv(csvfile):
"""Reads CSV data from an input file and returns a single list
containing all of the fields.
Raises ValueError if the input file contains any non-numeric fields.
"""
data = []
with open(csvfile, 'r') as f:
csvreader = csv.reader(f)
for row in csvreader:
data += [ int(x) for x in row ]
return data
def read_dictionary(dictfile='/usr/share/dict/words'):
"""Reads the contents of the specified dictionary.
Returns a set of all the dictionary entries found.
"""
d = set()
with open(dictfile, 'r') as f:
for word in f:
d.add(word.strip())
return d
class XORDecoder:
def __init__(self, dictfile=None):
self._dict = read_dictionary(dictfile) if dictfile else None
def dictionary(self):
return self._dict
def set_dictionary(self, d):
self._dict = d
def key_generator(self):
"""Generates all possible three-character lowercase keys.
Returns a string containing the key on each call to the generator."""
for a in range(ord('a'), ord('z')+1):
for b in range(ord('a'), ord('z')+1):
for c in range(ord('a'), ord('z')+1):
yield chr(a) + chr(b) + chr(c)
@classmethod
def fast_xor(cls, msg, key):
"""Repeatedly XORs a message with a key.
message: a list of integer ASCII values
key: a string of ASCII chars
Returns a string consisting of each character generated by
successively XORing elements of the message with the key.
"""
msglen = len(msg)
# numpy.bitwise_xor requires the operands to be the same length.
# If the message is longer than the key, repeat the key until
# it is long enough.
# If the key is longer than the message, truncate it.
xorkey = key
if msglen > len(key):
xorkey = key * (msglen / len(key) + 1)
xorkey = xorkey[:msglen]
# Pad out the message and the key to a multiple of 8,
# so we can xor 8 bytes at once.
data = msg
if msglen % 8 != 0:
padding = 8 - msglen % 8
data = msg + [0] * padding
xorkey += ''.join([chr(0)] * padding)
plaintext = numpy.bitwise_xor(
numpy.frombuffer(bytearray(data), dtype=numpy.dtype('<Q8')),
numpy.fromstring(xorkey, dtype=numpy.dtype('<Q8'))).tostring()
return plaintext[:msglen]
@classmethod
def slow_xor(cls, msg, key):
"""Repeatedly XORs a message with a key. This version is
only used when the --slow option is invoked.
message: a list of integer ASCII values
key: a string of ASCII chars
Returns a string consisting of each character generated by
successively XORing elements of the message with the key.
"""
plaintext = ''
for i in range(len(msg)):
plaintext += chr(msg[i] ^ ord(key[i % len(key)]))
return plaintext
def decipher(self, message, slow=False):
"""Attempts to decipher the message by repeatedly guessing keys.
Returns the first plaintext string that matches the following
conditions:
* The plaintext includes at least two occurrences of the key
* At least half of the plaintext words at least five letters long
are found in the dictionary.
If no plaintext can be found that satisfies these conditions,
returns None.
"""
xor_func = XORDecoder.slow_xor if slow else XORDecoder.fast_xor
for key in self.key_generator():
cleartext = xor_func(message, key)
# The cleartext is known to include repeated instances of the key.
# Skip any possible solutions that do not include the key at least
# twice.
if cleartext.lower().count(key) < 2:
continue
# The text is expected to consist principally of English
# words; however, it is not guaranteed that every word in
# the text is English or that it will be found in the
# dictionary we have. Report success if we found at least
# one dictionary word, and of the cleartext words that are
# five letters or longer, at least half are found in the
# dictionary.
words = re.findall(r'[A-Za-z]+', cleartext)
longwords = [ w for w in words if len(w) >= 5 ]
englishwords = [ w for w in longwords if w in self._dict ]
if englishwords and len(englishwords) >= len(longwords) / 2:
return cleartext
# Continuing through the end of the loop means that we did not
# find any cleartext that satisfied decryption.
return None
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--slow", help='use slow XOR', action='store_true')
args = parser.parse_args()
t1 = time.clock()
xd = XORDecoder(dictfile='/usr/share/dict/words')
msg = read_csv('p059_cipher.txt')
s = xd.decipher(msg, args.slow)
msgsum = sum([ ord(ch) for ch in s ])
t2 = time.clock()
print msgsum
print "{} seconds".format(t2 - t1)